1 //===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file contains the declarations of the Vectorization Plan base classes: 11 /// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual 12 /// VPBlockBase, together implementing a Hierarchical CFG; 13 /// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained 14 /// within VPBasicBlocks; 15 /// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that 16 /// also inherit from VPValue. 17 /// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned 18 /// instruction; 19 /// 5. The VPlan class holding a candidate for vectorization; 20 /// These are documented in docs/VectorizationPlan.rst. 
21 // 22 //===----------------------------------------------------------------------===// 23 24 #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H 25 #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H 26 27 #include "VPlanAnalysis.h" 28 #include "VPlanValue.h" 29 #include "llvm/ADT/DenseMap.h" 30 #include "llvm/ADT/SmallBitVector.h" 31 #include "llvm/ADT/SmallPtrSet.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/ADT/Twine.h" 34 #include "llvm/ADT/ilist.h" 35 #include "llvm/ADT/ilist_node.h" 36 #include "llvm/Analysis/IVDescriptors.h" 37 #include "llvm/Analysis/VectorUtils.h" 38 #include "llvm/IR/DebugLoc.h" 39 #include "llvm/IR/FMF.h" 40 #include "llvm/IR/Operator.h" 41 #include "llvm/Support/Compiler.h" 42 #include "llvm/Support/InstructionCost.h" 43 #include <algorithm> 44 #include <cassert> 45 #include <cstddef> 46 #include <string> 47 48 namespace llvm { 49 50 class BasicBlock; 51 class DominatorTree; 52 class InnerLoopVectorizer; 53 class IRBuilderBase; 54 struct VPTransformState; 55 class raw_ostream; 56 class RecurrenceDescriptor; 57 class SCEV; 58 class Type; 59 class VPBasicBlock; 60 class VPBuilder; 61 class VPDominatorTree; 62 class VPRegionBlock; 63 class VPlan; 64 class VPLane; 65 class VPReplicateRecipe; 66 class VPlanSlp; 67 class Value; 68 class LoopVectorizationCostModel; 69 class LoopVersioning; 70 71 struct VPCostContext; 72 73 namespace Intrinsic { 74 typedef unsigned ID; 75 } 76 77 using VPlanPtr = std::unique_ptr<VPlan>; 78 79 /// VPBlockBase is the building block of the Hierarchical Control-Flow Graph. 80 /// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock. 81 class LLVM_ABI_FOR_TEST VPBlockBase { 82 friend class VPBlockUtils; 83 84 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). 85 86 /// An optional name for the block. 87 std::string Name; 88 89 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if 90 /// it is a topmost VPBlockBase. 
91 VPRegionBlock *Parent = nullptr; 92 93 /// List of predecessor blocks. 94 SmallVector<VPBlockBase *, 1> Predecessors; 95 96 /// List of successor blocks. 97 SmallVector<VPBlockBase *, 1> Successors; 98 99 /// VPlan containing the block. Can only be set on the entry block of the 100 /// plan. 101 VPlan *Plan = nullptr; 102 103 /// Add \p Successor as the last successor to this block. appendSuccessor(VPBlockBase * Successor)104 void appendSuccessor(VPBlockBase *Successor) { 105 assert(Successor && "Cannot add nullptr successor!"); 106 Successors.push_back(Successor); 107 } 108 109 /// Add \p Predecessor as the last predecessor to this block. appendPredecessor(VPBlockBase * Predecessor)110 void appendPredecessor(VPBlockBase *Predecessor) { 111 assert(Predecessor && "Cannot add nullptr predecessor!"); 112 Predecessors.push_back(Predecessor); 113 } 114 115 /// Remove \p Predecessor from the predecessors of this block. removePredecessor(VPBlockBase * Predecessor)116 void removePredecessor(VPBlockBase *Predecessor) { 117 auto Pos = find(Predecessors, Predecessor); 118 assert(Pos && "Predecessor does not exist"); 119 Predecessors.erase(Pos); 120 } 121 122 /// Remove \p Successor from the successors of this block. removeSuccessor(VPBlockBase * Successor)123 void removeSuccessor(VPBlockBase *Successor) { 124 auto Pos = find(Successors, Successor); 125 assert(Pos && "Successor does not exist"); 126 Successors.erase(Pos); 127 } 128 129 /// This function replaces one predecessor with another, useful when 130 /// trying to replace an old block in the CFG with a new one. 
replacePredecessor(VPBlockBase * Old,VPBlockBase * New)131 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) { 132 auto I = find(Predecessors, Old); 133 assert(I != Predecessors.end()); 134 assert(Old->getParent() == New->getParent() && 135 "replaced predecessor must have the same parent"); 136 *I = New; 137 } 138 139 /// This function replaces one successor with another, useful when 140 /// trying to replace an old block in the CFG with a new one. replaceSuccessor(VPBlockBase * Old,VPBlockBase * New)141 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) { 142 auto I = find(Successors, Old); 143 assert(I != Successors.end()); 144 assert(Old->getParent() == New->getParent() && 145 "replaced successor must have the same parent"); 146 *I = New; 147 } 148 149 protected: VPBlockBase(const unsigned char SC,const std::string & N)150 VPBlockBase(const unsigned char SC, const std::string &N) 151 : SubclassID(SC), Name(N) {} 152 153 public: 154 /// An enumeration for keeping track of the concrete subclass of VPBlockBase 155 /// that are actually instantiated. Values of this enumeration are kept in the 156 /// SubclassID field of the VPBlockBase objects. They are used for concrete 157 /// type identification. 158 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC }; 159 160 using VPBlocksTy = SmallVectorImpl<VPBlockBase *>; 161 162 virtual ~VPBlockBase() = default; 163 getName()164 const std::string &getName() const { return Name; } 165 setName(const Twine & newName)166 void setName(const Twine &newName) { Name = newName.str(); } 167 168 /// \return an ID for the concrete type of this object. 169 /// This is used to implement the classof checks. This should not be used 170 /// for any other purpose, as the values may change as LLVM evolves. 
getVPBlockID()171 unsigned getVPBlockID() const { return SubclassID; } 172 getParent()173 VPRegionBlock *getParent() { return Parent; } getParent()174 const VPRegionBlock *getParent() const { return Parent; } 175 176 /// \return A pointer to the plan containing the current block. 177 VPlan *getPlan(); 178 const VPlan *getPlan() const; 179 180 /// Sets the pointer of the plan containing the block. The block must be the 181 /// entry block into the VPlan. 182 void setPlan(VPlan *ParentPlan); 183 setParent(VPRegionBlock * P)184 void setParent(VPRegionBlock *P) { Parent = P; } 185 186 /// \return the VPBasicBlock that is the entry of this VPBlockBase, 187 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this 188 /// VPBlockBase is a VPBasicBlock, it is returned. 189 const VPBasicBlock *getEntryBasicBlock() const; 190 VPBasicBlock *getEntryBasicBlock(); 191 192 /// \return the VPBasicBlock that is the exiting this VPBlockBase, 193 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this 194 /// VPBlockBase is a VPBasicBlock, it is returned. 195 const VPBasicBlock *getExitingBasicBlock() const; 196 VPBasicBlock *getExitingBasicBlock(); 197 getSuccessors()198 const VPBlocksTy &getSuccessors() const { return Successors; } getSuccessors()199 VPBlocksTy &getSuccessors() { return Successors; } 200 successors()201 iterator_range<VPBlockBase **> successors() { return Successors; } predecessors()202 iterator_range<VPBlockBase **> predecessors() { return Predecessors; } 203 getPredecessors()204 const VPBlocksTy &getPredecessors() const { return Predecessors; } getPredecessors()205 VPBlocksTy &getPredecessors() { return Predecessors; } 206 207 /// \return the successor of this VPBlockBase if it has a single successor. 208 /// Otherwise return a null pointer. getSingleSuccessor()209 VPBlockBase *getSingleSuccessor() const { 210 return (Successors.size() == 1 ? 
*Successors.begin() : nullptr); 211 } 212 213 /// \return the predecessor of this VPBlockBase if it has a single 214 /// predecessor. Otherwise return a null pointer. getSinglePredecessor()215 VPBlockBase *getSinglePredecessor() const { 216 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr); 217 } 218 getNumSuccessors()219 size_t getNumSuccessors() const { return Successors.size(); } getNumPredecessors()220 size_t getNumPredecessors() const { return Predecessors.size(); } 221 222 /// An Enclosing Block of a block B is any block containing B, including B 223 /// itself. \return the closest enclosing block starting from "this", which 224 /// has successors. \return the root enclosing block if all enclosing blocks 225 /// have no successors. 226 VPBlockBase *getEnclosingBlockWithSuccessors(); 227 228 /// \return the closest enclosing block starting from "this", which has 229 /// predecessors. \return the root enclosing block if all enclosing blocks 230 /// have no predecessors. 231 VPBlockBase *getEnclosingBlockWithPredecessors(); 232 233 /// \return the successors either attached directly to this VPBlockBase or, if 234 /// this VPBlockBase is the exit block of a VPRegionBlock and has no 235 /// successors of its own, search recursively for the first enclosing 236 /// VPRegionBlock that has successors and return them. If no such 237 /// VPRegionBlock exists, return the (empty) successors of the topmost 238 /// VPBlockBase reached. getHierarchicalSuccessors()239 const VPBlocksTy &getHierarchicalSuccessors() { 240 return getEnclosingBlockWithSuccessors()->getSuccessors(); 241 } 242 243 /// \return the hierarchical successor of this VPBlockBase if it has a single 244 /// hierarchical successor. Otherwise return a null pointer. 
getSingleHierarchicalSuccessor()245 VPBlockBase *getSingleHierarchicalSuccessor() { 246 return getEnclosingBlockWithSuccessors()->getSingleSuccessor(); 247 } 248 249 /// \return the predecessors either attached directly to this VPBlockBase or, 250 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no 251 /// predecessors of its own, search recursively for the first enclosing 252 /// VPRegionBlock that has predecessors and return them. If no such 253 /// VPRegionBlock exists, return the (empty) predecessors of the topmost 254 /// VPBlockBase reached. getHierarchicalPredecessors()255 const VPBlocksTy &getHierarchicalPredecessors() { 256 return getEnclosingBlockWithPredecessors()->getPredecessors(); 257 } 258 259 /// \return the hierarchical predecessor of this VPBlockBase if it has a 260 /// single hierarchical predecessor. Otherwise return a null pointer. getSingleHierarchicalPredecessor()261 VPBlockBase *getSingleHierarchicalPredecessor() { 262 return getEnclosingBlockWithPredecessors()->getSinglePredecessor(); 263 } 264 265 /// Set a given VPBlockBase \p Successor as the single successor of this 266 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor. 267 /// This VPBlockBase must have no successors. setOneSuccessor(VPBlockBase * Successor)268 void setOneSuccessor(VPBlockBase *Successor) { 269 assert(Successors.empty() && "Setting one successor when others exist."); 270 assert(Successor->getParent() == getParent() && 271 "connected blocks must have the same parent"); 272 appendSuccessor(Successor); 273 } 274 275 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two 276 /// successors of this VPBlockBase. This VPBlockBase is not added as 277 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no 278 /// successors. 
setTwoSuccessors(VPBlockBase * IfTrue,VPBlockBase * IfFalse)279 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) { 280 assert(Successors.empty() && "Setting two successors when others exist."); 281 appendSuccessor(IfTrue); 282 appendSuccessor(IfFalse); 283 } 284 285 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase. 286 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added 287 /// as successor of any VPBasicBlock in \p NewPreds. setPredecessors(ArrayRef<VPBlockBase * > NewPreds)288 void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) { 289 assert(Predecessors.empty() && "Block predecessors already set."); 290 for (auto *Pred : NewPreds) 291 appendPredecessor(Pred); 292 } 293 294 /// Set each VPBasicBlock in \p NewSuccss as successor of this VPBlockBase. 295 /// This VPBlockBase must have no successors. This VPBlockBase is not added 296 /// as predecessor of any VPBasicBlock in \p NewSuccs. setSuccessors(ArrayRef<VPBlockBase * > NewSuccs)297 void setSuccessors(ArrayRef<VPBlockBase *> NewSuccs) { 298 assert(Successors.empty() && "Block successors already set."); 299 for (auto *Succ : NewSuccs) 300 appendSuccessor(Succ); 301 } 302 303 /// Remove all the predecessor of this block. clearPredecessors()304 void clearPredecessors() { Predecessors.clear(); } 305 306 /// Remove all the successors of this block. clearSuccessors()307 void clearSuccessors() { Successors.clear(); } 308 309 /// Swap predecessors of the block. The block must have exactly 2 310 /// predecessors. swapPredecessors()311 void swapPredecessors() { 312 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap"); 313 std::swap(Predecessors[0], Predecessors[1]); 314 } 315 316 /// Swap successors of the block. The block must have exactly 2 successors. 317 // TODO: This should be part of introducing conditional branch recipes rather 318 // than being independent. 
swapSuccessors()319 void swapSuccessors() { 320 assert(Successors.size() == 2 && "must have 2 successors to swap"); 321 std::swap(Successors[0], Successors[1]); 322 } 323 324 /// Returns the index for \p Pred in the blocks predecessors list. getIndexForPredecessor(const VPBlockBase * Pred)325 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const { 326 assert(count(Predecessors, Pred) == 1 && 327 "must have Pred exactly once in Predecessors"); 328 return std::distance(Predecessors.begin(), find(Predecessors, Pred)); 329 } 330 331 /// Returns the index for \p Succ in the blocks successor list. getIndexForSuccessor(const VPBlockBase * Succ)332 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const { 333 assert(count(Successors, Succ) == 1 && 334 "must have Succ exactly once in Successors"); 335 return std::distance(Successors.begin(), find(Successors, Succ)); 336 } 337 338 /// The method which generates the output IR that correspond to this 339 /// VPBlockBase, thereby "executing" the VPlan. 340 virtual void execute(VPTransformState *State) = 0; 341 342 /// Return the cost of the block. 343 virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0; 344 345 /// Return true if it is legal to hoist instructions into this block. isLegalToHoistInto()346 bool isLegalToHoistInto() { 347 // There are currently no constraints that prevent an instruction to be 348 // hoisted into a VPBlockBase. 349 return true; 350 } 351 352 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 353 void printAsOperand(raw_ostream &OS, bool PrintType = false) const { 354 OS << getName(); 355 } 356 357 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines 358 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using 359 /// consequtive numbers. 360 /// 361 /// Note that the numbering is applied to the whole VPlan, so printing 362 /// individual blocks is consistent with the whole VPlan printing. 
363 virtual void print(raw_ostream &O, const Twine &Indent, 364 VPSlotTracker &SlotTracker) const = 0; 365 366 /// Print plain-text dump of this VPlan to \p O. 367 void print(raw_ostream &O) const; 368 369 /// Print the successors of this block to \p O, prefixing all lines with \p 370 /// Indent. 371 void printSuccessors(raw_ostream &O, const Twine &Indent) const; 372 373 /// Dump this VPBlockBase to dbgs(). dump()374 LLVM_DUMP_METHOD void dump() const { print(dbgs()); } 375 #endif 376 377 /// Clone the current block and it's recipes without updating the operands of 378 /// the cloned recipes, including all blocks in the single-entry single-exit 379 /// region for VPRegionBlocks. 380 virtual VPBlockBase *clone() = 0; 381 }; 382 383 /// VPRecipeBase is a base class modeling a sequence of one or more output IR 384 /// instructions. VPRecipeBase owns the VPValues it defines through VPDef 385 /// and is responsible for deleting its defined values. Single-value 386 /// recipes must inherit from VPSingleDef instead of inheriting from both 387 /// VPRecipeBase and VPValue separately. 388 class LLVM_ABI_FOR_TEST VPRecipeBase 389 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>, 390 public VPDef, 391 public VPUser { 392 friend VPBasicBlock; 393 friend class VPBlockUtils; 394 395 /// Each VPRecipe belongs to a single VPBasicBlock. 396 VPBasicBlock *Parent = nullptr; 397 398 /// The debug location for the recipe. 399 DebugLoc DL; 400 401 public: 402 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands, 403 DebugLoc DL = {}) VPDef(SC)404 : VPDef(SC), VPUser(Operands), DL(DL) {} 405 406 virtual ~VPRecipeBase() = default; 407 408 /// Clone the current recipe. 409 virtual VPRecipeBase *clone() = 0; 410 411 /// \return the VPBasicBlock which this VPRecipe belongs to. 
getParent()412 VPBasicBlock *getParent() { return Parent; } getParent()413 const VPBasicBlock *getParent() const { return Parent; } 414 415 /// The method which generates the output IR instructions that correspond to 416 /// this VPRecipe, thereby "executing" the VPlan. 417 virtual void execute(VPTransformState &State) = 0; 418 419 /// Return the cost of this recipe, taking into account if the cost 420 /// computation should be skipped and the ForceTargetInstructionCost flag. 421 /// Also takes care of printing the cost for debugging. 422 InstructionCost cost(ElementCount VF, VPCostContext &Ctx); 423 424 /// Insert an unlinked recipe into a basic block immediately before 425 /// the specified recipe. 426 void insertBefore(VPRecipeBase *InsertPos); 427 /// Insert an unlinked recipe into \p BB immediately before the insertion 428 /// point \p IP; 429 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP); 430 431 /// Insert an unlinked Recipe into a basic block immediately after 432 /// the specified Recipe. 433 void insertAfter(VPRecipeBase *InsertPos); 434 435 /// Unlink this recipe from its current VPBasicBlock and insert it into 436 /// the VPBasicBlock that MovePos lives in, right after MovePos. 437 void moveAfter(VPRecipeBase *MovePos); 438 439 /// Unlink this recipe and insert into BB before I. 440 /// 441 /// \pre I is a valid iterator into BB. 442 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I); 443 444 /// This method unlinks 'this' from the containing basic block, but does not 445 /// delete it. 446 void removeFromParent(); 447 448 /// This method unlinks 'this' from the containing basic block and deletes it. 449 /// 450 /// \returns an iterator pointing to the element after the erased one 451 iplist<VPRecipeBase>::iterator eraseFromParent(); 452 453 /// Method to support type inquiry through isa, cast, and dyn_cast. 
classof(const VPDef * D)454 static inline bool classof(const VPDef *D) { 455 // All VPDefs are also VPRecipeBases. 456 return true; 457 } 458 classof(const VPUser * U)459 static inline bool classof(const VPUser *U) { return true; } 460 461 /// Returns true if the recipe may have side-effects. 462 bool mayHaveSideEffects() const; 463 464 /// Returns true for PHI-like recipes. 465 bool isPhi() const; 466 467 /// Returns true if the recipe may read from memory. 468 bool mayReadFromMemory() const; 469 470 /// Returns true if the recipe may write to memory. 471 bool mayWriteToMemory() const; 472 473 /// Returns true if the recipe may read from or write to memory. mayReadOrWriteMemory()474 bool mayReadOrWriteMemory() const { 475 return mayReadFromMemory() || mayWriteToMemory(); 476 } 477 478 /// Returns the debug location of the recipe. getDebugLoc()479 DebugLoc getDebugLoc() const { return DL; } 480 481 /// Return true if the recipe is a scalar cast. 482 bool isScalarCast() const; 483 484 /// Set the recipe's debug location to \p NewDL. setDebugLoc(DebugLoc NewDL)485 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; } 486 487 protected: 488 /// Compute the cost of this recipe either using a recipe's specialized 489 /// implementation or using the legacy cost model and the underlying 490 /// instructions. 491 virtual InstructionCost computeCost(ElementCount VF, 492 VPCostContext &Ctx) const; 493 }; 494 495 // Helper macro to define common classof implementations for recipes. 
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *D) {                                 \
    return D->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    auto *R = V->getDefiningRecipe();                                          \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    auto *R = dyn_cast<VPRecipeBase>(U);                                       \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPDefID() == VPDefID;                                         \
  }

/// VPSingleDef is a base class for recipes for modeling a sequence of one or
/// more output IR that define a single result VPValue.
/// Note that VPRecipeBase must be inherited from before VPValue.
class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
public:
  /// Create a single-def recipe with subclass ID \p SC and operands
  /// \p Operands, without an underlying IR value.
  VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
                    DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this) {}

  /// Create a single-def recipe whose defined VPValue wraps the underlying IR
  /// value \p UV.
  VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
                    Value *UV, DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}

  static inline bool classof(const VPRecipeBase *R) {
    // Exhaustive switch (no default) so -Wswitch flags newly added VPDefIDs
    // that have not been classified as single-def or not.
    switch (R->getVPDefID()) {
    case VPRecipeBase::VPDerivedIVSC:
    case VPRecipeBase::VPEVLBasedIVPHISC:
    case VPRecipeBase::VPExpandSCEVSC:
    case VPRecipeBase::VPExpressionSC:
    case VPRecipeBase::VPInstructionSC:
    case VPRecipeBase::VPReductionEVLSC:
    case VPRecipeBase::VPReductionSC:
    case VPRecipeBase::VPReplicateSC:
    case VPRecipeBase::VPScalarIVStepsSC:
    case VPRecipeBase::VPVectorPointerSC:
    case VPRecipeBase::VPVectorEndPointerSC:
    case VPRecipeBase::VPWidenCallSC:
    case VPRecipeBase::VPWidenCanonicalIVSC:
    case VPRecipeBase::VPWidenCastSC:
    case VPRecipeBase::VPWidenGEPSC:
    case VPRecipeBase::VPWidenIntrinsicSC:
    case VPRecipeBase::VPWidenSC:
    case VPRecipeBase::VPWidenSelectSC:
    case VPRecipeBase::VPBlendSC:
    case VPRecipeBase::VPPredInstPHISC:
    case VPRecipeBase::VPCanonicalIVPHISC:
    case VPRecipeBase::VPActiveLaneMaskPHISC:
    case VPRecipeBase::VPFirstOrderRecurrencePHISC:
    case VPRecipeBase::VPWidenPHISC:
    case VPRecipeBase::VPWidenIntOrFpInductionSC:
    case VPRecipeBase::VPWidenPointerInductionSC:
    case VPRecipeBase::VPReductionPHISC:
    case VPRecipeBase::VPPartialReductionSC:
      return true;
    case VPRecipeBase::VPBranchOnMaskSC:
    case VPRecipeBase::VPInterleaveSC:
    case VPRecipeBase::VPIRInstructionSC:
    case VPRecipeBase::VPWidenLoadEVLSC:
    case VPRecipeBase::VPWidenLoadSC:
    case VPRecipeBase::VPWidenStoreEVLSC:
    case VPRecipeBase::VPWidenStoreSC:
    case VPRecipeBase::VPHistogramSC:
      // TODO: Widened stores don't define a value, but widened loads do. Split
      // the recipes to be able to make widened loads VPSingleDefRecipes.
      return false;
    }
    llvm_unreachable("Unhandled VPDefID");
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  virtual VPSingleDefRecipe *clone() override = 0;

  /// Returns the underlying instruction.
  Instruction *getUnderlyingInstr() {
    return cast<Instruction>(getUnderlyingValue());
  }
  const Instruction *getUnderlyingInstr() const {
    return cast<Instruction>(getUnderlyingValue());
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPSingleDefRecipe to dbgs() (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif
};

/// Class to record and manage LLVM IR flags.
class VPIRFlags {
  /// Discriminant for the union below: which family of IR flags (if any) this
  /// object currently stores.
  enum class OperationType : unsigned char {
    Cmp,
    OverflowingBinOp,
    Trunc,
    DisjointOp,
    PossiblyExactOp,
    GEPOp,
    FPMathOp,
    NonNegOp,
    Other
  };

public:
  /// nuw/nsw flags of overflowing binary operators.
  struct WrapFlagsTy {
    char HasNUW : 1;
    char HasNSW : 1;

    WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
  };

  /// nuw/nsw flags of truncation instructions.
  struct TruncFlagsTy {
    char HasNUW : 1;
    char HasNSW : 1;

    TruncFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
  };

  /// disjoint flag of `or` instructions.
  struct DisjointFlagsTy {
    char IsDisjoint : 1;
    DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
  };

  /// nneg flag of zext-like instructions.
  struct NonNegFlagsTy {
    char NonNeg : 1;
    NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
  };

private:
  struct ExactFlagsTy {
    char IsExact : 1;
  };
  struct FastMathFlagsTy {
    char AllowReassoc : 1;
    char NoNaNs : 1;
    char NoInfs : 1;
    char NoSignedZeros : 1;
    char AllowReciprocal : 1;
    char AllowContract : 1;
    char ApproxFunc : 1;

    LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
  };

  OperationType OpType;

  /// Storage for the flag set selected by OpType. AllFlags aliases all the
  /// other members and is used for zero-initialization and bulk copies.
  union {
    CmpInst::Predicate CmpPredicate;
    WrapFlagsTy WrapFlags;
    TruncFlagsTy TruncFlags;
    DisjointFlagsTy DisjointFlags;
    ExactFlagsTy ExactFlags;
    GEPNoWrapFlags GEPFlags;
    NonNegFlagsTy NonNegFlags;
    FastMathFlagsTy FMFs;
    unsigned AllFlags;
  };

public:
  VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}

  /// Capture the flags of \p I. The dyn_cast chain is ordered: e.g. icmp and
  /// disjoint-or must be matched before the more general operator classes.
  VPIRFlags(Instruction &I) {
    if (auto *Op = dyn_cast<CmpInst>(&I)) {
      OpType = OperationType::Cmp;
      CmpPredicate = Op->getPredicate();
    } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
      OpType = OperationType::DisjointOp;
      DisjointFlags.IsDisjoint = Op->isDisjoint();
    } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
      OpType = OperationType::OverflowingBinOp;
      WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
    } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
      OpType = OperationType::Trunc;
      TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
    } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
      OpType = OperationType::PossiblyExactOp;
      ExactFlags.IsExact = Op->isExact();
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
      OpType = OperationType::GEPOp;
      GEPFlags = GEP->getNoWrapFlags();
    } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
      OpType = OperationType::NonNegOp;
      NonNegFlags.NonNeg = PNNI->hasNonNeg();
    } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
      OpType = OperationType::FPMathOp;
      FMFs = Op->getFastMathFlags();
    } else {
      OpType = OperationType::Other;
      AllFlags = 0;
    }
  }

  VPIRFlags(CmpInst::Predicate Pred)
      : OpType(OperationType::Cmp), CmpPredicate(Pred) {}

  VPIRFlags(WrapFlagsTy WrapFlags)
      : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}

  VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}

  VPIRFlags(DisjointFlagsTy DisjointFlags)
      : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}

  VPIRFlags(NonNegFlagsTy NonNegFlags)
      : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}

  VPIRFlags(GEPNoWrapFlags GEPFlags)
      : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}

public:
  /// Copy the flag state (kind and raw bits) from \p Other into this object.
  void transferFlags(VPIRFlags &Other) {
    OpType = Other.OpType;
    AllFlags = Other.AllFlags;
  }

  /// Drop all poison-generating flags.
  void dropPoisonGeneratingFlags() {
    // NOTE: This needs to be kept in-sync with
    // Instruction::dropPoisonGeneratingFlags.
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      WrapFlags.HasNUW = false;
      WrapFlags.HasNSW = false;
      break;
    case OperationType::Trunc:
      TruncFlags.HasNUW = false;
      TruncFlags.HasNSW = false;
      break;
    case OperationType::DisjointOp:
      DisjointFlags.IsDisjoint = false;
      break;
    case OperationType::PossiblyExactOp:
      ExactFlags.IsExact = false;
      break;
    case OperationType::GEPOp:
      GEPFlags = GEPNoWrapFlags::none();
      break;
    case OperationType::FPMathOp:
      FMFs.NoNaNs = false;
      FMFs.NoInfs = false;
      break;
    case OperationType::NonNegOp:
      NonNegFlags.NonNeg = false;
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }

  /// Apply the IR flags to \p I.
  void applyFlags(Instruction &I) const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
      I.setHasNoSignedWrap(WrapFlags.HasNSW);
      break;
    case OperationType::Trunc:
      I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
      I.setHasNoSignedWrap(TruncFlags.HasNSW);
      break;
    case OperationType::DisjointOp:
      cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
      break;
    case OperationType::PossiblyExactOp:
      I.setIsExact(ExactFlags.IsExact);
      break;
    case OperationType::GEPOp:
      cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
      break;
    case OperationType::FPMathOp:
      I.setHasAllowReassoc(FMFs.AllowReassoc);
      I.setHasNoNaNs(FMFs.NoNaNs);
      I.setHasNoInfs(FMFs.NoInfs);
      I.setHasNoSignedZeros(FMFs.NoSignedZeros);
      I.setHasAllowReciprocal(FMFs.AllowReciprocal);
      I.setHasAllowContract(FMFs.AllowContract);
      I.setHasApproxFunc(FMFs.ApproxFunc);
      break;
    case OperationType::NonNegOp:
      I.setNonNeg(NonNegFlags.NonNeg);
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }

  CmpInst::Predicate getPredicate() const {
    assert(OpType == OperationType::Cmp &&
           "recipe doesn't have a compare predicate");
    return CmpPredicate;
  }

  void setPredicate(CmpInst::Predicate Pred) {
    assert(OpType == OperationType::Cmp &&
           "recipe doesn't have a compare predicate");
    CmpPredicate = Pred;
  }

  // NOTE(review): unlike the other accessors, this reads the union member
  // without asserting OpType == GEPOp — callers appear responsible for only
  // querying GEP recipes; confirm before relying on it for other kinds.
  GEPNoWrapFlags getGEPNoWrapFlags() const { return GEPFlags; }

  /// Returns true if the recipe has fast-math flags.
  bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }

  LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const;

  /// Returns true if the recipe has non-negative flag.
  bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }

  bool isNonNeg() const {
    assert(OpType == OperationType::NonNegOp &&
           "recipe doesn't have a NNEG flag");
    return NonNegFlags.NonNeg;
  }

  bool hasNoUnsignedWrap() const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      return WrapFlags.HasNUW;
    case OperationType::Trunc:
      return TruncFlags.HasNUW;
    default:
      llvm_unreachable("recipe doesn't have a NUW flag");
    }
  }

  bool hasNoSignedWrap() const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      return WrapFlags.HasNSW;
    case OperationType::Trunc:
      return TruncFlags.HasNSW;
    default:
      llvm_unreachable("recipe doesn't have a NSW flag");
    }
  }

  bool isDisjoint() const {
    assert(OpType == OperationType::DisjointOp &&
           "recipe cannot have a disjoing flag");
    return DisjointFlags.IsDisjoint;
  }

#if !defined(NDEBUG)
  /// Returns true if the set flags are valid for \p Opcode.
  bool flagsValidForOpcode(unsigned Opcode) const;
#endif

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printFlags(raw_ostream &O) const;
#endif
};

/// A pure-virtual common base class for recipes defining a single VPValue and
/// using IR flags.
struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
  /// Create a recipe with no IR flags set.
  VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
                      DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags() {}

  /// Create a recipe taking its flags, underlying value and debug location
  /// from \p I.
  VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
                      Instruction &I)
      : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()), VPIRFlags(I) {}

  /// Create a recipe with the explicitly provided flags \p Flags.
  VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
                      const VPIRFlags &Flags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}

  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
           R->getVPDefID() == VPRecipeBase::VPReductionSC ||
           R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
           R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
           R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
           R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  static inline bool classof(const VPValue *V) {
    auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
    return R && classof(R);
  }

  void execute(VPTransformState &State) override = 0;
};

/// Helper to access the operand that contains the unroll part for this recipe
/// after unrolling.
template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
protected:
  /// Return the VPValue operand containing the unroll part or null if there is
  /// no such operand.
  VPValue *getUnrollPartOperand(VPUser &U) const;

  /// Return the unroll part.
  unsigned getUnrollPart(VPUser &U) const;
};

/// Helper to manage IR metadata for recipes. It filters out metadata that
/// cannot be propagated.
class VPIRMetadata {
  SmallVector<std::pair<unsigned, MDNode *>> Metadata;

public:
  VPIRMetadata() {}

  /// Adds metadata that can be preserved from the original instruction
  /// \p I.
  VPIRMetadata(Instruction &I) { getMetadataToPropagate(&I, Metadata); }

  /// Adds metadata that can be preserved from the original instruction
  /// \p I and noalias metadata guaranteed by runtime checks using \p LVer.
  VPIRMetadata(Instruction &I, LoopVersioning *LVer);

  /// Copy constructor for cloning.
  VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {}

  /// Add all metadata to \p I.
  void applyMetadata(Instruction &I) const;

  /// Add metadata with kind \p Kind and \p Node.
  void addMetadata(unsigned Kind, MDNode *Node) {
    Metadata.emplace_back(Kind, Node);
  }
};

/// This is a concrete Recipe that models a single VPlan-level instruction.
944 /// While as any Recipe it may generate a sequence of IR instructions when 945 /// executed, these instructions would always form a single-def expression as 946 /// the VPInstruction is also a single def-use vertex. 947 class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, 948 public VPIRMetadata, 949 public VPUnrollPartAccessor<1> { 950 friend class VPlanSlp; 951 952 public: 953 /// VPlan opcodes, extending LLVM IR with idiomatics instructions. 954 enum { 955 FirstOrderRecurrenceSplice = 956 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous 957 // values of a first-order recurrence. 958 Not, 959 SLPLoad, 960 SLPStore, 961 ActiveLaneMask, 962 ExplicitVectorLength, 963 CalculateTripCountMinusVF, 964 // Increment the canonical IV separately for each unrolled part. 965 CanonicalIVIncrementForPart, 966 BranchOnCount, 967 BranchOnCond, 968 Broadcast, 969 /// Given operands of (the same) struct type, creates a struct of fixed- 970 /// width vectors each containing a struct field of all operands. The 971 /// number of operands matches the element count of every vector. 972 BuildStructVector, 973 /// Creates a fixed-width vector containing all operands. The number of 974 /// operands matches the vector element count. 975 BuildVector, 976 /// Compute the final result of a AnyOf reduction with select(cmp(),x,y), 977 /// where one of (x,y) is loop invariant, and both x and y are integer type. 978 ComputeAnyOfResult, 979 ComputeFindIVResult, 980 ComputeReductionResult, 981 // Extracts the last lane from its operand if it is a vector, or the last 982 // part if scalar. In the latter case, the recipe will be removed during 983 // unrolling. 984 ExtractLastElement, 985 // Extracts the second-to-last lane from its operand or the second-to-last 986 // part if it is scalar. In the latter case, the recipe will be removed 987 // during unrolling. 988 ExtractPenultimateElement, 989 LogicalAnd, // Non-poison propagating logical And. 
990 // Add an offset in bytes (second operand) to a base pointer (first 991 // operand). Only generates scalar values (either for the first lane only or 992 // for all lanes, depending on its uses). 993 PtrAdd, 994 // Returns a scalar boolean value, which is true if any lane of its 995 // (boolean) vector operands is true. It produces the reduced value across 996 // all unrolled iterations. Unrolling will add all copies of its original 997 // operand as additional operands. 998 AnyOf, 999 // Calculates the first active lane index of the vector predicate operands. 1000 // It produces the lane index across all unrolled iterations. Unrolling will 1001 // add all copies of its original operand as additional operands. 1002 FirstActiveLane, 1003 1004 // The opcodes below are used for VPInstructionWithType. 1005 // 1006 /// Scale the first operand (vector step) by the second operand 1007 /// (scalar-step). Casts both operands to the result type if needed. 1008 WideIVStep, 1009 /// Start vector for reductions with 3 operands: the original start value, 1010 /// the identity value for the reduction and an integer indicating the 1011 /// scaling factor. 1012 ReductionStartVector, 1013 // Creates a step vector starting from 0 to VF with a step of 1. 1014 StepVector, 1015 1016 }; 1017 1018 private: 1019 typedef unsigned char OpcodeTy; 1020 OpcodeTy Opcode; 1021 1022 /// An optional name that can be used for the generated IR instruction. 1023 const std::string Name; 1024 1025 /// Returns true if this VPInstruction generates scalar values for all lanes. 1026 /// Most VPInstructions generate a single value per part, either vector or 1027 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar) 1028 /// values per all lanes, stemming from an original ingredient. This method 1029 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an 1030 /// underlying ingredient. 
1031 bool doesGeneratePerAllLanes() const; 1032 1033 /// Returns true if we can generate a scalar for the first lane only if 1034 /// needed. 1035 bool canGenerateScalarForFirstLane() const; 1036 1037 /// Utility methods serving execute(): generates a single vector instance of 1038 /// the modeled instruction. \returns the generated value. . In some cases an 1039 /// existing value is returned rather than a generated one. 1040 Value *generate(VPTransformState &State); 1041 1042 /// Utility methods serving execute(): generates a scalar single instance of 1043 /// the modeled instruction for a given lane. \returns the scalar generated 1044 /// value for lane \p Lane. 1045 Value *generatePerLane(VPTransformState &State, const VPLane &Lane); 1046 1047 #if !defined(NDEBUG) 1048 /// Return the number of operands determined by the opcode of the 1049 /// VPInstruction. Returns -1u if the number of operands cannot be determined 1050 /// directly by the opcode. 1051 static unsigned getNumOperandsForOpcode(unsigned Opcode); 1052 #endif 1053 1054 public: 1055 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL = {}, 1056 const Twine &Name = "") VPRecipeWithIRFlags(VPDef::VPInstructionSC,Operands,DL)1057 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL), 1058 VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {} 1059 1060 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, 1061 const VPIRFlags &Flags, DebugLoc DL = {}, 1062 const Twine &Name = ""); 1063 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)1064 VP_CLASSOF_IMPL(VPDef::VPInstructionSC) 1065 1066 VPInstruction *clone() override { 1067 SmallVector<VPValue *, 2> Operands(operands()); 1068 auto *New = new VPInstruction(Opcode, Operands, *this, getDebugLoc(), Name); 1069 if (getUnderlyingValue()) 1070 New->setUnderlyingValue(getUnderlyingInstr()); 1071 return New; 1072 } 1073 getOpcode()1074 unsigned getOpcode() const { return Opcode; } 1075 1076 /// Generate the instruction. 
1077 /// TODO: We currently execute only per-part unless a specific instance is 1078 /// provided. 1079 void execute(VPTransformState &State) override; 1080 1081 /// Return the cost of this VPInstruction. 1082 InstructionCost computeCost(ElementCount VF, 1083 VPCostContext &Ctx) const override; 1084 1085 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1086 /// Print the VPInstruction to \p O. 1087 void print(raw_ostream &O, const Twine &Indent, 1088 VPSlotTracker &SlotTracker) const override; 1089 1090 /// Print the VPInstruction to dbgs() (for debugging). 1091 LLVM_DUMP_METHOD void dump() const; 1092 #endif 1093 hasResult()1094 bool hasResult() const { 1095 // CallInst may or may not have a result, depending on the called function. 1096 // Conservatively return calls have results for now. 1097 switch (getOpcode()) { 1098 case Instruction::Ret: 1099 case Instruction::Br: 1100 case Instruction::Store: 1101 case Instruction::Switch: 1102 case Instruction::IndirectBr: 1103 case Instruction::Resume: 1104 case Instruction::CatchRet: 1105 case Instruction::Unreachable: 1106 case Instruction::Fence: 1107 case Instruction::AtomicRMW: 1108 case VPInstruction::BranchOnCond: 1109 case VPInstruction::BranchOnCount: 1110 return false; 1111 default: 1112 return true; 1113 } 1114 } 1115 1116 /// Returns true if the underlying opcode may read from or write to memory. 1117 bool opcodeMayReadOrWriteFromMemory() const; 1118 1119 /// Returns true if the recipe only uses the first lane of operand \p Op. 1120 bool onlyFirstLaneUsed(const VPValue *Op) const override; 1121 1122 /// Returns true if the recipe only uses the first part of operand \p Op. 1123 bool onlyFirstPartUsed(const VPValue *Op) const override; 1124 1125 /// Returns true if this VPInstruction produces a scalar value from a vector, 1126 /// e.g. by performing a reduction or extracting a lane. 
1127 bool isVectorToScalar() const; 1128 1129 /// Returns true if this VPInstruction's operands are single scalars and the 1130 /// result is also a single scalar. 1131 bool isSingleScalar() const; 1132 1133 /// Returns the symbolic name assigned to the VPInstruction. getName()1134 StringRef getName() const { return Name; } 1135 }; 1136 1137 /// A specialization of VPInstruction augmenting it with a dedicated result 1138 /// type, to be used when the opcode and operands of the VPInstruction don't 1139 /// directly determine the result type. Note that there is no separate VPDef ID 1140 /// for VPInstructionWithType; it shares the same ID as VPInstruction and is 1141 /// distinguished purely by the opcode. 1142 class VPInstructionWithType : public VPInstruction { 1143 /// Scalar result type produced by the recipe. 1144 Type *ResultTy; 1145 1146 public: 1147 VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands, 1148 Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL, 1149 const Twine &Name = "") VPInstruction(Opcode,Operands,Flags,DL,Name)1150 : VPInstruction(Opcode, Operands, Flags, DL, Name), ResultTy(ResultTy) {} 1151 classof(const VPRecipeBase * R)1152 static inline bool classof(const VPRecipeBase *R) { 1153 // VPInstructionWithType are VPInstructions with specific opcodes requiring 1154 // type information. 
1155 if (R->isScalarCast()) 1156 return true; 1157 auto *VPI = dyn_cast<VPInstruction>(R); 1158 if (!VPI) 1159 return false; 1160 switch (VPI->getOpcode()) { 1161 case VPInstruction::WideIVStep: 1162 case VPInstruction::StepVector: 1163 return true; 1164 default: 1165 return false; 1166 } 1167 } 1168 classof(const VPUser * R)1169 static inline bool classof(const VPUser *R) { 1170 return isa<VPInstructionWithType>(cast<VPRecipeBase>(R)); 1171 } 1172 clone()1173 VPInstruction *clone() override { 1174 SmallVector<VPValue *, 2> Operands(operands()); 1175 auto *New = 1176 new VPInstructionWithType(getOpcode(), Operands, getResultType(), *this, 1177 getDebugLoc(), getName()); 1178 New->setUnderlyingValue(getUnderlyingValue()); 1179 return New; 1180 } 1181 1182 void execute(VPTransformState &State) override; 1183 1184 /// Return the cost of this VPInstruction. computeCost(ElementCount VF,VPCostContext & Ctx)1185 InstructionCost computeCost(ElementCount VF, 1186 VPCostContext &Ctx) const override { 1187 // TODO: Compute accurate cost after retiring the legacy cost model. 1188 return 0; 1189 } 1190 getResultType()1191 Type *getResultType() const { return ResultTy; } 1192 1193 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1194 /// Print the recipe. 1195 void print(raw_ostream &O, const Twine &Indent, 1196 VPSlotTracker &SlotTracker) const override; 1197 #endif 1198 }; 1199 1200 /// Helper type to provide functions to access incoming values and blocks for 1201 /// phi-like recipes. 1202 class VPPhiAccessors { 1203 protected: 1204 /// Return a VPRecipeBase* to the current object. 1205 virtual const VPRecipeBase *getAsRecipe() const = 0; 1206 1207 public: 1208 virtual ~VPPhiAccessors() = default; 1209 1210 /// Returns the incoming VPValue with index \p Idx. getIncomingValue(unsigned Idx)1211 VPValue *getIncomingValue(unsigned Idx) const { 1212 return getAsRecipe()->getOperand(Idx); 1213 } 1214 1215 /// Returns the incoming block with index \p Idx. 
1216 const VPBasicBlock *getIncomingBlock(unsigned Idx) const; 1217 1218 /// Returns the number of incoming values, also number of incoming blocks. getNumIncoming()1219 virtual unsigned getNumIncoming() const { 1220 return getAsRecipe()->getNumOperands(); 1221 } 1222 1223 /// Removes the incoming value for \p IncomingBlock, which must be a 1224 /// predecessor. 1225 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const; 1226 1227 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1228 /// Print the recipe. 1229 void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const; 1230 #endif 1231 }; 1232 1233 struct LLVM_ABI_FOR_TEST VPPhi : public VPInstruction, public VPPhiAccessors { 1234 VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "") VPInstructionVPPhi1235 : VPInstruction(Instruction::PHI, Operands, DL, Name) {} 1236 classofVPPhi1237 static inline bool classof(const VPUser *U) { 1238 auto *R = dyn_cast<VPInstruction>(U); 1239 return R && R->getOpcode() == Instruction::PHI; 1240 } 1241 cloneVPPhi1242 VPPhi *clone() override { 1243 return new VPPhi(operands(), getDebugLoc(), getName()); 1244 } 1245 1246 void execute(VPTransformState &State) override; 1247 1248 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1249 /// Print the recipe. 1250 void print(raw_ostream &O, const Twine &Indent, 1251 VPSlotTracker &SlotTracker) const override; 1252 #endif 1253 1254 protected: getAsRecipeVPPhi1255 const VPRecipeBase *getAsRecipe() const override { return this; } 1256 }; 1257 1258 /// A recipe to wrap on original IR instruction not to be modified during 1259 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass. 1260 /// Expect PHIs, VPIRInstructions cannot have any operands. 1261 class VPIRInstruction : public VPRecipeBase { 1262 Instruction &I; 1263 1264 protected: 1265 /// VPIRInstruction::create() should be used to create VPIRInstructions, as 1266 /// subclasses may need to be created, e.g. VPIRPhi. 
VPIRInstruction(Instruction & I)1267 VPIRInstruction(Instruction &I) 1268 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {} 1269 1270 public: 1271 ~VPIRInstruction() override = default; 1272 1273 /// Create a new VPIRPhi for \p \I, if it is a PHINode, otherwise create a 1274 /// VPIRInstruction. 1275 static VPIRInstruction *create(Instruction &I); 1276 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)1277 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC) 1278 1279 VPIRInstruction *clone() override { 1280 auto *R = create(I); 1281 for (auto *Op : operands()) 1282 R->addOperand(Op); 1283 return R; 1284 } 1285 1286 void execute(VPTransformState &State) override; 1287 1288 /// Return the cost of this VPIRInstruction. 1289 InstructionCost computeCost(ElementCount VF, 1290 VPCostContext &Ctx) const override; 1291 getInstruction()1292 Instruction &getInstruction() const { return I; } 1293 1294 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1295 /// Print the recipe. 1296 void print(raw_ostream &O, const Twine &Indent, 1297 VPSlotTracker &SlotTracker) const override; 1298 #endif 1299 usesScalars(const VPValue * Op)1300 bool usesScalars(const VPValue *Op) const override { 1301 assert(is_contained(operands(), Op) && 1302 "Op must be an operand of the recipe"); 1303 return true; 1304 } 1305 onlyFirstPartUsed(const VPValue * Op)1306 bool onlyFirstPartUsed(const VPValue *Op) const override { 1307 assert(is_contained(operands(), Op) && 1308 "Op must be an operand of the recipe"); 1309 return true; 1310 } 1311 onlyFirstLaneUsed(const VPValue * Op)1312 bool onlyFirstLaneUsed(const VPValue *Op) const override { 1313 assert(is_contained(operands(), Op) && 1314 "Op must be an operand of the recipe"); 1315 return true; 1316 } 1317 1318 /// Update the recipes first operand to the last lane of the operand using \p 1319 /// Builder. Must only be used for VPIRInstructions with at least one operand 1320 /// wrapping a PHINode. 
1321 void extractLastLaneOfFirstOperand(VPBuilder &Builder); 1322 }; 1323 1324 /// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use 1325 /// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is 1326 /// allowed, and it is used to add a new incoming value for the single 1327 /// predecessor VPBB. 1328 struct VPIRPhi : public VPIRInstruction, public VPPhiAccessors { VPIRPhiVPIRPhi1329 VPIRPhi(PHINode &PN) : VPIRInstruction(PN) {} 1330 classofVPIRPhi1331 static inline bool classof(const VPRecipeBase *U) { 1332 auto *R = dyn_cast<VPIRInstruction>(U); 1333 return R && isa<PHINode>(R->getInstruction()); 1334 } 1335 getIRPhiVPIRPhi1336 PHINode &getIRPhi() { return cast<PHINode>(getInstruction()); } 1337 1338 void execute(VPTransformState &State) override; 1339 1340 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1341 /// Print the recipe. 1342 void print(raw_ostream &O, const Twine &Indent, 1343 VPSlotTracker &SlotTracker) const override; 1344 #endif 1345 1346 protected: getAsRecipeVPIRPhi1347 const VPRecipeBase *getAsRecipe() const override { return this; } 1348 }; 1349 1350 /// VPWidenRecipe is a recipe for producing a widened instruction using the 1351 /// opcode and operands of the recipe. This recipe covers most of the 1352 /// traditional vectorization cases where each recipe transforms into a 1353 /// vectorized version of itself. 
1354 class LLVM_ABI_FOR_TEST VPWidenRecipe : public VPRecipeWithIRFlags, 1355 public VPIRMetadata { 1356 unsigned Opcode; 1357 1358 public: VPWidenRecipe(unsigned Opcode,ArrayRef<VPValue * > Operands,const VPIRFlags & Flags,DebugLoc DL)1359 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands, 1360 const VPIRFlags &Flags, DebugLoc DL) 1361 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL), 1362 Opcode(Opcode) {} 1363 VPWidenRecipe(Instruction & I,ArrayRef<VPValue * > Operands)1364 VPWidenRecipe(Instruction &I, ArrayRef<VPValue *> Operands) 1365 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPIRMetadata(I), 1366 Opcode(I.getOpcode()) {} 1367 1368 ~VPWidenRecipe() override = default; 1369 clone()1370 VPWidenRecipe *clone() override { 1371 auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands()); 1372 R->transferFlags(*this); 1373 return R; 1374 } 1375 1376 VP_CLASSOF_IMPL(VPDef::VPWidenSC) 1377 1378 /// Produce a widened instruction using the opcode and operands of the recipe, 1379 /// processing State.VF elements. 1380 void execute(VPTransformState &State) override; 1381 1382 /// Return the cost of this VPWidenRecipe. 1383 InstructionCost computeCost(ElementCount VF, 1384 VPCostContext &Ctx) const override; 1385 getOpcode()1386 unsigned getOpcode() const { return Opcode; } 1387 1388 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1389 /// Print the recipe. 1390 void print(raw_ostream &O, const Twine &Indent, 1391 VPSlotTracker &SlotTracker) const override; 1392 #endif 1393 }; 1394 1395 /// VPWidenCastRecipe is a recipe to create vector cast instructions. 1396 class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { 1397 /// Cast instruction opcode. 1398 Instruction::CastOps Opcode; 1399 1400 /// Result type for the cast. 
1401 Type *ResultTy; 1402 1403 public: VPWidenCastRecipe(Instruction::CastOps Opcode,VPValue * Op,Type * ResultTy,CastInst & UI)1404 VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, 1405 CastInst &UI) 1406 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), VPIRMetadata(UI), 1407 Opcode(Opcode), ResultTy(ResultTy) { 1408 assert(UI.getOpcode() == Opcode && 1409 "opcode of underlying cast doesn't match"); 1410 } 1411 1412 VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, 1413 const VPIRFlags &Flags = {}, DebugLoc DL = {}) VPRecipeWithIRFlags(VPDef::VPWidenCastSC,Op,Flags,DL)1414 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL), 1415 VPIRMetadata(), Opcode(Opcode), ResultTy(ResultTy) { 1416 assert(flagsValidForOpcode(Opcode) && 1417 "Set flags not supported for the provided opcode"); 1418 } 1419 1420 ~VPWidenCastRecipe() override = default; 1421 clone()1422 VPWidenCastRecipe *clone() override { 1423 if (auto *UV = getUnderlyingValue()) 1424 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy, 1425 *cast<CastInst>(UV)); 1426 1427 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy); 1428 } 1429 1430 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC) 1431 1432 /// Produce widened copies of the cast. 1433 void execute(VPTransformState &State) override; 1434 1435 /// Return the cost of this VPWidenCastRecipe. 1436 InstructionCost computeCost(ElementCount VF, 1437 VPCostContext &Ctx) const override; 1438 1439 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1440 /// Print the recipe. 1441 void print(raw_ostream &O, const Twine &Indent, 1442 VPSlotTracker &SlotTracker) const override; 1443 #endif 1444 getOpcode()1445 Instruction::CastOps getOpcode() const { return Opcode; } 1446 1447 /// Returns the result type of the cast. getResultType()1448 Type *getResultType() const { return ResultTy; } 1449 }; 1450 1451 /// A recipe for widening vector intrinsics. 
1452 class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { 1453 /// ID of the vector intrinsic to widen. 1454 Intrinsic::ID VectorIntrinsicID; 1455 1456 /// Scalar return type of the intrinsic. 1457 Type *ResultTy; 1458 1459 /// True if the intrinsic may read from memory. 1460 bool MayReadFromMemory; 1461 1462 /// True if the intrinsic may read write to memory. 1463 bool MayWriteToMemory; 1464 1465 /// True if the intrinsic may have side-effects. 1466 bool MayHaveSideEffects; 1467 1468 public: 1469 VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, 1470 ArrayRef<VPValue *> CallArguments, Type *Ty, 1471 DebugLoc DL = {}) VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC,CallArguments,CI)1472 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI), 1473 VPIRMetadata(CI), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty), 1474 MayReadFromMemory(CI.mayReadFromMemory()), 1475 MayWriteToMemory(CI.mayWriteToMemory()), 1476 MayHaveSideEffects(CI.mayHaveSideEffects()) {} 1477 1478 VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, 1479 ArrayRef<VPValue *> CallArguments, Type *Ty, 1480 DebugLoc DL = {}) VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC,CallArguments,DL)1481 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL), 1482 VPIRMetadata(), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) { 1483 LLVMContext &Ctx = Ty->getContext(); 1484 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID); 1485 MemoryEffects ME = Attrs.getMemoryEffects(); 1486 MayReadFromMemory = !ME.onlyWritesMemory(); 1487 MayWriteToMemory = !ME.onlyReadsMemory(); 1488 MayHaveSideEffects = MayWriteToMemory || 1489 !Attrs.hasAttribute(Attribute::NoUnwind) || 1490 !Attrs.hasAttribute(Attribute::WillReturn); 1491 } 1492 1493 ~VPWidenIntrinsicRecipe() override = default; 1494 clone()1495 VPWidenIntrinsicRecipe *clone() override { 1496 if (Value *CI = getUnderlyingValue()) 1497 return new 
VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID, 1498 operands(), ResultTy, getDebugLoc()); 1499 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy, 1500 getDebugLoc()); 1501 } 1502 1503 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC) 1504 1505 /// Produce a widened version of the vector intrinsic. 1506 void execute(VPTransformState &State) override; 1507 1508 /// Return the cost of this vector intrinsic. 1509 InstructionCost computeCost(ElementCount VF, 1510 VPCostContext &Ctx) const override; 1511 1512 /// Return the ID of the intrinsic. getVectorIntrinsicID()1513 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; } 1514 1515 /// Return the scalar return type of the intrinsic. getResultType()1516 Type *getResultType() const { return ResultTy; } 1517 1518 /// Return to name of the intrinsic as string. 1519 StringRef getIntrinsicName() const; 1520 1521 /// Returns true if the intrinsic may read from memory. mayReadFromMemory()1522 bool mayReadFromMemory() const { return MayReadFromMemory; } 1523 1524 /// Returns true if the intrinsic may write to memory. mayWriteToMemory()1525 bool mayWriteToMemory() const { return MayWriteToMemory; } 1526 1527 /// Returns true if the intrinsic may have side-effects. mayHaveSideEffects()1528 bool mayHaveSideEffects() const { return MayHaveSideEffects; } 1529 1530 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1531 /// Print the recipe. 1532 void print(raw_ostream &O, const Twine &Indent, 1533 VPSlotTracker &SlotTracker) const override; 1534 #endif 1535 1536 bool onlyFirstLaneUsed(const VPValue *Op) const override; 1537 }; 1538 1539 /// A recipe for widening Call instructions using library calls. 1540 class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags, 1541 public VPIRMetadata { 1542 /// Variant stores a pointer to the chosen function. 
There is a 1:1 mapping 1543 /// between a given VF and the chosen vectorized variant, so there will be a 1544 /// different VPlan for each VF with a valid variant. 1545 Function *Variant; 1546 1547 public: 1548 VPWidenCallRecipe(Value *UV, Function *Variant, 1549 ArrayRef<VPValue *> CallArguments, DebugLoc DL = {}) 1550 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, 1551 *cast<Instruction>(UV)), 1552 VPIRMetadata(*cast<Instruction>(UV)), Variant(Variant) { 1553 assert( 1554 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) && 1555 "last operand must be the called function"); 1556 } 1557 1558 ~VPWidenCallRecipe() override = default; 1559 clone()1560 VPWidenCallRecipe *clone() override { 1561 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(), 1562 getDebugLoc()); 1563 } 1564 1565 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC) 1566 1567 /// Produce a widened version of the call instruction. 1568 void execute(VPTransformState &State) override; 1569 1570 /// Return the cost of this VPWidenCallRecipe. 1571 InstructionCost computeCost(ElementCount VF, 1572 VPCostContext &Ctx) const override; 1573 getCalledScalarFunction()1574 Function *getCalledScalarFunction() const { 1575 return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()); 1576 } 1577 args()1578 operand_range args() { return make_range(op_begin(), std::prev(op_end())); } args()1579 const_operand_range args() const { 1580 return make_range(op_begin(), std::prev(op_end())); 1581 } 1582 1583 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1584 /// Print the recipe. 1585 void print(raw_ostream &O, const Twine &Indent, 1586 VPSlotTracker &SlotTracker) const override; 1587 #endif 1588 }; 1589 1590 /// A recipe representing a sequence of load -> update -> store as part of 1591 /// a histogram operation. This means there may be aliasing between vector 1592 /// lanes, which is handled by the llvm.experimental.vector.histogram family 1593 /// of intrinsics. 
The only update operations currently supported are 1594 /// 'add' and 'sub' where the other term is loop-invariant. 1595 class VPHistogramRecipe : public VPRecipeBase { 1596 /// Opcode of the update operation, currently either add or sub. 1597 unsigned Opcode; 1598 1599 public: 1600 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands, 1601 DebugLoc DL = {}) VPRecipeBase(VPDef::VPHistogramSC,Operands,DL)1602 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {} 1603 1604 ~VPHistogramRecipe() override = default; 1605 clone()1606 VPHistogramRecipe *clone() override { 1607 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc()); 1608 } 1609 1610 VP_CLASSOF_IMPL(VPDef::VPHistogramSC); 1611 1612 /// Produce a vectorized histogram operation. 1613 void execute(VPTransformState &State) override; 1614 1615 /// Return the cost of this VPHistogramRecipe. 1616 InstructionCost computeCost(ElementCount VF, 1617 VPCostContext &Ctx) const override; 1618 getOpcode()1619 unsigned getOpcode() const { return Opcode; } 1620 1621 /// Return the mask operand if one was provided, or a null pointer if all 1622 /// lanes should be executed unconditionally. getMask()1623 VPValue *getMask() const { 1624 return getNumOperands() == 3 ? getOperand(2) : nullptr; 1625 } 1626 1627 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1628 /// Print the recipe 1629 void print(raw_ostream &O, const Twine &Indent, 1630 VPSlotTracker &SlotTracker) const override; 1631 #endif 1632 }; 1633 1634 /// A recipe for widening select instructions. 
1635 struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags, 1636 public VPIRMetadata { VPWidenSelectRecipeVPWidenSelectRecipe1637 VPWidenSelectRecipe(SelectInst &I, ArrayRef<VPValue *> Operands) 1638 : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I), 1639 VPIRMetadata(I) {} 1640 1641 ~VPWidenSelectRecipe() override = default; 1642 cloneVPWidenSelectRecipe1643 VPWidenSelectRecipe *clone() override { 1644 return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()), 1645 operands()); 1646 } 1647 1648 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC) 1649 1650 /// Produce a widened version of the select instruction. 1651 void execute(VPTransformState &State) override; 1652 1653 /// Return the cost of this VPWidenSelectRecipe. 1654 InstructionCost computeCost(ElementCount VF, 1655 VPCostContext &Ctx) const override; 1656 1657 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1658 /// Print the recipe. 1659 void print(raw_ostream &O, const Twine &Indent, 1660 VPSlotTracker &SlotTracker) const override; 1661 #endif 1662 getCondVPWidenSelectRecipe1663 VPValue *getCond() const { 1664 return getOperand(0); 1665 } 1666 isInvariantCondVPWidenSelectRecipe1667 bool isInvariantCond() const { 1668 return getCond()->isDefinedOutsideLoopRegions(); 1669 } 1670 1671 /// Returns true if the recipe only uses the first lane of operand \p Op. onlyFirstLaneUsedVPWidenSelectRecipe1672 bool onlyFirstLaneUsed(const VPValue *Op) const override { 1673 assert(is_contained(operands(), Op) && 1674 "Op must be an operand of the recipe"); 1675 return Op == getCond() && isInvariantCond(); 1676 } 1677 }; 1678 1679 /// A recipe for handling GEP instructions. 
class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
  /// Returns true if the GEP's pointer operand (operand 0) is defined outside
  /// all loop regions.
  bool isPointerLoopInvariant() const {
    return getOperand(0)->isDefinedOutsideLoopRegions();
  }

  /// Returns true if GEP index \p I (recipe operand I + 1) is defined outside
  /// all loop regions.
  bool isIndexLoopInvariant(unsigned I) const {
    return getOperand(I + 1)->isDefinedOutsideLoopRegions();
  }

  /// Returns true if every operand (pointer and all indices) is defined
  /// outside all loop regions.
  bool areAllOperandsInvariant() const {
    return all_of(operands(), [](VPValue *Op) {
      return Op->isDefinedOutsideLoopRegions();
    });
  }

public:
  /// Create a recipe widening \p GEP with operands \p Operands. Asserts that
  /// \p GEP carries no metadata that would need propagating.
  VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef<VPValue *> Operands)
      : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {
    SmallVector<std::pair<unsigned, MDNode *>> Metadata;
    (void)Metadata;
    getMetadataToPropagate(GEP, Metadata);
    assert(Metadata.empty() && "unexpected metadata on GEP");
  }

  ~VPWidenGEPRecipe() override = default;

  VPWidenGEPRecipe *clone() override {
    return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
                                operands());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)

  /// Generate the gep nodes.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenGEPRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    if (Op == getOperand(0))
      return isPointerLoopInvariant();
    else
      return !isPointerLoopInvariant() && Op->isDefinedOutsideLoopRegions();
  }
};

/// A recipe to compute a pointer to the last element of each part of a widened
/// memory access for widened memory accesses of IndexedTy. Used for
/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags,
                                 public VPUnrollPartAccessor<2> {
  /// Element type the pointer arithmetic is performed on.
  Type *IndexedTy;

  /// The constant stride of the pointer computed by this recipe, expressed in
  /// units of IndexedTy.
  int64_t Stride;

public:
  /// Create a recipe computing the end pointer for \p Ptr with (negative)
  /// stride \p Stride; \p VF is the second operand (see getVFValue()).
  VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
                           int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
      : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
                            ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
        IndexedTy(IndexedTy), Stride(Stride) {
    assert(Stride < 0 && "Stride must be negative");
  }

  VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)

  /// Return the VF operand (operand 1).
  VPValue *getVFValue() { return getOperand(1); }
  const VPValue *getVFValue() const { return getOperand(1); }

  void execute(VPTransformState &State) override;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Return the cost of this VPVectorEndPointerRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool onlyFirstPartUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    assert(getNumOperands() <= 2 && "must have at most two operands");
    return true;
  }

  VPVectorEndPointerRecipe *clone() override {
    return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
                                        Stride, getGEPNoWrapFlags(),
                                        getDebugLoc());
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe to compute the pointers for widened memory accesses of IndexTy.
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
                              public VPUnrollPartAccessor<1> {
  /// Element type the pointer arithmetic is performed on.
  Type *IndexedTy;

public:
  /// Create a recipe computing the vector pointer for \p Ptr, indexing
  /// elements of type \p IndexedTy.
  VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, GEPNoWrapFlags GEPFlags,
                        DebugLoc DL)
      : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
                            GEPFlags, DL),
        IndexedTy(IndexedTy) {}

  VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)

  void execute(VPTransformState &State) override;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool onlyFirstPartUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    assert(getNumOperands() <= 2 && "must have at most two operands");
    return true;
  }

  VPVectorPointerRecipe *clone() override {
    return new VPVectorPointerRecipe(getOperand(0), IndexedTy,
                                     getGEPNoWrapFlags(), getDebugLoc());
  }

  /// Return the cost of this VPVectorPointerRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A pure virtual base class for all recipes modeling header phis, including
/// phis for first order recurrences, pointer inductions and reductions. The
/// start value is the first operand of the recipe and the incoming value from
/// the backedge is the second operand.
///
/// Inductions are modeled using the following sub-classes:
///  * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
///    starting at a specified value (zero for the main vector loop, the resume
///    value for the epilogue vector loop) and stepping by 1. The induction
///    controls exiting of the vector loop by comparing against the vector trip
///    count. Produces a single scalar PHI for the induction value per
///    iteration.
///  * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
///    floating point inductions with arbitrary start and step values. Produces
///    a vector PHI per-part.
///  * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
///    value of an IV with different start and step values. Produces a single
///    scalar value per iteration
///  * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
///    canonical or derived induction.
///  * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
///    pointer induction. Produces either a vector PHI per-part or scalar values
///    per-lane based on the canonical induction.
class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe,
                                            public VPPhiAccessors {
protected:
  /// Create a header phi recipe with subclass id \p VPDefID for
  /// \p UnderlyingInstr, with start value \p Start as the only initial
  /// operand.
  VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
                    VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
      : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}),
                          UnderlyingInstr, DL) {}

  /// Provide this recipe to the VPPhiAccessors interface.
  const VPRecipeBase *getAsRecipe() const override { return this; }

public:
  ~VPHeaderPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *B) {
    return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
           B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
  }
  static inline bool classof(const VPValue *V) {
    auto *B = V->getDefiningRecipe();
    return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
           B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
  }

  /// Generate the phi nodes.
  void execute(VPTransformState &State) override = 0;

  /// Return the cost of this header phi recipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override = 0;
#endif

  /// Returns the start value of the phi, if one is set.
  VPValue *getStartValue() {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }
  VPValue *getStartValue() const {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }

  /// Update the start value of the recipe.
  void setStartValue(VPValue *V) { setOperand(0, V); }

  /// Returns the incoming value from the loop backedge.
  virtual VPValue *getBackedgeValue() {
    return getOperand(1);
  }

  /// Returns the backedge value as a recipe. The backedge value is guaranteed
  /// to be a recipe.
  virtual VPRecipeBase &getBackedgeRecipe() {
    return *getBackedgeValue()->getDefiningRecipe();
  }
};

/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
/// VPWidenPointerInductionRecipe), providing shared functionality, including
/// retrieving the step value, induction descriptor and original phi node.
class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
  /// Induction descriptor of the original scalar induction.
  const InductionDescriptor &IndDesc;

public:
  /// Create a widened induction recipe of kind \p Kind for phi \p IV, with
  /// start value \p Start (operand 0) and step value \p Step (operand 1).
  VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
                         VPValue *Step, const InductionDescriptor &IndDesc,
                         DebugLoc DL)
      : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
    addOperand(Step);
  }

  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
           R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
  }

  static inline bool classof(const VPValue *V) {
    auto *R = V->getDefiningRecipe();
    return R && classof(R);
  }

  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return classof(static_cast<const VPRecipeBase *>(R));
  }

  virtual void execute(VPTransformState &State) override = 0;

  /// Returns the step value of the induction.
  VPValue *getStepValue() { return getOperand(1); }
  const VPValue *getStepValue() const { return getOperand(1); }

  /// Update the step value of the recipe.
  void setStepValue(VPValue *V) { setOperand(1, V); }

  /// Returns the number of incoming values, also number of incoming blocks.
  /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
  /// incoming value, its start value.
  unsigned getNumIncoming() const override { return 1; }

  /// Returns the original scalar phi node of the induction.
  PHINode *getPHINode() const { return cast<PHINode>(getUnderlyingValue()); }

  /// Returns the induction descriptor for the recipe.
  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

  VPValue *getBackedgeValue() override {
    // TODO: All operands of base recipe must exist and be at same index in
    // derived recipe.
    llvm_unreachable(
        "VPWidenIntOrFpInductionRecipe generates its own backedge value");
  }

  VPRecipeBase &getBackedgeRecipe() override {
    // TODO: All operands of base recipe must exist and be at same index in
    // derived recipe.
    llvm_unreachable(
        "VPWidenIntOrFpInductionRecipe generates its own backedge value");
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // The recipe creates its own wide start value, so it only requests the
    // first lane of the operand.
    // TODO: Remove once creating the start value is modeled separately.
    return Op == getStartValue() || Op == getStepValue();
  }
};

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector values. This is an abstract recipe and must be
/// converted to concrete recipes before executing.
class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
  /// Truncate folded into the induction, or null if there is none.
  TruncInst *Trunc;

  // If this recipe is unrolled it will have 2 additional operands.
  bool isUnrolled() const { return getNumOperands() == 5; }

public:
  /// Create a widened int/fp induction for \p IV without a truncate; \p VF is
  /// added as operand 2.
  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
                                VPValue *VF, const InductionDescriptor &IndDesc,
                                DebugLoc DL)
      : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
                               Step, IndDesc, DL),
        Trunc(nullptr) {
    addOperand(VF);
  }

  /// Create a widened int/fp induction for \p IV with truncate \p Trunc
  /// (may be null). Asserts that \p Trunc carries no metadata that would need
  /// propagating.
  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
                                VPValue *VF, const InductionDescriptor &IndDesc,
                                TruncInst *Trunc, DebugLoc DL)
      : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
                               Step, IndDesc, DL),
        Trunc(Trunc) {
    addOperand(VF);
    SmallVector<std::pair<unsigned, MDNode *>> Metadata;
    (void)Metadata;
    if (Trunc)
      getMetadataToPropagate(Trunc, Metadata);
    assert(Metadata.empty() && "unexpected metadata on Trunc");
  }

  ~VPWidenIntOrFpInductionRecipe() override = default;

  VPWidenIntOrFpInductionRecipe *clone() override {
    return new VPWidenIntOrFpInductionRecipe(
        getPHINode(), getStartValue(), getStepValue(), getVFValue(),
        getInductionDescriptor(), Trunc, getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)

  void execute(VPTransformState &State) override {
    llvm_unreachable("cannot execute this recipe, should be expanded via "
                     "expandVPWidenIntOrFpInductionRecipe");
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the VF operand (operand 2).
  VPValue *getVFValue() { return getOperand(2); }
  const VPValue *getVFValue() const { return getOperand(2); }

  VPValue *getSplatVFValue() {
    // If the recipe has been unrolled return the VPValue for the induction
    // increment.
    return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
  }

  /// Returns the number of incoming values, also number of incoming blocks.
  /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
  /// incoming value, its start value.
  unsigned getNumIncoming() const override { return 1; }

  /// Returns the first defined value as TruncInst, if it is one or nullptr
  /// otherwise.
  TruncInst *getTruncInst() { return Trunc; }
  const TruncInst *getTruncInst() const { return Trunc; }

  /// Returns true if the induction is canonical, i.e. starting at 0 and
  /// incremented by UF * VF (= the original IV is incremented by 1) and has the
  /// same type as the canonical induction.
  bool isCanonical() const;

  /// Returns the scalar type of the induction.
  Type *getScalarType() const {
    return Trunc ? Trunc->getType()
                 : getStartValue()->getLiveInIRValue()->getType();
  }

  /// Returns the VPValue representing the value of this induction at
  /// the last unrolled part, if it exists. Returns itself if unrolling did not
  /// take place.
  VPValue *getLastUnrolledPartOperand() {
    return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
  }
};

/// A recipe for handling phi nodes of pointer inductions, producing their
/// vector values (see execute()); whether only scalar values are generated is
/// queried via onlyScalarsGenerated().
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
                                      public VPUnrollPartAccessor<4> {
  /// Whether the induction is scalar after vectorization (set at
  /// construction).
  bool IsScalarAfterVectorization;

public:
  /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
  /// Start and the number of elements unrolled \p NumUnrolledElems, typically
  /// VF*UF.
  VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
                                VPValue *NumUnrolledElems,
                                const InductionDescriptor &IndDesc,
                                bool IsScalarAfterVectorization, DebugLoc DL)
      : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
                               Step, IndDesc, DL),
        IsScalarAfterVectorization(IsScalarAfterVectorization) {
    addOperand(NumUnrolledElems);
  }

  ~VPWidenPointerInductionRecipe() override = default;

  VPWidenPointerInductionRecipe *clone() override {
    return new VPWidenPointerInductionRecipe(
        cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
        getOperand(2), getInductionDescriptor(), IsScalarAfterVectorization,
        getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)

  /// Generate vector values for the pointer induction.
  void execute(VPTransformState &State) override;

  /// Returns true if only scalar values will be generated.
  bool onlyScalarsGenerated(bool IsScalable);

  /// Returns the VPValue representing the value of this induction at
  /// the first unrolled part, if it exists. Returns itself if unrolling did not
  /// take place.
  VPValue *getFirstUnrolledPartOperand() {
    return getUnrollPart(*this) == 0 ? this : getOperand(3);
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for widened phis. Incoming values are operands of the recipe and
/// their operand index corresponds to the incoming predecessor block. If the
/// recipe is placed in an entry block to a (non-replicate) region, it must have
/// exactly 2 incoming values, the first from the predecessor of the region and
/// the second from the exiting block of the region.
class LLVM_ABI_FOR_TEST VPWidenPHIRecipe : public VPSingleDefRecipe,
                                           public VPPhiAccessors {
  /// Name to use for the generated IR instruction for the widened phi.
  std::string Name;

protected:
  /// Provide this recipe to the VPPhiAccessors interface.
  const VPRecipeBase *getAsRecipe() const override { return this; }

public:
  /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
  /// debug location \p DL.
  VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = {},
                   const Twine &Name = "")
      : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi, DL),
        Name(Name.str()) {
    if (Start)
      addOperand(Start);
  }

  VPWidenPHIRecipe *clone() override {
    auto *C = new VPWidenPHIRecipe(cast<PHINode>(getUnderlyingValue()),
                                   getOperand(0), getDebugLoc(), Name);
    for (VPValue *Op : llvm::drop_begin(operands()))
      C->addOperand(Op);
    return C;
  }

  ~VPWidenPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for handling first-order recurrence phis. The start value is the
/// first operand of the recipe and the incoming value from the backedge is the
/// second operand.
struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
  VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
      : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}

  VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)

  VPFirstOrderRecurrencePHIRecipe *clone() override {
    return new VPFirstOrderRecurrencePHIRecipe(
        cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
  }

  void execute(VPTransformState &State) override;

  /// Return the cost of this first-order recurrence phi recipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return Op == getStartValue();
  }
};

/// A recipe for handling reduction phis. The start value is the first operand
/// of the recipe and the incoming value from the backedge is the second
/// operand.
class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
                             public VPUnrollPartAccessor<2> {
  /// The recurrence kind of the reduction.
  const RecurKind Kind;

  /// The phi is part of an in-loop reduction.
  bool IsInLoop;

  /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
  bool IsOrdered;

  /// When expanding the reduction PHI, the plan's VF element count is divided
  /// by this factor to form the reduction phi's VF.
  unsigned VFScaleFactor = 1;

public:
  /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
  VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start,
                       bool IsInLoop = false, bool IsOrdered = false,
                       unsigned VFScaleFactor = 1)
      : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
        IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
    assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
  }

  ~VPReductionPHIRecipe() override = default;

  VPReductionPHIRecipe *clone() override {
    auto *R = new VPReductionPHIRecipe(
        dyn_cast_or_null<PHINode>(getUnderlyingValue()), getRecurrenceKind(),
        *getOperand(0), IsInLoop, IsOrdered, VFScaleFactor);
    R->addOperand(getBackedgeValue());
    return R;
  }

  VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

  /// Get the factor that the VF of this recipe's output should be scaled by.
  unsigned getVFScaleFactor() const { return VFScaleFactor; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the recurrence kind of the reduction.
  RecurKind getRecurrenceKind() const { return Kind; }

  /// Returns true, if the phi is part of an ordered reduction.
  bool isOrdered() const { return IsOrdered; }

  /// Returns true, if the phi is part of an in-loop reduction.
  bool isInLoop() const { return IsInLoop; }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return isOrdered() || isInLoop();
  }
};

/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {
public:
  /// The blend operation is a User of the incoming values and of their
  /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
  /// be omitted (implied by passing an odd number of operands) in which case
  /// all other incoming values are merged into it.
  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
      : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
    assert(Operands.size() > 0 && "Expected at least one operand!");
  }

  VPBlendRecipe *clone() override {
    SmallVector<VPValue *> Ops(operands());
    return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
  }

  VP_CLASSOF_IMPL(VPDef::VPBlendSC)

  /// A normalized blend is one that has an odd number of operands, whereby the
  /// first operand does not have an associated mask.
  bool isNormalized() const { return getNumOperands() % 2; }

  /// Return the number of incoming values, taking into account when normalized
  /// the first incoming value will have no mask.
  unsigned getNumIncomingValues() const {
    return (getNumOperands() + isNormalized()) / 2;
  }

  /// Return incoming value number \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const {
    return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
  }

  /// Return mask number \p Idx.
  VPValue *getMask(unsigned Idx) const {
    assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
    return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPBlendRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Recursing through Blend recipes only, must terminate at header phi's the
    // latest.
    return all_of(users(),
                  [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
  }
};

/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
/// or stores into one wide load/store and shuffles. The first operand of a
/// VPInterleave recipe is the address, followed by the stored values, followed
/// by an optional mask.
class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase {
  /// The interleave group modeled by this recipe.
  const InterleaveGroup<Instruction> *IG;

  /// Indicates if the interleave group is in a conditional block and requires a
  /// mask.
  bool HasMask = false;

  /// Indicates if gaps between members of the group need to be masked out or if
  /// unused gaps can be loaded speculatively.
  bool NeedsMaskForGaps = false;

public:
  /// Create an interleave recipe for \p IG accessing \p Addr; \p StoredValues
  /// and the optional \p Mask are appended as further operands, and a VPValue
  /// is defined for each non-void member of the group.
  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
                     ArrayRef<VPValue *> StoredValues, VPValue *Mask,
                     bool NeedsMaskForGaps, DebugLoc DL)
      : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}, DL),

        IG(IG), NeedsMaskForGaps(NeedsMaskForGaps) {
    // TODO: extend the masked interleaved-group support to reversed access.
    assert((!Mask || !IG->isReverse()) &&
           "Reversed masked interleave-group not supported.");
    for (unsigned i = 0; i < IG->getFactor(); ++i)
      if (Instruction *I = IG->getMember(i)) {
        if (I->getType()->isVoidTy())
          continue;
        new VPValue(I, this);
      }

    for (auto *SV : StoredValues)
      addOperand(SV);
    if (Mask) {
      HasMask = true;
      addOperand(Mask);
    }
  }
  ~VPInterleaveRecipe() override = default;

  VPInterleaveRecipe *clone() override {
    return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
                                  NeedsMaskForGaps, getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last, currently 2nd operand.
    return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Return the VPValues stored by this interleave group. If it is a load
  /// interleave group, return an empty ArrayRef.
  ArrayRef<VPValue *> getStoredValues() const {
    // The first operand is the address, followed by the stored values, followed
    // by an optional mask.
    return ArrayRef<VPValue *>(op_begin(), getNumOperands())
        .slice(1, getNumStoreOperands());
  }

  /// Generate the wide load or store, and shuffles.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPInterleaveRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Return the interleave group modeled by this recipe.
  const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }

  /// Returns the number of stored operands of this interleave group. Returns 0
  /// for load interleave groups.
  unsigned getNumStoreOperands() const {
    return getNumOperands() - (HasMask ? 2 : 1);
  }

  /// The recipe only uses the first lane of the address.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
  }

  /// Return the insert position of the interleave group.
  Instruction *getInsertPos() const { return IG->getInsertPos(); }
};

/// A recipe to represent inloop reduction operations, performing a reduction on
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
  /// The recurrence kind for the reduction in question.
  RecurKind RdxKind;
  /// Whether the reduction is ordered.
  bool IsOrdered;
  /// Whether the reduction is conditional.
  bool IsConditional = false;

protected:
  /// Create a reduction recipe with subclass id \p SC; if \p CondOp is
  /// non-null it is appended as an extra operand and the reduction is marked
  /// conditional.
  VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
                    FastMathFlags FMFs, Instruction *I,
                    ArrayRef<VPValue *> Operands, VPValue *CondOp,
                    bool IsOrdered, DebugLoc DL)
      : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
        IsOrdered(IsOrdered) {
    if (CondOp) {
      IsConditional = true;
      addOperand(CondOp);
    }
    setUnderlyingValue(I);
  }

public:
  VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
                    VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
                    bool IsOrdered, DebugLoc DL = {})
      : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
                          ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
                          IsOrdered, DL) {}

  VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs,
                    VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
                    bool IsOrdered, DebugLoc DL = {})
      : VPReductionRecipe(VPDef::VPReductionSC,
RdxKind, FMFs, nullptr, 2497 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, 2498 IsOrdered, DL) {} 2499 2500 ~VPReductionRecipe() override = default; 2501 clone()2502 VPReductionRecipe *clone() override { 2503 return new VPReductionRecipe(RdxKind, getFastMathFlags(), 2504 getUnderlyingInstr(), getChainOp(), getVecOp(), 2505 getCondOp(), IsOrdered, getDebugLoc()); 2506 } 2507 classof(const VPRecipeBase * R)2508 static inline bool classof(const VPRecipeBase *R) { 2509 return R->getVPDefID() == VPRecipeBase::VPReductionSC || 2510 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC; 2511 } 2512 classof(const VPUser * U)2513 static inline bool classof(const VPUser *U) { 2514 auto *R = dyn_cast<VPRecipeBase>(U); 2515 return R && classof(R); 2516 } 2517 2518 /// Generate the reduction in the loop. 2519 void execute(VPTransformState &State) override; 2520 2521 /// Return the cost of VPReductionRecipe. 2522 InstructionCost computeCost(ElementCount VF, 2523 VPCostContext &Ctx) const override; 2524 2525 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2526 /// Print the recipe. 2527 void print(raw_ostream &O, const Twine &Indent, 2528 VPSlotTracker &SlotTracker) const override; 2529 #endif 2530 2531 /// Return the recurrence kind for the in-loop reduction. getRecurrenceKind()2532 RecurKind getRecurrenceKind() const { return RdxKind; } 2533 /// Return true if the in-loop reduction is ordered. isOrdered()2534 bool isOrdered() const { return IsOrdered; }; 2535 /// Return true if the in-loop reduction is conditional. isConditional()2536 bool isConditional() const { return IsConditional; }; 2537 /// The VPValue of the scalar Chain being accumulated. getChainOp()2538 VPValue *getChainOp() const { return getOperand(0); } 2539 /// The VPValue of the vector value to be reduced. getVecOp()2540 VPValue *getVecOp() const { return getOperand(1); } 2541 /// The VPValue of the condition for the block. getCondOp()2542 VPValue *getCondOp() const { 2543 return isConditional() ? 
getOperand(getNumOperands() - 1) : nullptr; 2544 } 2545 }; 2546 2547 /// A recipe for forming partial reductions. In the loop, an accumulator and 2548 /// vector operand are added together and passed to the next iteration as the 2549 /// next accumulator. After the loop body, the accumulator is reduced to a 2550 /// scalar value. 2551 class VPPartialReductionRecipe : public VPReductionRecipe { 2552 unsigned Opcode; 2553 2554 /// The divisor by which the VF of this recipe's output should be divided 2555 /// during execution. 2556 unsigned VFScaleFactor; 2557 2558 public: VPPartialReductionRecipe(Instruction * ReductionInst,VPValue * Op0,VPValue * Op1,VPValue * Cond,unsigned VFScaleFactor)2559 VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0, 2560 VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor) 2561 : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond, 2562 VFScaleFactor, ReductionInst) {} 2563 VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1, 2564 VPValue *Cond, unsigned ScaleFactor, 2565 Instruction *ReductionInst = nullptr) 2566 : VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add, 2567 FastMathFlags(), ReductionInst, 2568 ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}), 2569 Opcode(Opcode), VFScaleFactor(ScaleFactor) { 2570 [[maybe_unused]] auto *AccumulatorRecipe = 2571 getChainOp()->getDefiningRecipe(); 2572 assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) || 2573 isa<VPPartialReductionRecipe>(AccumulatorRecipe)) && 2574 "Unexpected operand order for partial reduction recipe"); 2575 } 2576 ~VPPartialReductionRecipe() override = default; 2577 clone()2578 VPPartialReductionRecipe *clone() override { 2579 return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1), 2580 getCondOp(), VFScaleFactor, 2581 getUnderlyingInstr()); 2582 } 2583 2584 VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC) 2585 2586 /// Generate the reduction in the loop. 
2587 void execute(VPTransformState &State) override; 2588 2589 /// Return the cost of this VPPartialReductionRecipe. 2590 InstructionCost computeCost(ElementCount VF, 2591 VPCostContext &Ctx) const override; 2592 2593 /// Get the binary op's opcode. getOpcode()2594 unsigned getOpcode() const { return Opcode; } 2595 2596 /// Get the factor that the VF of this recipe's output should be scaled by. getVFScaleFactor()2597 unsigned getVFScaleFactor() const { return VFScaleFactor; } 2598 2599 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2600 /// Print the recipe. 2601 void print(raw_ostream &O, const Twine &Indent, 2602 VPSlotTracker &SlotTracker) const override; 2603 #endif 2604 }; 2605 2606 /// A recipe to represent inloop reduction operations with vector-predication 2607 /// intrinsics, performing a reduction on a vector operand with the explicit 2608 /// vector length (EVL) into a scalar value, and adding the result to a chain. 2609 /// The Operands are {ChainOp, VecOp, EVL, [Condition]}. 2610 class LLVM_ABI_FOR_TEST VPReductionEVLRecipe : public VPReductionRecipe { 2611 public: 2612 VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, 2613 DebugLoc DL = {}) 2614 : VPReductionRecipe( 2615 VPDef::VPReductionEVLSC, R.getRecurrenceKind(), 2616 R.getFastMathFlags(), 2617 cast_or_null<Instruction>(R.getUnderlyingValue()), 2618 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp, 2619 R.isOrdered(), DL) {} 2620 2621 ~VPReductionEVLRecipe() override = default; 2622 clone()2623 VPReductionEVLRecipe *clone() override { 2624 llvm_unreachable("cloning not implemented yet"); 2625 } 2626 2627 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC) 2628 2629 /// Generate the reduction in the loop 2630 void execute(VPTransformState &State) override; 2631 2632 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2633 /// Print the recipe. 
2634 void print(raw_ostream &O, const Twine &Indent, 2635 VPSlotTracker &SlotTracker) const override; 2636 #endif 2637 2638 /// The VPValue of the explicit vector length. getEVL()2639 VPValue *getEVL() const { return getOperand(2); } 2640 2641 /// Returns true if the recipe only uses the first lane of operand \p Op. onlyFirstLaneUsed(const VPValue * Op)2642 bool onlyFirstLaneUsed(const VPValue *Op) const override { 2643 assert(is_contained(operands(), Op) && 2644 "Op must be an operand of the recipe"); 2645 return Op == getEVL(); 2646 } 2647 }; 2648 2649 /// VPReplicateRecipe replicates a given instruction producing multiple scalar 2650 /// copies of the original scalar type, one per lane, instead of producing a 2651 /// single copy of widened type for all lanes. If the instruction is known to be 2652 /// a single scalar, only one copy, per lane zero, will be generated. 2653 class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags, 2654 public VPIRMetadata { 2655 /// Indicator if only a single replica per lane is needed. 2656 bool IsSingleScalar; 2657 2658 /// Indicator if the replicas are also predicated. 2659 bool IsPredicated; 2660 2661 public: 2662 VPReplicateRecipe(Instruction *I, ArrayRef<VPValue *> Operands, 2663 bool IsSingleScalar, VPValue *Mask = nullptr, 2664 VPIRMetadata Metadata = {}) 2665 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I), 2666 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar), 2667 IsPredicated(Mask) { 2668 if (Mask) 2669 addOperand(Mask); 2670 } 2671 2672 ~VPReplicateRecipe() override = default; 2673 clone()2674 VPReplicateRecipe *clone() override { 2675 auto *Copy = 2676 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsSingleScalar, 2677 isPredicated() ? getMask() : nullptr, *this); 2678 Copy->transferFlags(*this); 2679 return Copy; 2680 } 2681 2682 VP_CLASSOF_IMPL(VPDef::VPReplicateSC) 2683 2684 /// Generate replicas of the desired Ingredient. 
Replicas will be generated 2685 /// for all parts and lanes unless a specific part and lane are specified in 2686 /// the \p State. 2687 void execute(VPTransformState &State) override; 2688 2689 /// Return the cost of this VPReplicateRecipe. 2690 InstructionCost computeCost(ElementCount VF, 2691 VPCostContext &Ctx) const override; 2692 2693 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2694 /// Print the recipe. 2695 void print(raw_ostream &O, const Twine &Indent, 2696 VPSlotTracker &SlotTracker) const override; 2697 #endif 2698 isSingleScalar()2699 bool isSingleScalar() const { return IsSingleScalar; } 2700 isPredicated()2701 bool isPredicated() const { return IsPredicated; } 2702 2703 /// Returns true if the recipe only uses the first lane of operand \p Op. onlyFirstLaneUsed(const VPValue * Op)2704 bool onlyFirstLaneUsed(const VPValue *Op) const override { 2705 assert(is_contained(operands(), Op) && 2706 "Op must be an operand of the recipe"); 2707 return isSingleScalar(); 2708 } 2709 2710 /// Returns true if the recipe uses scalars of operand \p Op. usesScalars(const VPValue * Op)2711 bool usesScalars(const VPValue *Op) const override { 2712 assert(is_contained(operands(), Op) && 2713 "Op must be an operand of the recipe"); 2714 return true; 2715 } 2716 2717 /// Returns true if the recipe is used by a widened recipe via an intervening 2718 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed 2719 /// in a vector. 2720 bool shouldPack() const; 2721 2722 /// Return the mask of a predicated VPReplicateRecipe. getMask()2723 VPValue *getMask() { 2724 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe"); 2725 return getOperand(getNumOperands() - 1); 2726 } 2727 getOpcode()2728 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); } 2729 }; 2730 2731 /// A recipe for generating conditional branches on the bits of a mask. 
2732 class LLVM_ABI_FOR_TEST VPBranchOnMaskRecipe : public VPRecipeBase { 2733 public: VPBranchOnMaskRecipe(VPValue * BlockInMask,DebugLoc DL)2734 VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL) 2735 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {} 2736 clone()2737 VPBranchOnMaskRecipe *clone() override { 2738 return new VPBranchOnMaskRecipe(getOperand(0), getDebugLoc()); 2739 } 2740 2741 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC) 2742 2743 /// Generate the extraction of the appropriate bit from the block mask and the 2744 /// conditional branch. 2745 void execute(VPTransformState &State) override; 2746 2747 /// Return the cost of this VPBranchOnMaskRecipe. 2748 InstructionCost computeCost(ElementCount VF, 2749 VPCostContext &Ctx) const override; 2750 2751 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2752 /// Print the recipe. print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker)2753 void print(raw_ostream &O, const Twine &Indent, 2754 VPSlotTracker &SlotTracker) const override { 2755 O << Indent << "BRANCH-ON-MASK "; 2756 printOperands(O, SlotTracker); 2757 } 2758 #endif 2759 2760 /// Returns true if the recipe uses scalars of operand \p Op. usesScalars(const VPValue * Op)2761 bool usesScalars(const VPValue *Op) const override { 2762 assert(is_contained(operands(), Op) && 2763 "Op must be an operand of the recipe"); 2764 return true; 2765 } 2766 }; 2767 2768 /// A recipe to combine multiple recipes into a single 'expression' recipe, 2769 /// which should be considered a single entity for cost-modeling and transforms. 2770 /// The recipe needs to be 'decomposed', i.e. replaced by its individual 2771 /// expression recipes, before execute. The individual expression recipes are 2772 /// completely disconnected from the def-use graph of other recipes not part of 2773 /// the expression. 
Def-use edges between pairs of expression recipes remain 2774 /// intact, whereas every edge between an expression recipe and a recipe outside 2775 /// the expression is elevated to connect the non-expression recipe with the 2776 /// VPExpressionRecipe itself. 2777 class VPExpressionRecipe : public VPSingleDefRecipe { 2778 /// Recipes included in this VPExpressionRecipe. 2779 SmallVector<VPSingleDefRecipe *> ExpressionRecipes; 2780 2781 /// Temporary VPValues used for external operands of the expression, i.e. 2782 /// operands not defined by recipes in the expression. 2783 SmallVector<VPValue *> LiveInPlaceholders; 2784 2785 enum class ExpressionTypes { 2786 /// Represents an inloop extended reduction operation, performing a 2787 /// reduction on an extended vector operand into a scalar value, and adding 2788 /// the result to a chain. 2789 ExtendedReduction, 2790 /// Represent an inloop multiply-accumulate reduction, multiplying the 2791 /// extended vector operands, performing a reduction.add on the result, and 2792 /// adding the scalar result to a chain. 2793 ExtMulAccReduction, 2794 /// Represent an inloop multiply-accumulate reduction, multiplying the 2795 /// vector operands, performing a reduction.add on the result, and adding 2796 /// the scalar result to a chain. 2797 MulAccReduction, 2798 }; 2799 2800 /// Type of the expression. 2801 ExpressionTypes ExpressionType; 2802 2803 /// Construct a new VPExpressionRecipe by internalizing recipes in \p 2804 /// ExpressionRecipes. External operands (i.e. not defined by another recipe 2805 /// in the expression) are replaced by temporary VPValues and the original 2806 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes 2807 /// as needed (excluding last) to ensure they are only used by other recipes 2808 /// in the expression. 
2809 VPExpressionRecipe(ExpressionTypes ExpressionType, 2810 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes); 2811 2812 public: VPExpressionRecipe(VPWidenCastRecipe * Ext,VPReductionRecipe * Red)2813 VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red) 2814 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {} VPExpressionRecipe(VPWidenRecipe * Mul,VPReductionRecipe * Red)2815 VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red) 2816 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {} VPExpressionRecipe(VPWidenCastRecipe * Ext0,VPWidenCastRecipe * Ext1,VPWidenRecipe * Mul,VPReductionRecipe * Red)2817 VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, 2818 VPWidenRecipe *Mul, VPReductionRecipe *Red) 2819 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction, 2820 {Ext0, Ext1, Mul, Red}) {} 2821 ~VPExpressionRecipe()2822 ~VPExpressionRecipe() override { 2823 for (auto *R : reverse(ExpressionRecipes)) 2824 delete R; 2825 for (VPValue *T : LiveInPlaceholders) 2826 delete T; 2827 } 2828 VP_CLASSOF_IMPL(VPDef::VPExpressionSC)2829 VP_CLASSOF_IMPL(VPDef::VPExpressionSC) 2830 2831 VPExpressionRecipe *clone() override { 2832 assert(!ExpressionRecipes.empty() && "empty expressions should be removed"); 2833 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes; 2834 for (auto *R : ExpressionRecipes) 2835 NewExpressiondRecipes.push_back(R->clone()); 2836 for (auto *New : NewExpressiondRecipes) { 2837 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes)) 2838 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]); 2839 // Update placeholder operands in the cloned recipe to use the external 2840 // operands, to be internalized when the cloned expression is constructed. 
2841 for (const auto &[Placeholder, OutsideOp] : 2842 zip(LiveInPlaceholders, operands())) 2843 New->replaceUsesOfWith(Placeholder, OutsideOp); 2844 } 2845 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes); 2846 } 2847 2848 /// Return the VPValue to use to infer the result type of the recipe. getOperandOfResultType()2849 VPValue *getOperandOfResultType() const { 2850 unsigned OpIdx = 2851 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2 2852 : 1; 2853 return getOperand(getNumOperands() - OpIdx); 2854 } 2855 2856 /// Insert the recipes of the expression back into the VPlan, directly before 2857 /// the current recipe. Leaves the expression recipe empty, which must be 2858 /// removed before codegen. 2859 void decompose(); 2860 2861 /// Method for generating code, must not be called as this recipe is abstract. execute(VPTransformState & State)2862 void execute(VPTransformState &State) override { 2863 llvm_unreachable("recipe must be removed before execute"); 2864 } 2865 2866 InstructionCost computeCost(ElementCount VF, 2867 VPCostContext &Ctx) const override; 2868 2869 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2870 /// Print the recipe. 2871 void print(raw_ostream &O, const Twine &Indent, 2872 VPSlotTracker &SlotTracker) const override; 2873 #endif 2874 2875 /// Returns true if this expression contains recipes that may read from or 2876 /// write to memory. 2877 bool mayReadOrWriteMemory() const; 2878 2879 /// Returns true if this expression contains recipes that may have side 2880 /// effects. 2881 bool mayHaveSideEffects() const; 2882 }; 2883 2884 /// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when 2885 /// control converges back from a Branch-on-Mask. The phi nodes are needed in 2886 /// order to merge values that are set under such a branch and feed their uses. 2887 /// The phi nodes can be scalar or vector depending on the users of the value. 
2888 /// This recipe works in concert with VPBranchOnMaskRecipe. 2889 class LLVM_ABI_FOR_TEST VPPredInstPHIRecipe : public VPSingleDefRecipe { 2890 public: 2891 /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi 2892 /// nodes after merging back from a Branch-on-Mask. VPPredInstPHIRecipe(VPValue * PredV,DebugLoc DL)2893 VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL) 2894 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {} 2895 ~VPPredInstPHIRecipe() override = default; 2896 clone()2897 VPPredInstPHIRecipe *clone() override { 2898 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc()); 2899 } 2900 2901 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC) 2902 2903 /// Generates phi nodes for live-outs (from a replicate region) as needed to 2904 /// retain SSA form. 2905 void execute(VPTransformState &State) override; 2906 2907 /// Return the cost of this VPPredInstPHIRecipe. computeCost(ElementCount VF,VPCostContext & Ctx)2908 InstructionCost computeCost(ElementCount VF, 2909 VPCostContext &Ctx) const override { 2910 // TODO: Compute accurate cost after retiring the legacy cost model. 2911 return 0; 2912 } 2913 2914 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2915 /// Print the recipe. 2916 void print(raw_ostream &O, const Twine &Indent, 2917 VPSlotTracker &SlotTracker) const override; 2918 #endif 2919 2920 /// Returns true if the recipe uses scalars of operand \p Op. usesScalars(const VPValue * Op)2921 bool usesScalars(const VPValue *Op) const override { 2922 assert(is_contained(operands(), Op) && 2923 "Op must be an operand of the recipe"); 2924 return true; 2925 } 2926 }; 2927 2928 /// A common base class for widening memory operations. An optional mask can be 2929 /// provided as the last operand. 2930 class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase, 2931 public VPIRMetadata { 2932 protected: 2933 Instruction &Ingredient; 2934 2935 /// Whether the accessed addresses are consecutive. 
2936 bool Consecutive; 2937 2938 /// Whether the consecutive accessed addresses are in reverse order. 2939 bool Reverse; 2940 2941 /// Whether the memory access is masked. 2942 bool IsMasked = false; 2943 setMask(VPValue * Mask)2944 void setMask(VPValue *Mask) { 2945 assert(!IsMasked && "cannot re-set mask"); 2946 if (!Mask) 2947 return; 2948 addOperand(Mask); 2949 IsMasked = true; 2950 } 2951 VPWidenMemoryRecipe(const char unsigned SC,Instruction & I,std::initializer_list<VPValue * > Operands,bool Consecutive,bool Reverse,const VPIRMetadata & Metadata,DebugLoc DL)2952 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, 2953 std::initializer_list<VPValue *> Operands, 2954 bool Consecutive, bool Reverse, 2955 const VPIRMetadata &Metadata, DebugLoc DL) 2956 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I), 2957 Consecutive(Consecutive), Reverse(Reverse) { 2958 assert((Consecutive || !Reverse) && "Reverse implies consecutive"); 2959 } 2960 2961 public: clone()2962 VPWidenMemoryRecipe *clone() override { 2963 llvm_unreachable("cloning not supported"); 2964 } 2965 classof(const VPRecipeBase * R)2966 static inline bool classof(const VPRecipeBase *R) { 2967 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC || 2968 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC || 2969 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC || 2970 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC; 2971 } 2972 classof(const VPUser * U)2973 static inline bool classof(const VPUser *U) { 2974 auto *R = dyn_cast<VPRecipeBase>(U); 2975 return R && classof(R); 2976 } 2977 2978 /// Return whether the loaded-from / stored-to addresses are consecutive. isConsecutive()2979 bool isConsecutive() const { return Consecutive; } 2980 2981 /// Return whether the consecutive loaded/stored addresses are in reverse 2982 /// order. isReverse()2983 bool isReverse() const { return Reverse; } 2984 2985 /// Return the address accessed by this recipe. 
getAddr()2986 VPValue *getAddr() const { return getOperand(0); } 2987 2988 /// Returns true if the recipe is masked. isMasked()2989 bool isMasked() const { return IsMasked; } 2990 2991 /// Return the mask used by this recipe. Note that a full mask is represented 2992 /// by a nullptr. getMask()2993 VPValue *getMask() const { 2994 // Mask is optional and therefore the last operand. 2995 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr; 2996 } 2997 2998 /// Generate the wide load/store. execute(VPTransformState & State)2999 void execute(VPTransformState &State) override { 3000 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated."); 3001 } 3002 3003 /// Return the cost of this VPWidenMemoryRecipe. 3004 InstructionCost computeCost(ElementCount VF, 3005 VPCostContext &Ctx) const override; 3006 getIngredient()3007 Instruction &getIngredient() const { return Ingredient; } 3008 }; 3009 3010 /// A recipe for widening load operations, using the address to load from and an 3011 /// optional mask. 3012 struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe, 3013 public VPValue { VPWidenLoadRecipefinal3014 VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, 3015 bool Consecutive, bool Reverse, 3016 const VPIRMetadata &Metadata, DebugLoc DL) 3017 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive, 3018 Reverse, Metadata, DL), 3019 VPValue(this, &Load) { 3020 setMask(Mask); 3021 } 3022 clonefinal3023 VPWidenLoadRecipe *clone() override { 3024 return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(), 3025 getMask(), Consecutive, Reverse, *this, 3026 getDebugLoc()); 3027 } 3028 3029 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC); 3030 3031 /// Generate a wide load or gather. 3032 void execute(VPTransformState &State) override; 3033 3034 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3035 /// Print the recipe. 
3036 void print(raw_ostream &O, const Twine &Indent, 3037 VPSlotTracker &SlotTracker) const override; 3038 #endif 3039 3040 /// Returns true if the recipe only uses the first lane of operand \p Op. onlyFirstLaneUsedfinal3041 bool onlyFirstLaneUsed(const VPValue *Op) const override { 3042 assert(is_contained(operands(), Op) && 3043 "Op must be an operand of the recipe"); 3044 // Widened, consecutive loads operations only demand the first lane of 3045 // their address. 3046 return Op == getAddr() && isConsecutive(); 3047 } 3048 }; 3049 3050 /// A recipe for widening load operations with vector-predication intrinsics, 3051 /// using the address to load from, the explicit vector length and an optional 3052 /// mask. 3053 struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue { VPWidenLoadEVLRecipefinal3054 VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask) 3055 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(), 3056 {L.getAddr(), &EVL}, L.isConsecutive(), 3057 L.isReverse(), L, L.getDebugLoc()), 3058 VPValue(this, &getIngredient()) { 3059 setMask(Mask); 3060 } 3061 VP_CLASSOF_IMPLfinal3062 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC) 3063 3064 /// Return the EVL operand. 3065 VPValue *getEVL() const { return getOperand(1); } 3066 3067 /// Generate the wide load or gather. 3068 void execute(VPTransformState &State) override; 3069 3070 /// Return the cost of this VPWidenLoadEVLRecipe. 3071 InstructionCost computeCost(ElementCount VF, 3072 VPCostContext &Ctx) const override; 3073 3074 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3075 /// Print the recipe. 3076 void print(raw_ostream &O, const Twine &Indent, 3077 VPSlotTracker &SlotTracker) const override; 3078 #endif 3079 3080 /// Returns true if the recipe only uses the first lane of operand \p Op. 
onlyFirstLaneUsedfinal3081 bool onlyFirstLaneUsed(const VPValue *Op) const override { 3082 assert(is_contained(operands(), Op) && 3083 "Op must be an operand of the recipe"); 3084 // Widened loads only demand the first lane of EVL and consecutive loads 3085 // only demand the first lane of their address. 3086 return Op == getEVL() || (Op == getAddr() && isConsecutive()); 3087 } 3088 }; 3089 3090 /// A recipe for widening store operations, using the stored value, the address 3091 /// to store to and an optional mask. 3092 struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe { VPWidenStoreRecipefinal3093 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, 3094 VPValue *Mask, bool Consecutive, bool Reverse, 3095 const VPIRMetadata &Metadata, DebugLoc DL) 3096 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal}, 3097 Consecutive, Reverse, Metadata, DL) { 3098 setMask(Mask); 3099 } 3100 clonefinal3101 VPWidenStoreRecipe *clone() override { 3102 return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(), 3103 getStoredValue(), getMask(), Consecutive, 3104 Reverse, *this, getDebugLoc()); 3105 } 3106 3107 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC); 3108 3109 /// Return the value stored by this recipe. getStoredValuefinal3110 VPValue *getStoredValue() const { return getOperand(1); } 3111 3112 /// Generate a wide store or scatter. 3113 void execute(VPTransformState &State) override; 3114 3115 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3116 /// Print the recipe. 3117 void print(raw_ostream &O, const Twine &Indent, 3118 VPSlotTracker &SlotTracker) const override; 3119 #endif 3120 3121 /// Returns true if the recipe only uses the first lane of operand \p Op. 
onlyFirstLaneUsedfinal3122 bool onlyFirstLaneUsed(const VPValue *Op) const override { 3123 assert(is_contained(operands(), Op) && 3124 "Op must be an operand of the recipe"); 3125 // Widened, consecutive stores only demand the first lane of their address, 3126 // unless the same operand is also stored. 3127 return Op == getAddr() && isConsecutive() && Op != getStoredValue(); 3128 } 3129 }; 3130 3131 /// A recipe for widening store operations with vector-predication intrinsics, 3132 /// using the value to store, the address to store to, the explicit vector 3133 /// length and an optional mask. 3134 struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe { VPWidenStoreEVLRecipefinal3135 VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask) 3136 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(), 3137 {S.getAddr(), S.getStoredValue(), &EVL}, 3138 S.isConsecutive(), S.isReverse(), S, 3139 S.getDebugLoc()) { 3140 setMask(Mask); 3141 } 3142 VP_CLASSOF_IMPLfinal3143 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC) 3144 3145 /// Return the address accessed by this recipe. 3146 VPValue *getStoredValue() const { return getOperand(1); } 3147 3148 /// Return the EVL operand. getEVLfinal3149 VPValue *getEVL() const { return getOperand(2); } 3150 3151 /// Generate the wide store or scatter. 3152 void execute(VPTransformState &State) override; 3153 3154 /// Return the cost of this VPWidenStoreEVLRecipe. 3155 InstructionCost computeCost(ElementCount VF, 3156 VPCostContext &Ctx) const override; 3157 3158 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3159 /// Print the recipe. 3160 void print(raw_ostream &O, const Twine &Indent, 3161 VPSlotTracker &SlotTracker) const override; 3162 #endif 3163 3164 /// Returns true if the recipe only uses the first lane of operand \p Op. 
onlyFirstLaneUsedfinal3165 bool onlyFirstLaneUsed(const VPValue *Op) const override { 3166 assert(is_contained(operands(), Op) && 3167 "Op must be an operand of the recipe"); 3168 if (Op == getEVL()) { 3169 assert(getStoredValue() != Op && "unexpected store of EVL"); 3170 return true; 3171 } 3172 // Widened, consecutive memory operations only demand the first lane of 3173 // their address, unless the same operand is also stored. That latter can 3174 // happen with opaque pointers. 3175 return Op == getAddr() && isConsecutive() && Op != getStoredValue(); 3176 } 3177 }; 3178 3179 /// Recipe to expand a SCEV expression. 3180 class VPExpandSCEVRecipe : public VPSingleDefRecipe { 3181 const SCEV *Expr; 3182 ScalarEvolution &SE; 3183 3184 public: VPExpandSCEVRecipe(const SCEV * Expr,ScalarEvolution & SE)3185 VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE) 3186 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {} 3187 3188 ~VPExpandSCEVRecipe() override = default; 3189 clone()3190 VPExpandSCEVRecipe *clone() override { 3191 return new VPExpandSCEVRecipe(Expr, SE); 3192 } 3193 3194 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC) 3195 3196 /// Generate a canonical vector induction variable of the vector loop, with 3197 void execute(VPTransformState &State) override; 3198 3199 /// Return the cost of this VPExpandSCEVRecipe. computeCost(ElementCount VF,VPCostContext & Ctx)3200 InstructionCost computeCost(ElementCount VF, 3201 VPCostContext &Ctx) const override { 3202 // TODO: Compute accurate cost after retiring the legacy cost model. 3203 return 0; 3204 } 3205 3206 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3207 /// Print the recipe. 3208 void print(raw_ostream &O, const Twine &Indent, 3209 VPSlotTracker &SlotTracker) const override; 3210 #endif 3211 getSCEV()3212 const SCEV *getSCEV() const { return Expr; } 3213 }; 3214 3215 /// Canonical scalar induction phi of the vector loop. 
Starting at the specified 3216 /// start value (either 0 or the resume value when vectorizing the epilogue 3217 /// loop). VPWidenCanonicalIVRecipe represents the vector version of the 3218 /// canonical induction variable. 3219 class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe { 3220 public: VPCanonicalIVPHIRecipe(VPValue * StartV,DebugLoc DL)3221 VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL) 3222 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {} 3223 3224 ~VPCanonicalIVPHIRecipe() override = default; 3225 clone()3226 VPCanonicalIVPHIRecipe *clone() override { 3227 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc()); 3228 R->addOperand(getBackedgeValue()); 3229 return R; 3230 } 3231 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)3232 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC) 3233 3234 void execute(VPTransformState &State) override { 3235 llvm_unreachable("cannot execute this recipe, should be replaced by a " 3236 "scalar phi recipe"); 3237 } 3238 3239 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3240 /// Print the recipe. 3241 void print(raw_ostream &O, const Twine &Indent, 3242 VPSlotTracker &SlotTracker) const override; 3243 #endif 3244 3245 /// Returns the scalar type of the induction. getScalarType()3246 Type *getScalarType() const { 3247 return getStartValue()->getLiveInIRValue()->getType(); 3248 } 3249 3250 /// Returns true if the recipe only uses the first lane of operand \p Op. onlyFirstLaneUsed(const VPValue * Op)3251 bool onlyFirstLaneUsed(const VPValue *Op) const override { 3252 assert(is_contained(operands(), Op) && 3253 "Op must be an operand of the recipe"); 3254 return true; 3255 } 3256 3257 /// Returns true if the recipe only uses the first part of operand \p Op. 
onlyFirstPartUsed(const VPValue * Op)3258 bool onlyFirstPartUsed(const VPValue *Op) const override { 3259 assert(is_contained(operands(), Op) && 3260 "Op must be an operand of the recipe"); 3261 return true; 3262 } 3263 3264 /// Return the cost of this VPCanonicalIVPHIRecipe. computeCost(ElementCount VF,VPCostContext & Ctx)3265 InstructionCost computeCost(ElementCount VF, 3266 VPCostContext &Ctx) const override { 3267 // For now, match the behavior of the legacy cost model. 3268 return 0; 3269 } 3270 }; 3271 3272 /// A recipe for generating the active lane mask for the vector loop that is 3273 /// used to predicate the vector operations. 3274 /// TODO: It would be good to use the existing VPWidenPHIRecipe instead and 3275 /// remove VPActiveLaneMaskPHIRecipe. 3276 class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe { 3277 public: VPActiveLaneMaskPHIRecipe(VPValue * StartMask,DebugLoc DL)3278 VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL) 3279 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask, 3280 DL) {} 3281 3282 ~VPActiveLaneMaskPHIRecipe() override = default; 3283 clone()3284 VPActiveLaneMaskPHIRecipe *clone() override { 3285 auto *R = new VPActiveLaneMaskPHIRecipe(getOperand(0), getDebugLoc()); 3286 if (getNumOperands() == 2) 3287 R->addOperand(getOperand(1)); 3288 return R; 3289 } 3290 3291 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC) 3292 3293 /// Generate the active lane mask phi of the vector loop. 3294 void execute(VPTransformState &State) override; 3295 3296 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3297 /// Print the recipe. 3298 void print(raw_ostream &O, const Twine &Indent, 3299 VPSlotTracker &SlotTracker) const override; 3300 #endif 3301 }; 3302 3303 /// A recipe for generating the phi node for the current index of elements, 3304 /// adjusted in accordance with EVL value. 
It starts at the start value of the 3305 /// canonical induction and gets incremented by EVL in each iteration of the 3306 /// vector loop. 3307 class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe { 3308 public: VPEVLBasedIVPHIRecipe(VPValue * StartIV,DebugLoc DL)3309 VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL) 3310 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {} 3311 3312 ~VPEVLBasedIVPHIRecipe() override = default; 3313 clone()3314 VPEVLBasedIVPHIRecipe *clone() override { 3315 llvm_unreachable("cloning not implemented yet"); 3316 } 3317 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)3318 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC) 3319 3320 void execute(VPTransformState &State) override { 3321 llvm_unreachable("cannot execute this recipe, should be replaced by a " 3322 "scalar phi recipe"); 3323 } 3324 3325 /// Return the cost of this VPEVLBasedIVPHIRecipe. computeCost(ElementCount VF,VPCostContext & Ctx)3326 InstructionCost computeCost(ElementCount VF, 3327 VPCostContext &Ctx) const override { 3328 // For now, match the behavior of the legacy cost model. 3329 return 0; 3330 } 3331 3332 /// Returns true if the recipe only uses the first lane of operand \p Op. onlyFirstLaneUsed(const VPValue * Op)3333 bool onlyFirstLaneUsed(const VPValue *Op) const override { 3334 assert(is_contained(operands(), Op) && 3335 "Op must be an operand of the recipe"); 3336 return true; 3337 } 3338 3339 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3340 /// Print the recipe. 3341 void print(raw_ostream &O, const Twine &Indent, 3342 VPSlotTracker &SlotTracker) const override; 3343 #endif 3344 }; 3345 3346 /// A Recipe for widening the canonical induction variable of the vector loop. 
3347 class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe, 3348 public VPUnrollPartAccessor<1> { 3349 public: VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe * CanonicalIV)3350 VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV) 3351 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {} 3352 3353 ~VPWidenCanonicalIVRecipe() override = default; 3354 clone()3355 VPWidenCanonicalIVRecipe *clone() override { 3356 return new VPWidenCanonicalIVRecipe( 3357 cast<VPCanonicalIVPHIRecipe>(getOperand(0))); 3358 } 3359 3360 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC) 3361 3362 /// Generate a canonical vector induction variable of the vector loop, with 3363 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and 3364 /// step = <VF*UF, VF*UF, ..., VF*UF>. 3365 void execute(VPTransformState &State) override; 3366 3367 /// Return the cost of this VPWidenCanonicalIVPHIRecipe. computeCost(ElementCount VF,VPCostContext & Ctx)3368 InstructionCost computeCost(ElementCount VF, 3369 VPCostContext &Ctx) const override { 3370 // TODO: Compute accurate cost after retiring the legacy cost model. 3371 return 0; 3372 } 3373 3374 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3375 /// Print the recipe. 3376 void print(raw_ostream &O, const Twine &Indent, 3377 VPSlotTracker &SlotTracker) const override; 3378 #endif 3379 }; 3380 3381 /// A recipe for converting the input value \p IV value to the corresponding 3382 /// value of an IV with different start and step values, using Start + IV * 3383 /// Step. 3384 class VPDerivedIVRecipe : public VPSingleDefRecipe { 3385 /// Kind of the induction. 3386 const InductionDescriptor::InductionKind Kind; 3387 /// If not nullptr, the floating point induction binary operator. Must be set 3388 /// for floating point inductions. 3389 const FPMathOperator *FPBinOp; 3390 3391 /// Name to use for the generated IR instruction for the derived IV. 
3392 std::string Name; 3393 3394 public: 3395 VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, 3396 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, 3397 const Twine &Name = "") 3398 : VPDerivedIVRecipe( 3399 IndDesc.getKind(), 3400 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()), 3401 Start, CanonicalIV, Step, Name) {} 3402 3403 VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, 3404 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, 3405 VPValue *Step, const Twine &Name = "") 3406 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind), 3407 FPBinOp(FPBinOp), Name(Name.str()) {} 3408 3409 ~VPDerivedIVRecipe() override = default; 3410 clone()3411 VPDerivedIVRecipe *clone() override { 3412 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1), 3413 getStepValue()); 3414 } 3415 3416 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC) 3417 3418 /// Generate the transformed value of the induction at offset StartValue (1. 3419 /// operand) + IV (2. operand) * StepValue (3, operand). 3420 void execute(VPTransformState &State) override; 3421 3422 /// Return the cost of this VPDerivedIVRecipe. computeCost(ElementCount VF,VPCostContext & Ctx)3423 InstructionCost computeCost(ElementCount VF, 3424 VPCostContext &Ctx) const override { 3425 // TODO: Compute accurate cost after retiring the legacy cost model. 3426 return 0; 3427 } 3428 3429 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3430 /// Print the recipe. 3431 void print(raw_ostream &O, const Twine &Indent, 3432 VPSlotTracker &SlotTracker) const override; 3433 #endif 3434 getScalarType()3435 Type *getScalarType() const { 3436 return getStartValue()->getLiveInIRValue()->getType(); 3437 } 3438 getStartValue()3439 VPValue *getStartValue() const { return getOperand(0); } getStepValue()3440 VPValue *getStepValue() const { return getOperand(2); } 3441 3442 /// Returns true if the recipe only uses the first lane of operand \p Op. 
onlyFirstLaneUsed(const VPValue * Op)3443 bool onlyFirstLaneUsed(const VPValue *Op) const override { 3444 assert(is_contained(operands(), Op) && 3445 "Op must be an operand of the recipe"); 3446 return true; 3447 } 3448 }; 3449 3450 /// A recipe for handling phi nodes of integer and floating-point inductions, 3451 /// producing their scalar values. 3452 class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags, 3453 public VPUnrollPartAccessor<3> { 3454 Instruction::BinaryOps InductionOpcode; 3455 3456 public: VPScalarIVStepsRecipe(VPValue * IV,VPValue * Step,VPValue * VF,Instruction::BinaryOps Opcode,FastMathFlags FMFs,DebugLoc DL)3457 VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, 3458 Instruction::BinaryOps Opcode, FastMathFlags FMFs, 3459 DebugLoc DL) 3460 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC, 3461 ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL), 3462 InductionOpcode(Opcode) {} 3463 3464 VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, 3465 VPValue *Step, VPValue *VF, DebugLoc DL = {}) 3466 : VPScalarIVStepsRecipe( 3467 IV, Step, VF, IndDesc.getInductionOpcode(), 3468 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()) 3469 ? IndDesc.getInductionBinOp()->getFastMathFlags() 3470 : FastMathFlags(), 3471 DL) {} 3472 3473 ~VPScalarIVStepsRecipe() override = default; 3474 clone()3475 VPScalarIVStepsRecipe *clone() override { 3476 return new VPScalarIVStepsRecipe( 3477 getOperand(0), getOperand(1), getOperand(2), InductionOpcode, 3478 hasFastMathFlags() ? getFastMathFlags() : FastMathFlags(), 3479 getDebugLoc()); 3480 } 3481 3482 /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that 3483 /// this is only accurate after the VPlan has been unrolled. isPart0()3484 bool isPart0() { return getUnrollPart(*this) == 0; } 3485 3486 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC) 3487 3488 /// Generate the scalarized versions of the phi node as needed by their users. 
3489 void execute(VPTransformState &State) override; 3490 3491 /// Return the cost of this VPScalarIVStepsRecipe. computeCost(ElementCount VF,VPCostContext & Ctx)3492 InstructionCost computeCost(ElementCount VF, 3493 VPCostContext &Ctx) const override { 3494 // TODO: Compute accurate cost after retiring the legacy cost model. 3495 return 0; 3496 } 3497 3498 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3499 /// Print the recipe. 3500 void print(raw_ostream &O, const Twine &Indent, 3501 VPSlotTracker &SlotTracker) const override; 3502 #endif 3503 getStepValue()3504 VPValue *getStepValue() const { return getOperand(1); } 3505 3506 /// Returns true if the recipe only uses the first lane of operand \p Op. onlyFirstLaneUsed(const VPValue * Op)3507 bool onlyFirstLaneUsed(const VPValue *Op) const override { 3508 assert(is_contained(operands(), Op) && 3509 "Op must be an operand of the recipe"); 3510 return true; 3511 } 3512 }; 3513 3514 /// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe 3515 /// types implementing VPPhiAccessors. Used by isa<> & co. 3516 template <> struct CastIsPossible<VPPhiAccessors, const VPRecipeBase *> { 3517 static inline bool isPossible(const VPRecipeBase *f) { 3518 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors. 3519 return isa<VPIRPhi, VPHeaderPHIRecipe, VPWidenPHIRecipe, VPPhi>(f); 3520 } 3521 }; 3522 /// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the 3523 /// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co. 3524 template <typename SrcTy> 3525 struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> { 3526 3527 using Self = CastInfo<VPPhiAccessors, SrcTy>; 3528 3529 /// doCast is used by cast<>. 
3530 static inline VPPhiAccessors *doCast(SrcTy R) { 3531 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * { 3532 switch (R->getVPDefID()) { 3533 case VPDef::VPInstructionSC: 3534 return cast<VPPhi>(R); 3535 case VPDef::VPIRInstructionSC: 3536 return cast<VPIRPhi>(R); 3537 case VPDef::VPWidenPHISC: 3538 return cast<VPWidenPHIRecipe>(R); 3539 default: 3540 return cast<VPHeaderPHIRecipe>(R); 3541 } 3542 }()); 3543 } 3544 3545 /// doCastIfPossible is used by dyn_cast<>. 3546 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) { 3547 if (!Self::isPossible(f)) 3548 return nullptr; 3549 return doCast(f); 3550 } 3551 }; 3552 template <> 3553 struct CastInfo<VPPhiAccessors, VPRecipeBase *> 3554 : CastInfoVPPhiAccessors<VPRecipeBase *> {}; 3555 template <> 3556 struct CastInfo<VPPhiAccessors, const VPRecipeBase *> 3557 : CastInfoVPPhiAccessors<const VPRecipeBase *> {}; 3558 3559 /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It 3560 /// holds a sequence of zero or more VPRecipe's each representing a sequence of 3561 /// output IR instructions. All PHI-like recipes must come before any non-PHI recipes. 3562 class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase { 3563 friend class VPlan; 3564 3565 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks. 3566 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr) 3567 : VPBlockBase(VPBasicBlockSC, Name.str()) { 3568 if (Recipe) 3569 appendRecipe(Recipe); 3570 } 3571 3572 public: 3573 using RecipeListTy = iplist<VPRecipeBase>; 3574 3575 protected: 3576 /// The VPRecipes held in the order of output instructions to generate. 3577 RecipeListTy Recipes; 3578 3579 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "") 3580 : VPBlockBase(BlockSC, Name.str()) {} 3581 3582 public: 3583 ~VPBasicBlock() override { 3584 while (!Recipes.empty()) 3585 Recipes.pop_back(); 3586 } 3587 3588 /// Instruction iterators... 
3589 using iterator = RecipeListTy::iterator; 3590 using const_iterator = RecipeListTy::const_iterator; 3591 using reverse_iterator = RecipeListTy::reverse_iterator; 3592 using const_reverse_iterator = RecipeListTy::const_reverse_iterator; 3593 3594 //===--------------------------------------------------------------------===// 3595 /// Recipe iterator methods 3596 /// 3597 inline iterator begin() { return Recipes.begin(); } 3598 inline const_iterator begin() const { return Recipes.begin(); } 3599 inline iterator end() { return Recipes.end(); } 3600 inline const_iterator end() const { return Recipes.end(); } 3601 3602 inline reverse_iterator rbegin() { return Recipes.rbegin(); } 3603 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); } 3604 inline reverse_iterator rend() { return Recipes.rend(); } 3605 inline const_reverse_iterator rend() const { return Recipes.rend(); } 3606 3607 inline size_t size() const { return Recipes.size(); } 3608 inline bool empty() const { return Recipes.empty(); } 3609 inline const VPRecipeBase &front() const { return Recipes.front(); } 3610 inline VPRecipeBase &front() { return Recipes.front(); } 3611 inline const VPRecipeBase &back() const { return Recipes.back(); } 3612 inline VPRecipeBase &back() { return Recipes.back(); } 3613 3614 /// Returns a reference to the list of recipes. 3615 RecipeListTy &getRecipeList() { return Recipes; } 3616 3617 /// Returns a pointer to a member of the recipe list. 3618 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) { 3619 return &VPBasicBlock::Recipes; 3620 } 3621 3622 /// Method to support type inquiry through isa, cast, and dyn_cast. 
3623 static inline bool classof(const VPBlockBase *V) { 3624 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC || 3625 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC; 3626 } 3627 3628 void insert(VPRecipeBase *Recipe, iterator InsertPt) { 3629 assert(Recipe && "No recipe to append."); 3630 assert(!Recipe->Parent && "Recipe already in VPlan"); 3631 Recipe->Parent = this; 3632 Recipes.insert(InsertPt, Recipe); 3633 } 3634 3635 /// Augment the existing recipes of a VPBasicBlock with an additional 3636 /// \p Recipe as the last recipe. 3637 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); } 3638 3639 /// The method which generates the output IR instructions that correspond to 3640 /// this VPBasicBlock, thereby "executing" the VPlan. 3641 void execute(VPTransformState *State) override; 3642 3643 /// Return the cost of this VPBasicBlock. 3644 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override; 3645 3646 /// Return the position of the first non-phi node recipe in the block. 3647 iterator getFirstNonPhi(); 3648 3649 /// Returns an iterator range over the PHI-like recipes in the block. 3650 iterator_range<iterator> phis() { 3651 return make_range(begin(), getFirstNonPhi()); 3652 } 3653 3654 /// Split current block at \p SplitAt by inserting a new block between the 3655 /// current block and its successors and moving all recipes starting at 3656 /// SplitAt to the new block. Returns the new block. 3657 VPBasicBlock *splitAt(iterator SplitAt); 3658 3659 VPRegionBlock *getEnclosingLoopRegion(); 3660 const VPRegionBlock *getEnclosingLoopRegion() const; 3661 3662 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3663 /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p 3664 /// SlotTracker is used to print unnamed VPValue's using consequtive numbers. 3665 /// 3666 /// Note that the numbering is applied to the whole VPlan, so printing 3667 /// individual blocks is consistent with the whole VPlan printing. 
3668 void print(raw_ostream &O, const Twine &Indent, 3669 VPSlotTracker &SlotTracker) const override; 3670 using VPBlockBase::print; // Get the print(raw_stream &O) version. 3671 #endif 3672 3673 /// If the block has multiple successors, return the branch recipe terminating 3674 /// the block. If there are no or only a single successor, return nullptr; 3675 VPRecipeBase *getTerminator(); 3676 const VPRecipeBase *getTerminator() const; 3677 3678 /// Returns true if the block is exiting it's parent region. 3679 bool isExiting() const; 3680 3681 /// Clone the current block and it's recipes, without updating the operands of 3682 /// the cloned recipes. 3683 VPBasicBlock *clone() override; 3684 3685 /// Returns the predecessor block at index \p Idx with the predecessors as per 3686 /// the corresponding plain CFG. If the block is an entry block to a region, 3687 /// the first predecessor is the single predecessor of a region, and the 3688 /// second predecessor is the exiting block of the region. 3689 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const; 3690 3691 protected: 3692 /// Execute the recipes in the IR basic block \p BB. 3693 void executeRecipes(VPTransformState *State, BasicBlock *BB); 3694 3695 /// Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block 3696 /// generated for this VPBB. 3697 void connectToPredecessors(VPTransformState &State); 3698 3699 private: 3700 /// Create an IR BasicBlock to hold the output instructions generated by this 3701 /// VPBasicBlock, and return it. Update the CFGState accordingly. 3702 BasicBlock *createEmptyBasicBlock(VPTransformState &State); 3703 }; 3704 3705 inline const VPBasicBlock * 3706 VPPhiAccessors::getIncomingBlock(unsigned Idx) const { 3707 return getAsRecipe()->getParent()->getCFGPredecessor(Idx); 3708 } 3709 3710 /// A special type of VPBasicBlock that wraps an existing IR basic block. 
3711 /// Recipes of the block get added before the first non-phi instruction in the 3712 /// wrapped block. 3713 /// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's 3714 /// preheader block. 3715 class VPIRBasicBlock : public VPBasicBlock { 3716 friend class VPlan; 3717 3718 BasicBlock *IRBB; 3719 3720 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks. 3721 VPIRBasicBlock(BasicBlock *IRBB) 3722 : VPBasicBlock(VPIRBasicBlockSC, 3723 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()), 3724 IRBB(IRBB) {} 3725 3726 public: 3727 ~VPIRBasicBlock() override {} 3728 3729 static inline bool classof(const VPBlockBase *V) { 3730 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC; 3731 } 3732 3733 /// The method which generates the output IR instructions that correspond to 3734 /// this VPBasicBlock, thereby "executing" the VPlan. 3735 void execute(VPTransformState *State) override; 3736 3737 VPIRBasicBlock *clone() override; 3738 3739 BasicBlock *getIRBasicBlock() const { return IRBB; } 3740 }; 3741 3742 /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks 3743 /// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG. 3744 /// A VPRegionBlock may indicate that its contents are to be replicated several 3745 /// times. This is designed to support predicated scalarization, in which a 3746 /// scalar if-then code structure needs to be generated VF * UF times. Having 3747 /// this replication indicator helps to keep a single model for multiple 3748 /// candidate VF's. The actual replication takes place only once the desired VF 3749 /// and UF have been determined. 3750 class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase { 3751 friend class VPlan; 3752 3753 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock. 3754 VPBlockBase *Entry; 3755 3756 /// Hold the Single Exiting block of the SESE region modelled by the 3757 /// VPRegionBlock. 
3758 VPBlockBase *Exiting; 3759 3760 /// An indicator whether this region is to generate multiple replicated 3761 /// instances of output IR corresponding to its VPBlockBases. 3762 bool IsReplicator; 3763 3764 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks. 3765 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, 3766 const std::string &Name = "", bool IsReplicator = false) 3767 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting), 3768 IsReplicator(IsReplicator) { 3769 assert(Entry->getPredecessors().empty() && "Entry block has predecessors."); 3770 assert(Exiting->getSuccessors().empty() && "Exit block has successors."); 3771 Entry->setParent(this); 3772 Exiting->setParent(this); 3773 } 3774 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false) 3775 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr), 3776 IsReplicator(IsReplicator) {} 3777 3778 public: 3779 ~VPRegionBlock() override {} 3780 3781 /// Method to support type inquiry through isa, cast, and dyn_cast. 3782 static inline bool classof(const VPBlockBase *V) { 3783 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC; 3784 } 3785 3786 const VPBlockBase *getEntry() const { return Entry; } 3787 VPBlockBase *getEntry() { return Entry; } 3788 3789 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p 3790 /// EntryBlock must have no predecessors. 3791 void setEntry(VPBlockBase *EntryBlock) { 3792 assert(EntryBlock->getPredecessors().empty() && 3793 "Entry block cannot have predecessors."); 3794 Entry = EntryBlock; 3795 EntryBlock->setParent(this); 3796 } 3797 3798 const VPBlockBase *getExiting() const { return Exiting; } 3799 VPBlockBase *getExiting() { return Exiting; } 3800 3801 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p 3802 /// ExitingBlock must have no successors. 
3803 void setExiting(VPBlockBase *ExitingBlock) { 3804 assert(ExitingBlock->getSuccessors().empty() && 3805 "Exit block cannot have successors."); 3806 Exiting = ExitingBlock; 3807 ExitingBlock->setParent(this); 3808 } 3809 3810 /// Returns the pre-header VPBasicBlock of the loop region. 3811 VPBasicBlock *getPreheaderVPBB() { 3812 assert(!isReplicator() && "should only get pre-header of loop regions"); 3813 return getSinglePredecessor()->getExitingBasicBlock(); 3814 } 3815 3816 /// An indicator whether this region is to generate multiple replicated 3817 /// instances of output IR corresponding to its VPBlockBases. 3818 bool isReplicator() const { return IsReplicator; } 3819 3820 /// The method which generates the output IR instructions that correspond to 3821 /// this VPRegionBlock, thereby "executing" the VPlan. 3822 void execute(VPTransformState *State) override; 3823 3824 // Return the cost of this region. 3825 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override; 3826 3827 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3828 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with 3829 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using 3830 /// consequtive numbers. 3831 /// 3832 /// Note that the numbering is applied to the whole VPlan, so printing 3833 /// individual regions is consistent with the whole VPlan printing. 3834 void print(raw_ostream &O, const Twine &Indent, 3835 VPSlotTracker &SlotTracker) const override; 3836 using VPBlockBase::print; // Get the print(raw_stream &O) version. 3837 #endif 3838 3839 /// Clone all blocks in the single-entry single-exit region of the block and 3840 /// their recipes without updating the operands of the cloned recipes. 3841 VPRegionBlock *clone() override; 3842 3843 /// Remove the current region from its VPlan, connecting its predecessor to 3844 /// its entry, and its exiting block to its successor. 
3845 void dissolveToCFGLoop(); 3846 }; 3847 3848 /// VPlan models a candidate for vectorization, encoding various decisions take 3849 /// to produce efficient output IR, including which branches, basic-blocks and 3850 /// output IR instructions to generate, and their cost. VPlan holds a 3851 /// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry 3852 /// VPBasicBlock. 3853 class VPlan { 3854 friend class VPlanPrinter; 3855 friend class VPSlotTracker; 3856 3857 /// VPBasicBlock corresponding to the original preheader. Used to place 3858 /// VPExpandSCEV recipes for expressions used during skeleton creation and the 3859 /// rest of VPlan execution. 3860 /// When this VPlan is used for the epilogue vector loop, the entry will be 3861 /// replaced by a new entry block created during skeleton creation. 3862 VPBasicBlock *Entry; 3863 3864 /// VPIRBasicBlock wrapping the header of the original scalar loop. 3865 VPIRBasicBlock *ScalarHeader; 3866 3867 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original 3868 /// scalar loop. Note that some exit blocks may be unreachable at the moment, 3869 /// e.g. if the scalar epilogue always executes. 3870 SmallVector<VPIRBasicBlock *, 2> ExitBlocks; 3871 3872 /// Holds the VFs applicable to this VPlan. 3873 SmallSetVector<ElementCount, 2> VFs; 3874 3875 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for 3876 /// any UF. 3877 SmallSetVector<unsigned, 2> UFs; 3878 3879 /// Holds the name of the VPlan, for printing. 3880 std::string Name; 3881 3882 /// Represents the trip count of the original loop, for folding 3883 /// the tail. 3884 VPValue *TripCount = nullptr; 3885 3886 /// Represents the backedge taken count of the original loop, for folding 3887 /// the tail. It equals TripCount - 1. 3888 VPValue *BackedgeTakenCount = nullptr; 3889 3890 /// Represents the vector trip count. 
3891 VPValue VectorTripCount; 3892 3893 /// Represents the vectorization factor of the loop. 3894 VPValue VF; 3895 3896 /// Represents the loop-invariant VF * UF of the vector loop region. 3897 VPValue VFxUF; 3898 3899 /// Holds a mapping between Values and their corresponding VPValue inside 3900 /// VPlan. 3901 Value2VPValueTy Value2VPValue; 3902 3903 /// Contains all the external definitions created for this VPlan. External 3904 /// definitions are VPValues that hold a pointer to their underlying IR. 3905 SmallVector<VPValue *, 16> VPLiveIns; 3906 3907 /// Mapping from SCEVs to the VPValues representing their expansions. 3908 /// NOTE: This mapping is temporary and will be removed once all users have 3909 /// been modeled in VPlan directly. 3910 DenseMap<const SCEV *, VPValue *> SCEVToExpansion; 3911 3912 /// Blocks allocated and owned by the VPlan. They will be deleted once the 3913 /// VPlan is destroyed. 3914 SmallVector<VPBlockBase *> CreatedBlocks; 3915 3916 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader 3917 /// wrapping the original header of the scalar loop. 3918 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader) 3919 : Entry(Entry), ScalarHeader(ScalarHeader) { 3920 Entry->setPlan(this); 3921 assert(ScalarHeader->getNumSuccessors() == 0 && 3922 "scalar header must be a leaf node"); 3923 } 3924 3925 public: 3926 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the 3927 /// original preheader and scalar header of \p L, to be used as entry and 3928 /// scalar header blocks of the new VPlan. 3929 VPlan(Loop *L); 3930 3931 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock 3932 /// wrapping \p ScalarHeaderBB and a trip count of \p TC. 
3933 VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC) { 3934 setEntry(createVPBasicBlock("preheader")); 3935 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB); 3936 TripCount = TC; 3937 } 3938 3939 LLVM_ABI_FOR_TEST ~VPlan(); 3940 3941 void setEntry(VPBasicBlock *VPBB) { 3942 Entry = VPBB; 3943 VPBB->setPlan(this); 3944 } 3945 3946 /// Prepare the plan for execution, setting up the required live-in values. 3947 void prepareToExecute(Value *TripCount, Value *VectorTripCount, 3948 VPTransformState &State); 3949 3950 /// Generate the IR code for this VPlan. 3951 void execute(VPTransformState *State); 3952 3953 /// Return the cost of this plan. 3954 InstructionCost cost(ElementCount VF, VPCostContext &Ctx); 3955 3956 VPBasicBlock *getEntry() { return Entry; } 3957 const VPBasicBlock *getEntry() const { return Entry; } 3958 3959 /// Returns the preheader of the vector loop region, if one exists, or null 3960 /// otherwise. 3961 VPBasicBlock *getVectorPreheader() { 3962 VPRegionBlock *VectorRegion = getVectorLoopRegion(); 3963 return VectorRegion 3964 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()) 3965 : nullptr; 3966 } 3967 3968 /// Returns the VPRegionBlock of the vector loop. 3969 LLVM_ABI_FOR_TEST VPRegionBlock *getVectorLoopRegion(); 3970 LLVM_ABI_FOR_TEST const VPRegionBlock *getVectorLoopRegion() const; 3971 3972 /// Returns the 'middle' block of the plan, that is the block that selects 3973 /// whether to execute the scalar tail loop or the exit block from the loop 3974 /// latch. If there is an early exit from the vector loop, the middle block 3975 /// conceptully has the early exit block as third successor, split accross 2 3976 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar 3977 /// tail loop or the exit bock. If the scalar tail loop or exit block are 3978 /// known to always execute, the middle block may branch directly to that 3979 /// block. 
This function cannot be called once the vector loop region has been 3980 /// removed. 3981 VPBasicBlock *getMiddleBlock() { 3982 VPRegionBlock *LoopRegion = getVectorLoopRegion(); 3983 assert( 3984 LoopRegion && 3985 "cannot call the function after vector loop region has been removed"); 3986 auto *RegionSucc = cast<VPBasicBlock>(LoopRegion->getSingleSuccessor()); 3987 if (RegionSucc->getSingleSuccessor() || 3988 is_contained(RegionSucc->getSuccessors(), getScalarPreheader())) 3989 return RegionSucc; 3990 // There is an early exit. The successor of RegionSucc is the middle block. 3991 return cast<VPBasicBlock>(RegionSucc->getSuccessors()[1]); 3992 } 3993 3994 const VPBasicBlock *getMiddleBlock() const { 3995 return const_cast<VPlan *>(this)->getMiddleBlock(); 3996 } 3997 3998 /// Return the VPBasicBlock for the preheader of the scalar loop. 3999 VPBasicBlock *getScalarPreheader() const { 4000 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor()); 4001 } 4002 4003 /// Return the VPIRBasicBlock wrapping the header of the scalar loop. 4004 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; } 4005 4006 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of 4007 /// the original scalar loop. 4008 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; } 4009 4010 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an 4011 /// exit block. 4012 VPIRBasicBlock *getExitBlock(BasicBlock *IRBB) const; 4013 4014 /// Returns true if \p VPBB is an exit block. 4015 bool isExitBlock(VPBlockBase *VPBB); 4016 4017 /// The trip count of the original loop. 4018 VPValue *getTripCount() const { 4019 assert(TripCount && "trip count needs to be set before accessing it"); 4020 return TripCount; 4021 } 4022 4023 /// Set the trip count assuming it is currently null; if it is not - use 4024 /// resetTripCount(). 
4025 void setTripCount(VPValue *NewTripCount) { 4026 assert(!TripCount && NewTripCount && "TripCount should not be set yet."); 4027 TripCount = NewTripCount; 4028 } 4029 4030 /// Resets the trip count for the VPlan. The caller must make sure all uses of 4031 /// the original trip count have been replaced. 4032 void resetTripCount(VPValue *NewTripCount) { 4033 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 && 4034 "TripCount must be set when resetting"); 4035 TripCount = NewTripCount; 4036 } 4037 4038 /// The backedge taken count of the original loop. 4039 VPValue *getOrCreateBackedgeTakenCount() { 4040 if (!BackedgeTakenCount) 4041 BackedgeTakenCount = new VPValue(); 4042 return BackedgeTakenCount; 4043 } 4044 4045 /// The vector trip count. 4046 VPValue &getVectorTripCount() { return VectorTripCount; } 4047 4048 /// Returns the VF of the vector loop region. 4049 VPValue &getVF() { return VF; }; 4050 4051 /// Returns VF * UF of the vector loop region. 4052 VPValue &getVFxUF() { return VFxUF; } 4053 4054 void addVF(ElementCount VF) { VFs.insert(VF); } 4055 4056 void setVF(ElementCount VF) { 4057 assert(hasVF(VF) && "Cannot set VF not already in plan"); 4058 VFs.clear(); 4059 VFs.insert(VF); 4060 } 4061 4062 bool hasVF(ElementCount VF) const { return VFs.count(VF); } 4063 bool hasScalableVF() const { 4064 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); }); 4065 } 4066 4067 /// Returns an iterator range over all VFs of the plan. 
  /// Returns an iterator range over all candidate VFs of the plan.
  iterator_range<SmallSetVector<ElementCount, 2>::iterator>
  vectorFactors() const {
    return {VFs.begin(), VFs.end()};
  }

  /// Returns true if the only candidate VF of the plan is the scalar VF
  /// (fixed 1).
  bool hasScalarVFOnly() const {
    bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
    assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
           "Plan with scalar VF should only have a single VF");
    return HasScalarVFOnly;
  }

  /// Returns true if \p UF is a candidate unroll factor of the plan. An empty
  /// UF set places no restriction, i.e. any UF is still possible.
  bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }

  /// Returns the plan's unroll factor; asserts that exactly one UF has been
  /// selected (see setUF).
  unsigned getUF() const {
    assert(UFs.size() == 1 && "Expected a single UF");
    return UFs[0];
  }

  /// Restrict the plan to the single unroll factor \p UF, which must be a
  /// candidate UF (see hasUF).
  void setUF(unsigned UF) {
    assert(hasUF(UF) && "Cannot set the UF not already in plan");
    UFs.clear();
    UFs.insert(UF);
  }

  /// Returns true if the VPlan already has been unrolled, i.e. it has a single
  /// concrete UF.
  bool isUnrolled() const { return UFs.size() == 1; }

  /// Return a string with the name of the plan and the applicable VFs and UFs.
  std::string getName() const;

  /// Set the plan's name to \p newName.
  void setName(const Twine &newName) { Name = newName.str(); }

  /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
  /// yet) for \p V.
  VPValue *getOrAddLiveIn(Value *V) {
    assert(V && "Trying to get or add the VPValue of a null Value");
    auto [It, Inserted] = Value2VPValue.try_emplace(V);
    if (Inserted) {
      // First time we see \p V: create a live-in VPValue wrapping it and
      // record it both in the map and in the ordered live-in list.
      VPValue *VPV = new VPValue(V);
      VPLiveIns.push_back(VPV);
      assert(VPV->isLiveIn() && "VPV must be a live-in.");
      It->second = VPV;
    }

    assert(It->second->isLiveIn() && "Only live-ins should be in mapping");
    return It->second;
  }

  /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
  VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }

  /// Return the list of live-in VPValues available in the VPlan.
  ArrayRef<VPValue *> getLiveIns() const {
    // Sanity check: the map and the ordered list must stay in sync.
    assert(all_of(Value2VPValue,
                  [this](const auto &P) {
                    return is_contained(VPLiveIns, P.second);
                  }) &&
           "all VPValues in Value2VPValue must also be in VPLiveIns");
    return VPLiveIns;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the live-ins of this VPlan to \p O.
  void printLiveIns(raw_ostream &O) const;

  /// Print this VPlan to \p O.
  void print(raw_ostream &O) const;

  /// Print this VPlan in DOT format to \p O.
  void printDOT(raw_ostream &O) const;

  /// Dump the plan to stderr (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  /// Returns the canonical induction recipe of the vector loop.
  VPCanonicalIVPHIRecipe *getCanonicalIV() {
    VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
    if (EntryVPBB->empty()) {
      // VPlan native path.
      EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
    }
    return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
  }

  /// Return the cached expansion of SCEV \p S, if one was recorded via
  /// addSCEVExpansion, or nullptr otherwise.
  VPValue *getSCEVExpansion(const SCEV *S) const {
    return SCEVToExpansion.lookup(S);
  }

  /// Record \p V as the expansion of SCEV \p S; \p S must not already have an
  /// expansion.
  void addSCEVExpansion(const SCEV *S, VPValue *V) {
    assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
    SCEVToExpansion[S] = V;
  }

  /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
  /// recipes to refer to the clones, and return it.
  VPlan *duplicate();

  /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
  /// present. The returned block is owned by the VPlan and deleted once the
  /// VPlan is destroyed.
  VPBasicBlock *createVPBasicBlock(const Twine &Name,
                                   VPRecipeBase *Recipe = nullptr) {
    auto *VPB = new VPBasicBlock(Name, Recipe);
    CreatedBlocks.push_back(VPB);
    return VPB;
  }

  /// Create a new VPRegionBlock with \p Entry, \p Exiting and \p Name. If \p
  /// IsReplicator is true, the region is a replicate region. The returned block
  /// is owned by the VPlan and deleted once the VPlan is destroyed.
  VPRegionBlock *createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
                                     const std::string &Name = "",
                                     bool IsReplicator = false) {
    auto *VPB = new VPRegionBlock(Entry, Exiting, Name, IsReplicator);
    CreatedBlocks.push_back(VPB);
    return VPB;
  }

  /// Create a new VPRegionBlock with \p Name and entry and exiting blocks set
  /// to nullptr. If \p IsReplicator is true, the region is a replicate region.
  /// The returned block is owned by the VPlan and deleted once the VPlan is
  /// destroyed.
  VPRegionBlock *createVPRegionBlock(const std::string &Name = "",
                                     bool IsReplicator = false) {
    auto *VPB = new VPRegionBlock(Name, IsReplicator);
    CreatedBlocks.push_back(VPB);
    return VPB;
  }

  /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
  /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
  /// block is owned by the VPlan and deleted once the VPlan is destroyed.
  VPIRBasicBlock *createEmptyVPIRBasicBlock(BasicBlock *IRBB);

  /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
  /// instructions in \p IRBB, except its terminator which is managed by the
  /// successors of the block in VPlan. The returned block is owned by the VPlan
  /// and deleted once the VPlan is destroyed.
  LLVM_ABI_FOR_TEST VPIRBasicBlock *createVPIRBasicBlock(BasicBlock *IRBB);

  /// Returns true if the VPlan is based on a loop with an early exit. That is
  /// the case if the VPlan has either more than one exit block or a single exit
  /// block with multiple predecessors (one for the exit via the latch and one
  /// via the other early exit).
  bool hasEarlyExit() const {
    return ExitBlocks.size() > 1 ||
           (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
  }

  /// Returns true if the scalar tail may execute after the vector loop. Note
  /// that this relies on unneeded branches to the scalar tail loop being
  /// removed.
  bool hasScalarTail() const {
    return !(getScalarPreheader()->getNumPredecessors() == 0 ||
             getScalarPreheader()->getSinglePredecessor() == getEntry());
  }
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print \p Plan to \p OS in textual form and return \p OS. Only available in
/// debug/dump-enabled builds.
inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
  Plan.print(OS);
  return OS;
}
#endif

} // end namespace llvm

#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H