1 //===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file contains the declarations of the Vectorization Plan base classes: 11 /// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual 12 /// VPBlockBase, together implementing a Hierarchical CFG; 13 /// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained 14 /// within VPBasicBlocks; 15 /// 3. VPInstruction, a concrete Recipe and VPUser modeling a single planned 16 /// instruction; 17 /// 4. The VPlan class holding a candidate for vectorization; 18 /// 5. The VPlanPrinter class providing a way to print a plan in dot format; 19 /// These are documented in docs/VectorizationPlan.rst. 
20 // 21 //===----------------------------------------------------------------------===// 22 23 #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H 24 #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H 25 26 #include "VPlanValue.h" 27 #include "llvm/ADT/DenseMap.h" 28 #include "llvm/ADT/DepthFirstIterator.h" 29 #include "llvm/ADT/MapVector.h" 30 #include "llvm/ADT/SmallBitVector.h" 31 #include "llvm/ADT/SmallPtrSet.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/ADT/Twine.h" 34 #include "llvm/ADT/ilist.h" 35 #include "llvm/ADT/ilist_node.h" 36 #include "llvm/Analysis/LoopInfo.h" 37 #include "llvm/Analysis/VectorUtils.h" 38 #include "llvm/IR/DebugLoc.h" 39 #include "llvm/IR/FMF.h" 40 #include "llvm/Transforms/Utils/LoopVersioning.h" 41 #include <algorithm> 42 #include <cassert> 43 #include <cstddef> 44 #include <string> 45 46 namespace llvm { 47 48 class BasicBlock; 49 class DominatorTree; 50 class InductionDescriptor; 51 class InnerLoopVectorizer; 52 class IRBuilderBase; 53 class LoopInfo; 54 class PredicateScalarEvolution; 55 class raw_ostream; 56 class RecurrenceDescriptor; 57 class SCEV; 58 class Type; 59 class VPBasicBlock; 60 class VPRegionBlock; 61 class VPlan; 62 class VPReplicateRecipe; 63 class VPlanSlp; 64 class Value; 65 66 namespace Intrinsic { 67 typedef unsigned ID; 68 } 69 70 /// Returns a calculation for the total number of elements for a given \p VF. 71 /// For fixed width vectors this value is a constant, whereas for scalable 72 /// vectors it is an expression determined at runtime. 73 Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF); 74 75 /// Return a value for Step multiplied by VF. 76 Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, 77 int64_t Step); 78 79 const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE); 80 81 /// A range of powers-of-2 vectorization factors with fixed start and 82 /// adjustable end. 
/// The range includes start and excludes end, e.g.,:
/// [1, 9) = {1, 2, 4, 8}
struct VFRange {
  // A power of 2.
  const ElementCount Start;

  // Need not be a power of 2. If End <= Start range is empty.
  ElementCount End;

  /// Returns true if the range contains no vectorization factors.
  bool isEmpty() const {
    return End.getKnownMinValue() <= Start.getKnownMinValue();
  }

  /// Construct a range [\p Start, \p End). Both bounds must agree on
  /// scalability and \p Start must be a power of 2.
  VFRange(const ElementCount &Start, const ElementCount &End)
      : Start(Start), End(End) {
    assert(Start.isScalable() == End.isScalable() &&
           "Both Start and End should have the same scalable flag");
    assert(isPowerOf2_32(Start.getKnownMinValue()) &&
           "Expected Start to be a power of 2");
  }
};

using VPlanPtr = std::unique_ptr<VPlan>;

/// In what follows, the term "input IR" refers to code that is fed into the
/// vectorizer whereas the term "output IR" refers to code that is generated by
/// the vectorizer.

/// VPLane provides a way to access lanes in both fixed width and scalable
/// vectors, where for the latter the lane index sometimes needs calculating
/// as a runtime expression.
class VPLane {
public:
  /// Kind describes how to interpret Lane.
  enum class Kind : uint8_t {
    /// For First, Lane is the index into the first N elements of a
    /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
    First,
    /// For ScalableLast, Lane is the offset from the start of the last
    /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
    /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
    /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
    ScalableLast
  };

private:
  /// in [0..VF)
  unsigned Lane;

  /// Indicates how the Lane should be interpreted, as described above.
  Kind LaneKind;

public:
  VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}

  /// Returns the lane with index 0, interpreted as Kind::First.
  static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }

  /// Returns the last lane for \p VF. For scalable vectors this is an offset
  /// into the last subvector, so the returned lane has Kind::ScalableLast.
  static VPLane getLastLaneForVF(const ElementCount &VF) {
    unsigned LaneOffset = VF.getKnownMinValue() - 1;
    Kind LaneKind;
    if (VF.isScalable())
      // In this case 'LaneOffset' refers to the offset from the start of the
      // last subvector with VF.getKnownMinValue() elements.
      LaneKind = VPLane::Kind::ScalableLast;
    else
      LaneKind = VPLane::Kind::First;
    return VPLane(LaneOffset, LaneKind);
  }

  /// Returns a compile-time known value for the lane index and asserts if the
  /// lane can only be calculated at runtime.
  unsigned getKnownLane() const {
    assert(LaneKind == Kind::First);
    return Lane;
  }

  /// Returns an expression describing the lane index that can be used at
  /// runtime.
  Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;

  /// Returns the Kind of lane offset.
  Kind getKind() const { return LaneKind; }

  /// Returns true if this is the first lane of the whole vector.
  bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }

  /// Maps the lane to a cache index based on \p VF. ScalableLast lanes are
  /// cached after the First lanes, i.e. at VF.getKnownMinValue() + Lane.
  unsigned mapToCacheIndex(const ElementCount &VF) const {
    switch (LaneKind) {
    case VPLane::Kind::ScalableLast:
      assert(VF.isScalable() && Lane < VF.getKnownMinValue());
      return VF.getKnownMinValue() + Lane;
    default:
      assert(Lane < VF.getKnownMinValue());
      return Lane;
    }
  }

  /// Returns the maximum number of lanes that we are able to consider
  /// caching for \p VF.
  static unsigned getNumCachedLanes(const ElementCount &VF) {
    return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
  }
};

/// VPIteration represents a single point in the iteration space of the output
/// (vectorized and/or unrolled) IR loop.
struct VPIteration {
  /// in [0..UF)
  unsigned Part;

  VPLane Lane;

  VPIteration(unsigned Part, unsigned Lane,
              VPLane::Kind Kind = VPLane::Kind::First)
      : Part(Part), Lane(Lane, Kind) {}

  VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}

  /// Returns true for part 0 of the first lane.
  bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
};

/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
  VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
                   DominatorTree *DT, IRBuilderBase &Builder,
                   InnerLoopVectorizer *ILV, VPlan *Plan)
      : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan),
        LVer(nullptr) {}

  /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
  ElementCount VF;
  unsigned UF;

  /// Hold the indices to generate specific scalar instructions. Null indicates
  /// that all instances are to be generated, using either scalar or vector
  /// instructions.
  std::optional<VPIteration> Instance;

  struct DataState {
    /// A type for vectorized values in the new loop. Each value from the
    /// original loop, when vectorized, is represented by UF vector values in
    /// the new unrolled loop, where UF is the unroll factor.
    typedef SmallVector<Value *, 2> PerPartValuesTy;

    DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;

    using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>;
    DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;
  } Data;

  /// Get the generated Value for a given VPValue and a given Part. Note that
  /// as some Defs are still created by ILV and managed in its ValueMap, this
  /// method will delegate the call to ILV in such cases in order to provide
  /// callers a consistent API.
  /// \see set.
  Value *get(VPValue *Def, unsigned Part);

  /// Get the generated Value for a given VPValue and given Part and Lane.
  Value *get(VPValue *Def, const VPIteration &Instance);

  /// Returns true if a (non-null) vector value has been cached for \p Def and
  /// \p Part.
  bool hasVectorValue(VPValue *Def, unsigned Part) {
    auto I = Data.PerPartOutput.find(Def);
    return I != Data.PerPartOutput.end() && Part < I->second.size() &&
           I->second[Part];
  }

  /// Returns true if a vector value has been cached for \p Def for any part.
  bool hasAnyVectorValue(VPValue *Def) const {
    return Data.PerPartOutput.find(Def) != Data.PerPartOutput.end();
  }

  /// Returns true if a (non-null) scalar value has been cached for \p Def at
  /// the given \p Instance (part and lane).
  bool hasScalarValue(VPValue *Def, VPIteration Instance) {
    auto I = Data.PerPartScalars.find(Def);
    if (I == Data.PerPartScalars.end())
      return false;
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    return Instance.Part < I->second.size() &&
           CacheIdx < I->second[Instance.Part].size() &&
           I->second[Instance.Part][CacheIdx];
  }

  /// Set the generated Value for a given VPValue and a given Part.
  void set(VPValue *Def, Value *V, unsigned Part) {
    if (!Data.PerPartOutput.count(Def)) {
      DataState::PerPartValuesTy Entry(UF);
      Data.PerPartOutput[Def] = Entry;
    }
    Data.PerPartOutput[Def][Part] = V;
  }
  /// Reset an existing vector value for \p Def and a given \p Part.
  void reset(VPValue *Def, Value *V, unsigned Part) {
    auto Iter = Data.PerPartOutput.find(Def);
    assert(Iter != Data.PerPartOutput.end() &&
           "need to overwrite existing value");
    Iter->second[Part] = V;
  }

  /// Set the generated scalar \p V for \p Def and the given \p Instance.
  /// Grows the per-part and per-lane caches as needed; asserts that no value
  /// was previously cached for this instance.
  void set(VPValue *Def, Value *V, const VPIteration &Instance) {
    auto Iter = Data.PerPartScalars.insert({Def, {}});
    auto &PerPartVec = Iter.first->second;
    while (PerPartVec.size() <= Instance.Part)
      PerPartVec.emplace_back();
    auto &Scalars = PerPartVec[Instance.Part];
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    while (Scalars.size() <= CacheIdx)
      Scalars.push_back(nullptr);
    assert(!Scalars[CacheIdx] && "should overwrite existing value");
    Scalars[CacheIdx] = V;
  }

  /// Reset an existing scalar value for \p Def and a given \p Instance.
  void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
    auto Iter = Data.PerPartScalars.find(Def);
    assert(Iter != Data.PerPartScalars.end() &&
           "need to overwrite existing value");
    assert(Instance.Part < Iter->second.size() &&
           "need to overwrite existing value");
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    assert(CacheIdx < Iter->second[Instance.Part].size() &&
           "need to overwrite existing value");
    Iter->second[Instance.Part][CacheIdx] = V;
  }

  /// Add additional metadata to \p To that was not present on \p Orig.
  ///
  /// Currently this is used to add the noalias annotations based on the
  /// inserted memchecks. Use this for instructions that are *cloned* into the
  /// vector loop.
  void addNewMetadata(Instruction *To, const Instruction *Orig);

  /// Add metadata from one instruction to another.
  ///
  /// This includes both the original MDs from \p From and additional ones (\see
  /// addNewMetadata). Use this for *newly created* instructions in the vector
  /// loop.
  void addMetadata(Instruction *To, Instruction *From);

  /// Similar to the previous function but it adds the metadata to a
  /// vector of instructions.
  void addMetadata(ArrayRef<Value *> To, Instruction *From);

  /// Set the debug location in the builder using the debug location in \p V.
  void setDebugLocFromInst(const Value *V);

  /// Hold state information used when constructing the CFG of the output IR,
  /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
  struct CFGState {
    /// The previous VPBasicBlock visited. Initially set to null.
    VPBasicBlock *PrevVPBB = nullptr;

    /// The previous IR BasicBlock created or used. Initially set to the new
    /// header BasicBlock.
    BasicBlock *PrevBB = nullptr;

    /// The last IR BasicBlock in the output IR. Set to the exit block of the
    /// vector loop.
    BasicBlock *ExitBB = nullptr;

    /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
    /// of replication, maps the BasicBlock of the last replica created.
    SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;

    CFGState() = default;

    /// Returns the BasicBlock* mapped to the pre-header of the loop region
    /// containing \p R.
    BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
  } CFG;

  /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
  LoopInfo *LI;

  /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
  DominatorTree *DT;

  /// Hold a reference to the IRBuilder used to generate output IR code.
  IRBuilderBase &Builder;

  VPValue2ValueTy VPValue2Value;

  /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
  Value *CanonicalIV = nullptr;

  /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
  InnerLoopVectorizer *ILV;

  /// Pointer to the VPlan code is generated for.
  VPlan *Plan;

  /// Holds recipes that may generate a poison value that is used after
  /// vectorization, even when their operands are not poison.
  SmallPtrSet<VPRecipeBase *, 16> MayGeneratePoisonRecipes;

  /// The loop object for the current parent region, or nullptr.
  Loop *CurrentVectorLoop = nullptr;

  /// LoopVersioning. It's only set up (non-null) if memchecks were
  /// used.
  ///
  /// This is currently only used to add no-alias metadata based on the
  /// memchecks. The actual versioning is performed manually.
  std::unique_ptr<LoopVersioning> LVer;
};

/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
class VPBlockBase {
  friend class VPBlockUtils;

  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).

  /// An optional name for the block.
  std::string Name;

  /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
  /// it is a topmost VPBlockBase.
  VPRegionBlock *Parent = nullptr;

  /// List of predecessor blocks.
  SmallVector<VPBlockBase *, 1> Predecessors;

  /// List of successor blocks.
  SmallVector<VPBlockBase *, 1> Successors;

  /// VPlan containing the block. Can only be set on the entry block of the
  /// plan.
  VPlan *Plan = nullptr;

  /// Add \p Successor as the last successor to this block.
  void appendSuccessor(VPBlockBase *Successor) {
    assert(Successor && "Cannot add nullptr successor!");
    Successors.push_back(Successor);
  }

  /// Add \p Predecessor as the last predecessor to this block.
  void appendPredecessor(VPBlockBase *Predecessor) {
    assert(Predecessor && "Cannot add nullptr predecessor!");
    Predecessors.push_back(Predecessor);
  }

  /// Remove \p Predecessor from the predecessors of this block.
  void removePredecessor(VPBlockBase *Predecessor) {
    auto Pos = find(Predecessors, Predecessor);
    assert(Pos && "Predecessor does not exist");
    Predecessors.erase(Pos);
  }

  /// Remove \p Successor from the successors of this block.
  void removeSuccessor(VPBlockBase *Successor) {
    auto Pos = find(Successors, Successor);
    assert(Pos && "Successor does not exist");
    Successors.erase(Pos);
  }

protected:
  VPBlockBase(const unsigned char SC, const std::string &N)
      : SubclassID(SC), Name(N) {}

public:
  /// An enumeration for keeping track of the concrete subclass of VPBlockBase
  /// that are actually instantiated. Values of this enumeration are kept in the
  /// SubclassID field of the VPBlockBase objects. They are used for concrete
  /// type identification.
  using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };

  using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;

  virtual ~VPBlockBase() = default;

  const std::string &getName() const { return Name; }

  void setName(const Twine &newName) { Name = newName.str(); }

  /// \return an ID for the concrete type of this object.
  /// This is used to implement the classof checks. This should not be used
  /// for any other purpose, as the values may change as LLVM evolves.
  unsigned getVPBlockID() const { return SubclassID; }

  VPRegionBlock *getParent() { return Parent; }
  const VPRegionBlock *getParent() const { return Parent; }

  /// \return A pointer to the plan containing the current block.
  VPlan *getPlan();
  const VPlan *getPlan() const;

  /// Sets the pointer of the plan containing the block. The block must be the
  /// entry block into the VPlan.
  void setPlan(VPlan *ParentPlan);

  void setParent(VPRegionBlock *P) { Parent = P; }

  /// \return the VPBasicBlock that is the entry of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getEntryBasicBlock() const;
  VPBasicBlock *getEntryBasicBlock();

  /// \return the VPBasicBlock that is the exiting this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getExitingBasicBlock() const;
  VPBasicBlock *getExitingBasicBlock();

  const VPBlocksTy &getSuccessors() const { return Successors; }
  VPBlocksTy &getSuccessors() { return Successors; }

  iterator_range<VPBlockBase **> successors() { return Successors; }

  const VPBlocksTy &getPredecessors() const { return Predecessors; }
  VPBlocksTy &getPredecessors() { return Predecessors; }

  /// \return the successor of this VPBlockBase if it has a single successor.
  /// Otherwise return a null pointer.
  VPBlockBase *getSingleSuccessor() const {
    return (Successors.size() == 1 ? *Successors.begin() : nullptr);
  }

  /// \return the predecessor of this VPBlockBase if it has a single
  /// predecessor. Otherwise return a null pointer.
  VPBlockBase *getSinglePredecessor() const {
    return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
  }

  size_t getNumSuccessors() const { return Successors.size(); }
  size_t getNumPredecessors() const { return Predecessors.size(); }

  /// An Enclosing Block of a block B is any block containing B, including B
  /// itself. \return the closest enclosing block starting from "this", which
  /// has successors. \return the root enclosing block if all enclosing blocks
  /// have no successors.
  VPBlockBase *getEnclosingBlockWithSuccessors();

  /// \return the closest enclosing block starting from "this", which has
  /// predecessors. \return the root enclosing block if all enclosing blocks
  /// have no predecessors.
  VPBlockBase *getEnclosingBlockWithPredecessors();

  /// \return the successors either attached directly to this VPBlockBase or, if
  /// this VPBlockBase is the exit block of a VPRegionBlock and has no
  /// successors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has successors and return them. If no such
  /// VPRegionBlock exists, return the (empty) successors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalSuccessors() {
    return getEnclosingBlockWithSuccessors()->getSuccessors();
  }

  /// \return the hierarchical successor of this VPBlockBase if it has a single
  /// hierarchical successor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalSuccessor() {
    return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
  }

  /// \return the predecessors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
  /// predecessors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has predecessors and return them. If no such
  /// VPRegionBlock exists, return the (empty) predecessors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalPredecessors() {
    return getEnclosingBlockWithPredecessors()->getPredecessors();
  }

  /// \return the hierarchical predecessor of this VPBlockBase if it has a
  /// single hierarchical predecessor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalPredecessor() {
    return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
  }

  /// Set a given VPBlockBase \p Successor as the single successor of this
  /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
  /// This VPBlockBase must have no successors.
  void setOneSuccessor(VPBlockBase *Successor) {
    assert(Successors.empty() && "Setting one successor when others exist.");
    appendSuccessor(Successor);
  }

  /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
  /// successors of this VPBlockBase. This VPBlockBase is not added as
  /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
  /// successors.
  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
    assert(Successors.empty() && "Setting two successors when others exist.");
    appendSuccessor(IfTrue);
    appendSuccessor(IfFalse);
  }

  /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
  /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
  /// as successor of any VPBasicBlock in \p NewPreds.
  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
    assert(Predecessors.empty() && "Block predecessors already set.");
    for (auto *Pred : NewPreds)
      appendPredecessor(Pred);
  }

  /// Remove all the predecessor of this block.
  void clearPredecessors() { Predecessors.clear(); }

  /// Remove all the successors of this block.
  void clearSuccessors() { Successors.clear(); }

  /// The method which generates the output IR that correspond to this
  /// VPBlockBase, thereby "executing" the VPlan.
  virtual void execute(VPTransformState *State) = 0;

  /// Delete all blocks reachable from a given VPBlockBase, inclusive.
  static void deleteCFG(VPBlockBase *Entry);

  /// Return true if it is legal to hoist instructions into this block.
  bool isLegalToHoistInto() {
    // There are currently no constraints that prevent an instruction to be
    // hoisted into a VPBlockBase.
    return true;
  }

  /// Replace all operands of VPUsers in the block with \p NewValue and also
  /// replaces all uses of VPValues defined in the block with NewValue.
  virtual void dropAllReferences(VPValue *NewValue) = 0;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printAsOperand(raw_ostream &OS, bool PrintType) const {
    OS << getName();
  }

  /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
  /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  virtual void print(raw_ostream &O, const Twine &Indent,
                     VPSlotTracker &SlotTracker) const = 0;

  /// Print plain-text dump of this VPlan to \p O.
  void print(raw_ostream &O) const {
    VPSlotTracker SlotTracker(getPlan());
    print(O, "", SlotTracker);
  }

  /// Print the successors of this block to \p O, prefixing all lines with \p
  /// Indent.
  void printSuccessors(raw_ostream &O, const Twine &Indent) const;

  /// Dump this VPBlockBase to dbgs().
  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
#endif
};

/// A value that is used outside the VPlan. The operand of the user needs to be
/// added to the associated LCSSA phi node.
class VPLiveOut : public VPUser {
  PHINode *Phi;

public:
  VPLiveOut(PHINode *Phi, VPValue *Op)
      : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}

  /// Fixup the wrapped LCSSA phi node in the unique exit block. This simply
  /// means we need to add the appropriate incoming value from the middle
  /// block as exiting edges from the scalar epilogue loop (if present) are
  /// already in place, and we exit the vector loop exclusively to the middle
  /// block.
  void fixPhi(VPlan &Plan, VPTransformState &State);

  /// Returns true if the VPLiveOut uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  PHINode *getPhi() const { return Phi; }
};

/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// VPRecipeBases that also inherit from VPValue must make sure to inherit from
/// VPRecipeBase before VPValue.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
                     public VPDef,
                     public VPUser {
  friend VPBasicBlock;
  friend class VPBlockUtils;

  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

public:
  VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands)
      : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe) {}

  template <typename IterT>
  VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands)
      : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe) {}
  virtual ~VPRecipeBase() = default;

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(VPTransformState &State) = 0;

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);
  /// Insert an unlinked recipe into \p BB immediately before the insertion
  /// point \p IP;
  void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// Unlink this recipe and insert into BB before I.
  ///
  /// \pre I is a valid iterator into BB.
  void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();

  /// Returns the underlying instruction, if the recipe is a VPValue or nullptr
  /// otherwise.
  Instruction *getUnderlyingInstr() {
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
  }
  const Instruction *getUnderlyingInstr() const {
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    // All VPDefs are also VPRecipeBases.
    return true;
  }

  static inline bool classof(const VPUser *U) {
    return U->getVPUserID() == VPUser::VPUserID::Recipe;
  }

  /// Returns true if the recipe may have side-effects.
  bool mayHaveSideEffects() const;

  /// Returns true for PHI-like recipes.
  bool isPhi() const {
    return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
  }

  /// Returns true if the recipe may read from memory.
  bool mayReadFromMemory() const;

  /// Returns true if the recipe may write to memory.
  bool mayWriteToMemory() const;

  /// Returns true if the recipe may read from or write to memory.
  bool mayReadOrWriteMemory() const {
    return mayReadFromMemory() || mayWriteToMemory();
  }
};

// Helper macro to define common classof implementations for recipes.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *D) {                                 \
    return D->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    auto *R = V->getDefiningRecipe();                                          \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    auto *R = dyn_cast<VPRecipeBase>(U);                                       \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPDefID() == VPDefID;                                         \
  }

/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While as any Recipe it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPRecipeBase, public VPValue {
  friend class VPlanSlp;

public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
  enum {
    FirstOrderRecurrenceSplice =
        Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
                                      // values of a first-order recurrence.
    Not,
    ICmpULE,
    SLPLoad,
    SLPStore,
    ActiveLaneMask,
    CanonicalIVIncrement,
    CanonicalIVIncrementNUW,
    // The next two are similar to the above, but instead increment the
    // canonical IV separately for each unrolled part.
    CanonicalIVIncrementForPart,
    CanonicalIVIncrementForPartNUW,
    BranchOnCount,
    BranchOnCond
  };

private:
  typedef unsigned char OpcodeTy;
  OpcodeTy Opcode;
  FastMathFlags FMF;
  DebugLoc DL;

  /// An optional name that can be used for the generated IR instruction.
  const std::string Name;

  /// Utility method serving execute(): generates a single instance of the
  /// modeled instruction.
  void generateInstruction(VPTransformState &State, unsigned Part);

protected:
  void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }

public:
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,
                const Twine &Name = "")
      : VPRecipeBase(VPDef::VPInstructionSC, Operands), VPValue(this),
        Opcode(Opcode), DL(DL), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                DebugLoc DL = {}, const Twine &Name = "")
      : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {}

  VP_CLASSOF_IMPL(VPDef::VPInstructionSC)

  /// Create a copy of this VPInstruction with the same opcode, operands,
  /// debug location and name.
  VPInstruction *clone() const {
    SmallVector<VPValue *, 2> Operands(operands());
    return new VPInstruction(Opcode, Operands, DL, Name);
  }

  unsigned getOpcode() const { return Opcode; }

  /// Generate the instruction.
  /// TODO: We currently execute only per-part unless a specific instance is
  /// provided.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPInstruction to \p O.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Print the VPInstruction to dbgs() (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  /// Return true if this instruction may modify memory.
  bool mayWriteToMemory() const {
    // TODO: we can use attributes of the called function to rule out memory
    // modifications.
    return Opcode == Instruction::Store || Opcode == Instruction::Call ||
           Opcode == Instruction::Invoke || Opcode == SLPStore;
  }

  /// Returns true if this VPInstruction produces a value.
  bool hasResult() const {
    // CallInst may or may not have a result, depending on the called function.
    // Conservatively assume calls have results for now.
    switch (getOpcode()) {
    case Instruction::Ret:
    case Instruction::Br:
    case Instruction::Store:
    case Instruction::Switch:
    case Instruction::IndirectBr:
    case Instruction::Resume:
    case Instruction::CatchRet:
    case Instruction::Unreachable:
    case Instruction::Fence:
    case Instruction::AtomicRMW:
    case VPInstruction::BranchOnCond:
    case VPInstruction::BranchOnCount:
      return false;
    default:
      return true;
    }
  }

  /// Set the fast-math flags.
  void setFastMathFlags(FastMathFlags FMFNew);

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    if (getOperand(0) != Op)
      return false;
    switch (getOpcode()) {
    default:
      return false;
    case VPInstruction::ActiveLaneMask:
    case VPInstruction::CanonicalIVIncrement:
    case VPInstruction::CanonicalIVIncrementNUW:
    case VPInstruction::CanonicalIVIncrementForPart:
    case VPInstruction::CanonicalIVIncrementForPartNUW:
    case VPInstruction::BranchOnCount:
      return true;
    };
    llvm_unreachable("switch should return");
  }
};

/// VPWidenRecipe is a recipe for producing a widened (vector) copy of its
/// ingredient. This recipe covers most of the traditional vectorization cases
/// where each ingredient transforms into a vectorized version of itself.
909 class VPWidenRecipe : public VPRecipeBase, public VPValue { 910 public: 911 template <typename IterT> 912 VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands) 913 : VPRecipeBase(VPDef::VPWidenSC, Operands), VPValue(this, &I) {} 914 915 ~VPWidenRecipe() override = default; 916 917 VP_CLASSOF_IMPL(VPDef::VPWidenSC) 918 919 /// Produce widened copies of all Ingredients. 920 void execute(VPTransformState &State) override; 921 922 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 923 /// Print the recipe. 924 void print(raw_ostream &O, const Twine &Indent, 925 VPSlotTracker &SlotTracker) const override; 926 #endif 927 }; 928 929 /// A recipe for widening Call instructions. 930 class VPWidenCallRecipe : public VPRecipeBase, public VPValue { 931 /// ID of the vector intrinsic to call when widening the call. If set the 932 /// Intrinsic::not_intrinsic, a library call will be used instead. 933 Intrinsic::ID VectorIntrinsicID; 934 935 public: 936 template <typename IterT> 937 VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments, 938 Intrinsic::ID VectorIntrinsicID) 939 : VPRecipeBase(VPDef::VPWidenCallSC, CallArguments), VPValue(this, &I), 940 VectorIntrinsicID(VectorIntrinsicID) {} 941 942 ~VPWidenCallRecipe() override = default; 943 944 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC) 945 946 /// Produce a widened version of the call instruction. 947 void execute(VPTransformState &State) override; 948 949 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 950 /// Print the recipe. 951 void print(raw_ostream &O, const Twine &Indent, 952 VPSlotTracker &SlotTracker) const override; 953 #endif 954 }; 955 956 /// A recipe for widening select instructions. 957 class VPWidenSelectRecipe : public VPRecipeBase, public VPValue { 958 959 /// Is the condition of the select loop invariant? 
960 bool InvariantCond; 961 962 public: 963 template <typename IterT> 964 VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands, 965 bool InvariantCond) 966 : VPRecipeBase(VPDef::VPWidenSelectSC, Operands), VPValue(this, &I), 967 InvariantCond(InvariantCond) {} 968 969 ~VPWidenSelectRecipe() override = default; 970 971 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC) 972 973 /// Produce a widened version of the select instruction. 974 void execute(VPTransformState &State) override; 975 976 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 977 /// Print the recipe. 978 void print(raw_ostream &O, const Twine &Indent, 979 VPSlotTracker &SlotTracker) const override; 980 #endif 981 }; 982 983 /// A recipe for handling GEP instructions. 984 class VPWidenGEPRecipe : public VPRecipeBase, public VPValue { 985 bool IsPtrLoopInvariant; 986 SmallBitVector IsIndexLoopInvariant; 987 988 public: 989 template <typename IterT> 990 VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands) 991 : VPRecipeBase(VPDef::VPWidenGEPSC, Operands), VPValue(this, GEP), 992 IsIndexLoopInvariant(GEP->getNumIndices(), false) {} 993 994 template <typename IterT> 995 VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands, 996 Loop *OrigLoop) 997 : VPRecipeBase(VPDef::VPWidenGEPSC, Operands), VPValue(this, GEP), 998 IsIndexLoopInvariant(GEP->getNumIndices(), false) { 999 IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand()); 1000 for (auto Index : enumerate(GEP->indices())) 1001 IsIndexLoopInvariant[Index.index()] = 1002 OrigLoop->isLoopInvariant(Index.value().get()); 1003 } 1004 ~VPWidenGEPRecipe() override = default; 1005 1006 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC) 1007 1008 /// Generate the gep nodes. 1009 void execute(VPTransformState &State) override; 1010 1011 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1012 /// Print the recipe. 
1013 void print(raw_ostream &O, const Twine &Indent, 1014 VPSlotTracker &SlotTracker) const override; 1015 #endif 1016 }; 1017 1018 /// A recipe for handling phi nodes of integer and floating-point inductions, 1019 /// producing their vector values. 1020 class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue { 1021 PHINode *IV; 1022 const InductionDescriptor &IndDesc; 1023 bool NeedsVectorIV; 1024 1025 public: 1026 VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, 1027 const InductionDescriptor &IndDesc, 1028 bool NeedsVectorIV) 1029 : VPRecipeBase(VPDef::VPWidenIntOrFpInductionSC, {Start, Step}), 1030 VPValue(this, IV), IV(IV), IndDesc(IndDesc), 1031 NeedsVectorIV(NeedsVectorIV) {} 1032 1033 VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, 1034 const InductionDescriptor &IndDesc, 1035 TruncInst *Trunc, bool NeedsVectorIV) 1036 : VPRecipeBase(VPDef::VPWidenIntOrFpInductionSC, {Start, Step}), 1037 VPValue(this, Trunc), IV(IV), IndDesc(IndDesc), 1038 NeedsVectorIV(NeedsVectorIV) {} 1039 1040 ~VPWidenIntOrFpInductionRecipe() override = default; 1041 1042 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC) 1043 1044 /// Generate the vectorized and scalarized versions of the phi node as 1045 /// needed by their users. 1046 void execute(VPTransformState &State) override; 1047 1048 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1049 /// Print the recipe. 1050 void print(raw_ostream &O, const Twine &Indent, 1051 VPSlotTracker &SlotTracker) const override; 1052 #endif 1053 1054 /// Returns the start value of the induction. 1055 VPValue *getStartValue() { return getOperand(0); } 1056 const VPValue *getStartValue() const { return getOperand(0); } 1057 1058 /// Returns the step value of the induction. 
1059 VPValue *getStepValue() { return getOperand(1); } 1060 const VPValue *getStepValue() const { return getOperand(1); } 1061 1062 /// Returns the first defined value as TruncInst, if it is one or nullptr 1063 /// otherwise. 1064 TruncInst *getTruncInst() { 1065 return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue()); 1066 } 1067 const TruncInst *getTruncInst() const { 1068 return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue()); 1069 } 1070 1071 PHINode *getPHINode() { return IV; } 1072 1073 /// Returns the induction descriptor for the recipe. 1074 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; } 1075 1076 /// Returns true if the induction is canonical, i.e. starting at 0 and 1077 /// incremented by UF * VF (= the original IV is incremented by 1). 1078 bool isCanonical() const; 1079 1080 /// Returns the scalar type of the induction. 1081 const Type *getScalarType() const { 1082 const TruncInst *TruncI = getTruncInst(); 1083 return TruncI ? TruncI->getType() : IV->getType(); 1084 } 1085 1086 /// Returns true if a vector phi needs to be created for the induction. 1087 bool needsVectorIV() const { return NeedsVectorIV; } 1088 }; 1089 1090 /// A pure virtual base class for all recipes modeling header phis, including 1091 /// phis for first order recurrences, pointer inductions and reductions. The 1092 /// start value is the first operand of the recipe and the incoming value from 1093 /// the backedge is the second operand. 1094 /// 1095 /// Inductions are modeled using the following sub-classes: 1096 /// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop, 1097 /// starting at a specified value (zero for the main vector loop, the resume 1098 /// value for the epilogue vector loop) and stepping by 1. The induction 1099 /// controls exiting of the vector loop by comparing against the vector trip 1100 /// count. Produces a single scalar PHI for the induction value per 1101 /// iteration. 
1102 /// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and 1103 /// floating point inductions with arbitrary start and step values. Produces 1104 /// a vector PHI per-part. 1105 /// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding 1106 /// value of an IV with different start and step values. Produces a single 1107 /// scalar value per iteration 1108 /// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a 1109 /// canonical or derived induction. 1110 /// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a 1111 /// pointer induction. Produces either a vector PHI per-part or scalar values 1112 /// per-lane based on the canonical induction. 1113 class VPHeaderPHIRecipe : public VPRecipeBase, public VPValue { 1114 protected: 1115 VPHeaderPHIRecipe(unsigned char VPDefID, PHINode *Phi, 1116 VPValue *Start = nullptr) 1117 : VPRecipeBase(VPDefID, {}), VPValue(this, Phi) { 1118 if (Start) 1119 addOperand(Start); 1120 } 1121 1122 public: 1123 ~VPHeaderPHIRecipe() override = default; 1124 1125 /// Method to support type inquiry through isa, cast, and dyn_cast. 1126 static inline bool classof(const VPRecipeBase *B) { 1127 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC && 1128 B->getVPDefID() <= VPDef::VPLastPHISC; 1129 } 1130 static inline bool classof(const VPValue *V) { 1131 auto *B = V->getDefiningRecipe(); 1132 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC && 1133 B->getVPDefID() <= VPRecipeBase::VPLastPHISC; 1134 } 1135 1136 /// Generate the phi nodes. 1137 void execute(VPTransformState &State) override = 0; 1138 1139 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1140 /// Print the recipe. 1141 void print(raw_ostream &O, const Twine &Indent, 1142 VPSlotTracker &SlotTracker) const override = 0; 1143 #endif 1144 1145 /// Returns the start value of the phi, if one is set. 1146 VPValue *getStartValue() { 1147 return getNumOperands() == 0 ? 
nullptr : getOperand(0); 1148 } 1149 VPValue *getStartValue() const { 1150 return getNumOperands() == 0 ? nullptr : getOperand(0); 1151 } 1152 1153 /// Update the start value of the recipe. 1154 void setStartValue(VPValue *V) { setOperand(0, V); } 1155 1156 /// Returns the incoming value from the loop backedge. 1157 VPValue *getBackedgeValue() { 1158 return getOperand(1); 1159 } 1160 1161 /// Returns the backedge value as a recipe. The backedge value is guaranteed 1162 /// to be a recipe. 1163 VPRecipeBase &getBackedgeRecipe() { 1164 return *getBackedgeValue()->getDefiningRecipe(); 1165 } 1166 }; 1167 1168 class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe { 1169 const InductionDescriptor &IndDesc; 1170 1171 bool IsScalarAfterVectorization; 1172 1173 public: 1174 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p 1175 /// Start. 1176 VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, 1177 const InductionDescriptor &IndDesc, 1178 bool IsScalarAfterVectorization) 1179 : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi), 1180 IndDesc(IndDesc), 1181 IsScalarAfterVectorization(IsScalarAfterVectorization) { 1182 addOperand(Start); 1183 addOperand(Step); 1184 } 1185 1186 ~VPWidenPointerInductionRecipe() override = default; 1187 1188 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC) 1189 1190 /// Generate vector values for the pointer induction. 1191 void execute(VPTransformState &State) override; 1192 1193 /// Returns true if only scalar values will be generated. 1194 bool onlyScalarsGenerated(ElementCount VF); 1195 1196 /// Returns the induction descriptor for the recipe. 1197 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; } 1198 1199 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1200 /// Print the recipe. 
1201 void print(raw_ostream &O, const Twine &Indent, 1202 VPSlotTracker &SlotTracker) const override; 1203 #endif 1204 }; 1205 1206 /// A recipe for handling header phis that are widened in the vector loop. 1207 /// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are 1208 /// managed in the recipe directly. 1209 class VPWidenPHIRecipe : public VPHeaderPHIRecipe { 1210 /// List of incoming blocks. Only used in the VPlan native path. 1211 SmallVector<VPBasicBlock *, 2> IncomingBlocks; 1212 1213 public: 1214 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start. 1215 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr) 1216 : VPHeaderPHIRecipe(VPDef::VPWidenPHISC, Phi) { 1217 if (Start) 1218 addOperand(Start); 1219 } 1220 1221 ~VPWidenPHIRecipe() override = default; 1222 1223 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC) 1224 1225 /// Generate the phi/select nodes. 1226 void execute(VPTransformState &State) override; 1227 1228 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1229 /// Print the recipe. 1230 void print(raw_ostream &O, const Twine &Indent, 1231 VPSlotTracker &SlotTracker) const override; 1232 #endif 1233 1234 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi. 1235 void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) { 1236 addOperand(IncomingV); 1237 IncomingBlocks.push_back(IncomingBlock); 1238 } 1239 1240 /// Returns the \p I th incoming VPBasicBlock. 1241 VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; } 1242 1243 /// Returns the \p I th incoming VPValue. 1244 VPValue *getIncomingValue(unsigned I) { return getOperand(I); } 1245 }; 1246 1247 /// A recipe for handling first-order recurrence phis. The start value is the 1248 /// first operand of the recipe and the incoming value from the backedge is the 1249 /// second operand. 
struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
  VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
      : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}

  VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)

  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
  }

  /// Generate the phi for the recurrence.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for handling reduction phis. The start value is the first operand
/// of the recipe and the incoming value from the backedge is the second
/// operand.
class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
  /// Descriptor for the reduction.
  const RecurrenceDescriptor &RdxDesc;

  /// The phi is part of an in-loop reduction.
  bool IsInLoop;

  /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
  bool IsOrdered;

public:
  /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
  /// RdxDesc.
  VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
                       VPValue &Start, bool IsInLoop = false,
                       bool IsOrdered = false)
      : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
        RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
    assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
  }

  ~VPReductionPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPDef::VPReductionPHISC;
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the recurrence descriptor for the reduction this phi is part of.
  const RecurrenceDescriptor &getRecurrenceDescriptor() const {
    return RdxDesc;
  }

  /// Returns true, if the phi is part of an ordered reduction.
  bool isOrdered() const { return IsOrdered; }

  /// Returns true, if the phi is part of an in-loop reduction.
  bool isInLoop() const { return IsInLoop; }
};

/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class VPBlendRecipe : public VPRecipeBase, public VPValue {
  PHINode *Phi;

public:
  /// The blend operation is a User of the incoming values and of their
  /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
  /// might be incoming with a full mask for which there is no VPValue.
  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
      : VPRecipeBase(VPDef::VPBlendSC, Operands), VPValue(this, Phi), Phi(Phi) {
    assert(Operands.size() > 0 &&
           ((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
           "Expected either a single incoming value or a positive even number "
           "of operands");
  }

  VP_CLASSOF_IMPL(VPDef::VPBlendSC)

  /// Return the number of incoming values, taking into account that a single
  /// incoming value has no mask.
  unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }

  /// Return incoming value number \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); }

  /// Return mask number \p Idx.
  VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Recursing through Blend recipes only, must terminate at header phi's the
    // latest.
    return all_of(users(),
                  [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
  }
};

/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
/// or stores into one wide load/store and shuffles. The first operand of a
/// VPInterleave recipe is the address, followed by the stored values, followed
/// by an optional mask.
class VPInterleaveRecipe : public VPRecipeBase {
  const InterleaveGroup<Instruction> *IG;

  /// Whether the last operand is the mask.
  bool HasMask = false;

public:
  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
                     ArrayRef<VPValue *> StoredValues, VPValue *Mask)
      : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG) {
    // Define a VPValue for each member of the group that produces a value,
    // skipping void-typed members (i.e. stores).
    for (unsigned i = 0; i < IG->getFactor(); ++i)
      if (Instruction *I = IG->getMember(i)) {
        if (I->getType()->isVoidTy())
          continue;
        new VPValue(I, this);
      }

    for (auto *SV : StoredValues)
      addOperand(SV);
    if (Mask) {
      HasMask = true;
      addOperand(Mask);
    }
  }
  ~VPInterleaveRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last, currently 2nd operand.
    return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Return the VPValues stored by this interleave group. If it is a load
  /// interleave group, return an empty ArrayRef.
  ArrayRef<VPValue *> getStoredValues() const {
    // The first operand is the address, followed by the stored values, followed
    // by an optional mask.
    return ArrayRef<VPValue *>(op_begin(), getNumOperands())
        .slice(1, getNumStoreOperands());
  }

  /// Generate the wide load or store, and shuffles.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }

  /// Returns the number of stored operands of this interleave group. Returns 0
  /// for load interleave groups.
  unsigned getNumStoreOperands() const {
    return getNumOperands() - (HasMask ? 2 : 1);
  }

  /// The recipe only uses the first lane of the address.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
  }
};

/// A recipe to represent inloop reduction operations, performing a reduction on
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPRecipeBase, public VPValue {
  /// The recurrence descriptor for the reduction in question.
  const RecurrenceDescriptor *RdxDesc;
  /// Pointer to the TTI, needed to create the target reduction.
  const TargetTransformInfo *TTI;

public:
  VPReductionRecipe(const RecurrenceDescriptor *R, Instruction *I,
                    VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
                    const TargetTransformInfo *TTI)
      : VPRecipeBase(VPDef::VPReductionSC, {ChainOp, VecOp}), VPValue(this, I),
        RdxDesc(R), TTI(TTI) {
    if (CondOp)
      addOperand(CondOp);
  }

  ~VPReductionRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPReductionSC)

  /// Generate the reduction in the loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// The VPValue of the scalar Chain being accumulated.
  VPValue *getChainOp() const { return getOperand(0); }
  /// The VPValue of the vector value to be reduced.
  VPValue *getVecOp() const { return getOperand(1); }
  /// The VPValue of the condition for the block, or nullptr if unconditional.
  VPValue *getCondOp() const {
    return getNumOperands() > 2 ? getOperand(2) : nullptr;
  }
};

/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to be
/// uniform only one copy, per lane zero, will be generated.
class VPReplicateRecipe : public VPRecipeBase, public VPValue {
  /// Indicator if only a single replica per lane is needed.
  bool IsUniform;

  /// Indicator if the replicas are also predicated.
  bool IsPredicated;

  /// Indicator if the scalar values should also be packed into a vector.
  bool AlsoPack;

public:
  template <typename IterT>
  VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
                    bool IsUniform, bool IsPredicated = false)
      : VPRecipeBase(VPDef::VPReplicateSC, Operands), VPValue(this, I),
        IsUniform(IsUniform), IsPredicated(IsPredicated) {
    // Retain the previous behavior of predicateInstructions(), where an
    // insert-element of a predicated instruction got hoisted into the
    // predicated basic block iff it was its only user. This is achieved by
    // having predicated instructions also pack their values into a vector by
    // default unless they have a replicated user which uses their scalar value.
    AlsoPack = IsPredicated && !I->use_empty();
  }

  ~VPReplicateRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPReplicateSC)

  /// Generate replicas of the desired Ingredient. Replicas will be generated
  /// for all parts and lanes unless a specific part and lane are specified in
  /// the \p State.
  void execute(VPTransformState &State) override;

  void setAlsoPack(bool Pack) { AlsoPack = Pack; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if only a single replica (per lane zero) is generated.
  bool isUniform() const { return IsUniform; }

  /// Returns true if the scalar values are also packed into a vector.
  bool isPacked() const { return AlsoPack; }

  /// Returns true if the replicas are predicated.
  bool isPredicated() const { return IsPredicated; }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return isUniform();
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase {
public:
  VPBranchOnMaskRecipe(VPValue *BlockInMask)
      : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
    if (BlockInMask) // nullptr means all-one mask.
      addOperand(BlockInMask);
  }

  VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)

  /// Generate the extraction of the appropriate bit from the block mask and the
  /// conditional branch.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override {
    O << Indent << "BRANCH-ON-MASK ";
    if (VPValue *Mask = getMask())
      Mask->printAsOperand(O, SlotTracker);
    else
      O << " All-One";
  }
#endif

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
    // Mask is optional.
    return getNumOperands() == 1 ? getOperand(0) : nullptr;
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
class VPPredInstPHIRecipe : public VPRecipeBase, public VPValue {
public:
  /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
  /// nodes after merging back from a Branch-on-Mask.
  VPPredInstPHIRecipe(VPValue *PredV)
      : VPRecipeBase(VPDef::VPPredInstPHISC, PredV), VPValue(this) {}
  ~VPPredInstPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)

  /// Generates phi nodes for live-outs as needed to retain SSA form.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// A Recipe for widening load/store operations.
/// The recipe uses the following VPValues:
/// - For load: Address, optional mask
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
  Instruction &Ingredient;

  // Whether the loaded-from / stored-to addresses are consecutive.
  bool Consecutive;

  // Whether the consecutive loaded/stored addresses are in reverse order.
  bool Reverse;

  // Append \p Mask as the last operand, if it is non-null.
  void setMask(VPValue *Mask) {
    if (!Mask)
      return;
    addOperand(Mask);
  }

  // A masked store has 3 operands {Addr, StoredValue, Mask}, a masked load 2
  // operands {Addr, Mask}.
  bool isMasked() const {
    return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
  }

public:
  VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
                                 bool Consecutive, bool Reverse)
      : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}),
        Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) {
    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
    new VPValue(this, &Load);
    setMask(Mask);
  }

  VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
                                 VPValue *StoredValue, VPValue *Mask,
                                 bool Consecutive, bool Reverse)
      : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue}),
        Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
    setMask(Mask);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last operand.
    return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Returns true if this recipe is a store.
  bool isStore() const { return isa<StoreInst>(Ingredient); }

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const {
    assert(isStore() && "Stored value only available for store instructions");
    return getOperand(1); // Stored value is the 2nd, mandatory operand.
  }

  // Return whether the loaded-from / stored-to addresses are consecutive.
  bool isConsecutive() const { return Consecutive; }

  // Return whether the consecutive loaded/stored addresses are in reverse
  // order.
  bool isReverse() const { return Reverse; }

  /// Generate the wide load/store.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");

    // Widened, consecutive memory operations only demand the first lane of
    // their address, unless the same operand is also stored. That latter can
    // happen with opaque pointers.
    return Op == getAddr() && isConsecutive() &&
           (!isStore() || Op != getStoredValue());
  }

  /// Return the load/store instruction this recipe widens.
  Instruction &getIngredient() const { return Ingredient; }
};

/// Recipe to expand a SCEV expression.
class VPExpandSCEVRecipe : public VPRecipeBase, public VPValue {
  const SCEV *Expr;
  ScalarEvolution &SE;

public:
  VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
      : VPRecipeBase(VPDef::VPExpandSCEVSC, {}), VPValue(this), Expr(Expr),
        SE(SE) {}

  ~VPExpandSCEVRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)

  /// Expand the SCEV expression.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Return the SCEV expression to expand.
  const SCEV *getSCEV() const { return Expr; }
};

/// Canonical scalar induction phi of the vector loop. Starting at the specified
/// start value (either 0 or the resume value when vectorizing the epilogue
/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
/// canonical induction variable.
class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
  DebugLoc DL;

public:
  VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV), DL(DL) {}

  ~VPCanonicalIVPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
  }

  /// Generate the canonical scalar induction phi of the vector loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the scalar type of the induction.
  const Type *getScalarType() const {
    return getOperand(0)->getLiveInIRValue()->getType();
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Check if the induction described by \p ID is canonical, i.e. has the same
  /// start, step (of 1), and type as the canonical IV.
  bool isCanonical(const InductionDescriptor &ID, Type *Ty) const;
};

/// A recipe for generating the active lane mask for the vector loop that is
/// used to predicate the vector operations.
/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
/// remove VPActiveLaneMaskPHIRecipe.
1803 class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe { 1804 DebugLoc DL; 1805 1806 public: 1807 VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL) 1808 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask), 1809 DL(DL) {} 1810 1811 ~VPActiveLaneMaskPHIRecipe() override = default; 1812 1813 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC) 1814 1815 static inline bool classof(const VPHeaderPHIRecipe *D) { 1816 return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC; 1817 } 1818 1819 /// Generate the active lane mask phi of the vector loop. 1820 void execute(VPTransformState &State) override; 1821 1822 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1823 /// Print the recipe. 1824 void print(raw_ostream &O, const Twine &Indent, 1825 VPSlotTracker &SlotTracker) const override; 1826 #endif 1827 }; 1828 1829 /// A Recipe for widening the canonical induction variable of the vector loop. 1830 class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue { 1831 public: 1832 VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV) 1833 : VPRecipeBase(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}), 1834 VPValue(this) {} 1835 1836 ~VPWidenCanonicalIVRecipe() override = default; 1837 1838 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC) 1839 1840 /// Generate a canonical vector induction variable of the vector loop, with 1841 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and 1842 /// step = <VF*UF, VF*UF, ..., VF*UF>. 1843 void execute(VPTransformState &State) override; 1844 1845 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1846 /// Print the recipe. 1847 void print(raw_ostream &O, const Twine &Indent, 1848 VPSlotTracker &SlotTracker) const override; 1849 #endif 1850 1851 /// Returns the scalar type of the induction. 
1852 const Type *getScalarType() const { 1853 return cast<VPCanonicalIVPHIRecipe>(getOperand(0)->getDefiningRecipe()) 1854 ->getScalarType(); 1855 } 1856 }; 1857 1858 /// A recipe for converting the canonical IV value to the corresponding value of 1859 /// an IV with different start and step values, using Start + CanonicalIV * 1860 /// Step. 1861 class VPDerivedIVRecipe : public VPRecipeBase, public VPValue { 1862 /// The type of the result value. It may be smaller than the type of the 1863 /// induction and in this case it will get truncated to ResultTy. 1864 Type *ResultTy; 1865 1866 /// Induction descriptor for the induction the canonical IV is transformed to. 1867 const InductionDescriptor &IndDesc; 1868 1869 public: 1870 VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, 1871 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, 1872 Type *ResultTy) 1873 : VPRecipeBase(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}), 1874 VPValue(this), ResultTy(ResultTy), IndDesc(IndDesc) {} 1875 1876 ~VPDerivedIVRecipe() override = default; 1877 1878 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC) 1879 1880 /// Generate the transformed value of the induction at offset StartValue (1. 1881 /// operand) + IV (2. operand) * StepValue (3, operand). 1882 void execute(VPTransformState &State) override; 1883 1884 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1885 /// Print the recipe. 1886 void print(raw_ostream &O, const Twine &Indent, 1887 VPSlotTracker &SlotTracker) const override; 1888 #endif 1889 1890 VPValue *getStartValue() const { return getOperand(0); } 1891 VPValue *getCanonicalIV() const { return getOperand(1); } 1892 VPValue *getStepValue() const { return getOperand(2); } 1893 1894 /// Returns true if the recipe only uses the first lane of operand \p Op. 
1895 bool onlyFirstLaneUsed(const VPValue *Op) const override { 1896 assert(is_contained(operands(), Op) && 1897 "Op must be an operand of the recipe"); 1898 return true; 1899 } 1900 }; 1901 1902 /// A recipe for handling phi nodes of integer and floating-point inductions, 1903 /// producing their scalar values. 1904 class VPScalarIVStepsRecipe : public VPRecipeBase, public VPValue { 1905 const InductionDescriptor &IndDesc; 1906 1907 public: 1908 VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, 1909 VPValue *Step) 1910 : VPRecipeBase(VPDef::VPScalarIVStepsSC, {IV, Step}), VPValue(this), 1911 IndDesc(IndDesc) {} 1912 1913 ~VPScalarIVStepsRecipe() override = default; 1914 1915 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC) 1916 1917 /// Generate the scalarized versions of the phi node as needed by their users. 1918 void execute(VPTransformState &State) override; 1919 1920 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1921 /// Print the recipe. 1922 void print(raw_ostream &O, const Twine &Indent, 1923 VPSlotTracker &SlotTracker) const override; 1924 #endif 1925 1926 VPValue *getStepValue() const { return getOperand(1); } 1927 1928 /// Returns true if the recipe only uses the first lane of operand \p Op. 1929 bool onlyFirstLaneUsed(const VPValue *Op) const override { 1930 assert(is_contained(operands(), Op) && 1931 "Op must be an operand of the recipe"); 1932 return true; 1933 } 1934 }; 1935 1936 /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It 1937 /// holds a sequence of zero or more VPRecipe's each representing a sequence of 1938 /// output IR instructions. All PHI-like recipes must come before any non-PHI recipes. 1939 class VPBasicBlock : public VPBlockBase { 1940 public: 1941 using RecipeListTy = iplist<VPRecipeBase>; 1942 1943 private: 1944 /// The VPRecipes held in the order of output instructions to generate. 
1945 RecipeListTy Recipes; 1946 1947 public: 1948 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr) 1949 : VPBlockBase(VPBasicBlockSC, Name.str()) { 1950 if (Recipe) 1951 appendRecipe(Recipe); 1952 } 1953 1954 ~VPBasicBlock() override { 1955 while (!Recipes.empty()) 1956 Recipes.pop_back(); 1957 } 1958 1959 /// Instruction iterators... 1960 using iterator = RecipeListTy::iterator; 1961 using const_iterator = RecipeListTy::const_iterator; 1962 using reverse_iterator = RecipeListTy::reverse_iterator; 1963 using const_reverse_iterator = RecipeListTy::const_reverse_iterator; 1964 1965 //===--------------------------------------------------------------------===// 1966 /// Recipe iterator methods 1967 /// 1968 inline iterator begin() { return Recipes.begin(); } 1969 inline const_iterator begin() const { return Recipes.begin(); } 1970 inline iterator end() { return Recipes.end(); } 1971 inline const_iterator end() const { return Recipes.end(); } 1972 1973 inline reverse_iterator rbegin() { return Recipes.rbegin(); } 1974 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); } 1975 inline reverse_iterator rend() { return Recipes.rend(); } 1976 inline const_reverse_iterator rend() const { return Recipes.rend(); } 1977 1978 inline size_t size() const { return Recipes.size(); } 1979 inline bool empty() const { return Recipes.empty(); } 1980 inline const VPRecipeBase &front() const { return Recipes.front(); } 1981 inline VPRecipeBase &front() { return Recipes.front(); } 1982 inline const VPRecipeBase &back() const { return Recipes.back(); } 1983 inline VPRecipeBase &back() { return Recipes.back(); } 1984 1985 /// Returns a reference to the list of recipes. 1986 RecipeListTy &getRecipeList() { return Recipes; } 1987 1988 /// Returns a pointer to a member of the recipe list. 
  /// Returns a pointer-to-member granting access to the recipe list, for use
  /// with ilist-based utilities.
  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
    return &VPBasicBlock::Recipes;
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
  }

  /// Insert \p Recipe into this block before \p InsertPt and take ownership of
  /// it (sets its parent to this block).
  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
    assert(Recipe && "No recipe to append.");
    assert(!Recipe->Parent && "Recipe already in VPlan");
    Recipe->Parent = this;
    Recipes.insert(InsertPt, Recipe);
  }

  /// Augment the existing recipes of a VPBasicBlock with an additional
  /// \p Recipe as the last recipe.
  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  /// Return the position of the first non-phi node recipe in the block.
  iterator getFirstNonPhi();

  /// Returns an iterator range over the PHI-like recipes in the block.
  iterator_range<iterator> phis() {
    return make_range(begin(), getFirstNonPhi());
  }

  void dropAllReferences(VPValue *NewValue) override;

  /// Split current block at \p SplitAt by inserting a new block between the
  /// current block and its successors and moving all recipes starting at
  /// SplitAt to the new block. Returns the new block.
  VPBasicBlock *splitAt(iterator SplitAt);

  VPRegionBlock *getEnclosingLoopRegion();

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
  /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_ostream &O) version.
#endif

  /// If the block has multiple successors, return the branch recipe terminating
  /// the block. If there are no or only a single successor, return nullptr.
  VPRecipeBase *getTerminator();
  const VPRecipeBase *getTerminator() const;

  /// Returns true if the block is exiting its parent region.
  bool isExiting() const;

private:
  /// Create an IR BasicBlock to hold the output instructions generated by this
  /// VPBasicBlock, and return it. Update the CFGState accordingly.
  BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
};

/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
/// A VPRegionBlock may indicate that its contents are to be replicated several
/// times. This is designed to support predicated scalarization, in which a
/// scalar if-then code structure needs to be generated VF * UF times. Having
/// this replication indicator helps to keep a single model for multiple
/// candidate VF's. The actual replication takes place only once the desired VF
/// and UF have been determined.
class VPRegionBlock : public VPBlockBase {
  /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
  VPBlockBase *Entry;

  /// Hold the Single Exiting block of the SESE region modelled by the
  /// VPRegionBlock.
  VPBlockBase *Exiting;

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool IsReplicator;

public:
  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
                const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
        IsReplicator(IsReplicator) {
    assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
    assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
    Entry->setParent(this);
    Exiting->setParent(this);
  }
  VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
        IsReplicator(IsReplicator) {}

  ~VPRegionBlock() override {
    if (Entry) {
      // Drop all references held by recipes inside the region before deleting
      // the CFG, replacing them with a dummy value.
      VPValue DummyValue;
      Entry->dropAllReferences(&DummyValue);
      deleteCFG(Entry);
    }
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
  }

  const VPBlockBase *getEntry() const { return Entry; }
  VPBlockBase *getEntry() { return Entry; }

  /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
  /// EntryBlock must have no predecessors.
  void setEntry(VPBlockBase *EntryBlock) {
    assert(EntryBlock->getPredecessors().empty() &&
           "Entry block cannot have predecessors.");
    Entry = EntryBlock;
    EntryBlock->setParent(this);
  }

  const VPBlockBase *getExiting() const { return Exiting; }
  VPBlockBase *getExiting() { return Exiting; }

  /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
  /// ExitingBlock must have no successors.
  void setExiting(VPBlockBase *ExitingBlock) {
    assert(ExitingBlock->getSuccessors().empty() &&
           "Exit block cannot have successors.");
    Exiting = ExitingBlock;
    ExitingBlock->setParent(this);
  }

  /// Returns the pre-header VPBasicBlock of the loop region.
  VPBasicBlock *getPreheaderVPBB() {
    assert(!isReplicator() && "should only get pre-header of loop regions");
    return getSinglePredecessor()->getExitingBasicBlock();
  }

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool isReplicator() const { return IsReplicator; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRegionBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  void dropAllReferences(VPValue *NewValue) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
  /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual regions is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_ostream &O) version.
#endif
};

/// VPlan models a candidate for vectorization, encoding various decisions taken
/// to produce efficient output IR, including which branches, basic-blocks and
/// output IR instructions to generate, and their cost. VPlan holds a
/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
/// VPBlock.
class VPlan {
  friend class VPlanPrinter;
  friend class VPSlotTracker;

  /// Hold the single entry to the Hierarchical CFG of the VPlan.
  VPBlockBase *Entry;

  /// Holds the VFs applicable to this VPlan.
  SmallSetVector<ElementCount, 2> VFs;

  /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
  /// any UF.
  SmallSetVector<unsigned, 2> UFs;

  /// Holds the name of the VPlan, for printing.
  std::string Name;

  /// Holds all the external definitions created for this VPlan. External
  /// definitions must be immutable and hold a pointer to their underlying IR.
  DenseMap<Value *, VPValue *> VPExternalDefs;

  /// Represents the trip count of the original loop, for folding
  /// the tail.
  VPValue *TripCount = nullptr;

  /// Represents the backedge taken count of the original loop, for folding
  /// the tail. It equals TripCount - 1.
  VPValue *BackedgeTakenCount = nullptr;

  /// Represents the vector trip count.
  VPValue VectorTripCount;

  /// Holds a mapping between Values and their corresponding VPValue inside
  /// VPlan.
  Value2VPValueTy Value2VPValue;

  /// Contains all VPValues that have been allocated by addVPValue directly and
  /// need to be freed when the plan's destructor is called.
  SmallVector<VPValue *, 16> VPValuesToFree;

  /// Indicates whether it is safe to use the Value2VPValue mapping or if the
  /// mapping cannot be used any longer, because it is stale.
  bool Value2VPValueEnabled = true;

  /// Values used outside the plan.
  MapVector<PHINode *, VPLiveOut *> LiveOuts;

public:
  VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {
    if (Entry)
      Entry->setPlan(this);
  }

  ~VPlan();

  /// Prepare the plan for execution, setting up the required live-in values.
2216 void prepareToExecute(Value *TripCount, Value *VectorTripCount, 2217 Value *CanonicalIVStartValue, VPTransformState &State, 2218 bool IsEpilogueVectorization); 2219 2220 /// Generate the IR code for this VPlan. 2221 void execute(VPTransformState *State); 2222 2223 VPBlockBase *getEntry() { return Entry; } 2224 const VPBlockBase *getEntry() const { return Entry; } 2225 2226 VPBlockBase *setEntry(VPBlockBase *Block) { 2227 Entry = Block; 2228 Block->setPlan(this); 2229 return Entry; 2230 } 2231 2232 /// The trip count of the original loop. 2233 VPValue *getOrCreateTripCount() { 2234 if (!TripCount) 2235 TripCount = new VPValue(); 2236 return TripCount; 2237 } 2238 2239 /// The backedge taken count of the original loop. 2240 VPValue *getOrCreateBackedgeTakenCount() { 2241 if (!BackedgeTakenCount) 2242 BackedgeTakenCount = new VPValue(); 2243 return BackedgeTakenCount; 2244 } 2245 2246 /// The vector trip count. 2247 VPValue &getVectorTripCount() { return VectorTripCount; } 2248 2249 /// Mark the plan to indicate that using Value2VPValue is not safe any 2250 /// longer, because it may be stale. 2251 void disableValue2VPValue() { Value2VPValueEnabled = false; } 2252 2253 void addVF(ElementCount VF) { VFs.insert(VF); } 2254 2255 void setVF(ElementCount VF) { 2256 assert(hasVF(VF) && "Cannot set VF not already in plan"); 2257 VFs.clear(); 2258 VFs.insert(VF); 2259 } 2260 2261 bool hasVF(ElementCount VF) { return VFs.count(VF); } 2262 2263 bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); } 2264 2265 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); } 2266 2267 void setUF(unsigned UF) { 2268 assert(hasUF(UF) && "Cannot set the UF not already in plan"); 2269 UFs.clear(); 2270 UFs.insert(UF); 2271 } 2272 2273 /// Return a string with the name of the plan and the applicable VFs and UFs. 
2274 std::string getName() const; 2275 2276 void setName(const Twine &newName) { Name = newName.str(); } 2277 2278 /// Get the existing or add a new external definition for \p V. 2279 VPValue *getOrAddExternalDef(Value *V) { 2280 auto I = VPExternalDefs.insert({V, nullptr}); 2281 if (I.second) 2282 I.first->second = new VPValue(V); 2283 return I.first->second; 2284 } 2285 2286 void addVPValue(Value *V) { 2287 assert(Value2VPValueEnabled && 2288 "IR value to VPValue mapping may be out of date!"); 2289 assert(V && "Trying to add a null Value to VPlan"); 2290 assert(!Value2VPValue.count(V) && "Value already exists in VPlan"); 2291 VPValue *VPV = new VPValue(V); 2292 Value2VPValue[V] = VPV; 2293 VPValuesToFree.push_back(VPV); 2294 } 2295 2296 void addVPValue(Value *V, VPValue *VPV) { 2297 assert(Value2VPValueEnabled && "Value2VPValue mapping may be out of date!"); 2298 assert(V && "Trying to add a null Value to VPlan"); 2299 assert(!Value2VPValue.count(V) && "Value already exists in VPlan"); 2300 Value2VPValue[V] = VPV; 2301 } 2302 2303 /// Returns the VPValue for \p V. \p OverrideAllowed can be used to disable 2304 /// checking whether it is safe to query VPValues using IR Values. 2305 VPValue *getVPValue(Value *V, bool OverrideAllowed = false) { 2306 assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) && 2307 "Value2VPValue mapping may be out of date!"); 2308 assert(V && "Trying to get the VPValue of a null Value"); 2309 assert(Value2VPValue.count(V) && "Value does not exist in VPlan"); 2310 return Value2VPValue[V]; 2311 } 2312 2313 /// Gets the VPValue or adds a new one (if none exists yet) for \p V. \p 2314 /// OverrideAllowed can be used to disable checking whether it is safe to 2315 /// query VPValues using IR Values. 
2316 VPValue *getOrAddVPValue(Value *V, bool OverrideAllowed = false) { 2317 assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) && 2318 "Value2VPValue mapping may be out of date!"); 2319 assert(V && "Trying to get or add the VPValue of a null Value"); 2320 if (!Value2VPValue.count(V)) 2321 addVPValue(V); 2322 return getVPValue(V); 2323 } 2324 2325 void removeVPValueFor(Value *V) { 2326 assert(Value2VPValueEnabled && 2327 "IR value to VPValue mapping may be out of date!"); 2328 Value2VPValue.erase(V); 2329 } 2330 2331 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2332 /// Print this VPlan to \p O. 2333 void print(raw_ostream &O) const; 2334 2335 /// Print this VPlan in DOT format to \p O. 2336 void printDOT(raw_ostream &O) const; 2337 2338 /// Dump the plan to stderr (for debugging). 2339 LLVM_DUMP_METHOD void dump() const; 2340 #endif 2341 2342 /// Returns a range mapping the values the range \p Operands to their 2343 /// corresponding VPValues. 2344 iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>> 2345 mapToVPValues(User::op_range Operands) { 2346 std::function<VPValue *(Value *)> Fn = [this](Value *Op) { 2347 return getOrAddVPValue(Op); 2348 }; 2349 return map_range(Operands, Fn); 2350 } 2351 2352 /// Returns the VPRegionBlock of the vector loop. 2353 VPRegionBlock *getVectorLoopRegion() { 2354 return cast<VPRegionBlock>(getEntry()->getSingleSuccessor()); 2355 } 2356 const VPRegionBlock *getVectorLoopRegion() const { 2357 return cast<VPRegionBlock>(getEntry()->getSingleSuccessor()); 2358 } 2359 2360 /// Returns the canonical induction recipe of the vector loop. 2361 VPCanonicalIVPHIRecipe *getCanonicalIV() { 2362 VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock(); 2363 if (EntryVPBB->empty()) { 2364 // VPlan native path. 
      EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
    }
    return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
  }

  /// Find and return the VPActiveLaneMaskPHIRecipe from the header - there
  /// can be only one at most. If there isn't one, then return nullptr.
  VPActiveLaneMaskPHIRecipe *getActiveLaneMaskPhi();

  /// Record \p V as the value live out of the plan for phi \p PN.
  void addLiveOut(PHINode *PN, VPValue *V);

  /// Delete all recorded live-outs and clear the mapping.
  void clearLiveOuts() {
    for (auto &KV : LiveOuts)
      delete KV.second;
    LiveOuts.clear();
  }

  /// Delete and remove the live-out recorded for \p PN, if any.
  void removeLiveOut(PHINode *PN) {
    delete LiveOuts[PN];
    LiveOuts.erase(PN);
  }

  const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
    return LiveOuts;
  }

private:
  /// Add to the given dominator tree the header block and every new basic block
  /// that was created between it and the latch block, inclusive.
  static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
                                  BasicBlock *LoopPreHeaderBB,
                                  BasicBlock *LoopExitBB);
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
/// indented and follows the dot format.
class VPlanPrinter {
  raw_ostream &OS;
  const VPlan &Plan;
  unsigned Depth = 0;
  unsigned TabWidth = 2;
  std::string Indent;
  unsigned BID = 0;
  SmallDenseMap<const VPBlockBase *, unsigned> BlockID;

  VPSlotTracker SlotTracker;

  /// Handle indentation.
  void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }

  /// Print a given \p Block of the Plan.
  void dumpBlock(const VPBlockBase *Block);

  /// Print the information related to the CFG edges going out of a given
  /// \p Block, followed by printing the successor blocks themselves.
2421 void dumpEdges(const VPBlockBase *Block); 2422 2423 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing 2424 /// its successor blocks. 2425 void dumpBasicBlock(const VPBasicBlock *BasicBlock); 2426 2427 /// Print a given \p Region of the Plan. 2428 void dumpRegion(const VPRegionBlock *Region); 2429 2430 unsigned getOrCreateBID(const VPBlockBase *Block) { 2431 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++; 2432 } 2433 2434 Twine getOrCreateName(const VPBlockBase *Block); 2435 2436 Twine getUID(const VPBlockBase *Block); 2437 2438 /// Print the information related to a CFG edge between two VPBlockBases. 2439 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden, 2440 const Twine &Label); 2441 2442 public: 2443 VPlanPrinter(raw_ostream &O, const VPlan &P) 2444 : OS(O), Plan(P), SlotTracker(&P) {} 2445 2446 LLVM_DUMP_METHOD void dump(); 2447 }; 2448 2449 struct VPlanIngredient { 2450 const Value *V; 2451 2452 VPlanIngredient(const Value *V) : V(V) {} 2453 2454 void print(raw_ostream &O) const; 2455 }; 2456 2457 inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) { 2458 I.print(OS); 2459 return OS; 2460 } 2461 2462 inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) { 2463 Plan.print(OS); 2464 return OS; 2465 } 2466 #endif 2467 2468 //===----------------------------------------------------------------------===// 2469 // VPlan Utilities 2470 //===----------------------------------------------------------------------===// 2471 2472 /// Class that provides utilities for VPBlockBases in VPlan. 2473 class VPBlockUtils { 2474 public: 2475 VPBlockUtils() = delete; 2476 2477 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p 2478 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p 2479 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. 
\p BlockPtr's
  /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
  /// have neither successors nor predecessors.
  static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
    assert(NewBlock->getSuccessors().empty() &&
           NewBlock->getPredecessors().empty() &&
           "Can't insert new block with predecessors or successors.");
    NewBlock->setParent(BlockPtr->getParent());
    SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
    for (VPBlockBase *Succ : Succs) {
      disconnectBlocks(BlockPtr, Succ);
      connectBlocks(NewBlock, Succ);
    }
    connectBlocks(BlockPtr, NewBlock);
  }

  /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
  /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p
  /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
  /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
  /// and \p IfTrue and \p IfFalse must have neither successors nor
  /// predecessors.
  static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
                                   VPBlockBase *BlockPtr) {
    assert(IfTrue->getSuccessors().empty() &&
           "Can't insert IfTrue with successors.");
    assert(IfFalse->getSuccessors().empty() &&
           "Can't insert IfFalse with successors.");
    BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
    IfTrue->setPredecessors({BlockPtr});
    IfFalse->setPredecessors({BlockPtr});
    IfTrue->setParent(BlockPtr->getParent());
    IfFalse->setParent(BlockPtr->getParent());
  }

  /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
  /// the successors of \p From and \p From to the predecessors of \p To. Both
  /// VPBlockBases must have the same parent, which can be null. Both
  /// VPBlockBases can be already connected to other VPBlockBases.
2518 static void connectBlocks(VPBlockBase *From, VPBlockBase *To) { 2519 assert((From->getParent() == To->getParent()) && 2520 "Can't connect two block with different parents"); 2521 assert(From->getNumSuccessors() < 2 && 2522 "Blocks can't have more than two successors."); 2523 From->appendSuccessor(To); 2524 To->appendPredecessor(From); 2525 } 2526 2527 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To 2528 /// from the successors of \p From and \p From from the predecessors of \p To. 2529 static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) { 2530 assert(To && "Successor to disconnect is null."); 2531 From->removeSuccessor(To); 2532 To->removePredecessor(From); 2533 } 2534 2535 /// Return an iterator range over \p Range which only includes \p BlockTy 2536 /// blocks. The accesses are casted to \p BlockTy. 2537 template <typename BlockTy, typename T> 2538 static auto blocksOnly(const T &Range) { 2539 // Create BaseTy with correct const-ness based on BlockTy. 2540 using BaseTy = std::conditional_t<std::is_const<BlockTy>::value, 2541 const VPBlockBase, VPBlockBase>; 2542 2543 // We need to first create an iterator range over (const) BlocktTy & instead 2544 // of (const) BlockTy * for filter_range to work properly. 
2545 auto Mapped = 2546 map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; }); 2547 auto Filter = make_filter_range( 2548 Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); }); 2549 return map_range(Filter, [](BaseTy &Block) -> BlockTy * { 2550 return cast<BlockTy>(&Block); 2551 }); 2552 } 2553 }; 2554 2555 class VPInterleavedAccessInfo { 2556 DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *> 2557 InterleaveGroupMap; 2558 2559 /// Type for mapping of instruction based interleave groups to VPInstruction 2560 /// interleave groups 2561 using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *, 2562 InterleaveGroup<VPInstruction> *>; 2563 2564 /// Recursively \p Region and populate VPlan based interleave groups based on 2565 /// \p IAI. 2566 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New, 2567 InterleavedAccessInfo &IAI); 2568 /// Recursively traverse \p Block and populate VPlan based interleave groups 2569 /// based on \p IAI. 2570 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, 2571 InterleavedAccessInfo &IAI); 2572 2573 public: 2574 VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI); 2575 2576 ~VPInterleavedAccessInfo() { 2577 SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet; 2578 // Avoid releasing a pointer twice. 2579 for (auto &I : InterleaveGroupMap) 2580 DelSet.insert(I.second); 2581 for (auto *Ptr : DelSet) 2582 delete Ptr; 2583 } 2584 2585 /// Get the interleave group that \p Instr belongs to. 2586 /// 2587 /// \returns nullptr if doesn't have such group. 2588 InterleaveGroup<VPInstruction> * 2589 getInterleaveGroup(VPInstruction *Instr) const { 2590 return InterleaveGroupMap.lookup(Instr); 2591 } 2592 }; 2593 2594 /// Class that maps (parts of) an existing VPlan to trees of combined 2595 /// VPInstructions. 2596 class VPlanSlp { 2597 enum class OpMode { Failed, Load, Opcode }; 2598 2599 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as 2600 /// DenseMap keys. 
2601 struct BundleDenseMapInfo { 2602 static SmallVector<VPValue *, 4> getEmptyKey() { 2603 return {reinterpret_cast<VPValue *>(-1)}; 2604 } 2605 2606 static SmallVector<VPValue *, 4> getTombstoneKey() { 2607 return {reinterpret_cast<VPValue *>(-2)}; 2608 } 2609 2610 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) { 2611 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end())); 2612 } 2613 2614 static bool isEqual(const SmallVector<VPValue *, 4> &LHS, 2615 const SmallVector<VPValue *, 4> &RHS) { 2616 return LHS == RHS; 2617 } 2618 }; 2619 2620 /// Mapping of values in the original VPlan to a combined VPInstruction. 2621 DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo> 2622 BundleToCombined; 2623 2624 VPInterleavedAccessInfo &IAI; 2625 2626 /// Basic block to operate on. For now, only instructions in a single BB are 2627 /// considered. 2628 const VPBasicBlock &BB; 2629 2630 /// Indicates whether we managed to combine all visited instructions or not. 2631 bool CompletelySLP = true; 2632 2633 /// Width of the widest combined bundle in bits. 2634 unsigned WidestBundleBits = 0; 2635 2636 using MultiNodeOpTy = 2637 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>; 2638 2639 // Input operand bundles for the current multi node. Each multi node operand 2640 // bundle contains values not matching the multi node's opcode. They will 2641 // be reordered in reorderMultiNodeOps, once we completed building a 2642 // multi node. 2643 SmallVector<MultiNodeOpTy, 4> MultiNodeOps; 2644 2645 /// Indicates whether we are building a multi node currently. 2646 bool MultiNodeActive = false; 2647 2648 /// Check if we can vectorize Operands together. 2649 bool areVectorizable(ArrayRef<VPValue *> Operands) const; 2650 2651 /// Add combined instruction \p New for the bundle \p Operands. 2652 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New); 2653 2654 /// Indicate we hit a bundle we failed to combine. 
Returns nullptr for now. 2655 VPInstruction *markFailed(); 2656 2657 /// Reorder operands in the multi node to maximize sequential memory access 2658 /// and commutative operations. 2659 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps(); 2660 2661 /// Choose the best candidate to use for the lane after \p Last. The set of 2662 /// candidates to choose from are values with an opcode matching \p Last's 2663 /// or loads consecutive to \p Last. 2664 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last, 2665 SmallPtrSetImpl<VPValue *> &Candidates, 2666 VPInterleavedAccessInfo &IAI); 2667 2668 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2669 /// Print bundle \p Values to dbgs(). 2670 void dumpBundle(ArrayRef<VPValue *> Values); 2671 #endif 2672 2673 public: 2674 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {} 2675 2676 ~VPlanSlp() = default; 2677 2678 /// Tries to build an SLP tree rooted at \p Operands and returns a 2679 /// VPInstruction combining \p Operands, if they can be combined. 2680 VPInstruction *buildGraph(ArrayRef<VPValue *> Operands); 2681 2682 /// Return the width of the widest combined bundle in bits. 2683 unsigned getWidestBundleBits() const { return WidestBundleBits; } 2684 2685 /// Return true if all visited instruction can be combined. 2686 bool isCompletelySLP() const { return CompletelySLP; } 2687 }; 2688 2689 namespace vputils { 2690 2691 /// Returns true if only the first lane of \p Def is used. 2692 bool onlyFirstLaneUsed(VPValue *Def); 2693 2694 /// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p 2695 /// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in 2696 /// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's 2697 /// pre-header already contains a recipe expanding \p Expr, return it. If not, 2698 /// create a new one. 
2699 VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, 2700 ScalarEvolution &SE); 2701 2702 /// Returns true if \p VPV is uniform after vectorization. 2703 inline bool isUniformAfterVectorization(VPValue *VPV) { 2704 // A value defined outside the vector region must be uniform after 2705 // vectorization inside a vector region. 2706 if (VPV->isDefinedOutsideVectorRegions()) 2707 return true; 2708 VPRecipeBase *Def = VPV->getDefiningRecipe(); 2709 assert(Def && "Must have definition for value defined inside vector region"); 2710 if (auto Rep = dyn_cast<VPReplicateRecipe>(Def)) 2711 return Rep->isUniform(); 2712 return false; 2713 } 2714 } // end namespace vputils 2715 2716 } // end namespace llvm 2717 2718 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H 2719