//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of the Vectorization Plan base classes:
/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
///    VPBlockBase, together implementing a Hierarchical CFG;
/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
///    within VPBasicBlocks;
/// 3. VPInstruction, a concrete Recipe and VPUser modeling a single planned
///    instruction;
/// 4. The VPlan class holding a candidate for vectorization;
/// 5. The VPlanPrinter class providing a way to print a plan in dot format;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H

#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Operator.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <string>

namespace llvm {

class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class IRBuilderBase;
class LoopInfo;
class raw_ostream;
class RecurrenceDescriptor;
class SCEV;
class Type;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
class VPReplicateRecipe;
class VPlanSlp;
class Value;
class LoopVersioning;

namespace Intrinsic {
typedef unsigned ID;
}

/// Returns a calculation for the total number of elements for a given \p VF.
/// For fixed width vectors this value is a constant, whereas for scalable
/// vectors it is an expression determined at runtime.
Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);

/// Return a value for Step multiplied by VF.
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
                       int64_t Step);

const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
                                Loop *CurLoop = nullptr);

/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 16) = {1, 2, 4, 8}
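///
/// For example, iterating over a fixed-width range [1, 16) visits the factors
/// {1, 2, 4, 8}. A minimal sketch (illustrative only):
/// \code
///   VFRange Range(ElementCount::getFixed(1), ElementCount::getFixed(16));
///   for (ElementCount VF : Range)
///     dbgs() << VF.getKnownMinValue() << "\n"; // prints 1, 2, 4, 8
/// \endcode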
struct VFRange {
  // A power of 2.
  const ElementCount Start;

  // A power of 2. If End <= Start, the range is empty.
  ElementCount End;

  bool isEmpty() const {
    return End.getKnownMinValue() <= Start.getKnownMinValue();
  }

  VFRange(const ElementCount &Start, const ElementCount &End)
      : Start(Start), End(End) {
    assert(Start.isScalable() == End.isScalable() &&
           "Both Start and End should have the same scalable flag");
    assert(isPowerOf2_32(Start.getKnownMinValue()) &&
           "Expected Start to be a power of 2");
    assert(isPowerOf2_32(End.getKnownMinValue()) &&
           "Expected End to be a power of 2");
  }

  /// Iterator to iterate over vectorization factors in a VFRange.
  class iterator
      : public iterator_facade_base<iterator, std::forward_iterator_tag,
                                    ElementCount> {
    ElementCount VF;

  public:
    iterator(ElementCount VF) : VF(VF) {}

    bool operator==(const iterator &Other) const { return VF == Other.VF; }

    ElementCount operator*() const { return VF; }

    iterator &operator++() {
      VF *= 2;
      return *this;
    }
  };

  iterator begin() { return iterator(Start); }
  iterator end() {
    assert(isPowerOf2_32(End.getKnownMinValue()));
    return iterator(End);
  }
};

using VPlanPtr = std::unique_ptr<VPlan>;

/// In what follows, the term "input IR" refers to code that is fed into the
/// vectorizer whereas the term "output IR" refers to code that is generated by
/// the vectorizer.

/// VPLane provides a way to access lanes in both fixed width and scalable
/// vectors, where for the latter the lane index sometimes needs to be
/// calculated as a runtime expression.
class VPLane {
public:
  /// Kind describes how to interpret Lane.
  enum class Kind : uint8_t {
    /// For First, Lane is the index into the first N elements of a
    /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
    First,
    /// For ScalableLast, Lane is the offset from the start of the last
    /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
    /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
    /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
    ScalableLast
  };

private:
  /// in [0..VF)
  unsigned Lane;

  /// Indicates how the Lane should be interpreted, as described above.
  Kind LaneKind;

public:
  VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}

  static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }

  static VPLane getLastLaneForVF(const ElementCount &VF) {
    unsigned LaneOffset = VF.getKnownMinValue() - 1;
    Kind LaneKind;
    if (VF.isScalable())
      // In this case 'LaneOffset' refers to the offset from the start of the
      // last subvector with VF.getKnownMinValue() elements.
      LaneKind = VPLane::Kind::ScalableLast;
    else
      LaneKind = VPLane::Kind::First;
    return VPLane(LaneOffset, LaneKind);
  }

  /// Returns a compile-time known value for the lane index and asserts if the
  /// lane can only be calculated at runtime.
  unsigned getKnownLane() const {
    assert(LaneKind == Kind::First);
    return Lane;
  }

  /// Returns an expression describing the lane index that can be used at
  /// runtime.
  Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;

  /// Returns the Kind of lane offset.
  Kind getKind() const { return LaneKind; }
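  // Illustrative note: for a scalable VF <vscale x 4>, getLastLaneForVF
  // returns VPLane(3, Kind::ScalableLast), i.e. the runtime lane
  // ((vscale - 1) * 4) + 3, whereas for the fixed VF 4 it returns
  // VPLane(3, Kind::First).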
  /// Returns true if this is the first lane of the whole vector.
  bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }

  /// Maps the lane to a cache index based on \p VF.
  unsigned mapToCacheIndex(const ElementCount &VF) const {
    switch (LaneKind) {
    case VPLane::Kind::ScalableLast:
      assert(VF.isScalable() && Lane < VF.getKnownMinValue());
      return VF.getKnownMinValue() + Lane;
    default:
      assert(Lane < VF.getKnownMinValue());
      return Lane;
    }
  }

  /// Returns the maximum number of lanes that we are able to consider
  /// caching for \p VF.
  static unsigned getNumCachedLanes(const ElementCount &VF) {
    return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
  }
};

/// VPIteration represents a single point in the iteration space of the output
/// (vectorized and/or unrolled) IR loop.
struct VPIteration {
  /// in [0..UF)
  unsigned Part;

  VPLane Lane;

  VPIteration(unsigned Part, unsigned Lane,
              VPLane::Kind Kind = VPLane::Kind::First)
      : Part(Part), Lane(Lane, Kind) {}

  VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}

  bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
};

/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
  VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
                   DominatorTree *DT, IRBuilderBase &Builder,
                   InnerLoopVectorizer *ILV, VPlan *Plan)
      : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan),
        LVer(nullptr) {}

  /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
  ElementCount VF;
  unsigned UF;

  /// Hold the indices to generate specific scalar instructions. Null indicates
  /// that all instances are to be generated, using either scalar or vector
  /// instructions.
  std::optional<VPIteration> Instance;

  struct DataState {
    /// A type for vectorized values in the new loop. Each value from the
    /// original loop, when vectorized, is represented by UF vector values in
    /// the new unrolled loop, where UF is the unroll factor.
    typedef SmallVector<Value *, 2> PerPartValuesTy;

    DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;

    using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>;
    DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;
  } Data;

  /// Get the generated Value for a given VPValue and a given Part. Note that
  /// as some Defs are still created by ILV and managed in its ValueMap, this
  /// method will delegate the call to ILV in such cases in order to provide
  /// callers a consistent API.
  /// \see set.
  Value *get(VPValue *Def, unsigned Part);
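  // Illustrative sketch (hypothetical recipe code): a recipe's execute() would
  // typically fetch its operand's per-part value and record its own result:
  // \code
  //   for (unsigned Part = 0; Part < State.UF; ++Part) {
  //     Value *Op = State.get(getOperand(0), Part);
  //     Value *Res = State.Builder.CreateNeg(Op, "neg");
  //     State.set(this, Res, Part);
  //   }
  // \endcode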
  /// Get the generated Value for a given VPValue and a given Part and Lane.
  Value *get(VPValue *Def, const VPIteration &Instance);

  bool hasVectorValue(VPValue *Def, unsigned Part) {
    auto I = Data.PerPartOutput.find(Def);
    return I != Data.PerPartOutput.end() && Part < I->second.size() &&
           I->second[Part];
  }

  bool hasAnyVectorValue(VPValue *Def) const {
    return Data.PerPartOutput.contains(Def);
  }

  bool hasScalarValue(VPValue *Def, VPIteration Instance) {
    auto I = Data.PerPartScalars.find(Def);
    if (I == Data.PerPartScalars.end())
      return false;
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    return Instance.Part < I->second.size() &&
           CacheIdx < I->second[Instance.Part].size() &&
           I->second[Instance.Part][CacheIdx];
  }

  /// Set the generated Value for a given VPValue and a given Part.
  void set(VPValue *Def, Value *V, unsigned Part) {
    if (!Data.PerPartOutput.count(Def)) {
      DataState::PerPartValuesTy Entry(UF);
      Data.PerPartOutput[Def] = Entry;
    }
    Data.PerPartOutput[Def][Part] = V;
  }

  /// Reset an existing vector value for \p Def and a given \p Part.
  void reset(VPValue *Def, Value *V, unsigned Part) {
    auto Iter = Data.PerPartOutput.find(Def);
    assert(Iter != Data.PerPartOutput.end() &&
           "need to overwrite existing value");
    Iter->second[Part] = V;
  }

  /// Set the generated scalar \p V for \p Def and the given \p Instance.
  void set(VPValue *Def, Value *V, const VPIteration &Instance) {
    auto Iter = Data.PerPartScalars.insert({Def, {}});
    auto &PerPartVec = Iter.first->second;
    while (PerPartVec.size() <= Instance.Part)
      PerPartVec.emplace_back();
    auto &Scalars = PerPartVec[Instance.Part];
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    while (Scalars.size() <= CacheIdx)
      Scalars.push_back(nullptr);
    assert(!Scalars[CacheIdx] &&
           "should not overwrite existing value; use reset()");
    Scalars[CacheIdx] = V;
  }

  /// Reset an existing scalar value for \p Def and a given \p Instance.
  void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
    auto Iter = Data.PerPartScalars.find(Def);
    assert(Iter != Data.PerPartScalars.end() &&
           "need to overwrite existing value");
    assert(Instance.Part < Iter->second.size() &&
           "need to overwrite existing value");
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    assert(CacheIdx < Iter->second[Instance.Part].size() &&
           "need to overwrite existing value");
    Iter->second[Instance.Part][CacheIdx] = V;
  }

  /// Add additional metadata to \p To that was not present on \p Orig.
  ///
  /// Currently this is used to add the noalias annotations based on the
  /// inserted memchecks. Use this for instructions that are *cloned* into the
  /// vector loop.
  void addNewMetadata(Instruction *To, const Instruction *Orig);

  /// Add metadata from one instruction to another.
  ///
  /// This includes both the original MDs from \p From and additional ones (\see
  /// addNewMetadata). Use this for *newly created* instructions in the vector
  /// loop.
  void addMetadata(Instruction *To, Instruction *From);

  /// Similar to the previous function but it adds the metadata to a
  /// vector of instructions.
  void addMetadata(ArrayRef<Value *> To, Instruction *From);
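  // Illustrative sketch: caching a scalar for part 1, lane 2 and reading it
  // back (Def and V are placeholders for a VPValue and its generated Value):
  // \code
  //   VPIteration Instance(/*Part=*/1, /*Lane=*/2);
  //   State.set(Def, V, Instance);
  //   assert(State.hasScalarValue(Def, Instance));
  //   Value *S = State.get(Def, Instance);
  // \endcode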
  /// Set the debug location in the builder using the debug location in \p V.
  void setDebugLocFromInst(const Value *V);

  /// Hold state information used when constructing the CFG of the output IR,
  /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
  struct CFGState {
    /// The previous VPBasicBlock visited. Initially set to null.
    VPBasicBlock *PrevVPBB = nullptr;

    /// The previous IR BasicBlock created or used. Initially set to the new
    /// header BasicBlock.
    BasicBlock *PrevBB = nullptr;

    /// The last IR BasicBlock in the output IR. Set to the exit block of the
    /// vector loop.
    BasicBlock *ExitBB = nullptr;

    /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
    /// of replication, maps the BasicBlock of the last replica created.
    SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;

    CFGState() = default;

    /// Returns the BasicBlock* mapped to the pre-header of the loop region
    /// containing \p R.
    BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
  } CFG;

  /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
  LoopInfo *LI;

  /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
  DominatorTree *DT;

  /// Hold a reference to the IRBuilder used to generate output IR code.
  IRBuilderBase &Builder;

  VPValue2ValueTy VPValue2Value;

  /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
  Value *CanonicalIV = nullptr;

  /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
  InnerLoopVectorizer *ILV;

  /// Pointer to the VPlan for which code is generated.
  VPlan *Plan;

  /// The loop object for the current parent region, or nullptr.
  Loop *CurrentVectorLoop = nullptr;

  /// LoopVersioning. It's only set up (non-null) if memchecks were
  /// used.
  ///
  /// This is currently only used to add no-alias metadata based on the
  /// memchecks. The actual versioning is performed manually.
  LoopVersioning *LVer = nullptr;

  /// Map SCEVs to their expanded values. Populated when executing
  /// VPExpandSCEVRecipes.
  DenseMap<const SCEV *, Value *> ExpandedSCEVs;
};

/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
class VPBlockBase {
  friend class VPBlockUtils;

  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).

  /// An optional name for the block.
  std::string Name;

  /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
  /// it is a topmost VPBlockBase.
  VPRegionBlock *Parent = nullptr;

  /// List of predecessor blocks.
  SmallVector<VPBlockBase *, 1> Predecessors;

  /// List of successor blocks.
  SmallVector<VPBlockBase *, 1> Successors;

  /// VPlan containing the block. Can only be set on the entry block of the
  /// plan.
  VPlan *Plan = nullptr;

  /// Add \p Successor as the last successor to this block.
  void appendSuccessor(VPBlockBase *Successor) {
    assert(Successor && "Cannot add nullptr successor!");
    Successors.push_back(Successor);
  }

  /// Add \p Predecessor as the last predecessor to this block.
  void appendPredecessor(VPBlockBase *Predecessor) {
    assert(Predecessor && "Cannot add nullptr predecessor!");
    Predecessors.push_back(Predecessor);
  }

  /// Remove \p Predecessor from the predecessors of this block.
  void removePredecessor(VPBlockBase *Predecessor) {
    auto Pos = find(Predecessors, Predecessor);
    assert(Pos != Predecessors.end() && "Predecessor does not exist");
    Predecessors.erase(Pos);
  }

  /// Remove \p Successor from the successors of this block.
  void removeSuccessor(VPBlockBase *Successor) {
    auto Pos = find(Successors, Successor);
    assert(Pos != Successors.end() && "Successor does not exist");
    Successors.erase(Pos);
  }

protected:
  VPBlockBase(const unsigned char SC, const std::string &N)
      : SubclassID(SC), Name(N) {}

public:
  /// An enumeration for keeping track of the concrete subclasses of
  /// VPBlockBase that are actually instantiated. Values of this enumeration
  /// are kept in the SubclassID field of the VPBlockBase objects. They are
  /// used for concrete type identification.
  using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };

  using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;

  virtual ~VPBlockBase() = default;

  const std::string &getName() const { return Name; }

  void setName(const Twine &newName) { Name = newName.str(); }

  /// \return an ID for the concrete type of this object.
  /// This is used to implement the classof checks. This should not be used
  /// for any other purpose, as the values may change as LLVM evolves.
  unsigned getVPBlockID() const { return SubclassID; }

  VPRegionBlock *getParent() { return Parent; }
  const VPRegionBlock *getParent() const { return Parent; }

  /// \return A pointer to the plan containing the current block.
  VPlan *getPlan();
  const VPlan *getPlan() const;

  /// Sets the pointer of the plan containing the block. The block must be the
  /// entry block into the VPlan.
  void setPlan(VPlan *ParentPlan);

  void setParent(VPRegionBlock *P) { Parent = P; }

  /// \return the VPBasicBlock that is the entry of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getEntryBasicBlock() const;
  VPBasicBlock *getEntryBasicBlock();

  /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getExitingBasicBlock() const;
  VPBasicBlock *getExitingBasicBlock();

  const VPBlocksTy &getSuccessors() const { return Successors; }
  VPBlocksTy &getSuccessors() { return Successors; }

  iterator_range<VPBlockBase **> successors() { return Successors; }

  const VPBlocksTy &getPredecessors() const { return Predecessors; }
  VPBlocksTy &getPredecessors() { return Predecessors; }

  /// \return the successor of this VPBlockBase if it has a single successor.
  /// Otherwise return a null pointer.
  VPBlockBase *getSingleSuccessor() const {
    return (Successors.size() == 1 ? *Successors.begin() : nullptr);
  }

  /// \return the predecessor of this VPBlockBase if it has a single
  /// predecessor. Otherwise return a null pointer.
  VPBlockBase *getSinglePredecessor() const {
    return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
  }
  size_t getNumSuccessors() const { return Successors.size(); }
  size_t getNumPredecessors() const { return Predecessors.size(); }

  /// An Enclosing Block of a block B is any block containing B, including B
  /// itself. \return the closest enclosing block starting from "this", which
  /// has successors. \return the root enclosing block if all enclosing blocks
  /// have no successors.
  VPBlockBase *getEnclosingBlockWithSuccessors();

  /// \return the closest enclosing block starting from "this", which has
  /// predecessors. \return the root enclosing block if all enclosing blocks
  /// have no predecessors.
  VPBlockBase *getEnclosingBlockWithPredecessors();

  /// \return the successors either attached directly to this VPBlockBase or, if
  /// this VPBlockBase is the exit block of a VPRegionBlock and has no
  /// successors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has successors and return them. If no such
  /// VPRegionBlock exists, return the (empty) successors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalSuccessors() {
    return getEnclosingBlockWithSuccessors()->getSuccessors();
  }

  /// \return the hierarchical successor of this VPBlockBase if it has a single
  /// hierarchical successor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalSuccessor() {
    return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
  }

  /// \return the predecessors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
  /// predecessors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has predecessors and return them. If no such
  /// VPRegionBlock exists, return the (empty) predecessors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalPredecessors() {
    return getEnclosingBlockWithPredecessors()->getPredecessors();
  }

  /// \return the hierarchical predecessor of this VPBlockBase if it has a
  /// single hierarchical predecessor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalPredecessor() {
    return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
  }

  /// Set a given VPBlockBase \p Successor as the single successor of this
  /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
  /// This VPBlockBase must have no successors.
  void setOneSuccessor(VPBlockBase *Successor) {
    assert(Successors.empty() && "Setting one successor when others exist.");
    appendSuccessor(Successor);
  }

  /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
  /// successors of this VPBlockBase. This VPBlockBase is not added as
  /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
  /// successors.
  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
    assert(Successors.empty() && "Setting two successors when others exist.");
    appendSuccessor(IfTrue);
    appendSuccessor(IfFalse);
  }
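  // Illustrative sketch: wiring a two-way branch by hand. Successor and
  // predecessor lists are maintained separately, so both sides must be
  // updated (block names are hypothetical):
  // \code
  //   CondBB->setTwoSuccessors(ThenBB, ElseBB);
  //   ThenBB->setPredecessors({CondBB});
  //   ElseBB->setPredecessors({CondBB});
  // \endcode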
  /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
  /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
  /// as successor of any VPBasicBlock in \p NewPreds.
  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
    assert(Predecessors.empty() && "Block predecessors already set.");
    for (auto *Pred : NewPreds)
      appendPredecessor(Pred);
  }

  /// Remove all the predecessors of this block.
  void clearPredecessors() { Predecessors.clear(); }

  /// Remove all the successors of this block.
  void clearSuccessors() { Successors.clear(); }

  /// The method which generates the output IR that corresponds to this
  /// VPBlockBase, thereby "executing" the VPlan.
  virtual void execute(VPTransformState *State) = 0;

  /// Delete all blocks reachable from a given VPBlockBase, inclusive.
  static void deleteCFG(VPBlockBase *Entry);

  /// Return true if it is legal to hoist instructions into this block.
  bool isLegalToHoistInto() {
    // There are currently no constraints that prevent an instruction from
    // being hoisted into a VPBlockBase.
    return true;
  }

  /// Replace all operands of VPUsers in the block with \p NewValue and also
  /// replaces all uses of VPValues defined in the block with NewValue.
  virtual void dropAllReferences(VPValue *NewValue) = 0;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printAsOperand(raw_ostream &OS, bool PrintType) const {
    OS << getName();
  }

  /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
  /// with \p Indent. \p SlotTracker is used to print unnamed VPValues using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  virtual void print(raw_ostream &O, const Twine &Indent,
                     VPSlotTracker &SlotTracker) const = 0;

  /// Print plain-text dump of this VPBlockBase to \p O.
  void print(raw_ostream &O) const {
    VPSlotTracker SlotTracker(getPlan());
    print(O, "", SlotTracker);
  }

  /// Print the successors of this block to \p O, prefixing all lines with \p
  /// Indent.
  void printSuccessors(raw_ostream &O, const Twine &Indent) const;

  /// Dump this VPBlockBase to dbgs().
  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
#endif
};

/// A value that is used outside the VPlan. The operand of the user needs to be
/// added to the associated LCSSA phi node.
class VPLiveOut : public VPUser {
  PHINode *Phi;

public:
  VPLiveOut(PHINode *Phi, VPValue *Op)
      : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}

  static inline bool classof(const VPUser *U) {
    return U->getVPUserID() == VPUser::VPUserID::LiveOut;
  }

  /// Fix up the wrapped LCSSA phi node in the unique exit block. This simply
  /// means we need to add the appropriate incoming value from the middle
  /// block as exiting edges from the scalar epilogue loop (if present) are
  /// already in place, and we exit the vector loop exclusively to the middle
  /// block.
  void fixPhi(VPlan &Plan, VPTransformState &State);

  /// Returns true if the VPLiveOut uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  PHINode *getPhi() const { return Phi; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPLiveOut to \p O.
  void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
#endif
};

/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// VPRecipeBases that also inherit from VPValue must make sure to inherit from
/// VPRecipeBase before VPValue.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
                     public VPDef,
                     public VPUser {
  friend VPBasicBlock;
  friend class VPBlockUtils;

  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

public:
  VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands)
      : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe) {}

  template <typename IterT>
  VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands)
      : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe) {}
  virtual ~VPRecipeBase() = default;

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(VPTransformState &State) = 0;

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);
  /// Insert an unlinked recipe into \p BB immediately before the insertion
  /// point \p IP.
  void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// Unlink this recipe and insert into BB before I.
  ///
  /// \pre I is a valid iterator into BB.
  void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();

  /// Returns the underlying instruction if the recipe is a VPValue, or nullptr
  /// otherwise.
  Instruction *getUnderlyingInstr() {
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
  }
  const Instruction *getUnderlyingInstr() const {
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    // All VPDefs are also VPRecipeBases.
    return true;
  }

  static inline bool classof(const VPUser *U) {
    return U->getVPUserID() == VPUser::VPUserID::Recipe;
  }

  /// Returns true if the recipe may have side-effects.
  bool mayHaveSideEffects() const;
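  // Illustrative sketch: relocating an existing recipe R to follow another
  // recipe Pos, possibly in a different VPBasicBlock:
  // \code
  //   R->moveAfter(Pos);    // unlinks R and re-inserts it after Pos
  //   // or, equivalently:
  //   R->removeFromParent();
  //   R->insertAfter(Pos);
  // \endcode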
  /// Returns true for PHI-like recipes.
  bool isPhi() const {
    return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
  }

  /// Returns true if the recipe may read from memory.
  bool mayReadFromMemory() const;

  /// Returns true if the recipe may write to memory.
  bool mayWriteToMemory() const;

  /// Returns true if the recipe may read from or write to memory.
  bool mayReadOrWriteMemory() const {
    return mayReadFromMemory() || mayWriteToMemory();
  }
};

// Helper macro to define common classof implementations for recipes.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *D) {                                 \
    return D->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    auto *R = V->getDefiningRecipe();                                          \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    auto *R = dyn_cast<VPRecipeBase>(U);                                       \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPDefID() == VPDefID;                                         \
  }

/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While, as with any Recipe, it may generate a sequence of IR instructions
/// when executed, these instructions would always form a single-def expression
/// as the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPRecipeBase, public VPValue {
  friend class VPlanSlp;

public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
  enum {
    FirstOrderRecurrenceSplice =
        Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
                                      // values of a first-order recurrence.
    Not,
    ICmpULE,
    SLPLoad,
    SLPStore,
    ActiveLaneMask,
    CalculateTripCountMinusVF,
    CanonicalIVIncrement,
    CanonicalIVIncrementNUW,
    // The next two are similar to the above, but instead increment the
    // canonical IV separately for each unrolled part.
    CanonicalIVIncrementForPart,
    CanonicalIVIncrementForPartNUW,
    BranchOnCount,
    BranchOnCond
  };

private:
  typedef unsigned char OpcodeTy;
  OpcodeTy Opcode;
  FastMathFlags FMF;
  DebugLoc DL;

  /// An optional name that can be used for the generated IR instruction.
  const std::string Name;

  /// Utility method serving execute(): generates a single instance of the
  /// modeled instruction. \returns the generated value for \p Part.
  /// In some cases an existing value is returned rather than a generated
  /// one.
  Value *generateInstruction(VPTransformState &State, unsigned Part);

protected:
  void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }

public:
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,
                const Twine &Name = "")
      : VPRecipeBase(VPDef::VPInstructionSC, Operands), VPValue(this),
        Opcode(Opcode), DL(DL), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                DebugLoc DL = {}, const Twine &Name = "")
      : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {}

  VP_CLASSOF_IMPL(VPDef::VPInstructionSC)

  VPInstruction *clone() const {
    SmallVector<VPValue *, 2> Operands(operands());
    return new VPInstruction(Opcode, Operands, DL, Name);
  }

  unsigned getOpcode() const { return Opcode; }
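  // Illustrative sketch: building a negated condition and branching on it
  // (Cond is a placeholder VPValue*, DL a DebugLoc):
  // \code
  //   auto *NotCond = new VPInstruction(VPInstruction::Not, {Cond}, DL);
  //   auto *Br = new VPInstruction(VPInstruction::BranchOnCond, {NotCond}, DL);
  // \endcode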
  /// Generate the instruction.
  /// TODO: We currently execute only per-part unless a specific instance is
  /// provided.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPInstruction to \p O.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Print the VPInstruction to dbgs() (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  /// Return true if this instruction may modify memory.
  bool mayWriteToMemory() const {
    // TODO: we can use attributes of the called function to rule out memory
    // modifications.
    return Opcode == Instruction::Store || Opcode == Instruction::Call ||
           Opcode == Instruction::Invoke || Opcode == SLPStore;
  }

  bool hasResult() const {
    // CallInst may or may not have a result, depending on the called function.
    // Conservatively assume calls have results for now.
    switch (getOpcode()) {
    case Instruction::Ret:
    case Instruction::Br:
    case Instruction::Store:
    case Instruction::Switch:
    case Instruction::IndirectBr:
    case Instruction::Resume:
    case Instruction::CatchRet:
    case Instruction::Unreachable:
    case Instruction::Fence:
    case Instruction::AtomicRMW:
    case VPInstruction::BranchOnCond:
    case VPInstruction::BranchOnCount:
      return false;
    default:
      return true;
    }
  }

  /// Set the fast-math flags.
  void setFastMathFlags(FastMathFlags FMFNew);

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    if (getOperand(0) != Op)
      return false;
    switch (getOpcode()) {
    default:
      return false;
    case VPInstruction::ActiveLaneMask:
    case VPInstruction::CalculateTripCountMinusVF:
    case VPInstruction::CanonicalIVIncrement:
    case VPInstruction::CanonicalIVIncrementNUW:
    case VPInstruction::CanonicalIVIncrementForPart:
    case VPInstruction::CanonicalIVIncrementForPartNUW:
    case VPInstruction::BranchOnCount:
      return true;
    }
    llvm_unreachable("switch should return");
  }
};
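// Illustrative sketch: the classof implementations generated by
// VP_CLASSOF_IMPL make LLVM-style RTTI work across the recipe's different
// views, e.g. for a VPRecipeBase &R:
// \code
//   if (auto *VPI = dyn_cast<VPInstruction>(&R))
//     if (VPI->getOpcode() == VPInstruction::BranchOnCount)
//       dbgs() << "found the latch branch\n";
// \endcode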
/// Class to record LLVM IR flags for a recipe along with it.
class VPRecipeWithIRFlags : public VPRecipeBase {
  enum class OperationType : unsigned char {
    OverflowingBinOp,
    PossiblyExactOp,
    GEPOp,
    FPMathOp,
    Other
  };
  struct WrapFlagsTy {
    char HasNUW : 1;
    char HasNSW : 1;
  };
  struct ExactFlagsTy {
    char IsExact : 1;
  };
  struct GEPFlagsTy {
    char IsInBounds : 1;
  };
  struct FastMathFlagsTy {
    char AllowReassoc : 1;
    char NoNaNs : 1;
    char NoInfs : 1;
    char NoSignedZeros : 1;
    char AllowReciprocal : 1;
    char AllowContract : 1;
    char ApproxFunc : 1;
  };

  OperationType OpType;

  union {
    WrapFlagsTy WrapFlags;
    ExactFlagsTy ExactFlags;
    GEPFlagsTy GEPFlags;
    FastMathFlagsTy FMFs;
    unsigned char AllFlags;
  };

public:
  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, iterator_range<IterT> Operands)
      : VPRecipeBase(SC, Operands) {
    OpType = OperationType::Other;
    AllFlags = 0;
  }

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, iterator_range<IterT> Operands,
                      Instruction &I)
      : VPRecipeWithIRFlags(SC, Operands) {
    if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
      OpType = OperationType::OverflowingBinOp;
      WrapFlags.HasNUW = Op->hasNoUnsignedWrap();
      WrapFlags.HasNSW = Op->hasNoSignedWrap();
    } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
      OpType = OperationType::PossiblyExactOp;
      ExactFlags.IsExact = Op->isExact();
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
      OpType = OperationType::GEPOp;
      GEPFlags.IsInBounds = GEP->isInBounds();
    } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
      OpType = OperationType::FPMathOp;
      FastMathFlags FMF = Op->getFastMathFlags();
      FMFs.AllowReassoc = FMF.allowReassoc();
      FMFs.NoNaNs = FMF.noNaNs();
      FMFs.NoInfs = FMF.noInfs();
      FMFs.NoSignedZeros = FMF.noSignedZeros();
      FMFs.AllowReciprocal = FMF.allowReciprocal();
      FMFs.AllowContract = FMF.allowContract();
      FMFs.ApproxFunc = FMF.approxFunc();
    }
  }

  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
           R->getVPDefID() == VPRecipeBase::VPReplicateSC;
  }

  /// Drop all poison-generating flags.
  void dropPoisonGeneratingFlags() {
    // NOTE: This needs to be kept in-sync with
    // Instruction::dropPoisonGeneratingFlags.
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      WrapFlags.HasNUW = false;
      WrapFlags.HasNSW = false;
      break;
    case OperationType::PossiblyExactOp:
      ExactFlags.IsExact = false;
      break;
    case OperationType::GEPOp:
      GEPFlags.IsInBounds = false;
      break;
    case OperationType::FPMathOp:
      FMFs.NoNaNs = false;
      FMFs.NoInfs = false;
      break;
    case OperationType::Other:
      break;
    }
  }
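  // Illustrative sketch: a transform that hoists or speculates a recipe R
  // (a hypothetical VPRecipeBase reference) would first strip its
  // poison-generating flags:
  // \code
  //   if (auto *RFlags = dyn_cast<VPRecipeWithIRFlags>(&R))
  //     RFlags->dropPoisonGeneratingFlags(); // clears nuw/nsw, exact, inbounds
  // \endcode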
  /// Set the IR flags for \p I.
  void setFlags(Instruction *I) const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
      I->setHasNoSignedWrap(WrapFlags.HasNSW);
      break;
    case OperationType::PossiblyExactOp:
      I->setIsExact(ExactFlags.IsExact);
      break;
    case OperationType::GEPOp:
      cast<GetElementPtrInst>(I)->setIsInBounds(GEPFlags.IsInBounds);
      break;
    case OperationType::FPMathOp:
      I->setHasAllowReassoc(FMFs.AllowReassoc);
      I->setHasNoNaNs(FMFs.NoNaNs);
      I->setHasNoInfs(FMFs.NoInfs);
      I->setHasNoSignedZeros(FMFs.NoSignedZeros);
      I->setHasAllowReciprocal(FMFs.AllowReciprocal);
      I->setHasAllowContract(FMFs.AllowContract);
      I->setHasApproxFunc(FMFs.ApproxFunc);
      break;
    case OperationType::Other:
      break;
    }
  }

  bool isInBounds() const {
    assert(OpType == OperationType::GEPOp &&
           "recipe doesn't have inbounds flag");
    return GEPFlags.IsInBounds;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  FastMathFlags getFastMathFlags() const {
    FastMathFlags Res;
    Res.setAllowReassoc(FMFs.AllowReassoc);
    Res.setNoNaNs(FMFs.NoNaNs);
    Res.setNoInfs(FMFs.NoInfs);
    Res.setNoSignedZeros(FMFs.NoSignedZeros);
    Res.setAllowReciprocal(FMFs.AllowReciprocal);
    Res.setAllowContract(FMFs.AllowContract);
    Res.setApproxFunc(FMFs.ApproxFunc);
    return Res;
  }

  void printFlags(raw_ostream &O) const;
#endif
};

/// VPWidenRecipe is a recipe for producing a copy of vector type of its
/// ingredient. This recipe covers most of the traditional vectorization cases
/// where each ingredient transforms into a vectorized version of itself.
class VPWidenRecipe : public VPRecipeWithIRFlags, public VPValue {

public:
  template <typename IterT>
  VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
      : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPValue(this, &I) {}

  ~VPWidenRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenSC)

  /// Produce widened copies of all Ingredients.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeBase, public VPValue {
  /// Cast instruction opcode.
  Instruction::CastOps Opcode;

  /// Result type for the cast.
  Type *ResultTy;

public:
  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
                    CastInst *UI = nullptr)
      : VPRecipeBase(VPDef::VPWidenCastSC, Op), VPValue(this, UI),
        Opcode(Opcode), ResultTy(ResultTy) {
    assert((!UI || UI->getOpcode() == Opcode) &&
           "opcode of underlying cast doesn't match");
    assert((!UI || UI->getType() == ResultTy) &&
           "result type of underlying cast doesn't match");
  }

  ~VPWidenCastRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)

  /// Produce widened copies of the cast.
  void execute(VPTransformState &State) override;
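  // Illustrative sketch: widening a zext from i8 to i32 (Op is a placeholder
  // VPValue operand, Int32Ty a cached IR type):
  // \code
  //   auto *Cast = new VPWidenCastRecipe(Instruction::ZExt, Op, Int32Ty);
  // \endcode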
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  Instruction::CastOps getOpcode() const { return Opcode; }

  /// Returns the result type of the cast.
  Type *getResultType() const { return ResultTy; }
};

/// A recipe for widening Call instructions.
class VPWidenCallRecipe : public VPRecipeBase, public VPValue {
  /// ID of the vector intrinsic to call when widening the call. If set to
  /// Intrinsic::not_intrinsic, a library call will be used instead.
  Intrinsic::ID VectorIntrinsicID;
  /// If this recipe represents a library call, Variant stores a pointer to
  /// the chosen function. There is a 1:1 mapping between a given VF and the
  /// chosen vectorized variant, so there will be a different VPlan for each
  /// VF with a valid variant.
  Function *Variant;

public:
  template <typename IterT>
  VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments,
                    Intrinsic::ID VectorIntrinsicID,
                    Function *Variant = nullptr)
      : VPRecipeBase(VPDef::VPWidenCallSC, CallArguments), VPValue(this, &I),
        VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {}

  ~VPWidenCallRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)

  /// Produce a widened version of the call instruction.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for widening select instructions.
struct VPWidenSelectRecipe : public VPRecipeBase, public VPValue {
  template <typename IterT>
  VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
      : VPRecipeBase(VPDef::VPWidenSelectSC, Operands), VPValue(this, &I) {}

  ~VPWidenSelectRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)

  /// Produce a widened version of the select instruction.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getCond() const {
    return getOperand(0);
  }

  bool isInvariantCond() const {
    return getCond()->isDefinedOutsideVectorRegions();
  }
};

/// A recipe for handling GEP instructions.
class VPWidenGEPRecipe : public VPRecipeWithIRFlags, public VPValue {
  bool isPointerLoopInvariant() const {
    return getOperand(0)->isDefinedOutsideVectorRegions();
  }

  bool isIndexLoopInvariant(unsigned I) const {
    return getOperand(I + 1)->isDefinedOutsideVectorRegions();
  }

  bool areAllOperandsInvariant() const {
    return all_of(operands(), [](VPValue *Op) {
      return Op->isDefinedOutsideVectorRegions();
    });
  }

public:
  template <typename IterT>
  VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
      : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP),
        VPValue(this, GEP) {}

  ~VPWidenGEPRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
  /// Generate the gep nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A pure virtual base class for all recipes modeling header phis, including
/// phis for first order recurrences, pointer inductions and reductions. The
/// start value is the first operand of the recipe and the incoming value from
/// the backedge is the second operand.
///
/// Inductions are modeled using the following sub-classes:
/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
///   starting at a specified value (zero for the main vector loop, the resume
///   value for the epilogue vector loop) and stepping by 1. The induction
///   controls exiting of the vector loop by comparing against the vector trip
///   count. Produces a single scalar PHI for the induction value per
///   iteration.
/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
///   floating point inductions with arbitrary start and step values. Produces
///   a vector PHI per-part.
/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
///   value of an IV with different start and step values. Produces a single
///   scalar value per iteration.
/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
///   canonical or derived induction.
/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
///   pointer induction. Produces either a vector PHI per-part or scalar values
///   per-lane based on the canonical induction.
class VPHeaderPHIRecipe : public VPRecipeBase, public VPValue {
protected:
  VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
                    VPValue *Start = nullptr)
      : VPRecipeBase(VPDefID, {}), VPValue(this, UnderlyingInstr) {
    if (Start)
      addOperand(Start);
  }

public:
  ~VPHeaderPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *B) {
    return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
           B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
  }
  static inline bool classof(const VPValue *V) {
    auto *B = V->getDefiningRecipe();
    return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
           B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
  }

  /// Generate the phi nodes.
  void execute(VPTransformState &State) override = 0;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override = 0;
#endif

  /// Returns the start value of the phi, if one is set.
  VPValue *getStartValue() {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }
  VPValue *getStartValue() const {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }

  /// Update the start value of the recipe.
  void setStartValue(VPValue *V) { setOperand(0, V); }

  /// Returns the incoming value from the loop backedge.
  virtual VPValue *getBackedgeValue() {
    return getOperand(1);
  }
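  // Illustrative sketch: by convention, operand 0 of a header phi is its
  // start value and operand 1 the value incoming over the backedge:
  // \code
  //   VPValue *Start = R.getStartValue();   // may be nullptr if not set
  //   VPValue *Prev = R.getBackedgeValue(); // getOperand(1)
  // \endcode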
  /// Returns the backedge value as a recipe. The backedge value is guaranteed
  /// to be a recipe.
  virtual VPRecipeBase &getBackedgeRecipe() {
    return *getBackedgeValue()->getDefiningRecipe();
  }
};

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector values.
class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
  PHINode *IV;
  TruncInst *Trunc;
  const InductionDescriptor &IndDesc;

public:
  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc)
      : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
        Trunc(nullptr), IndDesc(IndDesc) {
    addOperand(Step);
  }

  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc,
                                TruncInst *Trunc)
      : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
        IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
    addOperand(Step);
  }

  ~VPWidenIntOrFpInductionRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)

  /// Generate the vectorized and scalarized versions of the phi node as
  /// needed by their users.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getBackedgeValue() override {
    // TODO: All operands of base recipe must exist and be at same index in
    // derived recipe.
    llvm_unreachable(
        "VPWidenIntOrFpInductionRecipe generates its own backedge value");
  }

  VPRecipeBase &getBackedgeRecipe() override {
    // TODO: All operands of base recipe must exist and be at same index in
    // derived recipe.
    llvm_unreachable(
        "VPWidenIntOrFpInductionRecipe generates its own backedge value");
  }

  /// Returns the step value of the induction.
  VPValue *getStepValue() { return getOperand(1); }
  const VPValue *getStepValue() const { return getOperand(1); }

  /// Returns the first defined value as a TruncInst, if it is one, or nullptr
  /// otherwise.
  TruncInst *getTruncInst() { return Trunc; }
  const TruncInst *getTruncInst() const { return Trunc; }

  PHINode *getPHINode() { return IV; }

  /// Returns the induction descriptor for the recipe.
  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

  /// Returns true if the induction is canonical, i.e. starting at 0 and
  /// incremented by UF * VF (= the original IV is incremented by 1).
  bool isCanonical() const;

  /// Returns the scalar type of the induction.
  const Type *getScalarType() const {
    return Trunc ? Trunc->getType() : IV->getType();
  }
};

class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
  const InductionDescriptor &IndDesc;

  bool IsScalarAfterVectorization;

public:
  /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
  /// Start and step value \p Step.
  VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc,
                                bool IsScalarAfterVectorization)
      : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
        IndDesc(IndDesc),
        IsScalarAfterVectorization(IsScalarAfterVectorization) {
    addOperand(Start);
    addOperand(Step);
  }

  ~VPWidenPointerInductionRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)

  /// Generate vector values for the pointer induction.
  void execute(VPTransformState &State) override;

  /// Returns true if only scalar values will be generated.
  bool onlyScalarsGenerated(ElementCount VF);

  /// Returns the induction descriptor for the recipe.
  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for handling header phis that are widened in the vector loop.
/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
/// managed in the recipe directly.
class VPWidenPHIRecipe : public VPHeaderPHIRecipe {
  /// List of incoming blocks. Only used in the VPlan native path.
  SmallVector<VPBasicBlock *, 2> IncomingBlocks;

public:
  /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
  VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
      : VPHeaderPHIRecipe(VPDef::VPWidenPHISC, Phi) {
    if (Start)
      addOperand(Start);
  }

  ~VPWidenPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
  void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
    addOperand(IncomingV);
    IncomingBlocks.push_back(IncomingBlock);
  }

  /// Returns the \p I th incoming VPBasicBlock.
  VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }

  /// Returns the \p I th incoming VPValue.
  VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
};

/// A recipe for handling first-order recurrence phis. The start value is the
/// first operand of the recipe and the incoming value from the backedge is the
/// second operand.
struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
  VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
      : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}

  VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)

  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
  }

  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
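// Illustrative sketch: building the recipe for `prev` in a loop like
// `for (...) { use(prev); prev = cur; }` (Phi, StartV and CurV are
// placeholders):
// \code
//   auto *RecurPhi = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
//   RecurPhi->addOperand(CurV); // value incoming over the backedge
// \endcode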
/// A recipe for handling reduction phis. The start value is the first operand
/// of the recipe and the incoming value from the backedge is the second
/// operand.
class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
  /// Descriptor for the reduction.
  const RecurrenceDescriptor &RdxDesc;

  /// The phi is part of an in-loop reduction.
  bool IsInLoop;

  /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
  bool IsOrdered;

public:
  /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
  /// RdxDesc.
  VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
                       VPValue &Start, bool IsInLoop = false,
                       bool IsOrdered = false)
      : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
        RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
    assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
  }

  ~VPReductionPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPDef::VPReductionPHISC;
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const RecurrenceDescriptor &getRecurrenceDescriptor() const {
    return RdxDesc;
  }

  /// Returns true if the phi is part of an ordered reduction.
  bool isOrdered() const { return IsOrdered; }

  /// Returns true if the phi is part of an in-loop reduction.
  bool isInLoop() const { return IsInLoop; }
};

/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class VPBlendRecipe : public VPRecipeBase, public VPValue {
  PHINode *Phi;

public:
  /// The blend operation is a User of the incoming values and of their
  /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
  /// might be incoming with a full mask for which there is no VPValue.
  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
      : VPRecipeBase(VPDef::VPBlendSC, Operands), VPValue(this, Phi), Phi(Phi) {
    assert(Operands.size() > 0 &&
           ((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
           "Expected either a single incoming value or a positive even number "
           "of operands");
  }

  VP_CLASSOF_IMPL(VPDef::VPBlendSC)

  /// Return the number of incoming values, taking into account that a single
  /// incoming value has no mask.
  unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }

  /// Return incoming value number \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); }

  /// Return mask number \p Idx.
  VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
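  // Illustrative note: for a phi with incoming values A and B and masks M0
  // and M1, the operands are [A, M0, B, M1]; getNumIncomingValues() == 2,
  // getIncomingValue(1) == B and getMask(1) == M1.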
  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Recurses through Blend recipes only; the recursion must terminate at a
    // header phi at the latest.
    return all_of(users(),
                  [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
  }
};

/// VPInterleaveRecipe is a recipe for transforming an interleave group of
/// loads or stores into one wide load/store and shuffles. The first operand of
/// a VPInterleave recipe is the address, followed by the stored values,
/// followed by an optional mask.
class VPInterleaveRecipe : public VPRecipeBase {
  const InterleaveGroup<Instruction> *IG;

  /// Indicates if the interleave group is in a conditional block and requires
  /// a mask.
  bool HasMask = false;

  /// Indicates if gaps between members of the group need to be masked out or
  /// if unused gaps can be loaded speculatively.
  bool NeedsMaskForGaps = false;

public:
  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
                     ArrayRef<VPValue *> StoredValues, VPValue *Mask,
                     bool NeedsMaskForGaps)
      : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
        NeedsMaskForGaps(NeedsMaskForGaps) {
    for (unsigned i = 0; i < IG->getFactor(); ++i)
      if (Instruction *I = IG->getMember(i)) {
        if (I->getType()->isVoidTy())
          continue;
        new VPValue(I, this);
      }

    for (auto *SV : StoredValues)
      addOperand(SV);
    if (Mask) {
      HasMask = true;
      addOperand(Mask);
    }
  }
  ~VPInterleaveRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last, currently 2nd operand.
    return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Return the VPValues stored by this interleave group. If it is a load
  /// interleave group, return an empty ArrayRef.
  ArrayRef<VPValue *> getStoredValues() const {
    // The first operand is the address, followed by the stored values,
    // followed by an optional mask.
    return ArrayRef<VPValue *>(op_begin(), getNumOperands())
        .slice(1, getNumStoreOperands());
  }

  /// Generate the wide load or store, and shuffles.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }

  /// Returns the number of stored operands of this interleave group. Returns 0
  /// for load interleave groups.
  unsigned getNumStoreOperands() const {
    return getNumOperands() - (HasMask ? 2 : 1);
  }
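  // Worked example of the operand layout (illustrative; Addr/S0/S1/M are
  // hypothetical operands): a masked store group with factor 2 has operands
  // [Addr, S0, S1, M], so getNumStoreOperands() == 4 - 2 == 2 and
  // getStoredValues() yields [S0, S1]. A load group has operands [Addr] plus
  // an optional mask, and getStoredValues() is empty.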
  /// The recipe only uses the first lane of the address.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
  }
};

/// A recipe to represent in-loop reduction operations, performing a reduction
/// on a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPRecipeBase, public VPValue {
  /// The recurrence descriptor for the reduction in question.
  const RecurrenceDescriptor *RdxDesc;
  /// Pointer to the TTI, needed to create the target reduction.
  const TargetTransformInfo *TTI;

public:
  VPReductionRecipe(const RecurrenceDescriptor *R, Instruction *I,
                    VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
                    const TargetTransformInfo *TTI)
      : VPRecipeBase(VPDef::VPReductionSC, {ChainOp, VecOp}), VPValue(this, I),
        RdxDesc(R), TTI(TTI) {
    if (CondOp)
      addOperand(CondOp);
  }

  ~VPReductionRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPReductionSC)

  /// Generate the reduction in the loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// The VPValue of the scalar Chain being accumulated.
  VPValue *getChainOp() const { return getOperand(0); }
  /// The VPValue of the vector value to be reduced.
  VPValue *getVecOp() const { return getOperand(1); }
  /// The VPValue of the condition for the block.
  VPValue *getCondOp() const {
    return getNumOperands() > 2 ? getOperand(2) : nullptr;
  }
};

/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to
/// be uniform, only one copy, per lane zero, will be generated.
class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPValue {
  /// Indicator if only a single replica per lane is needed.
  bool IsUniform;

  /// Indicator if the replicas are also predicated.
  bool IsPredicated;

public:
  template <typename IterT>
  VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
                    bool IsUniform, VPValue *Mask = nullptr)
      : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
        VPValue(this, I), IsUniform(IsUniform), IsPredicated(Mask) {
    if (Mask)
      addOperand(Mask);
  }

  ~VPReplicateRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPReplicateSC)

  /// Generate replicas of the desired Ingredient. Replicas will be generated
  /// for all parts and lanes unless a specific part and lane are specified in
  /// the \p State.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  bool isUniform() const { return IsUniform; }

  bool isPredicated() const { return IsPredicated; }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return isUniform();
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe is used by a widened recipe via an intervening
  /// VPPredInstPHIRecipe. In this case, the scalar values should also be
  /// packed in a vector.
  bool shouldPack() const;

  /// Return the mask of a predicated VPReplicateRecipe.
  VPValue *getMask() {
    assert(isPredicated() &&
           "Trying to get the mask of an unpredicated recipe");
    return getOperand(getNumOperands() - 1);
  }
};

/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase {
public:
  VPBranchOnMaskRecipe(VPValue *BlockInMask)
      : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
    if (BlockInMask) // nullptr means all-one mask.
      addOperand(BlockInMask);
  }

  VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)

  /// Generate the extraction of the appropriate bit from the block mask and
  /// the conditional branch.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override {
    O << Indent << "BRANCH-ON-MASK ";
    if (VPValue *Mask = getMask())
      Mask->printAsOperand(O, SlotTracker);
    else
      O << " All-One";
  }
#endif

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
    // Mask is optional.
    return getNumOperands() == 1 ? getOperand(0) : nullptr;
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};
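// Minimal usage sketch for the recipe above (illustrative; M is a
// hypothetical mask VPValue): predicated replication places a branch-on-mask
// at the end of the predicating block, and a VPPredInstPHIRecipe (declared
// below) where control converges again.
//
//   auto *Branch = new VPBranchOnMaskRecipe(M); // pass nullptr for an
//                                               // all-ones mask
//   VPValue *Mask = Branch->getMask();          // == M, or nullptr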
/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
class VPPredInstPHIRecipe : public VPRecipeBase, public VPValue {
public:
  /// Construct a VPPredInstPHIRecipe given \p PredV, whose value needs a phi
  /// node after merging back from a Branch-on-Mask.
  VPPredInstPHIRecipe(VPValue *PredV)
      : VPRecipeBase(VPDef::VPPredInstPHISC, PredV), VPValue(this) {}
  ~VPPredInstPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)

  /// Generates phi nodes for live-outs as needed to retain SSA form.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// A Recipe for widening load/store operations.
/// The recipe uses the following VPValues:
/// - For load: Address, optional mask
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
  Instruction &Ingredient;

  // Whether the loaded-from / stored-to addresses are consecutive.
  bool Consecutive;

  // Whether the consecutive loaded/stored addresses are in reverse order.
  bool Reverse;

  void setMask(VPValue *Mask) {
    if (!Mask)
      return;
    addOperand(Mask);
  }

  bool isMasked() const {
    return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
  }

public:
  VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
                                 bool Consecutive, bool Reverse)
      : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}),
        Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) {
    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
    new VPValue(this, &Load);
    setMask(Mask);
  }

  VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
                                 VPValue *StoredValue, VPValue *Mask,
                                 bool Consecutive, bool Reverse)
      : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue}),
        Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
    setMask(Mask);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last operand.
    return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Returns true if this recipe is a store.
  bool isStore() const { return isa<StoreInst>(Ingredient); }

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const {
    assert(isStore() && "Stored value only available for store instructions");
    return getOperand(1); // Stored value is the 2nd, mandatory operand.
  }

  // Return whether the loaded-from / stored-to addresses are consecutive.
  bool isConsecutive() const { return Consecutive; }

  // Return whether the consecutive loaded/stored addresses are in reverse
  // order.
  bool isReverse() const { return Reverse; }
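  // Worked example of the operand layout (illustrative; Addr/Val/M are
  // hypothetical operands): a masked store has operands [Addr, Val, M], so
  // isMasked() checks for 3 operands, while a masked load has operands
  // [Addr, M] and isMasked() checks for 2. In both cases getMask() returns
  // the last operand.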
  /// Generate the wide load/store.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");

    // Widened, consecutive memory operations only demand the first lane of
    // their address, unless the same operand is also stored. That latter can
    // happen with opaque pointers.
    return Op == getAddr() && isConsecutive() &&
           (!isStore() || Op != getStoredValue());
  }

  Instruction &getIngredient() const { return Ingredient; }
};

/// Recipe to expand a SCEV expression.
class VPExpandSCEVRecipe : public VPRecipeBase, public VPValue {
  const SCEV *Expr;
  ScalarEvolution &SE;

public:
  VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
      : VPRecipeBase(VPDef::VPExpandSCEVSC, {}), VPValue(this), Expr(Expr),
        SE(SE) {}

  ~VPExpandSCEVRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)

  /// Generate a value for the expansion of the SCEV expression.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const SCEV *getSCEV() const { return Expr; }
};

/// Canonical scalar induction phi of the vector loop, starting at the
/// specified start value (either 0 or the resume value when vectorizing the
/// epilogue loop). VPWidenCanonicalIVRecipe represents the vector version of
/// the canonical induction variable.
class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
  DebugLoc DL;

public:
  VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV), DL(DL) {}

  ~VPCanonicalIVPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
  }

  /// Generate the canonical scalar induction phi of the vector loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the scalar type of the induction.
  const Type *getScalarType() const {
    return getOperand(0)->getLiveInIRValue()->getType();
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Check if the induction described by \p Kind, \p Start and \p Step is
  /// canonical, i.e. has the same start, step (of 1), and type as the
  /// canonical IV.
  bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start,
                   VPValue *Step, Type *Ty) const;
};

/// A recipe for generating the active lane mask for the vector loop that is
/// used to predicate the vector operations.
/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
/// remove VPActiveLaneMaskPHIRecipe.
class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
  DebugLoc DL;

public:
  VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
      : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask),
        DL(DL) {}

  ~VPActiveLaneMaskPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
  }

  /// Generate the active lane mask phi of the vector loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
public:
  VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
      : VPRecipeBase(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}),
        VPValue(this) {}

  ~VPWidenCanonicalIVRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)

  /// Generate a canonical vector induction variable of the vector loop, with
  /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
  /// step = <VF*UF, VF*UF, ..., VF*UF>.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the scalar type of the induction.
  const Type *getScalarType() const {
    return cast<VPCanonicalIVPHIRecipe>(getOperand(0)->getDefiningRecipe())
        ->getScalarType();
  }
};
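// Worked example for the start/step formula above (illustrative): with VF = 4
// and UF = 2, part 0 starts at <0, 1, 2, 3> and part 1 at <4, 5, 6, 7>; every
// vector-loop iteration each part advances by the splat <8, 8, 8, 8>, i.e. by
// VF * UF lanes.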
/// A recipe for converting the canonical IV value to the corresponding value
/// of an IV with different start and step values, using Start + CanonicalIV *
/// Step.
class VPDerivedIVRecipe : public VPRecipeBase, public VPValue {
  /// The type of the result value. It may be smaller than the type of the
  /// induction and in this case it will get truncated to ResultTy.
  Type *ResultTy;

  /// Induction descriptor for the induction the canonical IV is transformed
  /// to.
  const InductionDescriptor &IndDesc;

public:
  VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
                    VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
                    Type *ResultTy)
      : VPRecipeBase(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
        VPValue(this), ResultTy(ResultTy), IndDesc(IndDesc) {}

  ~VPDerivedIVRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)

  /// Generate the transformed value of the induction at offset StartValue
  /// (1st operand) + IV (2nd operand) * StepValue (3rd operand).
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getStartValue() const { return getOperand(0); }
  VPValue *getCanonicalIV() const { return getOperand(1); }
  VPValue *getStepValue() const { return getOperand(2); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values.
class VPScalarIVStepsRecipe : public VPRecipeBase, public VPValue {
  const InductionDescriptor &IndDesc;

public:
  VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
                        VPValue *Step)
      : VPRecipeBase(VPDef::VPScalarIVStepsSC, {IV, Step}), VPValue(this),
        IndDesc(IndDesc) {}

  ~VPScalarIVStepsRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)

  /// Generate the scalarized versions of the phi node as needed by their
  /// users.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getStepValue() const { return getOperand(1); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipes, each representing a sequence of
/// output IR instructions. All PHI-like recipes must come before any non-PHI
/// recipes.
class VPBasicBlock : public VPBlockBase {
public:
  using RecipeListTy = iplist<VPRecipeBase>;

private:
  /// The VPRecipes held in the order of output instructions to generate.
  RecipeListTy Recipes;

public:
  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
      : VPBlockBase(VPBasicBlockSC, Name.str()) {
    if (Recipe)
      appendRecipe(Recipe);
  }

  ~VPBasicBlock() override {
    while (!Recipes.empty())
      Recipes.pop_back();
  }

  /// Instruction iterators...
  using iterator = RecipeListTy::iterator;
  using const_iterator = RecipeListTy::const_iterator;
  using reverse_iterator = RecipeListTy::reverse_iterator;
  using const_reverse_iterator = RecipeListTy::const_reverse_iterator;

  //===--------------------------------------------------------------------===//
  /// Recipe iterator methods
  ///
  inline iterator begin() { return Recipes.begin(); }
  inline const_iterator begin() const { return Recipes.begin(); }
  inline iterator end() { return Recipes.end(); }
  inline const_iterator end() const { return Recipes.end(); }

  inline reverse_iterator rbegin() { return Recipes.rbegin(); }
  inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
  inline reverse_iterator rend() { return Recipes.rend(); }
  inline const_reverse_iterator rend() const { return Recipes.rend(); }

  inline size_t size() const { return Recipes.size(); }
  inline bool empty() const { return Recipes.empty(); }
  inline const VPRecipeBase &front() const { return Recipes.front(); }
  inline VPRecipeBase &front() { return Recipes.front(); }
  inline const VPRecipeBase &back() const { return Recipes.back(); }
  inline VPRecipeBase &back() { return Recipes.back(); }

  /// Returns a reference to the list of recipes.
  RecipeListTy &getRecipeList() { return Recipes; }

  /// Returns a pointer to a member of the recipe list.
  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
    return &VPBasicBlock::Recipes;
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
  }

  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
    assert(Recipe && "No recipe to append.");
    assert(!Recipe->Parent && "Recipe already in VPlan");
    Recipe->Parent = this;
    Recipes.insert(InsertPt, Recipe);
  }

  /// Augment the existing recipes of a VPBasicBlock with an additional
  /// \p Recipe as the last recipe.
  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  /// Return the position of the first non-phi node recipe in the block.
  iterator getFirstNonPhi();

  /// Returns an iterator range over the PHI-like recipes in the block.
  iterator_range<iterator> phis() {
    return make_range(begin(), getFirstNonPhi());
  }

  void dropAllReferences(VPValue *NewValue) override;

  /// Split current block at \p SplitAt by inserting a new block between the
  /// current block and its successors and moving all recipes starting at
  /// SplitAt to the new block. Returns the new block.
  VPBasicBlock *splitAt(iterator SplitAt);

  VPRegionBlock *getEnclosingLoopRegion();
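  // Minimal usage sketch (illustrative; R0 and R1 are hypothetical,
  // already-created recipes):
  //   VPBasicBlock *VPBB = new VPBasicBlock("body");
  //   VPBB->appendRecipe(R0);          // R0 becomes the last recipe.
  //   VPBB->insert(R1, VPBB->begin()); // R1 now precedes R0.
  //   for (VPRecipeBase &Phi : VPBB->phis())
  //     ; // Visits only the leading PHI-like recipes.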
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
  /// SlotTracker is used to print unnamed VPValues using consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_stream &O) version.
#endif

  /// If the block has multiple successors, return the branch recipe
  /// terminating the block. If there are no or only a single successor,
  /// return nullptr.
  VPRecipeBase *getTerminator();
  const VPRecipeBase *getTerminator() const;

  /// Returns true if the block is exiting its parent region.
  bool isExiting() const;

private:
  /// Create an IR BasicBlock to hold the output instructions generated by
  /// this VPBasicBlock, and return it. Update the CFGState accordingly.
  BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
};

/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
/// A VPRegionBlock may indicate that its contents are to be replicated
/// several times. This is designed to support predicated scalarization, in
/// which a scalar if-then code structure needs to be generated VF * UF times.
/// Having this replication indicator helps to keep a single model for
/// multiple candidate VF's. The actual replication takes place only once the
/// desired VF and UF have been determined.
class VPRegionBlock : public VPBlockBase {
  /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
  VPBlockBase *Entry;

  /// Hold the Single Exiting block of the SESE region modelled by the
  /// VPRegionBlock.
  VPBlockBase *Exiting;

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool IsReplicator;

public:
  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
                const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
        IsReplicator(IsReplicator) {
    assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
    assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
    Entry->setParent(this);
    Exiting->setParent(this);
  }
  VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
        IsReplicator(IsReplicator) {}

  ~VPRegionBlock() override {
    if (Entry) {
      VPValue DummyValue;
      Entry->dropAllReferences(&DummyValue);
      deleteCFG(Entry);
    }
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
  }

  const VPBlockBase *getEntry() const { return Entry; }
  VPBlockBase *getEntry() { return Entry; }

  /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
  /// EntryBlock must have no predecessors.
  void setEntry(VPBlockBase *EntryBlock) {
    assert(EntryBlock->getPredecessors().empty() &&
           "Entry block cannot have predecessors.");
    Entry = EntryBlock;
    EntryBlock->setParent(this);
  }

  const VPBlockBase *getExiting() const { return Exiting; }
  VPBlockBase *getExiting() { return Exiting; }

  /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
  /// ExitingBlock must have no successors.
  void setExiting(VPBlockBase *ExitingBlock) {
    assert(ExitingBlock->getSuccessors().empty() &&
           "Exit block cannot have successors.");
    Exiting = ExitingBlock;
    ExitingBlock->setParent(this);
  }

  /// Returns the pre-header VPBasicBlock of the loop region.
  VPBasicBlock *getPreheaderVPBB() {
    assert(!isReplicator() && "should only get pre-header of loop regions");
    return getSinglePredecessor()->getExitingBasicBlock();
  }

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool isReplicator() const { return IsReplicator; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRegionBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  void dropAllReferences(VPValue *NewValue) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
  /// \p Indent. \p SlotTracker is used to print unnamed VPValues using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual regions is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_stream &O) version.
#endif
};

/// VPlan models a candidate for vectorization, encoding various decisions
/// taken to produce efficient output IR, including which branches,
/// basic-blocks and output IR instructions to generate, and their cost. VPlan
/// holds a Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an
/// Entry VPBasicBlock.
class VPlan {
  friend class VPlanPrinter;
  friend class VPSlotTracker;

  /// Hold the single entry to the Hierarchical CFG of the VPlan, i.e. the
  /// preheader of the vector loop.
  VPBasicBlock *Entry;

  /// VPBasicBlock corresponding to the original preheader. Used to place
  /// VPExpandSCEV recipes for expressions used during skeleton creation and
  /// the rest of VPlan execution.
  VPBasicBlock *Preheader;

  /// Holds the VFs applicable to this VPlan.
  SmallSetVector<ElementCount, 2> VFs;

  /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
  /// any UF.
  SmallSetVector<unsigned, 2> UFs;

  /// Holds the name of the VPlan, for printing.
  std::string Name;

  /// Represents the trip count of the original loop, for folding
  /// the tail.
  VPValue *TripCount = nullptr;

  /// Represents the backedge taken count of the original loop, for folding
  /// the tail. It equals TripCount - 1.
  VPValue *BackedgeTakenCount = nullptr;

  /// Represents the vector trip count.
  VPValue VectorTripCount;

  /// Holds a mapping between Values and their corresponding VPValue inside
  /// VPlan.
  Value2VPValueTy Value2VPValue;

  /// Contains all the external definitions created for this VPlan. External
  /// definitions are VPValues that hold a pointer to their underlying IR.
  SmallVector<VPValue *, 16> VPLiveInsToFree;

  /// Indicates whether it is safe to use the Value2VPValue mapping or if the
  /// mapping cannot be used any longer, because it is stale.
  bool Value2VPValueEnabled = true;

  /// Values used outside the plan.
  MapVector<PHINode *, VPLiveOut *> LiveOuts;

  /// Mapping from SCEVs to the VPValues representing their expansions.
  /// NOTE: This mapping is temporary and will be removed once all users have
  /// been modeled in VPlan directly.
  DenseMap<const SCEV *, VPValue *> SCEVToExpansion;

public:
  /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
  /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need
  /// to be disconnected, as the bypass blocks between them are not yet
  /// modeled in VPlan.
  VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
      : VPlan(Preheader, Entry) {
    TripCount = TC;
  }

  /// Construct a VPlan with original preheader \p Preheader and \p Entry to
  /// the plan. At the moment, \p Preheader and \p Entry need to be
  /// disconnected, as the bypass blocks between them are not yet modeled in
  /// VPlan.
  VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
      : Entry(Entry), Preheader(Preheader) {
    Entry->setPlan(this);
    Preheader->setPlan(this);
    assert(Preheader->getNumSuccessors() == 0 &&
           Preheader->getNumPredecessors() == 0 &&
           "preheader must be disconnected");
  }

  ~VPlan();

  /// Create an initial VPlan with preheader and entry blocks. Creates a
  /// VPExpandSCEVRecipe for \p TripCount and uses it as plan's trip count.
  static VPlanPtr createInitialVPlan(const SCEV *TripCount,
                                     ScalarEvolution &PSE);

  /// Prepare the plan for execution, setting up the required live-in values.
  void prepareToExecute(Value *TripCount, Value *VectorTripCount,
                        Value *CanonicalIVStartValue, VPTransformState &State,
                        bool IsEpilogueVectorization);

  /// Generate the IR code for this VPlan.
  void execute(VPTransformState *State);

  VPBasicBlock *getEntry() { return Entry; }
  const VPBasicBlock *getEntry() const { return Entry; }

  /// The trip count of the original loop.
  VPValue *getTripCount() const {
    assert(TripCount && "trip count needs to be set before accessing it");
    return TripCount;
  }

  /// The backedge taken count of the original loop.
  VPValue *getOrCreateBackedgeTakenCount() {
    if (!BackedgeTakenCount)
      BackedgeTakenCount = new VPValue();
    return BackedgeTakenCount;
  }

  /// The vector trip count.
  VPValue &getVectorTripCount() { return VectorTripCount; }

  /// Mark the plan to indicate that using Value2VPValue is not safe any
  /// longer, because it may be stale.
  void disableValue2VPValue() { Value2VPValueEnabled = false; }

  void addVF(ElementCount VF) { VFs.insert(VF); }

  void setVF(ElementCount VF) {
    assert(hasVF(VF) && "Cannot set VF not already in plan");
    VFs.clear();
    VFs.insert(VF);
  }

  bool hasVF(ElementCount VF) { return VFs.count(VF); }

  bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }

  bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }

  void setUF(unsigned UF) {
    assert(hasUF(UF) && "Cannot set the UF not already in plan");
    UFs.clear();
    UFs.insert(UF);
  }

  /// Return a string with the name of the plan and the applicable VFs and UFs.
  std::string getName() const;

  void setName(const Twine &newName) { Name = newName.str(); }

  void addVPValue(Value *V, VPValue *VPV) {
    assert((Value2VPValueEnabled || VPV->isLiveIn()) &&
           "Value2VPValue mapping may be out of date!");
    assert(V && "Trying to add a null Value to VPlan");
    assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
    Value2VPValue[V] = VPV;
  }

  /// Returns the VPValue for \p V. \p OverrideAllowed can be used to disable
  /// checking whether it is safe to query VPValues using IR Values.
  VPValue *getVPValue(Value *V, bool OverrideAllowed = false) {
    assert(V && "Trying to get the VPValue of a null Value");
    assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
    assert((Value2VPValueEnabled || OverrideAllowed ||
            Value2VPValue[V]->isLiveIn()) &&
           "Value2VPValue mapping may be out of date!");
    return Value2VPValue[V];
  }

  /// Gets the VPValue for \p V or adds a new live-in (if none exists yet) for
  /// \p V.
  VPValue *getVPValueOrAddLiveIn(Value *V) {
    assert(V && "Trying to get or add the VPValue of a null Value");
    if (!Value2VPValue.count(V)) {
      VPValue *VPV = new VPValue(V);
      VPLiveInsToFree.push_back(VPV);
      addVPValue(V, VPV);
    }

    return getVPValue(V);
  }

  void removeVPValueFor(Value *V) {
    assert(Value2VPValueEnabled &&
           "IR value to VPValue mapping may be out of date!");
    Value2VPValue.erase(V);
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPlan to \p O.
  void print(raw_ostream &O) const;

  /// Print this VPlan in DOT format to \p O.
  void printDOT(raw_ostream &O) const;

  /// Dump the plan to stderr (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  /// Returns a range mapping the values of the range \p Operands to their
  /// corresponding VPValues.
  iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>>
  mapToVPValues(User::op_range Operands) {
    std::function<VPValue *(Value *)> Fn = [this](Value *Op) {
      return getVPValueOrAddLiveIn(Op);
    };
    return map_range(Operands, Fn);
  }

  /// Returns the VPRegionBlock of the vector loop.
  VPRegionBlock *getVectorLoopRegion() {
    return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
  }
  const VPRegionBlock *getVectorLoopRegion() const {
    return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
  }
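  // Minimal usage sketch of mapToVPValues (illustrative; Plan is a
  // hypothetical VPlan and I a hypothetical IR Instruction *): map all IR
  // operands of I to VPValues, creating live-ins on the fly where needed.
  //   for (VPValue *Op : Plan.mapToVPValues(I->operands()))
  //     ; // Op is an existing mapping or a newly added live-in.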
  /// Returns the canonical induction recipe of the vector loop.
  VPCanonicalIVPHIRecipe *getCanonicalIV() {
    VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
    if (EntryVPBB->empty()) {
      // VPlan native path.
      EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
    }
    return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
  }

  /// Find and return the VPActiveLaneMaskPHIRecipe from the header - there
  /// can be at most one. If there isn't one, return nullptr.
  VPActiveLaneMaskPHIRecipe *getActiveLaneMaskPhi();

  void addLiveOut(PHINode *PN, VPValue *V);

  void removeLiveOut(PHINode *PN) {
    delete LiveOuts[PN];
    LiveOuts.erase(PN);
  }

  const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
    return LiveOuts;
  }

  VPValue *getSCEVExpansion(const SCEV *S) const {
    return SCEVToExpansion.lookup(S);
  }

  void addSCEVExpansion(const SCEV *S, VPValue *V) {
    assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
    SCEVToExpansion[S] = V;
  }

  /// \return The block corresponding to the original preheader.
  VPBasicBlock *getPreheader() { return Preheader; }
  const VPBasicBlock *getPreheader() const { return Preheader; }

private:
  /// Add to the given dominator tree the header block and every new basic
  /// block that was created between it and the latch block, inclusive.
  static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
                                  BasicBlock *LoopPreHeaderBB,
                                  BasicBlock *LoopExitBB);
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
/// indented and follows the dot format.
class VPlanPrinter {
  raw_ostream &OS;
  const VPlan &Plan;
  unsigned Depth = 0;
  unsigned TabWidth = 2;
  std::string Indent;
  unsigned BID = 0;
  SmallDenseMap<const VPBlockBase *, unsigned> BlockID;

  VPSlotTracker SlotTracker;

  /// Handle indentation.
  void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }

  /// Print a given \p Block of the Plan.
  void dumpBlock(const VPBlockBase *Block);

  /// Print the information related to the CFG edges going out of a given
  /// \p Block, followed by printing the successor blocks themselves.
  void dumpEdges(const VPBlockBase *Block);

  /// Print a given \p BasicBlock, including its VPRecipes, followed by
  /// printing its successor blocks.
  void dumpBasicBlock(const VPBasicBlock *BasicBlock);

  /// Print a given \p Region of the Plan.
  void dumpRegion(const VPRegionBlock *Region);

  unsigned getOrCreateBID(const VPBlockBase *Block) {
    return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
  }

  Twine getOrCreateName(const VPBlockBase *Block);

  Twine getUID(const VPBlockBase *Block);

  /// Print the information related to a CFG edge between two VPBlockBases.
  void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
                const Twine &Label);

public:
  VPlanPrinter(raw_ostream &O, const VPlan &P)
      : OS(O), Plan(P), SlotTracker(&P) {}

  LLVM_DUMP_METHOD void dump();
};

struct VPlanIngredient {
  const Value *V;

  VPlanIngredient(const Value *V) : V(V) {}

  void print(raw_ostream &O) const;
};

inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
  I.print(OS);
  return OS;
}

inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
  Plan.print(OS);
  return OS;
}
#endif

//===----------------------------------------------------------------------===//
// VPlan Utilities
//===----------------------------------------------------------------------===//

/// Class that provides utilities for VPBlockBases in VPlan.
class VPBlockUtils {
public:
  VPBlockUtils() = delete;

  /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
  /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
  /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
  /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
  /// have neither successors nor predecessors.
  static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
    assert(NewBlock->getSuccessors().empty() &&
           NewBlock->getPredecessors().empty() &&
           "Can't insert new block with predecessors or successors.");
    NewBlock->setParent(BlockPtr->getParent());
    SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
    for (VPBlockBase *Succ : Succs) {
      disconnectBlocks(BlockPtr, Succ);
      connectBlocks(NewBlock, Succ);
    }
    connectBlocks(BlockPtr, NewBlock);
  }

  /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
  /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and
  /// \p BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p
  /// BlockPtr parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no
  /// successors and \p IfTrue and \p IfFalse must have neither successors nor
  /// predecessors.
  static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
                                   VPBlockBase *BlockPtr) {
    assert(IfTrue->getSuccessors().empty() &&
           "Can't insert IfTrue with successors.");
    assert(IfFalse->getSuccessors().empty() &&
           "Can't insert IfFalse with successors.");
    BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
    IfTrue->setPredecessors({BlockPtr});
    IfFalse->setPredecessors({BlockPtr});
    IfTrue->setParent(BlockPtr->getParent());
    IfFalse->setParent(BlockPtr->getParent());
  }
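  // Minimal usage sketch (illustrative; BB, Then and Else are hypothetical,
  // disconnected blocks): build a two-way split below BB.
  //   VPBlockUtils::insertTwoBlocksAfter(Then, Else, BB);
  //   // BB now has successors {Then, Else}; both name BB as their only
  //   // predecessor and share BB's parent region.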
  /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
  /// the successors of \p From and \p From to the predecessors of \p To. Both
  /// VPBlockBases must have the same parent, which can be null. Both
  /// VPBlockBases can be already connected to other VPBlockBases.
  static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
    assert((From->getParent() == To->getParent()) &&
           "Can't connect two blocks with different parents");
    assert(From->getNumSuccessors() < 2 &&
           "Blocks can't have more than two successors.");
    From->appendSuccessor(To);
    To->appendPredecessor(From);
  }

  /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
  /// from the successors of \p From and \p From from the predecessors of \p
  /// To.
  static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
    assert(To && "Successor to disconnect is null.");
    From->removeSuccessor(To);
    To->removePredecessor(From);
  }

  /// Return an iterator range over \p Range which only includes \p BlockTy
  /// blocks. The accesses are cast to \p BlockTy.
  template <typename BlockTy, typename T>
  static auto blocksOnly(const T &Range) {
    // Create BaseTy with correct const-ness based on BlockTy.
    using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
                                      const VPBlockBase, VPBlockBase>;

    // We need to first create an iterator range over (const) BlockTy &
    // instead of (const) BlockTy * for filter_range to work properly.
    auto Mapped =
        map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
    auto Filter = make_filter_range(
        Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
    return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
      return cast<BlockTy>(&Block);
    });
  }
};
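// Minimal usage sketch of blocksOnly (illustrative; Region is a hypothetical
// VPRegionBlock *): visit only the VPBasicBlock successors of the region's
// entry, skipping nested region blocks.
//   for (VPBasicBlock *VPBB :
//        VPBlockUtils::blocksOnly<VPBasicBlock>(
//            Region->getEntry()->successors()))
//     ; // VPBB is already cast to VPBasicBlock *.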
class VPInterleavedAccessInfo {
  DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
      InterleaveGroupMap;

  /// Type for mapping of instruction based interleave groups to VPInstruction
  /// interleave groups.
  using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
                             InterleaveGroup<VPInstruction> *>;

  /// Recursively traverse \p Region and populate VPlan based interleave
  /// groups based on \p IAI.
  void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
                   InterleavedAccessInfo &IAI);
  /// Recursively traverse \p Block and populate VPlan based interleave groups
  /// based on \p IAI.
  void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
                  InterleavedAccessInfo &IAI);

public:
  VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);

  ~VPInterleavedAccessInfo() {
    SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
    // Avoid releasing a pointer twice.
    for (auto &I : InterleaveGroupMap)
      DelSet.insert(I.second);
    for (auto *Ptr : DelSet)
      delete Ptr;
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if \p Instr does not belong to any group.
  InterleaveGroup<VPInstruction> *
  getInterleaveGroup(VPInstruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }
};

/// Class that maps (parts of) an existing VPlan to trees of combined
/// VPInstructions.
class VPlanSlp {
  enum class OpMode { Failed, Load, Opcode };

  /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
  /// DenseMap keys.
  struct BundleDenseMapInfo {
    static SmallVector<VPValue *, 4> getEmptyKey() {
      return {reinterpret_cast<VPValue *>(-1)};
    }

    static SmallVector<VPValue *, 4> getTombstoneKey() {
      return {reinterpret_cast<VPValue *>(-2)};
    }

    static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
      return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
    }

    static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
                        const SmallVector<VPValue *, 4> &RHS) {
      return LHS == RHS;
    }
  };

  /// Mapping of values in the original VPlan to a combined VPInstruction.
  DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
      BundleToCombined;

  VPInterleavedAccessInfo &IAI;

  /// Basic block to operate on. For now, only instructions in a single BB are
  /// considered.
  const VPBasicBlock &BB;

  /// Indicates whether we managed to combine all visited instructions or not.
  bool CompletelySLP = true;

  /// Width of the widest combined bundle in bits.
  unsigned WidestBundleBits = 0;

  using MultiNodeOpTy =
      typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;

  // Input operand bundles for the current multi node. Each multi node operand
  // bundle contains values not matching the multi node's opcode. They will
  // be reordered in reorderMultiNodeOps, once we have completed building a
  // multi node.
  SmallVector<MultiNodeOpTy, 4> MultiNodeOps;

  /// Indicates whether we are building a multi node currently.
  bool MultiNodeActive = false;

  /// Check if we can vectorize Operands together.
  bool areVectorizable(ArrayRef<VPValue *> Operands) const;

  /// Add combined instruction \p New for the bundle \p Operands.
  void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);

  /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
  VPInstruction *markFailed();

  /// Reorder operands in the multi node to maximize sequential memory access
  /// and commutative operations.
  SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();

  /// Choose the best candidate to use for the lane after \p Last. The set of
  /// candidates to choose from are values with an opcode matching \p Last's
  /// or loads consecutive to \p Last.
  std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
                                       SmallPtrSetImpl<VPValue *> &Candidates,
                                       VPInterleavedAccessInfo &IAI);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print bundle \p Values to dbgs().
  void dumpBundle(ArrayRef<VPValue *> Values);
#endif

public:
  VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}

  ~VPlanSlp() = default;

  /// Tries to build an SLP tree rooted at \p Operands and returns a
  /// VPInstruction combining \p Operands, if they can be combined.
  VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);

  /// Return the width of the widest combined bundle in bits.
  unsigned getWidestBundleBits() const { return WidestBundleBits; }

  /// Return true if all visited instructions can be combined.
  bool isCompletelySLP() const { return CompletelySLP; }
};

namespace vputils {

/// Returns true if only the first lane of \p Def is used.
bool onlyFirstLaneUsed(VPValue *Def);

/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the
/// live-in value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If
/// \p Plan's pre-header already contains a recipe expanding \p Expr, return
/// it. If not, create a new one.
VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
                                       ScalarEvolution &SE);

/// Returns true if \p VPV is uniform after vectorization.
inline bool isUniformAfterVectorization(VPValue *VPV) {
  // A value defined outside the vector region must be uniform after
  // vectorization inside a vector region.
  if (VPV->isDefinedOutsideVectorRegions())
    return true;
  VPRecipeBase *Def = VPV->getDefiningRecipe();
  assert(Def && "Must have definition for value defined inside vector region");
  if (auto *Rep = dyn_cast<VPReplicateRecipe>(Def))
    return Rep->isUniform();
  if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))
    return all_of(GEP->operands(), isUniformAfterVectorization);
  return false;
}
} // end namespace vputils

} // end namespace llvm

#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H