//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of the Vectorization Plan base classes:
/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
///    VPBlockBase, together implementing a Hierarchical CFG;
/// 2. Specializations of GraphTraits that allow VPBlockBase graphs to be
///    treated as proper graphs for generic algorithms;
/// 3. Pure virtual VPRecipeBase serving as the base class for recipes contained
///    within VPBasicBlocks;
/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
///    instruction;
/// 5. The VPlan class holding a candidate for vectorization;
/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H

#include "VPlanLoopInfo.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <map>
#include <string>

namespace llvm {

class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class LoopInfo;
class raw_ostream;
class RecurrenceDescriptor;
class Value;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
class VPlanSlp;

/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 9) = {1, 2, 4, 8}
struct VFRange {
  // A power of 2.
  const ElementCount Start;

  // Need not be a power of 2. If End <= Start, the range is empty.
  ElementCount End;

  bool isEmpty() const {
    return End.getKnownMinValue() <= Start.getKnownMinValue();
  }

  VFRange(const ElementCount &Start, const ElementCount &End)
      : Start(Start), End(End) {
    assert(Start.isScalable() == End.isScalable() &&
           "Both Start and End should have the same scalable flag");
    assert(isPowerOf2_32(Start.getKnownMinValue()) &&
           "Expected Start to be a power of 2");
  }
};
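
// For illustration only (not part of this header's API): a range such as
// [1, 9) enumerates the fixed candidate factors {1, 2, 4, 8}. A minimal
// sketch:
//   VFRange Range(ElementCount::getFixed(1), ElementCount::getFixed(9));
//   assert(!Range.isEmpty() && "covers VF = 1, 2, 4 and 8");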

using VPlanPtr = std::unique_ptr<VPlan>;

/// In what follows, the term "input IR" refers to code that is fed into the
/// vectorizer whereas the term "output IR" refers to code that is generated by
/// the vectorizer.

/// VPIteration represents a single point in the iteration space of the output
/// (vectorized and/or unrolled) IR loop.
struct VPIteration {
  /// in [0..UF)
  unsigned Part;

  /// in [0..VF)
  unsigned Lane;
};

/// This is a helper struct for maintaining vectorization state. It's used for
/// mapping values from the original loop to their corresponding values in
/// the new loop. Two mappings are maintained: one for vectorized values and
/// one for scalarized values. Vectorized values are represented with UF
/// vector values in the new loop, and scalarized values are represented with
/// UF x VF scalar values in the new loop. UF and VF are the unroll and
/// vectorization factors, respectively.
///
/// Entries can be added to either map with setVectorValue and setScalarValue,
/// which assert that an entry was not already added before. If an entry is to
/// replace an existing one, call resetVectorValue and resetScalarValue. This is
/// currently needed to modify the mapped values during "fix-up" operations that
/// occur once the first phase of widening is complete. These operations include
/// type truncation and the second phase of recurrence widening.
///
/// Entries from either map can be retrieved using the getVectorValue and
/// getScalarValue functions, which assert that the desired value exists.
struct VectorizerValueMap {
  friend struct VPTransformState;

private:
  /// The unroll factor. Each entry in the vector map contains UF vector
  /// values.
  unsigned UF;

  /// The vectorization factor. Each entry in the scalar map contains UF x VF
  /// scalar values.
  ElementCount VF;

  /// The vector and scalar map storage. We use std::map and not DenseMap
  /// because insertions to DenseMap invalidate its iterators.
  using VectorParts = SmallVector<Value *, 2>;
  using ScalarParts = SmallVector<SmallVector<Value *, 4>, 2>;
  std::map<Value *, VectorParts> VectorMapStorage;
  std::map<Value *, ScalarParts> ScalarMapStorage;

public:
  /// Construct an empty map with the given unroll and vectorization factors.
  VectorizerValueMap(unsigned UF, ElementCount VF) : UF(UF), VF(VF) {}

  /// \return True if the map has any vector entry for \p Key.
  bool hasAnyVectorValue(Value *Key) const {
    return VectorMapStorage.count(Key);
  }

  /// \return True if the map has a vector entry for \p Key and \p Part.
  bool hasVectorValue(Value *Key, unsigned Part) const {
    assert(Part < UF && "Queried Vector Part is too large.");
    if (!hasAnyVectorValue(Key))
      return false;
    const VectorParts &Entry = VectorMapStorage.find(Key)->second;
    assert(Entry.size() == UF && "VectorParts has wrong dimensions.");
    return Entry[Part] != nullptr;
  }

  /// \return True if the map has any scalar entry for \p Key.
  bool hasAnyScalarValue(Value *Key) const {
    return ScalarMapStorage.count(Key);
  }

  /// \return True if the map has a scalar entry for \p Key and \p Instance.
  bool hasScalarValue(Value *Key, const VPIteration &Instance) const {
    assert(Instance.Part < UF && "Queried Scalar Part is too large.");
    assert(Instance.Lane < VF.getKnownMinValue() &&
           "Queried Scalar Lane is too large.");

    if (!hasAnyScalarValue(Key))
      return false;
    const ScalarParts &Entry = ScalarMapStorage.find(Key)->second;
    assert(Entry.size() == UF && "ScalarParts has wrong dimensions.");
    assert(Entry[Instance.Part].size() == VF.getKnownMinValue() &&
           "ScalarParts has wrong dimensions.");
    return Entry[Instance.Part][Instance.Lane] != nullptr;
  }

  /// Retrieve the existing vector value that corresponds to \p Key and
  /// \p Part.
  Value *getVectorValue(Value *Key, unsigned Part) {
    assert(hasVectorValue(Key, Part) && "Getting non-existent value.");
    return VectorMapStorage[Key][Part];
  }

  /// Retrieve the existing scalar value that corresponds to \p Key and
  /// \p Instance.
  Value *getScalarValue(Value *Key, const VPIteration &Instance) {
    assert(hasScalarValue(Key, Instance) && "Getting non-existent value.");
    return ScalarMapStorage[Key][Instance.Part][Instance.Lane];
  }

  /// Set a vector value associated with \p Key and \p Part. Assumes such a
  /// value is not already set. If it is, use resetVectorValue() instead.
  void setVectorValue(Value *Key, unsigned Part, Value *Vector) {
    assert(!hasVectorValue(Key, Part) && "Vector value already set for part");
    if (!VectorMapStorage.count(Key)) {
      VectorParts Entry(UF);
      VectorMapStorage[Key] = Entry;
    }
    VectorMapStorage[Key][Part] = Vector;
  }

  /// Set a scalar value associated with \p Key and \p Instance. Assumes such a
  /// value is not already set.
  void setScalarValue(Value *Key, const VPIteration &Instance, Value *Scalar) {
    assert(!hasScalarValue(Key, Instance) && "Scalar value already set");
    if (!ScalarMapStorage.count(Key)) {
      ScalarParts Entry(UF);
      // TODO: Consider storing uniform values only per-part, as they occupy
      // lane 0 only, keeping the other VF-1 redundant entries null.
      for (unsigned Part = 0; Part < UF; ++Part)
        Entry[Part].resize(VF.getKnownMinValue(), nullptr);
      ScalarMapStorage[Key] = Entry;
    }
    ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
  }

  /// Reset the vector value associated with \p Key for the given \p Part.
  /// This function can be used to update values that have already been
  /// vectorized. This is the case for "fix-up" operations including type
  /// truncation and the second phase of recurrence vectorization.
  void resetVectorValue(Value *Key, unsigned Part, Value *Vector) {
    assert(hasVectorValue(Key, Part) && "Vector value not set for part");
    VectorMapStorage[Key][Part] = Vector;
  }

  /// Reset the scalar value associated with \p Key for \p Part and \p Lane.
  /// This function can be used to update values that have already been
  /// scalarized. This is the case for "fix-up" operations including scalar phi
  /// nodes for scalarized and predicated instructions.
  void resetScalarValue(Value *Key, const VPIteration &Instance,
                        Value *Scalar) {
    assert(hasScalarValue(Key, Instance) &&
           "Scalar value not set for part and lane");
    ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
  }
};
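
// Illustrative sketch only (V, WideV0 and ScalarV are hypothetical values):
// with UF = 2 and VF = 4, a vectorized value is tracked as two vector parts,
// while a scalarized value is tracked per part and lane:
//   VectorizerValueMap Map(/*UF=*/2, ElementCount::getFixed(4));
//   Map.setVectorValue(V, /*Part=*/0, WideV0);
//   Map.setScalarValue(V, {/*Part=*/1, /*Lane=*/3}, ScalarV);
//   assert(Map.getVectorValue(V, 0) == WideV0);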

/// This class is used to enable the VPlan to invoke a method of ILV. This is
/// needed until the method is refactored out of ILV and becomes reusable.
struct VPCallback {
  virtual ~VPCallback() {}
  virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0;
  virtual Value *getOrCreateScalarValue(Value *V,
                                        const VPIteration &Instance) = 0;
};

/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
  VPTransformState(ElementCount VF, unsigned UF, Loop *OrigLoop, LoopInfo *LI,
                   DominatorTree *DT, IRBuilder<> &Builder,
                   VectorizerValueMap &ValueMap, InnerLoopVectorizer *ILV,
                   VPCallback &Callback)
      : VF(VF), UF(UF), Instance(), OrigLoop(OrigLoop), LI(LI), DT(DT),
        Builder(Builder), ValueMap(ValueMap), ILV(ILV), Callback(Callback) {}

  /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
  ElementCount VF;
  unsigned UF;

  /// Hold the indices to generate specific scalar instructions. Null indicates
  /// that all instances are to be generated, using either scalar or vector
  /// instructions.
  Optional<VPIteration> Instance;

  struct DataState {
    /// A type for vectorized values in the new loop. Each value from the
    /// original loop, when vectorized, is represented by UF vector values in
    /// the new unrolled loop, where UF is the unroll factor.
    typedef SmallVector<Value *, 2> PerPartValuesTy;

    DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;

    using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>;
    DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;
  } Data;

  /// Get the generated Value for a given VPValue and a given Part. Note that
  /// as some Defs are still created by ILV and managed in its ValueMap, this
  /// method will delegate the call to ILV in such cases in order to provide
  /// callers a consistent API.
  /// \see set.
  Value *get(VPValue *Def, unsigned Part) {
    // If Values have been set for this Def return the one relevant for
    // \p Part.
    if (Data.PerPartOutput.count(Def))
      return Data.PerPartOutput[Def][Part];
    // Def is managed by ILV: bring the Values from ValueMap.
    return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part);
  }

  /// Get the generated Value for a given VPValue and given Part and Lane.
  Value *get(VPValue *Def, const VPIteration &Instance);

  bool hasVectorValue(VPValue *Def, unsigned Part) {
    auto I = Data.PerPartOutput.find(Def);
    return I != Data.PerPartOutput.end() && Part < I->second.size() &&
           I->second[Part];
  }

  bool hasScalarValue(VPValue *Def, VPIteration Instance) {
    auto I = Data.PerPartScalars.find(Def);
    if (I == Data.PerPartScalars.end())
      return false;
    return Instance.Part < I->second.size() &&
           Instance.Lane < I->second[Instance.Part].size() &&
           I->second[Instance.Part][Instance.Lane];
  }
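
  // Sketch of the intended use (illustrative only; Def, generateForPart and
  // State are hypothetical): a recipe's execute() records the IR it generates
  // per unroll part, and later users read it back through the same state:
  //   for (unsigned Part = 0; Part < State.UF; ++Part)
  //     State.set(Def, generateForPart(Part), Part);
  //   Value *V0 = State.get(Def, /*Part=*/0);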
  /// Set the generated Value for a given VPValue and a given Part.
  void set(VPValue *Def, Value *V, unsigned Part) {
    if (!Data.PerPartOutput.count(Def)) {
      DataState::PerPartValuesTy Entry(UF);
      Data.PerPartOutput[Def] = Entry;
    }
    Data.PerPartOutput[Def][Part] = V;
  }
  void set(VPValue *Def, Value *IRDef, Value *V, unsigned Part);

  void set(VPValue *Def, Value *V, const VPIteration &Instance) {
    auto Iter = Data.PerPartScalars.insert({Def, {}});
    auto &PerPartVec = Iter.first->second;
    while (PerPartVec.size() <= Instance.Part)
      PerPartVec.emplace_back();
    auto &Scalars = PerPartVec[Instance.Part];
    while (Scalars.size() <= Instance.Lane)
      Scalars.push_back(nullptr);
    Scalars[Instance.Lane] = V;
  }

  /// Hold state information used when constructing the CFG of the output IR,
  /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
  struct CFGState {
    /// The previous VPBasicBlock visited. Initially set to null.
    VPBasicBlock *PrevVPBB = nullptr;

    /// The previous IR BasicBlock created or used. Initially set to the new
    /// header BasicBlock.
    BasicBlock *PrevBB = nullptr;

    /// The last IR BasicBlock in the output IR. Set to the new latch
    /// BasicBlock, used for placing the newly created BasicBlocks.
    BasicBlock *LastBB = nullptr;

    /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
    /// of replication, maps the BasicBlock of the last replica created.
    SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;

    /// Vector of VPBasicBlocks whose terminator instruction needs to be fixed
    /// up at the end of vector code generation.
    SmallVector<VPBasicBlock *, 8> VPBBsToFix;

    CFGState() = default;
  } CFG;

  /// Hold a pointer to the original loop.
  Loop *OrigLoop;

  /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
  LoopInfo *LI;

  /// Hold a pointer to Dominator Tree to register new basic blocks in the
  /// loop.
  DominatorTree *DT;

  /// Hold a reference to the IRBuilder used to generate output IR code.
  IRBuilder<> &Builder;

  /// Hold a reference to the Value state information used when generating the
  /// Values of the output IR.
  VectorizerValueMap &ValueMap;

  /// Hold a reference to a mapping between VPValues in VPlan and original
  /// Values they correspond to.
  VPValue2ValueTy VPValue2Value;

  /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
  Value *CanonicalIV = nullptr;

  /// Hold the trip count of the scalar loop.
  Value *TripCount = nullptr;

  /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
  InnerLoopVectorizer *ILV;

  VPCallback &Callback;
};

/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
class VPBlockBase {
  friend class VPBlockUtils;

  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).

  /// An optional name for the block.
  std::string Name;

  /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
  /// it is a topmost VPBlockBase.
  VPRegionBlock *Parent = nullptr;

  /// List of predecessor blocks.
  SmallVector<VPBlockBase *, 1> Predecessors;

  /// List of successor blocks.
  SmallVector<VPBlockBase *, 1> Successors;

  /// Successor selector, null for zero or single successor blocks.
  VPValue *CondBit = nullptr;

  /// Current block predicate - null if the block does not need a predicate.
  VPValue *Predicate = nullptr;

  /// VPlan containing the block. Can only be set on the entry block of the
  /// plan.
  VPlan *Plan = nullptr;

  /// Add \p Successor as the last successor to this block.
  void appendSuccessor(VPBlockBase *Successor) {
    assert(Successor && "Cannot add nullptr successor!");
    Successors.push_back(Successor);
  }

  /// Add \p Predecessor as the last predecessor to this block.
  void appendPredecessor(VPBlockBase *Predecessor) {
    assert(Predecessor && "Cannot add nullptr predecessor!");
    Predecessors.push_back(Predecessor);
  }

  /// Remove \p Predecessor from the predecessors of this block.
  void removePredecessor(VPBlockBase *Predecessor) {
    auto Pos = find(Predecessors, Predecessor);
    assert(Pos && "Predecessor does not exist");
    Predecessors.erase(Pos);
  }

  /// Remove \p Successor from the successors of this block.
  void removeSuccessor(VPBlockBase *Successor) {
    auto Pos = find(Successors, Successor);
    assert(Pos && "Successor does not exist");
    Successors.erase(Pos);
  }

protected:
  VPBlockBase(const unsigned char SC, const std::string &N)
      : SubclassID(SC), Name(N) {}

public:
  /// An enumeration for keeping track of the concrete subclasses of
  /// VPBlockBase that are actually instantiated. Values of this enumeration
  /// are kept in the SubclassID field of the VPBlockBase objects. They are
  /// used for concrete type identification.
  using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };

  using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;

  virtual ~VPBlockBase() = default;

  const std::string &getName() const { return Name; }

  void setName(const Twine &newName) { Name = newName.str(); }

  /// \return an ID for the concrete type of this object.
  /// This is used to implement the classof checks. This should not be used
  /// for any other purpose, as the values may change as LLVM evolves.
  unsigned getVPBlockID() const { return SubclassID; }

  VPRegionBlock *getParent() { return Parent; }
  const VPRegionBlock *getParent() const { return Parent; }

  /// \return A pointer to the plan containing the current block.
  VPlan *getPlan();
  const VPlan *getPlan() const;

  /// Sets the pointer of the plan containing the block. The block must be the
  /// entry block into the VPlan.
  void setPlan(VPlan *ParentPlan);

  void setParent(VPRegionBlock *P) { Parent = P; }

  /// \return the VPBasicBlock that is the entry of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getEntryBasicBlock() const;
  VPBasicBlock *getEntryBasicBlock();

  /// \return the VPBasicBlock that is the exit of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getExitBasicBlock() const;
  VPBasicBlock *getExitBasicBlock();

  const VPBlocksTy &getSuccessors() const { return Successors; }
  VPBlocksTy &getSuccessors() { return Successors; }

  const VPBlocksTy &getPredecessors() const { return Predecessors; }
  VPBlocksTy &getPredecessors() { return Predecessors; }

  /// \return the successor of this VPBlockBase if it has a single successor.
  /// Otherwise return a null pointer.
  VPBlockBase *getSingleSuccessor() const {
    return (Successors.size() == 1 ? *Successors.begin() : nullptr);
  }

  /// \return the predecessor of this VPBlockBase if it has a single
  /// predecessor. Otherwise return a null pointer.
  VPBlockBase *getSinglePredecessor() const {
    return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
  }

  size_t getNumSuccessors() const { return Successors.size(); }
  size_t getNumPredecessors() const { return Predecessors.size(); }

  /// An Enclosing Block of a block B is any block containing B, including B
  /// itself. \return the closest enclosing block starting from "this", which
  /// has successors. \return the root enclosing block if all enclosing blocks
  /// have no successors.
  VPBlockBase *getEnclosingBlockWithSuccessors();

  /// \return the closest enclosing block starting from "this", which has
  /// predecessors. \return the root enclosing block if all enclosing blocks
  /// have no predecessors.
  VPBlockBase *getEnclosingBlockWithPredecessors();

  /// \return the successors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the exit block of a VPRegionBlock and has no
  /// successors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has successors and return them. If no such
  /// VPRegionBlock exists, return the (empty) successors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalSuccessors() {
    return getEnclosingBlockWithSuccessors()->getSuccessors();
  }

  /// \return the hierarchical successor of this VPBlockBase if it has a single
  /// hierarchical successor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalSuccessor() {
    return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
  }

  /// \return the predecessors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
  /// predecessors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has predecessors and return them. If no such
  /// VPRegionBlock exists, return the (empty) predecessors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalPredecessors() {
    return getEnclosingBlockWithPredecessors()->getPredecessors();
  }

  /// \return the hierarchical predecessor of this VPBlockBase if it has a
  /// single hierarchical predecessor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalPredecessor() {
    return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
  }

  /// \return the condition bit selecting the successor.
  VPValue *getCondBit() { return CondBit; }

  const VPValue *getCondBit() const { return CondBit; }

  void setCondBit(VPValue *CV) { CondBit = CV; }

  VPValue *getPredicate() { return Predicate; }

  const VPValue *getPredicate() const { return Predicate; }

  void setPredicate(VPValue *Pred) { Predicate = Pred; }

  /// Set a given VPBlockBase \p Successor as the single successor of this
  /// VPBlockBase. This VPBlockBase is not added as predecessor of \p
  /// Successor. This VPBlockBase must have no successors.
  void setOneSuccessor(VPBlockBase *Successor) {
    assert(Successors.empty() && "Setting one successor when others exist.");
    appendSuccessor(Successor);
  }

  /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
  /// successors of this VPBlockBase. \p Condition is set as the successor
  /// selector. This VPBlockBase is not added as predecessor of \p IfTrue or \p
  /// IfFalse. This VPBlockBase must have no successors.
  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
                        VPValue *Condition) {
    assert(Successors.empty() && "Setting two successors when others exist.");
    assert(Condition && "Setting two successors without condition!");
    CondBit = Condition;
    appendSuccessor(IfTrue);
    appendSuccessor(IfFalse);
  }

  /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
  /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
  /// as successor of any VPBasicBlock in \p NewPreds.
  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
    assert(Predecessors.empty() && "Block predecessors already set.");
    for (auto *Pred : NewPreds)
      appendPredecessor(Pred);
  }

  /// Remove all the predecessors of this block.
  void clearPredecessors() { Predecessors.clear(); }

  /// Remove all the successors of this block and set its condition bit to
  /// null.
  void clearSuccessors() {
    Successors.clear();
    CondBit = nullptr;
  }

  /// The method which generates the output IR that corresponds to this
  /// VPBlockBase, thereby "executing" the VPlan.
  virtual void execute(struct VPTransformState *State) = 0;

  /// Delete all blocks reachable from a given VPBlockBase, inclusive.
  static void deleteCFG(VPBlockBase *Entry);

  void printAsOperand(raw_ostream &OS, bool PrintType) const {
    OS << getName();
  }

  void print(raw_ostream &OS) const {
    // TODO: Only printing VPBB name for now since we only have dot printing
    // support for VPInstructions/Recipes.
    printAsOperand(OS, false);
  }

  /// Return true if it is legal to hoist instructions into this block.
  bool isLegalToHoistInto() {
    // There are currently no constraints that prevent an instruction from
    // being hoisted into a VPBlockBase.
    return true;
  }

  /// Replace all operands of VPUsers in the block with \p NewValue and also
  /// replaces all uses of VPValues defined in the block with NewValue.
  virtual void dropAllReferences(VPValue *NewValue) = 0;
};
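
// Illustrative sketch (Cond and the blocks below are hypothetical; wiring is
// normally done via the VPBlockUtils helpers befriended above): a two-way
// branch sets both successors and the condition bit, while predecessors are
// set separately:
//   VPBasicBlock *Entry = new VPBasicBlock("entry");
//   VPBasicBlock *Then = new VPBasicBlock("if.then");
//   VPBasicBlock *Else = new VPBasicBlock("if.else");
//   Entry->setTwoSuccessors(Then, Else, Cond); // Cond is a VPValue *.
//   Then->setPredecessors({Entry});
//   Else->setPredecessors({Entry});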

/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// VPRecipeBases that also inherit from VPValue must make sure to inherit from
/// VPRecipeBase before VPValue.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
                     public VPDef {
  friend VPBasicBlock;
  friend class VPBlockUtils;

  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

public:
  VPRecipeBase(const unsigned char SC) : VPDef(SC) {}
  virtual ~VPRecipeBase() = default;

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(struct VPTransformState &State) = 0;

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// Unlink this recipe and insert into BB before I.
  ///
  /// \pre I is a valid iterator into BB.
  void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes
  /// it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();

  /// Returns a pointer to a VPUser if the recipe inherits from VPUser, or
  /// nullptr otherwise.
  VPUser *toVPUser();

  /// Returns the underlying instruction if the recipe is a VPValue, or nullptr
  /// otherwise.
  Instruction *getUnderlyingInstr() {
    return cast<Instruction>(getVPValue()->getUnderlyingValue());
  }
  const Instruction *getUnderlyingInstr() const {
    return cast<Instruction>(getVPValue()->getUnderlyingValue());
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    // All VPDefs are also VPRecipeBases.
    return true;
  }
};

inline bool VPUser::classof(const VPDef *Def) {
  return Def->getVPDefID() == VPRecipeBase::VPInstructionSC ||
         Def->getVPDefID() == VPRecipeBase::VPWidenSC ||
         Def->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
         Def->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
         Def->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
         Def->getVPDefID() == VPRecipeBase::VPBlendSC ||
         Def->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
         Def->getVPDefID() == VPRecipeBase::VPReplicateSC ||
         Def->getVPDefID() == VPRecipeBase::VPReductionSC ||
         Def->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC ||
         Def->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC;
}

/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While, as any Recipe, it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPRecipeBase, public VPUser, public VPValue {
  friend class VPlanSlp;

public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
  enum {
    Not = Instruction::OtherOpsEnd + 1,
    ICmpULE,
    SLPLoad,
    SLPStore,
    ActiveLaneMask,
  };

private:
  typedef unsigned char OpcodeTy;
  OpcodeTy Opcode;

  /// Utility method serving execute(): generates a single instance of the
  /// modeled instruction.
  void generateInstruction(VPTransformState &State, unsigned Part);

protected:
  void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }

public:
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
      : VPRecipeBase(VPRecipeBase::VPInstructionSC), VPUser(Operands),
        VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) {}

  VPInstruction(unsigned Opcode, ArrayRef<VPInstruction *> Operands)
      : VPRecipeBase(VPRecipeBase::VPInstructionSC), VPUser({}),
        VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) {
    for (auto *I : Operands)
      addOperand(I->getVPValue());
  }

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands)
      : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands)) {}

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVInstructionSC;
  }

  VPInstruction *clone() const {
    SmallVector<VPValue *, 2> Operands(operands());
    return new VPInstruction(Opcode, Operands);
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *R) {
    return R->getVPDefID() == VPRecipeBase::VPInstructionSC;
  }

  unsigned getOpcode() const { return Opcode; }

  /// Generate the instruction.
  /// TODO: We currently execute only per-part unless a specific instance is
  /// provided.
  void execute(VPTransformState &State) override;

  /// Print the VPInstruction to \p O.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Print the VPInstruction to dbgs() (for debugging).
  void dump() const;

  /// Return true if this instruction may modify memory.
  bool mayWriteToMemory() const {
    // TODO: we can use attributes of the called function to rule out memory
    // modifications.
    return Opcode == Instruction::Store || Opcode == Instruction::Call ||
           Opcode == Instruction::Invoke || Opcode == SLPStore;
  }

  bool hasResult() const {
    // CallInst may or may not have a result, depending on the called function.
    // Conservatively return that calls have results for now.
    switch (getOpcode()) {
    case Instruction::Ret:
    case Instruction::Br:
    case Instruction::Store:
    case Instruction::Switch:
    case Instruction::IndirectBr:
    case Instruction::Resume:
    case Instruction::CatchRet:
    case Instruction::Unreachable:
    case Instruction::Fence:
    case Instruction::AtomicRMW:
      return false;
    default:
      return true;
    }
  }
};
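
// Illustrative sketch (A, B and VPBB are hypothetical): a VPInstruction is
// built from an opcode and VPValue operands, and is placed in a VPBasicBlock
// like any other recipe:
//   VPInstruction *Cmp = new VPInstruction(VPInstruction::ICmpULE, {A, B});
//   VPBB->appendRecipe(Cmp);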

/// VPWidenRecipe is a recipe for producing a copy of vector type of its
/// ingredient. This recipe covers most of the traditional vectorization cases
/// where each ingredient transforms into a vectorized version of itself.
class VPWidenRecipe : public VPRecipeBase, public VPValue, public VPUser {
public:
  template <typename IterT>
  VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
      : VPRecipeBase(VPRecipeBase::VPWidenSC),
        VPValue(VPValue::VPVWidenSC, &I, this), VPUser(Operands) {}

  ~VPWidenRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenSC;
  }
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVWidenSC;
  }

  /// Produce widened copies of all Ingredients.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};

/// A recipe for widening Call instructions.
class VPWidenCallRecipe : public VPRecipeBase, public VPUser, public VPValue {

public:
  template <typename IterT>
  VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments)
      : VPRecipeBase(VPRecipeBase::VPWidenCallSC), VPUser(CallArguments),
        VPValue(VPValue::VPVWidenCallSC, &I, this) {}

  ~VPWidenCallRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenCallSC;
  }

  /// Produce a widened version of the call instruction.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};

/// A recipe for widening select instructions.
class VPWidenSelectRecipe : public VPRecipeBase, public VPUser, public VPValue {

  /// Is the condition of the select loop invariant?
  bool InvariantCond;

public:
  template <typename IterT>
  VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands,
                      bool InvariantCond)
      : VPRecipeBase(VPRecipeBase::VPWidenSelectSC), VPUser(Operands),
        VPValue(VPValue::VPVWidenSelectSC, &I, this),
        InvariantCond(InvariantCond) {}

  ~VPWidenSelectRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenSelectSC;
  }

  /// Produce a widened version of the select instruction.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};

/// A recipe for handling GEP instructions.
class VPWidenGEPRecipe : public VPRecipeBase,
                         public VPUser,
                         public VPValue {
  bool IsPtrLoopInvariant;
  SmallBitVector IsIndexLoopInvariant;

public:
  template <typename IterT>
  VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
      : VPRecipeBase(VPRecipeBase::VPWidenGEPSC), VPUser(Operands),
        VPValue(VPValue::VPVWidenGEPSC, GEP, this),
        IsIndexLoopInvariant(GEP->getNumIndices(), false) {}

  template <typename IterT>
  VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands,
                   Loop *OrigLoop)
      : VPRecipeBase(VPRecipeBase::VPWidenGEPSC), VPUser(Operands),
        VPValue(VPValue::VPVWidenGEPSC, GEP, this),
        IsIndexLoopInvariant(GEP->getNumIndices(), false) {
    IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand());
    for (auto Index : enumerate(GEP->indices()))
      IsIndexLoopInvariant[Index.index()] =
          OrigLoop->isLoopInvariant(Index.value().get());
  }
  ~VPWidenGEPRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenGEPSC;
  }

  /// Generate the gep nodes.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector and scalar values.
class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPUser {
  PHINode *IV;
  TruncInst *Trunc;

public:
  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start,
                                TruncInst *Trunc = nullptr)
      : VPRecipeBase(VPWidenIntOrFpInductionSC), VPUser({Start}), IV(IV),
        Trunc(Trunc) {
    if (Trunc)
      new VPValue(Trunc, this);
    else
      new VPValue(IV, this);
  }
  ~VPWidenIntOrFpInductionRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenIntOrFpInductionSC;
  }

  /// Generate the vectorized and scalarized versions of the phi node as
  /// needed by their users.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Returns the start value of the induction.
  VPValue *getStartValue() { return getOperand(0); }
};

/// A recipe for handling all phi nodes except for integer and FP inductions.
/// For reduction PHIs, RdxDesc must point to the corresponding recurrence
/// descriptor and the start value is the first operand of the recipe.
class VPWidenPHIRecipe : public VPRecipeBase, public VPUser {
  PHINode *Phi;

  /// Descriptor for a reduction PHI.
  RecurrenceDescriptor *RdxDesc = nullptr;

public:
  /// Create a new VPWidenPHIRecipe for the reduction \p Phi described by \p
  /// RdxDesc.
  VPWidenPHIRecipe(PHINode *Phi, RecurrenceDescriptor &RdxDesc, VPValue &Start)
      : VPWidenPHIRecipe(Phi) {
    this->RdxDesc = &RdxDesc;
    addOperand(&Start);
  }

  /// Create a VPWidenPHIRecipe for \p Phi.
  VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) {
    new VPValue(Phi, this);
  }
  ~VPWidenPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenPHISC;
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Returns the start value of the phi, if it is a reduction.
  VPValue *getStartValue() {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }
};

/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class VPBlendRecipe : public VPRecipeBase, public VPUser {
  PHINode *Phi;

public:
  /// The blend operation is a User of the incoming values and of their
  /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
  /// might be incoming with a full mask for which there is no VPValue.
  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
      : VPRecipeBase(VPBlendSC), VPUser(Operands), Phi(Phi) {
    new VPValue(Phi, this);
    assert(Operands.size() > 0 &&
           ((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
           "Expected either a single incoming value or a positive even number "
           "of operands");
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPBlendSC;
  }

  /// Return the number of incoming values, taking into account that a single
  /// incoming value has no mask.
  unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }

  /// Return incoming value number \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); }

  /// Return mask number \p Idx.
  VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};
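
// Worked example of the operand layout above (illustrative only): a blend of
// two incoming values has operands [I0, M0, I1, M1], so getNumIncomingValues()
// returns 2, getIncomingValue(1) returns I1 (operand 2) and getMask(1) returns
// M1 (operand 3). With a single incoming value the operand list is just [I0]
// and no mask is stored.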

/// VPInterleaveRecipe is a recipe for transforming an interleave group of
/// loads or stores into one wide load/store and shuffles. The first operand of
/// a VPInterleave recipe is the address, followed by the stored values,
/// followed by an optional mask.
class VPInterleaveRecipe : public VPRecipeBase, public VPUser {
  const InterleaveGroup<Instruction> *IG;

  bool HasMask = false;

public:
  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
                     ArrayRef<VPValue *> StoredValues, VPValue *Mask)
      : VPRecipeBase(VPInterleaveSC), VPUser(Addr), IG(IG) {
    for (unsigned i = 0; i < IG->getFactor(); ++i)
      if (Instruction *I = IG->getMember(i)) {
        if (I->getType()->isVoidTy())
          continue;
        new VPValue(I, this);
      }

    for (auto *SV : StoredValues)
      addOperand(SV);
    if (Mask) {
      HasMask = true;
      addOperand(Mask);
    }
  }
  ~VPInterleaveRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPInterleaveSC;
  }

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last, currently 2nd operand.
    return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Return the VPValues stored by this interleave group. If it is a load
  /// interleave group, return an empty ArrayRef.
  ArrayRef<VPValue *> getStoredValues() const {
    // The first operand is the address, followed by the stored values,
    // followed by an optional mask.
    return ArrayRef<VPValue *>(op_begin(), getNumOperands())
        .slice(1, getNumOperands() - (HasMask ? 2 : 1));
  }

  /// Generate the wide load or store, and shuffles.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
};
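
// Worked example of the operand layout above (illustrative only): a masked
// store interleave group with two stored values has operands
// [Addr, SV0, SV1, Mask]; getAddr() returns operand 0, getStoredValues()
// returns the slice {SV0, SV1}, and getMask() returns the last operand. For a
// load-only group the layout is simply [Addr] or [Addr, Mask] and
// getStoredValues() is empty.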

/// A recipe to represent in-loop reduction operations, performing a reduction
/// on a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPRecipeBase, public VPUser, public VPValue {
  /// The recurrence descriptor for the reduction in question.
  RecurrenceDescriptor *RdxDesc;
  /// Fast math flags to use for the resulting reduction operation.
  bool NoNaN;
  /// Pointer to the TTI, needed to create the target reduction.
  const TargetTransformInfo *TTI;

public:
  VPReductionRecipe(RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp,
                    VPValue *VecOp, VPValue *CondOp, bool NoNaN,
                    const TargetTransformInfo *TTI)
      : VPRecipeBase(VPRecipeBase::VPReductionSC), VPUser({ChainOp, VecOp}),
        VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R), NoNaN(NoNaN),
        TTI(TTI) {
    if (CondOp)
      addOperand(CondOp);
  }

  ~VPReductionRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVReductionSC;
  }

  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPReductionSC;
  }

  /// Generate the reduction in the loop.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// The VPValue of the scalar Chain being accumulated.
  VPValue *getChainOp() const { return getOperand(0); }
  /// The VPValue of the vector value to be reduced.
  VPValue *getVecOp() const { return getOperand(1); }
  /// The VPValue of the condition for the block.
  VPValue *getCondOp() const {
    return getNumOperands() > 2 ? getOperand(2) : nullptr;
  }
};

/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to
/// be uniform, only one copy, per lane zero, will be generated.
class VPReplicateRecipe : public VPRecipeBase, public VPUser, public VPValue {
  /// Indicator if only a single replica per lane is needed.
  bool IsUniform;

  /// Indicator if the replicas are also predicated.
  bool IsPredicated;

  /// Indicator if the scalar values should also be packed into a vector.
  bool AlsoPack;

public:
  template <typename IterT>
  VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
                    bool IsUniform, bool IsPredicated = false)
      : VPRecipeBase(VPReplicateSC), VPUser(Operands),
        VPValue(VPVReplicateSC, I, this), IsUniform(IsUniform),
        IsPredicated(IsPredicated) {
    // Retain the previous behavior of predicateInstructions(), where an
    // insert-element of a predicated instruction got hoisted into the
    // predicated basic block iff it was its only user. This is achieved by
    // having predicated instructions also pack their values into a vector by
    // default unless they have a replicated user which uses their scalar
    // value.
    AlsoPack = IsPredicated && !I->use_empty();
  }

  ~VPReplicateRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPReplicateSC;
  }

  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVReplicateSC;
  }

  /// Generate replicas of the desired Ingredient. Replicas will be generated
  /// for all parts and lanes unless a specific part and lane are specified in
  /// the \p State.
  void execute(VPTransformState &State) override;

  void setAlsoPack(bool Pack) { AlsoPack = Pack; }

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  bool isUniform() const { return IsUniform; }
};

/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase, public VPUser {
public:
  VPBranchOnMaskRecipe(VPValue *BlockInMask) : VPRecipeBase(VPBranchOnMaskSC) {
    if (BlockInMask) // nullptr means all-one mask.
      addOperand(BlockInMask);
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC;
  }

  /// Generate the extraction of the appropriate bit from the block mask and
  /// the conditional branch.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override {
    O << " +\n" << Indent << "\"BRANCH-ON-MASK ";
    if (VPValue *Mask = getMask())
      Mask->printAsOperand(O, SlotTracker);
    else
      O << " All-One";
    O << "\\l\"";
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
    // Mask is optional.
    return getNumOperands() == 1 ? getOperand(0) : nullptr;
  }
};

/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
class VPPredInstPHIRecipe : public VPRecipeBase, public VPUser {

public:
  /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
  /// node after merging back from a Branch-on-Mask.
  VPPredInstPHIRecipe(VPValue *PredV)
      : VPRecipeBase(VPPredInstPHISC), VPUser(PredV) {
    new VPValue(PredV->getUnderlyingValue(), this);
  }
  ~VPPredInstPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPPredInstPHISC;
  }

  /// Generates phi nodes for live-outs as needed to retain SSA form.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};

/// A Recipe for widening load/store operations.
/// The recipe uses the following VPValues:
/// - For load: Address, optional mask
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
class VPWidenMemoryInstructionRecipe : public VPRecipeBase, public VPUser {
  Instruction &Ingredient;

  void setMask(VPValue *Mask) {
    if (!Mask)
      return;
    addOperand(Mask);
  }

  bool isMasked() const {
    return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
  }

public:
  VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask)
      : VPRecipeBase(VPWidenMemoryInstructionSC), VPUser({Addr}),
        Ingredient(Load) {
    new VPValue(VPValue::VPVMemoryInstructionSC, &Load, this);
    setMask(Mask);
  }

  VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
                                 VPValue *StoredValue, VPValue *Mask)
      : VPRecipeBase(VPWidenMemoryInstructionSC), VPUser({Addr, StoredValue}),
        Ingredient(Store) {
    setMask(Mask);
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC;
  }

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last operand.
    return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Returns true if this recipe is a store.
  bool isStore() const { return isa<StoreInst>(Ingredient); }

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const {
    assert(isStore() && "Stored value only available for store instructions");
    return getOperand(1); // Stored value is the 2nd, mandatory operand.
  }

  /// Generate the wide load/store.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};

/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPRecipeBase {
public:
  VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC) {
    new VPValue(nullptr, this);
  }

  ~VPWidenCanonicalIVRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC;
  }

  /// Generate a canonical vector induction variable of the vector loop, with
  /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
  /// step = <VF*UF, VF*UF, ..., VF*UF>.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};

/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
/// output IR instructions.
class VPBasicBlock : public VPBlockBase {
public:
  using RecipeListTy = iplist<VPRecipeBase>;

private:
  /// The VPRecipes held in the order of output instructions to generate.
  RecipeListTy Recipes;

public:
  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
      : VPBlockBase(VPBasicBlockSC, Name.str()) {
    if (Recipe)
      appendRecipe(Recipe);
  }

  ~VPBasicBlock() override { Recipes.clear(); }

  /// Instruction iterators...
  using iterator = RecipeListTy::iterator;
  using const_iterator = RecipeListTy::const_iterator;
  using reverse_iterator = RecipeListTy::reverse_iterator;
  using const_reverse_iterator = RecipeListTy::const_reverse_iterator;

  //===--------------------------------------------------------------------===//
  /// Recipe iterator methods
  ///
  inline iterator begin() { return Recipes.begin(); }
  inline const_iterator begin() const { return Recipes.begin(); }
  inline iterator end() { return Recipes.end(); }
  inline const_iterator end() const { return Recipes.end(); }

  inline reverse_iterator rbegin() { return Recipes.rbegin(); }
  inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
  inline reverse_iterator rend() { return Recipes.rend(); }
  inline const_reverse_iterator rend() const { return Recipes.rend(); }

  inline size_t size() const { return Recipes.size(); }
  inline bool empty() const { return Recipes.empty(); }
  inline const VPRecipeBase &front() const { return Recipes.front(); }
  inline VPRecipeBase &front() { return Recipes.front(); }
  inline const VPRecipeBase &back() const { return Recipes.back(); }
  inline VPRecipeBase &back() { return Recipes.back(); }

  /// Returns a reference to the list of recipes.
  RecipeListTy &getRecipeList() { return Recipes; }

  /// Returns a pointer to a member of the recipe list.
  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
    return &VPBasicBlock::Recipes;
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
  }

  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
    assert(Recipe && "No recipe to append.");
    assert(!Recipe->Parent && "Recipe already in VPlan");
    Recipe->Parent = this;
    Recipes.insert(InsertPt, Recipe);
  }

  /// Augment the existing recipes of a VPBasicBlock with an additional
  /// \p Recipe as the last recipe.
  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(struct VPTransformState *State) override;

  /// Return the position of the first non-phi node recipe in the block.
  iterator getFirstNonPhi();

  void dropAllReferences(VPValue *NewValue) override;

private:
  /// Create an IR BasicBlock to hold the output instructions generated by this
  /// VPBasicBlock, and return it. Update the CFGState accordingly.
  BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
};
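
// Illustrative sketch (Mask is a hypothetical VPValue): recipes are appended
// and traversed much like instructions in an IR basic block:
//   VPBasicBlock *VPBB = new VPBasicBlock("vector.body");
//   VPBB->appendRecipe(new VPInstruction(VPInstruction::Not, {Mask}));
//   for (VPRecipeBase &R : *VPBB)
//     (void)R; // Visit each recipe in the order it will generate output IR.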
1483 class VPRegionBlock : public VPBlockBase {
1484   /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
1485   VPBlockBase *Entry;
1486
1487   /// Hold the Single Exit of the SESE region modelled by the VPRegionBlock.
1488   VPBlockBase *Exit;
1489
1490   /// An indicator whether this region is to generate multiple replicated
1491   /// instances of output IR corresponding to its VPBlockBases.
1492   bool IsReplicator;
1493
1494 public:
1495   VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exit,
1496                 const std::string &Name = "", bool IsReplicator = false)
1497       : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exit(Exit),
1498         IsReplicator(IsReplicator) {
1499     assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
1500     assert(Exit->getSuccessors().empty() && "Exit block has successors.");
1501     Entry->setParent(this);
1502     Exit->setParent(this);
1503   }
1504   VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
1505       : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exit(nullptr),
1506         IsReplicator(IsReplicator) {}
1507
1508   ~VPRegionBlock() override {
1509     if (Entry) {
1510       VPValue DummyValue;
1511       Entry->dropAllReferences(&DummyValue);
1512       deleteCFG(Entry);
1513     }
1514   }
1515
1516   /// Method to support type inquiry through isa, cast, and dyn_cast.
1517   static inline bool classof(const VPBlockBase *V) {
1518     return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
1519   }
1520
1521   const VPBlockBase *getEntry() const { return Entry; }
1522   VPBlockBase *getEntry() { return Entry; }
1523
1524   /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
1525   /// EntryBlock must have no predecessors.
1526   void setEntry(VPBlockBase *EntryBlock) {
1527     assert(EntryBlock->getPredecessors().empty() &&
1528            "Entry block cannot have predecessors.");
1529     Entry = EntryBlock;
1530     EntryBlock->setParent(this);
1531   }
1532
1533   // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is an
1534   // interface specific to llvm::Function, so it should be using
1535   // GraphTraits::getEntryNode instead. We should add a new template parameter
1536   // to DominatorTreeBase representing the Graph type.
1537   VPBlockBase &front() const { return *Entry; }
1538
1539   const VPBlockBase *getExit() const { return Exit; }
1540   VPBlockBase *getExit() { return Exit; }
1541
1542   /// Set \p ExitBlock as the exit VPBlockBase of this VPRegionBlock. \p
1543   /// ExitBlock must have no successors.
1544   void setExit(VPBlockBase *ExitBlock) {
1545     assert(ExitBlock->getSuccessors().empty() &&
1546            "Exit block cannot have successors.");
1547     Exit = ExitBlock;
1548     ExitBlock->setParent(this);
1549   }
1550
1551   /// An indicator whether this region is to generate multiple replicated
1552   /// instances of output IR corresponding to its VPBlockBases.
1553   bool isReplicator() const { return IsReplicator; }
1554
1555   /// The method which generates the output IR instructions that correspond to
1556   /// this VPRegionBlock, thereby "executing" the VPlan.
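  /// Note that for a replicating region the enclosed blocks are emitted once
  /// per replicated instance, i.e. VF * UF times, as described in the class
  /// comment above.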
1557   void execute(struct VPTransformState *State) override;
1558
1559   void dropAllReferences(VPValue *NewValue) override;
1560 };
1561
1562 //===----------------------------------------------------------------------===//
1563 // GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs     //
1564 //===----------------------------------------------------------------------===//
1565
1566 // The following set of template specializations implement GraphTraits to treat
1567 // any VPBlockBase as a node in a graph of VPBlockBases. It's important to note
1568 // that VPBlockBase traits don't recurse into VPRegionBlocks, i.e., if the
1569 // VPBlockBase is a VPRegionBlock, this specialization provides access to its
1570 // successors/predecessors but not to the blocks inside the region.
1571
1572 template <> struct GraphTraits<VPBlockBase *> {
1573   using NodeRef = VPBlockBase *;
1574   using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
1575
1576   static NodeRef getEntryNode(NodeRef N) { return N; }
1577
1578   static inline ChildIteratorType child_begin(NodeRef N) {
1579     return N->getSuccessors().begin();
1580   }
1581
1582   static inline ChildIteratorType child_end(NodeRef N) {
1583     return N->getSuccessors().end();
1584   }
1585 };
1586
1587 template <> struct GraphTraits<const VPBlockBase *> {
1588   using NodeRef = const VPBlockBase *;
1589   using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::const_iterator;
1590
1591   static NodeRef getEntryNode(NodeRef N) { return N; }
1592
1593   static inline ChildIteratorType child_begin(NodeRef N) {
1594     return N->getSuccessors().begin();
1595   }
1596
1597   static inline ChildIteratorType child_end(NodeRef N) {
1598     return N->getSuccessors().end();
1599   }
1600 };
1601
1602 // Inverse order specialization for VPBlockBases. Predecessors are used instead
1603 // of successors for the inverse traversal.
1604 template <> struct GraphTraits<Inverse<VPBlockBase *>> {
1605   using NodeRef = VPBlockBase *;
1606   using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
1607
1608   static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; }
1609
1610   static inline ChildIteratorType child_begin(NodeRef N) {
1611     return N->getPredecessors().begin();
1612   }
1613
1614   static inline ChildIteratorType child_end(NodeRef N) {
1615     return N->getPredecessors().end();
1616   }
1617 };
1618
1619 // The following set of template specializations implement GraphTraits to
1620 // treat VPRegionBlock as a graph and recurse inside its nodes. It's important
1621 // to note that the blocks inside the VPRegionBlock are treated as VPBlockBases
1622 // (i.e., no dyn_cast is performed, the VPBlockBase specialization is used), so
1623 // there won't be automatic recursion into other VPBlockBases that turn out to
1624 // be VPRegionBlocks.
1625
1626 template <>
1627 struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> {
1628   using GraphRef = VPRegionBlock *;
1629   using nodes_iterator = df_iterator<NodeRef>;
1630
1631   static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
1632
1633   static nodes_iterator nodes_begin(GraphRef N) {
1634     return nodes_iterator::begin(N->getEntry());
1635   }
1636
1637   static nodes_iterator nodes_end(GraphRef N) {
1638     // df_iterator::end() returns an empty iterator so the node used doesn't
1639     // matter.
1640     return nodes_iterator::end(N);
1641   }
1642 };
1643
1644 template <>
1645 struct GraphTraits<const VPRegionBlock *>
1646     : public GraphTraits<const VPBlockBase *> {
1647   using GraphRef = const VPRegionBlock *;
1648   using nodes_iterator = df_iterator<NodeRef>;
1649
1650   static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
1651
1652   static nodes_iterator nodes_begin(GraphRef N) {
1653     return nodes_iterator::begin(N->getEntry());
1654   }
1655
1656   static nodes_iterator nodes_end(GraphRef N) {
1657     // df_iterator::end() returns an empty iterator so the node used doesn't
1658     // matter.
1659     return nodes_iterator::end(N);
1660   }
1661 };
1662
1663 template <>
1664 struct GraphTraits<Inverse<VPRegionBlock *>>
1665     : public GraphTraits<Inverse<VPBlockBase *>> {
1666   using GraphRef = VPRegionBlock *;
1667   using nodes_iterator = df_iterator<NodeRef>;
1668
1669   static NodeRef getEntryNode(Inverse<GraphRef> N) {
1670     return N.Graph->getExit();
1671   }
1672
1673   static nodes_iterator nodes_begin(GraphRef N) {
1674     return nodes_iterator::begin(N->getExit());
1675   }
1676
1677   static nodes_iterator nodes_end(GraphRef N) {
1678     // df_iterator::end() returns an empty iterator so the node used doesn't
1679     // matter.
1680     return nodes_iterator::end(N);
1681   }
1682 };
1683
1684 /// VPlan models a candidate for vectorization, encoding various decisions taken
1685 /// to produce efficient output IR, including which branches, basic-blocks and
1686 /// output IR instructions to generate, and their cost. VPlan holds a
1687 /// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
1688 /// VPBlock.
1689 class VPlan {
1690   friend class VPlanPrinter;
1691   friend class VPSlotTracker;
1692
1693   /// Hold the single entry to the Hierarchical CFG of the VPlan.
1694   VPBlockBase *Entry;
1695
1696   /// Holds the VFs applicable to this VPlan.
1697   SmallSetVector<ElementCount, 2> VFs;
1698
1699   /// Holds the name of the VPlan, for printing.
1700   std::string Name;
1701
1702   /// Holds all the external definitions created for this VPlan.
1703   // TODO: Introduce a specific representation for external definitions in
1704   // VPlan. External definitions must be immutable and hold a pointer to their
1705   // underlying IR that will be used to implement their structural comparison
1706   // (operators '==' and '<').
1707   SmallPtrSet<VPValue *, 16> VPExternalDefs;
1708
1709   /// Represents the backedge taken count of the original loop, for folding
1710   /// the tail.
1711   VPValue *BackedgeTakenCount = nullptr;
1712
1713   /// Holds a mapping between Values and their corresponding VPValue inside
1714   /// VPlan.
1715   Value2VPValueTy Value2VPValue;
1716
1717   /// Contains all VPValues that have been allocated by addVPValue directly and
1718   /// need to be freed when the plan's destructor is called.
1719   SmallVector<VPValue *, 16> VPValuesToFree;
1720
1721   /// Holds the VPLoopInfo analysis for this VPlan.
1722   VPLoopInfo VPLInfo;
1723
1724   /// Holds the condition bit values built during VPInstruction to VPRecipe transformation.
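  /// These values are owned by the VPlan and are deleted in its destructor,
  /// together with the other VPValues the plan has allocated.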
1725   SmallVector<VPValue *, 4> VPCBVs;
1726
1727 public:
1728   VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {
1729     if (Entry)
1730       Entry->setPlan(this);
1731   }
1732
1733   ~VPlan() {
1734     if (Entry) {
1735       VPValue DummyValue;
1736       for (VPBlockBase *Block : depth_first(Entry))
1737         Block->dropAllReferences(&DummyValue);
1738
1739       VPBlockBase::deleteCFG(Entry);
1740     }
1741     for (VPValue *VPV : VPValuesToFree)
1742       delete VPV;
1743     if (BackedgeTakenCount)
1744       delete BackedgeTakenCount;
1745     for (VPValue *Def : VPExternalDefs)
1746       delete Def;
1747     for (VPValue *CBV : VPCBVs)
1748       delete CBV;
1749   }
1750
1751   /// Generate the IR code for this VPlan.
1752   void execute(struct VPTransformState *State);
1753
1754   VPBlockBase *getEntry() { return Entry; }
1755   const VPBlockBase *getEntry() const { return Entry; }
1756
1757   VPBlockBase *setEntry(VPBlockBase *Block) {
1758     Entry = Block;
1759     Block->setPlan(this);
1760     return Entry;
1761   }
1762
1763   /// The backedge taken count of the original loop.
1764   VPValue *getOrCreateBackedgeTakenCount() {
1765     if (!BackedgeTakenCount)
1766       BackedgeTakenCount = new VPValue();
1767     return BackedgeTakenCount;
1768   }
1769
1770   void addVF(ElementCount VF) { VFs.insert(VF); }
1771
1772   bool hasVF(ElementCount VF) { return VFs.count(VF); }
1773
1774   const std::string &getName() const { return Name; }
1775
1776   void setName(const Twine &newName) { Name = newName.str(); }
1777
1778   /// Add \p VPVal to the pool of external definitions if it's not already
1779   /// in the pool.
1780   void addExternalDef(VPValue *VPVal) {
1781     VPExternalDefs.insert(VPVal);
1782   }
1783
1784   /// Add \p CBV to the vector of condition bit values.
1785   void addCBV(VPValue *CBV) {
1786     VPCBVs.push_back(CBV);
1787   }
1788
1789   void addVPValue(Value *V) {
1790     assert(V && "Trying to add a null Value to VPlan");
1791     assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
1792     VPValue *VPV = new VPValue(V);
1793     Value2VPValue[V] = VPV;
1794     VPValuesToFree.push_back(VPV);
1795   }
1796
1797   void addVPValue(Value *V, VPValue *VPV) {
1798     assert(V && "Trying to add a null Value to VPlan");
1799     assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
1800     Value2VPValue[V] = VPV;
1801   }
1802
1803   VPValue *getVPValue(Value *V) {
1804     assert(V && "Trying to get the VPValue of a null Value");
1805     assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
1806     return Value2VPValue[V];
1807   }
1808
1809   VPValue *getOrAddVPValue(Value *V) {
1810     assert(V && "Trying to get or add the VPValue of a null Value");
1811     if (!Value2VPValue.count(V))
1812       addVPValue(V);
1813     return getVPValue(V);
1814   }
1815
1816   void removeVPValueFor(Value *V) { Value2VPValue.erase(V); }
1817
1818   /// Return the VPLoopInfo analysis for this VPlan.
1819   VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
1820   const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }
1821
1822   /// Dump the plan to stderr (for debugging).
1823   void dump() const;
1824
1825   /// Returns a range mapping the values of the range \p Operands to their
1826   /// corresponding VPValues.
1827   iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>>
1828   mapToVPValues(User::op_range Operands) {
1829     std::function<VPValue *(Value *)> Fn = [this](Value *Op) {
1830       return getOrAddVPValue(Op);
1831     };
1832     return map_range(Operands, Fn);
1833   }
1834
1835 private:
1836   /// Add to the given dominator tree the header block and every new basic block
1837   /// that was created between it and the latch block, inclusive.
1838   static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
1839                                   BasicBlock *LoopPreHeaderBB,
1840                                   BasicBlock *LoopExitBB);
1841 };
1842
1843 /// VPlanPrinter prints a given VPlan to a given output stream. The printing is
1844 /// indented and follows the dot format.
1845 class VPlanPrinter {
1846   friend inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan);
1847   friend inline raw_ostream &operator<<(raw_ostream &OS,
1848                                         const struct VPlanIngredient &I);
1849
1850 private:
1851   raw_ostream &OS;
1852   const VPlan &Plan;
1853   unsigned Depth = 0;
1854   unsigned TabWidth = 2;
1855   std::string Indent;
1856   unsigned BID = 0;
1857   SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
1858
1859   VPSlotTracker SlotTracker;
1860
1861   VPlanPrinter(raw_ostream &O, const VPlan &P)
1862       : OS(O), Plan(P), SlotTracker(&P) {}
1863
1864   /// Handle indentation.
1865   void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
1866
1867   /// Print a given \p Block of the Plan.
1868   void dumpBlock(const VPBlockBase *Block);
1869
1870   /// Print the information related to the CFG edges going out of a given
1871   /// \p Block, followed by printing the successor blocks themselves.
1872   void dumpEdges(const VPBlockBase *Block);
1873
1874   /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
1875   /// its successor blocks.
1876   void dumpBasicBlock(const VPBasicBlock *BasicBlock);
1877
1878   /// Print a given \p Region of the Plan.
1879   void dumpRegion(const VPRegionBlock *Region);
1880
1881   unsigned getOrCreateBID(const VPBlockBase *Block) {
1882     return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
1883   }
1884
1885   const Twine getOrCreateName(const VPBlockBase *Block);
1886
1887   const Twine getUID(const VPBlockBase *Block);
1888
1889   /// Print the information related to a CFG edge between two VPBlockBases.
1890   void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
1891                 const Twine &Label);
1892
1893   void dump();
1894
1895   static void printAsIngredient(raw_ostream &O, const Value *V);
1896 };
1897
1898 struct VPlanIngredient {
1899   const Value *V;
1900
1901   VPlanIngredient(const Value *V) : V(V) {}
1902 };
1903
1904 inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
1905   VPlanPrinter::printAsIngredient(OS, I.V);
1906   return OS;
1907 }
1908
1909 inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
1910   VPlanPrinter Printer(OS, Plan);
1911   Printer.dump();
1912   return OS;
1913 }
1914
1915 //===----------------------------------------------------------------------===//
1916 // VPlan Utilities
1917 //===----------------------------------------------------------------------===//
1918
1919 /// Class that provides utilities for VPBlockBases in VPlan.
1920 class VPBlockUtils {
1921 public:
1922   VPBlockUtils() = delete;
1923
1924   /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
1925   /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
1926   /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. If \p BlockPtr
1927   /// has more than one successor, its conditional bit is propagated to \p
1928   /// NewBlock. \p NewBlock must have neither successors nor predecessors.
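  /// A minimal usage sketch; NewVPBB and PredVPBB are illustrative assumptions
  /// (PredVPBB being an existing block that does not yet have a successor):
  /// \code
  ///   auto *NewVPBB = new VPBasicBlock("new.block");
  ///   VPBlockUtils::insertBlockAfter(NewVPBB, PredVPBB);
  /// \endcode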
1929   static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
1930     assert(NewBlock->getSuccessors().empty() &&
1931            "Can't insert new block with successors.");
1932     // TODO: move successors from BlockPtr to NewBlock when this functionality
1933     // is necessary. For now, setOneSuccessor will assert if BlockPtr already
1934     // has successors.
1935     BlockPtr->setOneSuccessor(NewBlock);
1936     NewBlock->setPredecessors({BlockPtr});
1937     NewBlock->setParent(BlockPtr->getParent());
1938   }
1939
1940   /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
1941   /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p
1942   /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
1943   /// parent to \p IfTrue and \p IfFalse. \p Condition is set as the successor
1944   /// selector. \p BlockPtr must have no successors and \p IfTrue and \p IfFalse
1945   /// must have neither successors nor predecessors.
1946   static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
1947                                    VPValue *Condition, VPBlockBase *BlockPtr) {
1948     assert(IfTrue->getSuccessors().empty() &&
1949            "Can't insert IfTrue with successors.");
1950     assert(IfFalse->getSuccessors().empty() &&
1951            "Can't insert IfFalse with successors.");
1952     BlockPtr->setTwoSuccessors(IfTrue, IfFalse, Condition);
1953     IfTrue->setPredecessors({BlockPtr});
1954     IfFalse->setPredecessors({BlockPtr});
1955     IfTrue->setParent(BlockPtr->getParent());
1956     IfFalse->setParent(BlockPtr->getParent());
1957   }
1958
1959   /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
1960   /// the successors of \p From and \p From to the predecessors of \p To. Both
1961   /// VPBlockBases must have the same parent, which can be null. Both
1962   /// VPBlockBases can be already connected to other VPBlockBases.
1963   static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
1964     assert((From->getParent() == To->getParent()) &&
1965            "Can't connect two blocks with different parents");
1966     assert(From->getNumSuccessors() < 2 &&
1967            "Blocks can't have more than two successors.");
1968     From->appendSuccessor(To);
1969     To->appendPredecessor(From);
1970   }
1971
1972   /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
1973   /// from the successors of \p From and \p From from the predecessors of \p To.
1974   static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
1975     assert(To && "Successor to disconnect is null.");
1976     From->removeSuccessor(To);
1977     To->removePredecessor(From);
1978   }
1979
1980   /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge.
1981   static bool isBackEdge(const VPBlockBase *FromBlock,
1982                          const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) {
1983     assert(FromBlock->getParent() == ToBlock->getParent() &&
1984            FromBlock->getParent() && "Must be in same region");
1985     const VPLoop *FromLoop = VPLI->getLoopFor(FromBlock);
1986     const VPLoop *ToLoop = VPLI->getLoopFor(ToBlock);
1987     if (!FromLoop || !ToLoop || FromLoop != ToLoop)
1988       return false;
1989
1990     // A back-edge is a branch from the loop latch to its header.
1991     return ToLoop->isLoopLatch(FromBlock) && ToBlock == ToLoop->getHeader();
1992   }
1993
1994   /// Returns true if \p Block is a loop latch.
1995   static bool blockIsLoopLatch(const VPBlockBase *Block,
1996                                const VPLoopInfo *VPLInfo) {
1997     if (const VPLoop *ParentVPL = VPLInfo->getLoopFor(Block))
1998       return ParentVPL->isLoopLatch(Block);
1999
2000     return false;
2001   }
2002
2003   /// Count and return the number of successors of \p PredBlock excluding any
2004   /// backedges.
2005   static unsigned countSuccessorsNoBE(VPBlockBase *PredBlock,
2006                                       VPLoopInfo *VPLI) {
2007     unsigned Count = 0;
2008     for (VPBlockBase *SuccBlock : PredBlock->getSuccessors()) {
2009       if (!VPBlockUtils::isBackEdge(PredBlock, SuccBlock, VPLI))
2010         Count++;
2011     }
2012     return Count;
2013   }
2014 };
2015
2016 class VPInterleavedAccessInfo {
2017   DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
2018       InterleaveGroupMap;
2019
2020   /// Type for mapping of instruction based interleave groups to VPInstruction
2021   /// interleave groups.
2022   using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
2023                              InterleaveGroup<VPInstruction> *>;
2024
2025   /// Recursively traverse \p Region and populate VPlan based interleave groups
2026   /// based on \p IAI.
2027   void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
2028                    InterleavedAccessInfo &IAI);
2029   /// Recursively traverse \p Block and populate VPlan based interleave groups
2030   /// based on \p IAI.
2031   void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
2032                   InterleavedAccessInfo &IAI);
2033
2034 public:
2035   VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);
2036
2037   ~VPInterleavedAccessInfo() {
2038     SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
2039     // Avoid releasing a pointer twice.
2040     for (auto &I : InterleaveGroupMap)
2041       DelSet.insert(I.second);
2042     for (auto *Ptr : DelSet)
2043       delete Ptr;
2044   }
2045
2046   /// Get the interleave group that \p Instr belongs to.
2047   ///
2048   /// \returns nullptr if \p Instr does not belong to any interleave group.
2049   InterleaveGroup<VPInstruction> *
2050   getInterleaveGroup(VPInstruction *Instr) const {
2051     return InterleaveGroupMap.lookup(Instr);
2052   }
2053 };
2054
2055 /// Class that maps (parts of) an existing VPlan to trees of combined
2056 /// VPInstructions.
2057 class VPlanSlp {
2058   enum class OpMode { Failed, Load, Opcode };
2059
2060   /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
2061   /// DenseMap keys.
2062   struct BundleDenseMapInfo {
2063     static SmallVector<VPValue *, 4> getEmptyKey() {
2064       return {reinterpret_cast<VPValue *>(-1)};
2065     }
2066
2067     static SmallVector<VPValue *, 4> getTombstoneKey() {
2068       return {reinterpret_cast<VPValue *>(-2)};
2069     }
2070
2071     static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
2072       return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
2073     }
2074
2075     static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
2076                         const SmallVector<VPValue *, 4> &RHS) {
2077       return LHS == RHS;
2078     }
2079   };
2080
2081   /// Mapping of values in the original VPlan to a combined VPInstruction.
2082   DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
2083       BundleToCombined;
2084
2085   VPInterleavedAccessInfo &IAI;
2086
2087   /// Basic block to operate on. For now, only instructions in a single BB are
2088   /// considered.
2089   const VPBasicBlock &BB;
2090
2091   /// Indicates whether we managed to combine all visited instructions or not.
2092   bool CompletelySLP = true;
2093
2094   /// Width of the widest combined bundle in bits.
2095   unsigned WidestBundleBits = 0;
2096
2097   using MultiNodeOpTy =
2098       typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
2099
2100   // Input operand bundles for the current multi node. Each multi node operand
2101   // bundle contains values not matching the multi node's opcode. They will
2102   // be reordered in reorderMultiNodeOps, once we have completed building a
2103   // multi node.
2104   SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
2105
2106   /// Indicates whether we are building a multi node currently.
2107   bool MultiNodeActive = false;
2108
2109   /// Check if we can vectorize Operands together.
2110   bool areVectorizable(ArrayRef<VPValue *> Operands) const;
2111
2112   /// Add combined instruction \p New for the bundle \p Operands.
2113   void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
2114
2115   /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
2116   VPInstruction *markFailed();
2117
2118   /// Reorder operands in the multi node to maximize sequential memory access
2119   /// and commutative operations.
2120   SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
2121
2122   /// Choose the best candidate to use for the lane after \p Last. The set of
2123   /// candidates to choose from are values with an opcode matching \p Last's
2124   /// or loads consecutive to \p Last.
2125   std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
2126                                        SmallPtrSetImpl<VPValue *> &Candidates,
2127                                        VPInterleavedAccessInfo &IAI);
2128
2129   /// Print bundle \p Values to dbgs().
2130   void dumpBundle(ArrayRef<VPValue *> Values);
2131
2132 public:
2133   VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
2134
2135   ~VPlanSlp() = default;
2136
2137   /// Tries to build an SLP tree rooted at \p Operands and returns a
2138   /// VPInstruction combining \p Operands, if they can be combined.
2139   VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
2140
2141   /// Return the width of the widest combined bundle in bits.
2142   unsigned getWidestBundleBits() const { return WidestBundleBits; }
2143
2144   /// Return true if all visited instructions can be combined.
2145   bool isCompletelySLP() const { return CompletelySLP; }
2146 };
2147 } // end namespace llvm
2148
2149 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
2150