1 //===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file provides a LoopVectorizationPlanner class. 11 /// InnerLoopVectorizer vectorizes loops which contain only one basic 12 /// LoopVectorizationPlanner - drives the vectorization process after having 13 /// passed Legality checks. 14 /// The planner builds and optimizes the Vectorization Plans which record the 15 /// decisions how to vectorize the given loop. In particular, represent the 16 /// control-flow of the vectorized version, the replication of instructions that 17 /// are to be scalarized, and interleave access groups. 18 /// 19 /// Also provides a VPlan-based builder utility analogous to IRBuilder. 20 /// It provides an instruction-level API for generating VPInstructions while 21 /// abstracting away the Recipe manipulation details. 22 //===----------------------------------------------------------------------===// 23 24 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H 25 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H 26 27 #include "VPlan.h" 28 #include "llvm/ADT/SmallSet.h" 29 #include "llvm/Support/InstructionCost.h" 30 31 namespace llvm { 32 33 class LoopInfo; 34 class DominatorTree; 35 class LoopVectorizationLegality; 36 class LoopVectorizationCostModel; 37 class PredicatedScalarEvolution; 38 class LoopVectorizeHints; 39 class OptimizationRemarkEmitter; 40 class TargetTransformInfo; 41 class TargetLibraryInfo; 42 class VPRecipeBuilder; 43 44 /// VPlan-based builder utility analogous to IRBuilder. 45 class VPBuilder { 46 VPBasicBlock *BB = nullptr; 47 VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator(); 48 49 /// Insert \p VPI in BB at InsertPt if BB is set. tryInsertInstruction(VPInstruction * VPI)50 VPInstruction *tryInsertInstruction(VPInstruction *VPI) { 51 if (BB) 52 BB->insert(VPI, InsertPt); 53 return VPI; 54 } 55 56 VPInstruction *createInstruction(unsigned Opcode, 57 ArrayRef<VPValue *> Operands, DebugLoc DL, 58 const Twine &Name = "") { 59 return tryInsertInstruction(new VPInstruction(Opcode, Operands, DL, Name)); 60 } 61 62 VPInstruction *createInstruction(unsigned Opcode, 63 std::initializer_list<VPValue *> Operands, 64 DebugLoc DL, const Twine &Name = "") { 65 return createInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name); 66 } 67 68 public: 69 VPBuilder() = default; VPBuilder(VPBasicBlock * InsertBB)70 VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); } VPBuilder(VPRecipeBase * InsertPt)71 VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); } 72 73 /// Clear the insertion point: created instructions will not be inserted into 74 /// a block. clearInsertionPoint()75 void clearInsertionPoint() { 76 BB = nullptr; 77 InsertPt = VPBasicBlock::iterator(); 78 } 79 getInsertBlock()80 VPBasicBlock *getInsertBlock() const { return BB; } getInsertPoint()81 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; } 82 83 /// Create a VPBuilder to insert after \p R. getToInsertAfter(VPRecipeBase * R)84 static VPBuilder getToInsertAfter(VPRecipeBase *R) { 85 VPBuilder B; 86 B.setInsertPoint(R->getParent(), std::next(R->getIterator())); 87 return B; 88 } 89 90 /// InsertPoint - A saved insertion point. 91 class VPInsertPoint { 92 VPBasicBlock *Block = nullptr; 93 VPBasicBlock::iterator Point; 94 95 public: 96 /// Creates a new insertion point which doesn't point to anything. 97 VPInsertPoint() = default; 98 99 /// Creates a new insertion point at the given location. VPInsertPoint(VPBasicBlock * InsertBlock,VPBasicBlock::iterator InsertPoint)100 VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint) 101 : Block(InsertBlock), Point(InsertPoint) {} 102 103 /// Returns true if this insert point is set. isSet()104 bool isSet() const { return Block != nullptr; } 105 getBlock()106 VPBasicBlock *getBlock() const { return Block; } getPoint()107 VPBasicBlock::iterator getPoint() const { return Point; } 108 }; 109 110 /// Sets the current insert point to a previously-saved location. restoreIP(VPInsertPoint IP)111 void restoreIP(VPInsertPoint IP) { 112 if (IP.isSet()) 113 setInsertPoint(IP.getBlock(), IP.getPoint()); 114 else 115 clearInsertionPoint(); 116 } 117 118 /// This specifies that created VPInstructions should be appended to the end 119 /// of the specified block. setInsertPoint(VPBasicBlock * TheBB)120 void setInsertPoint(VPBasicBlock *TheBB) { 121 assert(TheBB && "Attempting to set a null insert point"); 122 BB = TheBB; 123 InsertPt = BB->end(); 124 } 125 126 /// This specifies that created instructions should be inserted at the 127 /// specified point. setInsertPoint(VPBasicBlock * TheBB,VPBasicBlock::iterator IP)128 void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) { 129 BB = TheBB; 130 InsertPt = IP; 131 } 132 133 /// This specifies that created instructions should be inserted at the 134 /// specified point. setInsertPoint(VPRecipeBase * IP)135 void setInsertPoint(VPRecipeBase *IP) { 136 BB = IP->getParent(); 137 InsertPt = IP->getIterator(); 138 } 139 140 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as 141 /// its underlying Instruction. 142 VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands, 143 Instruction *Inst = nullptr, 144 const Twine &Name = "") { 145 DebugLoc DL; 146 if (Inst) 147 DL = Inst->getDebugLoc(); 148 VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL, Name); 149 NewVPInst->setUnderlyingValue(Inst); 150 return NewVPInst; 151 } 152 VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands, 153 DebugLoc DL, const Twine &Name = "") { 154 return createInstruction(Opcode, Operands, DL, Name); 155 } 156 157 VPInstruction *createOverflowingOp(unsigned Opcode, 158 std::initializer_list<VPValue *> Operands, 159 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags, 160 DebugLoc DL = {}, const Twine &Name = "") { 161 return tryInsertInstruction( 162 new VPInstruction(Opcode, Operands, WrapFlags, DL, Name)); 163 } 164 VPValue *createNot(VPValue *Operand, DebugLoc DL = {}, 165 const Twine &Name = "") { 166 return createInstruction(VPInstruction::Not, {Operand}, DL, Name); 167 } 168 169 VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {}, 170 const Twine &Name = "") { 171 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name); 172 } 173 174 VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {}, 175 const Twine &Name = "") { 176 177 return tryInsertInstruction(new VPInstruction( 178 Instruction::BinaryOps::Or, {LHS, RHS}, 179 VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name)); 180 } 181 182 VPValue *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {}, 183 const Twine &Name = "") { 184 return tryInsertInstruction( 185 new VPInstruction(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name)); 186 } 187 188 VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, 189 DebugLoc DL = {}, const Twine &Name = "", 190 std::optional<FastMathFlags> FMFs = std::nullopt) { 191 auto *Select = 192 FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal}, 193 *FMFs, DL, Name) 194 : new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal}, 195 DL, Name); 196 return tryInsertInstruction(Select); 197 } 198 199 /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A 200 /// and \p B. 201 /// TODO: add createFCmp when needed. 202 VPValue *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, 203 DebugLoc DL = {}, const Twine &Name = ""); 204 205 //===--------------------------------------------------------------------===// 206 // RAII helpers. 207 //===--------------------------------------------------------------------===// 208 209 /// RAII object that stores the current insertion point and restores it when 210 /// the object is destroyed. 211 class InsertPointGuard { 212 VPBuilder &Builder; 213 VPBasicBlock *Block; 214 VPBasicBlock::iterator Point; 215 216 public: InsertPointGuard(VPBuilder & B)217 InsertPointGuard(VPBuilder &B) 218 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {} 219 220 InsertPointGuard(const InsertPointGuard &) = delete; 221 InsertPointGuard &operator=(const InsertPointGuard &) = delete; 222 ~InsertPointGuard()223 ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); } 224 }; 225 }; 226 227 /// TODO: The following VectorizationFactor was pulled out of 228 /// LoopVectorizationCostModel class. LV also deals with 229 /// VectorizerParams::VectorizationFactor. 230 /// We need to streamline them. 231 232 /// Information about vectorization costs. 233 struct VectorizationFactor { 234 /// Vector width with best cost. 235 ElementCount Width; 236 237 /// Cost of the loop with that width. 238 InstructionCost Cost; 239 240 /// Cost of the scalar loop. 241 InstructionCost ScalarCost; 242 243 /// The minimum trip count required to make vectorization profitable, e.g. due 244 /// to runtime checks. 245 ElementCount MinProfitableTripCount; 246 VectorizationFactorVectorizationFactor247 VectorizationFactor(ElementCount Width, InstructionCost Cost, 248 InstructionCost ScalarCost) 249 : Width(Width), Cost(Cost), ScalarCost(ScalarCost) {} 250 251 /// Width 1 means no vectorization, cost 0 means uncomputed cost. DisabledVectorizationFactor252 static VectorizationFactor Disabled() { 253 return {ElementCount::getFixed(1), 0, 0}; 254 } 255 256 bool operator==(const VectorizationFactor &rhs) const { 257 return Width == rhs.Width && Cost == rhs.Cost; 258 } 259 260 bool operator!=(const VectorizationFactor &rhs) const { 261 return !(*this == rhs); 262 } 263 }; 264 265 /// A class that represents two vectorization factors (initialized with 0 by 266 /// default). One for fixed-width vectorization and one for scalable 267 /// vectorization. This can be used by the vectorizer to choose from a range of 268 /// fixed and/or scalable VFs in order to find the most cost-effective VF to 269 /// vectorize with. 270 struct FixedScalableVFPair { 271 ElementCount FixedVF; 272 ElementCount ScalableVF; 273 FixedScalableVFPairFixedScalableVFPair274 FixedScalableVFPair() 275 : FixedVF(ElementCount::getFixed(0)), 276 ScalableVF(ElementCount::getScalable(0)) {} FixedScalableVFPairFixedScalableVFPair277 FixedScalableVFPair(const ElementCount &Max) : FixedScalableVFPair() { 278 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max; 279 } FixedScalableVFPairFixedScalableVFPair280 FixedScalableVFPair(const ElementCount &FixedVF, 281 const ElementCount &ScalableVF) 282 : FixedVF(FixedVF), ScalableVF(ScalableVF) { 283 assert(!FixedVF.isScalable() && ScalableVF.isScalable() && 284 "Invalid scalable properties"); 285 } 286 getNoneFixedScalableVFPair287 static FixedScalableVFPair getNone() { return FixedScalableVFPair(); } 288 289 /// \return true if either fixed- or scalable VF is non-zero. 290 explicit operator bool() const { return FixedVF || ScalableVF; } 291 292 /// \return true if either fixed- or scalable VF is a valid vector VF. hasVectorFixedScalableVFPair293 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); } 294 }; 295 296 /// Planner drives the vectorization process after having passed 297 /// Legality checks. 298 class LoopVectorizationPlanner { 299 /// The loop that we evaluate. 300 Loop *OrigLoop; 301 302 /// Loop Info analysis. 303 LoopInfo *LI; 304 305 /// The dominator tree. 306 DominatorTree *DT; 307 308 /// Target Library Info. 309 const TargetLibraryInfo *TLI; 310 311 /// Target Transform Info. 312 const TargetTransformInfo &TTI; 313 314 /// The legality analysis. 315 LoopVectorizationLegality *Legal; 316 317 /// The profitability analysis. 318 LoopVectorizationCostModel &CM; 319 320 /// The interleaved access analysis. 321 InterleavedAccessInfo &IAI; 322 323 PredicatedScalarEvolution &PSE; 324 325 const LoopVectorizeHints &Hints; 326 327 OptimizationRemarkEmitter *ORE; 328 329 SmallVector<VPlanPtr, 4> VPlans; 330 331 /// Profitable vector factors. 332 SmallVector<VectorizationFactor, 8> ProfitableVFs; 333 334 /// A builder used to construct the current plan. 335 VPBuilder Builder; 336 337 /// Computes the cost of \p Plan for vectorization factor \p VF. 338 /// 339 /// The current implementation requires access to the 340 /// LoopVectorizationLegality to handle inductions and reductions, which is 341 /// why it is kept separate from the VPlan-only cost infrastructure. 342 /// 343 /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has 344 /// been retired. 345 InstructionCost cost(VPlan &Plan, ElementCount VF) const; 346 347 public: LoopVectorizationPlanner(Loop * L,LoopInfo * LI,DominatorTree * DT,const TargetLibraryInfo * TLI,const TargetTransformInfo & TTI,LoopVectorizationLegality * Legal,LoopVectorizationCostModel & CM,InterleavedAccessInfo & IAI,PredicatedScalarEvolution & PSE,const LoopVectorizeHints & Hints,OptimizationRemarkEmitter * ORE)348 LoopVectorizationPlanner( 349 Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, 350 const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, 351 LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI, 352 PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, 353 OptimizationRemarkEmitter *ORE) 354 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), 355 IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {} 356 357 /// Plan how to best vectorize, return the best VF and its cost, or 358 /// std::nullopt if vectorization and interleaving should be avoided up front. 359 std::optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC); 360 361 /// Use the VPlan-native path to plan how to best vectorize, return the best 362 /// VF and its cost. 363 VectorizationFactor planInVPlanNativePath(ElementCount UserVF); 364 365 /// Return the best VPlan for \p VF. 366 VPlan &getBestPlanFor(ElementCount VF) const; 367 368 /// Return the most profitable plan and fix its VF to the most profitable one. 369 VPlan &getBestPlan() const; 370 371 /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan 372 /// according to the best selected \p VF and \p UF. 373 /// 374 /// TODO: \p IsEpilogueVectorization is needed to avoid issues due to epilogue 375 /// vectorization re-using plans for both the main and epilogue vector loops. 376 /// It should be removed once the re-use issue has been fixed. 377 /// \p ExpandedSCEVs is passed during execution of the plan for epilogue loop 378 /// to re-use expansion results generated during main plan execution. 379 /// 380 /// Returns a mapping of SCEVs to their expanded IR values and a mapping for 381 /// the reduction resume values. Note that this is a temporary workaround 382 /// needed due to the current epilogue handling. 383 std::pair<DenseMap<const SCEV *, Value *>, 384 DenseMap<const RecurrenceDescriptor *, Value *>> 385 executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, 386 InnerLoopVectorizer &LB, DominatorTree *DT, 387 bool IsEpilogueVectorization, 388 const DenseMap<const SCEV *, Value *> *ExpandedSCEVs = nullptr); 389 390 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 391 void printPlans(raw_ostream &O); 392 #endif 393 394 /// Look through the existing plans and return true if we have one with 395 /// vectorization factor \p VF. hasPlanWithVF(ElementCount VF)396 bool hasPlanWithVF(ElementCount VF) const { 397 return any_of(VPlans, 398 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); }); 399 } 400 401 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying 402 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the 403 /// returned value holds for the entire \p Range. 404 static bool 405 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate, 406 VFRange &Range); 407 408 /// \return The most profitable vectorization factor and the cost of that VF 409 /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if 410 /// epilogue vectorization is not supported for the loop. 411 VectorizationFactor 412 selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC); 413 414 protected: 415 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, 416 /// according to the information gathered by Legal when it checked if it is 417 /// legal to vectorize the loop. 418 void buildVPlans(ElementCount MinVF, ElementCount MaxVF); 419 420 private: 421 /// Build a VPlan according to the information gathered by Legal. \return a 422 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End 423 /// exclusive, possibly decreasing \p Range.End. 424 VPlanPtr buildVPlan(VFRange &Range); 425 426 /// Build a VPlan using VPRecipes according to the information gather by 427 /// Legal. This method is only used for the legacy inner loop vectorizer. 428 /// \p Range's largest included VF is restricted to the maximum VF the 429 /// returned VPlan is valid for. If no VPlan can be built for the input range, 430 /// set the largest included VF to the maximum VF for which no plan could be 431 /// built. 432 VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range); 433 434 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, 435 /// according to the information gathered by Legal when it checked if it is 436 /// legal to vectorize the loop. This method creates VPlans using VPRecipes. 437 void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF); 438 439 // Adjust the recipes for reductions. For in-loop reductions the chain of 440 // instructions leading from the loop exit instr to the phi need to be 441 // converted to reductions, with one operand being vector and the other being 442 // the scalar reduction chain. For other reductions, a select is introduced 443 // between the phi and live-out recipes when folding the tail. 444 void adjustRecipesForReductions(VPlanPtr &Plan, 445 VPRecipeBuilder &RecipeBuilder, 446 ElementCount MinVF); 447 448 /// \return The most profitable vectorization factor for the available VPlans 449 /// and the cost of that VF. 450 /// This is now only used to verify the decisions by the new VPlan-based 451 /// cost-model and will be retired once the VPlan-based cost-model is 452 /// stabilized. 453 VectorizationFactor selectVectorizationFactor(); 454 455 /// Returns true if the per-lane cost of VectorizationFactor A is lower than 456 /// that of B. 457 bool isMoreProfitable(const VectorizationFactor &A, 458 const VectorizationFactor &B) const; 459 460 /// Determines if we have the infrastructure to vectorize the loop and its 461 /// epilogue, assuming the main loop is vectorized by \p VF. 462 bool isCandidateForEpilogueVectorization(const ElementCount VF) const; 463 }; 464 465 } // namespace llvm 466 467 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H 468