xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1  //===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  ///
9  /// \file
10  /// This file provides a LoopVectorizationPlanner class.
11  /// InnerLoopVectorizer vectorizes loops which contain only one basic
12  /// LoopVectorizationPlanner - drives the vectorization process after having
13  /// passed Legality checks.
14  /// The planner builds and optimizes the Vectorization Plans which record the
15  /// decisions how to vectorize the given loop. In particular, represent the
16  /// control-flow of the vectorized version, the replication of instructions that
17  /// are to be scalarized, and interleave access groups.
18  ///
19  /// Also provides a VPlan-based builder utility analogous to IRBuilder.
20  /// It provides an instruction-level API for generating VPInstructions while
21  /// abstracting away the Recipe manipulation details.
22  //===----------------------------------------------------------------------===//
23  
24  #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25  #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26  
27  #include "VPlan.h"
28  #include "llvm/ADT/SmallSet.h"
29  #include "llvm/Support/InstructionCost.h"
30  
31  namespace llvm {
32  
33  class LoopInfo;
34  class DominatorTree;
35  class LoopVectorizationLegality;
36  class LoopVectorizationCostModel;
37  class PredicatedScalarEvolution;
38  class LoopVectorizeHints;
39  class OptimizationRemarkEmitter;
40  class TargetTransformInfo;
41  class TargetLibraryInfo;
42  class VPRecipeBuilder;
43  
44  /// VPlan-based builder utility analogous to IRBuilder.
45  class VPBuilder {
46    VPBasicBlock *BB = nullptr;
47    VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
48  
49    /// Insert \p VPI in BB at InsertPt if BB is set.
tryInsertInstruction(VPInstruction * VPI)50    VPInstruction *tryInsertInstruction(VPInstruction *VPI) {
51      if (BB)
52        BB->insert(VPI, InsertPt);
53      return VPI;
54    }
55  
56    VPInstruction *createInstruction(unsigned Opcode,
57                                     ArrayRef<VPValue *> Operands, DebugLoc DL,
58                                     const Twine &Name = "") {
59      return tryInsertInstruction(new VPInstruction(Opcode, Operands, DL, Name));
60    }
61  
62    VPInstruction *createInstruction(unsigned Opcode,
63                                     std::initializer_list<VPValue *> Operands,
64                                     DebugLoc DL, const Twine &Name = "") {
65      return createInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name);
66    }
67  
68  public:
69    VPBuilder() = default;
VPBuilder(VPBasicBlock * InsertBB)70    VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
VPBuilder(VPRecipeBase * InsertPt)71    VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
72  
73    /// Clear the insertion point: created instructions will not be inserted into
74    /// a block.
clearInsertionPoint()75    void clearInsertionPoint() {
76      BB = nullptr;
77      InsertPt = VPBasicBlock::iterator();
78    }
79  
getInsertBlock()80    VPBasicBlock *getInsertBlock() const { return BB; }
getInsertPoint()81    VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
82  
83    /// Create a VPBuilder to insert after \p R.
getToInsertAfter(VPRecipeBase * R)84    static VPBuilder getToInsertAfter(VPRecipeBase *R) {
85      VPBuilder B;
86      B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
87      return B;
88    }
89  
90    /// InsertPoint - A saved insertion point.
91    class VPInsertPoint {
92      VPBasicBlock *Block = nullptr;
93      VPBasicBlock::iterator Point;
94  
95    public:
96      /// Creates a new insertion point which doesn't point to anything.
97      VPInsertPoint() = default;
98  
99      /// Creates a new insertion point at the given location.
VPInsertPoint(VPBasicBlock * InsertBlock,VPBasicBlock::iterator InsertPoint)100      VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
101          : Block(InsertBlock), Point(InsertPoint) {}
102  
103      /// Returns true if this insert point is set.
isSet()104      bool isSet() const { return Block != nullptr; }
105  
getBlock()106      VPBasicBlock *getBlock() const { return Block; }
getPoint()107      VPBasicBlock::iterator getPoint() const { return Point; }
108    };
109  
110    /// Sets the current insert point to a previously-saved location.
restoreIP(VPInsertPoint IP)111    void restoreIP(VPInsertPoint IP) {
112      if (IP.isSet())
113        setInsertPoint(IP.getBlock(), IP.getPoint());
114      else
115        clearInsertionPoint();
116    }
117  
118    /// This specifies that created VPInstructions should be appended to the end
119    /// of the specified block.
setInsertPoint(VPBasicBlock * TheBB)120    void setInsertPoint(VPBasicBlock *TheBB) {
121      assert(TheBB && "Attempting to set a null insert point");
122      BB = TheBB;
123      InsertPt = BB->end();
124    }
125  
126    /// This specifies that created instructions should be inserted at the
127    /// specified point.
setInsertPoint(VPBasicBlock * TheBB,VPBasicBlock::iterator IP)128    void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
129      BB = TheBB;
130      InsertPt = IP;
131    }
132  
133    /// This specifies that created instructions should be inserted at the
134    /// specified point.
setInsertPoint(VPRecipeBase * IP)135    void setInsertPoint(VPRecipeBase *IP) {
136      BB = IP->getParent();
137      InsertPt = IP->getIterator();
138    }
139  
140    /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
141    /// its underlying Instruction.
142    VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
143                                Instruction *Inst = nullptr,
144                                const Twine &Name = "") {
145      DebugLoc DL;
146      if (Inst)
147        DL = Inst->getDebugLoc();
148      VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL, Name);
149      NewVPInst->setUnderlyingValue(Inst);
150      return NewVPInst;
151    }
152    VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
153                                DebugLoc DL, const Twine &Name = "") {
154      return createInstruction(Opcode, Operands, DL, Name);
155    }
156  
157    VPInstruction *createOverflowingOp(unsigned Opcode,
158                                       std::initializer_list<VPValue *> Operands,
159                                       VPRecipeWithIRFlags::WrapFlagsTy WrapFlags,
160                                       DebugLoc DL = {}, const Twine &Name = "") {
161      return tryInsertInstruction(
162          new VPInstruction(Opcode, Operands, WrapFlags, DL, Name));
163    }
164    VPValue *createNot(VPValue *Operand, DebugLoc DL = {},
165                       const Twine &Name = "") {
166      return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
167    }
168  
169    VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
170                       const Twine &Name = "") {
171      return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
172    }
173  
174    VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
175                      const Twine &Name = "") {
176  
177      return tryInsertInstruction(new VPInstruction(
178          Instruction::BinaryOps::Or, {LHS, RHS},
179          VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
180    }
181  
182    VPValue *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
183                              const Twine &Name = "") {
184      return tryInsertInstruction(
185          new VPInstruction(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name));
186    }
187  
188    VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
189                          DebugLoc DL = {}, const Twine &Name = "",
190                          std::optional<FastMathFlags> FMFs = std::nullopt) {
191      auto *Select =
192          FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
193                                   *FMFs, DL, Name)
194               : new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
195                                   DL, Name);
196      return tryInsertInstruction(Select);
197    }
198  
199    /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
200    /// and \p B.
201    /// TODO: add createFCmp when needed.
202    VPValue *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
203                        DebugLoc DL = {}, const Twine &Name = "");
204  
205    //===--------------------------------------------------------------------===//
206    // RAII helpers.
207    //===--------------------------------------------------------------------===//
208  
209    /// RAII object that stores the current insertion point and restores it when
210    /// the object is destroyed.
211    class InsertPointGuard {
212      VPBuilder &Builder;
213      VPBasicBlock *Block;
214      VPBasicBlock::iterator Point;
215  
216    public:
InsertPointGuard(VPBuilder & B)217      InsertPointGuard(VPBuilder &B)
218          : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
219  
220      InsertPointGuard(const InsertPointGuard &) = delete;
221      InsertPointGuard &operator=(const InsertPointGuard &) = delete;
222  
~InsertPointGuard()223      ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
224    };
225  };
226  
227  /// TODO: The following VectorizationFactor was pulled out of
228  /// LoopVectorizationCostModel class. LV also deals with
229  /// VectorizerParams::VectorizationFactor.
230  /// We need to streamline them.
231  
232  /// Information about vectorization costs.
233  struct VectorizationFactor {
234    /// Vector width with best cost.
235    ElementCount Width;
236  
237    /// Cost of the loop with that width.
238    InstructionCost Cost;
239  
240    /// Cost of the scalar loop.
241    InstructionCost ScalarCost;
242  
243    /// The minimum trip count required to make vectorization profitable, e.g. due
244    /// to runtime checks.
245    ElementCount MinProfitableTripCount;
246  
VectorizationFactorVectorizationFactor247    VectorizationFactor(ElementCount Width, InstructionCost Cost,
248                        InstructionCost ScalarCost)
249        : Width(Width), Cost(Cost), ScalarCost(ScalarCost) {}
250  
251    /// Width 1 means no vectorization, cost 0 means uncomputed cost.
DisabledVectorizationFactor252    static VectorizationFactor Disabled() {
253      return {ElementCount::getFixed(1), 0, 0};
254    }
255  
256    bool operator==(const VectorizationFactor &rhs) const {
257      return Width == rhs.Width && Cost == rhs.Cost;
258    }
259  
260    bool operator!=(const VectorizationFactor &rhs) const {
261      return !(*this == rhs);
262    }
263  };
264  
265  /// A class that represents two vectorization factors (initialized with 0 by
266  /// default). One for fixed-width vectorization and one for scalable
267  /// vectorization. This can be used by the vectorizer to choose from a range of
268  /// fixed and/or scalable VFs in order to find the most cost-effective VF to
269  /// vectorize with.
270  struct FixedScalableVFPair {
271    ElementCount FixedVF;
272    ElementCount ScalableVF;
273  
FixedScalableVFPairFixedScalableVFPair274    FixedScalableVFPair()
275        : FixedVF(ElementCount::getFixed(0)),
276          ScalableVF(ElementCount::getScalable(0)) {}
FixedScalableVFPairFixedScalableVFPair277    FixedScalableVFPair(const ElementCount &Max) : FixedScalableVFPair() {
278      *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
279    }
FixedScalableVFPairFixedScalableVFPair280    FixedScalableVFPair(const ElementCount &FixedVF,
281                        const ElementCount &ScalableVF)
282        : FixedVF(FixedVF), ScalableVF(ScalableVF) {
283      assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
284             "Invalid scalable properties");
285    }
286  
getNoneFixedScalableVFPair287    static FixedScalableVFPair getNone() { return FixedScalableVFPair(); }
288  
289    /// \return true if either fixed- or scalable VF is non-zero.
290    explicit operator bool() const { return FixedVF || ScalableVF; }
291  
292    /// \return true if either fixed- or scalable VF is a valid vector VF.
hasVectorFixedScalableVFPair293    bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
294  };
295  
296  /// Planner drives the vectorization process after having passed
297  /// Legality checks.
298  class LoopVectorizationPlanner {
299    /// The loop that we evaluate.
300    Loop *OrigLoop;
301  
302    /// Loop Info analysis.
303    LoopInfo *LI;
304  
305    /// The dominator tree.
306    DominatorTree *DT;
307  
308    /// Target Library Info.
309    const TargetLibraryInfo *TLI;
310  
311    /// Target Transform Info.
312    const TargetTransformInfo &TTI;
313  
314    /// The legality analysis.
315    LoopVectorizationLegality *Legal;
316  
317    /// The profitability analysis.
318    LoopVectorizationCostModel &CM;
319  
320    /// The interleaved access analysis.
321    InterleavedAccessInfo &IAI;
322  
323    PredicatedScalarEvolution &PSE;
324  
325    const LoopVectorizeHints &Hints;
326  
327    OptimizationRemarkEmitter *ORE;
328  
329    SmallVector<VPlanPtr, 4> VPlans;
330  
331    /// Profitable vector factors.
332    SmallVector<VectorizationFactor, 8> ProfitableVFs;
333  
334    /// A builder used to construct the current plan.
335    VPBuilder Builder;
336  
337    /// Computes the cost of \p Plan for vectorization factor \p VF.
338    ///
339    /// The current implementation requires access to the
340    /// LoopVectorizationLegality to handle inductions and reductions, which is
341    /// why it is kept separate from the VPlan-only cost infrastructure.
342    ///
343    /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
344    /// been retired.
345    InstructionCost cost(VPlan &Plan, ElementCount VF) const;
346  
347  public:
LoopVectorizationPlanner(Loop * L,LoopInfo * LI,DominatorTree * DT,const TargetLibraryInfo * TLI,const TargetTransformInfo & TTI,LoopVectorizationLegality * Legal,LoopVectorizationCostModel & CM,InterleavedAccessInfo & IAI,PredicatedScalarEvolution & PSE,const LoopVectorizeHints & Hints,OptimizationRemarkEmitter * ORE)348    LoopVectorizationPlanner(
349        Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
350        const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal,
351        LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI,
352        PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints,
353        OptimizationRemarkEmitter *ORE)
354        : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
355          IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
356  
357    /// Plan how to best vectorize, return the best VF and its cost, or
358    /// std::nullopt if vectorization and interleaving should be avoided up front.
359    std::optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC);
360  
361    /// Use the VPlan-native path to plan how to best vectorize, return the best
362    /// VF and its cost.
363    VectorizationFactor planInVPlanNativePath(ElementCount UserVF);
364  
365    /// Return the best VPlan for \p VF.
366    VPlan &getBestPlanFor(ElementCount VF) const;
367  
368    /// Return the most profitable plan and fix its VF to the most profitable one.
369    VPlan &getBestPlan() const;
370  
371    /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
372    /// according to the best selected \p VF and  \p UF.
373    ///
374    /// TODO: \p IsEpilogueVectorization is needed to avoid issues due to epilogue
375    /// vectorization re-using plans for both the main and epilogue vector loops.
376    /// It should be removed once the re-use issue has been fixed.
377    /// \p ExpandedSCEVs is passed during execution of the plan for epilogue loop
378    /// to re-use expansion results generated during main plan execution.
379    ///
380    /// Returns a mapping of SCEVs to their expanded IR values and a mapping for
381    /// the reduction resume values. Note that this is a temporary workaround
382    /// needed due to the current epilogue handling.
383    std::pair<DenseMap<const SCEV *, Value *>,
384              DenseMap<const RecurrenceDescriptor *, Value *>>
385    executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
386                InnerLoopVectorizer &LB, DominatorTree *DT,
387                bool IsEpilogueVectorization,
388                const DenseMap<const SCEV *, Value *> *ExpandedSCEVs = nullptr);
389  
390  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
391    void printPlans(raw_ostream &O);
392  #endif
393  
394    /// Look through the existing plans and return true if we have one with
395    /// vectorization factor \p VF.
hasPlanWithVF(ElementCount VF)396    bool hasPlanWithVF(ElementCount VF) const {
397      return any_of(VPlans,
398                    [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
399    }
400  
401    /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
402    /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
403    /// returned value holds for the entire \p Range.
404    static bool
405    getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
406                             VFRange &Range);
407  
408    /// \return The most profitable vectorization factor and the cost of that VF
409    /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
410    /// epilogue vectorization is not supported for the loop.
411    VectorizationFactor
412    selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC);
413  
414  protected:
415    /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
416    /// according to the information gathered by Legal when it checked if it is
417    /// legal to vectorize the loop.
418    void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
419  
420  private:
421    /// Build a VPlan according to the information gathered by Legal. \return a
422    /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
423    /// exclusive, possibly decreasing \p Range.End.
424    VPlanPtr buildVPlan(VFRange &Range);
425  
426    /// Build a VPlan using VPRecipes according to the information gather by
427    /// Legal. This method is only used for the legacy inner loop vectorizer.
428    /// \p Range's largest included VF is restricted to the maximum VF the
429    /// returned VPlan is valid for. If no VPlan can be built for the input range,
430    /// set the largest included VF to the maximum VF for which no plan could be
431    /// built.
432    VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range);
433  
434    /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
435    /// according to the information gathered by Legal when it checked if it is
436    /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
437    void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
438  
439    // Adjust the recipes for reductions. For in-loop reductions the chain of
440    // instructions leading from the loop exit instr to the phi need to be
441    // converted to reductions, with one operand being vector and the other being
442    // the scalar reduction chain. For other reductions, a select is introduced
443    // between the phi and live-out recipes when folding the tail.
444    void adjustRecipesForReductions(VPlanPtr &Plan,
445                                    VPRecipeBuilder &RecipeBuilder,
446                                    ElementCount MinVF);
447  
448    /// \return The most profitable vectorization factor for the available VPlans
449    /// and the cost of that VF.
450    /// This is now only used to verify the decisions by the new VPlan-based
451    /// cost-model and will be retired once the VPlan-based cost-model is
452    /// stabilized.
453    VectorizationFactor selectVectorizationFactor();
454  
455    /// Returns true if the per-lane cost of VectorizationFactor A is lower than
456    /// that of B.
457    bool isMoreProfitable(const VectorizationFactor &A,
458                          const VectorizationFactor &B) const;
459  
460    /// Determines if we have the infrastructure to vectorize the loop and its
461    /// epilogue, assuming the main loop is vectorized by \p VF.
462    bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
463  };
464  
465  } // namespace llvm
466  
467  #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
468