xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- VPRecipeBuilder.h - Helper class to build recipes --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H
10 #define LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H
11 
12 #include "LoopVectorizationPlanner.h"
13 #include "VPlan.h"
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
16 
17 namespace llvm {
18 
19 class LoopVectorizationLegality;
20 class LoopVectorizationCostModel;
21 class TargetLibraryInfo;
22 class TargetTransformInfo;
23 struct HistogramInfo;
24 struct VFRange;
25 
26 /// A chain of instructions that form a partial reduction.
27 /// Designed to match either:
28 ///   reduction_bin_op (extend (A), accumulator), or
29 ///   reduction_bin_op (bin_op (extend (A), (extend (B))), accumulator).
30 struct PartialReductionChain {
PartialReductionChainPartialReductionChain31   PartialReductionChain(Instruction *Reduction, Instruction *ExtendA,
32                         Instruction *ExtendB, Instruction *ExtendUser)
33       : Reduction(Reduction), ExtendA(ExtendA), ExtendB(ExtendB),
34         ExtendUser(ExtendUser) {}
35   /// The top-level binary operation that forms the reduction to a scalar
36   /// after the loop body.
37   Instruction *Reduction;
38   /// The extension of each of the inner binary operation's operands.
39   Instruction *ExtendA;
40   Instruction *ExtendB;
41 
42   /// The user of the extends that is then reduced.
43   Instruction *ExtendUser;
44 };
45 
46 /// Helper class to create VPRecipies from IR instructions.
47 class VPRecipeBuilder {
48   /// The VPlan new recipes are added to.
49   VPlan &Plan;
50 
51   /// The loop that we evaluate.
52   Loop *OrigLoop;
53 
54   /// Target Library Info.
55   const TargetLibraryInfo *TLI;
56 
57   // Target Transform Info.
58   const TargetTransformInfo *TTI;
59 
60   /// The legality analysis.
61   LoopVectorizationLegality *Legal;
62 
63   /// The profitablity analysis.
64   LoopVectorizationCostModel &CM;
65 
66   PredicatedScalarEvolution &PSE;
67 
68   VPBuilder &Builder;
69 
70   /// The mask of each VPBB, generated earlier and used for predicating recipes
71   /// in VPBB.
72   /// TODO: remove by applying predication when generating the masks.
73   DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache;
74 
75   // VPlan construction support: Hold a mapping from ingredients to
76   // their recipe.
77   DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
78 
79   /// Cross-iteration reduction & first-order recurrence phis for which we need
80   /// to add the incoming value from the backedge after all recipes have been
81   /// created.
82   SmallVector<VPHeaderPHIRecipe *, 4> PhisToFix;
83 
84   /// A mapping of partial reduction exit instructions to their scaling factor.
85   DenseMap<const Instruction *, unsigned> ScaledReductionMap;
86 
87   /// Loop versioning instance for getting noalias metadata guaranteed by
88   /// runtime checks.
89   LoopVersioning *LVer;
90 
91   /// Check if \p I can be widened at the start of \p Range and possibly
92   /// decrease the range such that the returned value holds for the entire \p
93   /// Range. The function should not be called for memory instructions or calls.
94   bool shouldWiden(Instruction *I, VFRange &Range) const;
95 
96   /// Check if the load or store instruction \p I should widened for \p
97   /// Range.Start and potentially masked. Such instructions are handled by a
98   /// recipe that takes an additional VPInstruction for the mask.
99   VPWidenMemoryRecipe *tryToWidenMemory(Instruction *I,
100                                         ArrayRef<VPValue *> Operands,
101                                         VFRange &Range);
102 
103   /// Check if an induction recipe should be constructed for \p Phi. If so build
104   /// and return it. If not, return null.
105   VPHeaderPHIRecipe *tryToOptimizeInductionPHI(PHINode *Phi,
106                                                ArrayRef<VPValue *> Operands,
107                                                VFRange &Range);
108 
109   /// Optimize the special case where the operand of \p I is a constant integer
110   /// induction variable.
111   VPWidenIntOrFpInductionRecipe *
112   tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands,
113                                  VFRange &Range);
114 
115   /// Handle call instructions. If \p CI can be widened for \p Range.Start,
116   /// return a new VPWidenCallRecipe or VPWidenIntrinsicRecipe. Range.End may be
117   /// decreased to ensure same decision from \p Range.Start to \p Range.End.
118   VPSingleDefRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands,
119                                     VFRange &Range);
120 
121   /// Check if \p I has an opcode that can be widened and return a VPWidenRecipe
122   /// if it can. The function should only be called if the cost-model indicates
123   /// that widening should be performed.
124   VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands);
125 
126   /// Makes Histogram count operations safe for vectorization, by emitting a
127   /// llvm.experimental.vector.histogram.add intrinsic in place of the
128   /// Load + Add|Sub + Store operations that perform the histogram in the
129   /// original scalar loop.
130   VPHistogramRecipe *tryToWidenHistogram(const HistogramInfo *HI,
131                                          ArrayRef<VPValue *> Operands);
132 
133   /// Examines reduction operations to see if the target can use a cheaper
134   /// operation with a wider per-iteration input VF and narrower PHI VF.
135   /// Each element within Chains is a pair with a struct containing reduction
136   /// information and the scaling factor between the number of elements in
137   /// the input and output.
138   /// Recursively calls itself to identify chained scaled reductions.
139   /// Returns true if this invocation added an entry to Chains, otherwise false.
140   /// i.e. returns false in the case that a subcall adds an entry to Chains,
141   /// but the top-level call does not.
142   bool getScaledReductions(
143       Instruction *PHI, Instruction *RdxExitInstr, VFRange &Range,
144       SmallVectorImpl<std::pair<PartialReductionChain, unsigned>> &Chains);
145 
146 public:
VPRecipeBuilder(VPlan & Plan,Loop * OrigLoop,const TargetLibraryInfo * TLI,const TargetTransformInfo * TTI,LoopVectorizationLegality * Legal,LoopVectorizationCostModel & CM,PredicatedScalarEvolution & PSE,VPBuilder & Builder,DenseMap<VPBasicBlock *,VPValue * > & BlockMaskCache,LoopVersioning * LVer)147   VPRecipeBuilder(VPlan &Plan, Loop *OrigLoop, const TargetLibraryInfo *TLI,
148                   const TargetTransformInfo *TTI,
149                   LoopVectorizationLegality *Legal,
150                   LoopVectorizationCostModel &CM,
151                   PredicatedScalarEvolution &PSE, VPBuilder &Builder,
152                   DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache,
153                   LoopVersioning *LVer)
154       : Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
155         CM(CM), PSE(PSE), Builder(Builder), BlockMaskCache(BlockMaskCache),
156         LVer(LVer) {}
157 
getScalingForReduction(const Instruction * ExitInst)158   std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
159     auto It = ScaledReductionMap.find(ExitInst);
160     return It == ScaledReductionMap.end() ? std::nullopt
161                                           : std::make_optional(It->second);
162   }
163 
164   /// Find all possible partial reductions in the loop and track all of those
165   /// that are valid so recipes can be formed later.
166   void collectScaledReductions(VFRange &Range);
167 
168   /// Create and return a widened recipe for \p R if one can be created within
169   /// the given VF \p Range.
170   VPRecipeBase *tryToCreateWidenRecipe(VPSingleDefRecipe *R, VFRange &Range);
171 
172   /// Create and return a partial reduction recipe for a reduction instruction
173   /// along with binary operation and reduction phi operands.
174   VPRecipeBase *tryToCreatePartialReduction(Instruction *Reduction,
175                                             ArrayRef<VPValue *> Operands,
176                                             unsigned ScaleFactor);
177 
178   /// Set the recipe created for given ingredient.
setRecipe(Instruction * I,VPRecipeBase * R)179   void setRecipe(Instruction *I, VPRecipeBase *R) {
180     assert(!Ingredient2Recipe.contains(I) &&
181            "Cannot reset recipe for instruction.");
182     Ingredient2Recipe[I] = R;
183   }
184 
185   /// Returns the *entry* mask for block \p VPBB or null if the mask is
186   /// all-true.
getBlockInMask(VPBasicBlock * VPBB)187   VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
188     return BlockMaskCache.lookup(VPBB);
189   }
190 
191   /// Return the recipe created for given ingredient.
getRecipe(Instruction * I)192   VPRecipeBase *getRecipe(Instruction *I) {
193     assert(Ingredient2Recipe.count(I) &&
194            "Recording this ingredients recipe was not requested");
195     assert(Ingredient2Recipe[I] != nullptr &&
196            "Ingredient doesn't have a recipe");
197     return Ingredient2Recipe[I];
198   }
199 
200   /// Build a VPReplicationRecipe for \p I using \p Operands. If it is
201   /// predicated, add the mask as last operand. Range.End may be decreased to
202   /// ensure same recipe behavior from \p Range.Start to \p Range.End.
203   VPReplicateRecipe *handleReplication(Instruction *I,
204                                        ArrayRef<VPValue *> Operands,
205                                        VFRange &Range);
206 
getVPValueOrAddLiveIn(Value * V)207   VPValue *getVPValueOrAddLiveIn(Value *V) {
208     if (auto *I = dyn_cast<Instruction>(V)) {
209       if (auto *R = Ingredient2Recipe.lookup(I))
210         return R->getVPSingleValue();
211     }
212     return Plan.getOrAddLiveIn(V);
213   }
214 
updateBlockMaskCache(DenseMap<VPValue *,VPValue * > & Old2New)215   void updateBlockMaskCache(DenseMap<VPValue *, VPValue *> &Old2New) {
216     for (auto &[_, V] : BlockMaskCache) {
217       if (auto *New = Old2New.lookup(V)) {
218         V->replaceAllUsesWith(New);
219         V = New;
220       }
221     }
222   }
223 };
224 } // end namespace llvm
225 
226 #endif // LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H
227