1 //===- VPRecipeBuilder.h - Helper class to build recipes --------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H 10 #define LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H 11 12 #include "LoopVectorizationPlanner.h" 13 #include "VPlan.h" 14 #include "llvm/ADT/DenseMap.h" 15 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 16 17 namespace llvm { 18 19 class LoopVectorizationLegality; 20 class LoopVectorizationCostModel; 21 class TargetLibraryInfo; 22 class TargetTransformInfo; 23 struct HistogramInfo; 24 struct VFRange; 25 26 /// A chain of instructions that form a partial reduction. 27 /// Designed to match either: 28 /// reduction_bin_op (extend (A), accumulator), or 29 /// reduction_bin_op (bin_op (extend (A), (extend (B))), accumulator). 30 struct PartialReductionChain { PartialReductionChainPartialReductionChain31 PartialReductionChain(Instruction *Reduction, Instruction *ExtendA, 32 Instruction *ExtendB, Instruction *ExtendUser) 33 : Reduction(Reduction), ExtendA(ExtendA), ExtendB(ExtendB), 34 ExtendUser(ExtendUser) {} 35 /// The top-level binary operation that forms the reduction to a scalar 36 /// after the loop body. 37 Instruction *Reduction; 38 /// The extension of each of the inner binary operation's operands. 39 Instruction *ExtendA; 40 Instruction *ExtendB; 41 42 /// The user of the extends that is then reduced. 43 Instruction *ExtendUser; 44 }; 45 46 /// Helper class to create VPRecipies from IR instructions. 47 class VPRecipeBuilder { 48 /// The VPlan new recipes are added to. 49 VPlan &Plan; 50 51 /// The loop that we evaluate. 52 Loop *OrigLoop; 53 54 /// Target Library Info. 55 const TargetLibraryInfo *TLI; 56 57 // Target Transform Info. 58 const TargetTransformInfo *TTI; 59 60 /// The legality analysis. 61 LoopVectorizationLegality *Legal; 62 63 /// The profitablity analysis. 64 LoopVectorizationCostModel &CM; 65 66 PredicatedScalarEvolution &PSE; 67 68 VPBuilder &Builder; 69 70 /// The mask of each VPBB, generated earlier and used for predicating recipes 71 /// in VPBB. 72 /// TODO: remove by applying predication when generating the masks. 73 DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache; 74 75 // VPlan construction support: Hold a mapping from ingredients to 76 // their recipe. 77 DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe; 78 79 /// Cross-iteration reduction & first-order recurrence phis for which we need 80 /// to add the incoming value from the backedge after all recipes have been 81 /// created. 82 SmallVector<VPHeaderPHIRecipe *, 4> PhisToFix; 83 84 /// A mapping of partial reduction exit instructions to their scaling factor. 85 DenseMap<const Instruction *, unsigned> ScaledReductionMap; 86 87 /// Loop versioning instance for getting noalias metadata guaranteed by 88 /// runtime checks. 89 LoopVersioning *LVer; 90 91 /// Check if \p I can be widened at the start of \p Range and possibly 92 /// decrease the range such that the returned value holds for the entire \p 93 /// Range. The function should not be called for memory instructions or calls. 94 bool shouldWiden(Instruction *I, VFRange &Range) const; 95 96 /// Check if the load or store instruction \p I should widened for \p 97 /// Range.Start and potentially masked. Such instructions are handled by a 98 /// recipe that takes an additional VPInstruction for the mask. 99 VPWidenMemoryRecipe *tryToWidenMemory(Instruction *I, 100 ArrayRef<VPValue *> Operands, 101 VFRange &Range); 102 103 /// Check if an induction recipe should be constructed for \p Phi. If so build 104 /// and return it. If not, return null. 105 VPHeaderPHIRecipe *tryToOptimizeInductionPHI(PHINode *Phi, 106 ArrayRef<VPValue *> Operands, 107 VFRange &Range); 108 109 /// Optimize the special case where the operand of \p I is a constant integer 110 /// induction variable. 111 VPWidenIntOrFpInductionRecipe * 112 tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands, 113 VFRange &Range); 114 115 /// Handle call instructions. If \p CI can be widened for \p Range.Start, 116 /// return a new VPWidenCallRecipe or VPWidenIntrinsicRecipe. Range.End may be 117 /// decreased to ensure same decision from \p Range.Start to \p Range.End. 118 VPSingleDefRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands, 119 VFRange &Range); 120 121 /// Check if \p I has an opcode that can be widened and return a VPWidenRecipe 122 /// if it can. The function should only be called if the cost-model indicates 123 /// that widening should be performed. 124 VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands); 125 126 /// Makes Histogram count operations safe for vectorization, by emitting a 127 /// llvm.experimental.vector.histogram.add intrinsic in place of the 128 /// Load + Add|Sub + Store operations that perform the histogram in the 129 /// original scalar loop. 130 VPHistogramRecipe *tryToWidenHistogram(const HistogramInfo *HI, 131 ArrayRef<VPValue *> Operands); 132 133 /// Examines reduction operations to see if the target can use a cheaper 134 /// operation with a wider per-iteration input VF and narrower PHI VF. 135 /// Each element within Chains is a pair with a struct containing reduction 136 /// information and the scaling factor between the number of elements in 137 /// the input and output. 138 /// Recursively calls itself to identify chained scaled reductions. 139 /// Returns true if this invocation added an entry to Chains, otherwise false. 140 /// i.e. returns false in the case that a subcall adds an entry to Chains, 141 /// but the top-level call does not. 142 bool getScaledReductions( 143 Instruction *PHI, Instruction *RdxExitInstr, VFRange &Range, 144 SmallVectorImpl<std::pair<PartialReductionChain, unsigned>> &Chains); 145 146 public: VPRecipeBuilder(VPlan & Plan,Loop * OrigLoop,const TargetLibraryInfo * TLI,const TargetTransformInfo * TTI,LoopVectorizationLegality * Legal,LoopVectorizationCostModel & CM,PredicatedScalarEvolution & PSE,VPBuilder & Builder,DenseMap<VPBasicBlock *,VPValue * > & BlockMaskCache,LoopVersioning * LVer)147 VPRecipeBuilder(VPlan &Plan, Loop *OrigLoop, const TargetLibraryInfo *TLI, 148 const TargetTransformInfo *TTI, 149 LoopVectorizationLegality *Legal, 150 LoopVectorizationCostModel &CM, 151 PredicatedScalarEvolution &PSE, VPBuilder &Builder, 152 DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache, 153 LoopVersioning *LVer) 154 : Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal), 155 CM(CM), PSE(PSE), Builder(Builder), BlockMaskCache(BlockMaskCache), 156 LVer(LVer) {} 157 getScalingForReduction(const Instruction * ExitInst)158 std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) { 159 auto It = ScaledReductionMap.find(ExitInst); 160 return It == ScaledReductionMap.end() ? std::nullopt 161 : std::make_optional(It->second); 162 } 163 164 /// Find all possible partial reductions in the loop and track all of those 165 /// that are valid so recipes can be formed later. 166 void collectScaledReductions(VFRange &Range); 167 168 /// Create and return a widened recipe for \p R if one can be created within 169 /// the given VF \p Range. 170 VPRecipeBase *tryToCreateWidenRecipe(VPSingleDefRecipe *R, VFRange &Range); 171 172 /// Create and return a partial reduction recipe for a reduction instruction 173 /// along with binary operation and reduction phi operands. 174 VPRecipeBase *tryToCreatePartialReduction(Instruction *Reduction, 175 ArrayRef<VPValue *> Operands, 176 unsigned ScaleFactor); 177 178 /// Set the recipe created for given ingredient. setRecipe(Instruction * I,VPRecipeBase * R)179 void setRecipe(Instruction *I, VPRecipeBase *R) { 180 assert(!Ingredient2Recipe.contains(I) && 181 "Cannot reset recipe for instruction."); 182 Ingredient2Recipe[I] = R; 183 } 184 185 /// Returns the *entry* mask for block \p VPBB or null if the mask is 186 /// all-true. getBlockInMask(VPBasicBlock * VPBB)187 VPValue *getBlockInMask(VPBasicBlock *VPBB) const { 188 return BlockMaskCache.lookup(VPBB); 189 } 190 191 /// Return the recipe created for given ingredient. getRecipe(Instruction * I)192 VPRecipeBase *getRecipe(Instruction *I) { 193 assert(Ingredient2Recipe.count(I) && 194 "Recording this ingredients recipe was not requested"); 195 assert(Ingredient2Recipe[I] != nullptr && 196 "Ingredient doesn't have a recipe"); 197 return Ingredient2Recipe[I]; 198 } 199 200 /// Build a VPReplicationRecipe for \p I using \p Operands. If it is 201 /// predicated, add the mask as last operand. Range.End may be decreased to 202 /// ensure same recipe behavior from \p Range.Start to \p Range.End. 203 VPReplicateRecipe *handleReplication(Instruction *I, 204 ArrayRef<VPValue *> Operands, 205 VFRange &Range); 206 getVPValueOrAddLiveIn(Value * V)207 VPValue *getVPValueOrAddLiveIn(Value *V) { 208 if (auto *I = dyn_cast<Instruction>(V)) { 209 if (auto *R = Ingredient2Recipe.lookup(I)) 210 return R->getVPSingleValue(); 211 } 212 return Plan.getOrAddLiveIn(V); 213 } 214 updateBlockMaskCache(DenseMap<VPValue *,VPValue * > & Old2New)215 void updateBlockMaskCache(DenseMap<VPValue *, VPValue *> &Old2New) { 216 for (auto &[_, V] : BlockMaskCache) { 217 if (auto *New = Old2New.lookup(V)) { 218 V->replaceAllUsesWith(New); 219 V = New; 220 } 221 } 222 } 223 }; 224 } // end namespace llvm 225 226 #endif // LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H 227