1 //===- VPlanTransforms.h - Utility VPlan to VPlan transforms --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file provides utility VPlan to VPlan transformations. 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H 14 #define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H 15 16 #include "VPlan.h" 17 #include "VPlanVerifier.h" 18 #include "llvm/ADT/STLFunctionalExtras.h" 19 #include "llvm/Support/CommandLine.h" 20 #include "llvm/Support/Compiler.h" 21 22 namespace llvm { 23 24 class InductionDescriptor; 25 class Instruction; 26 class PHINode; 27 class ScalarEvolution; 28 class PredicatedScalarEvolution; 29 class TargetLibraryInfo; 30 class VPBuilder; 31 class VPRecipeBuilder; 32 struct VFRange; 33 34 extern cl::opt<bool> VerifyEachVPlan; 35 36 struct VPlanTransforms { 37 /// Helper to run a VPlan transform \p Transform on \p VPlan, forwarding extra 38 /// arguments to the transform. Returns the boolean returned by the transform. 39 template <typename... ArgsTy> runPassVPlanTransforms40 static bool runPass(bool (*Transform)(VPlan &, ArgsTy...), VPlan &Plan, 41 typename std::remove_reference<ArgsTy>::type &...Args) { 42 bool Res = Transform(Plan, Args...); 43 if (VerifyEachVPlan) 44 verifyVPlanIsValid(Plan); 45 return Res; 46 } 47 /// Helper to run a VPlan transform \p Transform on \p VPlan, forwarding extra 48 /// arguments to the transform. 49 template <typename... ArgsTy> runPassVPlanTransforms50 static void runPass(void (*Fn)(VPlan &, ArgsTy...), VPlan &Plan, 51 typename std::remove_reference<ArgsTy>::type &...Args) { 52 Fn(Plan, Args...); 53 if (VerifyEachVPlan) 54 verifyVPlanIsValid(Plan); 55 } 56 57 LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan> buildPlainCFG(Loop *TheLoop, 58 LoopInfo &LI); 59 60 /// Prepare the plan for vectorization. It will introduce a dedicated 61 /// VPBasicBlock for the vector pre-header as well as a VPBasicBlock as exit 62 /// block of the main vector loop (middle.block). If a check is needed to 63 /// guard executing the scalar epilogue loop, it will be added to the middle 64 /// block, together with VPBasicBlocks for the scalar preheader and exit 65 /// blocks. \p InductionTy is the type of the canonical induction and used for 66 /// related values, like the trip count expression. It also creates a VPValue 67 /// expression for the original trip count. 68 LLVM_ABI_FOR_TEST static void prepareForVectorization( 69 VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, 70 bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop, 71 DebugLoc IVDL, bool HasUncountableExit, VFRange &Range); 72 73 /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's 74 /// flat CFG into a hierarchical CFG. 75 LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan); 76 77 /// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a 78 /// VPValue and connect the block to \p Plan, using the VPValue as branch 79 /// condition. 80 static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, 81 bool AddBranchWeights); 82 83 /// Replaces the VPInstructions in \p Plan with corresponding 84 /// widen recipes. Returns false if any VPInstructions could not be converted 85 /// to a wide recipe if needed. 86 LLVM_ABI_FOR_TEST static bool tryToConvertVPInstructionsToVPRecipes( 87 VPlanPtr &Plan, 88 function_ref<const InductionDescriptor *(PHINode *)> 89 GetIntOrFpInductionDescriptor, 90 ScalarEvolution &SE, const TargetLibraryInfo &TLI); 91 92 /// Try to have all users of fixed-order recurrences appear after the recipe 93 /// defining their previous value, by either sinking users or hoisting recipes 94 /// defining their previous value (and its operands). Then introduce 95 /// FirstOrderRecurrenceSplice VPInstructions to combine the value from the 96 /// recurrence phis and previous values. 97 /// \returns true if all users of fixed-order recurrences could be re-arranged 98 /// as needed or false if it is not possible. In the latter case, \p Plan is 99 /// not valid. 100 static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder); 101 102 /// Check if \p Plan contains any FMaxNum or FMinNum reductions. If they do, 103 /// try to update the vector loop to exit early if any input is NaN and resume 104 /// executing in the scalar loop to handle the NaNs there. Return false if 105 /// this attempt was unsuccessful. 106 static bool handleMaxMinNumReductions(VPlan &Plan); 107 108 /// Clear NSW/NUW flags from reduction instructions if necessary. 109 static void clearReductionWrapFlags(VPlan &Plan); 110 111 /// Explicitly unroll \p Plan by \p UF. 112 static void unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx); 113 114 /// Replace each VPReplicateRecipe outside on any replicate region in \p Plan 115 /// with \p VF single-scalar recipes. 116 /// TODO: Also replicate VPReplicateRecipes inside replicate regions, thereby 117 /// dissolving the latter. 118 static void replicateByVF(VPlan &Plan, ElementCount VF); 119 120 /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the 121 /// resulting plan to \p BestVF and \p BestUF. 122 static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, 123 unsigned BestUF, 124 PredicatedScalarEvolution &PSE); 125 126 /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe 127 /// optimizations, dead recipe removal, replicate region optimizations and 128 /// block merging. 129 static void optimize(VPlan &Plan); 130 131 /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then 132 /// region block and remove the mask operand. Optimize the created regions by 133 /// iteratively sinking scalar operands into the region, followed by merging 134 /// regions until no improvements are remaining. 135 static void createAndOptimizeReplicateRegions(VPlan &Plan); 136 137 /// Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an 138 /// (active-lane-mask recipe, wide canonical IV, trip-count). If \p 139 /// UseActiveLaneMaskForControlFlow is true, introduce an 140 /// VPActiveLaneMaskPHIRecipe. If \p DataAndControlFlowWithoutRuntimeCheck is 141 /// true, no minimum-iteration runtime check will be created (during skeleton 142 /// creation) and instead it is handled using active-lane-mask. \p 143 /// DataAndControlFlowWithoutRuntimeCheck implies \p 144 /// UseActiveLaneMaskForControlFlow. 145 static void addActiveLaneMask(VPlan &Plan, 146 bool UseActiveLaneMaskForControlFlow, 147 bool DataAndControlFlowWithoutRuntimeCheck); 148 149 /// Insert truncates and extends for any truncated recipe. Redundant casts 150 /// will be folded later. 151 static void 152 truncateToMinimalBitwidths(VPlan &Plan, 153 const MapVector<Instruction *, uint64_t> &MinBWs); 154 155 /// Drop poison flags from recipes that may generate a poison value that is 156 /// used after vectorization, even when their operands are not poison. Those 157 /// recipes meet the following conditions: 158 /// * Contribute to the address computation of a recipe generating a widen 159 /// memory load/store (VPWidenMemoryInstructionRecipe or 160 /// VPInterleaveRecipe). 161 /// * Such a widen memory load/store has at least one underlying Instruction 162 /// that is in a basic block that needs predication and after vectorization 163 /// the generated instruction won't be predicated. 164 /// Uses \p BlockNeedsPredication to check if a block needs predicating. 165 /// TODO: Replace BlockNeedsPredication callback with retrieving info from 166 /// VPlan directly. 167 static void dropPoisonGeneratingRecipes( 168 VPlan &Plan, 169 const std::function<bool(BasicBlock *)> &BlockNeedsPredication); 170 171 /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and 172 /// replaces all uses except the canonical IV increment of 173 /// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. 174 /// VPCanonicalIVPHIRecipe is only used to control the loop after 175 /// this transformation. 176 /// \returns true if the transformation succeeds, or false if it doesn't. 177 static bool 178 tryAddExplicitVectorLength(VPlan &Plan, 179 const std::optional<unsigned> &MaxEVLSafeElements); 180 181 // For each Interleave Group in \p InterleaveGroups replace the Recipes 182 // widening its memory instructions with a single VPInterleaveRecipe at its 183 // insertion point. 184 static void createInterleaveGroups( 185 VPlan &Plan, 186 const SmallPtrSetImpl<const InterleaveGroup<Instruction> *> 187 &InterleaveGroups, 188 VPRecipeBuilder &RecipeBuilder, const bool &ScalarEpilogueAllowed); 189 190 /// Remove dead recipes from \p Plan. 191 static void removeDeadRecipes(VPlan &Plan); 192 193 /// Update \p Plan to account for the uncountable early exit from \p 194 /// EarlyExitingVPBB to \p EarlyExitVPBB by 195 /// * updating the condition exiting the loop via the latch to include the 196 /// early exit condition, 197 /// * splitting the original middle block to branch to the early exit block 198 /// conditionally - according to the early exit condition. 199 static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB, 200 VPBasicBlock *EarlyExitVPBB, 201 VPlan &Plan, VPBasicBlock *HeaderVPBB, 202 VPBasicBlock *LatchVPBB, 203 VFRange &Range); 204 205 /// Replace loop regions with explicit CFG. 206 static void dissolveLoopRegions(VPlan &Plan); 207 208 /// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p 209 /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis. 210 static void convertToConcreteRecipes(VPlan &Plan, Type &CanonicalIVTy); 211 212 /// This function converts initial recipes to the abstract recipes and clamps 213 /// \p Range based on cost model for following optimizations and cost 214 /// estimations. The converted abstract recipes will lower to concrete 215 /// recipes before codegen. 216 static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx, 217 VFRange &Range); 218 219 /// Perform instcombine-like simplifications on recipes in \p Plan. Use \p 220 /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis. 221 static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy); 222 223 /// If there's a single exit block, optimize its phi recipes that use exiting 224 /// IV values by feeding them precomputed end values instead, possibly taken 225 /// one step backwards. 226 static void 227 optimizeInductionExitUsers(VPlan &Plan, 228 DenseMap<VPValue *, VPValue *> &EndValues); 229 230 /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors. 231 static void materializeBroadcasts(VPlan &Plan); 232 233 /// Try to convert a plan with interleave groups with VF elements to a plan 234 /// with the interleave groups replaced by wide loads and stores processing VF 235 /// elements, if all transformed interleave groups access the full vector 236 /// width (checked via \o VectorRegWidth). This effectively is a very simple 237 /// form of loop-aware SLP, where we use interleave groups to identify 238 /// candidates. 239 static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF, 240 unsigned VectorRegWidth); 241 242 /// Predicate and linearize the control-flow in the only loop region of 243 /// \p Plan. If \p FoldTail is true, create a mask guarding the loop 244 /// header, otherwise use all-true for the header mask. Masks for blocks are 245 /// added to a block-to-mask map which is returned in order to be used later 246 /// for wide recipe construction. This argument is temporary and will be 247 /// removed in the future. 248 static DenseMap<VPBasicBlock *, VPValue *> 249 introduceMasksAndLinearize(VPlan &Plan, bool FoldTail); 250 251 /// Add branch weight metadata, if the \p Plan's middle block is terminated by 252 /// a BranchOnCond recipe. 253 static void 254 addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF, 255 std::optional<unsigned> VScaleForTuning); 256 }; 257 258 } // namespace llvm 259 260 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H 261