xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.h (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
//===- VPlanTransforms.h - Utility VPlan to VPlan transforms --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file provides utility VPlan to VPlan transformations.
//===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
14 #define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
15 
16 #include "VPlan.h"
17 #include "VPlanVerifier.h"
18 #include "llvm/ADT/STLFunctionalExtras.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/Compiler.h"
21 
22 namespace llvm {
23 
24 class InductionDescriptor;
25 class Instruction;
26 class PHINode;
27 class ScalarEvolution;
28 class PredicatedScalarEvolution;
29 class TargetLibraryInfo;
30 class VPBuilder;
31 class VPRecipeBuilder;
32 struct VFRange;
33 
34 extern cl::opt<bool> VerifyEachVPlan;
35 
36 struct VPlanTransforms {
37   /// Helper to run a VPlan transform \p Transform on \p VPlan, forwarding extra
38   /// arguments to the transform. Returns the boolean returned by the transform.
39   template <typename... ArgsTy>
runPassVPlanTransforms40   static bool runPass(bool (*Transform)(VPlan &, ArgsTy...), VPlan &Plan,
41                       typename std::remove_reference<ArgsTy>::type &...Args) {
42     bool Res = Transform(Plan, Args...);
43     if (VerifyEachVPlan)
44       verifyVPlanIsValid(Plan);
45     return Res;
46   }
47   /// Helper to run a VPlan transform \p Transform on \p VPlan, forwarding extra
48   /// arguments to the transform.
49   template <typename... ArgsTy>
runPassVPlanTransforms50   static void runPass(void (*Fn)(VPlan &, ArgsTy...), VPlan &Plan,
51                       typename std::remove_reference<ArgsTy>::type &...Args) {
52     Fn(Plan, Args...);
53     if (VerifyEachVPlan)
54       verifyVPlanIsValid(Plan);
55   }
56 
57   LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan> buildPlainCFG(Loop *TheLoop,
58                                                                 LoopInfo &LI);
59 
60   /// Prepare the plan for vectorization. It will introduce a dedicated
61   /// VPBasicBlock for the vector pre-header as well as a VPBasicBlock as exit
62   /// block of the main vector loop (middle.block). If a check is needed to
63   /// guard executing the scalar epilogue loop, it will be added to the middle
64   /// block, together with VPBasicBlocks for the scalar preheader and exit
65   /// blocks. \p InductionTy is the type of the canonical induction and used for
66   /// related values, like the trip count expression.  It also creates a VPValue
67   /// expression for the original trip count.
68   LLVM_ABI_FOR_TEST static void prepareForVectorization(
69       VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
70       bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop,
71       DebugLoc IVDL, bool HasUncountableExit, VFRange &Range);
72 
73   /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
74   /// flat CFG into a hierarchical CFG.
75   LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan);
76 
77   /// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a
78   /// VPValue and connect the block to \p Plan, using the VPValue as branch
79   /// condition.
80   static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock,
81                                bool AddBranchWeights);
82 
83   /// Replaces the VPInstructions in \p Plan with corresponding
84   /// widen recipes. Returns false if any VPInstructions could not be converted
85   /// to a wide recipe if needed.
86   LLVM_ABI_FOR_TEST static bool tryToConvertVPInstructionsToVPRecipes(
87       VPlanPtr &Plan,
88       function_ref<const InductionDescriptor *(PHINode *)>
89           GetIntOrFpInductionDescriptor,
90       ScalarEvolution &SE, const TargetLibraryInfo &TLI);
91 
92   /// Try to have all users of fixed-order recurrences appear after the recipe
93   /// defining their previous value, by either sinking users or hoisting recipes
94   /// defining their previous value (and its operands). Then introduce
95   /// FirstOrderRecurrenceSplice VPInstructions to combine the value from the
96   /// recurrence phis and previous values.
97   /// \returns true if all users of fixed-order recurrences could be re-arranged
98   /// as needed or false if it is not possible. In the latter case, \p Plan is
99   /// not valid.
100   static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
101 
102   /// Check if \p Plan contains any FMaxNum or FMinNum reductions. If they do,
103   /// try to update the vector loop to exit early if any input is NaN and resume
104   /// executing in the scalar loop to handle the NaNs there. Return false if
105   /// this attempt was unsuccessful.
106   static bool handleMaxMinNumReductions(VPlan &Plan);
107 
108   /// Clear NSW/NUW flags from reduction instructions if necessary.
109   static void clearReductionWrapFlags(VPlan &Plan);
110 
111   /// Explicitly unroll \p Plan by \p UF.
112   static void unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx);
113 
114   /// Replace each VPReplicateRecipe outside on any replicate region in \p Plan
115   /// with \p VF single-scalar recipes.
116   /// TODO: Also replicate VPReplicateRecipes inside replicate regions, thereby
117   /// dissolving the latter.
118   static void replicateByVF(VPlan &Plan, ElementCount VF);
119 
120   /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
121   /// resulting plan to \p BestVF and \p BestUF.
122   static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
123                                  unsigned BestUF,
124                                  PredicatedScalarEvolution &PSE);
125 
126   /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
127   /// optimizations, dead recipe removal, replicate region optimizations and
128   /// block merging.
129   static void optimize(VPlan &Plan);
130 
131   /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
132   /// region block and remove the mask operand. Optimize the created regions by
133   /// iteratively sinking scalar operands into the region, followed by merging
134   /// regions until no improvements are remaining.
135   static void createAndOptimizeReplicateRegions(VPlan &Plan);
136 
137   /// Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an
138   /// (active-lane-mask recipe, wide canonical IV, trip-count). If \p
139   /// UseActiveLaneMaskForControlFlow is true, introduce an
140   /// VPActiveLaneMaskPHIRecipe. If \p DataAndControlFlowWithoutRuntimeCheck is
141   /// true, no minimum-iteration runtime check will be created (during skeleton
142   /// creation) and instead it is handled using active-lane-mask. \p
143   /// DataAndControlFlowWithoutRuntimeCheck implies \p
144   /// UseActiveLaneMaskForControlFlow.
145   static void addActiveLaneMask(VPlan &Plan,
146                                 bool UseActiveLaneMaskForControlFlow,
147                                 bool DataAndControlFlowWithoutRuntimeCheck);
148 
149   /// Insert truncates and extends for any truncated recipe. Redundant casts
150   /// will be folded later.
151   static void
152   truncateToMinimalBitwidths(VPlan &Plan,
153                              const MapVector<Instruction *, uint64_t> &MinBWs);
154 
155   /// Drop poison flags from recipes that may generate a poison value that is
156   /// used after vectorization, even when their operands are not poison. Those
157   /// recipes meet the following conditions:
158   ///  * Contribute to the address computation of a recipe generating a widen
159   ///    memory load/store (VPWidenMemoryInstructionRecipe or
160   ///    VPInterleaveRecipe).
161   ///  * Such a widen memory load/store has at least one underlying Instruction
162   ///    that is in a basic block that needs predication and after vectorization
163   ///    the generated instruction won't be predicated.
164   /// Uses \p BlockNeedsPredication to check if a block needs predicating.
165   /// TODO: Replace BlockNeedsPredication callback with retrieving info from
166   ///       VPlan directly.
167   static void dropPoisonGeneratingRecipes(
168       VPlan &Plan,
169       const std::function<bool(BasicBlock *)> &BlockNeedsPredication);
170 
171   /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
172   /// replaces all uses except the canonical IV increment of
173   /// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe.
174   /// VPCanonicalIVPHIRecipe is only used to control the loop after
175   /// this transformation.
176   /// \returns true if the transformation succeeds, or false if it doesn't.
177   static bool
178   tryAddExplicitVectorLength(VPlan &Plan,
179                              const std::optional<unsigned> &MaxEVLSafeElements);
180 
181   // For each Interleave Group in \p InterleaveGroups replace the Recipes
182   // widening its memory instructions with a single VPInterleaveRecipe at its
183   // insertion point.
184   static void createInterleaveGroups(
185       VPlan &Plan,
186       const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
187           &InterleaveGroups,
188       VPRecipeBuilder &RecipeBuilder, const bool &ScalarEpilogueAllowed);
189 
190   /// Remove dead recipes from \p Plan.
191   static void removeDeadRecipes(VPlan &Plan);
192 
193   /// Update \p Plan to account for the uncountable early exit from \p
194   /// EarlyExitingVPBB to \p EarlyExitVPBB by
195   ///  * updating the condition exiting the loop via the latch to include the
196   ///    early exit condition,
197   ///  * splitting the original middle block to branch to the early exit block
198   ///    conditionally - according to the early exit condition.
199   static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
200                                          VPBasicBlock *EarlyExitVPBB,
201                                          VPlan &Plan, VPBasicBlock *HeaderVPBB,
202                                          VPBasicBlock *LatchVPBB,
203                                          VFRange &Range);
204 
205   /// Replace loop regions with explicit CFG.
206   static void dissolveLoopRegions(VPlan &Plan);
207 
208   /// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p
209   /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
210   static void convertToConcreteRecipes(VPlan &Plan, Type &CanonicalIVTy);
211 
212   /// This function converts initial recipes to the abstract recipes and clamps
213   /// \p Range based on cost model for following optimizations and cost
214   /// estimations. The converted abstract recipes will lower to concrete
215   /// recipes before codegen.
216   static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx,
217                                        VFRange &Range);
218 
219   /// Perform instcombine-like simplifications on recipes in \p Plan. Use \p
220   /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
221   static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy);
222 
223   /// If there's a single exit block, optimize its phi recipes that use exiting
224   /// IV values by feeding them precomputed end values instead, possibly taken
225   /// one step backwards.
226   static void
227   optimizeInductionExitUsers(VPlan &Plan,
228                              DenseMap<VPValue *, VPValue *> &EndValues);
229 
230   /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
231   static void materializeBroadcasts(VPlan &Plan);
232 
233   /// Try to convert a plan with interleave groups with VF elements to a plan
234   /// with the interleave groups replaced by wide loads and stores processing VF
235   /// elements, if all transformed interleave groups access the full vector
236   /// width (checked via \o VectorRegWidth). This effectively is a very simple
237   /// form of loop-aware SLP, where we use interleave groups to identify
238   /// candidates.
239   static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
240                                      unsigned VectorRegWidth);
241 
242   /// Predicate and linearize the control-flow in the only loop region of
243   /// \p Plan. If \p FoldTail is true, create a mask guarding the loop
244   /// header, otherwise use all-true for the header mask. Masks for blocks are
245   /// added to a block-to-mask map which is returned in order to be used later
246   /// for wide recipe construction. This argument is temporary and will be
247   /// removed in the future.
248   static DenseMap<VPBasicBlock *, VPValue *>
249   introduceMasksAndLinearize(VPlan &Plan, bool FoldTail);
250 
251   /// Add branch weight metadata, if the \p Plan's middle block is terminated by
252   /// a BranchOnCond recipe.
253   static void
254   addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
255                                     std::optional<unsigned> VScaleForTuning);
256 };
257 
258 } // namespace llvm
259 
260 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
261