xref: /freebsd/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/IROutliner.h (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
1 //===- IROutliner.h - Extract similar IR regions into functions --*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // The interface file for the IROutliner which is used by the IROutliner Pass.
11 //
12 // The outliner uses the IRSimilarityIdentifier to identify the similar regions
13 // of code.  It evaluates each set of IRSimilarityCandidates with an estimate of
14 // whether it will provide code size reduction.  Each region is extracted using
15 // the code extractor.  These extracted functions are consolidated into a single
16 // function and called from the extracted call site.
17 //
18 // For example:
19 // \code
20 //   %1 = add i32 %a, %b
21 //   %2 = add i32 %b, %a
22 //   %3 = add i32 %b, %a
23 //   %4 = add i32 %a, %b
24 // \endcode
25 // would become function
26 // \code
27 // define internal void outlined_ir_function(i32 %0, i32 %1) {
28 //   %1 = add i32 %0, %1
29 //   %2 = add i32 %1, %0
30 //   ret void
31 // }
32 // \endcode
33 // with calls:
34 // \code
35 //   call void outlined_ir_function(i32 %a, i32 %b)
36 //   call void outlined_ir_function(i32 %b, i32 %a)
37 // \endcode
38 //
39 //===----------------------------------------------------------------------===//
40 
41 #ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H
42 #define LLVM_TRANSFORMS_IPO_IROUTLINER_H
43 
44 #include "llvm/Analysis/IRSimilarityIdentifier.h"
45 #include "llvm/IR/PassManager.h"
46 #include "llvm/Support/InstructionCost.h"
47 #include "llvm/Transforms/Utils/CodeExtractor.h"
48 
49 struct OutlinableGroup;
50 
51 namespace llvm {
52 using namespace CallingConv;
53 using namespace IRSimilarity;
54 
55 class Module;
56 class TargetTransformInfo;
57 class OptimizationRemarkEmitter;
58 
59 /// The OutlinableRegion holds all the information for a specific region, or
60 /// sequence of instructions. This includes what values need to be hoisted to
61 /// arguments from the extracted function, inputs and outputs to the region, and
62 /// mapping from the extracted function arguments to overall function arguments.
63 struct OutlinableRegion {
64   /// Describes the region of code.
65   IRSimilarityCandidate *Candidate = nullptr;
66 
67   /// If this region is outlined, the front and back IRInstructionData could
68   /// potentially become invalidated if the only new instruction is a call.
69   /// This ensures that we replace in the instruction in the IRInstructionData.
70   IRInstructionData *NewFront = nullptr;
71   IRInstructionData *NewBack = nullptr;
72 
73   /// The number of extracted inputs from the CodeExtractor.
74   unsigned NumExtractedInputs = 0;
75 
76   /// The corresponding BasicBlock with the appropriate stores for this
77   /// OutlinableRegion in the overall function.
78   unsigned OutputBlockNum = -1;
79 
80   /// Mapping the extracted argument number to the argument number in the
81   /// overall function.  Since there will be inputs, such as elevated constants
82   /// that are not the same in each region in a SimilarityGroup, or values that
83   /// cannot be sunk into the extracted section in every region, we must keep
84   /// track of which extracted argument maps to which overall argument.
85   DenseMap<unsigned, unsigned> ExtractedArgToAgg;
86   DenseMap<unsigned, unsigned> AggArgToExtracted;
87 
88   /// Values in the outlined functions will often be replaced by arguments. When
89   /// finding corresponding values from one region to another, the found value
90   /// will be the value the argument previously replaced.  This structure maps
91   /// any replaced values for the region to the aggregate aggregate argument
92   /// in the overall function.
93   DenseMap<Value *, Value *> RemappedArguments;
94 
95   /// Marks whether we need to change the order of the arguments when mapping
96   /// the old extracted function call to the new aggregate outlined function
97   /// call.
98   bool ChangedArgOrder = false;
99 
100   /// Marks whether this region ends in a branch, there is special handling
101   /// required for the following basic blocks in this case.
102   bool EndsInBranch = false;
103 
104   /// The PHIBlocks with their corresponding return block based on the return
105   /// value as the key.
106   DenseMap<Value *, BasicBlock *> PHIBlocks;
107 
108   /// Mapping of the argument number in the deduplicated function
109   /// to a given constant, which is used when creating the arguments to the call
110   /// to the newly created deduplicated function.  This is handled separately
111   /// since the CodeExtractor does not recognize constants.
112   DenseMap<unsigned, Constant *> AggArgToConstant;
113 
114   /// The global value numbers that are used as outputs for this section. Once
115   /// extracted, each output will be stored to an output register.  This
116   /// documents the global value numbers that are used in this pattern.
117   SmallVector<unsigned, 4> GVNStores;
118 
119   /// Used to create an outlined function.
120   CodeExtractor *CE = nullptr;
121 
122   /// The call site of the extracted region.
123   CallInst *Call = nullptr;
124 
125   /// The function for the extracted region.
126   Function *ExtractedFunction = nullptr;
127 
128   /// Flag for whether we have split out the IRSimilarityCanidate. That is,
129   /// make the region contained the IRSimilarityCandidate its own BasicBlock.
130   bool CandidateSplit = false;
131 
132   /// Flag for whether we should not consider this region for extraction.
133   bool IgnoreRegion = false;
134 
135   /// The BasicBlock that is before the start of the region BasicBlock,
136   /// only defined when the region has been split.
137   BasicBlock *PrevBB = nullptr;
138 
139   /// The BasicBlock that contains the starting instruction of the region.
140   BasicBlock *StartBB = nullptr;
141 
142   /// The BasicBlock that contains the ending instruction of the region.
143   BasicBlock *EndBB = nullptr;
144 
145   /// The BasicBlock that is after the start of the region BasicBlock,
146   /// only defined when the region has been split.
147   BasicBlock *FollowBB = nullptr;
148 
149   /// The Outlinable Group that contains this region and structurally similar
150   /// regions to this region.
151   OutlinableGroup *Parent = nullptr;
152 
OutlinableRegionOutlinableRegion153   OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group)
154       : Candidate(&C), Parent(&Group) {
155     StartBB = C.getStartBB();
156     EndBB = C.getEndBB();
157   }
158 
159   /// For the contained region, split the parent BasicBlock at the starting and
160   /// ending instructions of the contained IRSimilarityCandidate.
161   void splitCandidate();
162 
163   /// For the contained region, reattach the BasicBlock at the starting and
164   /// ending instructions of the contained IRSimilarityCandidate, or if the
165   /// function has been extracted, the start and end of the BasicBlock
166   /// containing the called function.
167   void reattachCandidate();
168 
169   /// Find a corresponding value for \p V in similar OutlinableRegion \p Other.
170   ///
171   /// \param Other [in] - The OutlinableRegion to find the corresponding Value
172   /// in.
173   /// \param V [in] - The Value to look for in the other region.
174   /// \return The corresponding Value to \p V if it exists, otherwise nullptr.
175   Value *findCorrespondingValueIn(const OutlinableRegion &Other, Value *V);
176 
177   /// Find a corresponding BasicBlock for \p BB in similar OutlinableRegion \p Other.
178   ///
179   /// \param Other [in] - The OutlinableRegion to find the corresponding
180   /// BasicBlock in.
181   /// \param BB [in] - The BasicBlock to look for in the other region.
182   /// \return The corresponding Value to \p V if it exists, otherwise nullptr.
183   BasicBlock *findCorrespondingBlockIn(const OutlinableRegion &Other,
184                                        BasicBlock *BB);
185 
186   /// Get the size of the code removed from the region.
187   ///
188   /// \param [in] TTI - The TargetTransformInfo for the parent function.
189   /// \returns the code size of the region
190   InstructionCost getBenefit(TargetTransformInfo &TTI);
191 };
192 
193 /// This class is a pass that identifies similarity in a Module, extracts
194 /// instances of the similarity, and then consolidating the similar regions
195 /// in an effort to reduce code size.  It uses the IRSimilarityIdentifier pass
196 /// to identify the similar regions of code, and then extracts the similar
197 /// sections into a single function.  See the above for an example as to
198 /// how code is extracted and consolidated into a single function.
199 class IROutliner {
200 public:
IROutliner(function_ref<TargetTransformInfo & (Function &)> GTTI,function_ref<IRSimilarityIdentifier & (Module &)> GIRSI,function_ref<OptimizationRemarkEmitter & (Function &)> GORE)201   IROutliner(function_ref<TargetTransformInfo &(Function &)> GTTI,
202              function_ref<IRSimilarityIdentifier &(Module &)> GIRSI,
203              function_ref<OptimizationRemarkEmitter &(Function &)> GORE)
204       : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {
205 
206     // Check that the DenseMap implementation has not changed.
207     assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
208            "DenseMapInfo<unsigned>'s empty key isn't -1!");
209     assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
210            "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
211   }
212   bool run(Module &M);
213 
214 private:
215   /// Find repeated similar code sequences in \p M and outline them into new
216   /// Functions.
217   ///
218   /// \param [in] M - The module to outline from.
219   /// \returns The number of Functions created.
220   unsigned doOutline(Module &M);
221 
222   /// Check whether an OutlinableRegion is incompatible with code already
223   /// outlined. OutlinableRegions are incomptaible when there are overlapping
224   /// instructions, or code that has not been recorded has been added to the
225   /// instructions.
226   ///
227   /// \param [in] Region - The OutlinableRegion to check for conflicts with
228   /// already outlined code.
229   /// \returns whether the region can safely be outlined.
230   bool isCompatibleWithAlreadyOutlinedCode(const OutlinableRegion &Region);
231 
232   /// Remove all the IRSimilarityCandidates from \p CandidateVec that have
233   /// instructions contained in a previously outlined region and put the
234   /// remaining regions in \p CurrentGroup.
235   ///
236   /// \param [in] CandidateVec - List of similarity candidates for regions with
237   /// the same similarity structure.
238   /// \param [in,out] CurrentGroup - Contains the potential sections to
239   /// be outlined.
240   void
241   pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec,
242                            OutlinableGroup &CurrentGroup);
243 
244   /// Create the function based on the overall types found in the current
245   /// regions being outlined.
246   ///
247   /// \param M - The module to outline from.
248   /// \param [in,out] CG - The OutlinableGroup for the regions to be outlined.
249   /// \param [in] FunctionNameSuffix - How many functions have we previously
250   /// created.
251   /// \returns the newly created function.
252   Function *createFunction(Module &M, OutlinableGroup &CG,
253                            unsigned FunctionNameSuffix);
254 
255   /// Identify the needed extracted inputs in a section, and add to the overall
256   /// function if needed.
257   ///
258   /// \param [in] M - The module to outline from.
259   /// \param [in,out] Region - The region to be extracted.
260   /// \param [in] NotSame - The global value numbers of the Values in the region
261   /// that do not have the same Constant in each strucutrally similar region.
262   void findAddInputsOutputs(Module &M, OutlinableRegion &Region,
263                             DenseSet<unsigned> &NotSame);
264 
265   /// Find the number of instructions that will be removed by extracting the
266   /// OutlinableRegions in \p CurrentGroup.
267   ///
268   /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
269   /// analyzed.
270   /// \returns the number of outlined instructions across all regions.
271   InstructionCost findBenefitFromAllRegions(OutlinableGroup &CurrentGroup);
272 
273   /// Find the number of instructions that will be added by reloading arguments.
274   ///
275   /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
276   /// analyzed.
277   /// \returns the number of added reload instructions across all regions.
278   InstructionCost findCostOutputReloads(OutlinableGroup &CurrentGroup);
279 
280   /// Find the cost and the benefit of \p CurrentGroup and save it back to
281   /// \p CurrentGroup.
282   ///
283   /// \param [in] M - The module being analyzed
284   /// \param [in,out] CurrentGroup - The overall outlined section
285   void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup);
286 
287   /// Update the output mapping based on the load instruction, and the outputs
288   /// of the extracted function.
289   ///
290   /// \param Region - The region extracted
291   /// \param Outputs - The outputs from the extracted function.
292   /// \param LI - The load instruction used to update the mapping.
293   void updateOutputMapping(OutlinableRegion &Region,
294                            ArrayRef<Value *> Outputs, LoadInst *LI);
295 
296   /// Extract \p Region into its own function.
297   ///
298   /// \param [in] Region - The region to be extracted into its own function.
299   /// \returns True if it was successfully outlined.
300   bool extractSection(OutlinableRegion &Region);
301 
302   /// For the similarities found, and the extracted sections, create a single
303   /// outlined function with appropriate output blocks as necessary.
304   ///
305   /// \param [in] M - The module to outline from
306   /// \param [in] CurrentGroup - The set of extracted sections to consolidate.
307   /// \param [in,out] FuncsToRemove - List of functions to remove from the
308   /// module after outlining is completed.
309   /// \param [in,out] OutlinedFunctionNum - the number of new outlined
310   /// functions.
311   void deduplicateExtractedSections(Module &M, OutlinableGroup &CurrentGroup,
312                                     std::vector<Function *> &FuncsToRemove,
313                                     unsigned &OutlinedFunctionNum);
314 
315   /// If true, enables us to outline from functions that have LinkOnceFromODR
316   /// linkages.
317   bool OutlineFromLinkODRs = false;
318 
319   /// If false, we do not worry if the cost is greater than the benefit.  This
320   /// is for debugging and testing, so that we can test small cases to ensure
321   /// that the outlining is being done correctly.
322   bool CostModel = true;
323 
324   /// The set of outlined Instructions, identified by their location in the
325   /// sequential ordering of instructions in a Module.
326   DenseSet<unsigned> Outlined;
327 
328   /// TargetTransformInfo lambda for target specific information.
329   function_ref<TargetTransformInfo &(Function &)> getTTI;
330 
331   /// A mapping from newly created reloaded output values to the original value.
332   /// If an value is replace by an output from an outlined region, this maps
333   /// that Value, back to its original Value.
334   DenseMap<Value *, Value *> OutputMappings;
335 
336   /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier.
337   function_ref<IRSimilarityIdentifier &(Module &)> getIRSI;
338 
339   /// The optimization remark emitter for the pass.
340   function_ref<OptimizationRemarkEmitter &(Function &)> getORE;
341 
342   /// The memory allocator used to allocate the CodeExtractors.
343   SpecificBumpPtrAllocator<CodeExtractor> ExtractorAllocator;
344 
345   /// The memory allocator used to allocate the OutlinableRegions.
346   SpecificBumpPtrAllocator<OutlinableRegion> RegionAllocator;
347 
348   /// The memory allocator used to allocate new IRInstructionData.
349   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
350 
351   /// Custom InstVisitor to classify different instructions for whether it can
352   /// be analyzed for similarity.  This is needed as there may be instruction we
353   /// can identify as having similarity, but are more complicated to outline.
354   struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
355     InstructionAllowed() = default;
356 
visitBranchInstInstructionAllowed357     bool visitBranchInst(BranchInst &BI) { return EnableBranches; }
visitPHINodeInstructionAllowed358     bool visitPHINode(PHINode &PN) { return EnableBranches; }
359     // TODO: Handle allocas.
visitAllocaInstInstructionAllowed360     bool visitAllocaInst(AllocaInst &AI) { return false; }
361     // VAArg instructions are not allowed since this could cause difficulty when
362     // differentiating between different sets of variable instructions in
363     // the deduplicated outlined regions.
visitVAArgInstInstructionAllowed364     bool visitVAArgInst(VAArgInst &VI) { return false; }
365     // We exclude all exception handling cases since they are so context
366     // dependent.
visitLandingPadInstInstructionAllowed367     bool visitLandingPadInst(LandingPadInst &LPI) { return false; }
visitFuncletPadInstInstructionAllowed368     bool visitFuncletPadInst(FuncletPadInst &FPI) { return false; }
369     // DebugInfo should be included in the regions, but should not be
370     // analyzed for similarity as it has no bearing on the outcome of the
371     // program.
visitDbgInfoIntrinsicInstructionAllowed372     bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
373     // TODO: Handle specific intrinsics individually from those that can be
374     // handled.
IntrinsicInstInstructionAllowed375     bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; }
376     // We only handle CallInsts that are not indirect, since we cannot guarantee
377     // that they have a name in these cases.
visitCallInstInstructionAllowed378     bool visitCallInst(CallInst &CI) {
379       Function *F = CI.getCalledFunction();
380       bool IsIndirectCall = CI.isIndirectCall();
381       if (IsIndirectCall && !EnableIndirectCalls)
382         return false;
383       if (!F && !IsIndirectCall)
384         return false;
385       // Returning twice can cause issues with the state of the function call
386       // that were not expected when the function was used, so we do not include
387       // the call in outlined functions.
388       if (CI.canReturnTwice())
389         return false;
390       // TODO: Update the outliner to capture whether the outlined function
391       // needs these extra attributes.
392 
393       // Functions marked with the swifttailcc and tailcc calling conventions
394       // require special handling when outlining musttail functions.  The
395       // calling convention must be passed down to the outlined function as
396       // well. Further, there is special handling for musttail calls as well,
397       // requiring a return call directly after.  For now, the outliner does not
398       // support this.
399       bool IsTailCC = CI.getCallingConv() == CallingConv::SwiftTail ||
400                       CI.getCallingConv() == CallingConv::Tail;
401       if (IsTailCC && !EnableMustTailCalls)
402         return false;
403       if (CI.isMustTailCall() && !EnableMustTailCalls)
404         return false;
405       // The outliner can only handle musttail items if it is also accompanied
406       // by the tailcc or swifttailcc calling convention.
407       if (CI.isMustTailCall() && !IsTailCC)
408         return false;
409       return true;
410     }
411     // TODO: Handle FreezeInsts.  Since a frozen value could be frozen inside
412     // the outlined region, and then returned as an output, this will have to be
413     // handled differently.
visitFreezeInstInstructionAllowed414     bool visitFreezeInst(FreezeInst &CI) { return false; }
415     // TODO: We do not current handle similarity that changes the control flow.
visitInvokeInstInstructionAllowed416     bool visitInvokeInst(InvokeInst &II) { return false; }
417     // TODO: We do not current handle similarity that changes the control flow.
visitCallBrInstInstructionAllowed418     bool visitCallBrInst(CallBrInst &CBI) { return false; }
419     // TODO: Handle interblock similarity.
visitTerminatorInstructionAllowed420     bool visitTerminator(Instruction &I) { return false; }
visitInstructionInstructionAllowed421     bool visitInstruction(Instruction &I) { return true; }
422 
423     // The flag variable that marks whether we should allow branch instructions
424     // to be outlined.
425     bool EnableBranches = false;
426 
427     // The flag variable that marks whether we should allow indirect calls
428     // to be outlined.
429     bool EnableIndirectCalls = true;
430 
431     // The flag variable that marks whether we should allow intrinsics
432     // instructions to be outlined.
433     bool EnableIntrinsics = false;
434 
435     // The flag variable that marks whether we should allow musttail calls.
436     bool EnableMustTailCalls = false;
437   };
438 
439   /// A InstVisitor used to exclude certain instructions from being outlined.
440   InstructionAllowed InstructionClassifier;
441 };
442 
443 /// Pass to outline similar regions.
444 class IROutlinerPass : public PassInfoMixin<IROutlinerPass> {
445 public:
446   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
447 };
448 
449 } // end namespace llvm
450 
451 #endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H
452