1 //===- IROutliner.h - Extract similar IR regions into functions --*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // \file 10 // The interface file for the IROutliner which is used by the IROutliner Pass. 11 // 12 // The outliner uses the IRSimilarityIdentifier to identify the similar regions 13 // of code. It evaluates each set of IRSimilarityCandidates with an estimate of 14 // whether it will provide code size reduction. Each region is extracted using 15 // the code extractor. These extracted functions are consolidated into a single 16 // function and called from the extracted call site. 17 // 18 // For example: 19 // \code 20 // %1 = add i32 %a, %b 21 // %2 = add i32 %b, %a 22 // %3 = add i32 %b, %a 23 // %4 = add i32 %a, %b 24 // \endcode 25 // would become function 26 // \code 27 // define internal void outlined_ir_function(i32 %0, i32 %1) { 28 // %1 = add i32 %0, %1 29 // %2 = add i32 %1, %0 30 // ret void 31 // } 32 // \endcode 33 // with calls: 34 // \code 35 // call void outlined_ir_function(i32 %a, i32 %b) 36 // call void outlined_ir_function(i32 %b, i32 %a) 37 // \endcode 38 // 39 //===----------------------------------------------------------------------===// 40 41 #ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H 42 #define LLVM_TRANSFORMS_IPO_IROUTLINER_H 43 44 #include "llvm/Analysis/IRSimilarityIdentifier.h" 45 #include "llvm/IR/PassManager.h" 46 #include "llvm/Support/InstructionCost.h" 47 #include "llvm/Transforms/Utils/CodeExtractor.h" 48 49 struct OutlinableGroup; 50 51 namespace llvm { 52 using namespace CallingConv; 53 using namespace IRSimilarity; 54 55 class Module; 56 class TargetTransformInfo; 57 class OptimizationRemarkEmitter; 58 59 /// The OutlinableRegion holds all the information for a specific region, or 60 /// sequence of instructions. This includes what values need to be hoisted to 61 /// arguments from the extracted function, inputs and outputs to the region, and 62 /// mapping from the extracted function arguments to overall function arguments. 63 struct OutlinableRegion { 64 /// Describes the region of code. 65 IRSimilarityCandidate *Candidate = nullptr; 66 67 /// If this region is outlined, the front and back IRInstructionData could 68 /// potentially become invalidated if the only new instruction is a call. 69 /// This ensures that we replace in the instruction in the IRInstructionData. 70 IRInstructionData *NewFront = nullptr; 71 IRInstructionData *NewBack = nullptr; 72 73 /// The number of extracted inputs from the CodeExtractor. 74 unsigned NumExtractedInputs = 0; 75 76 /// The corresponding BasicBlock with the appropriate stores for this 77 /// OutlinableRegion in the overall function. 78 unsigned OutputBlockNum = -1; 79 80 /// Mapping the extracted argument number to the argument number in the 81 /// overall function. Since there will be inputs, such as elevated constants 82 /// that are not the same in each region in a SimilarityGroup, or values that 83 /// cannot be sunk into the extracted section in every region, we must keep 84 /// track of which extracted argument maps to which overall argument. 85 DenseMap<unsigned, unsigned> ExtractedArgToAgg; 86 DenseMap<unsigned, unsigned> AggArgToExtracted; 87 88 /// Values in the outlined functions will often be replaced by arguments. When 89 /// finding corresponding values from one region to another, the found value 90 /// will be the value the argument previously replaced. This structure maps 91 /// any replaced values for the region to the aggregate aggregate argument 92 /// in the overall function. 93 DenseMap<Value *, Value *> RemappedArguments; 94 95 /// Marks whether we need to change the order of the arguments when mapping 96 /// the old extracted function call to the new aggregate outlined function 97 /// call. 98 bool ChangedArgOrder = false; 99 100 /// Marks whether this region ends in a branch, there is special handling 101 /// required for the following basic blocks in this case. 102 bool EndsInBranch = false; 103 104 /// The PHIBlocks with their corresponding return block based on the return 105 /// value as the key. 106 DenseMap<Value *, BasicBlock *> PHIBlocks; 107 108 /// Mapping of the argument number in the deduplicated function 109 /// to a given constant, which is used when creating the arguments to the call 110 /// to the newly created deduplicated function. This is handled separately 111 /// since the CodeExtractor does not recognize constants. 112 DenseMap<unsigned, Constant *> AggArgToConstant; 113 114 /// The global value numbers that are used as outputs for this section. Once 115 /// extracted, each output will be stored to an output register. This 116 /// documents the global value numbers that are used in this pattern. 117 SmallVector<unsigned, 4> GVNStores; 118 119 /// Used to create an outlined function. 120 CodeExtractor *CE = nullptr; 121 122 /// The call site of the extracted region. 123 CallInst *Call = nullptr; 124 125 /// The function for the extracted region. 126 Function *ExtractedFunction = nullptr; 127 128 /// Flag for whether we have split out the IRSimilarityCanidate. That is, 129 /// make the region contained the IRSimilarityCandidate its own BasicBlock. 130 bool CandidateSplit = false; 131 132 /// Flag for whether we should not consider this region for extraction. 133 bool IgnoreRegion = false; 134 135 /// The BasicBlock that is before the start of the region BasicBlock, 136 /// only defined when the region has been split. 137 BasicBlock *PrevBB = nullptr; 138 139 /// The BasicBlock that contains the starting instruction of the region. 140 BasicBlock *StartBB = nullptr; 141 142 /// The BasicBlock that contains the ending instruction of the region. 143 BasicBlock *EndBB = nullptr; 144 145 /// The BasicBlock that is after the start of the region BasicBlock, 146 /// only defined when the region has been split. 147 BasicBlock *FollowBB = nullptr; 148 149 /// The Outlinable Group that contains this region and structurally similar 150 /// regions to this region. 151 OutlinableGroup *Parent = nullptr; 152 OutlinableRegionOutlinableRegion153 OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group) 154 : Candidate(&C), Parent(&Group) { 155 StartBB = C.getStartBB(); 156 EndBB = C.getEndBB(); 157 } 158 159 /// For the contained region, split the parent BasicBlock at the starting and 160 /// ending instructions of the contained IRSimilarityCandidate. 161 void splitCandidate(); 162 163 /// For the contained region, reattach the BasicBlock at the starting and 164 /// ending instructions of the contained IRSimilarityCandidate, or if the 165 /// function has been extracted, the start and end of the BasicBlock 166 /// containing the called function. 167 void reattachCandidate(); 168 169 /// Find a corresponding value for \p V in similar OutlinableRegion \p Other. 170 /// 171 /// \param Other [in] - The OutlinableRegion to find the corresponding Value 172 /// in. 173 /// \param V [in] - The Value to look for in the other region. 174 /// \return The corresponding Value to \p V if it exists, otherwise nullptr. 175 Value *findCorrespondingValueIn(const OutlinableRegion &Other, Value *V); 176 177 /// Find a corresponding BasicBlock for \p BB in similar OutlinableRegion \p Other. 178 /// 179 /// \param Other [in] - The OutlinableRegion to find the corresponding 180 /// BasicBlock in. 181 /// \param BB [in] - The BasicBlock to look for in the other region. 182 /// \return The corresponding Value to \p V if it exists, otherwise nullptr. 183 BasicBlock *findCorrespondingBlockIn(const OutlinableRegion &Other, 184 BasicBlock *BB); 185 186 /// Get the size of the code removed from the region. 187 /// 188 /// \param [in] TTI - The TargetTransformInfo for the parent function. 189 /// \returns the code size of the region 190 InstructionCost getBenefit(TargetTransformInfo &TTI); 191 }; 192 193 /// This class is a pass that identifies similarity in a Module, extracts 194 /// instances of the similarity, and then consolidating the similar regions 195 /// in an effort to reduce code size. It uses the IRSimilarityIdentifier pass 196 /// to identify the similar regions of code, and then extracts the similar 197 /// sections into a single function. See the above for an example as to 198 /// how code is extracted and consolidated into a single function. 199 class IROutliner { 200 public: IROutliner(function_ref<TargetTransformInfo & (Function &)> GTTI,function_ref<IRSimilarityIdentifier & (Module &)> GIRSI,function_ref<OptimizationRemarkEmitter & (Function &)> GORE)201 IROutliner(function_ref<TargetTransformInfo &(Function &)> GTTI, 202 function_ref<IRSimilarityIdentifier &(Module &)> GIRSI, 203 function_ref<OptimizationRemarkEmitter &(Function &)> GORE) 204 : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) { 205 206 // Check that the DenseMap implementation has not changed. 207 assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 && 208 "DenseMapInfo<unsigned>'s empty key isn't -1!"); 209 assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 && 210 "DenseMapInfo<unsigned>'s tombstone key isn't -2!"); 211 } 212 bool run(Module &M); 213 214 private: 215 /// Find repeated similar code sequences in \p M and outline them into new 216 /// Functions. 217 /// 218 /// \param [in] M - The module to outline from. 219 /// \returns The number of Functions created. 220 unsigned doOutline(Module &M); 221 222 /// Check whether an OutlinableRegion is incompatible with code already 223 /// outlined. OutlinableRegions are incomptaible when there are overlapping 224 /// instructions, or code that has not been recorded has been added to the 225 /// instructions. 226 /// 227 /// \param [in] Region - The OutlinableRegion to check for conflicts with 228 /// already outlined code. 229 /// \returns whether the region can safely be outlined. 230 bool isCompatibleWithAlreadyOutlinedCode(const OutlinableRegion &Region); 231 232 /// Remove all the IRSimilarityCandidates from \p CandidateVec that have 233 /// instructions contained in a previously outlined region and put the 234 /// remaining regions in \p CurrentGroup. 235 /// 236 /// \param [in] CandidateVec - List of similarity candidates for regions with 237 /// the same similarity structure. 238 /// \param [in,out] CurrentGroup - Contains the potential sections to 239 /// be outlined. 240 void 241 pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec, 242 OutlinableGroup &CurrentGroup); 243 244 /// Create the function based on the overall types found in the current 245 /// regions being outlined. 246 /// 247 /// \param M - The module to outline from. 248 /// \param [in,out] CG - The OutlinableGroup for the regions to be outlined. 249 /// \param [in] FunctionNameSuffix - How many functions have we previously 250 /// created. 251 /// \returns the newly created function. 252 Function *createFunction(Module &M, OutlinableGroup &CG, 253 unsigned FunctionNameSuffix); 254 255 /// Identify the needed extracted inputs in a section, and add to the overall 256 /// function if needed. 257 /// 258 /// \param [in] M - The module to outline from. 259 /// \param [in,out] Region - The region to be extracted. 260 /// \param [in] NotSame - The global value numbers of the Values in the region 261 /// that do not have the same Constant in each strucutrally similar region. 262 void findAddInputsOutputs(Module &M, OutlinableRegion &Region, 263 DenseSet<unsigned> &NotSame); 264 265 /// Find the number of instructions that will be removed by extracting the 266 /// OutlinableRegions in \p CurrentGroup. 267 /// 268 /// \param [in] CurrentGroup - The collection of OutlinableRegions to be 269 /// analyzed. 270 /// \returns the number of outlined instructions across all regions. 271 InstructionCost findBenefitFromAllRegions(OutlinableGroup &CurrentGroup); 272 273 /// Find the number of instructions that will be added by reloading arguments. 274 /// 275 /// \param [in] CurrentGroup - The collection of OutlinableRegions to be 276 /// analyzed. 277 /// \returns the number of added reload instructions across all regions. 278 InstructionCost findCostOutputReloads(OutlinableGroup &CurrentGroup); 279 280 /// Find the cost and the benefit of \p CurrentGroup and save it back to 281 /// \p CurrentGroup. 282 /// 283 /// \param [in] M - The module being analyzed 284 /// \param [in,out] CurrentGroup - The overall outlined section 285 void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup); 286 287 /// Update the output mapping based on the load instruction, and the outputs 288 /// of the extracted function. 289 /// 290 /// \param Region - The region extracted 291 /// \param Outputs - The outputs from the extracted function. 292 /// \param LI - The load instruction used to update the mapping. 293 void updateOutputMapping(OutlinableRegion &Region, 294 ArrayRef<Value *> Outputs, LoadInst *LI); 295 296 /// Extract \p Region into its own function. 297 /// 298 /// \param [in] Region - The region to be extracted into its own function. 299 /// \returns True if it was successfully outlined. 300 bool extractSection(OutlinableRegion &Region); 301 302 /// For the similarities found, and the extracted sections, create a single 303 /// outlined function with appropriate output blocks as necessary. 304 /// 305 /// \param [in] M - The module to outline from 306 /// \param [in] CurrentGroup - The set of extracted sections to consolidate. 307 /// \param [in,out] FuncsToRemove - List of functions to remove from the 308 /// module after outlining is completed. 309 /// \param [in,out] OutlinedFunctionNum - the number of new outlined 310 /// functions. 311 void deduplicateExtractedSections(Module &M, OutlinableGroup &CurrentGroup, 312 std::vector<Function *> &FuncsToRemove, 313 unsigned &OutlinedFunctionNum); 314 315 /// If true, enables us to outline from functions that have LinkOnceFromODR 316 /// linkages. 317 bool OutlineFromLinkODRs = false; 318 319 /// If false, we do not worry if the cost is greater than the benefit. This 320 /// is for debugging and testing, so that we can test small cases to ensure 321 /// that the outlining is being done correctly. 322 bool CostModel = true; 323 324 /// The set of outlined Instructions, identified by their location in the 325 /// sequential ordering of instructions in a Module. 326 DenseSet<unsigned> Outlined; 327 328 /// TargetTransformInfo lambda for target specific information. 329 function_ref<TargetTransformInfo &(Function &)> getTTI; 330 331 /// A mapping from newly created reloaded output values to the original value. 332 /// If an value is replace by an output from an outlined region, this maps 333 /// that Value, back to its original Value. 334 DenseMap<Value *, Value *> OutputMappings; 335 336 /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier. 337 function_ref<IRSimilarityIdentifier &(Module &)> getIRSI; 338 339 /// The optimization remark emitter for the pass. 340 function_ref<OptimizationRemarkEmitter &(Function &)> getORE; 341 342 /// The memory allocator used to allocate the CodeExtractors. 343 SpecificBumpPtrAllocator<CodeExtractor> ExtractorAllocator; 344 345 /// The memory allocator used to allocate the OutlinableRegions. 346 SpecificBumpPtrAllocator<OutlinableRegion> RegionAllocator; 347 348 /// The memory allocator used to allocate new IRInstructionData. 349 SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator; 350 351 /// Custom InstVisitor to classify different instructions for whether it can 352 /// be analyzed for similarity. This is needed as there may be instruction we 353 /// can identify as having similarity, but are more complicated to outline. 354 struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> { 355 InstructionAllowed() = default; 356 visitBranchInstInstructionAllowed357 bool visitBranchInst(BranchInst &BI) { return EnableBranches; } visitPHINodeInstructionAllowed358 bool visitPHINode(PHINode &PN) { return EnableBranches; } 359 // TODO: Handle allocas. visitAllocaInstInstructionAllowed360 bool visitAllocaInst(AllocaInst &AI) { return false; } 361 // VAArg instructions are not allowed since this could cause difficulty when 362 // differentiating between different sets of variable instructions in 363 // the deduplicated outlined regions. visitVAArgInstInstructionAllowed364 bool visitVAArgInst(VAArgInst &VI) { return false; } 365 // We exclude all exception handling cases since they are so context 366 // dependent. visitLandingPadInstInstructionAllowed367 bool visitLandingPadInst(LandingPadInst &LPI) { return false; } visitFuncletPadInstInstructionAllowed368 bool visitFuncletPadInst(FuncletPadInst &FPI) { return false; } 369 // DebugInfo should be included in the regions, but should not be 370 // analyzed for similarity as it has no bearing on the outcome of the 371 // program. visitDbgInfoIntrinsicInstructionAllowed372 bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; } 373 // TODO: Handle specific intrinsics individually from those that can be 374 // handled. IntrinsicInstInstructionAllowed375 bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; } 376 // We only handle CallInsts that are not indirect, since we cannot guarantee 377 // that they have a name in these cases. visitCallInstInstructionAllowed378 bool visitCallInst(CallInst &CI) { 379 Function *F = CI.getCalledFunction(); 380 bool IsIndirectCall = CI.isIndirectCall(); 381 if (IsIndirectCall && !EnableIndirectCalls) 382 return false; 383 if (!F && !IsIndirectCall) 384 return false; 385 // Returning twice can cause issues with the state of the function call 386 // that were not expected when the function was used, so we do not include 387 // the call in outlined functions. 388 if (CI.canReturnTwice()) 389 return false; 390 // TODO: Update the outliner to capture whether the outlined function 391 // needs these extra attributes. 392 393 // Functions marked with the swifttailcc and tailcc calling conventions 394 // require special handling when outlining musttail functions. The 395 // calling convention must be passed down to the outlined function as 396 // well. Further, there is special handling for musttail calls as well, 397 // requiring a return call directly after. For now, the outliner does not 398 // support this. 399 bool IsTailCC = CI.getCallingConv() == CallingConv::SwiftTail || 400 CI.getCallingConv() == CallingConv::Tail; 401 if (IsTailCC && !EnableMustTailCalls) 402 return false; 403 if (CI.isMustTailCall() && !EnableMustTailCalls) 404 return false; 405 // The outliner can only handle musttail items if it is also accompanied 406 // by the tailcc or swifttailcc calling convention. 407 if (CI.isMustTailCall() && !IsTailCC) 408 return false; 409 return true; 410 } 411 // TODO: Handle FreezeInsts. Since a frozen value could be frozen inside 412 // the outlined region, and then returned as an output, this will have to be 413 // handled differently. visitFreezeInstInstructionAllowed414 bool visitFreezeInst(FreezeInst &CI) { return false; } 415 // TODO: We do not current handle similarity that changes the control flow. visitInvokeInstInstructionAllowed416 bool visitInvokeInst(InvokeInst &II) { return false; } 417 // TODO: We do not current handle similarity that changes the control flow. visitCallBrInstInstructionAllowed418 bool visitCallBrInst(CallBrInst &CBI) { return false; } 419 // TODO: Handle interblock similarity. visitTerminatorInstructionAllowed420 bool visitTerminator(Instruction &I) { return false; } visitInstructionInstructionAllowed421 bool visitInstruction(Instruction &I) { return true; } 422 423 // The flag variable that marks whether we should allow branch instructions 424 // to be outlined. 425 bool EnableBranches = false; 426 427 // The flag variable that marks whether we should allow indirect calls 428 // to be outlined. 429 bool EnableIndirectCalls = true; 430 431 // The flag variable that marks whether we should allow intrinsics 432 // instructions to be outlined. 433 bool EnableIntrinsics = false; 434 435 // The flag variable that marks whether we should allow musttail calls. 436 bool EnableMustTailCalls = false; 437 }; 438 439 /// A InstVisitor used to exclude certain instructions from being outlined. 440 InstructionAllowed InstructionClassifier; 441 }; 442 443 /// Pass to outline similar regions. 444 class IROutlinerPass : public PassInfoMixin<IROutlinerPass> { 445 public: 446 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 447 }; 448 449 } // end namespace llvm 450 451 #endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H 452