1 //===- FunctionSpecialization.h - Function Specialization -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Overview: 10 // --------- 11 // Function Specialization is a transformation which propagates the constant 12 // parameters of a function call from the caller to the callee. It is part of 13 // the Inter-Procedural Sparse Conditional Constant Propagation (IPSCCP) pass. 14 // The transformation runs iteratively a number of times which is controlled 15 // by the option `funcspec-max-iters`. Running it multiple times is needed 16 // for specializing recursive functions, but also exposes new opportunities 17 // arising from specializations which return constant values or contain calls 18 // which can be specialized. 19 // 20 // Function Specialization supports propagating constant parameters like 21 // function pointers, literal constants and addresses of global variables. 22 // By propagating function pointers, indirect calls become direct calls. This 23 // exposes inlining opportunities which we would have otherwise missed. That's 24 // why function specialization is run before the inliner in the optimization 25 // pipeline; that is by design. 26 // 27 // Cost Model: 28 // ----------- 29 // The cost model facilitates a utility for estimating the specialization bonus 30 // from propagating a constant argument. This is the InstCostVisitor, a class 31 // that inherits from the InstVisitor. The bonus itself is expressed as codesize 32 // and latency savings. Codesize savings means the amount of code that becomes 33 // dead in the specialization from propagating the constant, whereas latency 34 // savings represents the cycles we are saving from replacing instructions with 35 // constant values. The InstCostVisitor overrides a set of `visit*` methods to 36 // be able to handle different types of instructions. These attempt to constant- 37 // fold the instruction in which case a constant is returned and propagated 38 // further. 39 // 40 // Function pointers are not handled by the InstCostVisitor. They are treated 41 // separately as they could expose inlining opportunities via indirect call 42 // promotion. The inlining bonus contributes to the total specialization score. 43 // 44 // For a specialization to be profitable its bonus needs to exceed a minimum 45 // threshold. There are three options for controlling the threshold which are 46 // expressed as percentages of the original function size: 47 // * funcspec-min-codesize-savings 48 // * funcspec-min-latency-savings 49 // * funcspec-min-inlining-bonus 50 // There's also an option for controlling the codesize growth from recursive 51 // specializations. That is `funcspec-max-codesize-growth`. 52 // 53 // Once we have all the potential specializations with their score we need to 54 // choose the best ones, which fit in the module specialization budget. That 55 // is controlled by the option `funcspec-max-clones`. To find the best `NSpec` 56 // specializations we use a max-heap. For more details refer to D139346. 57 // 58 // Ideas: 59 // ------ 60 // - With a function specialization attribute for arguments, we could have 61 // a direct way to steer function specialization, avoiding the cost-model, 62 // and thus control compile-times / code-size. 63 // 64 // - Perhaps a post-inlining function specialization pass could be more 65 // aggressive on literal constants. 66 // 67 // References: 68 // ----------- 69 // 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable 70 // it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q 71 // 72 //===----------------------------------------------------------------------===// 73 74 #ifndef LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H 75 #define LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H 76 77 #include "llvm/Analysis/BlockFrequencyInfo.h" 78 #include "llvm/Analysis/CodeMetrics.h" 79 #include "llvm/Analysis/InlineCost.h" 80 #include "llvm/Analysis/TargetTransformInfo.h" 81 #include "llvm/IR/InstVisitor.h" 82 #include "llvm/Transforms/Scalar/SCCP.h" 83 #include "llvm/Transforms/Utils/Cloning.h" 84 #include "llvm/Transforms/Utils/SCCPSolver.h" 85 #include "llvm/Transforms/Utils/SizeOpts.h" 86 87 namespace llvm { 88 // Map of potential specializations for each function. The FunctionSpecializer 89 // keeps the discovered specialisation opportunities for the module in a single 90 // vector, where the specialisations of each function form a contiguous range. 91 // This map's value is the beginning and the end of that range. 92 using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>; 93 94 // Just a shorter abbreviation to improve indentation. 95 using Cost = InstructionCost; 96 97 // Map of known constants found during the specialization bonus estimation. 98 using ConstMap = DenseMap<Value *, Constant *>; 99 100 // Specialization signature, used to uniquely designate a specialization within 101 // a function. 102 struct SpecSig { 103 // Hashing support, used to distinguish between ordinary, empty, or tombstone 104 // keys. 105 unsigned Key = 0; 106 SmallVector<ArgInfo, 4> Args; 107 108 bool operator==(const SpecSig &Other) const { 109 if (Key != Other.Key) 110 return false; 111 return Args == Other.Args; 112 } 113 114 friend hash_code hash_value(const SpecSig &S) { 115 return hash_combine(hash_value(S.Key), 116 hash_combine_range(S.Args.begin(), S.Args.end())); 117 } 118 }; 119 120 // Specialization instance. 121 struct Spec { 122 // Original function. 123 Function *F; 124 125 // Cloned function, a specialized version of the original one. 126 Function *Clone = nullptr; 127 128 // Specialization signature. 129 SpecSig Sig; 130 131 // Profitability of the specialization. 132 unsigned Score; 133 134 // List of call sites, matching this specialization. 135 SmallVector<CallBase *> CallSites; 136 137 Spec(Function *F, const SpecSig &S, unsigned Score) 138 : F(F), Sig(S), Score(Score) {} 139 Spec(Function *F, const SpecSig &&S, unsigned Score) 140 : F(F), Sig(S), Score(Score) {} 141 }; 142 143 struct Bonus { 144 unsigned CodeSize = 0; 145 unsigned Latency = 0; 146 147 Bonus() = default; 148 149 Bonus(Cost CodeSize, Cost Latency) { 150 int64_t Sz = *CodeSize.getValue(); 151 int64_t Ltc = *Latency.getValue(); 152 153 assert(Sz >= 0 && Ltc >= 0 && "CodeSize and Latency cannot be negative"); 154 // It is safe to down cast since we know the arguments 155 // cannot be negative and Cost is of type int64_t. 156 this->CodeSize = static_cast<unsigned>(Sz); 157 this->Latency = static_cast<unsigned>(Ltc); 158 } 159 160 Bonus &operator+=(const Bonus RHS) { 161 CodeSize += RHS.CodeSize; 162 Latency += RHS.Latency; 163 return *this; 164 } 165 166 Bonus operator+(const Bonus RHS) const { 167 return Bonus(CodeSize + RHS.CodeSize, Latency + RHS.Latency); 168 } 169 170 bool operator==(const Bonus RHS) const { 171 return CodeSize == RHS.CodeSize && Latency == RHS.Latency; 172 } 173 }; 174 175 class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> { 176 const DataLayout &DL; 177 BlockFrequencyInfo &BFI; 178 TargetTransformInfo &TTI; 179 SCCPSolver &Solver; 180 181 ConstMap KnownConstants; 182 // Basic blocks known to be unreachable after constant propagation. 183 DenseSet<BasicBlock *> DeadBlocks; 184 // PHI nodes we have visited before. 185 DenseSet<Instruction *> VisitedPHIs; 186 // PHI nodes we have visited once without successfully constant folding them. 187 // Once the InstCostVisitor has processed all the specialization arguments, 188 // it should be possible to determine whether those PHIs can be folded 189 // (some of their incoming values may have become constant or dead). 190 SmallVector<Instruction *> PendingPHIs; 191 192 ConstMap::iterator LastVisited; 193 194 public: 195 InstCostVisitor(const DataLayout &DL, BlockFrequencyInfo &BFI, 196 TargetTransformInfo &TTI, SCCPSolver &Solver) 197 : DL(DL), BFI(BFI), TTI(TTI), Solver(Solver) {} 198 199 bool isBlockExecutable(BasicBlock *BB) { 200 return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(BB); 201 } 202 203 Bonus getSpecializationBonus(Argument *A, Constant *C); 204 205 Bonus getBonusFromPendingPHIs(); 206 207 private: 208 friend class InstVisitor<InstCostVisitor, Constant *>; 209 210 static bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ, 211 DenseSet<BasicBlock *> &DeadBlocks); 212 213 Bonus getUserBonus(Instruction *User, Value *Use = nullptr, 214 Constant *C = nullptr); 215 216 Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList); 217 Cost estimateSwitchInst(SwitchInst &I); 218 Cost estimateBranchInst(BranchInst &I); 219 220 // Transitively Incoming Values (TIV) is a set of Values that can "feed" a 221 // value to the initial PHI-node. It is defined like this: 222 // 223 // * the initial PHI-node belongs to TIV. 224 // 225 // * for every PHI-node in TIV, its operands belong to TIV 226 // 227 // If TIV for the initial PHI-node (P) contains more than one constant or a 228 // value that is not a PHI-node, then P cannot be folded to a constant. 229 // 230 // As soon as we detect these cases, we bail, without constructing the 231 // full TIV. 232 // Otherwise P can be folded to the one constant in TIV. 233 bool discoverTransitivelyIncomingValues(Constant *Const, PHINode *Root, 234 DenseSet<PHINode *> &TransitivePHIs); 235 236 Constant *visitInstruction(Instruction &I) { return nullptr; } 237 Constant *visitPHINode(PHINode &I); 238 Constant *visitFreezeInst(FreezeInst &I); 239 Constant *visitCallBase(CallBase &I); 240 Constant *visitLoadInst(LoadInst &I); 241 Constant *visitGetElementPtrInst(GetElementPtrInst &I); 242 Constant *visitSelectInst(SelectInst &I); 243 Constant *visitCastInst(CastInst &I); 244 Constant *visitCmpInst(CmpInst &I); 245 Constant *visitUnaryOperator(UnaryOperator &I); 246 Constant *visitBinaryOperator(BinaryOperator &I); 247 }; 248 249 class FunctionSpecializer { 250 251 /// The IPSCCP Solver. 252 SCCPSolver &Solver; 253 254 Module &M; 255 256 /// Analysis manager, needed to invalidate analyses. 257 FunctionAnalysisManager *FAM; 258 259 /// Analyses used to help determine if a function should be specialized. 260 std::function<BlockFrequencyInfo &(Function &)> GetBFI; 261 std::function<const TargetLibraryInfo &(Function &)> GetTLI; 262 std::function<TargetTransformInfo &(Function &)> GetTTI; 263 std::function<AssumptionCache &(Function &)> GetAC; 264 265 SmallPtrSet<Function *, 32> Specializations; 266 SmallPtrSet<Function *, 32> FullySpecialized; 267 DenseMap<Function *, CodeMetrics> FunctionMetrics; 268 DenseMap<Function *, unsigned> FunctionGrowth; 269 unsigned NGlobals = 0; 270 271 public: 272 FunctionSpecializer( 273 SCCPSolver &Solver, Module &M, FunctionAnalysisManager *FAM, 274 std::function<BlockFrequencyInfo &(Function &)> GetBFI, 275 std::function<const TargetLibraryInfo &(Function &)> GetTLI, 276 std::function<TargetTransformInfo &(Function &)> GetTTI, 277 std::function<AssumptionCache &(Function &)> GetAC) 278 : Solver(Solver), M(M), FAM(FAM), GetBFI(GetBFI), GetTLI(GetTLI), 279 GetTTI(GetTTI), GetAC(GetAC) {} 280 281 ~FunctionSpecializer(); 282 283 bool run(); 284 285 InstCostVisitor getInstCostVisitorFor(Function *F) { 286 auto &BFI = GetBFI(*F); 287 auto &TTI = GetTTI(*F); 288 return InstCostVisitor(M.getDataLayout(), BFI, TTI, Solver); 289 } 290 291 private: 292 Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call); 293 294 /// A constant stack value is an AllocaInst that has a single constant 295 /// value stored to it. Return this constant if such an alloca stack value 296 /// is a function argument. 297 Constant *getConstantStackValue(CallInst *Call, Value *Val); 298 299 /// See if there are any new constant values for the callers of \p F via 300 /// stack variables and promote them to global variables. 301 void promoteConstantStackValues(Function *F); 302 303 /// Clean up fully specialized functions. 304 void removeDeadFunctions(); 305 306 /// Remove any ssa_copy intrinsics that may have been introduced. 307 void cleanUpSSA(); 308 309 /// @brief Find potential specialization opportunities. 310 /// @param F Function to specialize 311 /// @param FuncSize Cost of specializing a function. 312 /// @param AllSpecs A vector to add potential specializations to. 313 /// @param SM A map for a function's specialisation range 314 /// @return True, if any potential specializations were found 315 bool findSpecializations(Function *F, unsigned FuncSize, 316 SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM); 317 318 /// Compute the inlining bonus for replacing argument \p A with constant \p C. 319 unsigned getInliningBonus(Argument *A, Constant *C); 320 321 bool isCandidateFunction(Function *F); 322 323 /// @brief Create a specialization of \p F and prime the SCCPSolver 324 /// @param F Function to specialize 325 /// @param S Which specialization to create 326 /// @return The new, cloned function 327 Function *createSpecialization(Function *F, const SpecSig &S); 328 329 /// Determine if it is possible to specialise the function for constant values 330 /// of the formal parameter \p A. 331 bool isArgumentInteresting(Argument *A); 332 333 /// Check if the value \p V (an actual argument) is a constant or can only 334 /// have a constant value. Return that constant. 335 Constant *getCandidateConstant(Value *V); 336 337 /// @brief Find and update calls to \p F, which match a specialization 338 /// @param F Orginal function 339 /// @param Begin Start of a range of possibly matching specialisations 340 /// @param End End of a range (exclusive) of possibly matching specialisations 341 void updateCallSites(Function *F, const Spec *Begin, const Spec *End); 342 }; 343 } // namespace llvm 344 345 #endif // LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H 346