xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/GlobalMergeFunctions.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1*700637cbSDimitry Andric //===---- GlobalMergeFunctions.cpp - Global merge functions -------*- C++ -===//
2*700637cbSDimitry Andric //
3*700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*700637cbSDimitry Andric //
7*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
8*700637cbSDimitry Andric //
9*700637cbSDimitry Andric // This pass implements the global merge function pass.
10*700637cbSDimitry Andric //
11*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
12*700637cbSDimitry Andric 
13*700637cbSDimitry Andric #include "llvm/CodeGen/GlobalMergeFunctions.h"
14*700637cbSDimitry Andric #include "llvm/ADT/Statistic.h"
15*700637cbSDimitry Andric #include "llvm/Analysis/ModuleSummaryAnalysis.h"
16*700637cbSDimitry Andric #include "llvm/CGData/CodeGenData.h"
17*700637cbSDimitry Andric #include "llvm/CGData/CodeGenDataWriter.h"
18*700637cbSDimitry Andric #include "llvm/CodeGen/Passes.h"
19*700637cbSDimitry Andric #include "llvm/IR/IRBuilder.h"
20*700637cbSDimitry Andric #include "llvm/IR/StructuralHash.h"
21*700637cbSDimitry Andric #include "llvm/InitializePasses.h"
22*700637cbSDimitry Andric #include "llvm/Support/CommandLine.h"
23*700637cbSDimitry Andric #include "llvm/Transforms/Utils/ModuleUtils.h"
24*700637cbSDimitry Andric 
25*700637cbSDimitry Andric #define DEBUG_TYPE "global-merge-func"
26*700637cbSDimitry Andric 
27*700637cbSDimitry Andric using namespace llvm;
28*700637cbSDimitry Andric using namespace llvm::support;
29*700637cbSDimitry Andric 
30*700637cbSDimitry Andric static cl::opt<bool> DisableCGDataForMerging(
31*700637cbSDimitry Andric     "disable-cgdata-for-merging", cl::Hidden,
32*700637cbSDimitry Andric     cl::desc("Disable codegen data for function merging. Local "
33*700637cbSDimitry Andric              "merging is still enabled within a module."),
34*700637cbSDimitry Andric     cl::init(false));
35*700637cbSDimitry Andric 
36*700637cbSDimitry Andric STATISTIC(NumMergedFunctions,
37*700637cbSDimitry Andric           "Number of functions that are actually merged using function hash");
38*700637cbSDimitry Andric STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
39*700637cbSDimitry Andric STATISTIC(NumAnalyzedFunctions, "Number of functions that are analyzed");
40*700637cbSDimitry Andric STATISTIC(NumEligibleFunctions, "Number of functions that are eligible");
41*700637cbSDimitry Andric 
42*700637cbSDimitry Andric /// Returns true if the \OpIdx operand of \p CI is the callee operand.
isCalleeOperand(const CallBase * CI,unsigned OpIdx)43*700637cbSDimitry Andric static bool isCalleeOperand(const CallBase *CI, unsigned OpIdx) {
44*700637cbSDimitry Andric   return &CI->getCalledOperandUse() == &CI->getOperandUse(OpIdx);
45*700637cbSDimitry Andric }
46*700637cbSDimitry Andric 
canParameterizeCallOperand(const CallBase * CI,unsigned OpIdx)47*700637cbSDimitry Andric static bool canParameterizeCallOperand(const CallBase *CI, unsigned OpIdx) {
48*700637cbSDimitry Andric   if (CI->isInlineAsm())
49*700637cbSDimitry Andric     return false;
50*700637cbSDimitry Andric   Function *Callee = CI->getCalledOperand()
51*700637cbSDimitry Andric                          ? dyn_cast_or_null<Function>(
52*700637cbSDimitry Andric                                CI->getCalledOperand()->stripPointerCasts())
53*700637cbSDimitry Andric                          : nullptr;
54*700637cbSDimitry Andric   if (Callee) {
55*700637cbSDimitry Andric     if (Callee->isIntrinsic())
56*700637cbSDimitry Andric       return false;
57*700637cbSDimitry Andric     auto Name = Callee->getName();
58*700637cbSDimitry Andric     // objc_msgSend stubs must be called, and can't have their address taken.
59*700637cbSDimitry Andric     if (Name.starts_with("objc_msgSend$"))
60*700637cbSDimitry Andric       return false;
61*700637cbSDimitry Andric     // Calls to dtrace probes must generate unique patchpoints.
62*700637cbSDimitry Andric     if (Name.starts_with("__dtrace"))
63*700637cbSDimitry Andric       return false;
64*700637cbSDimitry Andric   }
65*700637cbSDimitry Andric   if (isCalleeOperand(CI, OpIdx)) {
66*700637cbSDimitry Andric     // The operand is the callee and it has already been signed. Ignore this
67*700637cbSDimitry Andric     // because we cannot add another ptrauth bundle to the call instruction.
68*700637cbSDimitry Andric     if (CI->getOperandBundle(LLVMContext::OB_ptrauth).has_value())
69*700637cbSDimitry Andric       return false;
70*700637cbSDimitry Andric   } else {
71*700637cbSDimitry Andric     // The target of the arc-attached call must be a constant and cannot be
72*700637cbSDimitry Andric     // parameterized.
73*700637cbSDimitry Andric     if (CI->isOperandBundleOfType(LLVMContext::OB_clang_arc_attachedcall,
74*700637cbSDimitry Andric                                   OpIdx))
75*700637cbSDimitry Andric       return false;
76*700637cbSDimitry Andric   }
77*700637cbSDimitry Andric   return true;
78*700637cbSDimitry Andric }
79*700637cbSDimitry Andric 
80*700637cbSDimitry Andric /// Returns true if function \p F is eligible for merging.
isEligibleFunction(Function * F)81*700637cbSDimitry Andric bool isEligibleFunction(Function *F) {
82*700637cbSDimitry Andric   if (F->isDeclaration())
83*700637cbSDimitry Andric     return false;
84*700637cbSDimitry Andric 
85*700637cbSDimitry Andric   if (F->hasFnAttribute(llvm::Attribute::NoMerge) ||
86*700637cbSDimitry Andric       F->hasFnAttribute(llvm::Attribute::AlwaysInline))
87*700637cbSDimitry Andric     return false;
88*700637cbSDimitry Andric 
89*700637cbSDimitry Andric   if (F->hasAvailableExternallyLinkage())
90*700637cbSDimitry Andric     return false;
91*700637cbSDimitry Andric 
92*700637cbSDimitry Andric   if (F->getFunctionType()->isVarArg())
93*700637cbSDimitry Andric     return false;
94*700637cbSDimitry Andric 
95*700637cbSDimitry Andric   if (F->getCallingConv() == CallingConv::SwiftTail)
96*700637cbSDimitry Andric     return false;
97*700637cbSDimitry Andric 
98*700637cbSDimitry Andric   // If function contains callsites with musttail, if we merge
99*700637cbSDimitry Andric   // it, the merged function will have the musttail callsite, but
100*700637cbSDimitry Andric   // the number of parameters can change, thus the parameter count
101*700637cbSDimitry Andric   // of the callsite will mismatch with the function itself.
102*700637cbSDimitry Andric   for (const BasicBlock &BB : *F) {
103*700637cbSDimitry Andric     for (const Instruction &I : BB) {
104*700637cbSDimitry Andric       const auto *CB = dyn_cast<CallBase>(&I);
105*700637cbSDimitry Andric       if (CB && CB->isMustTailCall())
106*700637cbSDimitry Andric         return false;
107*700637cbSDimitry Andric     }
108*700637cbSDimitry Andric   }
109*700637cbSDimitry Andric 
110*700637cbSDimitry Andric   return true;
111*700637cbSDimitry Andric }
112*700637cbSDimitry Andric 
isEligibleInstructionForConstantSharing(const Instruction * I)113*700637cbSDimitry Andric static bool isEligibleInstructionForConstantSharing(const Instruction *I) {
114*700637cbSDimitry Andric   switch (I->getOpcode()) {
115*700637cbSDimitry Andric   case Instruction::Load:
116*700637cbSDimitry Andric   case Instruction::Store:
117*700637cbSDimitry Andric   case Instruction::Call:
118*700637cbSDimitry Andric   case Instruction::Invoke:
119*700637cbSDimitry Andric     return true;
120*700637cbSDimitry Andric   default:
121*700637cbSDimitry Andric     return false;
122*700637cbSDimitry Andric   }
123*700637cbSDimitry Andric }
124*700637cbSDimitry Andric 
125*700637cbSDimitry Andric // This function takes an instruction, \p I, and an operand index, \p OpIdx.
126*700637cbSDimitry Andric // It returns true if the operand should be ignored in the hash computation.
127*700637cbSDimitry Andric // If \p OpIdx is out of range based on the other instruction context, it cannot
128*700637cbSDimitry Andric // be ignored.
ignoreOp(const Instruction * I,unsigned OpIdx)129*700637cbSDimitry Andric static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
130*700637cbSDimitry Andric   if (OpIdx >= I->getNumOperands())
131*700637cbSDimitry Andric     return false;
132*700637cbSDimitry Andric 
133*700637cbSDimitry Andric   if (!isEligibleInstructionForConstantSharing(I))
134*700637cbSDimitry Andric     return false;
135*700637cbSDimitry Andric 
136*700637cbSDimitry Andric   if (!isa<Constant>(I->getOperand(OpIdx)))
137*700637cbSDimitry Andric     return false;
138*700637cbSDimitry Andric 
139*700637cbSDimitry Andric   if (const auto *CI = dyn_cast<CallBase>(I))
140*700637cbSDimitry Andric     return canParameterizeCallOperand(CI, OpIdx);
141*700637cbSDimitry Andric 
142*700637cbSDimitry Andric   return true;
143*700637cbSDimitry Andric }
144*700637cbSDimitry Andric 
analyze(Module & M)145*700637cbSDimitry Andric void GlobalMergeFunc::analyze(Module &M) {
146*700637cbSDimitry Andric   ++NumAnalyzedModues;
147*700637cbSDimitry Andric   for (Function &Func : M) {
148*700637cbSDimitry Andric     ++NumAnalyzedFunctions;
149*700637cbSDimitry Andric     if (isEligibleFunction(&Func)) {
150*700637cbSDimitry Andric       ++NumEligibleFunctions;
151*700637cbSDimitry Andric 
152*700637cbSDimitry Andric       auto FI = llvm::StructuralHashWithDifferences(Func, ignoreOp);
153*700637cbSDimitry Andric 
154*700637cbSDimitry Andric       // Convert the operand map to a vector for a serialization-friendly
155*700637cbSDimitry Andric       // format.
156*700637cbSDimitry Andric       IndexOperandHashVecType IndexOperandHashes;
157*700637cbSDimitry Andric       for (auto &Pair : *FI.IndexOperandHashMap)
158*700637cbSDimitry Andric         IndexOperandHashes.emplace_back(Pair);
159*700637cbSDimitry Andric 
160*700637cbSDimitry Andric       StableFunction SF(FI.FunctionHash, get_stable_name(Func.getName()).str(),
161*700637cbSDimitry Andric                         M.getModuleIdentifier(), FI.IndexInstruction->size(),
162*700637cbSDimitry Andric                         std::move(IndexOperandHashes));
163*700637cbSDimitry Andric 
164*700637cbSDimitry Andric       LocalFunctionMap->insert(SF);
165*700637cbSDimitry Andric     }
166*700637cbSDimitry Andric   }
167*700637cbSDimitry Andric }
168*700637cbSDimitry Andric 
169*700637cbSDimitry Andric /// Tuple to hold function info to process merging.
170*700637cbSDimitry Andric struct FuncMergeInfo {
171*700637cbSDimitry Andric   StableFunctionMap::StableFunctionEntry *SF;
172*700637cbSDimitry Andric   Function *F;
173*700637cbSDimitry Andric   IndexInstrMap *IndexInstruction;
FuncMergeInfoFuncMergeInfo174*700637cbSDimitry Andric   FuncMergeInfo(StableFunctionMap::StableFunctionEntry *SF, Function *F,
175*700637cbSDimitry Andric                 IndexInstrMap *IndexInstruction)
176*700637cbSDimitry Andric       : SF(SF), F(F), IndexInstruction(std::move(IndexInstruction)) {}
177*700637cbSDimitry Andric };
178*700637cbSDimitry Andric 
179*700637cbSDimitry Andric // Given the func info, and the parameterized locations, create and return
180*700637cbSDimitry Andric // a new merged function by replacing the original constants with the new
181*700637cbSDimitry Andric // parameters.
createMergedFunction(FuncMergeInfo & FI,ArrayRef<Type * > ConstParamTypes,const ParamLocsVecTy & ParamLocsVec)182*700637cbSDimitry Andric static Function *createMergedFunction(FuncMergeInfo &FI,
183*700637cbSDimitry Andric                                       ArrayRef<Type *> ConstParamTypes,
184*700637cbSDimitry Andric                                       const ParamLocsVecTy &ParamLocsVec) {
185*700637cbSDimitry Andric   // Synthesize a new merged function name by appending ".Tgm" to the root
186*700637cbSDimitry Andric   // function's name.
187*700637cbSDimitry Andric   auto *MergedFunc = FI.F;
188*700637cbSDimitry Andric   std::string NewFunctionName =
189*700637cbSDimitry Andric       MergedFunc->getName().str() + GlobalMergeFunc::MergingInstanceSuffix;
190*700637cbSDimitry Andric   auto *M = MergedFunc->getParent();
191*700637cbSDimitry Andric   assert(!M->getFunction(NewFunctionName));
192*700637cbSDimitry Andric 
193*700637cbSDimitry Andric   FunctionType *OrigTy = MergedFunc->getFunctionType();
194*700637cbSDimitry Andric   // Get the original params' types.
195*700637cbSDimitry Andric   SmallVector<Type *> ParamTypes(OrigTy->param_begin(), OrigTy->param_end());
196*700637cbSDimitry Andric   // Append const parameter types that are passed in.
197*700637cbSDimitry Andric   ParamTypes.append(ConstParamTypes.begin(), ConstParamTypes.end());
198*700637cbSDimitry Andric   FunctionType *FuncType = FunctionType::get(OrigTy->getReturnType(),
199*700637cbSDimitry Andric                                              ParamTypes, /*isVarArg=*/false);
200*700637cbSDimitry Andric 
201*700637cbSDimitry Andric   // Declare a new function
202*700637cbSDimitry Andric   Function *NewFunction =
203*700637cbSDimitry Andric       Function::Create(FuncType, MergedFunc->getLinkage(), NewFunctionName);
204*700637cbSDimitry Andric   if (auto *SP = MergedFunc->getSubprogram())
205*700637cbSDimitry Andric     NewFunction->setSubprogram(SP);
206*700637cbSDimitry Andric   NewFunction->copyAttributesFrom(MergedFunc);
207*700637cbSDimitry Andric   NewFunction->setDLLStorageClass(GlobalValue::DefaultStorageClass);
208*700637cbSDimitry Andric 
209*700637cbSDimitry Andric   NewFunction->setLinkage(GlobalValue::InternalLinkage);
210*700637cbSDimitry Andric   NewFunction->addFnAttr(Attribute::NoInline);
211*700637cbSDimitry Andric 
212*700637cbSDimitry Andric   // Add the new function before the root function.
213*700637cbSDimitry Andric   M->getFunctionList().insert(MergedFunc->getIterator(), NewFunction);
214*700637cbSDimitry Andric 
215*700637cbSDimitry Andric   // Move the body of MergedFunc into the NewFunction.
216*700637cbSDimitry Andric   NewFunction->splice(NewFunction->begin(), MergedFunc);
217*700637cbSDimitry Andric 
218*700637cbSDimitry Andric   // Update the original args by the new args.
219*700637cbSDimitry Andric   auto NewArgIter = NewFunction->arg_begin();
220*700637cbSDimitry Andric   for (Argument &OrigArg : MergedFunc->args()) {
221*700637cbSDimitry Andric     Argument &NewArg = *NewArgIter++;
222*700637cbSDimitry Andric     OrigArg.replaceAllUsesWith(&NewArg);
223*700637cbSDimitry Andric   }
224*700637cbSDimitry Andric 
225*700637cbSDimitry Andric   // Replace the original Constants by the new args.
226*700637cbSDimitry Andric   unsigned NumOrigArgs = MergedFunc->arg_size();
227*700637cbSDimitry Andric   for (unsigned ParamIdx = 0; ParamIdx < ParamLocsVec.size(); ++ParamIdx) {
228*700637cbSDimitry Andric     Argument *NewArg = NewFunction->getArg(NumOrigArgs + ParamIdx);
229*700637cbSDimitry Andric     for (auto [InstIndex, OpndIndex] : ParamLocsVec[ParamIdx]) {
230*700637cbSDimitry Andric       auto *Inst = FI.IndexInstruction->lookup(InstIndex);
231*700637cbSDimitry Andric       auto *OrigC = Inst->getOperand(OpndIndex);
232*700637cbSDimitry Andric       if (OrigC->getType() != NewArg->getType()) {
233*700637cbSDimitry Andric         IRBuilder<> Builder(Inst->getParent(), Inst->getIterator());
234*700637cbSDimitry Andric         Inst->setOperand(OpndIndex,
235*700637cbSDimitry Andric                          Builder.CreateAggregateCast(NewArg, OrigC->getType()));
236*700637cbSDimitry Andric       } else {
237*700637cbSDimitry Andric         Inst->setOperand(OpndIndex, NewArg);
238*700637cbSDimitry Andric       }
239*700637cbSDimitry Andric     }
240*700637cbSDimitry Andric   }
241*700637cbSDimitry Andric 
242*700637cbSDimitry Andric   return NewFunction;
243*700637cbSDimitry Andric }
244*700637cbSDimitry Andric 
245*700637cbSDimitry Andric // Given the original function (Thunk) and the merged function (ToFunc), create
246*700637cbSDimitry Andric // a thunk to the merged function.
createThunk(FuncMergeInfo & FI,ArrayRef<Constant * > Params,Function * ToFunc)247*700637cbSDimitry Andric static void createThunk(FuncMergeInfo &FI, ArrayRef<Constant *> Params,
248*700637cbSDimitry Andric                         Function *ToFunc) {
249*700637cbSDimitry Andric   auto *Thunk = FI.F;
250*700637cbSDimitry Andric 
251*700637cbSDimitry Andric   assert(Thunk->arg_size() + Params.size() ==
252*700637cbSDimitry Andric          ToFunc->getFunctionType()->getNumParams());
253*700637cbSDimitry Andric   Thunk->dropAllReferences();
254*700637cbSDimitry Andric 
255*700637cbSDimitry Andric   BasicBlock *BB = BasicBlock::Create(Thunk->getContext(), "", Thunk);
256*700637cbSDimitry Andric   IRBuilder<> Builder(BB);
257*700637cbSDimitry Andric 
258*700637cbSDimitry Andric   SmallVector<Value *> Args;
259*700637cbSDimitry Andric   unsigned ParamIdx = 0;
260*700637cbSDimitry Andric   FunctionType *ToFuncTy = ToFunc->getFunctionType();
261*700637cbSDimitry Andric 
262*700637cbSDimitry Andric   // Add arguments which are passed through Thunk.
263*700637cbSDimitry Andric   for (Argument &AI : Thunk->args()) {
264*700637cbSDimitry Andric     Args.push_back(
265*700637cbSDimitry Andric         Builder.CreateAggregateCast(&AI, ToFuncTy->getParamType(ParamIdx)));
266*700637cbSDimitry Andric     ++ParamIdx;
267*700637cbSDimitry Andric   }
268*700637cbSDimitry Andric 
269*700637cbSDimitry Andric   // Add new arguments defined by Params.
270*700637cbSDimitry Andric   for (auto *Param : Params) {
271*700637cbSDimitry Andric     assert(ParamIdx < ToFuncTy->getNumParams());
272*700637cbSDimitry Andric     Args.push_back(
273*700637cbSDimitry Andric         Builder.CreateAggregateCast(Param, ToFuncTy->getParamType(ParamIdx)));
274*700637cbSDimitry Andric     ++ParamIdx;
275*700637cbSDimitry Andric   }
276*700637cbSDimitry Andric 
277*700637cbSDimitry Andric   CallInst *CI = Builder.CreateCall(ToFunc, Args);
278*700637cbSDimitry Andric   bool isSwiftTailCall = ToFunc->getCallingConv() == CallingConv::SwiftTail &&
279*700637cbSDimitry Andric                          Thunk->getCallingConv() == CallingConv::SwiftTail;
280*700637cbSDimitry Andric   CI->setTailCallKind(isSwiftTailCall ? llvm::CallInst::TCK_MustTail
281*700637cbSDimitry Andric                                       : llvm::CallInst::TCK_Tail);
282*700637cbSDimitry Andric   CI->setCallingConv(ToFunc->getCallingConv());
283*700637cbSDimitry Andric   CI->setAttributes(ToFunc->getAttributes());
284*700637cbSDimitry Andric   if (Thunk->getReturnType()->isVoidTy())
285*700637cbSDimitry Andric     Builder.CreateRetVoid();
286*700637cbSDimitry Andric   else
287*700637cbSDimitry Andric     Builder.CreateRet(Builder.CreateAggregateCast(CI, Thunk->getReturnType()));
288*700637cbSDimitry Andric }
289*700637cbSDimitry Andric 
290*700637cbSDimitry Andric // Check if the old merged/optimized IndexOperandHashMap is compatible with
291*700637cbSDimitry Andric // the current IndexOperandHashMap. An operand hash may not be stable across
292*700637cbSDimitry Andric // different builds due to varying modules combined. To address this, we relax
293*700637cbSDimitry Andric // the hash check condition by comparing Const hash patterns instead of absolute
294*700637cbSDimitry Andric // hash values. For example, let's assume we have three Consts located at idx1,
295*700637cbSDimitry Andric // idx3, and idx6, where their corresponding hashes are hash1, hash2, and hash1
296*700637cbSDimitry Andric // in the old merged map below:
297*700637cbSDimitry Andric //   Old (Merged): [(idx1, hash1), (idx3, hash2), (idx6, hash1)]
298*700637cbSDimitry Andric //   Current: [(idx1, hash1'), (idx3, hash2'), (idx6, hash1')]
299*700637cbSDimitry Andric // If the current function also has three Consts in the same locations,
300*700637cbSDimitry Andric // with hash sequences hash1', hash2', and hash1' where the first and third
301*700637cbSDimitry Andric // are the same as the old hash sequences, we consider them matched.
checkConstHashCompatible(const DenseMap<IndexPair,stable_hash> & OldInstOpndIndexToConstHash,const DenseMap<IndexPair,stable_hash> & CurrInstOpndIndexToConstHash)302*700637cbSDimitry Andric static bool checkConstHashCompatible(
303*700637cbSDimitry Andric     const DenseMap<IndexPair, stable_hash> &OldInstOpndIndexToConstHash,
304*700637cbSDimitry Andric     const DenseMap<IndexPair, stable_hash> &CurrInstOpndIndexToConstHash) {
305*700637cbSDimitry Andric 
306*700637cbSDimitry Andric   DenseMap<stable_hash, stable_hash> OldHashToCurrHash;
307*700637cbSDimitry Andric   for (const auto &[Index, OldHash] : OldInstOpndIndexToConstHash) {
308*700637cbSDimitry Andric     auto It = CurrInstOpndIndexToConstHash.find(Index);
309*700637cbSDimitry Andric     if (It == CurrInstOpndIndexToConstHash.end())
310*700637cbSDimitry Andric       return false;
311*700637cbSDimitry Andric 
312*700637cbSDimitry Andric     auto CurrHash = It->second;
313*700637cbSDimitry Andric     auto J = OldHashToCurrHash.find(OldHash);
314*700637cbSDimitry Andric     if (J == OldHashToCurrHash.end())
315*700637cbSDimitry Andric       OldHashToCurrHash.insert({OldHash, CurrHash});
316*700637cbSDimitry Andric     else if (J->second != CurrHash)
317*700637cbSDimitry Andric       return false;
318*700637cbSDimitry Andric   }
319*700637cbSDimitry Andric 
320*700637cbSDimitry Andric   return true;
321*700637cbSDimitry Andric }
322*700637cbSDimitry Andric 
323*700637cbSDimitry Andric // Validate the locations pointed by a param has the same hash and Constant.
324*700637cbSDimitry Andric static bool
checkConstLocationCompatible(const StableFunctionMap::StableFunctionEntry & SF,const IndexInstrMap & IndexInstruction,const ParamLocsVecTy & ParamLocsVec)325*700637cbSDimitry Andric checkConstLocationCompatible(const StableFunctionMap::StableFunctionEntry &SF,
326*700637cbSDimitry Andric                              const IndexInstrMap &IndexInstruction,
327*700637cbSDimitry Andric                              const ParamLocsVecTy &ParamLocsVec) {
328*700637cbSDimitry Andric   for (auto &ParamLocs : ParamLocsVec) {
329*700637cbSDimitry Andric     std::optional<stable_hash> OldHash;
330*700637cbSDimitry Andric     std::optional<Constant *> OldConst;
331*700637cbSDimitry Andric     for (auto &Loc : ParamLocs) {
332*700637cbSDimitry Andric       assert(SF.IndexOperandHashMap->count(Loc));
333*700637cbSDimitry Andric       auto CurrHash = SF.IndexOperandHashMap->at(Loc);
334*700637cbSDimitry Andric       auto [InstIndex, OpndIndex] = Loc;
335*700637cbSDimitry Andric       assert(InstIndex < IndexInstruction.size());
336*700637cbSDimitry Andric       const auto *Inst = IndexInstruction.lookup(InstIndex);
337*700637cbSDimitry Andric       auto *CurrConst = cast<Constant>(Inst->getOperand(OpndIndex));
338*700637cbSDimitry Andric       if (!OldHash) {
339*700637cbSDimitry Andric         OldHash = CurrHash;
340*700637cbSDimitry Andric         OldConst = CurrConst;
341*700637cbSDimitry Andric       } else if (CurrConst != *OldConst || CurrHash != *OldHash) {
342*700637cbSDimitry Andric         return false;
343*700637cbSDimitry Andric       }
344*700637cbSDimitry Andric     }
345*700637cbSDimitry Andric   }
346*700637cbSDimitry Andric   return true;
347*700637cbSDimitry Andric }
348*700637cbSDimitry Andric 
computeParamInfo(const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>> & SFS)349*700637cbSDimitry Andric static ParamLocsVecTy computeParamInfo(
350*700637cbSDimitry Andric     const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>>
351*700637cbSDimitry Andric         &SFS) {
352*700637cbSDimitry Andric   std::map<std::vector<stable_hash>, ParamLocs> HashSeqToLocs;
353*700637cbSDimitry Andric   auto &RSF = *SFS[0];
354*700637cbSDimitry Andric   unsigned StableFunctionCount = SFS.size();
355*700637cbSDimitry Andric 
356*700637cbSDimitry Andric   for (auto &[IndexPair, Hash] : *RSF.IndexOperandHashMap) {
357*700637cbSDimitry Andric     // Const hash sequence across stable functions.
358*700637cbSDimitry Andric     // We will allocate a parameter per unique hash squence.
359*700637cbSDimitry Andric     // can't use SmallVector as key
360*700637cbSDimitry Andric     std::vector<stable_hash> ConstHashSeq;
361*700637cbSDimitry Andric     ConstHashSeq.push_back(Hash);
362*700637cbSDimitry Andric     bool Identical = true;
363*700637cbSDimitry Andric     for (unsigned J = 1; J < StableFunctionCount; ++J) {
364*700637cbSDimitry Andric       auto &SF = SFS[J];
365*700637cbSDimitry Andric       auto SHash = SF->IndexOperandHashMap->at(IndexPair);
366*700637cbSDimitry Andric       if (Hash != SHash)
367*700637cbSDimitry Andric         Identical = false;
368*700637cbSDimitry Andric       ConstHashSeq.push_back(SHash);
369*700637cbSDimitry Andric     }
370*700637cbSDimitry Andric 
371*700637cbSDimitry Andric     if (Identical)
372*700637cbSDimitry Andric       continue;
373*700637cbSDimitry Andric 
374*700637cbSDimitry Andric     // For each unique Const hash sequence (parameter), add the locations.
375*700637cbSDimitry Andric     HashSeqToLocs[ConstHashSeq].push_back(IndexPair);
376*700637cbSDimitry Andric   }
377*700637cbSDimitry Andric 
378*700637cbSDimitry Andric   ParamLocsVecTy ParamLocsVec;
379*700637cbSDimitry Andric   for (auto &[HashSeq, Locs] : HashSeqToLocs)
380*700637cbSDimitry Andric     ParamLocsVec.push_back(std::move(Locs));
381*700637cbSDimitry Andric 
382*700637cbSDimitry Andric   llvm::sort(ParamLocsVec, [&](const ParamLocs &L, const ParamLocs &R) {
383*700637cbSDimitry Andric     return L[0] < R[0];
384*700637cbSDimitry Andric   });
385*700637cbSDimitry Andric 
386*700637cbSDimitry Andric   return ParamLocsVec;
387*700637cbSDimitry Andric }
388*700637cbSDimitry Andric 
merge(Module & M,const StableFunctionMap * FunctionMap)389*700637cbSDimitry Andric bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
390*700637cbSDimitry Andric   bool Changed = false;
391*700637cbSDimitry Andric 
392*700637cbSDimitry Andric   // Collect stable functions related to the current module.
393*700637cbSDimitry Andric   DenseMap<stable_hash, SmallVector<std::pair<Function *, FunctionHashInfo>>>
394*700637cbSDimitry Andric       HashToFuncs;
395*700637cbSDimitry Andric   auto &Maps = FunctionMap->getFunctionMap();
396*700637cbSDimitry Andric   for (auto &F : M) {
397*700637cbSDimitry Andric     if (!isEligibleFunction(&F))
398*700637cbSDimitry Andric       continue;
399*700637cbSDimitry Andric     auto FI = llvm::StructuralHashWithDifferences(F, ignoreOp);
400*700637cbSDimitry Andric     if (Maps.contains(FI.FunctionHash))
401*700637cbSDimitry Andric       HashToFuncs[FI.FunctionHash].emplace_back(&F, std::move(FI));
402*700637cbSDimitry Andric   }
403*700637cbSDimitry Andric 
404*700637cbSDimitry Andric   for (auto &[Hash, Funcs] : HashToFuncs) {
405*700637cbSDimitry Andric     std::optional<ParamLocsVecTy> ParamLocsVec;
406*700637cbSDimitry Andric     SmallVector<FuncMergeInfo> FuncMergeInfos;
407*700637cbSDimitry Andric     auto &SFS = Maps.at(Hash);
408*700637cbSDimitry Andric     assert(!SFS.empty());
409*700637cbSDimitry Andric     auto &RFS = SFS[0];
410*700637cbSDimitry Andric 
411*700637cbSDimitry Andric     // Iterate functions with the same hash.
412*700637cbSDimitry Andric     for (auto &[F, FI] : Funcs) {
413*700637cbSDimitry Andric       // Check if the function is compatible with any stable function
414*700637cbSDimitry Andric       // in terms of the number of instructions and ignored operands.
415*700637cbSDimitry Andric       if (RFS->InstCount != FI.IndexInstruction->size())
416*700637cbSDimitry Andric         continue;
417*700637cbSDimitry Andric 
418*700637cbSDimitry Andric       auto hasValidSharedConst = [&](StableFunctionMap::StableFunctionEntry *SF,
419*700637cbSDimitry Andric                                      FunctionHashInfo &FHI) {
420*700637cbSDimitry Andric         for (auto &[Index, Hash] : *SF->IndexOperandHashMap) {
421*700637cbSDimitry Andric           auto [InstIndex, OpndIndex] = Index;
422*700637cbSDimitry Andric           assert(InstIndex < FHI.IndexInstruction->size());
423*700637cbSDimitry Andric           auto *Inst = FHI.IndexInstruction->lookup(InstIndex);
424*700637cbSDimitry Andric           if (!ignoreOp(Inst, OpndIndex))
425*700637cbSDimitry Andric             return false;
426*700637cbSDimitry Andric         }
427*700637cbSDimitry Andric         return true;
428*700637cbSDimitry Andric       };
429*700637cbSDimitry Andric       if (!hasValidSharedConst(RFS.get(), FI))
430*700637cbSDimitry Andric         continue;
431*700637cbSDimitry Andric 
432*700637cbSDimitry Andric       for (auto &SF : SFS) {
433*700637cbSDimitry Andric         assert(SF->InstCount == FI.IndexInstruction->size());
434*700637cbSDimitry Andric         assert(hasValidSharedConst(SF.get(), FI));
435*700637cbSDimitry Andric         // Check if there is any stable function that is compatiable with the
436*700637cbSDimitry Andric         // current one.
437*700637cbSDimitry Andric         if (!checkConstHashCompatible(*SF->IndexOperandHashMap,
438*700637cbSDimitry Andric                                       *FI.IndexOperandHashMap))
439*700637cbSDimitry Andric           continue;
440*700637cbSDimitry Andric         if (!ParamLocsVec.has_value()) {
441*700637cbSDimitry Andric           ParamLocsVec = computeParamInfo(SFS);
442*700637cbSDimitry Andric           LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash
443*700637cbSDimitry Andric                             << " with Params " << ParamLocsVec->size() << "\n");
444*700637cbSDimitry Andric         }
445*700637cbSDimitry Andric         if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction,
446*700637cbSDimitry Andric                                           *ParamLocsVec))
447*700637cbSDimitry Andric           continue;
448*700637cbSDimitry Andric 
449*700637cbSDimitry Andric         // If a stable function matching the current one is found,
450*700637cbSDimitry Andric         // create a candidate for merging and proceed to the next function.
451*700637cbSDimitry Andric         FuncMergeInfos.emplace_back(SF.get(), F, FI.IndexInstruction.get());
452*700637cbSDimitry Andric         break;
453*700637cbSDimitry Andric       }
454*700637cbSDimitry Andric     }
455*700637cbSDimitry Andric     unsigned FuncMergeInfoSize = FuncMergeInfos.size();
456*700637cbSDimitry Andric     if (FuncMergeInfoSize == 0)
457*700637cbSDimitry Andric       continue;
458*700637cbSDimitry Andric 
459*700637cbSDimitry Andric     LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging function count "
460*700637cbSDimitry Andric                       << FuncMergeInfoSize << " for hash:  " << Hash << "\n");
461*700637cbSDimitry Andric 
462*700637cbSDimitry Andric     for (auto &FMI : FuncMergeInfos) {
463*700637cbSDimitry Andric       Changed = true;
464*700637cbSDimitry Andric 
465*700637cbSDimitry Andric       // We've already validated all locations of constant operands pointed by
466*700637cbSDimitry Andric       // the parameters. Populate parameters pointing to the original constants.
467*700637cbSDimitry Andric       SmallVector<Constant *> Params;
468*700637cbSDimitry Andric       SmallVector<Type *> ParamTypes;
469*700637cbSDimitry Andric       for (auto &ParamLocs : *ParamLocsVec) {
470*700637cbSDimitry Andric         assert(!ParamLocs.empty());
471*700637cbSDimitry Andric         auto &[InstIndex, OpndIndex] = ParamLocs[0];
472*700637cbSDimitry Andric         auto *Inst = FMI.IndexInstruction->lookup(InstIndex);
473*700637cbSDimitry Andric         auto *Opnd = cast<Constant>(Inst->getOperand(OpndIndex));
474*700637cbSDimitry Andric         Params.push_back(Opnd);
475*700637cbSDimitry Andric         ParamTypes.push_back(Opnd->getType());
476*700637cbSDimitry Andric       }
477*700637cbSDimitry Andric 
478*700637cbSDimitry Andric       // Create a merged function derived from the current function.
479*700637cbSDimitry Andric       Function *MergedFunc =
480*700637cbSDimitry Andric           createMergedFunction(FMI, ParamTypes, *ParamLocsVec);
481*700637cbSDimitry Andric 
482*700637cbSDimitry Andric       LLVM_DEBUG({
483*700637cbSDimitry Andric         dbgs() << "[GlobalMergeFunc] Merged function (hash:" << FMI.SF->Hash
484*700637cbSDimitry Andric                << ") " << MergedFunc->getName() << " generated from "
485*700637cbSDimitry Andric                << FMI.F->getName() << ":\n";
486*700637cbSDimitry Andric         MergedFunc->dump();
487*700637cbSDimitry Andric       });
488*700637cbSDimitry Andric 
489*700637cbSDimitry Andric       // Transform the current function into a thunk that calls the merged
490*700637cbSDimitry Andric       // function.
491*700637cbSDimitry Andric       createThunk(FMI, Params, MergedFunc);
492*700637cbSDimitry Andric       LLVM_DEBUG({
493*700637cbSDimitry Andric         dbgs() << "[GlobalMergeFunc] Thunk generated: \n";
494*700637cbSDimitry Andric         FMI.F->dump();
495*700637cbSDimitry Andric       });
496*700637cbSDimitry Andric       ++NumMergedFunctions;
497*700637cbSDimitry Andric     }
498*700637cbSDimitry Andric   }
499*700637cbSDimitry Andric 
500*700637cbSDimitry Andric   return Changed;
501*700637cbSDimitry Andric }
502*700637cbSDimitry Andric 
initializeMergerMode(const Module & M)503*700637cbSDimitry Andric void GlobalMergeFunc::initializeMergerMode(const Module &M) {
504*700637cbSDimitry Andric   // Initialize the local function map regardless of the merger mode.
505*700637cbSDimitry Andric   LocalFunctionMap = std::make_unique<StableFunctionMap>();
506*700637cbSDimitry Andric 
507*700637cbSDimitry Andric   // Disable codegen data for merging. The local merge is still enabled.
508*700637cbSDimitry Andric   if (DisableCGDataForMerging)
509*700637cbSDimitry Andric     return;
510*700637cbSDimitry Andric 
511*700637cbSDimitry Andric   // (Full)LTO module does not have functions added to the index.
512*700637cbSDimitry Andric   // In this case, we run a local merger without using codegen data.
513*700637cbSDimitry Andric   if (Index && !Index->hasExportedFunctions(M))
514*700637cbSDimitry Andric     return;
515*700637cbSDimitry Andric 
516*700637cbSDimitry Andric   if (cgdata::emitCGData())
517*700637cbSDimitry Andric     MergerMode = HashFunctionMode::BuildingHashFuncion;
518*700637cbSDimitry Andric   else if (cgdata::hasStableFunctionMap())
519*700637cbSDimitry Andric     MergerMode = HashFunctionMode::UsingHashFunction;
520*700637cbSDimitry Andric }
521*700637cbSDimitry Andric 
emitFunctionMap(Module & M)522*700637cbSDimitry Andric void GlobalMergeFunc::emitFunctionMap(Module &M) {
523*700637cbSDimitry Andric   LLVM_DEBUG(dbgs() << "Emit function map. Size: " << LocalFunctionMap->size()
524*700637cbSDimitry Andric                     << "\n");
525*700637cbSDimitry Andric   // No need to emit the function map if it is empty.
526*700637cbSDimitry Andric   if (LocalFunctionMap->empty())
527*700637cbSDimitry Andric     return;
528*700637cbSDimitry Andric   SmallVector<char> Buf;
529*700637cbSDimitry Andric   raw_svector_ostream OS(Buf);
530*700637cbSDimitry Andric 
531*700637cbSDimitry Andric   std::vector<CGDataPatchItem> PatchItems;
532*700637cbSDimitry Andric   StableFunctionMapRecord::serialize(OS, LocalFunctionMap.get(), PatchItems);
533*700637cbSDimitry Andric   CGDataOStream COS(OS);
534*700637cbSDimitry Andric   COS.patch(PatchItems);
535*700637cbSDimitry Andric 
536*700637cbSDimitry Andric   std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
537*700637cbSDimitry Andric       OS.str(), "in-memory stable function map", false);
538*700637cbSDimitry Andric 
539*700637cbSDimitry Andric   Triple TT(M.getTargetTriple());
540*700637cbSDimitry Andric   embedBufferInModule(M, *Buffer,
541*700637cbSDimitry Andric                       getCodeGenDataSectionName(CG_merge, TT.getObjectFormat()),
542*700637cbSDimitry Andric                       Align(4));
543*700637cbSDimitry Andric }
544*700637cbSDimitry Andric 
run(Module & M)545*700637cbSDimitry Andric bool GlobalMergeFunc::run(Module &M) {
546*700637cbSDimitry Andric   initializeMergerMode(M);
547*700637cbSDimitry Andric 
548*700637cbSDimitry Andric   const StableFunctionMap *FuncMap;
549*700637cbSDimitry Andric   if (MergerMode == HashFunctionMode::UsingHashFunction) {
550*700637cbSDimitry Andric     // Use the prior CG data to optimistically create global merge candidates.
551*700637cbSDimitry Andric     FuncMap = cgdata::getStableFunctionMap();
552*700637cbSDimitry Andric   } else {
553*700637cbSDimitry Andric     analyze(M);
554*700637cbSDimitry Andric     // Emit the local function map to the custom section, __llvm_merge before
555*700637cbSDimitry Andric     // finalizing it.
556*700637cbSDimitry Andric     if (MergerMode == HashFunctionMode::BuildingHashFuncion)
557*700637cbSDimitry Andric       emitFunctionMap(M);
558*700637cbSDimitry Andric     LocalFunctionMap->finalize();
559*700637cbSDimitry Andric     FuncMap = LocalFunctionMap.get();
560*700637cbSDimitry Andric   }
561*700637cbSDimitry Andric 
562*700637cbSDimitry Andric   return merge(M, FuncMap);
563*700637cbSDimitry Andric }
564*700637cbSDimitry Andric 
565*700637cbSDimitry Andric namespace {
566*700637cbSDimitry Andric 
567*700637cbSDimitry Andric class GlobalMergeFuncPassWrapper : public ModulePass {
568*700637cbSDimitry Andric 
569*700637cbSDimitry Andric public:
570*700637cbSDimitry Andric   static char ID;
571*700637cbSDimitry Andric 
572*700637cbSDimitry Andric   GlobalMergeFuncPassWrapper();
573*700637cbSDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const574*700637cbSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
575*700637cbSDimitry Andric     AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
576*700637cbSDimitry Andric     AU.setPreservesAll();
577*700637cbSDimitry Andric     ModulePass::getAnalysisUsage(AU);
578*700637cbSDimitry Andric   }
579*700637cbSDimitry Andric 
getPassName() const580*700637cbSDimitry Andric   StringRef getPassName() const override { return "Global Merge Functions"; }
581*700637cbSDimitry Andric 
582*700637cbSDimitry Andric   bool runOnModule(Module &M) override;
583*700637cbSDimitry Andric };
584*700637cbSDimitry Andric 
585*700637cbSDimitry Andric } // namespace
586*700637cbSDimitry Andric 
587*700637cbSDimitry Andric char GlobalMergeFuncPassWrapper::ID = 0;
588*700637cbSDimitry Andric INITIALIZE_PASS_BEGIN(GlobalMergeFuncPassWrapper, "global-merge-func",
589*700637cbSDimitry Andric                       "Global merge function pass", false, false)
590*700637cbSDimitry Andric INITIALIZE_PASS_END(GlobalMergeFuncPassWrapper, "global-merge-func",
591*700637cbSDimitry Andric                     "Global merge function pass", false, false)
592*700637cbSDimitry Andric 
593*700637cbSDimitry Andric namespace llvm {
createGlobalMergeFuncPass()594*700637cbSDimitry Andric ModulePass *createGlobalMergeFuncPass() {
595*700637cbSDimitry Andric   return new GlobalMergeFuncPassWrapper();
596*700637cbSDimitry Andric }
597*700637cbSDimitry Andric } // namespace llvm
598*700637cbSDimitry Andric 
GlobalMergeFuncPassWrapper()599*700637cbSDimitry Andric GlobalMergeFuncPassWrapper::GlobalMergeFuncPassWrapper() : ModulePass(ID) {
600*700637cbSDimitry Andric   initializeGlobalMergeFuncPassWrapperPass(
601*700637cbSDimitry Andric       *llvm::PassRegistry::getPassRegistry());
602*700637cbSDimitry Andric }
603*700637cbSDimitry Andric 
runOnModule(Module & M)604*700637cbSDimitry Andric bool GlobalMergeFuncPassWrapper::runOnModule(Module &M) {
605*700637cbSDimitry Andric   const ModuleSummaryIndex *Index = nullptr;
606*700637cbSDimitry Andric   if (auto *IndexWrapperPass =
607*700637cbSDimitry Andric           getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>())
608*700637cbSDimitry Andric     Index = IndexWrapperPass->getIndex();
609*700637cbSDimitry Andric 
610*700637cbSDimitry Andric   return GlobalMergeFunc(Index).run(M);
611*700637cbSDimitry Andric }
612*700637cbSDimitry Andric 
run(Module & M,AnalysisManager<Module> & AM)613*700637cbSDimitry Andric PreservedAnalyses GlobalMergeFuncPass::run(Module &M,
614*700637cbSDimitry Andric                                            AnalysisManager<Module> &AM) {
615*700637cbSDimitry Andric   bool Changed = GlobalMergeFunc(ImportSummary).run(M);
616*700637cbSDimitry Andric   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
617*700637cbSDimitry Andric }
618