1*700637cbSDimitry Andric //===---- GlobalMergeFunctions.cpp - Global merge functions -------*- C++ -===//
2*700637cbSDimitry Andric //
3*700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*700637cbSDimitry Andric //
7*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
8*700637cbSDimitry Andric //
9*700637cbSDimitry Andric // This pass implements the global merge function pass.
10*700637cbSDimitry Andric //
11*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
12*700637cbSDimitry Andric
13*700637cbSDimitry Andric #include "llvm/CodeGen/GlobalMergeFunctions.h"
14*700637cbSDimitry Andric #include "llvm/ADT/Statistic.h"
15*700637cbSDimitry Andric #include "llvm/Analysis/ModuleSummaryAnalysis.h"
16*700637cbSDimitry Andric #include "llvm/CGData/CodeGenData.h"
17*700637cbSDimitry Andric #include "llvm/CGData/CodeGenDataWriter.h"
18*700637cbSDimitry Andric #include "llvm/CodeGen/Passes.h"
19*700637cbSDimitry Andric #include "llvm/IR/IRBuilder.h"
20*700637cbSDimitry Andric #include "llvm/IR/StructuralHash.h"
21*700637cbSDimitry Andric #include "llvm/InitializePasses.h"
22*700637cbSDimitry Andric #include "llvm/Support/CommandLine.h"
23*700637cbSDimitry Andric #include "llvm/Transforms/Utils/ModuleUtils.h"
24*700637cbSDimitry Andric
25*700637cbSDimitry Andric #define DEBUG_TYPE "global-merge-func"
26*700637cbSDimitry Andric
27*700637cbSDimitry Andric using namespace llvm;
28*700637cbSDimitry Andric using namespace llvm::support;
29*700637cbSDimitry Andric
30*700637cbSDimitry Andric static cl::opt<bool> DisableCGDataForMerging(
31*700637cbSDimitry Andric "disable-cgdata-for-merging", cl::Hidden,
32*700637cbSDimitry Andric cl::desc("Disable codegen data for function merging. Local "
33*700637cbSDimitry Andric "merging is still enabled within a module."),
34*700637cbSDimitry Andric cl::init(false));
35*700637cbSDimitry Andric
36*700637cbSDimitry Andric STATISTIC(NumMergedFunctions,
37*700637cbSDimitry Andric "Number of functions that are actually merged using function hash");
38*700637cbSDimitry Andric STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
39*700637cbSDimitry Andric STATISTIC(NumAnalyzedFunctions, "Number of functions that are analyzed");
40*700637cbSDimitry Andric STATISTIC(NumEligibleFunctions, "Number of functions that are eligible");
41*700637cbSDimitry Andric
42*700637cbSDimitry Andric /// Returns true if the \OpIdx operand of \p CI is the callee operand.
isCalleeOperand(const CallBase * CI,unsigned OpIdx)43*700637cbSDimitry Andric static bool isCalleeOperand(const CallBase *CI, unsigned OpIdx) {
44*700637cbSDimitry Andric return &CI->getCalledOperandUse() == &CI->getOperandUse(OpIdx);
45*700637cbSDimitry Andric }
46*700637cbSDimitry Andric
canParameterizeCallOperand(const CallBase * CI,unsigned OpIdx)47*700637cbSDimitry Andric static bool canParameterizeCallOperand(const CallBase *CI, unsigned OpIdx) {
48*700637cbSDimitry Andric if (CI->isInlineAsm())
49*700637cbSDimitry Andric return false;
50*700637cbSDimitry Andric Function *Callee = CI->getCalledOperand()
51*700637cbSDimitry Andric ? dyn_cast_or_null<Function>(
52*700637cbSDimitry Andric CI->getCalledOperand()->stripPointerCasts())
53*700637cbSDimitry Andric : nullptr;
54*700637cbSDimitry Andric if (Callee) {
55*700637cbSDimitry Andric if (Callee->isIntrinsic())
56*700637cbSDimitry Andric return false;
57*700637cbSDimitry Andric auto Name = Callee->getName();
58*700637cbSDimitry Andric // objc_msgSend stubs must be called, and can't have their address taken.
59*700637cbSDimitry Andric if (Name.starts_with("objc_msgSend$"))
60*700637cbSDimitry Andric return false;
61*700637cbSDimitry Andric // Calls to dtrace probes must generate unique patchpoints.
62*700637cbSDimitry Andric if (Name.starts_with("__dtrace"))
63*700637cbSDimitry Andric return false;
64*700637cbSDimitry Andric }
65*700637cbSDimitry Andric if (isCalleeOperand(CI, OpIdx)) {
66*700637cbSDimitry Andric // The operand is the callee and it has already been signed. Ignore this
67*700637cbSDimitry Andric // because we cannot add another ptrauth bundle to the call instruction.
68*700637cbSDimitry Andric if (CI->getOperandBundle(LLVMContext::OB_ptrauth).has_value())
69*700637cbSDimitry Andric return false;
70*700637cbSDimitry Andric } else {
71*700637cbSDimitry Andric // The target of the arc-attached call must be a constant and cannot be
72*700637cbSDimitry Andric // parameterized.
73*700637cbSDimitry Andric if (CI->isOperandBundleOfType(LLVMContext::OB_clang_arc_attachedcall,
74*700637cbSDimitry Andric OpIdx))
75*700637cbSDimitry Andric return false;
76*700637cbSDimitry Andric }
77*700637cbSDimitry Andric return true;
78*700637cbSDimitry Andric }
79*700637cbSDimitry Andric
80*700637cbSDimitry Andric /// Returns true if function \p F is eligible for merging.
isEligibleFunction(Function * F)81*700637cbSDimitry Andric bool isEligibleFunction(Function *F) {
82*700637cbSDimitry Andric if (F->isDeclaration())
83*700637cbSDimitry Andric return false;
84*700637cbSDimitry Andric
85*700637cbSDimitry Andric if (F->hasFnAttribute(llvm::Attribute::NoMerge) ||
86*700637cbSDimitry Andric F->hasFnAttribute(llvm::Attribute::AlwaysInline))
87*700637cbSDimitry Andric return false;
88*700637cbSDimitry Andric
89*700637cbSDimitry Andric if (F->hasAvailableExternallyLinkage())
90*700637cbSDimitry Andric return false;
91*700637cbSDimitry Andric
92*700637cbSDimitry Andric if (F->getFunctionType()->isVarArg())
93*700637cbSDimitry Andric return false;
94*700637cbSDimitry Andric
95*700637cbSDimitry Andric if (F->getCallingConv() == CallingConv::SwiftTail)
96*700637cbSDimitry Andric return false;
97*700637cbSDimitry Andric
98*700637cbSDimitry Andric // If function contains callsites with musttail, if we merge
99*700637cbSDimitry Andric // it, the merged function will have the musttail callsite, but
100*700637cbSDimitry Andric // the number of parameters can change, thus the parameter count
101*700637cbSDimitry Andric // of the callsite will mismatch with the function itself.
102*700637cbSDimitry Andric for (const BasicBlock &BB : *F) {
103*700637cbSDimitry Andric for (const Instruction &I : BB) {
104*700637cbSDimitry Andric const auto *CB = dyn_cast<CallBase>(&I);
105*700637cbSDimitry Andric if (CB && CB->isMustTailCall())
106*700637cbSDimitry Andric return false;
107*700637cbSDimitry Andric }
108*700637cbSDimitry Andric }
109*700637cbSDimitry Andric
110*700637cbSDimitry Andric return true;
111*700637cbSDimitry Andric }
112*700637cbSDimitry Andric
isEligibleInstructionForConstantSharing(const Instruction * I)113*700637cbSDimitry Andric static bool isEligibleInstructionForConstantSharing(const Instruction *I) {
114*700637cbSDimitry Andric switch (I->getOpcode()) {
115*700637cbSDimitry Andric case Instruction::Load:
116*700637cbSDimitry Andric case Instruction::Store:
117*700637cbSDimitry Andric case Instruction::Call:
118*700637cbSDimitry Andric case Instruction::Invoke:
119*700637cbSDimitry Andric return true;
120*700637cbSDimitry Andric default:
121*700637cbSDimitry Andric return false;
122*700637cbSDimitry Andric }
123*700637cbSDimitry Andric }
124*700637cbSDimitry Andric
125*700637cbSDimitry Andric // This function takes an instruction, \p I, and an operand index, \p OpIdx.
126*700637cbSDimitry Andric // It returns true if the operand should be ignored in the hash computation.
127*700637cbSDimitry Andric // If \p OpIdx is out of range based on the other instruction context, it cannot
128*700637cbSDimitry Andric // be ignored.
ignoreOp(const Instruction * I,unsigned OpIdx)129*700637cbSDimitry Andric static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
130*700637cbSDimitry Andric if (OpIdx >= I->getNumOperands())
131*700637cbSDimitry Andric return false;
132*700637cbSDimitry Andric
133*700637cbSDimitry Andric if (!isEligibleInstructionForConstantSharing(I))
134*700637cbSDimitry Andric return false;
135*700637cbSDimitry Andric
136*700637cbSDimitry Andric if (!isa<Constant>(I->getOperand(OpIdx)))
137*700637cbSDimitry Andric return false;
138*700637cbSDimitry Andric
139*700637cbSDimitry Andric if (const auto *CI = dyn_cast<CallBase>(I))
140*700637cbSDimitry Andric return canParameterizeCallOperand(CI, OpIdx);
141*700637cbSDimitry Andric
142*700637cbSDimitry Andric return true;
143*700637cbSDimitry Andric }
144*700637cbSDimitry Andric
analyze(Module & M)145*700637cbSDimitry Andric void GlobalMergeFunc::analyze(Module &M) {
146*700637cbSDimitry Andric ++NumAnalyzedModues;
147*700637cbSDimitry Andric for (Function &Func : M) {
148*700637cbSDimitry Andric ++NumAnalyzedFunctions;
149*700637cbSDimitry Andric if (isEligibleFunction(&Func)) {
150*700637cbSDimitry Andric ++NumEligibleFunctions;
151*700637cbSDimitry Andric
152*700637cbSDimitry Andric auto FI = llvm::StructuralHashWithDifferences(Func, ignoreOp);
153*700637cbSDimitry Andric
154*700637cbSDimitry Andric // Convert the operand map to a vector for a serialization-friendly
155*700637cbSDimitry Andric // format.
156*700637cbSDimitry Andric IndexOperandHashVecType IndexOperandHashes;
157*700637cbSDimitry Andric for (auto &Pair : *FI.IndexOperandHashMap)
158*700637cbSDimitry Andric IndexOperandHashes.emplace_back(Pair);
159*700637cbSDimitry Andric
160*700637cbSDimitry Andric StableFunction SF(FI.FunctionHash, get_stable_name(Func.getName()).str(),
161*700637cbSDimitry Andric M.getModuleIdentifier(), FI.IndexInstruction->size(),
162*700637cbSDimitry Andric std::move(IndexOperandHashes));
163*700637cbSDimitry Andric
164*700637cbSDimitry Andric LocalFunctionMap->insert(SF);
165*700637cbSDimitry Andric }
166*700637cbSDimitry Andric }
167*700637cbSDimitry Andric }
168*700637cbSDimitry Andric
169*700637cbSDimitry Andric /// Tuple to hold function info to process merging.
170*700637cbSDimitry Andric struct FuncMergeInfo {
171*700637cbSDimitry Andric StableFunctionMap::StableFunctionEntry *SF;
172*700637cbSDimitry Andric Function *F;
173*700637cbSDimitry Andric IndexInstrMap *IndexInstruction;
FuncMergeInfoFuncMergeInfo174*700637cbSDimitry Andric FuncMergeInfo(StableFunctionMap::StableFunctionEntry *SF, Function *F,
175*700637cbSDimitry Andric IndexInstrMap *IndexInstruction)
176*700637cbSDimitry Andric : SF(SF), F(F), IndexInstruction(std::move(IndexInstruction)) {}
177*700637cbSDimitry Andric };
178*700637cbSDimitry Andric
179*700637cbSDimitry Andric // Given the func info, and the parameterized locations, create and return
180*700637cbSDimitry Andric // a new merged function by replacing the original constants with the new
181*700637cbSDimitry Andric // parameters.
createMergedFunction(FuncMergeInfo & FI,ArrayRef<Type * > ConstParamTypes,const ParamLocsVecTy & ParamLocsVec)182*700637cbSDimitry Andric static Function *createMergedFunction(FuncMergeInfo &FI,
183*700637cbSDimitry Andric ArrayRef<Type *> ConstParamTypes,
184*700637cbSDimitry Andric const ParamLocsVecTy &ParamLocsVec) {
185*700637cbSDimitry Andric // Synthesize a new merged function name by appending ".Tgm" to the root
186*700637cbSDimitry Andric // function's name.
187*700637cbSDimitry Andric auto *MergedFunc = FI.F;
188*700637cbSDimitry Andric std::string NewFunctionName =
189*700637cbSDimitry Andric MergedFunc->getName().str() + GlobalMergeFunc::MergingInstanceSuffix;
190*700637cbSDimitry Andric auto *M = MergedFunc->getParent();
191*700637cbSDimitry Andric assert(!M->getFunction(NewFunctionName));
192*700637cbSDimitry Andric
193*700637cbSDimitry Andric FunctionType *OrigTy = MergedFunc->getFunctionType();
194*700637cbSDimitry Andric // Get the original params' types.
195*700637cbSDimitry Andric SmallVector<Type *> ParamTypes(OrigTy->param_begin(), OrigTy->param_end());
196*700637cbSDimitry Andric // Append const parameter types that are passed in.
197*700637cbSDimitry Andric ParamTypes.append(ConstParamTypes.begin(), ConstParamTypes.end());
198*700637cbSDimitry Andric FunctionType *FuncType = FunctionType::get(OrigTy->getReturnType(),
199*700637cbSDimitry Andric ParamTypes, /*isVarArg=*/false);
200*700637cbSDimitry Andric
201*700637cbSDimitry Andric // Declare a new function
202*700637cbSDimitry Andric Function *NewFunction =
203*700637cbSDimitry Andric Function::Create(FuncType, MergedFunc->getLinkage(), NewFunctionName);
204*700637cbSDimitry Andric if (auto *SP = MergedFunc->getSubprogram())
205*700637cbSDimitry Andric NewFunction->setSubprogram(SP);
206*700637cbSDimitry Andric NewFunction->copyAttributesFrom(MergedFunc);
207*700637cbSDimitry Andric NewFunction->setDLLStorageClass(GlobalValue::DefaultStorageClass);
208*700637cbSDimitry Andric
209*700637cbSDimitry Andric NewFunction->setLinkage(GlobalValue::InternalLinkage);
210*700637cbSDimitry Andric NewFunction->addFnAttr(Attribute::NoInline);
211*700637cbSDimitry Andric
212*700637cbSDimitry Andric // Add the new function before the root function.
213*700637cbSDimitry Andric M->getFunctionList().insert(MergedFunc->getIterator(), NewFunction);
214*700637cbSDimitry Andric
215*700637cbSDimitry Andric // Move the body of MergedFunc into the NewFunction.
216*700637cbSDimitry Andric NewFunction->splice(NewFunction->begin(), MergedFunc);
217*700637cbSDimitry Andric
218*700637cbSDimitry Andric // Update the original args by the new args.
219*700637cbSDimitry Andric auto NewArgIter = NewFunction->arg_begin();
220*700637cbSDimitry Andric for (Argument &OrigArg : MergedFunc->args()) {
221*700637cbSDimitry Andric Argument &NewArg = *NewArgIter++;
222*700637cbSDimitry Andric OrigArg.replaceAllUsesWith(&NewArg);
223*700637cbSDimitry Andric }
224*700637cbSDimitry Andric
225*700637cbSDimitry Andric // Replace the original Constants by the new args.
226*700637cbSDimitry Andric unsigned NumOrigArgs = MergedFunc->arg_size();
227*700637cbSDimitry Andric for (unsigned ParamIdx = 0; ParamIdx < ParamLocsVec.size(); ++ParamIdx) {
228*700637cbSDimitry Andric Argument *NewArg = NewFunction->getArg(NumOrigArgs + ParamIdx);
229*700637cbSDimitry Andric for (auto [InstIndex, OpndIndex] : ParamLocsVec[ParamIdx]) {
230*700637cbSDimitry Andric auto *Inst = FI.IndexInstruction->lookup(InstIndex);
231*700637cbSDimitry Andric auto *OrigC = Inst->getOperand(OpndIndex);
232*700637cbSDimitry Andric if (OrigC->getType() != NewArg->getType()) {
233*700637cbSDimitry Andric IRBuilder<> Builder(Inst->getParent(), Inst->getIterator());
234*700637cbSDimitry Andric Inst->setOperand(OpndIndex,
235*700637cbSDimitry Andric Builder.CreateAggregateCast(NewArg, OrigC->getType()));
236*700637cbSDimitry Andric } else {
237*700637cbSDimitry Andric Inst->setOperand(OpndIndex, NewArg);
238*700637cbSDimitry Andric }
239*700637cbSDimitry Andric }
240*700637cbSDimitry Andric }
241*700637cbSDimitry Andric
242*700637cbSDimitry Andric return NewFunction;
243*700637cbSDimitry Andric }
244*700637cbSDimitry Andric
245*700637cbSDimitry Andric // Given the original function (Thunk) and the merged function (ToFunc), create
246*700637cbSDimitry Andric // a thunk to the merged function.
createThunk(FuncMergeInfo & FI,ArrayRef<Constant * > Params,Function * ToFunc)247*700637cbSDimitry Andric static void createThunk(FuncMergeInfo &FI, ArrayRef<Constant *> Params,
248*700637cbSDimitry Andric Function *ToFunc) {
249*700637cbSDimitry Andric auto *Thunk = FI.F;
250*700637cbSDimitry Andric
251*700637cbSDimitry Andric assert(Thunk->arg_size() + Params.size() ==
252*700637cbSDimitry Andric ToFunc->getFunctionType()->getNumParams());
253*700637cbSDimitry Andric Thunk->dropAllReferences();
254*700637cbSDimitry Andric
255*700637cbSDimitry Andric BasicBlock *BB = BasicBlock::Create(Thunk->getContext(), "", Thunk);
256*700637cbSDimitry Andric IRBuilder<> Builder(BB);
257*700637cbSDimitry Andric
258*700637cbSDimitry Andric SmallVector<Value *> Args;
259*700637cbSDimitry Andric unsigned ParamIdx = 0;
260*700637cbSDimitry Andric FunctionType *ToFuncTy = ToFunc->getFunctionType();
261*700637cbSDimitry Andric
262*700637cbSDimitry Andric // Add arguments which are passed through Thunk.
263*700637cbSDimitry Andric for (Argument &AI : Thunk->args()) {
264*700637cbSDimitry Andric Args.push_back(
265*700637cbSDimitry Andric Builder.CreateAggregateCast(&AI, ToFuncTy->getParamType(ParamIdx)));
266*700637cbSDimitry Andric ++ParamIdx;
267*700637cbSDimitry Andric }
268*700637cbSDimitry Andric
269*700637cbSDimitry Andric // Add new arguments defined by Params.
270*700637cbSDimitry Andric for (auto *Param : Params) {
271*700637cbSDimitry Andric assert(ParamIdx < ToFuncTy->getNumParams());
272*700637cbSDimitry Andric Args.push_back(
273*700637cbSDimitry Andric Builder.CreateAggregateCast(Param, ToFuncTy->getParamType(ParamIdx)));
274*700637cbSDimitry Andric ++ParamIdx;
275*700637cbSDimitry Andric }
276*700637cbSDimitry Andric
277*700637cbSDimitry Andric CallInst *CI = Builder.CreateCall(ToFunc, Args);
278*700637cbSDimitry Andric bool isSwiftTailCall = ToFunc->getCallingConv() == CallingConv::SwiftTail &&
279*700637cbSDimitry Andric Thunk->getCallingConv() == CallingConv::SwiftTail;
280*700637cbSDimitry Andric CI->setTailCallKind(isSwiftTailCall ? llvm::CallInst::TCK_MustTail
281*700637cbSDimitry Andric : llvm::CallInst::TCK_Tail);
282*700637cbSDimitry Andric CI->setCallingConv(ToFunc->getCallingConv());
283*700637cbSDimitry Andric CI->setAttributes(ToFunc->getAttributes());
284*700637cbSDimitry Andric if (Thunk->getReturnType()->isVoidTy())
285*700637cbSDimitry Andric Builder.CreateRetVoid();
286*700637cbSDimitry Andric else
287*700637cbSDimitry Andric Builder.CreateRet(Builder.CreateAggregateCast(CI, Thunk->getReturnType()));
288*700637cbSDimitry Andric }
289*700637cbSDimitry Andric
290*700637cbSDimitry Andric // Check if the old merged/optimized IndexOperandHashMap is compatible with
291*700637cbSDimitry Andric // the current IndexOperandHashMap. An operand hash may not be stable across
292*700637cbSDimitry Andric // different builds due to varying modules combined. To address this, we relax
293*700637cbSDimitry Andric // the hash check condition by comparing Const hash patterns instead of absolute
294*700637cbSDimitry Andric // hash values. For example, let's assume we have three Consts located at idx1,
295*700637cbSDimitry Andric // idx3, and idx6, where their corresponding hashes are hash1, hash2, and hash1
296*700637cbSDimitry Andric // in the old merged map below:
297*700637cbSDimitry Andric // Old (Merged): [(idx1, hash1), (idx3, hash2), (idx6, hash1)]
298*700637cbSDimitry Andric // Current: [(idx1, hash1'), (idx3, hash2'), (idx6, hash1')]
299*700637cbSDimitry Andric // If the current function also has three Consts in the same locations,
300*700637cbSDimitry Andric // with hash sequences hash1', hash2', and hash1' where the first and third
301*700637cbSDimitry Andric // are the same as the old hash sequences, we consider them matched.
checkConstHashCompatible(const DenseMap<IndexPair,stable_hash> & OldInstOpndIndexToConstHash,const DenseMap<IndexPair,stable_hash> & CurrInstOpndIndexToConstHash)302*700637cbSDimitry Andric static bool checkConstHashCompatible(
303*700637cbSDimitry Andric const DenseMap<IndexPair, stable_hash> &OldInstOpndIndexToConstHash,
304*700637cbSDimitry Andric const DenseMap<IndexPair, stable_hash> &CurrInstOpndIndexToConstHash) {
305*700637cbSDimitry Andric
306*700637cbSDimitry Andric DenseMap<stable_hash, stable_hash> OldHashToCurrHash;
307*700637cbSDimitry Andric for (const auto &[Index, OldHash] : OldInstOpndIndexToConstHash) {
308*700637cbSDimitry Andric auto It = CurrInstOpndIndexToConstHash.find(Index);
309*700637cbSDimitry Andric if (It == CurrInstOpndIndexToConstHash.end())
310*700637cbSDimitry Andric return false;
311*700637cbSDimitry Andric
312*700637cbSDimitry Andric auto CurrHash = It->second;
313*700637cbSDimitry Andric auto J = OldHashToCurrHash.find(OldHash);
314*700637cbSDimitry Andric if (J == OldHashToCurrHash.end())
315*700637cbSDimitry Andric OldHashToCurrHash.insert({OldHash, CurrHash});
316*700637cbSDimitry Andric else if (J->second != CurrHash)
317*700637cbSDimitry Andric return false;
318*700637cbSDimitry Andric }
319*700637cbSDimitry Andric
320*700637cbSDimitry Andric return true;
321*700637cbSDimitry Andric }
322*700637cbSDimitry Andric
323*700637cbSDimitry Andric // Validate the locations pointed by a param has the same hash and Constant.
324*700637cbSDimitry Andric static bool
checkConstLocationCompatible(const StableFunctionMap::StableFunctionEntry & SF,const IndexInstrMap & IndexInstruction,const ParamLocsVecTy & ParamLocsVec)325*700637cbSDimitry Andric checkConstLocationCompatible(const StableFunctionMap::StableFunctionEntry &SF,
326*700637cbSDimitry Andric const IndexInstrMap &IndexInstruction,
327*700637cbSDimitry Andric const ParamLocsVecTy &ParamLocsVec) {
328*700637cbSDimitry Andric for (auto &ParamLocs : ParamLocsVec) {
329*700637cbSDimitry Andric std::optional<stable_hash> OldHash;
330*700637cbSDimitry Andric std::optional<Constant *> OldConst;
331*700637cbSDimitry Andric for (auto &Loc : ParamLocs) {
332*700637cbSDimitry Andric assert(SF.IndexOperandHashMap->count(Loc));
333*700637cbSDimitry Andric auto CurrHash = SF.IndexOperandHashMap->at(Loc);
334*700637cbSDimitry Andric auto [InstIndex, OpndIndex] = Loc;
335*700637cbSDimitry Andric assert(InstIndex < IndexInstruction.size());
336*700637cbSDimitry Andric const auto *Inst = IndexInstruction.lookup(InstIndex);
337*700637cbSDimitry Andric auto *CurrConst = cast<Constant>(Inst->getOperand(OpndIndex));
338*700637cbSDimitry Andric if (!OldHash) {
339*700637cbSDimitry Andric OldHash = CurrHash;
340*700637cbSDimitry Andric OldConst = CurrConst;
341*700637cbSDimitry Andric } else if (CurrConst != *OldConst || CurrHash != *OldHash) {
342*700637cbSDimitry Andric return false;
343*700637cbSDimitry Andric }
344*700637cbSDimitry Andric }
345*700637cbSDimitry Andric }
346*700637cbSDimitry Andric return true;
347*700637cbSDimitry Andric }
348*700637cbSDimitry Andric
computeParamInfo(const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>> & SFS)349*700637cbSDimitry Andric static ParamLocsVecTy computeParamInfo(
350*700637cbSDimitry Andric const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>>
351*700637cbSDimitry Andric &SFS) {
352*700637cbSDimitry Andric std::map<std::vector<stable_hash>, ParamLocs> HashSeqToLocs;
353*700637cbSDimitry Andric auto &RSF = *SFS[0];
354*700637cbSDimitry Andric unsigned StableFunctionCount = SFS.size();
355*700637cbSDimitry Andric
356*700637cbSDimitry Andric for (auto &[IndexPair, Hash] : *RSF.IndexOperandHashMap) {
357*700637cbSDimitry Andric // Const hash sequence across stable functions.
358*700637cbSDimitry Andric // We will allocate a parameter per unique hash squence.
359*700637cbSDimitry Andric // can't use SmallVector as key
360*700637cbSDimitry Andric std::vector<stable_hash> ConstHashSeq;
361*700637cbSDimitry Andric ConstHashSeq.push_back(Hash);
362*700637cbSDimitry Andric bool Identical = true;
363*700637cbSDimitry Andric for (unsigned J = 1; J < StableFunctionCount; ++J) {
364*700637cbSDimitry Andric auto &SF = SFS[J];
365*700637cbSDimitry Andric auto SHash = SF->IndexOperandHashMap->at(IndexPair);
366*700637cbSDimitry Andric if (Hash != SHash)
367*700637cbSDimitry Andric Identical = false;
368*700637cbSDimitry Andric ConstHashSeq.push_back(SHash);
369*700637cbSDimitry Andric }
370*700637cbSDimitry Andric
371*700637cbSDimitry Andric if (Identical)
372*700637cbSDimitry Andric continue;
373*700637cbSDimitry Andric
374*700637cbSDimitry Andric // For each unique Const hash sequence (parameter), add the locations.
375*700637cbSDimitry Andric HashSeqToLocs[ConstHashSeq].push_back(IndexPair);
376*700637cbSDimitry Andric }
377*700637cbSDimitry Andric
378*700637cbSDimitry Andric ParamLocsVecTy ParamLocsVec;
379*700637cbSDimitry Andric for (auto &[HashSeq, Locs] : HashSeqToLocs)
380*700637cbSDimitry Andric ParamLocsVec.push_back(std::move(Locs));
381*700637cbSDimitry Andric
382*700637cbSDimitry Andric llvm::sort(ParamLocsVec, [&](const ParamLocs &L, const ParamLocs &R) {
383*700637cbSDimitry Andric return L[0] < R[0];
384*700637cbSDimitry Andric });
385*700637cbSDimitry Andric
386*700637cbSDimitry Andric return ParamLocsVec;
387*700637cbSDimitry Andric }
388*700637cbSDimitry Andric
merge(Module & M,const StableFunctionMap * FunctionMap)389*700637cbSDimitry Andric bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
390*700637cbSDimitry Andric bool Changed = false;
391*700637cbSDimitry Andric
392*700637cbSDimitry Andric // Collect stable functions related to the current module.
393*700637cbSDimitry Andric DenseMap<stable_hash, SmallVector<std::pair<Function *, FunctionHashInfo>>>
394*700637cbSDimitry Andric HashToFuncs;
395*700637cbSDimitry Andric auto &Maps = FunctionMap->getFunctionMap();
396*700637cbSDimitry Andric for (auto &F : M) {
397*700637cbSDimitry Andric if (!isEligibleFunction(&F))
398*700637cbSDimitry Andric continue;
399*700637cbSDimitry Andric auto FI = llvm::StructuralHashWithDifferences(F, ignoreOp);
400*700637cbSDimitry Andric if (Maps.contains(FI.FunctionHash))
401*700637cbSDimitry Andric HashToFuncs[FI.FunctionHash].emplace_back(&F, std::move(FI));
402*700637cbSDimitry Andric }
403*700637cbSDimitry Andric
404*700637cbSDimitry Andric for (auto &[Hash, Funcs] : HashToFuncs) {
405*700637cbSDimitry Andric std::optional<ParamLocsVecTy> ParamLocsVec;
406*700637cbSDimitry Andric SmallVector<FuncMergeInfo> FuncMergeInfos;
407*700637cbSDimitry Andric auto &SFS = Maps.at(Hash);
408*700637cbSDimitry Andric assert(!SFS.empty());
409*700637cbSDimitry Andric auto &RFS = SFS[0];
410*700637cbSDimitry Andric
411*700637cbSDimitry Andric // Iterate functions with the same hash.
412*700637cbSDimitry Andric for (auto &[F, FI] : Funcs) {
413*700637cbSDimitry Andric // Check if the function is compatible with any stable function
414*700637cbSDimitry Andric // in terms of the number of instructions and ignored operands.
415*700637cbSDimitry Andric if (RFS->InstCount != FI.IndexInstruction->size())
416*700637cbSDimitry Andric continue;
417*700637cbSDimitry Andric
418*700637cbSDimitry Andric auto hasValidSharedConst = [&](StableFunctionMap::StableFunctionEntry *SF,
419*700637cbSDimitry Andric FunctionHashInfo &FHI) {
420*700637cbSDimitry Andric for (auto &[Index, Hash] : *SF->IndexOperandHashMap) {
421*700637cbSDimitry Andric auto [InstIndex, OpndIndex] = Index;
422*700637cbSDimitry Andric assert(InstIndex < FHI.IndexInstruction->size());
423*700637cbSDimitry Andric auto *Inst = FHI.IndexInstruction->lookup(InstIndex);
424*700637cbSDimitry Andric if (!ignoreOp(Inst, OpndIndex))
425*700637cbSDimitry Andric return false;
426*700637cbSDimitry Andric }
427*700637cbSDimitry Andric return true;
428*700637cbSDimitry Andric };
429*700637cbSDimitry Andric if (!hasValidSharedConst(RFS.get(), FI))
430*700637cbSDimitry Andric continue;
431*700637cbSDimitry Andric
432*700637cbSDimitry Andric for (auto &SF : SFS) {
433*700637cbSDimitry Andric assert(SF->InstCount == FI.IndexInstruction->size());
434*700637cbSDimitry Andric assert(hasValidSharedConst(SF.get(), FI));
435*700637cbSDimitry Andric // Check if there is any stable function that is compatiable with the
436*700637cbSDimitry Andric // current one.
437*700637cbSDimitry Andric if (!checkConstHashCompatible(*SF->IndexOperandHashMap,
438*700637cbSDimitry Andric *FI.IndexOperandHashMap))
439*700637cbSDimitry Andric continue;
440*700637cbSDimitry Andric if (!ParamLocsVec.has_value()) {
441*700637cbSDimitry Andric ParamLocsVec = computeParamInfo(SFS);
442*700637cbSDimitry Andric LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash
443*700637cbSDimitry Andric << " with Params " << ParamLocsVec->size() << "\n");
444*700637cbSDimitry Andric }
445*700637cbSDimitry Andric if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction,
446*700637cbSDimitry Andric *ParamLocsVec))
447*700637cbSDimitry Andric continue;
448*700637cbSDimitry Andric
449*700637cbSDimitry Andric // If a stable function matching the current one is found,
450*700637cbSDimitry Andric // create a candidate for merging and proceed to the next function.
451*700637cbSDimitry Andric FuncMergeInfos.emplace_back(SF.get(), F, FI.IndexInstruction.get());
452*700637cbSDimitry Andric break;
453*700637cbSDimitry Andric }
454*700637cbSDimitry Andric }
455*700637cbSDimitry Andric unsigned FuncMergeInfoSize = FuncMergeInfos.size();
456*700637cbSDimitry Andric if (FuncMergeInfoSize == 0)
457*700637cbSDimitry Andric continue;
458*700637cbSDimitry Andric
459*700637cbSDimitry Andric LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging function count "
460*700637cbSDimitry Andric << FuncMergeInfoSize << " for hash: " << Hash << "\n");
461*700637cbSDimitry Andric
462*700637cbSDimitry Andric for (auto &FMI : FuncMergeInfos) {
463*700637cbSDimitry Andric Changed = true;
464*700637cbSDimitry Andric
465*700637cbSDimitry Andric // We've already validated all locations of constant operands pointed by
466*700637cbSDimitry Andric // the parameters. Populate parameters pointing to the original constants.
467*700637cbSDimitry Andric SmallVector<Constant *> Params;
468*700637cbSDimitry Andric SmallVector<Type *> ParamTypes;
469*700637cbSDimitry Andric for (auto &ParamLocs : *ParamLocsVec) {
470*700637cbSDimitry Andric assert(!ParamLocs.empty());
471*700637cbSDimitry Andric auto &[InstIndex, OpndIndex] = ParamLocs[0];
472*700637cbSDimitry Andric auto *Inst = FMI.IndexInstruction->lookup(InstIndex);
473*700637cbSDimitry Andric auto *Opnd = cast<Constant>(Inst->getOperand(OpndIndex));
474*700637cbSDimitry Andric Params.push_back(Opnd);
475*700637cbSDimitry Andric ParamTypes.push_back(Opnd->getType());
476*700637cbSDimitry Andric }
477*700637cbSDimitry Andric
478*700637cbSDimitry Andric // Create a merged function derived from the current function.
479*700637cbSDimitry Andric Function *MergedFunc =
480*700637cbSDimitry Andric createMergedFunction(FMI, ParamTypes, *ParamLocsVec);
481*700637cbSDimitry Andric
482*700637cbSDimitry Andric LLVM_DEBUG({
483*700637cbSDimitry Andric dbgs() << "[GlobalMergeFunc] Merged function (hash:" << FMI.SF->Hash
484*700637cbSDimitry Andric << ") " << MergedFunc->getName() << " generated from "
485*700637cbSDimitry Andric << FMI.F->getName() << ":\n";
486*700637cbSDimitry Andric MergedFunc->dump();
487*700637cbSDimitry Andric });
488*700637cbSDimitry Andric
489*700637cbSDimitry Andric // Transform the current function into a thunk that calls the merged
490*700637cbSDimitry Andric // function.
491*700637cbSDimitry Andric createThunk(FMI, Params, MergedFunc);
492*700637cbSDimitry Andric LLVM_DEBUG({
493*700637cbSDimitry Andric dbgs() << "[GlobalMergeFunc] Thunk generated: \n";
494*700637cbSDimitry Andric FMI.F->dump();
495*700637cbSDimitry Andric });
496*700637cbSDimitry Andric ++NumMergedFunctions;
497*700637cbSDimitry Andric }
498*700637cbSDimitry Andric }
499*700637cbSDimitry Andric
500*700637cbSDimitry Andric return Changed;
501*700637cbSDimitry Andric }
502*700637cbSDimitry Andric
initializeMergerMode(const Module & M)503*700637cbSDimitry Andric void GlobalMergeFunc::initializeMergerMode(const Module &M) {
504*700637cbSDimitry Andric // Initialize the local function map regardless of the merger mode.
505*700637cbSDimitry Andric LocalFunctionMap = std::make_unique<StableFunctionMap>();
506*700637cbSDimitry Andric
507*700637cbSDimitry Andric // Disable codegen data for merging. The local merge is still enabled.
508*700637cbSDimitry Andric if (DisableCGDataForMerging)
509*700637cbSDimitry Andric return;
510*700637cbSDimitry Andric
511*700637cbSDimitry Andric // (Full)LTO module does not have functions added to the index.
512*700637cbSDimitry Andric // In this case, we run a local merger without using codegen data.
513*700637cbSDimitry Andric if (Index && !Index->hasExportedFunctions(M))
514*700637cbSDimitry Andric return;
515*700637cbSDimitry Andric
516*700637cbSDimitry Andric if (cgdata::emitCGData())
517*700637cbSDimitry Andric MergerMode = HashFunctionMode::BuildingHashFuncion;
518*700637cbSDimitry Andric else if (cgdata::hasStableFunctionMap())
519*700637cbSDimitry Andric MergerMode = HashFunctionMode::UsingHashFunction;
520*700637cbSDimitry Andric }
521*700637cbSDimitry Andric
emitFunctionMap(Module & M)522*700637cbSDimitry Andric void GlobalMergeFunc::emitFunctionMap(Module &M) {
523*700637cbSDimitry Andric LLVM_DEBUG(dbgs() << "Emit function map. Size: " << LocalFunctionMap->size()
524*700637cbSDimitry Andric << "\n");
525*700637cbSDimitry Andric // No need to emit the function map if it is empty.
526*700637cbSDimitry Andric if (LocalFunctionMap->empty())
527*700637cbSDimitry Andric return;
528*700637cbSDimitry Andric SmallVector<char> Buf;
529*700637cbSDimitry Andric raw_svector_ostream OS(Buf);
530*700637cbSDimitry Andric
531*700637cbSDimitry Andric std::vector<CGDataPatchItem> PatchItems;
532*700637cbSDimitry Andric StableFunctionMapRecord::serialize(OS, LocalFunctionMap.get(), PatchItems);
533*700637cbSDimitry Andric CGDataOStream COS(OS);
534*700637cbSDimitry Andric COS.patch(PatchItems);
535*700637cbSDimitry Andric
536*700637cbSDimitry Andric std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
537*700637cbSDimitry Andric OS.str(), "in-memory stable function map", false);
538*700637cbSDimitry Andric
539*700637cbSDimitry Andric Triple TT(M.getTargetTriple());
540*700637cbSDimitry Andric embedBufferInModule(M, *Buffer,
541*700637cbSDimitry Andric getCodeGenDataSectionName(CG_merge, TT.getObjectFormat()),
542*700637cbSDimitry Andric Align(4));
543*700637cbSDimitry Andric }
544*700637cbSDimitry Andric
run(Module & M)545*700637cbSDimitry Andric bool GlobalMergeFunc::run(Module &M) {
546*700637cbSDimitry Andric initializeMergerMode(M);
547*700637cbSDimitry Andric
548*700637cbSDimitry Andric const StableFunctionMap *FuncMap;
549*700637cbSDimitry Andric if (MergerMode == HashFunctionMode::UsingHashFunction) {
550*700637cbSDimitry Andric // Use the prior CG data to optimistically create global merge candidates.
551*700637cbSDimitry Andric FuncMap = cgdata::getStableFunctionMap();
552*700637cbSDimitry Andric } else {
553*700637cbSDimitry Andric analyze(M);
554*700637cbSDimitry Andric // Emit the local function map to the custom section, __llvm_merge before
555*700637cbSDimitry Andric // finalizing it.
556*700637cbSDimitry Andric if (MergerMode == HashFunctionMode::BuildingHashFuncion)
557*700637cbSDimitry Andric emitFunctionMap(M);
558*700637cbSDimitry Andric LocalFunctionMap->finalize();
559*700637cbSDimitry Andric FuncMap = LocalFunctionMap.get();
560*700637cbSDimitry Andric }
561*700637cbSDimitry Andric
562*700637cbSDimitry Andric return merge(M, FuncMap);
563*700637cbSDimitry Andric }
564*700637cbSDimitry Andric
565*700637cbSDimitry Andric namespace {
566*700637cbSDimitry Andric
567*700637cbSDimitry Andric class GlobalMergeFuncPassWrapper : public ModulePass {
568*700637cbSDimitry Andric
569*700637cbSDimitry Andric public:
570*700637cbSDimitry Andric static char ID;
571*700637cbSDimitry Andric
572*700637cbSDimitry Andric GlobalMergeFuncPassWrapper();
573*700637cbSDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const574*700637cbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
575*700637cbSDimitry Andric AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
576*700637cbSDimitry Andric AU.setPreservesAll();
577*700637cbSDimitry Andric ModulePass::getAnalysisUsage(AU);
578*700637cbSDimitry Andric }
579*700637cbSDimitry Andric
getPassName() const580*700637cbSDimitry Andric StringRef getPassName() const override { return "Global Merge Functions"; }
581*700637cbSDimitry Andric
582*700637cbSDimitry Andric bool runOnModule(Module &M) override;
583*700637cbSDimitry Andric };
584*700637cbSDimitry Andric
585*700637cbSDimitry Andric } // namespace
586*700637cbSDimitry Andric
587*700637cbSDimitry Andric char GlobalMergeFuncPassWrapper::ID = 0;
588*700637cbSDimitry Andric INITIALIZE_PASS_BEGIN(GlobalMergeFuncPassWrapper, "global-merge-func",
589*700637cbSDimitry Andric "Global merge function pass", false, false)
590*700637cbSDimitry Andric INITIALIZE_PASS_END(GlobalMergeFuncPassWrapper, "global-merge-func",
591*700637cbSDimitry Andric "Global merge function pass", false, false)
592*700637cbSDimitry Andric
593*700637cbSDimitry Andric namespace llvm {
createGlobalMergeFuncPass()594*700637cbSDimitry Andric ModulePass *createGlobalMergeFuncPass() {
595*700637cbSDimitry Andric return new GlobalMergeFuncPassWrapper();
596*700637cbSDimitry Andric }
597*700637cbSDimitry Andric } // namespace llvm
598*700637cbSDimitry Andric
GlobalMergeFuncPassWrapper()599*700637cbSDimitry Andric GlobalMergeFuncPassWrapper::GlobalMergeFuncPassWrapper() : ModulePass(ID) {
600*700637cbSDimitry Andric initializeGlobalMergeFuncPassWrapperPass(
601*700637cbSDimitry Andric *llvm::PassRegistry::getPassRegistry());
602*700637cbSDimitry Andric }
603*700637cbSDimitry Andric
runOnModule(Module & M)604*700637cbSDimitry Andric bool GlobalMergeFuncPassWrapper::runOnModule(Module &M) {
605*700637cbSDimitry Andric const ModuleSummaryIndex *Index = nullptr;
606*700637cbSDimitry Andric if (auto *IndexWrapperPass =
607*700637cbSDimitry Andric getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>())
608*700637cbSDimitry Andric Index = IndexWrapperPass->getIndex();
609*700637cbSDimitry Andric
610*700637cbSDimitry Andric return GlobalMergeFunc(Index).run(M);
611*700637cbSDimitry Andric }
612*700637cbSDimitry Andric
run(Module & M,AnalysisManager<Module> & AM)613*700637cbSDimitry Andric PreservedAnalyses GlobalMergeFuncPass::run(Module &M,
614*700637cbSDimitry Andric AnalysisManager<Module> &AM) {
615*700637cbSDimitry Andric bool Changed = GlobalMergeFunc(ImportSummary).run(M);
616*700637cbSDimitry Andric return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
617*700637cbSDimitry Andric }
618