xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp (revision be092bcde96bdcfde9013d60e442cca023bfbd1b)
1  //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This family of functions perform manipulations on Modules.
10  //
11  //===----------------------------------------------------------------------===//
12  
13  #include "llvm/Transforms/Utils/ModuleUtils.h"
14  #include "llvm/Analysis/VectorUtils.h"
15  #include "llvm/IR/DerivedTypes.h"
16  #include "llvm/IR/Function.h"
17  #include "llvm/IR/IRBuilder.h"
18  #include "llvm/IR/MDBuilder.h"
19  #include "llvm/IR/Module.h"
20  #include "llvm/Support/raw_ostream.h"
21  #include "llvm/Support/xxhash.h"
22  using namespace llvm;
23  
24  #define DEBUG_TYPE "moduleutils"
25  
26  static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
27                                  int Priority, Constant *Data) {
28    IRBuilder<> IRB(M.getContext());
29    FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
30  
31    // Get the current set of static global constructors and add the new ctor
32    // to the list.
33    SmallVector<Constant *, 16> CurrentCtors;
34    StructType *EltTy = StructType::get(
35        IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()),
36        IRB.getInt8PtrTy());
37  
38    if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) {
39      if (Constant *Init = GVCtor->getInitializer()) {
40        unsigned n = Init->getNumOperands();
41        CurrentCtors.reserve(n + 1);
42        for (unsigned i = 0; i != n; ++i)
43          CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
44      }
45      GVCtor->eraseFromParent();
46    }
47  
48    // Build a 3 field global_ctor entry.  We don't take a comdat key.
49    Constant *CSVals[3];
50    CSVals[0] = IRB.getInt32(Priority);
51    CSVals[1] = F;
52    CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
53                     : Constant::getNullValue(IRB.getInt8PtrTy());
54    Constant *RuntimeCtorInit =
55        ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements()));
56  
57    CurrentCtors.push_back(RuntimeCtorInit);
58  
59    // Create a new initializer.
60    ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
61    Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
62  
63    // Create the new global variable and replace all uses of
64    // the old global variable with the new one.
65    (void)new GlobalVariable(M, NewInit->getType(), false,
66                             GlobalValue::AppendingLinkage, NewInit, ArrayName);
67  }
68  
69  void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
70    appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
71  }
72  
73  void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
74    appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
75  }
76  
77  static void collectUsedGlobals(GlobalVariable *GV,
78                                 SmallSetVector<Constant *, 16> &Init) {
79    if (!GV || !GV->hasInitializer())
80      return;
81  
82    auto *CA = cast<ConstantArray>(GV->getInitializer());
83    for (Use &Op : CA->operands())
84      Init.insert(cast<Constant>(Op));
85  }
86  
87  static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
88    GlobalVariable *GV = M.getGlobalVariable(Name);
89  
90    SmallSetVector<Constant *, 16> Init;
91    collectUsedGlobals(GV, Init);
92    if (GV)
93      GV->eraseFromParent();
94  
95    Type *ArrayEltTy = llvm::Type::getInt8PtrTy(M.getContext());
96    for (auto *V : Values)
97      Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy));
98  
99    if (Init.empty())
100      return;
101  
102    ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size());
103    GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
104                                  ConstantArray::get(ATy, Init.getArrayRef()),
105                                  Name);
106    GV->setSection("llvm.metadata");
107  }
108  
109  void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
110    appendToUsedList(M, "llvm.used", Values);
111  }
112  
113  void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
114    appendToUsedList(M, "llvm.compiler.used", Values);
115  }
116  
117  static void removeFromUsedList(Module &M, StringRef Name,
118                                 function_ref<bool(Constant *)> ShouldRemove) {
119    GlobalVariable *GV = M.getNamedGlobal(Name);
120    if (!GV)
121      return;
122  
123    SmallSetVector<Constant *, 16> Init;
124    collectUsedGlobals(GV, Init);
125  
126    Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType();
127  
128    SmallVector<Constant *, 16> NewInit;
129    for (Constant *MaybeRemoved : Init) {
130      if (!ShouldRemove(MaybeRemoved->stripPointerCasts()))
131        NewInit.push_back(MaybeRemoved);
132    }
133  
134    if (!NewInit.empty()) {
135      ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size());
136      GlobalVariable *NewGV =
137          new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
138                             ConstantArray::get(ATy, NewInit), "", GV,
139                             GV->getThreadLocalMode(), GV->getAddressSpace());
140      NewGV->setSection(GV->getSection());
141      NewGV->takeName(GV);
142    }
143  
144    GV->eraseFromParent();
145  }
146  
147  void llvm::removeFromUsedLists(Module &M,
148                                 function_ref<bool(Constant *)> ShouldRemove) {
149    removeFromUsedList(M, "llvm.used", ShouldRemove);
150    removeFromUsedList(M, "llvm.compiler.used", ShouldRemove);
151  }
152  
153  void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
154    if (!M.getModuleFlag("kcfi"))
155      return;
156    // Matches CodeGenModule::CreateKCFITypeId in Clang.
157    LLVMContext &Ctx = M.getContext();
158    MDBuilder MDB(Ctx);
159    F.setMetadata(
160        LLVMContext::MD_kcfi_type,
161        MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
162                             Type::getInt32Ty(Ctx),
163                             static_cast<uint32_t>(xxHash64(MangledType))))));
164    // If the module was compiled with -fpatchable-function-entry, ensure
165    // we use the same patchable-function-prefix.
166    if (auto *MD = mdconst::extract_or_null<ConstantInt>(
167            M.getModuleFlag("kcfi-offset"))) {
168      if (unsigned Offset = MD->getZExtValue())
169        F.addFnAttr("patchable-function-prefix", std::to_string(Offset));
170    }
171  }
172  
173  FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
174                                                    ArrayRef<Type *> InitArgTypes,
175                                                    bool Weak) {
176    assert(!InitName.empty() && "Expected init function name");
177    auto *VoidTy = Type::getVoidTy(M.getContext());
178    auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false);
179    auto FnCallee = M.getOrInsertFunction(InitName, FnTy);
180    auto *Fn = cast<Function>(FnCallee.getCallee());
181    if (Weak && Fn->isDeclaration())
182      Fn->setLinkage(Function::ExternalWeakLinkage);
183    return FnCallee;
184  }
185  
186  Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
187    Function *Ctor = Function::createWithDefaultAttr(
188        FunctionType::get(Type::getVoidTy(M.getContext()), false),
189        GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(),
190        CtorName, &M);
191    Ctor->addFnAttr(Attribute::NoUnwind);
192    setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void)
193    BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
194    ReturnInst::Create(M.getContext(), CtorBB);
195    // Ensure Ctor cannot be discarded, even if in a comdat.
196    appendToUsed(M, {Ctor});
197    return Ctor;
198  }
199  
200  std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
201      Module &M, StringRef CtorName, StringRef InitName,
202      ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
203      StringRef VersionCheckName, bool Weak) {
204    assert(!InitName.empty() && "Expected init function name");
205    assert(InitArgs.size() == InitArgTypes.size() &&
206           "Sanitizer's init function expects different number of arguments");
207    FunctionCallee InitFunction =
208        declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak);
209    Function *Ctor = createSanitizerCtor(M, CtorName);
210    IRBuilder<> IRB(M.getContext());
211  
212    BasicBlock *RetBB = &Ctor->getEntryBlock();
213    if (Weak) {
214      RetBB->setName("ret");
215      auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB);
216      auto *CallInitBB =
217          BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB);
218      auto *InitFn = cast<Function>(InitFunction.getCallee());
219      auto *InitFnPtr =
220          PointerType::get(InitFn->getType(), InitFn->getAddressSpace());
221      IRB.SetInsertPoint(EntryBB);
222      Value *InitNotNull =
223          IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr));
224      IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB);
225      IRB.SetInsertPoint(CallInitBB);
226    } else {
227      IRB.SetInsertPoint(RetBB->getTerminator());
228    }
229  
230    IRB.CreateCall(InitFunction, InitArgs);
231    if (!VersionCheckName.empty()) {
232      FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
233          VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
234          AttributeList());
235      IRB.CreateCall(VersionCheckFunction, {});
236    }
237  
238    if (Weak)
239      IRB.CreateBr(RetBB);
240  
241    return std::make_pair(Ctor, InitFunction);
242  }
243  
244  std::pair<Function *, FunctionCallee>
245  llvm::getOrCreateSanitizerCtorAndInitFunctions(
246      Module &M, StringRef CtorName, StringRef InitName,
247      ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
248      function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
249      StringRef VersionCheckName, bool Weak) {
250    assert(!CtorName.empty() && "Expected ctor function name");
251  
252    if (Function *Ctor = M.getFunction(CtorName))
253      // FIXME: Sink this logic into the module, similar to the handling of
254      // globals. This will make moving to a concurrent model much easier.
255      if (Ctor->arg_empty() ||
256          Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
257        return {Ctor,
258                declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)};
259  
260    Function *Ctor;
261    FunctionCallee InitFunction;
262    std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
263        M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak);
264    FunctionsCreatedCallback(Ctor, InitFunction);
265    return std::make_pair(Ctor, InitFunction);
266  }
267  
268  void llvm::filterDeadComdatFunctions(
269      SmallVectorImpl<Function *> &DeadComdatFunctions) {
270    SmallPtrSet<Function *, 32> MaybeDeadFunctions;
271    SmallPtrSet<Comdat *, 32> MaybeDeadComdats;
272    for (Function *F : DeadComdatFunctions) {
273      MaybeDeadFunctions.insert(F);
274      if (Comdat *C = F->getComdat())
275        MaybeDeadComdats.insert(C);
276    }
277  
278    // Find comdats for which all users are dead now.
279    SmallPtrSet<Comdat *, 32> DeadComdats;
280    for (Comdat *C : MaybeDeadComdats) {
281      auto IsUserDead = [&](GlobalObject *GO) {
282        auto *F = dyn_cast<Function>(GO);
283        return F && MaybeDeadFunctions.contains(F);
284      };
285      if (all_of(C->getUsers(), IsUserDead))
286        DeadComdats.insert(C);
287    }
288  
289    // Only keep functions which have no comdat or a dead comdat.
290    erase_if(DeadComdatFunctions, [&](Function *F) {
291      Comdat *C = F->getComdat();
292      return C && !DeadComdats.contains(C);
293    });
294  }
295  
296  std::string llvm::getUniqueModuleId(Module *M) {
297    MD5 Md5;
298    bool ExportsSymbols = false;
299    auto AddGlobal = [&](GlobalValue &GV) {
300      if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
301          !GV.hasExternalLinkage() || GV.hasComdat())
302        return;
303      ExportsSymbols = true;
304      Md5.update(GV.getName());
305      Md5.update(ArrayRef<uint8_t>{0});
306    };
307  
308    for (auto &F : *M)
309      AddGlobal(F);
310    for (auto &GV : M->globals())
311      AddGlobal(GV);
312    for (auto &GA : M->aliases())
313      AddGlobal(GA);
314    for (auto &IF : M->ifuncs())
315      AddGlobal(IF);
316  
317    if (!ExportsSymbols)
318      return "";
319  
320    MD5::MD5Result R;
321    Md5.final(R);
322  
323    SmallString<32> Str;
324    MD5::stringifyResult(R, Str);
325    return ("." + Str).str();
326  }
327  
328  void VFABI::setVectorVariantNames(CallInst *CI,
329                                    ArrayRef<std::string> VariantMappings) {
330    if (VariantMappings.empty())
331      return;
332  
333    SmallString<256> Buffer;
334    llvm::raw_svector_ostream Out(Buffer);
335    for (const std::string &VariantMapping : VariantMappings)
336      Out << VariantMapping << ",";
337    // Get rid of the trailing ','.
338    assert(!Buffer.str().empty() && "Must have at least one char.");
339    Buffer.pop_back();
340  
341    Module *M = CI->getModule();
342  #ifndef NDEBUG
343    for (const std::string &VariantMapping : VariantMappings) {
344      LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n");
345      std::optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M);
346      assert(VI && "Cannot add an invalid VFABI name.");
347      assert(M->getNamedValue(VI->VectorName) &&
348             "Cannot add variant to attribute: "
349             "vector function declaration is missing.");
350    }
351  #endif
352    CI->addFnAttr(
353        Attribute::get(M->getContext(), MappingsAttrName, Buffer.str()));
354  }
355  
356  void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
357                                 StringRef SectionName, Align Alignment) {
358    // Embed the memory buffer into the module.
359    Constant *ModuleConstant = ConstantDataArray::get(
360        M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
361    GlobalVariable *GV = new GlobalVariable(
362        M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
363        ModuleConstant, "llvm.embedded.object");
364    GV->setSection(SectionName);
365    GV->setAlignment(Alignment);
366  
367    LLVMContext &Ctx = M.getContext();
368    NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects");
369    Metadata *MDVals[] = {ConstantAsMetadata::get(GV),
370                          MDString::get(Ctx, SectionName)};
371  
372    MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
373    GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {}));
374  
375    appendToCompilerUsed(M, GV);
376  }
377  
378  bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
379      Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) {
380    SmallVector<GlobalIFunc *, 32> AllIFuncs;
381    ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower;
382    if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs
383      for (GlobalIFunc &GI : M.ifuncs())
384        AllIFuncs.push_back(&GI);
385      IFuncsToLower = AllIFuncs;
386    }
387  
388    bool UnhandledUsers = false;
389    LLVMContext &Ctx = M.getContext();
390    const DataLayout &DL = M.getDataLayout();
391  
392    PointerType *TableEntryTy =
393        Ctx.supportsTypedPointers()
394            ? PointerType::get(Type::getInt8Ty(Ctx), DL.getProgramAddressSpace())
395            : PointerType::get(Ctx, DL.getProgramAddressSpace());
396  
397    ArrayType *FuncPtrTableTy =
398        ArrayType::get(TableEntryTy, IFuncsToLower.size());
399  
400    Align PtrAlign = DL.getABITypeAlign(TableEntryTy);
401  
402    // Create a global table of function pointers we'll initialize in a global
403    // constructor.
404    auto *FuncPtrTable = new GlobalVariable(
405        M, FuncPtrTableTy, false, GlobalValue::InternalLinkage,
406        PoisonValue::get(FuncPtrTableTy), "", nullptr,
407        GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace());
408    FuncPtrTable->setAlignment(PtrAlign);
409  
410    // Create a function to initialize the function pointer table.
411    Function *NewCtor = Function::Create(
412        FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage,
413        DL.getProgramAddressSpace(), "", &M);
414  
415    BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor);
416    IRBuilder<> InitBuilder(BB);
417  
418    size_t TableIndex = 0;
419    for (GlobalIFunc *GI : IFuncsToLower) {
420      Function *ResolvedFunction = GI->getResolverFunction();
421  
422      // We don't know what to pass to a resolver function taking arguments
423      //
424      // FIXME: Is this even valid? clang and gcc don't complain but this
425      // probably should be invalid IR. We could just pass through undef.
426      if (!std::empty(ResolvedFunction->getFunctionType()->params())) {
427        LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
428                          << ResolvedFunction->getName() << " with parameters\n");
429        UnhandledUsers = true;
430        continue;
431      }
432  
433      // Initialize the function pointer table.
434      CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction);
435      Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy);
436      Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32(
437          FuncPtrTableTy, FuncPtrTable, 0, TableIndex++));
438      InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign);
439  
440      // Update all users to load a pointer from the global table.
441      for (User *User : make_early_inc_range(GI->users())) {
442        Instruction *UserInst = dyn_cast<Instruction>(User);
443        if (!UserInst) {
444          // TODO: Should handle constantexpr casts in user instructions. Probably
445          // can't do much about constant initializers.
446          UnhandledUsers = true;
447          continue;
448        }
449  
450        IRBuilder<> UseBuilder(UserInst);
451        LoadInst *ResolvedTarget =
452            UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign);
453        Value *ResolvedCast =
454            UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType());
455        UserInst->replaceUsesOfWith(GI, ResolvedCast);
456      }
457  
458      // If we handled all users, erase the ifunc.
459      if (GI->use_empty())
460        GI->eraseFromParent();
461    }
462  
463    InitBuilder.CreateRetVoid();
464  
465    PointerType *ConstantDataTy = Ctx.supportsTypedPointers()
466                                      ? PointerType::get(Type::getInt8Ty(Ctx), 0)
467                                      : PointerType::get(Ctx, 0);
468  
469    // TODO: Is this the right priority? Probably should be before any other
470    // constructors?
471    const int Priority = 10;
472    appendToGlobalCtors(M, NewCtor, Priority,
473                        ConstantPointerNull::get(ConstantDataTy));
474    return UnhandledUsers;
475  }
476