xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp (revision d14c38ceb8aa10bd94913d0456ec0f726693379b)
1  //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
// This family of functions performs manipulations on Modules.
10  //
11  //===----------------------------------------------------------------------===//
12  
13  #include "llvm/Transforms/Utils/ModuleUtils.h"
14  #include "llvm/Analysis/VectorUtils.h"
15  #include "llvm/ADT/SmallString.h"
16  #include "llvm/IR/DerivedTypes.h"
17  #include "llvm/IR/Function.h"
18  #include "llvm/IR/IRBuilder.h"
19  #include "llvm/IR/MDBuilder.h"
20  #include "llvm/IR/Module.h"
21  #include "llvm/Support/MD5.h"
22  #include "llvm/Support/raw_ostream.h"
23  #include "llvm/Support/xxhash.h"
24  
25  using namespace llvm;
26  
27  #define DEBUG_TYPE "moduleutils"
28  
29  static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
30                                  int Priority, Constant *Data) {
31    IRBuilder<> IRB(M.getContext());
32    FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
33  
34    // Get the current set of static global constructors and add the new ctor
35    // to the list.
36    SmallVector<Constant *, 16> CurrentCtors;
37    StructType *EltTy;
38    if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) {
39      EltTy = cast<StructType>(GVCtor->getValueType()->getArrayElementType());
40      if (Constant *Init = GVCtor->getInitializer()) {
41        unsigned n = Init->getNumOperands();
42        CurrentCtors.reserve(n + 1);
43        for (unsigned i = 0; i != n; ++i)
44          CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
45      }
46      GVCtor->eraseFromParent();
47    } else {
48      EltTy = StructType::get(IRB.getInt32Ty(),
49                              PointerType::get(FnTy, F->getAddressSpace()),
50                              IRB.getPtrTy());
51    }
52  
53    // Build a 3 field global_ctor entry.  We don't take a comdat key.
54    Constant *CSVals[3];
55    CSVals[0] = IRB.getInt32(Priority);
56    CSVals[1] = F;
57    CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getPtrTy())
58                     : Constant::getNullValue(IRB.getPtrTy());
59    Constant *RuntimeCtorInit =
60        ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements()));
61  
62    CurrentCtors.push_back(RuntimeCtorInit);
63  
64    // Create a new initializer.
65    ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
66    Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
67  
68    // Create the new global variable and replace all uses of
69    // the old global variable with the new one.
70    (void)new GlobalVariable(M, NewInit->getType(), false,
71                             GlobalValue::AppendingLinkage, NewInit, ArrayName);
72  }
73  
74  void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
75    appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
76  }
77  
78  void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
79    appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
80  }
81  
82  static void collectUsedGlobals(GlobalVariable *GV,
83                                 SmallSetVector<Constant *, 16> &Init) {
84    if (!GV || !GV->hasInitializer())
85      return;
86  
87    auto *CA = cast<ConstantArray>(GV->getInitializer());
88    for (Use &Op : CA->operands())
89      Init.insert(cast<Constant>(Op));
90  }
91  
92  static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
93    GlobalVariable *GV = M.getGlobalVariable(Name);
94  
95    SmallSetVector<Constant *, 16> Init;
96    collectUsedGlobals(GV, Init);
97    if (GV)
98      GV->eraseFromParent();
99  
100    Type *ArrayEltTy = llvm::PointerType::getUnqual(M.getContext());
101    for (auto *V : Values)
102      Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy));
103  
104    if (Init.empty())
105      return;
106  
107    ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size());
108    GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
109                                  ConstantArray::get(ATy, Init.getArrayRef()),
110                                  Name);
111    GV->setSection("llvm.metadata");
112  }
113  
114  void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
115    appendToUsedList(M, "llvm.used", Values);
116  }
117  
118  void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
119    appendToUsedList(M, "llvm.compiler.used", Values);
120  }
121  
122  static void removeFromUsedList(Module &M, StringRef Name,
123                                 function_ref<bool(Constant *)> ShouldRemove) {
124    GlobalVariable *GV = M.getNamedGlobal(Name);
125    if (!GV)
126      return;
127  
128    SmallSetVector<Constant *, 16> Init;
129    collectUsedGlobals(GV, Init);
130  
131    Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType();
132  
133    SmallVector<Constant *, 16> NewInit;
134    for (Constant *MaybeRemoved : Init) {
135      if (!ShouldRemove(MaybeRemoved->stripPointerCasts()))
136        NewInit.push_back(MaybeRemoved);
137    }
138  
139    if (!NewInit.empty()) {
140      ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size());
141      GlobalVariable *NewGV =
142          new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
143                             ConstantArray::get(ATy, NewInit), "", GV,
144                             GV->getThreadLocalMode(), GV->getAddressSpace());
145      NewGV->setSection(GV->getSection());
146      NewGV->takeName(GV);
147    }
148  
149    GV->eraseFromParent();
150  }
151  
152  void llvm::removeFromUsedLists(Module &M,
153                                 function_ref<bool(Constant *)> ShouldRemove) {
154    removeFromUsedList(M, "llvm.used", ShouldRemove);
155    removeFromUsedList(M, "llvm.compiler.used", ShouldRemove);
156  }
157  
158  void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
159    if (!M.getModuleFlag("kcfi"))
160      return;
161    // Matches CodeGenModule::CreateKCFITypeId in Clang.
162    LLVMContext &Ctx = M.getContext();
163    MDBuilder MDB(Ctx);
164    F.setMetadata(
165        LLVMContext::MD_kcfi_type,
166        MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
167                             Type::getInt32Ty(Ctx),
168                             static_cast<uint32_t>(xxHash64(MangledType))))));
169    // If the module was compiled with -fpatchable-function-entry, ensure
170    // we use the same patchable-function-prefix.
171    if (auto *MD = mdconst::extract_or_null<ConstantInt>(
172            M.getModuleFlag("kcfi-offset"))) {
173      if (unsigned Offset = MD->getZExtValue())
174        F.addFnAttr("patchable-function-prefix", std::to_string(Offset));
175    }
176  }
177  
178  FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
179                                                    ArrayRef<Type *> InitArgTypes,
180                                                    bool Weak) {
181    assert(!InitName.empty() && "Expected init function name");
182    auto *VoidTy = Type::getVoidTy(M.getContext());
183    auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false);
184    auto FnCallee = M.getOrInsertFunction(InitName, FnTy);
185    auto *Fn = cast<Function>(FnCallee.getCallee());
186    if (Weak && Fn->isDeclaration())
187      Fn->setLinkage(Function::ExternalWeakLinkage);
188    return FnCallee;
189  }
190  
191  Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
192    Function *Ctor = Function::createWithDefaultAttr(
193        FunctionType::get(Type::getVoidTy(M.getContext()), false),
194        GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(),
195        CtorName, &M);
196    Ctor->addFnAttr(Attribute::NoUnwind);
197    setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void)
198    BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
199    ReturnInst::Create(M.getContext(), CtorBB);
200    // Ensure Ctor cannot be discarded, even if in a comdat.
201    appendToUsed(M, {Ctor});
202    return Ctor;
203  }
204  
205  std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
206      Module &M, StringRef CtorName, StringRef InitName,
207      ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
208      StringRef VersionCheckName, bool Weak) {
209    assert(!InitName.empty() && "Expected init function name");
210    assert(InitArgs.size() == InitArgTypes.size() &&
211           "Sanitizer's init function expects different number of arguments");
212    FunctionCallee InitFunction =
213        declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak);
214    Function *Ctor = createSanitizerCtor(M, CtorName);
215    IRBuilder<> IRB(M.getContext());
216  
217    BasicBlock *RetBB = &Ctor->getEntryBlock();
218    if (Weak) {
219      RetBB->setName("ret");
220      auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB);
221      auto *CallInitBB =
222          BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB);
223      auto *InitFn = cast<Function>(InitFunction.getCallee());
224      auto *InitFnPtr =
225          PointerType::get(InitFn->getType(), InitFn->getAddressSpace());
226      IRB.SetInsertPoint(EntryBB);
227      Value *InitNotNull =
228          IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr));
229      IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB);
230      IRB.SetInsertPoint(CallInitBB);
231    } else {
232      IRB.SetInsertPoint(RetBB->getTerminator());
233    }
234  
235    IRB.CreateCall(InitFunction, InitArgs);
236    if (!VersionCheckName.empty()) {
237      FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
238          VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
239          AttributeList());
240      IRB.CreateCall(VersionCheckFunction, {});
241    }
242  
243    if (Weak)
244      IRB.CreateBr(RetBB);
245  
246    return std::make_pair(Ctor, InitFunction);
247  }
248  
249  std::pair<Function *, FunctionCallee>
250  llvm::getOrCreateSanitizerCtorAndInitFunctions(
251      Module &M, StringRef CtorName, StringRef InitName,
252      ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
253      function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
254      StringRef VersionCheckName, bool Weak) {
255    assert(!CtorName.empty() && "Expected ctor function name");
256  
257    if (Function *Ctor = M.getFunction(CtorName))
258      // FIXME: Sink this logic into the module, similar to the handling of
259      // globals. This will make moving to a concurrent model much easier.
260      if (Ctor->arg_empty() ||
261          Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
262        return {Ctor,
263                declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)};
264  
265    Function *Ctor;
266    FunctionCallee InitFunction;
267    std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
268        M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak);
269    FunctionsCreatedCallback(Ctor, InitFunction);
270    return std::make_pair(Ctor, InitFunction);
271  }
272  
273  void llvm::filterDeadComdatFunctions(
274      SmallVectorImpl<Function *> &DeadComdatFunctions) {
275    SmallPtrSet<Function *, 32> MaybeDeadFunctions;
276    SmallPtrSet<Comdat *, 32> MaybeDeadComdats;
277    for (Function *F : DeadComdatFunctions) {
278      MaybeDeadFunctions.insert(F);
279      if (Comdat *C = F->getComdat())
280        MaybeDeadComdats.insert(C);
281    }
282  
283    // Find comdats for which all users are dead now.
284    SmallPtrSet<Comdat *, 32> DeadComdats;
285    for (Comdat *C : MaybeDeadComdats) {
286      auto IsUserDead = [&](GlobalObject *GO) {
287        auto *F = dyn_cast<Function>(GO);
288        return F && MaybeDeadFunctions.contains(F);
289      };
290      if (all_of(C->getUsers(), IsUserDead))
291        DeadComdats.insert(C);
292    }
293  
294    // Only keep functions which have no comdat or a dead comdat.
295    erase_if(DeadComdatFunctions, [&](Function *F) {
296      Comdat *C = F->getComdat();
297      return C && !DeadComdats.contains(C);
298    });
299  }
300  
301  std::string llvm::getUniqueModuleId(Module *M) {
302    MD5 Md5;
303    bool ExportsSymbols = false;
304    auto AddGlobal = [&](GlobalValue &GV) {
305      if (GV.isDeclaration() || GV.getName().starts_with("llvm.") ||
306          !GV.hasExternalLinkage() || GV.hasComdat())
307        return;
308      ExportsSymbols = true;
309      Md5.update(GV.getName());
310      Md5.update(ArrayRef<uint8_t>{0});
311    };
312  
313    for (auto &F : *M)
314      AddGlobal(F);
315    for (auto &GV : M->globals())
316      AddGlobal(GV);
317    for (auto &GA : M->aliases())
318      AddGlobal(GA);
319    for (auto &IF : M->ifuncs())
320      AddGlobal(IF);
321  
322    if (!ExportsSymbols)
323      return "";
324  
325    MD5::MD5Result R;
326    Md5.final(R);
327  
328    SmallString<32> Str;
329    MD5::stringifyResult(R, Str);
330    return ("." + Str).str();
331  }
332  
333  void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
334                                 StringRef SectionName, Align Alignment) {
335    // Embed the memory buffer into the module.
336    Constant *ModuleConstant = ConstantDataArray::get(
337        M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
338    GlobalVariable *GV = new GlobalVariable(
339        M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
340        ModuleConstant, "llvm.embedded.object");
341    GV->setSection(SectionName);
342    GV->setAlignment(Alignment);
343  
344    LLVMContext &Ctx = M.getContext();
345    NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects");
346    Metadata *MDVals[] = {ConstantAsMetadata::get(GV),
347                          MDString::get(Ctx, SectionName)};
348  
349    MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
350    GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {}));
351  
352    appendToCompilerUsed(M, GV);
353  }
354  
/// Lower uses of ifuncs in \p M to loads from a table of function pointers
/// that is filled in by a newly created global constructor.
///
/// For each ifunc to lower, the constructor calls the ifunc's resolver and
/// stores the result in the table; every instruction that uses the ifunc is
/// rewritten to load the pointer from the table instead. An ifunc with no
/// remaining uses is erased.
///
/// \param M                      module to transform.
/// \param FilteredIFuncsToLower  ifuncs to lower; when empty, every ifunc in
///                               \p M is lowered.
/// \returns true if something could not be lowered, i.e. a resolver takes
///          parameters or an ifunc has a user that is not an instruction;
///          false otherwise.
bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
    Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) {
  SmallVector<GlobalIFunc *, 32> AllIFuncs;
  ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower;
  if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs
    for (GlobalIFunc &GI : M.ifuncs())
      AllIFuncs.push_back(&GI);
    IFuncsToLower = AllIFuncs;
  }

  bool UnhandledUsers = false;
  LLVMContext &Ctx = M.getContext();
  const DataLayout &DL = M.getDataLayout();

  // Table entries are pointers in the program (code) address space.
  PointerType *TableEntryTy =
      PointerType::get(Ctx, DL.getProgramAddressSpace());

  // The table has one slot per requested ifunc, including any that end up
  // being skipped below.
  ArrayType *FuncPtrTableTy =
      ArrayType::get(TableEntryTy, IFuncsToLower.size());

  Align PtrAlign = DL.getABITypeAlign(TableEntryTy);

  // Create a global table of function pointers we'll initialize in a global
  // constructor. It starts out as poison; slots are written by the
  // constructor built below.
  auto *FuncPtrTable = new GlobalVariable(
      M, FuncPtrTableTy, false, GlobalValue::InternalLinkage,
      PoisonValue::get(FuncPtrTableTy), "", nullptr,
      GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace());
  FuncPtrTable->setAlignment(PtrAlign);

  // Create a function to initialize the function pointer table.
  Function *NewCtor = Function::Create(
      FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage,
      DL.getProgramAddressSpace(), "", &M);

  BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor);
  IRBuilder<> InitBuilder(BB);

  // Next free slot in the table; only advanced for ifuncs that are lowered.
  size_t TableIndex = 0;
  for (GlobalIFunc *GI : IFuncsToLower) {
    Function *ResolvedFunction = GI->getResolverFunction();

    // We don't know what to pass to a resolver function taking arguments
    //
    // FIXME: Is this even valid? clang and gcc don't complain but this
    // probably should be invalid IR. We could just pass through undef.
    if (!std::empty(ResolvedFunction->getFunctionType()->params())) {
      LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
                        << ResolvedFunction->getName() << " with parameters\n");
      UnhandledUsers = true;
      continue;
    }

    // Initialize the function pointer table: call the resolver in the
    // constructor and store its result into this ifunc's slot.
    CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction);
    Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy);
    // The slot address is expected to be a constant (the cast<> asserts this),
    // which allows it to be reused at every use site below.
    Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32(
        FuncPtrTableTy, FuncPtrTable, 0, TableIndex++));
    InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign);

    // Update all users to load a pointer from the global table.
    // Early-increment iteration is used because the loop body rewrites the
    // uses it is walking over.
    for (User *User : make_early_inc_range(GI->users())) {
      Instruction *UserInst = dyn_cast<Instruction>(User);
      if (!UserInst) {
        // TODO: Should handle constantexpr casts in user instructions. Probably
        // can't do much about constant initializers.
        UnhandledUsers = true;
        continue;
      }

      // Load the resolved pointer right before the using instruction and
      // substitute it for the ifunc operand.
      IRBuilder<> UseBuilder(UserInst);
      LoadInst *ResolvedTarget =
          UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign);
      Value *ResolvedCast =
          UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType());
      UserInst->replaceUsesOfWith(GI, ResolvedCast);
    }

    // If we handled all users, erase the ifunc.
    if (GI->use_empty())
      GI->eraseFromParent();
  }

  InitBuilder.CreateRetVoid();

  // Type of the (null) associated-data pointer passed with the ctor entry.
  PointerType *ConstantDataTy = PointerType::get(Ctx, 0);

  // TODO: Is this the right priority? Probably should be before any other
  // constructors?
  const int Priority = 10;
  appendToGlobalCtors(M, NewCtor, Priority,
                      ConstantPointerNull::get(ConstantDataTy));
  return UnhandledUsers;
}
449