1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This family of functions perform manipulations on Modules. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Transforms/Utils/ModuleUtils.h" 14 #include "llvm/Analysis/VectorUtils.h" 15 #include "llvm/ADT/SmallString.h" 16 #include "llvm/IR/DerivedTypes.h" 17 #include "llvm/IR/Function.h" 18 #include "llvm/IR/IRBuilder.h" 19 #include "llvm/IR/MDBuilder.h" 20 #include "llvm/IR/Module.h" 21 #include "llvm/Support/MD5.h" 22 #include "llvm/Support/raw_ostream.h" 23 #include "llvm/Support/xxhash.h" 24 25 using namespace llvm; 26 27 #define DEBUG_TYPE "moduleutils" 28 29 static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F, 30 int Priority, Constant *Data) { 31 IRBuilder<> IRB(M.getContext()); 32 FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); 33 34 // Get the current set of static global constructors and add the new ctor 35 // to the list. 36 SmallVector<Constant *, 16> CurrentCtors; 37 StructType *EltTy; 38 if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) { 39 EltTy = cast<StructType>(GVCtor->getValueType()->getArrayElementType()); 40 if (Constant *Init = GVCtor->getInitializer()) { 41 unsigned n = Init->getNumOperands(); 42 CurrentCtors.reserve(n + 1); 43 for (unsigned i = 0; i != n; ++i) 44 CurrentCtors.push_back(cast<Constant>(Init->getOperand(i))); 45 } 46 GVCtor->eraseFromParent(); 47 } else { 48 EltTy = StructType::get(IRB.getInt32Ty(), 49 PointerType::get(FnTy, F->getAddressSpace()), 50 IRB.getPtrTy()); 51 } 52 53 // Build a 3 field global_ctor entry. We don't take a comdat key. 54 Constant *CSVals[3]; 55 CSVals[0] = IRB.getInt32(Priority); 56 CSVals[1] = F; 57 CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getPtrTy()) 58 : Constant::getNullValue(IRB.getPtrTy()); 59 Constant *RuntimeCtorInit = 60 ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements())); 61 62 CurrentCtors.push_back(RuntimeCtorInit); 63 64 // Create a new initializer. 65 ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size()); 66 Constant *NewInit = ConstantArray::get(AT, CurrentCtors); 67 68 // Create the new global variable and replace all uses of 69 // the old global variable with the new one. 70 (void)new GlobalVariable(M, NewInit->getType(), false, 71 GlobalValue::AppendingLinkage, NewInit, ArrayName); 72 } 73 74 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) { 75 appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data); 76 } 77 78 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) { 79 appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data); 80 } 81 82 static void collectUsedGlobals(GlobalVariable *GV, 83 SmallSetVector<Constant *, 16> &Init) { 84 if (!GV || !GV->hasInitializer()) 85 return; 86 87 auto *CA = cast<ConstantArray>(GV->getInitializer()); 88 for (Use &Op : CA->operands()) 89 Init.insert(cast<Constant>(Op)); 90 } 91 92 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) { 93 GlobalVariable *GV = M.getGlobalVariable(Name); 94 95 SmallSetVector<Constant *, 16> Init; 96 collectUsedGlobals(GV, Init); 97 if (GV) 98 GV->eraseFromParent(); 99 100 Type *ArrayEltTy = llvm::PointerType::getUnqual(M.getContext()); 101 for (auto *V : Values) 102 Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy)); 103 104 if (Init.empty()) 105 return; 106 107 ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size()); 108 GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, 109 ConstantArray::get(ATy, Init.getArrayRef()), 110 Name); 111 GV->setSection("llvm.metadata"); 112 } 113 114 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) { 115 appendToUsedList(M, "llvm.used", Values); 116 } 117 118 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) { 119 appendToUsedList(M, "llvm.compiler.used", Values); 120 } 121 122 static void removeFromUsedList(Module &M, StringRef Name, 123 function_ref<bool(Constant *)> ShouldRemove) { 124 GlobalVariable *GV = M.getNamedGlobal(Name); 125 if (!GV) 126 return; 127 128 SmallSetVector<Constant *, 16> Init; 129 collectUsedGlobals(GV, Init); 130 131 Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType(); 132 133 SmallVector<Constant *, 16> NewInit; 134 for (Constant *MaybeRemoved : Init) { 135 if (!ShouldRemove(MaybeRemoved->stripPointerCasts())) 136 NewInit.push_back(MaybeRemoved); 137 } 138 139 if (!NewInit.empty()) { 140 ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size()); 141 GlobalVariable *NewGV = 142 new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, 143 ConstantArray::get(ATy, NewInit), "", GV, 144 GV->getThreadLocalMode(), GV->getAddressSpace()); 145 NewGV->setSection(GV->getSection()); 146 NewGV->takeName(GV); 147 } 148 149 GV->eraseFromParent(); 150 } 151 152 void llvm::removeFromUsedLists(Module &M, 153 function_ref<bool(Constant *)> ShouldRemove) { 154 removeFromUsedList(M, "llvm.used", ShouldRemove); 155 removeFromUsedList(M, "llvm.compiler.used", ShouldRemove); 156 } 157 158 void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) { 159 if (!M.getModuleFlag("kcfi")) 160 return; 161 // Matches CodeGenModule::CreateKCFITypeId in Clang. 162 LLVMContext &Ctx = M.getContext(); 163 MDBuilder MDB(Ctx); 164 F.setMetadata( 165 LLVMContext::MD_kcfi_type, 166 MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( 167 Type::getInt32Ty(Ctx), 168 static_cast<uint32_t>(xxHash64(MangledType)))))); 169 // If the module was compiled with -fpatchable-function-entry, ensure 170 // we use the same patchable-function-prefix. 171 if (auto *MD = mdconst::extract_or_null<ConstantInt>( 172 M.getModuleFlag("kcfi-offset"))) { 173 if (unsigned Offset = MD->getZExtValue()) 174 F.addFnAttr("patchable-function-prefix", std::to_string(Offset)); 175 } 176 } 177 178 FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName, 179 ArrayRef<Type *> InitArgTypes, 180 bool Weak) { 181 assert(!InitName.empty() && "Expected init function name"); 182 auto *VoidTy = Type::getVoidTy(M.getContext()); 183 auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false); 184 auto FnCallee = M.getOrInsertFunction(InitName, FnTy); 185 auto *Fn = cast<Function>(FnCallee.getCallee()); 186 if (Weak && Fn->isDeclaration()) 187 Fn->setLinkage(Function::ExternalWeakLinkage); 188 return FnCallee; 189 } 190 191 Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) { 192 Function *Ctor = Function::createWithDefaultAttr( 193 FunctionType::get(Type::getVoidTy(M.getContext()), false), 194 GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(), 195 CtorName, &M); 196 Ctor->addFnAttr(Attribute::NoUnwind); 197 setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void) 198 BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor); 199 ReturnInst::Create(M.getContext(), CtorBB); 200 // Ensure Ctor cannot be discarded, even if in a comdat. 201 appendToUsed(M, {Ctor}); 202 return Ctor; 203 } 204 205 std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions( 206 Module &M, StringRef CtorName, StringRef InitName, 207 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, 208 StringRef VersionCheckName, bool Weak) { 209 assert(!InitName.empty() && "Expected init function name"); 210 assert(InitArgs.size() == InitArgTypes.size() && 211 "Sanitizer's init function expects different number of arguments"); 212 FunctionCallee InitFunction = 213 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak); 214 Function *Ctor = createSanitizerCtor(M, CtorName); 215 IRBuilder<> IRB(M.getContext()); 216 217 BasicBlock *RetBB = &Ctor->getEntryBlock(); 218 if (Weak) { 219 RetBB->setName("ret"); 220 auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB); 221 auto *CallInitBB = 222 BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB); 223 auto *InitFn = cast<Function>(InitFunction.getCallee()); 224 auto *InitFnPtr = 225 PointerType::get(InitFn->getType(), InitFn->getAddressSpace()); 226 IRB.SetInsertPoint(EntryBB); 227 Value *InitNotNull = 228 IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr)); 229 IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB); 230 IRB.SetInsertPoint(CallInitBB); 231 } else { 232 IRB.SetInsertPoint(RetBB->getTerminator()); 233 } 234 235 IRB.CreateCall(InitFunction, InitArgs); 236 if (!VersionCheckName.empty()) { 237 FunctionCallee VersionCheckFunction = M.getOrInsertFunction( 238 VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false), 239 AttributeList()); 240 IRB.CreateCall(VersionCheckFunction, {}); 241 } 242 243 if (Weak) 244 IRB.CreateBr(RetBB); 245 246 return std::make_pair(Ctor, InitFunction); 247 } 248 249 std::pair<Function *, FunctionCallee> 250 llvm::getOrCreateSanitizerCtorAndInitFunctions( 251 Module &M, StringRef CtorName, StringRef InitName, 252 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, 253 function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback, 254 StringRef VersionCheckName, bool Weak) { 255 assert(!CtorName.empty() && "Expected ctor function name"); 256 257 if (Function *Ctor = M.getFunction(CtorName)) 258 // FIXME: Sink this logic into the module, similar to the handling of 259 // globals. This will make moving to a concurrent model much easier. 260 if (Ctor->arg_empty() || 261 Ctor->getReturnType() == Type::getVoidTy(M.getContext())) 262 return {Ctor, 263 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)}; 264 265 Function *Ctor; 266 FunctionCallee InitFunction; 267 std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions( 268 M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak); 269 FunctionsCreatedCallback(Ctor, InitFunction); 270 return std::make_pair(Ctor, InitFunction); 271 } 272 273 void llvm::filterDeadComdatFunctions( 274 SmallVectorImpl<Function *> &DeadComdatFunctions) { 275 SmallPtrSet<Function *, 32> MaybeDeadFunctions; 276 SmallPtrSet<Comdat *, 32> MaybeDeadComdats; 277 for (Function *F : DeadComdatFunctions) { 278 MaybeDeadFunctions.insert(F); 279 if (Comdat *C = F->getComdat()) 280 MaybeDeadComdats.insert(C); 281 } 282 283 // Find comdats for which all users are dead now. 284 SmallPtrSet<Comdat *, 32> DeadComdats; 285 for (Comdat *C : MaybeDeadComdats) { 286 auto IsUserDead = [&](GlobalObject *GO) { 287 auto *F = dyn_cast<Function>(GO); 288 return F && MaybeDeadFunctions.contains(F); 289 }; 290 if (all_of(C->getUsers(), IsUserDead)) 291 DeadComdats.insert(C); 292 } 293 294 // Only keep functions which have no comdat or a dead comdat. 295 erase_if(DeadComdatFunctions, [&](Function *F) { 296 Comdat *C = F->getComdat(); 297 return C && !DeadComdats.contains(C); 298 }); 299 } 300 301 std::string llvm::getUniqueModuleId(Module *M) { 302 MD5 Md5; 303 bool ExportsSymbols = false; 304 auto AddGlobal = [&](GlobalValue &GV) { 305 if (GV.isDeclaration() || GV.getName().starts_with("llvm.") || 306 !GV.hasExternalLinkage() || GV.hasComdat()) 307 return; 308 ExportsSymbols = true; 309 Md5.update(GV.getName()); 310 Md5.update(ArrayRef<uint8_t>{0}); 311 }; 312 313 for (auto &F : *M) 314 AddGlobal(F); 315 for (auto &GV : M->globals()) 316 AddGlobal(GV); 317 for (auto &GA : M->aliases()) 318 AddGlobal(GA); 319 for (auto &IF : M->ifuncs()) 320 AddGlobal(IF); 321 322 if (!ExportsSymbols) 323 return ""; 324 325 MD5::MD5Result R; 326 Md5.final(R); 327 328 SmallString<32> Str; 329 MD5::stringifyResult(R, Str); 330 return ("." + Str).str(); 331 } 332 333 void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf, 334 StringRef SectionName, Align Alignment) { 335 // Embed the memory buffer into the module. 336 Constant *ModuleConstant = ConstantDataArray::get( 337 M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize())); 338 GlobalVariable *GV = new GlobalVariable( 339 M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage, 340 ModuleConstant, "llvm.embedded.object"); 341 GV->setSection(SectionName); 342 GV->setAlignment(Alignment); 343 344 LLVMContext &Ctx = M.getContext(); 345 NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects"); 346 Metadata *MDVals[] = {ConstantAsMetadata::get(GV), 347 MDString::get(Ctx, SectionName)}; 348 349 MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); 350 GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {})); 351 352 appendToCompilerUsed(M, GV); 353 } 354 355 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor( 356 Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) { 357 SmallVector<GlobalIFunc *, 32> AllIFuncs; 358 ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower; 359 if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs 360 for (GlobalIFunc &GI : M.ifuncs()) 361 AllIFuncs.push_back(&GI); 362 IFuncsToLower = AllIFuncs; 363 } 364 365 bool UnhandledUsers = false; 366 LLVMContext &Ctx = M.getContext(); 367 const DataLayout &DL = M.getDataLayout(); 368 369 PointerType *TableEntryTy = 370 PointerType::get(Ctx, DL.getProgramAddressSpace()); 371 372 ArrayType *FuncPtrTableTy = 373 ArrayType::get(TableEntryTy, IFuncsToLower.size()); 374 375 Align PtrAlign = DL.getABITypeAlign(TableEntryTy); 376 377 // Create a global table of function pointers we'll initialize in a global 378 // constructor. 379 auto *FuncPtrTable = new GlobalVariable( 380 M, FuncPtrTableTy, false, GlobalValue::InternalLinkage, 381 PoisonValue::get(FuncPtrTableTy), "", nullptr, 382 GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace()); 383 FuncPtrTable->setAlignment(PtrAlign); 384 385 // Create a function to initialize the function pointer table. 386 Function *NewCtor = Function::Create( 387 FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage, 388 DL.getProgramAddressSpace(), "", &M); 389 390 BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor); 391 IRBuilder<> InitBuilder(BB); 392 393 size_t TableIndex = 0; 394 for (GlobalIFunc *GI : IFuncsToLower) { 395 Function *ResolvedFunction = GI->getResolverFunction(); 396 397 // We don't know what to pass to a resolver function taking arguments 398 // 399 // FIXME: Is this even valid? clang and gcc don't complain but this 400 // probably should be invalid IR. We could just pass through undef. 401 if (!std::empty(ResolvedFunction->getFunctionType()->params())) { 402 LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function " 403 << ResolvedFunction->getName() << " with parameters\n"); 404 UnhandledUsers = true; 405 continue; 406 } 407 408 // Initialize the function pointer table. 409 CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction); 410 Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy); 411 Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32( 412 FuncPtrTableTy, FuncPtrTable, 0, TableIndex++)); 413 InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign); 414 415 // Update all users to load a pointer from the global table. 416 for (User *User : make_early_inc_range(GI->users())) { 417 Instruction *UserInst = dyn_cast<Instruction>(User); 418 if (!UserInst) { 419 // TODO: Should handle constantexpr casts in user instructions. Probably 420 // can't do much about constant initializers. 421 UnhandledUsers = true; 422 continue; 423 } 424 425 IRBuilder<> UseBuilder(UserInst); 426 LoadInst *ResolvedTarget = 427 UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign); 428 Value *ResolvedCast = 429 UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType()); 430 UserInst->replaceUsesOfWith(GI, ResolvedCast); 431 } 432 433 // If we handled all users, erase the ifunc. 434 if (GI->use_empty()) 435 GI->eraseFromParent(); 436 } 437 438 InitBuilder.CreateRetVoid(); 439 440 PointerType *ConstantDataTy = PointerType::get(Ctx, 0); 441 442 // TODO: Is this the right priority? Probably should be before any other 443 // constructors? 444 const int Priority = 10; 445 appendToGlobalCtors(M, NewCtor, Priority, 446 ConstantPointerNull::get(ConstantDataTy)); 447 return UnhandledUsers; 448 } 449