1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This family of functions perform manipulations on Modules. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Transforms/Utils/ModuleUtils.h" 14 #include "llvm/Analysis/VectorUtils.h" 15 #include "llvm/ADT/SmallString.h" 16 #include "llvm/IR/DerivedTypes.h" 17 #include "llvm/IR/Function.h" 18 #include "llvm/IR/IRBuilder.h" 19 #include "llvm/IR/MDBuilder.h" 20 #include "llvm/IR/Module.h" 21 #include "llvm/Support/raw_ostream.h" 22 #include "llvm/Support/xxhash.h" 23 24 using namespace llvm; 25 26 #define DEBUG_TYPE "moduleutils" 27 28 static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F, 29 int Priority, Constant *Data) { 30 IRBuilder<> IRB(M.getContext()); 31 FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); 32 33 // Get the current set of static global constructors and add the new ctor 34 // to the list. 35 SmallVector<Constant *, 16> CurrentCtors; 36 StructType *EltTy; 37 if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) { 38 EltTy = cast<StructType>(GVCtor->getValueType()->getArrayElementType()); 39 if (Constant *Init = GVCtor->getInitializer()) { 40 unsigned n = Init->getNumOperands(); 41 CurrentCtors.reserve(n + 1); 42 for (unsigned i = 0; i != n; ++i) 43 CurrentCtors.push_back(cast<Constant>(Init->getOperand(i))); 44 } 45 GVCtor->eraseFromParent(); 46 } else { 47 EltTy = StructType::get(IRB.getInt32Ty(), 48 PointerType::get(FnTy, F->getAddressSpace()), 49 IRB.getPtrTy()); 50 } 51 52 // Build a 3 field global_ctor entry. We don't take a comdat key. 53 Constant *CSVals[3]; 54 CSVals[0] = IRB.getInt32(Priority); 55 CSVals[1] = F; 56 CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getPtrTy()) 57 : Constant::getNullValue(IRB.getPtrTy()); 58 Constant *RuntimeCtorInit = 59 ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements())); 60 61 CurrentCtors.push_back(RuntimeCtorInit); 62 63 // Create a new initializer. 64 ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size()); 65 Constant *NewInit = ConstantArray::get(AT, CurrentCtors); 66 67 // Create the new global variable and replace all uses of 68 // the old global variable with the new one. 69 (void)new GlobalVariable(M, NewInit->getType(), false, 70 GlobalValue::AppendingLinkage, NewInit, ArrayName); 71 } 72 73 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) { 74 appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data); 75 } 76 77 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) { 78 appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data); 79 } 80 81 static void collectUsedGlobals(GlobalVariable *GV, 82 SmallSetVector<Constant *, 16> &Init) { 83 if (!GV || !GV->hasInitializer()) 84 return; 85 86 auto *CA = cast<ConstantArray>(GV->getInitializer()); 87 for (Use &Op : CA->operands()) 88 Init.insert(cast<Constant>(Op)); 89 } 90 91 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) { 92 GlobalVariable *GV = M.getGlobalVariable(Name); 93 94 SmallSetVector<Constant *, 16> Init; 95 collectUsedGlobals(GV, Init); 96 if (GV) 97 GV->eraseFromParent(); 98 99 Type *ArrayEltTy = llvm::PointerType::getUnqual(M.getContext()); 100 for (auto *V : Values) 101 Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy)); 102 103 if (Init.empty()) 104 return; 105 106 ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size()); 107 GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, 108 ConstantArray::get(ATy, Init.getArrayRef()), 109 Name); 110 GV->setSection("llvm.metadata"); 111 } 112 113 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) { 114 appendToUsedList(M, "llvm.used", Values); 115 } 116 117 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) { 118 appendToUsedList(M, "llvm.compiler.used", Values); 119 } 120 121 static void removeFromUsedList(Module &M, StringRef Name, 122 function_ref<bool(Constant *)> ShouldRemove) { 123 GlobalVariable *GV = M.getNamedGlobal(Name); 124 if (!GV) 125 return; 126 127 SmallSetVector<Constant *, 16> Init; 128 collectUsedGlobals(GV, Init); 129 130 Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType(); 131 132 SmallVector<Constant *, 16> NewInit; 133 for (Constant *MaybeRemoved : Init) { 134 if (!ShouldRemove(MaybeRemoved->stripPointerCasts())) 135 NewInit.push_back(MaybeRemoved); 136 } 137 138 if (!NewInit.empty()) { 139 ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size()); 140 GlobalVariable *NewGV = 141 new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, 142 ConstantArray::get(ATy, NewInit), "", GV, 143 GV->getThreadLocalMode(), GV->getAddressSpace()); 144 NewGV->setSection(GV->getSection()); 145 NewGV->takeName(GV); 146 } 147 148 GV->eraseFromParent(); 149 } 150 151 void llvm::removeFromUsedLists(Module &M, 152 function_ref<bool(Constant *)> ShouldRemove) { 153 removeFromUsedList(M, "llvm.used", ShouldRemove); 154 removeFromUsedList(M, "llvm.compiler.used", ShouldRemove); 155 } 156 157 void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) { 158 if (!M.getModuleFlag("kcfi")) 159 return; 160 // Matches CodeGenModule::CreateKCFITypeId in Clang. 161 LLVMContext &Ctx = M.getContext(); 162 MDBuilder MDB(Ctx); 163 F.setMetadata( 164 LLVMContext::MD_kcfi_type, 165 MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( 166 Type::getInt32Ty(Ctx), 167 static_cast<uint32_t>(xxHash64(MangledType)))))); 168 // If the module was compiled with -fpatchable-function-entry, ensure 169 // we use the same patchable-function-prefix. 170 if (auto *MD = mdconst::extract_or_null<ConstantInt>( 171 M.getModuleFlag("kcfi-offset"))) { 172 if (unsigned Offset = MD->getZExtValue()) 173 F.addFnAttr("patchable-function-prefix", std::to_string(Offset)); 174 } 175 } 176 177 FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName, 178 ArrayRef<Type *> InitArgTypes, 179 bool Weak) { 180 assert(!InitName.empty() && "Expected init function name"); 181 auto *VoidTy = Type::getVoidTy(M.getContext()); 182 auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false); 183 auto FnCallee = M.getOrInsertFunction(InitName, FnTy); 184 auto *Fn = cast<Function>(FnCallee.getCallee()); 185 if (Weak && Fn->isDeclaration()) 186 Fn->setLinkage(Function::ExternalWeakLinkage); 187 return FnCallee; 188 } 189 190 Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) { 191 Function *Ctor = Function::createWithDefaultAttr( 192 FunctionType::get(Type::getVoidTy(M.getContext()), false), 193 GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(), 194 CtorName, &M); 195 Ctor->addFnAttr(Attribute::NoUnwind); 196 setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void) 197 BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor); 198 ReturnInst::Create(M.getContext(), CtorBB); 199 // Ensure Ctor cannot be discarded, even if in a comdat. 200 appendToUsed(M, {Ctor}); 201 return Ctor; 202 } 203 204 std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions( 205 Module &M, StringRef CtorName, StringRef InitName, 206 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, 207 StringRef VersionCheckName, bool Weak) { 208 assert(!InitName.empty() && "Expected init function name"); 209 assert(InitArgs.size() == InitArgTypes.size() && 210 "Sanitizer's init function expects different number of arguments"); 211 FunctionCallee InitFunction = 212 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak); 213 Function *Ctor = createSanitizerCtor(M, CtorName); 214 IRBuilder<> IRB(M.getContext()); 215 216 BasicBlock *RetBB = &Ctor->getEntryBlock(); 217 if (Weak) { 218 RetBB->setName("ret"); 219 auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB); 220 auto *CallInitBB = 221 BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB); 222 auto *InitFn = cast<Function>(InitFunction.getCallee()); 223 auto *InitFnPtr = 224 PointerType::get(InitFn->getType(), InitFn->getAddressSpace()); 225 IRB.SetInsertPoint(EntryBB); 226 Value *InitNotNull = 227 IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr)); 228 IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB); 229 IRB.SetInsertPoint(CallInitBB); 230 } else { 231 IRB.SetInsertPoint(RetBB->getTerminator()); 232 } 233 234 IRB.CreateCall(InitFunction, InitArgs); 235 if (!VersionCheckName.empty()) { 236 FunctionCallee VersionCheckFunction = M.getOrInsertFunction( 237 VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false), 238 AttributeList()); 239 IRB.CreateCall(VersionCheckFunction, {}); 240 } 241 242 if (Weak) 243 IRB.CreateBr(RetBB); 244 245 return std::make_pair(Ctor, InitFunction); 246 } 247 248 std::pair<Function *, FunctionCallee> 249 llvm::getOrCreateSanitizerCtorAndInitFunctions( 250 Module &M, StringRef CtorName, StringRef InitName, 251 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, 252 function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback, 253 StringRef VersionCheckName, bool Weak) { 254 assert(!CtorName.empty() && "Expected ctor function name"); 255 256 if (Function *Ctor = M.getFunction(CtorName)) 257 // FIXME: Sink this logic into the module, similar to the handling of 258 // globals. This will make moving to a concurrent model much easier. 259 if (Ctor->arg_empty() || 260 Ctor->getReturnType() == Type::getVoidTy(M.getContext())) 261 return {Ctor, 262 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)}; 263 264 Function *Ctor; 265 FunctionCallee InitFunction; 266 std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions( 267 M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak); 268 FunctionsCreatedCallback(Ctor, InitFunction); 269 return std::make_pair(Ctor, InitFunction); 270 } 271 272 void llvm::filterDeadComdatFunctions( 273 SmallVectorImpl<Function *> &DeadComdatFunctions) { 274 SmallPtrSet<Function *, 32> MaybeDeadFunctions; 275 SmallPtrSet<Comdat *, 32> MaybeDeadComdats; 276 for (Function *F : DeadComdatFunctions) { 277 MaybeDeadFunctions.insert(F); 278 if (Comdat *C = F->getComdat()) 279 MaybeDeadComdats.insert(C); 280 } 281 282 // Find comdats for which all users are dead now. 283 SmallPtrSet<Comdat *, 32> DeadComdats; 284 for (Comdat *C : MaybeDeadComdats) { 285 auto IsUserDead = [&](GlobalObject *GO) { 286 auto *F = dyn_cast<Function>(GO); 287 return F && MaybeDeadFunctions.contains(F); 288 }; 289 if (all_of(C->getUsers(), IsUserDead)) 290 DeadComdats.insert(C); 291 } 292 293 // Only keep functions which have no comdat or a dead comdat. 294 erase_if(DeadComdatFunctions, [&](Function *F) { 295 Comdat *C = F->getComdat(); 296 return C && !DeadComdats.contains(C); 297 }); 298 } 299 300 std::string llvm::getUniqueModuleId(Module *M) { 301 MD5 Md5; 302 bool ExportsSymbols = false; 303 auto AddGlobal = [&](GlobalValue &GV) { 304 if (GV.isDeclaration() || GV.getName().starts_with("llvm.") || 305 !GV.hasExternalLinkage() || GV.hasComdat()) 306 return; 307 ExportsSymbols = true; 308 Md5.update(GV.getName()); 309 Md5.update(ArrayRef<uint8_t>{0}); 310 }; 311 312 for (auto &F : *M) 313 AddGlobal(F); 314 for (auto &GV : M->globals()) 315 AddGlobal(GV); 316 for (auto &GA : M->aliases()) 317 AddGlobal(GA); 318 for (auto &IF : M->ifuncs()) 319 AddGlobal(IF); 320 321 if (!ExportsSymbols) 322 return ""; 323 324 MD5::MD5Result R; 325 Md5.final(R); 326 327 SmallString<32> Str; 328 MD5::stringifyResult(R, Str); 329 return ("." + Str).str(); 330 } 331 332 void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf, 333 StringRef SectionName, Align Alignment) { 334 // Embed the memory buffer into the module. 335 Constant *ModuleConstant = ConstantDataArray::get( 336 M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize())); 337 GlobalVariable *GV = new GlobalVariable( 338 M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage, 339 ModuleConstant, "llvm.embedded.object"); 340 GV->setSection(SectionName); 341 GV->setAlignment(Alignment); 342 343 LLVMContext &Ctx = M.getContext(); 344 NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects"); 345 Metadata *MDVals[] = {ConstantAsMetadata::get(GV), 346 MDString::get(Ctx, SectionName)}; 347 348 MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); 349 GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {})); 350 351 appendToCompilerUsed(M, GV); 352 } 353 354 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor( 355 Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) { 356 SmallVector<GlobalIFunc *, 32> AllIFuncs; 357 ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower; 358 if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs 359 for (GlobalIFunc &GI : M.ifuncs()) 360 AllIFuncs.push_back(&GI); 361 IFuncsToLower = AllIFuncs; 362 } 363 364 bool UnhandledUsers = false; 365 LLVMContext &Ctx = M.getContext(); 366 const DataLayout &DL = M.getDataLayout(); 367 368 PointerType *TableEntryTy = 369 PointerType::get(Ctx, DL.getProgramAddressSpace()); 370 371 ArrayType *FuncPtrTableTy = 372 ArrayType::get(TableEntryTy, IFuncsToLower.size()); 373 374 Align PtrAlign = DL.getABITypeAlign(TableEntryTy); 375 376 // Create a global table of function pointers we'll initialize in a global 377 // constructor. 378 auto *FuncPtrTable = new GlobalVariable( 379 M, FuncPtrTableTy, false, GlobalValue::InternalLinkage, 380 PoisonValue::get(FuncPtrTableTy), "", nullptr, 381 GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace()); 382 FuncPtrTable->setAlignment(PtrAlign); 383 384 // Create a function to initialize the function pointer table. 385 Function *NewCtor = Function::Create( 386 FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage, 387 DL.getProgramAddressSpace(), "", &M); 388 389 BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor); 390 IRBuilder<> InitBuilder(BB); 391 392 size_t TableIndex = 0; 393 for (GlobalIFunc *GI : IFuncsToLower) { 394 Function *ResolvedFunction = GI->getResolverFunction(); 395 396 // We don't know what to pass to a resolver function taking arguments 397 // 398 // FIXME: Is this even valid? clang and gcc don't complain but this 399 // probably should be invalid IR. We could just pass through undef. 400 if (!std::empty(ResolvedFunction->getFunctionType()->params())) { 401 LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function " 402 << ResolvedFunction->getName() << " with parameters\n"); 403 UnhandledUsers = true; 404 continue; 405 } 406 407 // Initialize the function pointer table. 408 CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction); 409 Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy); 410 Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32( 411 FuncPtrTableTy, FuncPtrTable, 0, TableIndex++)); 412 InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign); 413 414 // Update all users to load a pointer from the global table. 415 for (User *User : make_early_inc_range(GI->users())) { 416 Instruction *UserInst = dyn_cast<Instruction>(User); 417 if (!UserInst) { 418 // TODO: Should handle constantexpr casts in user instructions. Probably 419 // can't do much about constant initializers. 420 UnhandledUsers = true; 421 continue; 422 } 423 424 IRBuilder<> UseBuilder(UserInst); 425 LoadInst *ResolvedTarget = 426 UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign); 427 Value *ResolvedCast = 428 UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType()); 429 UserInst->replaceUsesOfWith(GI, ResolvedCast); 430 } 431 432 // If we handled all users, erase the ifunc. 433 if (GI->use_empty()) 434 GI->eraseFromParent(); 435 } 436 437 InitBuilder.CreateRetVoid(); 438 439 PointerType *ConstantDataTy = PointerType::get(Ctx, 0); 440 441 // TODO: Is this the right priority? Probably should be before any other 442 // constructors? 443 const int Priority = 10; 444 appendToGlobalCtors(M, NewCtor, Priority, 445 ConstantPointerNull::get(ConstantDataTy)); 446 return UnhandledUsers; 447 } 448