1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This family of functions perform manipulations on Modules. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Transforms/Utils/ModuleUtils.h" 14 #include "llvm/Analysis/VectorUtils.h" 15 #include "llvm/IR/DerivedTypes.h" 16 #include "llvm/IR/Function.h" 17 #include "llvm/IR/IRBuilder.h" 18 #include "llvm/IR/MDBuilder.h" 19 #include "llvm/IR/Module.h" 20 #include "llvm/Support/raw_ostream.h" 21 #include "llvm/Support/xxhash.h" 22 using namespace llvm; 23 24 #define DEBUG_TYPE "moduleutils" 25 26 static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F, 27 int Priority, Constant *Data) { 28 IRBuilder<> IRB(M.getContext()); 29 FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); 30 31 // Get the current set of static global constructors and add the new ctor 32 // to the list. 33 SmallVector<Constant *, 16> CurrentCtors; 34 StructType *EltTy = StructType::get( 35 IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()), 36 IRB.getInt8PtrTy()); 37 38 if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) { 39 if (Constant *Init = GVCtor->getInitializer()) { 40 unsigned n = Init->getNumOperands(); 41 CurrentCtors.reserve(n + 1); 42 for (unsigned i = 0; i != n; ++i) 43 CurrentCtors.push_back(cast<Constant>(Init->getOperand(i))); 44 } 45 GVCtor->eraseFromParent(); 46 } 47 48 // Build a 3 field global_ctor entry. We don't take a comdat key. 49 Constant *CSVals[3]; 50 CSVals[0] = IRB.getInt32(Priority); 51 CSVals[1] = F; 52 CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy()) 53 : Constant::getNullValue(IRB.getInt8PtrTy()); 54 Constant *RuntimeCtorInit = 55 ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements())); 56 57 CurrentCtors.push_back(RuntimeCtorInit); 58 59 // Create a new initializer. 60 ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size()); 61 Constant *NewInit = ConstantArray::get(AT, CurrentCtors); 62 63 // Create the new global variable and replace all uses of 64 // the old global variable with the new one. 65 (void)new GlobalVariable(M, NewInit->getType(), false, 66 GlobalValue::AppendingLinkage, NewInit, ArrayName); 67 } 68 69 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) { 70 appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data); 71 } 72 73 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) { 74 appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data); 75 } 76 77 static void collectUsedGlobals(GlobalVariable *GV, 78 SmallSetVector<Constant *, 16> &Init) { 79 if (!GV || !GV->hasInitializer()) 80 return; 81 82 auto *CA = cast<ConstantArray>(GV->getInitializer()); 83 for (Use &Op : CA->operands()) 84 Init.insert(cast<Constant>(Op)); 85 } 86 87 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) { 88 GlobalVariable *GV = M.getGlobalVariable(Name); 89 90 SmallSetVector<Constant *, 16> Init; 91 collectUsedGlobals(GV, Init); 92 if (GV) 93 GV->eraseFromParent(); 94 95 Type *ArrayEltTy = llvm::Type::getInt8PtrTy(M.getContext()); 96 for (auto *V : Values) 97 Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy)); 98 99 if (Init.empty()) 100 return; 101 102 ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size()); 103 GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, 104 ConstantArray::get(ATy, Init.getArrayRef()), 105 Name); 106 GV->setSection("llvm.metadata"); 107 } 108 109 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) { 110 appendToUsedList(M, "llvm.used", Values); 111 } 112 113 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) { 114 appendToUsedList(M, "llvm.compiler.used", Values); 115 } 116 117 static void removeFromUsedList(Module &M, StringRef Name, 118 function_ref<bool(Constant *)> ShouldRemove) { 119 GlobalVariable *GV = M.getNamedGlobal(Name); 120 if (!GV) 121 return; 122 123 SmallSetVector<Constant *, 16> Init; 124 collectUsedGlobals(GV, Init); 125 126 Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType(); 127 128 SmallVector<Constant *, 16> NewInit; 129 for (Constant *MaybeRemoved : Init) { 130 if (!ShouldRemove(MaybeRemoved->stripPointerCasts())) 131 NewInit.push_back(MaybeRemoved); 132 } 133 134 if (!NewInit.empty()) { 135 ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size()); 136 GlobalVariable *NewGV = 137 new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, 138 ConstantArray::get(ATy, NewInit), "", GV, 139 GV->getThreadLocalMode(), GV->getAddressSpace()); 140 NewGV->setSection(GV->getSection()); 141 NewGV->takeName(GV); 142 } 143 144 GV->eraseFromParent(); 145 } 146 147 void llvm::removeFromUsedLists(Module &M, 148 function_ref<bool(Constant *)> ShouldRemove) { 149 removeFromUsedList(M, "llvm.used", ShouldRemove); 150 removeFromUsedList(M, "llvm.compiler.used", ShouldRemove); 151 } 152 153 void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) { 154 if (!M.getModuleFlag("kcfi")) 155 return; 156 // Matches CodeGenModule::CreateKCFITypeId in Clang. 157 LLVMContext &Ctx = M.getContext(); 158 MDBuilder MDB(Ctx); 159 F.setMetadata( 160 LLVMContext::MD_kcfi_type, 161 MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( 162 Type::getInt32Ty(Ctx), 163 static_cast<uint32_t>(xxHash64(MangledType)))))); 164 // If the module was compiled with -fpatchable-function-entry, ensure 165 // we use the same patchable-function-prefix. 166 if (auto *MD = mdconst::extract_or_null<ConstantInt>( 167 M.getModuleFlag("kcfi-offset"))) { 168 if (unsigned Offset = MD->getZExtValue()) 169 F.addFnAttr("patchable-function-prefix", std::to_string(Offset)); 170 } 171 } 172 173 FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName, 174 ArrayRef<Type *> InitArgTypes, 175 bool Weak) { 176 assert(!InitName.empty() && "Expected init function name"); 177 auto *VoidTy = Type::getVoidTy(M.getContext()); 178 auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false); 179 auto FnCallee = M.getOrInsertFunction(InitName, FnTy); 180 auto *Fn = cast<Function>(FnCallee.getCallee()); 181 if (Weak && Fn->isDeclaration()) 182 Fn->setLinkage(Function::ExternalWeakLinkage); 183 return FnCallee; 184 } 185 186 Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) { 187 Function *Ctor = Function::createWithDefaultAttr( 188 FunctionType::get(Type::getVoidTy(M.getContext()), false), 189 GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(), 190 CtorName, &M); 191 Ctor->addFnAttr(Attribute::NoUnwind); 192 setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void) 193 BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor); 194 ReturnInst::Create(M.getContext(), CtorBB); 195 // Ensure Ctor cannot be discarded, even if in a comdat. 196 appendToUsed(M, {Ctor}); 197 return Ctor; 198 } 199 200 std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions( 201 Module &M, StringRef CtorName, StringRef InitName, 202 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, 203 StringRef VersionCheckName, bool Weak) { 204 assert(!InitName.empty() && "Expected init function name"); 205 assert(InitArgs.size() == InitArgTypes.size() && 206 "Sanitizer's init function expects different number of arguments"); 207 FunctionCallee InitFunction = 208 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak); 209 Function *Ctor = createSanitizerCtor(M, CtorName); 210 IRBuilder<> IRB(M.getContext()); 211 212 BasicBlock *RetBB = &Ctor->getEntryBlock(); 213 if (Weak) { 214 RetBB->setName("ret"); 215 auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB); 216 auto *CallInitBB = 217 BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB); 218 auto *InitFn = cast<Function>(InitFunction.getCallee()); 219 auto *InitFnPtr = 220 PointerType::get(InitFn->getType(), InitFn->getAddressSpace()); 221 IRB.SetInsertPoint(EntryBB); 222 Value *InitNotNull = 223 IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr)); 224 IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB); 225 IRB.SetInsertPoint(CallInitBB); 226 } else { 227 IRB.SetInsertPoint(RetBB->getTerminator()); 228 } 229 230 IRB.CreateCall(InitFunction, InitArgs); 231 if (!VersionCheckName.empty()) { 232 FunctionCallee VersionCheckFunction = M.getOrInsertFunction( 233 VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false), 234 AttributeList()); 235 IRB.CreateCall(VersionCheckFunction, {}); 236 } 237 238 if (Weak) 239 IRB.CreateBr(RetBB); 240 241 return std::make_pair(Ctor, InitFunction); 242 } 243 244 std::pair<Function *, FunctionCallee> 245 llvm::getOrCreateSanitizerCtorAndInitFunctions( 246 Module &M, StringRef CtorName, StringRef InitName, 247 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, 248 function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback, 249 StringRef VersionCheckName, bool Weak) { 250 assert(!CtorName.empty() && "Expected ctor function name"); 251 252 if (Function *Ctor = M.getFunction(CtorName)) 253 // FIXME: Sink this logic into the module, similar to the handling of 254 // globals. This will make moving to a concurrent model much easier. 255 if (Ctor->arg_empty() || 256 Ctor->getReturnType() == Type::getVoidTy(M.getContext())) 257 return {Ctor, 258 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)}; 259 260 Function *Ctor; 261 FunctionCallee InitFunction; 262 std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions( 263 M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak); 264 FunctionsCreatedCallback(Ctor, InitFunction); 265 return std::make_pair(Ctor, InitFunction); 266 } 267 268 void llvm::filterDeadComdatFunctions( 269 SmallVectorImpl<Function *> &DeadComdatFunctions) { 270 SmallPtrSet<Function *, 32> MaybeDeadFunctions; 271 SmallPtrSet<Comdat *, 32> MaybeDeadComdats; 272 for (Function *F : DeadComdatFunctions) { 273 MaybeDeadFunctions.insert(F); 274 if (Comdat *C = F->getComdat()) 275 MaybeDeadComdats.insert(C); 276 } 277 278 // Find comdats for which all users are dead now. 279 SmallPtrSet<Comdat *, 32> DeadComdats; 280 for (Comdat *C : MaybeDeadComdats) { 281 auto IsUserDead = [&](GlobalObject *GO) { 282 auto *F = dyn_cast<Function>(GO); 283 return F && MaybeDeadFunctions.contains(F); 284 }; 285 if (all_of(C->getUsers(), IsUserDead)) 286 DeadComdats.insert(C); 287 } 288 289 // Only keep functions which have no comdat or a dead comdat. 290 erase_if(DeadComdatFunctions, [&](Function *F) { 291 Comdat *C = F->getComdat(); 292 return C && !DeadComdats.contains(C); 293 }); 294 } 295 296 std::string llvm::getUniqueModuleId(Module *M) { 297 MD5 Md5; 298 bool ExportsSymbols = false; 299 auto AddGlobal = [&](GlobalValue &GV) { 300 if (GV.isDeclaration() || GV.getName().startswith("llvm.") || 301 !GV.hasExternalLinkage() || GV.hasComdat()) 302 return; 303 ExportsSymbols = true; 304 Md5.update(GV.getName()); 305 Md5.update(ArrayRef<uint8_t>{0}); 306 }; 307 308 for (auto &F : *M) 309 AddGlobal(F); 310 for (auto &GV : M->globals()) 311 AddGlobal(GV); 312 for (auto &GA : M->aliases()) 313 AddGlobal(GA); 314 for (auto &IF : M->ifuncs()) 315 AddGlobal(IF); 316 317 if (!ExportsSymbols) 318 return ""; 319 320 MD5::MD5Result R; 321 Md5.final(R); 322 323 SmallString<32> Str; 324 MD5::stringifyResult(R, Str); 325 return ("." + Str).str(); 326 } 327 328 void VFABI::setVectorVariantNames(CallInst *CI, 329 ArrayRef<std::string> VariantMappings) { 330 if (VariantMappings.empty()) 331 return; 332 333 SmallString<256> Buffer; 334 llvm::raw_svector_ostream Out(Buffer); 335 for (const std::string &VariantMapping : VariantMappings) 336 Out << VariantMapping << ","; 337 // Get rid of the trailing ','. 338 assert(!Buffer.str().empty() && "Must have at least one char."); 339 Buffer.pop_back(); 340 341 Module *M = CI->getModule(); 342 #ifndef NDEBUG 343 for (const std::string &VariantMapping : VariantMappings) { 344 LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n"); 345 std::optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M); 346 assert(VI && "Cannot add an invalid VFABI name."); 347 assert(M->getNamedValue(VI->VectorName) && 348 "Cannot add variant to attribute: " 349 "vector function declaration is missing."); 350 } 351 #endif 352 CI->addFnAttr( 353 Attribute::get(M->getContext(), MappingsAttrName, Buffer.str())); 354 } 355 356 void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf, 357 StringRef SectionName, Align Alignment) { 358 // Embed the memory buffer into the module. 359 Constant *ModuleConstant = ConstantDataArray::get( 360 M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize())); 361 GlobalVariable *GV = new GlobalVariable( 362 M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage, 363 ModuleConstant, "llvm.embedded.object"); 364 GV->setSection(SectionName); 365 GV->setAlignment(Alignment); 366 367 LLVMContext &Ctx = M.getContext(); 368 NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects"); 369 Metadata *MDVals[] = {ConstantAsMetadata::get(GV), 370 MDString::get(Ctx, SectionName)}; 371 372 MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); 373 GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {})); 374 375 appendToCompilerUsed(M, GV); 376 } 377 378 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor( 379 Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) { 380 SmallVector<GlobalIFunc *, 32> AllIFuncs; 381 ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower; 382 if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs 383 for (GlobalIFunc &GI : M.ifuncs()) 384 AllIFuncs.push_back(&GI); 385 IFuncsToLower = AllIFuncs; 386 } 387 388 bool UnhandledUsers = false; 389 LLVMContext &Ctx = M.getContext(); 390 const DataLayout &DL = M.getDataLayout(); 391 392 PointerType *TableEntryTy = 393 Ctx.supportsTypedPointers() 394 ? PointerType::get(Type::getInt8Ty(Ctx), DL.getProgramAddressSpace()) 395 : PointerType::get(Ctx, DL.getProgramAddressSpace()); 396 397 ArrayType *FuncPtrTableTy = 398 ArrayType::get(TableEntryTy, IFuncsToLower.size()); 399 400 Align PtrAlign = DL.getABITypeAlign(TableEntryTy); 401 402 // Create a global table of function pointers we'll initialize in a global 403 // constructor. 404 auto *FuncPtrTable = new GlobalVariable( 405 M, FuncPtrTableTy, false, GlobalValue::InternalLinkage, 406 PoisonValue::get(FuncPtrTableTy), "", nullptr, 407 GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace()); 408 FuncPtrTable->setAlignment(PtrAlign); 409 410 // Create a function to initialize the function pointer table. 411 Function *NewCtor = Function::Create( 412 FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage, 413 DL.getProgramAddressSpace(), "", &M); 414 415 BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor); 416 IRBuilder<> InitBuilder(BB); 417 418 size_t TableIndex = 0; 419 for (GlobalIFunc *GI : IFuncsToLower) { 420 Function *ResolvedFunction = GI->getResolverFunction(); 421 422 // We don't know what to pass to a resolver function taking arguments 423 // 424 // FIXME: Is this even valid? clang and gcc don't complain but this 425 // probably should be invalid IR. We could just pass through undef. 426 if (!std::empty(ResolvedFunction->getFunctionType()->params())) { 427 LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function " 428 << ResolvedFunction->getName() << " with parameters\n"); 429 UnhandledUsers = true; 430 continue; 431 } 432 433 // Initialize the function pointer table. 434 CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction); 435 Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy); 436 Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32( 437 FuncPtrTableTy, FuncPtrTable, 0, TableIndex++)); 438 InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign); 439 440 // Update all users to load a pointer from the global table. 441 for (User *User : make_early_inc_range(GI->users())) { 442 Instruction *UserInst = dyn_cast<Instruction>(User); 443 if (!UserInst) { 444 // TODO: Should handle constantexpr casts in user instructions. Probably 445 // can't do much about constant initializers. 446 UnhandledUsers = true; 447 continue; 448 } 449 450 IRBuilder<> UseBuilder(UserInst); 451 LoadInst *ResolvedTarget = 452 UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign); 453 Value *ResolvedCast = 454 UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType()); 455 UserInst->replaceUsesOfWith(GI, ResolvedCast); 456 } 457 458 // If we handled all users, erase the ifunc. 459 if (GI->use_empty()) 460 GI->eraseFromParent(); 461 } 462 463 InitBuilder.CreateRetVoid(); 464 465 PointerType *ConstantDataTy = Ctx.supportsTypedPointers() 466 ? PointerType::get(Type::getInt8Ty(Ctx), 0) 467 : PointerType::get(Ctx, 0); 468 469 // TODO: Is this the right priority? Probably should be before any other 470 // constructors? 471 const int Priority = 10; 472 appendToGlobalCtors(M, NewCtor, Priority, 473 ConstantPointerNull::get(ConstantDataTy)); 474 return UnhandledUsers; 475 } 476