1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This family of functions perform manipulations on Modules. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Transforms/Utils/ModuleUtils.h" 14 #include "llvm/Analysis/VectorUtils.h" 15 #include "llvm/ADT/SmallString.h" 16 #include "llvm/IR/DerivedTypes.h" 17 #include "llvm/IR/Function.h" 18 #include "llvm/IR/IRBuilder.h" 19 #include "llvm/IR/MDBuilder.h" 20 #include "llvm/IR/Module.h" 21 #include "llvm/Support/raw_ostream.h" 22 #include "llvm/Support/xxhash.h" 23 24 using namespace llvm; 25 26 #define DEBUG_TYPE "moduleutils" 27 28 static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F, 29 int Priority, Constant *Data) { 30 IRBuilder<> IRB(M.getContext()); 31 FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); 32 33 // Get the current set of static global constructors and add the new ctor 34 // to the list. 35 SmallVector<Constant *, 16> CurrentCtors; 36 StructType *EltTy; 37 if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) { 38 EltTy = cast<StructType>(GVCtor->getValueType()->getArrayElementType()); 39 if (Constant *Init = GVCtor->getInitializer()) { 40 unsigned n = Init->getNumOperands(); 41 CurrentCtors.reserve(n + 1); 42 for (unsigned i = 0; i != n; ++i) 43 CurrentCtors.push_back(cast<Constant>(Init->getOperand(i))); 44 } 45 GVCtor->eraseFromParent(); 46 } else { 47 EltTy = StructType::get( 48 IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()), 49 IRB.getInt8PtrTy()); 50 } 51 52 // Build a 3 field global_ctor entry. We don't take a comdat key. 53 Constant *CSVals[3]; 54 CSVals[0] = IRB.getInt32(Priority); 55 CSVals[1] = F; 56 CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy()) 57 : Constant::getNullValue(IRB.getInt8PtrTy()); 58 Constant *RuntimeCtorInit = 59 ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements())); 60 61 CurrentCtors.push_back(RuntimeCtorInit); 62 63 // Create a new initializer. 64 ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size()); 65 Constant *NewInit = ConstantArray::get(AT, CurrentCtors); 66 67 // Create the new global variable and replace all uses of 68 // the old global variable with the new one. 69 (void)new GlobalVariable(M, NewInit->getType(), false, 70 GlobalValue::AppendingLinkage, NewInit, ArrayName); 71 } 72 73 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) { 74 appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data); 75 } 76 77 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) { 78 appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data); 79 } 80 81 static void collectUsedGlobals(GlobalVariable *GV, 82 SmallSetVector<Constant *, 16> &Init) { 83 if (!GV || !GV->hasInitializer()) 84 return; 85 86 auto *CA = cast<ConstantArray>(GV->getInitializer()); 87 for (Use &Op : CA->operands()) 88 Init.insert(cast<Constant>(Op)); 89 } 90 91 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) { 92 GlobalVariable *GV = M.getGlobalVariable(Name); 93 94 SmallSetVector<Constant *, 16> Init; 95 collectUsedGlobals(GV, Init); 96 if (GV) 97 GV->eraseFromParent(); 98 99 Type *ArrayEltTy = llvm::Type::getInt8PtrTy(M.getContext()); 100 for (auto *V : Values) 101 Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy)); 102 103 if (Init.empty()) 104 return; 105 106 ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size()); 107 GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, 108 ConstantArray::get(ATy, Init.getArrayRef()), 109 Name); 110 GV->setSection("llvm.metadata"); 111 } 112 113 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) { 114 appendToUsedList(M, "llvm.used", Values); 115 } 116 117 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) { 118 appendToUsedList(M, "llvm.compiler.used", Values); 119 } 120 121 static void removeFromUsedList(Module &M, StringRef Name, 122 function_ref<bool(Constant *)> ShouldRemove) { 123 GlobalVariable *GV = M.getNamedGlobal(Name); 124 if (!GV) 125 return; 126 127 SmallSetVector<Constant *, 16> Init; 128 collectUsedGlobals(GV, Init); 129 130 Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType(); 131 132 SmallVector<Constant *, 16> NewInit; 133 for (Constant *MaybeRemoved : Init) { 134 if (!ShouldRemove(MaybeRemoved->stripPointerCasts())) 135 NewInit.push_back(MaybeRemoved); 136 } 137 138 if (!NewInit.empty()) { 139 ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size()); 140 GlobalVariable *NewGV = 141 new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, 142 ConstantArray::get(ATy, NewInit), "", GV, 143 GV->getThreadLocalMode(), GV->getAddressSpace()); 144 NewGV->setSection(GV->getSection()); 145 NewGV->takeName(GV); 146 } 147 148 GV->eraseFromParent(); 149 } 150 151 void llvm::removeFromUsedLists(Module &M, 152 function_ref<bool(Constant *)> ShouldRemove) { 153 removeFromUsedList(M, "llvm.used", ShouldRemove); 154 removeFromUsedList(M, "llvm.compiler.used", ShouldRemove); 155 } 156 157 void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) { 158 if (!M.getModuleFlag("kcfi")) 159 return; 160 // Matches CodeGenModule::CreateKCFITypeId in Clang. 161 LLVMContext &Ctx = M.getContext(); 162 MDBuilder MDB(Ctx); 163 F.setMetadata( 164 LLVMContext::MD_kcfi_type, 165 MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( 166 Type::getInt32Ty(Ctx), 167 static_cast<uint32_t>(xxHash64(MangledType)))))); 168 // If the module was compiled with -fpatchable-function-entry, ensure 169 // we use the same patchable-function-prefix. 170 if (auto *MD = mdconst::extract_or_null<ConstantInt>( 171 M.getModuleFlag("kcfi-offset"))) { 172 if (unsigned Offset = MD->getZExtValue()) 173 F.addFnAttr("patchable-function-prefix", std::to_string(Offset)); 174 } 175 } 176 177 FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName, 178 ArrayRef<Type *> InitArgTypes, 179 bool Weak) { 180 assert(!InitName.empty() && "Expected init function name"); 181 auto *VoidTy = Type::getVoidTy(M.getContext()); 182 auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false); 183 auto FnCallee = M.getOrInsertFunction(InitName, FnTy); 184 auto *Fn = cast<Function>(FnCallee.getCallee()); 185 if (Weak && Fn->isDeclaration()) 186 Fn->setLinkage(Function::ExternalWeakLinkage); 187 return FnCallee; 188 } 189 190 Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) { 191 Function *Ctor = Function::createWithDefaultAttr( 192 FunctionType::get(Type::getVoidTy(M.getContext()), false), 193 GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(), 194 CtorName, &M); 195 Ctor->addFnAttr(Attribute::NoUnwind); 196 setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void) 197 BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor); 198 ReturnInst::Create(M.getContext(), CtorBB); 199 // Ensure Ctor cannot be discarded, even if in a comdat. 200 appendToUsed(M, {Ctor}); 201 return Ctor; 202 } 203 204 std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions( 205 Module &M, StringRef CtorName, StringRef InitName, 206 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, 207 StringRef VersionCheckName, bool Weak) { 208 assert(!InitName.empty() && "Expected init function name"); 209 assert(InitArgs.size() == InitArgTypes.size() && 210 "Sanitizer's init function expects different number of arguments"); 211 FunctionCallee InitFunction = 212 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak); 213 Function *Ctor = createSanitizerCtor(M, CtorName); 214 IRBuilder<> IRB(M.getContext()); 215 216 BasicBlock *RetBB = &Ctor->getEntryBlock(); 217 if (Weak) { 218 RetBB->setName("ret"); 219 auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB); 220 auto *CallInitBB = 221 BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB); 222 auto *InitFn = cast<Function>(InitFunction.getCallee()); 223 auto *InitFnPtr = 224 PointerType::get(InitFn->getType(), InitFn->getAddressSpace()); 225 IRB.SetInsertPoint(EntryBB); 226 Value *InitNotNull = 227 IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr)); 228 IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB); 229 IRB.SetInsertPoint(CallInitBB); 230 } else { 231 IRB.SetInsertPoint(RetBB->getTerminator()); 232 } 233 234 IRB.CreateCall(InitFunction, InitArgs); 235 if (!VersionCheckName.empty()) { 236 FunctionCallee VersionCheckFunction = M.getOrInsertFunction( 237 VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false), 238 AttributeList()); 239 IRB.CreateCall(VersionCheckFunction, {}); 240 } 241 242 if (Weak) 243 IRB.CreateBr(RetBB); 244 245 return std::make_pair(Ctor, InitFunction); 246 } 247 248 std::pair<Function *, FunctionCallee> 249 llvm::getOrCreateSanitizerCtorAndInitFunctions( 250 Module &M, StringRef CtorName, StringRef InitName, 251 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, 252 function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback, 253 StringRef VersionCheckName, bool Weak) { 254 assert(!CtorName.empty() && "Expected ctor function name"); 255 256 if (Function *Ctor = M.getFunction(CtorName)) 257 // FIXME: Sink this logic into the module, similar to the handling of 258 // globals. This will make moving to a concurrent model much easier. 259 if (Ctor->arg_empty() || 260 Ctor->getReturnType() == Type::getVoidTy(M.getContext())) 261 return {Ctor, 262 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)}; 263 264 Function *Ctor; 265 FunctionCallee InitFunction; 266 std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions( 267 M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak); 268 FunctionsCreatedCallback(Ctor, InitFunction); 269 return std::make_pair(Ctor, InitFunction); 270 } 271 272 void llvm::filterDeadComdatFunctions( 273 SmallVectorImpl<Function *> &DeadComdatFunctions) { 274 SmallPtrSet<Function *, 32> MaybeDeadFunctions; 275 SmallPtrSet<Comdat *, 32> MaybeDeadComdats; 276 for (Function *F : DeadComdatFunctions) { 277 MaybeDeadFunctions.insert(F); 278 if (Comdat *C = F->getComdat()) 279 MaybeDeadComdats.insert(C); 280 } 281 282 // Find comdats for which all users are dead now. 283 SmallPtrSet<Comdat *, 32> DeadComdats; 284 for (Comdat *C : MaybeDeadComdats) { 285 auto IsUserDead = [&](GlobalObject *GO) { 286 auto *F = dyn_cast<Function>(GO); 287 return F && MaybeDeadFunctions.contains(F); 288 }; 289 if (all_of(C->getUsers(), IsUserDead)) 290 DeadComdats.insert(C); 291 } 292 293 // Only keep functions which have no comdat or a dead comdat. 294 erase_if(DeadComdatFunctions, [&](Function *F) { 295 Comdat *C = F->getComdat(); 296 return C && !DeadComdats.contains(C); 297 }); 298 } 299 300 std::string llvm::getUniqueModuleId(Module *M) { 301 MD5 Md5; 302 bool ExportsSymbols = false; 303 auto AddGlobal = [&](GlobalValue &GV) { 304 if (GV.isDeclaration() || GV.getName().startswith("llvm.") || 305 !GV.hasExternalLinkage() || GV.hasComdat()) 306 return; 307 ExportsSymbols = true; 308 Md5.update(GV.getName()); 309 Md5.update(ArrayRef<uint8_t>{0}); 310 }; 311 312 for (auto &F : *M) 313 AddGlobal(F); 314 for (auto &GV : M->globals()) 315 AddGlobal(GV); 316 for (auto &GA : M->aliases()) 317 AddGlobal(GA); 318 for (auto &IF : M->ifuncs()) 319 AddGlobal(IF); 320 321 if (!ExportsSymbols) 322 return ""; 323 324 MD5::MD5Result R; 325 Md5.final(R); 326 327 SmallString<32> Str; 328 MD5::stringifyResult(R, Str); 329 return ("." + Str).str(); 330 } 331 332 void VFABI::setVectorVariantNames(CallInst *CI, 333 ArrayRef<std::string> VariantMappings) { 334 if (VariantMappings.empty()) 335 return; 336 337 SmallString<256> Buffer; 338 llvm::raw_svector_ostream Out(Buffer); 339 for (const std::string &VariantMapping : VariantMappings) 340 Out << VariantMapping << ","; 341 // Get rid of the trailing ','. 342 assert(!Buffer.str().empty() && "Must have at least one char."); 343 Buffer.pop_back(); 344 345 Module *M = CI->getModule(); 346 #ifndef NDEBUG 347 for (const std::string &VariantMapping : VariantMappings) { 348 LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n"); 349 std::optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M); 350 assert(VI && "Cannot add an invalid VFABI name."); 351 assert(M->getNamedValue(VI->VectorName) && 352 "Cannot add variant to attribute: " 353 "vector function declaration is missing."); 354 } 355 #endif 356 CI->addFnAttr( 357 Attribute::get(M->getContext(), MappingsAttrName, Buffer.str())); 358 } 359 360 void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf, 361 StringRef SectionName, Align Alignment) { 362 // Embed the memory buffer into the module. 363 Constant *ModuleConstant = ConstantDataArray::get( 364 M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize())); 365 GlobalVariable *GV = new GlobalVariable( 366 M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage, 367 ModuleConstant, "llvm.embedded.object"); 368 GV->setSection(SectionName); 369 GV->setAlignment(Alignment); 370 371 LLVMContext &Ctx = M.getContext(); 372 NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects"); 373 Metadata *MDVals[] = {ConstantAsMetadata::get(GV), 374 MDString::get(Ctx, SectionName)}; 375 376 MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); 377 GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {})); 378 379 appendToCompilerUsed(M, GV); 380 } 381 382 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor( 383 Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) { 384 SmallVector<GlobalIFunc *, 32> AllIFuncs; 385 ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower; 386 if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs 387 for (GlobalIFunc &GI : M.ifuncs()) 388 AllIFuncs.push_back(&GI); 389 IFuncsToLower = AllIFuncs; 390 } 391 392 bool UnhandledUsers = false; 393 LLVMContext &Ctx = M.getContext(); 394 const DataLayout &DL = M.getDataLayout(); 395 396 PointerType *TableEntryTy = 397 PointerType::get(Ctx, DL.getProgramAddressSpace()); 398 399 ArrayType *FuncPtrTableTy = 400 ArrayType::get(TableEntryTy, IFuncsToLower.size()); 401 402 Align PtrAlign = DL.getABITypeAlign(TableEntryTy); 403 404 // Create a global table of function pointers we'll initialize in a global 405 // constructor. 406 auto *FuncPtrTable = new GlobalVariable( 407 M, FuncPtrTableTy, false, GlobalValue::InternalLinkage, 408 PoisonValue::get(FuncPtrTableTy), "", nullptr, 409 GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace()); 410 FuncPtrTable->setAlignment(PtrAlign); 411 412 // Create a function to initialize the function pointer table. 413 Function *NewCtor = Function::Create( 414 FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage, 415 DL.getProgramAddressSpace(), "", &M); 416 417 BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor); 418 IRBuilder<> InitBuilder(BB); 419 420 size_t TableIndex = 0; 421 for (GlobalIFunc *GI : IFuncsToLower) { 422 Function *ResolvedFunction = GI->getResolverFunction(); 423 424 // We don't know what to pass to a resolver function taking arguments 425 // 426 // FIXME: Is this even valid? clang and gcc don't complain but this 427 // probably should be invalid IR. We could just pass through undef. 428 if (!std::empty(ResolvedFunction->getFunctionType()->params())) { 429 LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function " 430 << ResolvedFunction->getName() << " with parameters\n"); 431 UnhandledUsers = true; 432 continue; 433 } 434 435 // Initialize the function pointer table. 436 CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction); 437 Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy); 438 Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32( 439 FuncPtrTableTy, FuncPtrTable, 0, TableIndex++)); 440 InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign); 441 442 // Update all users to load a pointer from the global table. 443 for (User *User : make_early_inc_range(GI->users())) { 444 Instruction *UserInst = dyn_cast<Instruction>(User); 445 if (!UserInst) { 446 // TODO: Should handle constantexpr casts in user instructions. Probably 447 // can't do much about constant initializers. 448 UnhandledUsers = true; 449 continue; 450 } 451 452 IRBuilder<> UseBuilder(UserInst); 453 LoadInst *ResolvedTarget = 454 UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign); 455 Value *ResolvedCast = 456 UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType()); 457 UserInst->replaceUsesOfWith(GI, ResolvedCast); 458 } 459 460 // If we handled all users, erase the ifunc. 461 if (GI->use_empty()) 462 GI->eraseFromParent(); 463 } 464 465 InitBuilder.CreateRetVoid(); 466 467 PointerType *ConstantDataTy = PointerType::get(Ctx, 0); 468 469 // TODO: Is this the right priority? Probably should be before any other 470 // constructors? 471 const int Priority = 10; 472 appendToGlobalCtors(M, NewCtor, Priority, 473 ConstantPointerNull::get(ConstantDataTy)); 474 return UnhandledUsers; 475 } 476