1 //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is a part of SanitizerBinaryMetadata. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h" 14 #include "llvm/ADT/SetVector.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/ADT/Statistic.h" 17 #include "llvm/ADT/StringExtras.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/Analysis/CaptureTracking.h" 21 #include "llvm/Analysis/ValueTracking.h" 22 #include "llvm/IR/Constant.h" 23 #include "llvm/IR/DerivedTypes.h" 24 #include "llvm/IR/Function.h" 25 #include "llvm/IR/GlobalValue.h" 26 #include "llvm/IR/GlobalVariable.h" 27 #include "llvm/IR/IRBuilder.h" 28 #include "llvm/IR/Instruction.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/LLVMContext.h" 31 #include "llvm/IR/MDBuilder.h" 32 #include "llvm/IR/Metadata.h" 33 #include "llvm/IR/Module.h" 34 #include "llvm/IR/Type.h" 35 #include "llvm/IR/Value.h" 36 #include "llvm/ProfileData/InstrProf.h" 37 #include "llvm/Support/Allocator.h" 38 #include "llvm/Support/CommandLine.h" 39 #include "llvm/Support/Debug.h" 40 #include "llvm/Support/SpecialCaseList.h" 41 #include "llvm/Support/StringSaver.h" 42 #include "llvm/Support/VirtualFileSystem.h" 43 #include "llvm/TargetParser/Triple.h" 44 #include "llvm/Transforms/Utils/ModuleUtils.h" 45 46 #include <array> 47 #include <cstdint> 48 #include <memory> 49 50 using namespace llvm; 51 52 #define DEBUG_TYPE "sanmd" 53 54 namespace { 55 56 //===--- Constants --------------------------------------------------------===// 57 58 constexpr uint32_t kVersionBase = 2; // occupies lower 16 bits 59 constexpr uint32_t kVersionPtrSizeRel = (1u << 16); // offsets are pointer-sized 60 constexpr int kCtorDtorPriority = 2; 61 62 // Pairs of names of initialization callback functions and which section 63 // contains the relevant metadata. 64 class MetadataInfo { 65 public: 66 const StringRef FunctionPrefix; 67 const StringRef SectionSuffix; 68 69 static const MetadataInfo Covered; 70 static const MetadataInfo Atomics; 71 72 private: 73 // Forbid construction elsewhere. 74 explicit constexpr MetadataInfo(StringRef FunctionPrefix, 75 StringRef SectionSuffix) 76 : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {} 77 }; 78 const MetadataInfo MetadataInfo::Covered{ 79 "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection}; 80 const MetadataInfo MetadataInfo::Atomics{ 81 "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection}; 82 83 // The only instances of MetadataInfo are the constants above, so a set of 84 // them may simply store pointers to them. To deterministically generate code, 85 // we need to use a set with stable iteration order, such as SetVector. 86 using MetadataInfoSet = SetVector<const MetadataInfo *>; 87 88 //===--- Command-line options ---------------------------------------------===// 89 90 cl::opt<bool> ClWeakCallbacks( 91 "sanitizer-metadata-weak-callbacks", 92 cl::desc("Declare callbacks extern weak, and only call if non-null."), 93 cl::Hidden, cl::init(true)); 94 cl::opt<bool> 95 ClNoSanitize("sanitizer-metadata-nosanitize-attr", 96 cl::desc("Mark some metadata features uncovered in functions " 97 "with associated no_sanitize attributes."), 98 cl::Hidden, cl::init(true)); 99 100 cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered", 101 cl::desc("Emit PCs for covered functions."), 102 cl::Hidden, cl::init(false)); 103 cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics", 104 cl::desc("Emit PCs for atomic operations."), 105 cl::Hidden, cl::init(false)); 106 cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar", 107 cl::desc("Emit PCs for start of functions that are " 108 "subject for use-after-return checking"), 109 cl::Hidden, cl::init(false)); 110 111 //===--- Statistics -------------------------------------------------------===// 112 113 STATISTIC(NumMetadataCovered, "Metadata attached to covered functions"); 114 STATISTIC(NumMetadataAtomics, "Metadata attached to atomics"); 115 STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions"); 116 117 //===----------------------------------------------------------------------===// 118 119 // Apply opt overrides. 120 SanitizerBinaryMetadataOptions && 121 transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) { 122 Opts.Covered |= ClEmitCovered; 123 Opts.Atomics |= ClEmitAtomics; 124 Opts.UAR |= ClEmitUAR; 125 return std::move(Opts); 126 } 127 128 class SanitizerBinaryMetadata { 129 public: 130 SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts, 131 std::unique_ptr<SpecialCaseList> Ignorelist) 132 : Mod(M), Options(transformOptionsFromCl(std::move(Opts))), 133 Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()), 134 VersionStr(utostr(getVersion())), IRB(M.getContext()) { 135 // FIXME: Make it work with other formats. 136 assert(TargetTriple.isOSBinFormatELF() && "ELF only"); 137 assert(!(TargetTriple.isNVPTX() || TargetTriple.isAMDGPU()) && 138 "Device targets are not supported"); 139 } 140 141 bool run(); 142 143 private: 144 uint32_t getVersion() const { 145 uint32_t Version = kVersionBase; 146 const auto CM = Mod.getCodeModel(); 147 if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large)) 148 Version |= kVersionPtrSizeRel; 149 return Version; 150 } 151 152 void runOn(Function &F, MetadataInfoSet &MIS); 153 154 // Determines which set of metadata to collect for this instruction. 155 // 156 // Returns true if covered metadata is required to unambiguously interpret 157 // other metadata. For example, if we are interested in atomics metadata, any 158 // function with memory operations (atomic or not) requires covered metadata 159 // to determine if a memory operation is atomic or not in modules compiled 160 // with SanitizerBinaryMetadata. 161 bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB, 162 uint64_t &FeatureMask); 163 164 // Get start/end section marker pointer. 165 GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty); 166 167 // Returns the target-dependent section name. 168 StringRef getSectionName(StringRef SectionSuffix); 169 170 // Returns the section start marker name. 171 StringRef getSectionStart(StringRef SectionSuffix); 172 173 // Returns the section end marker name. 174 StringRef getSectionEnd(StringRef SectionSuffix); 175 176 // Returns true if the access to the address should be considered "atomic". 177 bool pretendAtomicAccess(const Value *Addr); 178 179 Module &Mod; 180 const SanitizerBinaryMetadataOptions Options; 181 std::unique_ptr<SpecialCaseList> Ignorelist; 182 const Triple TargetTriple; 183 const std::string VersionStr; 184 IRBuilder<> IRB; 185 BumpPtrAllocator Alloc; 186 UniqueStringSaver StringPool{Alloc}; 187 }; 188 189 bool SanitizerBinaryMetadata::run() { 190 MetadataInfoSet MIS; 191 192 for (Function &F : Mod) 193 runOn(F, MIS); 194 195 if (MIS.empty()) 196 return false; 197 198 // 199 // Setup constructors and call all initialization functions for requested 200 // metadata features. 201 // 202 203 auto *PtrTy = IRB.getPtrTy(); 204 auto *Int32Ty = IRB.getInt32Ty(); 205 const std::array<Type *, 3> InitTypes = {Int32Ty, PtrTy, PtrTy}; 206 auto *Version = ConstantInt::get(Int32Ty, getVersion()); 207 208 for (const MetadataInfo *MI : MIS) { 209 const std::array<Value *, InitTypes.size()> InitArgs = { 210 Version, 211 getSectionMarker(getSectionStart(MI->SectionSuffix), PtrTy), 212 getSectionMarker(getSectionEnd(MI->SectionSuffix), PtrTy), 213 }; 214 215 // Calls to the initialization functions with different versions cannot be 216 // merged. Give the structors unique names based on the version, which will 217 // also be used as the COMDAT key. 218 const std::string StructorPrefix = (MI->FunctionPrefix + VersionStr).str(); 219 220 // We declare the _add and _del functions as weak, and only call them if 221 // there is a valid symbol linked. This allows building binaries with 222 // semantic metadata, but without having callbacks. When a tool that wants 223 // the metadata is linked which provides the callbacks, they will be called. 224 Function *Ctor = 225 createSanitizerCtorAndInitFunctions( 226 Mod, StructorPrefix + ".module_ctor", 227 (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs, 228 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks) 229 .first; 230 Function *Dtor = 231 createSanitizerCtorAndInitFunctions( 232 Mod, StructorPrefix + ".module_dtor", 233 (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs, 234 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks) 235 .first; 236 Constant *CtorComdatKey = nullptr; 237 Constant *DtorComdatKey = nullptr; 238 if (TargetTriple.supportsCOMDAT()) { 239 // Use COMDAT to deduplicate constructor/destructor function. The COMDAT 240 // key needs to be a non-local linkage. 241 Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName())); 242 Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName())); 243 Ctor->setLinkage(GlobalValue::ExternalLinkage); 244 Dtor->setLinkage(GlobalValue::ExternalLinkage); 245 // DSOs should _not_ call another constructor/destructor! 246 Ctor->setVisibility(GlobalValue::HiddenVisibility); 247 Dtor->setVisibility(GlobalValue::HiddenVisibility); 248 CtorComdatKey = Ctor; 249 DtorComdatKey = Dtor; 250 } 251 appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorComdatKey); 252 appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorComdatKey); 253 } 254 255 return true; 256 } 257 258 void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) { 259 if (F.empty()) 260 return; 261 if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) 262 return; 263 if (Ignorelist && Ignorelist->inSection("metadata", "fun", F.getName())) 264 return; 265 // Don't touch available_externally functions, their actual body is elsewhere. 266 if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) 267 return; 268 269 MDBuilder MDB(F.getContext()); 270 271 // The metadata features enabled for this function, stored along covered 272 // metadata (if enabled). 273 uint64_t FeatureMask = 0; 274 // Don't emit unnecessary covered metadata for all functions to save space. 275 bool RequiresCovered = false; 276 277 if (Options.Atomics || Options.UAR) { 278 for (BasicBlock &BB : F) 279 for (Instruction &I : BB) 280 RequiresCovered |= runOn(I, MIS, MDB, FeatureMask); 281 } 282 283 if (ClNoSanitize && F.hasFnAttribute("no_sanitize_thread")) 284 FeatureMask &= ~kSanitizerBinaryMetadataAtomics; 285 if (F.isVarArg()) 286 FeatureMask &= ~kSanitizerBinaryMetadataUAR; 287 if (FeatureMask & kSanitizerBinaryMetadataUAR) { 288 RequiresCovered = true; 289 NumMetadataUAR++; 290 } 291 292 // Covered metadata is always emitted if explicitly requested, otherwise only 293 // if some other metadata requires it to unambiguously interpret it for 294 // modules compiled with SanitizerBinaryMetadata. 295 if (Options.Covered || (FeatureMask && RequiresCovered)) { 296 NumMetadataCovered++; 297 const auto *MI = &MetadataInfo::Covered; 298 MIS.insert(MI); 299 const StringRef Section = getSectionName(MI->SectionSuffix); 300 // The feature mask will be placed after the function size. 301 Constant *CFM = IRB.getInt64(FeatureMask); 302 F.setMetadata(LLVMContext::MD_pcsections, 303 MDB.createPCSections({{Section, {CFM}}})); 304 } 305 } 306 307 bool isUARSafeCall(CallInst *CI) { 308 auto *F = CI->getCalledFunction(); 309 // There are no intrinsic functions that leak arguments. 310 // If the called function does not return, the current function 311 // does not return as well, so no possibility of use-after-return. 312 // Sanitizer function also don't leak or don't return. 313 // It's safe to both pass pointers to local variables to them 314 // and to tail-call them. 315 return F && (F->isIntrinsic() || F->doesNotReturn() || 316 F->getName().starts_with("__asan_") || 317 F->getName().starts_with("__hwsan_") || 318 F->getName().starts_with("__ubsan_") || 319 F->getName().starts_with("__msan_") || 320 F->getName().starts_with("__tsan_")); 321 } 322 323 bool hasUseAfterReturnUnsafeUses(Value &V) { 324 for (User *U : V.users()) { 325 if (auto *I = dyn_cast<Instruction>(U)) { 326 if (I->isLifetimeStartOrEnd() || I->isDroppable()) 327 continue; 328 if (auto *CI = dyn_cast<CallInst>(U)) { 329 if (isUARSafeCall(CI)) 330 continue; 331 } 332 if (isa<LoadInst>(U)) 333 continue; 334 if (auto *SI = dyn_cast<StoreInst>(U)) { 335 // If storing TO the alloca, then the address isn't taken. 336 if (SI->getOperand(1) == &V) 337 continue; 338 } 339 if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) { 340 if (!hasUseAfterReturnUnsafeUses(*GEPI)) 341 continue; 342 } else if (auto *BCI = dyn_cast<BitCastInst>(U)) { 343 if (!hasUseAfterReturnUnsafeUses(*BCI)) 344 continue; 345 } 346 } 347 return true; 348 } 349 return false; 350 } 351 352 bool useAfterReturnUnsafe(Instruction &I) { 353 if (isa<AllocaInst>(I)) 354 return hasUseAfterReturnUnsafeUses(I); 355 // Tail-called functions are not necessary intercepted 356 // at runtime because there is no call instruction. 357 // So conservatively mark the caller as requiring checking. 358 else if (auto *CI = dyn_cast<CallInst>(&I)) 359 return CI->isTailCall() && !isUARSafeCall(CI); 360 return false; 361 } 362 363 bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) { 364 if (!Addr) 365 return false; 366 367 Addr = Addr->stripInBoundsOffsets(); 368 auto *GV = dyn_cast<GlobalVariable>(Addr); 369 if (!GV) 370 return false; 371 372 // Some compiler-generated accesses are known racy, to avoid false positives 373 // in data-race analysis pretend they're atomic. 374 if (GV->hasSection()) { 375 const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat(); 376 const auto ProfSec = 377 getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false); 378 if (GV->getSection().ends_with(ProfSec)) 379 return true; 380 } 381 if (GV->getName().starts_with("__llvm_gcov") || 382 GV->getName().starts_with("__llvm_gcda")) 383 return true; 384 385 return false; 386 } 387 388 // Returns true if the memory at `Addr` may be shared with other threads. 389 bool maybeSharedMutable(const Value *Addr) { 390 // By default assume memory may be shared. 391 if (!Addr) 392 return true; 393 394 if (isa<AllocaInst>(getUnderlyingObject(Addr)) && 395 !PointerMayBeCaptured(Addr, true, true)) 396 return false; // Object is on stack but does not escape. 397 398 Addr = Addr->stripInBoundsOffsets(); 399 if (auto *GV = dyn_cast<GlobalVariable>(Addr)) { 400 if (GV->isConstant()) 401 return false; // Shared, but not mutable. 402 } 403 404 return true; 405 } 406 407 bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS, 408 MDBuilder &MDB, uint64_t &FeatureMask) { 409 SmallVector<const MetadataInfo *, 1> InstMetadata; 410 bool RequiresCovered = false; 411 412 // Only call if at least 1 type of metadata is requested. 413 assert(Options.UAR || Options.Atomics); 414 415 if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) { 416 if (useAfterReturnUnsafe(I)) 417 FeatureMask |= kSanitizerBinaryMetadataUAR; 418 } 419 420 if (Options.Atomics) { 421 const Value *Addr = nullptr; 422 if (auto *SI = dyn_cast<StoreInst>(&I)) 423 Addr = SI->getPointerOperand(); 424 else if (auto *LI = dyn_cast<LoadInst>(&I)) 425 Addr = LI->getPointerOperand(); 426 427 if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) { 428 auto SSID = getAtomicSyncScopeID(&I); 429 if ((SSID.has_value() && *SSID != SyncScope::SingleThread) || 430 pretendAtomicAccess(Addr)) { 431 NumMetadataAtomics++; 432 InstMetadata.push_back(&MetadataInfo::Atomics); 433 } 434 FeatureMask |= kSanitizerBinaryMetadataAtomics; 435 RequiresCovered = true; 436 } 437 } 438 439 // Attach MD_pcsections to instruction. 440 if (!InstMetadata.empty()) { 441 MIS.insert(InstMetadata.begin(), InstMetadata.end()); 442 SmallVector<MDBuilder::PCSection, 1> Sections; 443 for (const auto &MI : InstMetadata) 444 Sections.push_back({getSectionName(MI->SectionSuffix), {}}); 445 I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections)); 446 } 447 448 return RequiresCovered; 449 } 450 451 GlobalVariable * 452 SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) { 453 // Use ExternalWeak so that if all sections are discarded due to section 454 // garbage collection, the linker will not report undefined symbol errors. 455 auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false, 456 GlobalVariable::ExternalWeakLinkage, 457 /*Initializer=*/nullptr, MarkerName); 458 Marker->setVisibility(GlobalValue::HiddenVisibility); 459 return Marker; 460 } 461 462 StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) { 463 // FIXME: Other TargetTriples. 464 // Request ULEB128 encoding for all integer constants. 465 return StringPool.save(SectionSuffix + VersionStr + "!C"); 466 } 467 468 StringRef SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) { 469 // Twine only concatenates 2 strings; with >2 strings, concatenating them 470 // creates Twine temporaries, and returning the final Twine no longer works 471 // because we'd end up with a stack-use-after-return. So here we also use the 472 // StringPool to store the new string. 473 return StringPool.save("__start_" + SectionSuffix + VersionStr); 474 } 475 476 StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) { 477 return StringPool.save("__stop_" + SectionSuffix + VersionStr); 478 } 479 480 } // namespace 481 482 SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass( 483 SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles) 484 : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {} 485 486 PreservedAnalyses 487 SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) { 488 std::unique_ptr<SpecialCaseList> Ignorelist; 489 if (!IgnorelistFiles.empty()) { 490 Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles, 491 *vfs::getRealFileSystem()); 492 if (Ignorelist->inSection("metadata", "src", M.getSourceFileName())) 493 return PreservedAnalyses::all(); 494 } 495 496 SanitizerBinaryMetadata Pass(M, Options, std::move(Ignorelist)); 497 if (Pass.run()) 498 return PreservedAnalyses::none(); 499 return PreservedAnalyses::all(); 500 } 501