1 //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is a part of SanitizerBinaryMetadata. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h" 14 #include "llvm/ADT/SetVector.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/ADT/Statistic.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/ADT/Twine.h" 19 #include "llvm/Analysis/CaptureTracking.h" 20 #include "llvm/Analysis/ValueTracking.h" 21 #include "llvm/IR/Constant.h" 22 #include "llvm/IR/DerivedTypes.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/GlobalValue.h" 25 #include "llvm/IR/GlobalVariable.h" 26 #include "llvm/IR/IRBuilder.h" 27 #include "llvm/IR/Instruction.h" 28 #include "llvm/IR/Instructions.h" 29 #include "llvm/IR/LLVMContext.h" 30 #include "llvm/IR/MDBuilder.h" 31 #include "llvm/IR/Metadata.h" 32 #include "llvm/IR/Module.h" 33 #include "llvm/IR/Type.h" 34 #include "llvm/IR/Value.h" 35 #include "llvm/ProfileData/InstrProf.h" 36 #include "llvm/Support/Allocator.h" 37 #include "llvm/Support/CommandLine.h" 38 #include "llvm/Support/Debug.h" 39 #include "llvm/Support/SpecialCaseList.h" 40 #include "llvm/Support/StringSaver.h" 41 #include "llvm/Support/VirtualFileSystem.h" 42 #include "llvm/TargetParser/Triple.h" 43 #include "llvm/Transforms/Utils/ModuleUtils.h" 44 45 #include <array> 46 #include <cstdint> 47 #include <memory> 48 49 using namespace llvm; 50 51 #define DEBUG_TYPE "sanmd" 52 53 namespace { 54 55 //===--- Constants --------------------------------------------------------===// 56 57 constexpr uint32_t kVersionBase = 2; // occupies lower 16 bits 58 constexpr uint32_t kVersionPtrSizeRel = (1u << 16); // offsets are pointer-sized 59 constexpr int kCtorDtorPriority = 2; 60 61 // Pairs of names of initialization callback functions and which section 62 // contains the relevant metadata. 63 class MetadataInfo { 64 public: 65 const StringRef FunctionPrefix; 66 const StringRef SectionSuffix; 67 68 static const MetadataInfo Covered; 69 static const MetadataInfo Atomics; 70 71 private: 72 // Forbid construction elsewhere. 73 explicit constexpr MetadataInfo(StringRef FunctionPrefix, 74 StringRef SectionSuffix) 75 : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {} 76 }; 77 const MetadataInfo MetadataInfo::Covered{ 78 "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection}; 79 const MetadataInfo MetadataInfo::Atomics{ 80 "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection}; 81 82 // The only instances of MetadataInfo are the constants above, so a set of 83 // them may simply store pointers to them. To deterministically generate code, 84 // we need to use a set with stable iteration order, such as SetVector. 85 using MetadataInfoSet = SetVector<const MetadataInfo *>; 86 87 //===--- Command-line options ---------------------------------------------===// 88 89 cl::opt<bool> ClWeakCallbacks( 90 "sanitizer-metadata-weak-callbacks", 91 cl::desc("Declare callbacks extern weak, and only call if non-null."), 92 cl::Hidden, cl::init(true)); 93 cl::opt<bool> 94 ClNoSanitize("sanitizer-metadata-nosanitize-attr", 95 cl::desc("Mark some metadata features uncovered in functions " 96 "with associated no_sanitize attributes."), 97 cl::Hidden, cl::init(true)); 98 99 cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered", 100 cl::desc("Emit PCs for covered functions."), 101 cl::Hidden, cl::init(false)); 102 cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics", 103 cl::desc("Emit PCs for atomic operations."), 104 cl::Hidden, cl::init(false)); 105 cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar", 106 cl::desc("Emit PCs for start of functions that are " 107 "subject for use-after-return checking"), 108 cl::Hidden, cl::init(false)); 109 110 //===--- Statistics -------------------------------------------------------===// 111 112 STATISTIC(NumMetadataCovered, "Metadata attached to covered functions"); 113 STATISTIC(NumMetadataAtomics, "Metadata attached to atomics"); 114 STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions"); 115 116 //===----------------------------------------------------------------------===// 117 118 // Apply opt overrides. 119 SanitizerBinaryMetadataOptions && 120 transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) { 121 Opts.Covered |= ClEmitCovered; 122 Opts.Atomics |= ClEmitAtomics; 123 Opts.UAR |= ClEmitUAR; 124 return std::move(Opts); 125 } 126 127 class SanitizerBinaryMetadata { 128 public: 129 SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts, 130 std::unique_ptr<SpecialCaseList> Ignorelist) 131 : Mod(M), Options(transformOptionsFromCl(std::move(Opts))), 132 Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()), 133 IRB(M.getContext()) { 134 // FIXME: Make it work with other formats. 135 assert(TargetTriple.isOSBinFormatELF() && "ELF only"); 136 assert(!(TargetTriple.isNVPTX() || TargetTriple.isAMDGPU()) && 137 "Device targets are not supported"); 138 } 139 140 bool run(); 141 142 private: 143 uint32_t getVersion() const { 144 uint32_t Version = kVersionBase; 145 const auto CM = Mod.getCodeModel(); 146 if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large)) 147 Version |= kVersionPtrSizeRel; 148 return Version; 149 } 150 151 void runOn(Function &F, MetadataInfoSet &MIS); 152 153 // Determines which set of metadata to collect for this instruction. 154 // 155 // Returns true if covered metadata is required to unambiguously interpret 156 // other metadata. For example, if we are interested in atomics metadata, any 157 // function with memory operations (atomic or not) requires covered metadata 158 // to determine if a memory operation is atomic or not in modules compiled 159 // with SanitizerBinaryMetadata. 160 bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB, 161 uint64_t &FeatureMask); 162 163 // Get start/end section marker pointer. 164 GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty); 165 166 // Returns the target-dependent section name. 167 StringRef getSectionName(StringRef SectionSuffix); 168 169 // Returns the section start marker name. 170 Twine getSectionStart(StringRef SectionSuffix); 171 172 // Returns the section end marker name. 173 Twine getSectionEnd(StringRef SectionSuffix); 174 175 // Returns true if the access to the address should be considered "atomic". 176 bool pretendAtomicAccess(const Value *Addr); 177 178 Module &Mod; 179 const SanitizerBinaryMetadataOptions Options; 180 std::unique_ptr<SpecialCaseList> Ignorelist; 181 const Triple TargetTriple; 182 IRBuilder<> IRB; 183 BumpPtrAllocator Alloc; 184 UniqueStringSaver StringPool{Alloc}; 185 }; 186 187 bool SanitizerBinaryMetadata::run() { 188 MetadataInfoSet MIS; 189 190 for (Function &F : Mod) 191 runOn(F, MIS); 192 193 if (MIS.empty()) 194 return false; 195 196 // 197 // Setup constructors and call all initialization functions for requested 198 // metadata features. 199 // 200 201 auto *PtrTy = IRB.getPtrTy(); 202 auto *Int32Ty = IRB.getInt32Ty(); 203 const std::array<Type *, 3> InitTypes = {Int32Ty, PtrTy, PtrTy}; 204 auto *Version = ConstantInt::get(Int32Ty, getVersion()); 205 206 for (const MetadataInfo *MI : MIS) { 207 const std::array<Value *, InitTypes.size()> InitArgs = { 208 Version, 209 getSectionMarker(getSectionStart(MI->SectionSuffix), PtrTy), 210 getSectionMarker(getSectionEnd(MI->SectionSuffix), PtrTy), 211 }; 212 // We declare the _add and _del functions as weak, and only call them if 213 // there is a valid symbol linked. This allows building binaries with 214 // semantic metadata, but without having callbacks. When a tool that wants 215 // the metadata is linked which provides the callbacks, they will be called. 216 Function *Ctor = 217 createSanitizerCtorAndInitFunctions( 218 Mod, (MI->FunctionPrefix + ".module_ctor").str(), 219 (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs, 220 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks) 221 .first; 222 Function *Dtor = 223 createSanitizerCtorAndInitFunctions( 224 Mod, (MI->FunctionPrefix + ".module_dtor").str(), 225 (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs, 226 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks) 227 .first; 228 Constant *CtorComdatKey = nullptr; 229 Constant *DtorComdatKey = nullptr; 230 if (TargetTriple.supportsCOMDAT()) { 231 // Use COMDAT to deduplicate constructor/destructor function. The COMDAT 232 // key needs to be a non-local linkage. 233 Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName())); 234 Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName())); 235 Ctor->setLinkage(GlobalValue::ExternalLinkage); 236 Dtor->setLinkage(GlobalValue::ExternalLinkage); 237 // DSOs should _not_ call another constructor/destructor! 238 Ctor->setVisibility(GlobalValue::HiddenVisibility); 239 Dtor->setVisibility(GlobalValue::HiddenVisibility); 240 CtorComdatKey = Ctor; 241 DtorComdatKey = Dtor; 242 } 243 appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorComdatKey); 244 appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorComdatKey); 245 } 246 247 return true; 248 } 249 250 void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) { 251 if (F.empty()) 252 return; 253 if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) 254 return; 255 if (Ignorelist && Ignorelist->inSection("metadata", "fun", F.getName())) 256 return; 257 // Don't touch available_externally functions, their actual body is elsewhere. 258 if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) 259 return; 260 261 MDBuilder MDB(F.getContext()); 262 263 // The metadata features enabled for this function, stored along covered 264 // metadata (if enabled). 265 uint64_t FeatureMask = 0; 266 // Don't emit unnecessary covered metadata for all functions to save space. 267 bool RequiresCovered = false; 268 269 if (Options.Atomics || Options.UAR) { 270 for (BasicBlock &BB : F) 271 for (Instruction &I : BB) 272 RequiresCovered |= runOn(I, MIS, MDB, FeatureMask); 273 } 274 275 if (ClNoSanitize && F.hasFnAttribute("no_sanitize_thread")) 276 FeatureMask &= ~kSanitizerBinaryMetadataAtomics; 277 if (F.isVarArg()) 278 FeatureMask &= ~kSanitizerBinaryMetadataUAR; 279 if (FeatureMask & kSanitizerBinaryMetadataUAR) { 280 RequiresCovered = true; 281 NumMetadataUAR++; 282 } 283 284 // Covered metadata is always emitted if explicitly requested, otherwise only 285 // if some other metadata requires it to unambiguously interpret it for 286 // modules compiled with SanitizerBinaryMetadata. 287 if (Options.Covered || (FeatureMask && RequiresCovered)) { 288 NumMetadataCovered++; 289 const auto *MI = &MetadataInfo::Covered; 290 MIS.insert(MI); 291 const StringRef Section = getSectionName(MI->SectionSuffix); 292 // The feature mask will be placed after the function size. 293 Constant *CFM = IRB.getInt64(FeatureMask); 294 F.setMetadata(LLVMContext::MD_pcsections, 295 MDB.createPCSections({{Section, {CFM}}})); 296 } 297 } 298 299 bool isUARSafeCall(CallInst *CI) { 300 auto *F = CI->getCalledFunction(); 301 // There are no intrinsic functions that leak arguments. 302 // If the called function does not return, the current function 303 // does not return as well, so no possibility of use-after-return. 304 // Sanitizer function also don't leak or don't return. 305 // It's safe to both pass pointers to local variables to them 306 // and to tail-call them. 307 return F && (F->isIntrinsic() || F->doesNotReturn() || 308 F->getName().starts_with("__asan_") || 309 F->getName().starts_with("__hwsan_") || 310 F->getName().starts_with("__ubsan_") || 311 F->getName().starts_with("__msan_") || 312 F->getName().starts_with("__tsan_")); 313 } 314 315 bool hasUseAfterReturnUnsafeUses(Value &V) { 316 for (User *U : V.users()) { 317 if (auto *I = dyn_cast<Instruction>(U)) { 318 if (I->isLifetimeStartOrEnd() || I->isDroppable()) 319 continue; 320 if (auto *CI = dyn_cast<CallInst>(U)) { 321 if (isUARSafeCall(CI)) 322 continue; 323 } 324 if (isa<LoadInst>(U)) 325 continue; 326 if (auto *SI = dyn_cast<StoreInst>(U)) { 327 // If storing TO the alloca, then the address isn't taken. 328 if (SI->getOperand(1) == &V) 329 continue; 330 } 331 if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) { 332 if (!hasUseAfterReturnUnsafeUses(*GEPI)) 333 continue; 334 } else if (auto *BCI = dyn_cast<BitCastInst>(U)) { 335 if (!hasUseAfterReturnUnsafeUses(*BCI)) 336 continue; 337 } 338 } 339 return true; 340 } 341 return false; 342 } 343 344 bool useAfterReturnUnsafe(Instruction &I) { 345 if (isa<AllocaInst>(I)) 346 return hasUseAfterReturnUnsafeUses(I); 347 // Tail-called functions are not necessary intercepted 348 // at runtime because there is no call instruction. 349 // So conservatively mark the caller as requiring checking. 350 else if (auto *CI = dyn_cast<CallInst>(&I)) 351 return CI->isTailCall() && !isUARSafeCall(CI); 352 return false; 353 } 354 355 bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) { 356 if (!Addr) 357 return false; 358 359 Addr = Addr->stripInBoundsOffsets(); 360 auto *GV = dyn_cast<GlobalVariable>(Addr); 361 if (!GV) 362 return false; 363 364 // Some compiler-generated accesses are known racy, to avoid false positives 365 // in data-race analysis pretend they're atomic. 366 if (GV->hasSection()) { 367 const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat(); 368 const auto ProfSec = 369 getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false); 370 if (GV->getSection().ends_with(ProfSec)) 371 return true; 372 } 373 if (GV->getName().starts_with("__llvm_gcov") || 374 GV->getName().starts_with("__llvm_gcda")) 375 return true; 376 377 return false; 378 } 379 380 // Returns true if the memory at `Addr` may be shared with other threads. 381 bool maybeSharedMutable(const Value *Addr) { 382 // By default assume memory may be shared. 383 if (!Addr) 384 return true; 385 386 if (isa<AllocaInst>(getUnderlyingObject(Addr)) && 387 !PointerMayBeCaptured(Addr, true, true)) 388 return false; // Object is on stack but does not escape. 389 390 Addr = Addr->stripInBoundsOffsets(); 391 if (auto *GV = dyn_cast<GlobalVariable>(Addr)) { 392 if (GV->isConstant()) 393 return false; // Shared, but not mutable. 394 } 395 396 return true; 397 } 398 399 bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS, 400 MDBuilder &MDB, uint64_t &FeatureMask) { 401 SmallVector<const MetadataInfo *, 1> InstMetadata; 402 bool RequiresCovered = false; 403 404 // Only call if at least 1 type of metadata is requested. 405 assert(Options.UAR || Options.Atomics); 406 407 if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) { 408 if (useAfterReturnUnsafe(I)) 409 FeatureMask |= kSanitizerBinaryMetadataUAR; 410 } 411 412 if (Options.Atomics) { 413 const Value *Addr = nullptr; 414 if (auto *SI = dyn_cast<StoreInst>(&I)) 415 Addr = SI->getPointerOperand(); 416 else if (auto *LI = dyn_cast<LoadInst>(&I)) 417 Addr = LI->getPointerOperand(); 418 419 if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) { 420 auto SSID = getAtomicSyncScopeID(&I); 421 if ((SSID.has_value() && *SSID != SyncScope::SingleThread) || 422 pretendAtomicAccess(Addr)) { 423 NumMetadataAtomics++; 424 InstMetadata.push_back(&MetadataInfo::Atomics); 425 } 426 FeatureMask |= kSanitizerBinaryMetadataAtomics; 427 RequiresCovered = true; 428 } 429 } 430 431 // Attach MD_pcsections to instruction. 432 if (!InstMetadata.empty()) { 433 MIS.insert(InstMetadata.begin(), InstMetadata.end()); 434 SmallVector<MDBuilder::PCSection, 1> Sections; 435 for (const auto &MI : InstMetadata) 436 Sections.push_back({getSectionName(MI->SectionSuffix), {}}); 437 I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections)); 438 } 439 440 return RequiresCovered; 441 } 442 443 GlobalVariable * 444 SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) { 445 // Use ExternalWeak so that if all sections are discarded due to section 446 // garbage collection, the linker will not report undefined symbol errors. 447 auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false, 448 GlobalVariable::ExternalWeakLinkage, 449 /*Initializer=*/nullptr, MarkerName); 450 Marker->setVisibility(GlobalValue::HiddenVisibility); 451 return Marker; 452 } 453 454 StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) { 455 // FIXME: Other TargetTriples. 456 // Request ULEB128 encoding for all integer constants. 457 return StringPool.save(SectionSuffix + "!C"); 458 } 459 460 Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) { 461 return "__start_" + SectionSuffix; 462 } 463 464 Twine SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) { 465 return "__stop_" + SectionSuffix; 466 } 467 468 } // namespace 469 470 SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass( 471 SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles) 472 : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {} 473 474 PreservedAnalyses 475 SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) { 476 std::unique_ptr<SpecialCaseList> Ignorelist; 477 if (!IgnorelistFiles.empty()) { 478 Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles, 479 *vfs::getRealFileSystem()); 480 if (Ignorelist->inSection("metadata", "src", M.getSourceFileName())) 481 return PreservedAnalyses::all(); 482 } 483 484 SanitizerBinaryMetadata Pass(M, Options, std::move(Ignorelist)); 485 if (Pass.run()) 486 return PreservedAnalyses::none(); 487 return PreservedAnalyses::all(); 488 } 489