//===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of SanitizerBinaryMetadata.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#include <array>
#include <cstdint>
#include <memory>

using namespace llvm;

#define DEBUG_TYPE "sanmd"

namespace {

//===--- Constants --------------------------------------------------------===//

// The version word handed to the runtime callbacks is assembled by
// SanitizerBinaryMetadata::getVersion() from the two values below.
constexpr uint32_t kVersionBase = 2;                // occupies lower 16 bits
// Flag bit OR'ed into the version word by getVersion() when the module's code
// model is Medium or Large.
constexpr uint32_t kVersionPtrSizeRel = (1u << 16); // offsets are pointer-sized
// Priority passed to appendToGlobalCtors()/appendToGlobalDtors() for the
// generated module constructor and destructor.
constexpr int kCtorDtorPriority = 2;

// Pairs of names of initialization callback functions and which section
// contains the relevant metadata.
class MetadataInfo {
public:
  // Prefix of the generated symbols: run() appends ".module_ctor" /
  // ".module_dtor" for the generated functions and "_add" / "_del" for the
  // callbacks they invoke.
  const StringRef FunctionPrefix;
  // Suffix from which the section name and the "__start_" / "__stop_" marker
  // names are derived (see getSectionName/getSectionStart/getSectionEnd).
  const StringRef SectionSuffix;

  // The only two instances; defined right after the class.
  static const MetadataInfo Covered;
  static const MetadataInfo Atomics;

private:
  // Forbid construction elsewhere.
  explicit constexpr MetadataInfo(StringRef FunctionPrefix,
                                  StringRef SectionSuffix)
      : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {}
};
const MetadataInfo MetadataInfo::Covered{
    "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection};
const MetadataInfo MetadataInfo::Atomics{
    "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection};

// The only instances of MetadataInfo are the constants above, so a set of
// them may simply store pointers to them. To deterministically generate code,
// we need to use a set with stable iteration order, such as SetVector.
using MetadataInfoSet = SetVector<const MetadataInfo *>;

//===--- Command-line options ---------------------------------------------===//

// Forwarded as the `Weak` argument of createSanitizerCtorAndInitFunctions()
// when the module constructor/destructor are created in run().
cl::opt<bool> ClWeakCallbacks(
    "sanitizer-metadata-weak-callbacks",
    cl::desc("Declare callbacks extern weak, and only call if non-null."),
    cl::Hidden, cl::init(true));
// When set, runOn(Function&) clears the atomics feature bit for functions
// carrying the "no_sanitize_thread" attribute.
cl::opt<bool>
    ClNoSanitize("sanitizer-metadata-nosanitize-attr",
                 cl::desc("Mark some metadata features uncovered in functions "
                          "with associated no_sanitize attributes."),
                 cl::Hidden, cl::init(true));

// The three flags below are OR'ed into the pass options by
// transformOptionsFromCl(): they can switch a feature on but never off.
cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",
                            cl::desc("Emit PCs for covered functions."),
                            cl::Hidden, cl::init(false));
cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics",
                            cl::desc("Emit PCs for atomic operations."),
                            cl::Hidden, cl::init(false));
cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar",
                        cl::desc("Emit PCs for start of functions that are "
                                 "subject for use-after-return checking"),
                        cl::Hidden, cl::init(false));

//===--- Statistics -------------------------------------------------------===//

// Incremented in runOn(Function&) (covered, UAR) and runOn(Instruction&)
// (atomics) each time the corresponding metadata is recorded.
STATISTIC(NumMetadataCovered, "Metadata attached to covered functions");
STATISTIC(NumMetadataAtomics, "Metadata attached to atomics");
STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions");

//===----------------------------------------------------------------------===//

// Apply opt overrides.
119 SanitizerBinaryMetadataOptions && 120 transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) { 121 Opts.Covered |= ClEmitCovered; 122 Opts.Atomics |= ClEmitAtomics; 123 Opts.UAR |= ClEmitUAR; 124 return std::move(Opts); 125 } 126 127 class SanitizerBinaryMetadata { 128 public: 129 SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts, 130 std::unique_ptr<SpecialCaseList> Ignorelist) 131 : Mod(M), Options(transformOptionsFromCl(std::move(Opts))), 132 Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()), 133 IRB(M.getContext()) { 134 // FIXME: Make it work with other formats. 135 assert(TargetTriple.isOSBinFormatELF() && "ELF only"); 136 assert(!(TargetTriple.isNVPTX() || TargetTriple.isAMDGPU()) && 137 "Device targets are not supported"); 138 } 139 140 bool run(); 141 142 private: 143 uint32_t getVersion() const { 144 uint32_t Version = kVersionBase; 145 const auto CM = Mod.getCodeModel(); 146 if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large)) 147 Version |= kVersionPtrSizeRel; 148 return Version; 149 } 150 151 void runOn(Function &F, MetadataInfoSet &MIS); 152 153 // Determines which set of metadata to collect for this instruction. 154 // 155 // Returns true if covered metadata is required to unambiguously interpret 156 // other metadata. For example, if we are interested in atomics metadata, any 157 // function with memory operations (atomic or not) requires covered metadata 158 // to determine if a memory operation is atomic or not in modules compiled 159 // with SanitizerBinaryMetadata. 160 bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB, 161 uint64_t &FeatureMask); 162 163 // Get start/end section marker pointer. 164 GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty); 165 166 // Returns the target-dependent section name. 167 StringRef getSectionName(StringRef SectionSuffix); 168 169 // Returns the section start marker name. 
170 Twine getSectionStart(StringRef SectionSuffix); 171 172 // Returns the section end marker name. 173 Twine getSectionEnd(StringRef SectionSuffix); 174 175 // Returns true if the access to the address should be considered "atomic". 176 bool pretendAtomicAccess(const Value *Addr); 177 178 Module &Mod; 179 const SanitizerBinaryMetadataOptions Options; 180 std::unique_ptr<SpecialCaseList> Ignorelist; 181 const Triple TargetTriple; 182 IRBuilder<> IRB; 183 BumpPtrAllocator Alloc; 184 UniqueStringSaver StringPool{Alloc}; 185 }; 186 187 bool SanitizerBinaryMetadata::run() { 188 MetadataInfoSet MIS; 189 190 for (Function &F : Mod) 191 runOn(F, MIS); 192 193 if (MIS.empty()) 194 return false; 195 196 // 197 // Setup constructors and call all initialization functions for requested 198 // metadata features. 199 // 200 201 auto *Int8PtrTy = IRB.getInt8PtrTy(); 202 auto *Int8PtrPtrTy = PointerType::getUnqual(Int8PtrTy); 203 auto *Int32Ty = IRB.getInt32Ty(); 204 const std::array<Type *, 3> InitTypes = {Int32Ty, Int8PtrPtrTy, Int8PtrPtrTy}; 205 auto *Version = ConstantInt::get(Int32Ty, getVersion()); 206 207 for (const MetadataInfo *MI : MIS) { 208 const std::array<Value *, InitTypes.size()> InitArgs = { 209 Version, 210 getSectionMarker(getSectionStart(MI->SectionSuffix), Int8PtrTy), 211 getSectionMarker(getSectionEnd(MI->SectionSuffix), Int8PtrTy), 212 }; 213 // We declare the _add and _del functions as weak, and only call them if 214 // there is a valid symbol linked. This allows building binaries with 215 // semantic metadata, but without having callbacks. When a tool that wants 216 // the metadata is linked which provides the callbacks, they will be called. 
217 Function *Ctor = 218 createSanitizerCtorAndInitFunctions( 219 Mod, (MI->FunctionPrefix + ".module_ctor").str(), 220 (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs, 221 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks) 222 .first; 223 Function *Dtor = 224 createSanitizerCtorAndInitFunctions( 225 Mod, (MI->FunctionPrefix + ".module_dtor").str(), 226 (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs, 227 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks) 228 .first; 229 Constant *CtorComdatKey = nullptr; 230 Constant *DtorComdatKey = nullptr; 231 if (TargetTriple.supportsCOMDAT()) { 232 // Use COMDAT to deduplicate constructor/destructor function. The COMDAT 233 // key needs to be a non-local linkage. 234 Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName())); 235 Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName())); 236 Ctor->setLinkage(GlobalValue::ExternalLinkage); 237 Dtor->setLinkage(GlobalValue::ExternalLinkage); 238 // DSOs should _not_ call another constructor/destructor! 239 Ctor->setVisibility(GlobalValue::HiddenVisibility); 240 Dtor->setVisibility(GlobalValue::HiddenVisibility); 241 CtorComdatKey = Ctor; 242 DtorComdatKey = Dtor; 243 } 244 appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorComdatKey); 245 appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorComdatKey); 246 } 247 248 return true; 249 } 250 251 void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) { 252 if (F.empty()) 253 return; 254 if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) 255 return; 256 if (Ignorelist && Ignorelist->inSection("metadata", "fun", F.getName())) 257 return; 258 // Don't touch available_externally functions, their actual body is elsewhere. 259 if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) 260 return; 261 262 MDBuilder MDB(F.getContext()); 263 264 // The metadata features enabled for this function, stored along covered 265 // metadata (if enabled). 
266 uint64_t FeatureMask = 0; 267 // Don't emit unnecessary covered metadata for all functions to save space. 268 bool RequiresCovered = false; 269 270 if (Options.Atomics || Options.UAR) { 271 for (BasicBlock &BB : F) 272 for (Instruction &I : BB) 273 RequiresCovered |= runOn(I, MIS, MDB, FeatureMask); 274 } 275 276 if (ClNoSanitize && F.hasFnAttribute("no_sanitize_thread")) 277 FeatureMask &= ~kSanitizerBinaryMetadataAtomics; 278 if (F.isVarArg()) 279 FeatureMask &= ~kSanitizerBinaryMetadataUAR; 280 if (FeatureMask & kSanitizerBinaryMetadataUAR) { 281 RequiresCovered = true; 282 NumMetadataUAR++; 283 } 284 285 // Covered metadata is always emitted if explicitly requested, otherwise only 286 // if some other metadata requires it to unambiguously interpret it for 287 // modules compiled with SanitizerBinaryMetadata. 288 if (Options.Covered || (FeatureMask && RequiresCovered)) { 289 NumMetadataCovered++; 290 const auto *MI = &MetadataInfo::Covered; 291 MIS.insert(MI); 292 const StringRef Section = getSectionName(MI->SectionSuffix); 293 // The feature mask will be placed after the function size. 294 Constant *CFM = IRB.getInt64(FeatureMask); 295 F.setMetadata(LLVMContext::MD_pcsections, 296 MDB.createPCSections({{Section, {CFM}}})); 297 } 298 } 299 300 bool isUARSafeCall(CallInst *CI) { 301 auto *F = CI->getCalledFunction(); 302 // There are no intrinsic functions that leak arguments. 303 // If the called function does not return, the current function 304 // does not return as well, so no possibility of use-after-return. 305 // Sanitizer function also don't leak or don't return. 306 // It's safe to both pass pointers to local variables to them 307 // and to tail-call them. 
308 return F && (F->isIntrinsic() || F->doesNotReturn() || 309 F->getName().startswith("__asan_") || 310 F->getName().startswith("__hwsan_") || 311 F->getName().startswith("__ubsan_") || 312 F->getName().startswith("__msan_") || 313 F->getName().startswith("__tsan_")); 314 } 315 316 bool hasUseAfterReturnUnsafeUses(Value &V) { 317 for (User *U : V.users()) { 318 if (auto *I = dyn_cast<Instruction>(U)) { 319 if (I->isLifetimeStartOrEnd() || I->isDroppable()) 320 continue; 321 if (auto *CI = dyn_cast<CallInst>(U)) { 322 if (isUARSafeCall(CI)) 323 continue; 324 } 325 if (isa<LoadInst>(U)) 326 continue; 327 if (auto *SI = dyn_cast<StoreInst>(U)) { 328 // If storing TO the alloca, then the address isn't taken. 329 if (SI->getOperand(1) == &V) 330 continue; 331 } 332 if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) { 333 if (!hasUseAfterReturnUnsafeUses(*GEPI)) 334 continue; 335 } else if (auto *BCI = dyn_cast<BitCastInst>(U)) { 336 if (!hasUseAfterReturnUnsafeUses(*BCI)) 337 continue; 338 } 339 } 340 return true; 341 } 342 return false; 343 } 344 345 bool useAfterReturnUnsafe(Instruction &I) { 346 if (isa<AllocaInst>(I)) 347 return hasUseAfterReturnUnsafeUses(I); 348 // Tail-called functions are not necessary intercepted 349 // at runtime because there is no call instruction. 350 // So conservatively mark the caller as requiring checking. 351 else if (auto *CI = dyn_cast<CallInst>(&I)) 352 return CI->isTailCall() && !isUARSafeCall(CI); 353 return false; 354 } 355 356 bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) { 357 if (!Addr) 358 return false; 359 360 Addr = Addr->stripInBoundsOffsets(); 361 auto *GV = dyn_cast<GlobalVariable>(Addr); 362 if (!GV) 363 return false; 364 365 // Some compiler-generated accesses are known racy, to avoid false positives 366 // in data-race analysis pretend they're atomic. 
367 if (GV->hasSection()) { 368 const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat(); 369 const auto ProfSec = 370 getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false); 371 if (GV->getSection().endswith(ProfSec)) 372 return true; 373 } 374 if (GV->getName().startswith("__llvm_gcov") || 375 GV->getName().startswith("__llvm_gcda")) 376 return true; 377 378 return false; 379 } 380 381 // Returns true if the memory at `Addr` may be shared with other threads. 382 bool maybeSharedMutable(const Value *Addr) { 383 // By default assume memory may be shared. 384 if (!Addr) 385 return true; 386 387 if (isa<AllocaInst>(getUnderlyingObject(Addr)) && 388 !PointerMayBeCaptured(Addr, true, true)) 389 return false; // Object is on stack but does not escape. 390 391 Addr = Addr->stripInBoundsOffsets(); 392 if (auto *GV = dyn_cast<GlobalVariable>(Addr)) { 393 if (GV->isConstant()) 394 return false; // Shared, but not mutable. 395 } 396 397 return true; 398 } 399 400 bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS, 401 MDBuilder &MDB, uint64_t &FeatureMask) { 402 SmallVector<const MetadataInfo *, 1> InstMetadata; 403 bool RequiresCovered = false; 404 405 // Only call if at least 1 type of metadata is requested. 
406 assert(Options.UAR || Options.Atomics); 407 408 if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) { 409 if (useAfterReturnUnsafe(I)) 410 FeatureMask |= kSanitizerBinaryMetadataUAR; 411 } 412 413 if (Options.Atomics) { 414 const Value *Addr = nullptr; 415 if (auto *SI = dyn_cast<StoreInst>(&I)) 416 Addr = SI->getPointerOperand(); 417 else if (auto *LI = dyn_cast<LoadInst>(&I)) 418 Addr = LI->getPointerOperand(); 419 420 if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) { 421 auto SSID = getAtomicSyncScopeID(&I); 422 if ((SSID.has_value() && *SSID != SyncScope::SingleThread) || 423 pretendAtomicAccess(Addr)) { 424 NumMetadataAtomics++; 425 InstMetadata.push_back(&MetadataInfo::Atomics); 426 } 427 FeatureMask |= kSanitizerBinaryMetadataAtomics; 428 RequiresCovered = true; 429 } 430 } 431 432 // Attach MD_pcsections to instruction. 433 if (!InstMetadata.empty()) { 434 MIS.insert(InstMetadata.begin(), InstMetadata.end()); 435 SmallVector<MDBuilder::PCSection, 1> Sections; 436 for (const auto &MI : InstMetadata) 437 Sections.push_back({getSectionName(MI->SectionSuffix), {}}); 438 I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections)); 439 } 440 441 return RequiresCovered; 442 } 443 444 GlobalVariable * 445 SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) { 446 // Use ExternalWeak so that if all sections are discarded due to section 447 // garbage collection, the linker will not report undefined symbol errors. 448 auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false, 449 GlobalVariable::ExternalWeakLinkage, 450 /*Initializer=*/nullptr, MarkerName); 451 Marker->setVisibility(GlobalValue::HiddenVisibility); 452 return Marker; 453 } 454 455 StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) { 456 // FIXME: Other TargetTriples. 457 // Request ULEB128 encoding for all integer constants. 
458 return StringPool.save(SectionSuffix + "!C"); 459 } 460 461 Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) { 462 return "__start_" + SectionSuffix; 463 } 464 465 Twine SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) { 466 return "__stop_" + SectionSuffix; 467 } 468 469 } // namespace 470 471 SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass( 472 SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles) 473 : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {} 474 475 PreservedAnalyses 476 SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) { 477 std::unique_ptr<SpecialCaseList> Ignorelist; 478 if (!IgnorelistFiles.empty()) { 479 Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles, 480 *vfs::getRealFileSystem()); 481 if (Ignorelist->inSection("metadata", "src", M.getSourceFileName())) 482 return PreservedAnalyses::all(); 483 } 484 485 SanitizerBinaryMetadata Pass(M, Options, std::move(Ignorelist)); 486 if (Pass.run()) 487 return PreservedAnalyses::none(); 488 return PreservedAnalyses::all(); 489 } 490