1 //===- HWAddressSanitizer.cpp - memory access error detector --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file is a part of HWAddressSanitizer, an address basic correctness 11 /// checker based on tagged addressing. 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" 15 #include "llvm/ADT/MapVector.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Analysis/BlockFrequencyInfo.h" 22 #include "llvm/Analysis/DomTreeUpdater.h" 23 #include "llvm/Analysis/GlobalsModRef.h" 24 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 25 #include "llvm/Analysis/PostDominators.h" 26 #include "llvm/Analysis/ProfileSummaryInfo.h" 27 #include "llvm/Analysis/StackSafetyAnalysis.h" 28 #include "llvm/Analysis/TargetLibraryInfo.h" 29 #include "llvm/Analysis/ValueTracking.h" 30 #include "llvm/BinaryFormat/Dwarf.h" 31 #include "llvm/BinaryFormat/ELF.h" 32 #include "llvm/IR/Attributes.h" 33 #include "llvm/IR/BasicBlock.h" 34 #include "llvm/IR/Constant.h" 35 #include "llvm/IR/Constants.h" 36 #include "llvm/IR/DataLayout.h" 37 #include "llvm/IR/DebugInfoMetadata.h" 38 #include "llvm/IR/DerivedTypes.h" 39 #include "llvm/IR/Dominators.h" 40 #include "llvm/IR/Function.h" 41 #include "llvm/IR/IRBuilder.h" 42 #include "llvm/IR/InlineAsm.h" 43 #include "llvm/IR/InstIterator.h" 44 #include "llvm/IR/Instruction.h" 45 #include "llvm/IR/Instructions.h" 46 #include "llvm/IR/IntrinsicInst.h" 47 #include "llvm/IR/Intrinsics.h" 48 #include "llvm/IR/LLVMContext.h" 49 #include "llvm/IR/MDBuilder.h" 50 #include "llvm/IR/Module.h" 51 #include "llvm/IR/Type.h" 52 #include "llvm/IR/Value.h" 53 #include "llvm/Support/Casting.h" 54 #include "llvm/Support/CommandLine.h" 55 #include "llvm/Support/Debug.h" 56 #include "llvm/Support/MD5.h" 57 #include "llvm/Support/RandomNumberGenerator.h" 58 #include "llvm/Support/raw_ostream.h" 59 #include "llvm/TargetParser/Triple.h" 60 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" 61 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 62 #include "llvm/Transforms/Utils/Local.h" 63 #include "llvm/Transforms/Utils/MemoryTaggingSupport.h" 64 #include "llvm/Transforms/Utils/ModuleUtils.h" 65 #include "llvm/Transforms/Utils/PromoteMemToReg.h" 66 #include <optional> 67 #include <random> 68 69 using namespace llvm; 70 71 #define DEBUG_TYPE "hwasan" 72 73 const char kHwasanModuleCtorName[] = "hwasan.module_ctor"; 74 const char kHwasanNoteName[] = "hwasan.note"; 75 const char kHwasanInitName[] = "__hwasan_init"; 76 const char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk"; 77 78 const char kHwasanShadowMemoryDynamicAddress[] = 79 "__hwasan_shadow_memory_dynamic_address"; 80 81 // Accesses sizes are powers of two: 1, 2, 4, 8, 16. 82 static const size_t kNumberOfAccessSizes = 5; 83 84 static const size_t kDefaultShadowScale = 4; 85 static const uint64_t kDynamicShadowSentinel = 86 std::numeric_limits<uint64_t>::max(); 87 88 static const unsigned kShadowBaseAlignment = 32; 89 90 static cl::opt<std::string> 91 ClMemoryAccessCallbackPrefix("hwasan-memory-access-callback-prefix", 92 cl::desc("Prefix for memory access callbacks"), 93 cl::Hidden, cl::init("__hwasan_")); 94 95 static cl::opt<bool> ClKasanMemIntrinCallbackPrefix( 96 "hwasan-kernel-mem-intrinsic-prefix", 97 cl::desc("Use prefix for memory intrinsics in KASAN mode"), cl::Hidden, 98 cl::init(false)); 99 100 static cl::opt<bool> ClInstrumentWithCalls( 101 "hwasan-instrument-with-calls", 102 cl::desc("instrument reads and writes with callbacks"), cl::Hidden, 103 cl::init(false)); 104 105 static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads", 106 cl::desc("instrument read instructions"), 107 cl::Hidden, cl::init(true)); 108 109 static cl::opt<bool> 110 ClInstrumentWrites("hwasan-instrument-writes", 111 cl::desc("instrument write instructions"), cl::Hidden, 112 cl::init(true)); 113 114 static cl::opt<bool> ClInstrumentAtomics( 115 "hwasan-instrument-atomics", 116 cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, 117 cl::init(true)); 118 119 static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval", 120 cl::desc("instrument byval arguments"), 121 cl::Hidden, cl::init(true)); 122 123 static cl::opt<bool> 124 ClRecover("hwasan-recover", 125 cl::desc("Enable recovery mode (continue-after-error)."), 126 cl::Hidden, cl::init(false)); 127 128 static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack", 129 cl::desc("instrument stack (allocas)"), 130 cl::Hidden, cl::init(true)); 131 132 static cl::opt<bool> 133 ClUseStackSafety("hwasan-use-stack-safety", cl::Hidden, cl::init(true), 134 cl::Hidden, cl::desc("Use Stack Safety analysis results"), 135 cl::Optional); 136 137 static cl::opt<size_t> ClMaxLifetimes( 138 "hwasan-max-lifetimes-for-alloca", cl::Hidden, cl::init(3), 139 cl::ReallyHidden, 140 cl::desc("How many lifetime ends to handle for a single alloca."), 141 cl::Optional); 142 143 static cl::opt<bool> 144 ClUseAfterScope("hwasan-use-after-scope", 145 cl::desc("detect use after scope within function"), 146 cl::Hidden, cl::init(true)); 147 148 static cl::opt<bool> ClGenerateTagsWithCalls( 149 "hwasan-generate-tags-with-calls", 150 cl::desc("generate new tags with runtime library calls"), cl::Hidden, 151 cl::init(false)); 152 153 static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"), 154 cl::Hidden, cl::init(false)); 155 156 static cl::opt<int> ClMatchAllTag( 157 "hwasan-match-all-tag", 158 cl::desc("don't report bad accesses via pointers with this tag"), 159 cl::Hidden, cl::init(-1)); 160 161 static cl::opt<bool> 162 ClEnableKhwasan("hwasan-kernel", 163 cl::desc("Enable KernelHWAddressSanitizer instrumentation"), 164 cl::Hidden, cl::init(false)); 165 166 // These flags allow to change the shadow mapping and control how shadow memory 167 // is accessed. The shadow mapping looks like: 168 // Shadow = (Mem >> scale) + offset 169 170 static cl::opt<uint64_t> 171 ClMappingOffset("hwasan-mapping-offset", 172 cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"), 173 cl::Hidden, cl::init(0)); 174 175 static cl::opt<bool> 176 ClWithIfunc("hwasan-with-ifunc", 177 cl::desc("Access dynamic shadow through an ifunc global on " 178 "platforms that support this"), 179 cl::Hidden, cl::init(false)); 180 181 static cl::opt<bool> ClWithTls( 182 "hwasan-with-tls", 183 cl::desc("Access dynamic shadow through an thread-local pointer on " 184 "platforms that support this"), 185 cl::Hidden, cl::init(true)); 186 187 static cl::opt<int> ClHotPercentileCutoff("hwasan-percentile-cutoff-hot", 188 cl::desc("Hot percentile cuttoff.")); 189 190 static cl::opt<float> 191 ClRandomSkipRate("hwasan-random-rate", 192 cl::desc("Probability value in the range [0.0, 1.0] " 193 "to keep instrumentation of a function.")); 194 195 STATISTIC(NumTotalFuncs, "Number of total funcs"); 196 STATISTIC(NumInstrumentedFuncs, "Number of instrumented funcs"); 197 STATISTIC(NumNoProfileSummaryFuncs, "Number of funcs without PS"); 198 199 // Mode for selecting how to insert frame record info into the stack ring 200 // buffer. 201 enum RecordStackHistoryMode { 202 // Do not record frame record info. 203 none, 204 205 // Insert instructions into the prologue for storing into the stack ring 206 // buffer directly. 207 instr, 208 209 // Add a call to __hwasan_add_frame_record in the runtime. 210 libcall, 211 }; 212 213 static cl::opt<RecordStackHistoryMode> ClRecordStackHistory( 214 "hwasan-record-stack-history", 215 cl::desc("Record stack frames with tagged allocations in a thread-local " 216 "ring buffer"), 217 cl::values(clEnumVal(none, "Do not record stack ring history"), 218 clEnumVal(instr, "Insert instructions into the prologue for " 219 "storing into the stack ring buffer directly"), 220 clEnumVal(libcall, "Add a call to __hwasan_add_frame_record for " 221 "storing into the stack ring buffer")), 222 cl::Hidden, cl::init(instr)); 223 224 static cl::opt<bool> 225 ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics", 226 cl::desc("instrument memory intrinsics"), 227 cl::Hidden, cl::init(true)); 228 229 static cl::opt<bool> 230 ClInstrumentLandingPads("hwasan-instrument-landing-pads", 231 cl::desc("instrument landing pads"), cl::Hidden, 232 cl::init(false)); 233 234 static cl::opt<bool> ClUseShortGranules( 235 "hwasan-use-short-granules", 236 cl::desc("use short granules in allocas and outlined checks"), cl::Hidden, 237 cl::init(false)); 238 239 static cl::opt<bool> ClInstrumentPersonalityFunctions( 240 "hwasan-instrument-personality-functions", 241 cl::desc("instrument personality functions"), cl::Hidden); 242 243 static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks", 244 cl::desc("inline all checks"), 245 cl::Hidden, cl::init(false)); 246 247 static cl::opt<bool> ClInlineFastPathChecks("hwasan-inline-fast-path-checks", 248 cl::desc("inline all checks"), 249 cl::Hidden, cl::init(false)); 250 251 // Enabled from clang by "-fsanitize-hwaddress-experimental-aliasing". 252 static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases", 253 cl::desc("Use page aliasing in HWASan"), 254 cl::Hidden, cl::init(false)); 255 256 namespace { 257 258 template <typename T> T optOr(cl::opt<T> &Opt, T Other) { 259 return Opt.getNumOccurrences() ? Opt : Other; 260 } 261 262 bool shouldUsePageAliases(const Triple &TargetTriple) { 263 return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64; 264 } 265 266 bool shouldInstrumentStack(const Triple &TargetTriple) { 267 return !shouldUsePageAliases(TargetTriple) && ClInstrumentStack; 268 } 269 270 bool shouldInstrumentWithCalls(const Triple &TargetTriple) { 271 return optOr(ClInstrumentWithCalls, TargetTriple.getArch() == Triple::x86_64); 272 } 273 274 bool mightUseStackSafetyAnalysis(bool DisableOptimization) { 275 return optOr(ClUseStackSafety, !DisableOptimization); 276 } 277 278 bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple, 279 bool DisableOptimization) { 280 return shouldInstrumentStack(TargetTriple) && 281 mightUseStackSafetyAnalysis(DisableOptimization); 282 } 283 284 bool shouldDetectUseAfterScope(const Triple &TargetTriple) { 285 return ClUseAfterScope && shouldInstrumentStack(TargetTriple); 286 } 287 288 /// An instrumentation pass implementing detection of addressability bugs 289 /// using tagged pointers. 290 class HWAddressSanitizer { 291 public: 292 HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover, 293 const StackSafetyGlobalInfo *SSI) 294 : M(M), SSI(SSI) { 295 this->Recover = optOr(ClRecover, Recover); 296 this->CompileKernel = optOr(ClEnableKhwasan, CompileKernel); 297 this->Rng = ClRandomSkipRate.getNumOccurrences() ? M.createRNG(DEBUG_TYPE) 298 : nullptr; 299 300 initializeModule(); 301 } 302 303 void sanitizeFunction(Function &F, FunctionAnalysisManager &FAM); 304 305 private: 306 struct ShadowTagCheckInfo { 307 Instruction *TagMismatchTerm = nullptr; 308 Value *PtrLong = nullptr; 309 Value *AddrLong = nullptr; 310 Value *PtrTag = nullptr; 311 Value *MemTag = nullptr; 312 }; 313 314 bool selectiveInstrumentationShouldSkip(Function &F, 315 FunctionAnalysisManager &FAM) const; 316 void initializeModule(); 317 void createHwasanCtorComdat(); 318 319 void initializeCallbacks(Module &M); 320 321 Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val); 322 323 Value *getDynamicShadowIfunc(IRBuilder<> &IRB); 324 Value *getShadowNonTls(IRBuilder<> &IRB); 325 326 void untagPointerOperand(Instruction *I, Value *Addr); 327 Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); 328 329 int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex); 330 ShadowTagCheckInfo insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore, 331 DomTreeUpdater &DTU, LoopInfo *LI); 332 void instrumentMemAccessOutline(Value *Ptr, bool IsWrite, 333 unsigned AccessSizeIndex, 334 Instruction *InsertBefore, 335 DomTreeUpdater &DTU, LoopInfo *LI); 336 void instrumentMemAccessInline(Value *Ptr, bool IsWrite, 337 unsigned AccessSizeIndex, 338 Instruction *InsertBefore, DomTreeUpdater &DTU, 339 LoopInfo *LI); 340 bool ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE, MemIntrinsic *MI); 341 void instrumentMemIntrinsic(MemIntrinsic *MI); 342 bool instrumentMemAccess(InterestingMemoryOperand &O, DomTreeUpdater &DTU, 343 LoopInfo *LI); 344 bool ignoreAccessWithoutRemark(Instruction *Inst, Value *Ptr); 345 bool ignoreAccess(OptimizationRemarkEmitter &ORE, Instruction *Inst, 346 Value *Ptr); 347 348 void getInterestingMemoryOperands( 349 OptimizationRemarkEmitter &ORE, Instruction *I, 350 const TargetLibraryInfo &TLI, 351 SmallVectorImpl<InterestingMemoryOperand> &Interesting); 352 353 void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size); 354 Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag); 355 Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong); 356 bool instrumentStack(memtag::StackInfo &Info, Value *StackTag, Value *UARTag, 357 const DominatorTree &DT, const PostDominatorTree &PDT, 358 const LoopInfo &LI); 359 bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec); 360 Value *getNextTagWithCall(IRBuilder<> &IRB); 361 Value *getStackBaseTag(IRBuilder<> &IRB); 362 Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, unsigned AllocaNo); 363 Value *getUARTag(IRBuilder<> &IRB); 364 365 Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB); 366 Value *applyTagMask(IRBuilder<> &IRB, Value *OldTag); 367 unsigned retagMask(unsigned AllocaNo); 368 369 void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord); 370 371 void instrumentGlobal(GlobalVariable *GV, uint8_t Tag); 372 void instrumentGlobals(); 373 374 Value *getCachedFP(IRBuilder<> &IRB); 375 Value *getFrameRecordInfo(IRBuilder<> &IRB); 376 377 void instrumentPersonalityFunctions(); 378 379 LLVMContext *C; 380 Module &M; 381 const StackSafetyGlobalInfo *SSI; 382 Triple TargetTriple; 383 std::unique_ptr<RandomNumberGenerator> Rng; 384 385 /// This struct defines the shadow mapping using the rule: 386 /// shadow = (mem >> Scale) + Offset. 387 /// If InGlobal is true, then 388 /// extern char __hwasan_shadow[]; 389 /// shadow = (mem >> Scale) + &__hwasan_shadow 390 /// If InTls is true, then 391 /// extern char *__hwasan_tls; 392 /// shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment) 393 /// 394 /// If WithFrameRecord is true, then __hwasan_tls will be used to access the 395 /// ring buffer for storing stack allocations on targets that support it. 396 struct ShadowMapping { 397 uint8_t Scale; 398 uint64_t Offset; 399 bool InGlobal; 400 bool InTls; 401 bool WithFrameRecord; 402 403 void init(Triple &TargetTriple, bool InstrumentWithCalls); 404 Align getObjectAlignment() const { return Align(1ULL << Scale); } 405 }; 406 407 ShadowMapping Mapping; 408 409 Type *VoidTy = Type::getVoidTy(M.getContext()); 410 Type *IntptrTy = M.getDataLayout().getIntPtrType(M.getContext()); 411 PointerType *PtrTy = PointerType::getUnqual(M.getContext()); 412 Type *Int8Ty = Type::getInt8Ty(M.getContext()); 413 Type *Int32Ty = Type::getInt32Ty(M.getContext()); 414 Type *Int64Ty = Type::getInt64Ty(M.getContext()); 415 416 bool CompileKernel; 417 bool Recover; 418 bool OutlinedChecks; 419 bool InlineFastPath; 420 bool UseShortGranules; 421 bool InstrumentLandingPads; 422 bool InstrumentWithCalls; 423 bool InstrumentStack; 424 bool InstrumentGlobals; 425 bool DetectUseAfterScope; 426 bool UsePageAliases; 427 bool UseMatchAllCallback; 428 429 std::optional<uint8_t> MatchAllTag; 430 431 unsigned PointerTagShift; 432 uint64_t TagMaskByte; 433 434 Function *HwasanCtorFunction; 435 436 FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes]; 437 FunctionCallee HwasanMemoryAccessCallbackSized[2]; 438 439 FunctionCallee HwasanMemmove, HwasanMemcpy, HwasanMemset; 440 FunctionCallee HwasanHandleVfork; 441 442 FunctionCallee HwasanTagMemoryFunc; 443 FunctionCallee HwasanGenerateTagFunc; 444 FunctionCallee HwasanRecordFrameRecordFunc; 445 446 Constant *ShadowGlobal; 447 448 Value *ShadowBase = nullptr; 449 Value *StackBaseTag = nullptr; 450 Value *CachedFP = nullptr; 451 GlobalValue *ThreadPtrGlobal = nullptr; 452 }; 453 454 } // end anonymous namespace 455 456 PreservedAnalyses HWAddressSanitizerPass::run(Module &M, 457 ModuleAnalysisManager &MAM) { 458 const StackSafetyGlobalInfo *SSI = nullptr; 459 auto TargetTriple = llvm::Triple(M.getTargetTriple()); 460 if (shouldUseStackSafetyAnalysis(TargetTriple, Options.DisableOptimization)) 461 SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M); 462 463 HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI); 464 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 465 for (Function &F : M) 466 HWASan.sanitizeFunction(F, FAM); 467 468 PreservedAnalyses PA = PreservedAnalyses::none(); 469 // DominatorTreeAnalysis, PostDominatorTreeAnalysis, and LoopAnalysis 470 // are incrementally updated throughout this pass whenever 471 // SplitBlockAndInsertIfThen is called. 472 PA.preserve<DominatorTreeAnalysis>(); 473 PA.preserve<PostDominatorTreeAnalysis>(); 474 PA.preserve<LoopAnalysis>(); 475 // GlobalsAA is considered stateless and does not get invalidated unless 476 // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers 477 // make changes that require GlobalsAA to be invalidated. 478 PA.abandon<GlobalsAA>(); 479 return PA; 480 } 481 void HWAddressSanitizerPass::printPipeline( 482 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { 483 static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline( 484 OS, MapClassName2PassName); 485 OS << '<'; 486 if (Options.CompileKernel) 487 OS << "kernel;"; 488 if (Options.Recover) 489 OS << "recover"; 490 OS << '>'; 491 } 492 493 void HWAddressSanitizer::createHwasanCtorComdat() { 494 std::tie(HwasanCtorFunction, std::ignore) = 495 getOrCreateSanitizerCtorAndInitFunctions( 496 M, kHwasanModuleCtorName, kHwasanInitName, 497 /*InitArgTypes=*/{}, 498 /*InitArgs=*/{}, 499 // This callback is invoked when the functions are created the first 500 // time. Hook them into the global ctors list in that case: 501 [&](Function *Ctor, FunctionCallee) { 502 Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName); 503 Ctor->setComdat(CtorComdat); 504 appendToGlobalCtors(M, Ctor, 0, Ctor); 505 }); 506 507 // Create a note that contains pointers to the list of global 508 // descriptors. Adding a note to the output file will cause the linker to 509 // create a PT_NOTE program header pointing to the note that we can use to 510 // find the descriptor list starting from the program headers. A function 511 // provided by the runtime initializes the shadow memory for the globals by 512 // accessing the descriptor list via the note. The dynamic loader needs to 513 // call this function whenever a library is loaded. 514 // 515 // The reason why we use a note for this instead of a more conventional 516 // approach of having a global constructor pass a descriptor list pointer to 517 // the runtime is because of an order of initialization problem. With 518 // constructors we can encounter the following problematic scenario: 519 // 520 // 1) library A depends on library B and also interposes one of B's symbols 521 // 2) B's constructors are called before A's (as required for correctness) 522 // 3) during construction, B accesses one of its "own" globals (actually 523 // interposed by A) and triggers a HWASAN failure due to the initialization 524 // for A not having happened yet 525 // 526 // Even without interposition it is possible to run into similar situations in 527 // cases where two libraries mutually depend on each other. 528 // 529 // We only need one note per binary, so put everything for the note in a 530 // comdat. This needs to be a comdat with an .init_array section to prevent 531 // newer versions of lld from discarding the note. 532 // 533 // Create the note even if we aren't instrumenting globals. This ensures that 534 // binaries linked from object files with both instrumented and 535 // non-instrumented globals will end up with a note, even if a comdat from an 536 // object file with non-instrumented globals is selected. The note is harmless 537 // if the runtime doesn't support it, since it will just be ignored. 538 Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName); 539 540 Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0); 541 auto *Start = 542 new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage, 543 nullptr, "__start_hwasan_globals"); 544 Start->setVisibility(GlobalValue::HiddenVisibility); 545 auto *Stop = 546 new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage, 547 nullptr, "__stop_hwasan_globals"); 548 Stop->setVisibility(GlobalValue::HiddenVisibility); 549 550 // Null-terminated so actually 8 bytes, which are required in order to align 551 // the note properly. 552 auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0"); 553 554 auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(), 555 Int32Ty, Int32Ty); 556 auto *Note = 557 new GlobalVariable(M, NoteTy, /*isConstant=*/true, 558 GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName); 559 Note->setSection(".note.hwasan.globals"); 560 Note->setComdat(NoteComdat); 561 Note->setAlignment(Align(4)); 562 563 // The pointers in the note need to be relative so that the note ends up being 564 // placed in rodata, which is the standard location for notes. 565 auto CreateRelPtr = [&](Constant *Ptr) { 566 return ConstantExpr::getTrunc( 567 ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty), 568 ConstantExpr::getPtrToInt(Note, Int64Ty)), 569 Int32Ty); 570 }; 571 Note->setInitializer(ConstantStruct::getAnon( 572 {ConstantInt::get(Int32Ty, 8), // n_namesz 573 ConstantInt::get(Int32Ty, 8), // n_descsz 574 ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type 575 Name, CreateRelPtr(Start), CreateRelPtr(Stop)})); 576 appendToCompilerUsed(M, Note); 577 578 // Create a zero-length global in hwasan_globals so that the linker will 579 // always create start and stop symbols. 580 auto *Dummy = new GlobalVariable( 581 M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage, 582 Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global"); 583 Dummy->setSection("hwasan_globals"); 584 Dummy->setComdat(NoteComdat); 585 Dummy->setMetadata(LLVMContext::MD_associated, 586 MDNode::get(*C, ValueAsMetadata::get(Note))); 587 appendToCompilerUsed(M, Dummy); 588 } 589 590 /// Module-level initialization. 591 /// 592 /// inserts a call to __hwasan_init to the module's constructor list. 593 void HWAddressSanitizer::initializeModule() { 594 LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n"); 595 TargetTriple = Triple(M.getTargetTriple()); 596 597 // x86_64 currently has two modes: 598 // - Intel LAM (default) 599 // - pointer aliasing (heap only) 600 bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64; 601 UsePageAliases = shouldUsePageAliases(TargetTriple); 602 InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple); 603 InstrumentStack = shouldInstrumentStack(TargetTriple); 604 DetectUseAfterScope = shouldDetectUseAfterScope(TargetTriple); 605 PointerTagShift = IsX86_64 ? 57 : 56; 606 TagMaskByte = IsX86_64 ? 0x3F : 0xFF; 607 608 Mapping.init(TargetTriple, InstrumentWithCalls); 609 610 C = &(M.getContext()); 611 IRBuilder<> IRB(*C); 612 613 HwasanCtorFunction = nullptr; 614 615 // Older versions of Android do not have the required runtime support for 616 // short granules, global or personality function instrumentation. On other 617 // platforms we currently require using the latest version of the runtime. 618 bool NewRuntime = 619 !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30); 620 621 UseShortGranules = optOr(ClUseShortGranules, NewRuntime); 622 OutlinedChecks = (TargetTriple.isAArch64() || TargetTriple.isRISCV64()) && 623 TargetTriple.isOSBinFormatELF() && 624 !optOr(ClInlineAllChecks, Recover); 625 626 // These platforms may prefer less inlining to reduce binary size. 627 InlineFastPath = optOr(ClInlineFastPathChecks, !(TargetTriple.isAndroid() || 628 TargetTriple.isOSFuchsia())); 629 630 if (ClMatchAllTag.getNumOccurrences()) { 631 if (ClMatchAllTag != -1) { 632 MatchAllTag = ClMatchAllTag & 0xFF; 633 } 634 } else if (CompileKernel) { 635 MatchAllTag = 0xFF; 636 } 637 UseMatchAllCallback = !CompileKernel && MatchAllTag.has_value(); 638 639 // If we don't have personality function support, fall back to landing pads. 640 InstrumentLandingPads = optOr(ClInstrumentLandingPads, !NewRuntime); 641 642 InstrumentGlobals = 643 !CompileKernel && !UsePageAliases && optOr(ClGlobals, NewRuntime); 644 645 if (!CompileKernel) { 646 createHwasanCtorComdat(); 647 648 if (InstrumentGlobals) 649 instrumentGlobals(); 650 651 bool InstrumentPersonalityFunctions = 652 optOr(ClInstrumentPersonalityFunctions, NewRuntime); 653 if (InstrumentPersonalityFunctions) 654 instrumentPersonalityFunctions(); 655 } 656 657 if (!TargetTriple.isAndroid()) { 658 Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] { 659 auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false, 660 GlobalValue::ExternalLinkage, nullptr, 661 "__hwasan_tls", nullptr, 662 GlobalVariable::InitialExecTLSModel); 663 appendToCompilerUsed(M, GV); 664 return GV; 665 }); 666 ThreadPtrGlobal = cast<GlobalVariable>(C); 667 } 668 } 669 670 void HWAddressSanitizer::initializeCallbacks(Module &M) { 671 IRBuilder<> IRB(*C); 672 const std::string MatchAllStr = UseMatchAllCallback ? "_match_all" : ""; 673 FunctionType *HwasanMemoryAccessCallbackSizedFnTy, 674 *HwasanMemoryAccessCallbackFnTy, *HwasanMemTransferFnTy, 675 *HwasanMemsetFnTy; 676 if (UseMatchAllCallback) { 677 HwasanMemoryAccessCallbackSizedFnTy = 678 FunctionType::get(VoidTy, {IntptrTy, IntptrTy, Int8Ty}, false); 679 HwasanMemoryAccessCallbackFnTy = 680 FunctionType::get(VoidTy, {IntptrTy, Int8Ty}, false); 681 HwasanMemTransferFnTy = 682 FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy, Int8Ty}, false); 683 HwasanMemsetFnTy = 684 FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy, Int8Ty}, false); 685 } else { 686 HwasanMemoryAccessCallbackSizedFnTy = 687 FunctionType::get(VoidTy, {IntptrTy, IntptrTy}, false); 688 HwasanMemoryAccessCallbackFnTy = 689 FunctionType::get(VoidTy, {IntptrTy}, false); 690 HwasanMemTransferFnTy = 691 FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy}, false); 692 HwasanMemsetFnTy = 693 FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy}, false); 694 } 695 696 for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { 697 const std::string TypeStr = AccessIsWrite ? "store" : "load"; 698 const std::string EndingStr = Recover ? "_noabort" : ""; 699 700 HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction( 701 ClMemoryAccessCallbackPrefix + TypeStr + "N" + MatchAllStr + EndingStr, 702 HwasanMemoryAccessCallbackSizedFnTy); 703 704 for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; 705 AccessSizeIndex++) { 706 HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] = 707 M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr + 708 itostr(1ULL << AccessSizeIndex) + 709 MatchAllStr + EndingStr, 710 HwasanMemoryAccessCallbackFnTy); 711 } 712 } 713 714 const std::string MemIntrinCallbackPrefix = 715 (CompileKernel && !ClKasanMemIntrinCallbackPrefix) 716 ? std::string("") 717 : ClMemoryAccessCallbackPrefix; 718 719 HwasanMemmove = M.getOrInsertFunction( 720 MemIntrinCallbackPrefix + "memmove" + MatchAllStr, HwasanMemTransferFnTy); 721 HwasanMemcpy = M.getOrInsertFunction( 722 MemIntrinCallbackPrefix + "memcpy" + MatchAllStr, HwasanMemTransferFnTy); 723 HwasanMemset = M.getOrInsertFunction( 724 MemIntrinCallbackPrefix + "memset" + MatchAllStr, HwasanMemsetFnTy); 725 726 HwasanTagMemoryFunc = M.getOrInsertFunction("__hwasan_tag_memory", VoidTy, 727 PtrTy, Int8Ty, IntptrTy); 728 HwasanGenerateTagFunc = 729 M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty); 730 731 HwasanRecordFrameRecordFunc = 732 M.getOrInsertFunction("__hwasan_add_frame_record", VoidTy, Int64Ty); 733 734 ShadowGlobal = 735 M.getOrInsertGlobal("__hwasan_shadow", ArrayType::get(Int8Ty, 0)); 736 737 HwasanHandleVfork = 738 M.getOrInsertFunction("__hwasan_handle_vfork", VoidTy, IntptrTy); 739 } 740 741 Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) { 742 // An empty inline asm with input reg == output reg. 743 // An opaque no-op cast, basically. 744 // This prevents code bloat as a result of rematerializing trivial definitions 745 // such as constants or global addresses at every load and store. 746 InlineAsm *Asm = 747 InlineAsm::get(FunctionType::get(PtrTy, {Val->getType()}, false), 748 StringRef(""), StringRef("=r,0"), 749 /*hasSideEffects=*/false); 750 return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow"); 751 } 752 753 Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) { 754 return getOpaqueNoopCast(IRB, ShadowGlobal); 755 } 756 757 Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) { 758 if (Mapping.Offset != kDynamicShadowSentinel) 759 return getOpaqueNoopCast( 760 IRB, ConstantExpr::getIntToPtr( 761 ConstantInt::get(IntptrTy, Mapping.Offset), PtrTy)); 762 763 if (Mapping.InGlobal) 764 return getDynamicShadowIfunc(IRB); 765 766 Value *GlobalDynamicAddress = 767 IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal( 768 kHwasanShadowMemoryDynamicAddress, PtrTy); 769 return IRB.CreateLoad(PtrTy, GlobalDynamicAddress); 770 } 771 772 bool HWAddressSanitizer::ignoreAccessWithoutRemark(Instruction *Inst, 773 Value *Ptr) { 774 // Do not instrument accesses from different address spaces; we cannot deal 775 // with them. 776 Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType()); 777 if (PtrTy->getPointerAddressSpace() != 0) 778 return true; 779 780 // Ignore swifterror addresses. 781 // swifterror memory addresses are mem2reg promoted by instruction 782 // selection. As such they cannot have regular uses like an instrumentation 783 // function and it makes no sense to track them as memory. 784 if (Ptr->isSwiftError()) 785 return true; 786 787 if (findAllocaForValue(Ptr)) { 788 if (!InstrumentStack) 789 return true; 790 if (SSI && SSI->stackAccessIsSafe(*Inst)) 791 return true; 792 } 793 794 if (isa<GlobalVariable>(getUnderlyingObject(Ptr))) { 795 if (!InstrumentGlobals) 796 return true; 797 // TODO: Optimize inbound global accesses, like Asan `instrumentMop`. 798 } 799 800 return false; 801 } 802 803 bool HWAddressSanitizer::ignoreAccess(OptimizationRemarkEmitter &ORE, 804 Instruction *Inst, Value *Ptr) { 805 bool Ignored = ignoreAccessWithoutRemark(Inst, Ptr); 806 if (Ignored) { 807 ORE.emit( 808 [&]() { return OptimizationRemark(DEBUG_TYPE, "ignoreAccess", Inst); }); 809 } else { 810 ORE.emit([&]() { 811 return OptimizationRemarkMissed(DEBUG_TYPE, "ignoreAccess", Inst); 812 }); 813 } 814 return Ignored; 815 } 816 817 void HWAddressSanitizer::getInterestingMemoryOperands( 818 OptimizationRemarkEmitter &ORE, Instruction *I, 819 const TargetLibraryInfo &TLI, 820 SmallVectorImpl<InterestingMemoryOperand> &Interesting) { 821 // Skip memory accesses inserted by another instrumentation. 822 if (I->hasMetadata(LLVMContext::MD_nosanitize)) 823 return; 824 825 // Do not instrument the load fetching the dynamic shadow address. 826 if (ShadowBase == I) 827 return; 828 829 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 830 if (!ClInstrumentReads || ignoreAccess(ORE, I, LI->getPointerOperand())) 831 return; 832 Interesting.emplace_back(I, LI->getPointerOperandIndex(), false, 833 LI->getType(), LI->getAlign()); 834 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 835 if (!ClInstrumentWrites || ignoreAccess(ORE, I, SI->getPointerOperand())) 836 return; 837 Interesting.emplace_back(I, SI->getPointerOperandIndex(), true, 838 SI->getValueOperand()->getType(), SI->getAlign()); 839 } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { 840 if (!ClInstrumentAtomics || ignoreAccess(ORE, I, RMW->getPointerOperand())) 841 return; 842 Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true, 843 RMW->getValOperand()->getType(), std::nullopt); 844 } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) { 845 if (!ClInstrumentAtomics || ignoreAccess(ORE, I, XCHG->getPointerOperand())) 846 return; 847 Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true, 848 XCHG->getCompareOperand()->getType(), 849 std::nullopt); 850 } else if (auto *CI = dyn_cast<CallInst>(I)) { 851 for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) { 852 if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) || 853 ignoreAccess(ORE, I, CI->getArgOperand(ArgNo))) 854 continue; 855 Type *Ty = CI->getParamByValType(ArgNo); 856 Interesting.emplace_back(I, ArgNo, false, Ty, Align(1)); 857 } 858 maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI); 859 } 860 } 861 862 static unsigned getPointerOperandIndex(Instruction *I) { 863 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 864 return LI->getPointerOperandIndex(); 865 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 866 return SI->getPointerOperandIndex(); 867 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) 868 return RMW->getPointerOperandIndex(); 869 if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) 870 return XCHG->getPointerOperandIndex(); 871 report_fatal_error("Unexpected instruction"); 872 return -1; 873 } 874 875 static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { 876 size_t Res = llvm::countr_zero(TypeSize / 8); 877 assert(Res < kNumberOfAccessSizes); 878 return Res; 879 } 880 881 void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) { 882 if (TargetTriple.isAArch64() || TargetTriple.getArch() == Triple::x86_64 || 883 TargetTriple.isRISCV64()) 884 return; 885 886 IRBuilder<> IRB(I); 887 Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); 888 Value *UntaggedPtr = 889 IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType()); 890 I->setOperand(getPointerOperandIndex(I), UntaggedPtr); 891 } 892 893 Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) { 894 // Mem >> Scale 895 Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale); 896 if (Mapping.Offset == 0) 897 return IRB.CreateIntToPtr(Shadow, PtrTy); 898 // (Mem >> Scale) + Offset 899 return IRB.CreatePtrAdd(ShadowBase, Shadow); 900 } 901 902 int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite, 903 unsigned AccessSizeIndex) { 904 return (CompileKernel << HWASanAccessInfo::CompileKernelShift) | 905 (MatchAllTag.has_value() << HWASanAccessInfo::HasMatchAllShift) | 906 (MatchAllTag.value_or(0) << HWASanAccessInfo::MatchAllShift) | 907 (Recover << HWASanAccessInfo::RecoverShift) | 908 (IsWrite << HWASanAccessInfo::IsWriteShift) | 909 (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift); 910 } 911 912 HWAddressSanitizer::ShadowTagCheckInfo 913 HWAddressSanitizer::insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore, 914 DomTreeUpdater &DTU, LoopInfo *LI) { 915 ShadowTagCheckInfo R; 916 917 IRBuilder<> IRB(InsertBefore); 918 919 R.PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy); 920 R.PtrTag = 921 IRB.CreateTrunc(IRB.CreateLShr(R.PtrLong, PointerTagShift), Int8Ty); 922 R.AddrLong = untagPointer(IRB, R.PtrLong); 923 Value *Shadow = memToShadow(R.AddrLong, IRB); 924 R.MemTag = IRB.CreateLoad(Int8Ty, Shadow); 925 Value *TagMismatch = IRB.CreateICmpNE(R.PtrTag, R.MemTag); 926 927 if (MatchAllTag.has_value()) { 928 Value *TagNotIgnored = IRB.CreateICmpNE( 929 R.PtrTag, ConstantInt::get(R.PtrTag->getType(), *MatchAllTag)); 930 TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored); 931 } 932 933 R.TagMismatchTerm = SplitBlockAndInsertIfThen( 934 TagMismatch, InsertBefore, false, 935 MDBuilder(*C).createUnlikelyBranchWeights(), &DTU, LI); 936 937 return R; 938 } 939 940 void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite, 941 unsigned AccessSizeIndex, 942 Instruction *InsertBefore, 943 DomTreeUpdater &DTU, 944 LoopInfo *LI) { 945 assert(!UsePageAliases); 946 const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex); 947 948 if (InlineFastPath) 949 InsertBefore = 950 insertShadowTagCheck(Ptr, InsertBefore, DTU, LI).TagMismatchTerm; 951 952 IRBuilder<> IRB(InsertBefore); 953 Module *M = IRB.GetInsertBlock()->getParent()->getParent(); 954 bool useFixedShadowIntrinsic = false; 955 // The memaccess fixed shadow intrinsic is only supported on AArch64, 956 // which allows a 16-bit immediate to be left-shifted by 32. 957 // Since kShadowBaseAlignment == 32, and Linux by default will not 958 // mmap above 48-bits, practically any valid shadow offset is 959 // representable. 960 // In particular, an offset of 4TB (1024 << 32) is representable, and 961 // ought to be good enough for anybody. 962 if (TargetTriple.isAArch64() && Mapping.Offset != kDynamicShadowSentinel) { 963 uint16_t offset_shifted = Mapping.Offset >> 32; 964 useFixedShadowIntrinsic = (uint64_t)offset_shifted << 32 == Mapping.Offset; 965 } 966 967 if (useFixedShadowIntrinsic) 968 IRB.CreateCall( 969 Intrinsic::getDeclaration( 970 M, UseShortGranules 971 ? Intrinsic::hwasan_check_memaccess_shortgranules_fixedshadow 972 : Intrinsic::hwasan_check_memaccess_fixedshadow), 973 {Ptr, ConstantInt::get(Int32Ty, AccessInfo), 974 ConstantInt::get(Int64Ty, Mapping.Offset)}); 975 else 976 IRB.CreateCall(Intrinsic::getDeclaration( 977 M, UseShortGranules 978 ? Intrinsic::hwasan_check_memaccess_shortgranules 979 : Intrinsic::hwasan_check_memaccess), 980 {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)}); 981 } 982 983 void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite, 984 unsigned AccessSizeIndex, 985 Instruction *InsertBefore, 986 DomTreeUpdater &DTU, 987 LoopInfo *LI) { 988 assert(!UsePageAliases); 989 const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex); 990 991 ShadowTagCheckInfo TCI = insertShadowTagCheck(Ptr, InsertBefore, DTU, LI); 992 993 IRBuilder<> IRB(TCI.TagMismatchTerm); 994 Value *OutOfShortGranuleTagRange = 995 IRB.CreateICmpUGT(TCI.MemTag, ConstantInt::get(Int8Ty, 15)); 996 Instruction *CheckFailTerm = SplitBlockAndInsertIfThen( 997 OutOfShortGranuleTagRange, TCI.TagMismatchTerm, !Recover, 998 MDBuilder(*C).createUnlikelyBranchWeights(), &DTU, LI); 999 1000 IRB.SetInsertPoint(TCI.TagMismatchTerm); 1001 Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(TCI.PtrLong, 15), Int8Ty); 1002 PtrLowBits = IRB.CreateAdd( 1003 PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1)); 1004 Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, TCI.MemTag); 1005 SplitBlockAndInsertIfThen(PtrLowBitsOOB, TCI.TagMismatchTerm, false, 1006 MDBuilder(*C).createUnlikelyBranchWeights(), &DTU, 1007 LI, CheckFailTerm->getParent()); 1008 1009 IRB.SetInsertPoint(TCI.TagMismatchTerm); 1010 Value *InlineTagAddr = IRB.CreateOr(TCI.AddrLong, 15); 1011 InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, PtrTy); 1012 Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr); 1013 Value *InlineTagMismatch = IRB.CreateICmpNE(TCI.PtrTag, InlineTag); 1014 SplitBlockAndInsertIfThen(InlineTagMismatch, TCI.TagMismatchTerm, false, 1015 MDBuilder(*C).createUnlikelyBranchWeights(), &DTU, 1016 LI, CheckFailTerm->getParent()); 1017 1018 IRB.SetInsertPoint(CheckFailTerm); 1019 InlineAsm *Asm; 1020 switch (TargetTriple.getArch()) { 1021 case Triple::x86_64: 1022 // The signal handler will find the data address in rdi. 1023 Asm = InlineAsm::get( 1024 FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false), 1025 "int3\nnopl " + 1026 itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) + 1027 "(%rax)", 1028 "{rdi}", 1029 /*hasSideEffects=*/true); 1030 break; 1031 case Triple::aarch64: 1032 case Triple::aarch64_be: 1033 // The signal handler will find the data address in x0. 1034 Asm = InlineAsm::get( 1035 FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false), 1036 "brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)), 1037 "{x0}", 1038 /*hasSideEffects=*/true); 1039 break; 1040 case Triple::riscv64: 1041 // The signal handler will find the data address in x10. 1042 Asm = InlineAsm::get( 1043 FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false), 1044 "ebreak\naddiw x0, x11, " + 1045 itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)), 1046 "{x10}", 1047 /*hasSideEffects=*/true); 1048 break; 1049 default: 1050 report_fatal_error("unsupported architecture"); 1051 } 1052 IRB.CreateCall(Asm, TCI.PtrLong); 1053 if (Recover) 1054 cast<BranchInst>(CheckFailTerm) 1055 ->setSuccessor(0, TCI.TagMismatchTerm->getParent()); 1056 } 1057 1058 bool HWAddressSanitizer::ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE, 1059 MemIntrinsic *MI) { 1060 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { 1061 return (!ClInstrumentWrites || ignoreAccess(ORE, MTI, MTI->getDest())) && 1062 (!ClInstrumentReads || ignoreAccess(ORE, MTI, MTI->getSource())); 1063 } 1064 if (isa<MemSetInst>(MI)) 1065 return !ClInstrumentWrites || ignoreAccess(ORE, MI, MI->getDest()); 1066 return false; 1067 } 1068 1069 void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { 1070 IRBuilder<> IRB(MI); 1071 if (isa<MemTransferInst>(MI)) { 1072 SmallVector<Value *, 4> Args{ 1073 MI->getOperand(0), MI->getOperand(1), 1074 IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}; 1075 1076 if (UseMatchAllCallback) 1077 Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag)); 1078 IRB.CreateCall(isa<MemMoveInst>(MI) ? HwasanMemmove : HwasanMemcpy, Args); 1079 } else if (isa<MemSetInst>(MI)) { 1080 SmallVector<Value *, 4> Args{ 1081 MI->getOperand(0), 1082 IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false), 1083 IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}; 1084 if (UseMatchAllCallback) 1085 Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag)); 1086 IRB.CreateCall(HwasanMemset, Args); 1087 } 1088 MI->eraseFromParent(); 1089 } 1090 1091 bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O, 1092 DomTreeUpdater &DTU, 1093 LoopInfo *LI) { 1094 Value *Addr = O.getPtr(); 1095 1096 LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n"); 1097 1098 if (O.MaybeMask) 1099 return false; // FIXME 1100 1101 IRBuilder<> IRB(O.getInsn()); 1102 if (!O.TypeStoreSize.isScalable() && isPowerOf2_64(O.TypeStoreSize) && 1103 (O.TypeStoreSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) && 1104 (!O.Alignment || *O.Alignment >= Mapping.getObjectAlignment() || 1105 *O.Alignment >= O.TypeStoreSize / 8)) { 1106 size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeStoreSize); 1107 if (InstrumentWithCalls) { 1108 SmallVector<Value *, 2> Args{IRB.CreatePointerCast(Addr, IntptrTy)}; 1109 if (UseMatchAllCallback) 1110 Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag)); 1111 IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex], 1112 Args); 1113 } else if (OutlinedChecks) { 1114 instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(), 1115 DTU, LI); 1116 } else { 1117 instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(), 1118 DTU, LI); 1119 } 1120 } else { 1121 SmallVector<Value *, 3> Args{ 1122 IRB.CreatePointerCast(Addr, IntptrTy), 1123 IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, O.TypeStoreSize), 1124 ConstantInt::get(IntptrTy, 8))}; 1125 if (UseMatchAllCallback) 1126 Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag)); 1127 IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite], Args); 1128 } 1129 untagPointerOperand(O.getInsn(), Addr); 1130 1131 return true; 1132 } 1133 1134 void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, 1135 size_t Size) { 1136 size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment()); 1137 if (!UseShortGranules) 1138 Size = AlignedSize; 1139 1140 Tag = IRB.CreateTrunc(Tag, Int8Ty); 1141 if (InstrumentWithCalls) { 1142 IRB.CreateCall(HwasanTagMemoryFunc, 1143 {IRB.CreatePointerCast(AI, PtrTy), Tag, 1144 ConstantInt::get(IntptrTy, AlignedSize)}); 1145 } else { 1146 size_t ShadowSize = Size >> Mapping.Scale; 1147 Value *AddrLong = untagPointer(IRB, IRB.CreatePointerCast(AI, IntptrTy)); 1148 Value *ShadowPtr = memToShadow(AddrLong, IRB); 1149 // If this memset is not inlined, it will be intercepted in the hwasan 1150 // runtime library. That's OK, because the interceptor skips the checks if 1151 // the address is in the shadow region. 1152 // FIXME: the interceptor is not as fast as real memset. Consider lowering 1153 // llvm.memset right here into either a sequence of stores, or a call to 1154 // hwasan_tag_memory. 1155 if (ShadowSize) 1156 IRB.CreateMemSet(ShadowPtr, Tag, ShadowSize, Align(1)); 1157 if (Size != AlignedSize) { 1158 const uint8_t SizeRemainder = Size % Mapping.getObjectAlignment().value(); 1159 IRB.CreateStore(ConstantInt::get(Int8Ty, SizeRemainder), 1160 IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize)); 1161 IRB.CreateStore( 1162 Tag, IRB.CreateConstGEP1_32(Int8Ty, IRB.CreatePointerCast(AI, PtrTy), 1163 AlignedSize - 1)); 1164 } 1165 } 1166 } 1167 1168 unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) { 1169 if (TargetTriple.getArch() == Triple::x86_64) 1170 return AllocaNo & TagMaskByte; 1171 1172 // A list of 8-bit numbers that have at most one run of non-zero bits. 1173 // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these 1174 // masks. 1175 // The list does not include the value 255, which is used for UAR. 1176 // 1177 // Because we are more likely to use earlier elements of this list than later 1178 // ones, it is sorted in increasing order of probability of collision with a 1179 // mask allocated (temporally) nearby. The program that generated this list 1180 // can be found at: 1181 // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py 1182 static const unsigned FastMasks[] = { 1183 0, 128, 64, 192, 32, 96, 224, 112, 240, 48, 16, 120, 1184 248, 56, 24, 8, 124, 252, 60, 28, 12, 4, 126, 254, 1185 62, 30, 14, 6, 2, 127, 63, 31, 15, 7, 3, 1}; 1186 return FastMasks[AllocaNo % std::size(FastMasks)]; 1187 } 1188 1189 Value *HWAddressSanitizer::applyTagMask(IRBuilder<> &IRB, Value *OldTag) { 1190 if (TagMaskByte == 0xFF) 1191 return OldTag; // No need to clear the tag byte. 1192 return IRB.CreateAnd(OldTag, 1193 ConstantInt::get(OldTag->getType(), TagMaskByte)); 1194 } 1195 1196 Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) { 1197 return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy); 1198 } 1199 1200 Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) { 1201 if (ClGenerateTagsWithCalls) 1202 return nullptr; 1203 if (StackBaseTag) 1204 return StackBaseTag; 1205 // Extract some entropy from the stack pointer for the tags. 1206 // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ 1207 // between functions). 1208 Value *FramePointerLong = getCachedFP(IRB); 1209 Value *StackTag = 1210 applyTagMask(IRB, IRB.CreateXor(FramePointerLong, 1211 IRB.CreateLShr(FramePointerLong, 20))); 1212 StackTag->setName("hwasan.stack.base.tag"); 1213 return StackTag; 1214 } 1215 1216 Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag, 1217 unsigned AllocaNo) { 1218 if (ClGenerateTagsWithCalls) 1219 return getNextTagWithCall(IRB); 1220 return IRB.CreateXor( 1221 StackTag, ConstantInt::get(StackTag->getType(), retagMask(AllocaNo))); 1222 } 1223 1224 Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB) { 1225 Value *FramePointerLong = getCachedFP(IRB); 1226 Value *UARTag = 1227 applyTagMask(IRB, IRB.CreateLShr(FramePointerLong, PointerTagShift)); 1228 1229 UARTag->setName("hwasan.uar.tag"); 1230 return UARTag; 1231 } 1232 1233 // Add a tag to an address. 1234 Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty, 1235 Value *PtrLong, Value *Tag) { 1236 assert(!UsePageAliases); 1237 Value *TaggedPtrLong; 1238 if (CompileKernel) { 1239 // Kernel addresses have 0xFF in the most significant byte. 1240 Value *ShiftedTag = 1241 IRB.CreateOr(IRB.CreateShl(Tag, PointerTagShift), 1242 ConstantInt::get(IntptrTy, (1ULL << PointerTagShift) - 1)); 1243 TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag); 1244 } else { 1245 // Userspace can simply do OR (tag << PointerTagShift); 1246 Value *ShiftedTag = IRB.CreateShl(Tag, PointerTagShift); 1247 TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag); 1248 } 1249 return IRB.CreateIntToPtr(TaggedPtrLong, Ty); 1250 } 1251 1252 // Remove tag from an address. 1253 Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) { 1254 assert(!UsePageAliases); 1255 Value *UntaggedPtrLong; 1256 if (CompileKernel) { 1257 // Kernel addresses have 0xFF in the most significant byte. 1258 UntaggedPtrLong = 1259 IRB.CreateOr(PtrLong, ConstantInt::get(PtrLong->getType(), 1260 TagMaskByte << PointerTagShift)); 1261 } else { 1262 // Userspace addresses have 0x00. 1263 UntaggedPtrLong = IRB.CreateAnd( 1264 PtrLong, ConstantInt::get(PtrLong->getType(), 1265 ~(TagMaskByte << PointerTagShift))); 1266 } 1267 return UntaggedPtrLong; 1268 } 1269 1270 Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB) { 1271 // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER 1272 // in Bionic's libc/platform/bionic/tls_defines.h. 1273 constexpr int SanitizerSlot = 6; 1274 if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) 1275 return memtag::getAndroidSlotPtr(IRB, SanitizerSlot); 1276 return ThreadPtrGlobal; 1277 } 1278 1279 Value *HWAddressSanitizer::getCachedFP(IRBuilder<> &IRB) { 1280 if (!CachedFP) 1281 CachedFP = memtag::getFP(IRB); 1282 return CachedFP; 1283 } 1284 1285 Value *HWAddressSanitizer::getFrameRecordInfo(IRBuilder<> &IRB) { 1286 // Prepare ring buffer data. 1287 Value *PC = memtag::getPC(TargetTriple, IRB); 1288 Value *FP = getCachedFP(IRB); 1289 1290 // Mix FP and PC. 1291 // Assumptions: 1292 // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero) 1293 // FP is 0xfffffffffffFFFF0 (4 lower bits are zero) 1294 // We only really need ~20 lower non-zero bits (FFFF), so we mix like this: 1295 // 0xFFFFPPPPPPPPPPPP 1296 // 1297 // FP works because in AArch64FrameLowering::getFrameIndexReference, we 1298 // prefer FP-relative offsets for functions compiled with HWASan. 1299 FP = IRB.CreateShl(FP, 44); 1300 return IRB.CreateOr(PC, FP); 1301 } 1302 1303 void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) { 1304 if (!Mapping.InTls) 1305 ShadowBase = getShadowNonTls(IRB); 1306 else if (!WithFrameRecord && TargetTriple.isAndroid()) 1307 ShadowBase = getDynamicShadowIfunc(IRB); 1308 1309 if (!WithFrameRecord && ShadowBase) 1310 return; 1311 1312 Value *SlotPtr = nullptr; 1313 Value *ThreadLong = nullptr; 1314 Value *ThreadLongMaybeUntagged = nullptr; 1315 1316 auto getThreadLongMaybeUntagged = [&]() { 1317 if (!SlotPtr) 1318 SlotPtr = getHwasanThreadSlotPtr(IRB); 1319 if (!ThreadLong) 1320 ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr); 1321 // Extract the address field from ThreadLong. Unnecessary on AArch64 with 1322 // TBI. 1323 return TargetTriple.isAArch64() ? ThreadLong 1324 : untagPointer(IRB, ThreadLong); 1325 }; 1326 1327 if (WithFrameRecord) { 1328 switch (ClRecordStackHistory) { 1329 case libcall: { 1330 // Emit a runtime call into hwasan rather than emitting instructions for 1331 // recording stack history. 1332 Value *FrameRecordInfo = getFrameRecordInfo(IRB); 1333 IRB.CreateCall(HwasanRecordFrameRecordFunc, {FrameRecordInfo}); 1334 break; 1335 } 1336 case instr: { 1337 ThreadLongMaybeUntagged = getThreadLongMaybeUntagged(); 1338 1339 StackBaseTag = IRB.CreateAShr(ThreadLong, 3); 1340 1341 // Store data to ring buffer. 1342 Value *FrameRecordInfo = getFrameRecordInfo(IRB); 1343 Value *RecordPtr = 1344 IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IRB.getPtrTy(0)); 1345 IRB.CreateStore(FrameRecordInfo, RecordPtr); 1346 1347 // Update the ring buffer. Top byte of ThreadLong defines the size of the 1348 // buffer in pages, it must be a power of two, and the start of the buffer 1349 // must be aligned by twice that much. Therefore wrap around of the ring 1350 // buffer is simply Addr &= ~((ThreadLong >> 56) << 12). 1351 // The use of AShr instead of LShr is due to 1352 // https://bugs.llvm.org/show_bug.cgi?id=39030 1353 // Runtime library makes sure not to use the highest bit. 1354 // 1355 // Mechanical proof of this address calculation can be found at: 1356 // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/prove_hwasanwrap.smt2 1357 // 1358 // Example of the wrap case for N = 1 1359 // Pointer: 0x01AAAAAAAAAAAFF8 1360 // + 1361 // 0x0000000000000008 1362 // = 1363 // 0x01AAAAAAAAAAB000 1364 // & 1365 // WrapMask: 0xFFFFFFFFFFFFF000 1366 // = 1367 // 0x01AAAAAAAAAAA000 1368 // 1369 // Then the WrapMask will be a no-op until the next wrap case. 1370 Value *WrapMask = IRB.CreateXor( 1371 IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true), 1372 ConstantInt::get(IntptrTy, (uint64_t)-1)); 1373 Value *ThreadLongNew = IRB.CreateAnd( 1374 IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask); 1375 IRB.CreateStore(ThreadLongNew, SlotPtr); 1376 break; 1377 } 1378 case none: { 1379 llvm_unreachable( 1380 "A stack history recording mode should've been selected."); 1381 } 1382 } 1383 } 1384 1385 if (!ShadowBase) { 1386 if (!ThreadLongMaybeUntagged) 1387 ThreadLongMaybeUntagged = getThreadLongMaybeUntagged(); 1388 1389 // Get shadow base address by aligning RecordPtr up. 1390 // Note: this is not correct if the pointer is already aligned. 1391 // Runtime library will make sure this never happens. 1392 ShadowBase = IRB.CreateAdd( 1393 IRB.CreateOr( 1394 ThreadLongMaybeUntagged, 1395 ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)), 1396 ConstantInt::get(IntptrTy, 1), "hwasan.shadow"); 1397 ShadowBase = IRB.CreateIntToPtr(ShadowBase, PtrTy); 1398 } 1399 } 1400 1401 bool HWAddressSanitizer::instrumentLandingPads( 1402 SmallVectorImpl<Instruction *> &LandingPadVec) { 1403 for (auto *LP : LandingPadVec) { 1404 IRBuilder<> IRB(LP->getNextNonDebugInstruction()); 1405 IRB.CreateCall( 1406 HwasanHandleVfork, 1407 {memtag::readRegister( 1408 IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp" : "sp")}); 1409 } 1410 return true; 1411 } 1412 1413 bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo, 1414 Value *StackTag, Value *UARTag, 1415 const DominatorTree &DT, 1416 const PostDominatorTree &PDT, 1417 const LoopInfo &LI) { 1418 // Ideally, we want to calculate tagged stack base pointer, and rewrite all 1419 // alloca addresses using that. Unfortunately, offsets are not known yet 1420 // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a 1421 // temp, shift-OR it into each alloca address and xor with the retag mask. 1422 // This generates one extra instruction per alloca use. 1423 unsigned int I = 0; 1424 1425 for (auto &KV : SInfo.AllocasToInstrument) { 1426 auto N = I++; 1427 auto *AI = KV.first; 1428 memtag::AllocaInfo &Info = KV.second; 1429 IRBuilder<> IRB(AI->getNextNonDebugInstruction()); 1430 1431 // Replace uses of the alloca with tagged address. 1432 Value *Tag = getAllocaTag(IRB, StackTag, N); 1433 Value *AILong = IRB.CreatePointerCast(AI, IntptrTy); 1434 Value *AINoTagLong = untagPointer(IRB, AILong); 1435 Value *Replacement = tagPointer(IRB, AI->getType(), AINoTagLong, Tag); 1436 std::string Name = 1437 AI->hasName() ? AI->getName().str() : "alloca." + itostr(N); 1438 Replacement->setName(Name + ".hwasan"); 1439 1440 size_t Size = memtag::getAllocaSizeInBytes(*AI); 1441 size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment()); 1442 1443 Value *AICast = IRB.CreatePointerCast(AI, PtrTy); 1444 1445 auto HandleLifetime = [&](IntrinsicInst *II) { 1446 // Set the lifetime intrinsic to cover the whole alloca. This reduces the 1447 // set of assumptions we need to make about the lifetime. Without this we 1448 // would need to ensure that we can track the lifetime pointer to a 1449 // constant offset from the alloca, and would still need to change the 1450 // size to include the extra alignment we use for the untagging to make 1451 // the size consistent. 1452 // 1453 // The check for standard lifetime below makes sure that we have exactly 1454 // one set of start / end in any execution (i.e. the ends are not 1455 // reachable from each other), so this will not cause any problems. 1456 II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize)); 1457 II->setArgOperand(1, AICast); 1458 }; 1459 llvm::for_each(Info.LifetimeStart, HandleLifetime); 1460 llvm::for_each(Info.LifetimeEnd, HandleLifetime); 1461 1462 AI->replaceUsesWithIf(Replacement, [AICast, AILong](const Use &U) { 1463 auto *User = U.getUser(); 1464 return User != AILong && User != AICast && 1465 !memtag::isLifetimeIntrinsic(User); 1466 }); 1467 1468 memtag::annotateDebugRecords(Info, retagMask(N)); 1469 1470 auto TagEnd = [&](Instruction *Node) { 1471 IRB.SetInsertPoint(Node); 1472 // When untagging, use the `AlignedSize` because we need to set the tags 1473 // for the entire alloca to original. If we used `Size` here, we would 1474 // keep the last granule tagged, and store zero in the last byte of the 1475 // last granule, due to how short granules are implemented. 1476 tagAlloca(IRB, AI, UARTag, AlignedSize); 1477 }; 1478 // Calls to functions that may return twice (e.g. setjmp) confuse the 1479 // postdominator analysis, and will leave us to keep memory tagged after 1480 // function return. Work around this by always untagging at every return 1481 // statement if return_twice functions are called. 1482 bool StandardLifetime = 1483 !SInfo.CallsReturnTwice && 1484 SInfo.UnrecognizedLifetimes.empty() && 1485 memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, &DT, 1486 &LI, ClMaxLifetimes); 1487 if (DetectUseAfterScope && StandardLifetime) { 1488 IntrinsicInst *Start = Info.LifetimeStart[0]; 1489 IRB.SetInsertPoint(Start->getNextNode()); 1490 tagAlloca(IRB, AI, Tag, Size); 1491 if (!memtag::forAllReachableExits(DT, PDT, LI, Start, Info.LifetimeEnd, 1492 SInfo.RetVec, TagEnd)) { 1493 for (auto *End : Info.LifetimeEnd) 1494 End->eraseFromParent(); 1495 } 1496 } else { 1497 tagAlloca(IRB, AI, Tag, Size); 1498 for (auto *RI : SInfo.RetVec) 1499 TagEnd(RI); 1500 // We inserted tagging outside of the lifetimes, so we have to remove 1501 // them. 1502 for (auto &II : Info.LifetimeStart) 1503 II->eraseFromParent(); 1504 for (auto &II : Info.LifetimeEnd) 1505 II->eraseFromParent(); 1506 } 1507 memtag::alignAndPadAlloca(Info, Mapping.getObjectAlignment()); 1508 } 1509 for (auto &I : SInfo.UnrecognizedLifetimes) 1510 I->eraseFromParent(); 1511 return true; 1512 } 1513 1514 static void emitRemark(const Function &F, OptimizationRemarkEmitter &ORE, 1515 bool Skip) { 1516 if (Skip) { 1517 ORE.emit([&]() { 1518 return OptimizationRemark(DEBUG_TYPE, "Skip", &F) 1519 << "Skipped: F=" << ore::NV("Function", &F); 1520 }); 1521 } else { 1522 ORE.emit([&]() { 1523 return OptimizationRemarkMissed(DEBUG_TYPE, "Sanitize", &F) 1524 << "Sanitized: F=" << ore::NV("Function", &F); 1525 }); 1526 } 1527 } 1528 1529 bool HWAddressSanitizer::selectiveInstrumentationShouldSkip( 1530 Function &F, FunctionAnalysisManager &FAM) const { 1531 bool Skip = [&]() { 1532 if (ClRandomSkipRate.getNumOccurrences()) { 1533 std::bernoulli_distribution D(ClRandomSkipRate); 1534 return !D(*Rng); 1535 } 1536 if (!ClHotPercentileCutoff.getNumOccurrences()) 1537 return false; 1538 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 1539 ProfileSummaryInfo *PSI = 1540 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 1541 if (!PSI || !PSI->hasProfileSummary()) { 1542 ++NumNoProfileSummaryFuncs; 1543 return false; 1544 } 1545 return PSI->isFunctionHotInCallGraphNthPercentile( 1546 ClHotPercentileCutoff, &F, FAM.getResult<BlockFrequencyAnalysis>(F)); 1547 }(); 1548 emitRemark(F, FAM.getResult<OptimizationRemarkEmitterAnalysis>(F), Skip); 1549 return Skip; 1550 } 1551 1552 void HWAddressSanitizer::sanitizeFunction(Function &F, 1553 FunctionAnalysisManager &FAM) { 1554 if (&F == HwasanCtorFunction) 1555 return; 1556 1557 if (!F.hasFnAttribute(Attribute::SanitizeHWAddress)) 1558 return; 1559 1560 if (F.empty()) 1561 return; 1562 1563 NumTotalFuncs++; 1564 1565 OptimizationRemarkEmitter &ORE = 1566 FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 1567 1568 if (selectiveInstrumentationShouldSkip(F, FAM)) 1569 return; 1570 1571 NumInstrumentedFuncs++; 1572 1573 LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n"); 1574 1575 SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument; 1576 SmallVector<MemIntrinsic *, 16> IntrinToInstrument; 1577 SmallVector<Instruction *, 8> LandingPadVec; 1578 const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F); 1579 1580 memtag::StackInfoBuilder SIB(SSI); 1581 for (auto &Inst : instructions(F)) { 1582 if (InstrumentStack) { 1583 SIB.visit(Inst); 1584 } 1585 1586 if (InstrumentLandingPads && isa<LandingPadInst>(Inst)) 1587 LandingPadVec.push_back(&Inst); 1588 1589 getInterestingMemoryOperands(ORE, &Inst, TLI, OperandsToInstrument); 1590 1591 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst)) 1592 if (!ignoreMemIntrinsic(ORE, MI)) 1593 IntrinToInstrument.push_back(MI); 1594 } 1595 1596 memtag::StackInfo &SInfo = SIB.get(); 1597 1598 initializeCallbacks(*F.getParent()); 1599 1600 if (!LandingPadVec.empty()) 1601 instrumentLandingPads(LandingPadVec); 1602 1603 if (SInfo.AllocasToInstrument.empty() && F.hasPersonalityFn() && 1604 F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) { 1605 // __hwasan_personality_thunk is a no-op for functions without an 1606 // instrumented stack, so we can drop it. 1607 F.setPersonalityFn(nullptr); 1608 } 1609 1610 if (SInfo.AllocasToInstrument.empty() && OperandsToInstrument.empty() && 1611 IntrinToInstrument.empty()) 1612 return; 1613 1614 assert(!ShadowBase); 1615 1616 // Remove memory attributes that are about to become invalid. 1617 // HWASan checks read from shadow, which invalidates memory(argmem: *) 1618 // Short granule checks on function arguments read from the argument memory 1619 // (last byte of the granule), which invalidates writeonly. 1620 F.removeFnAttr(llvm::Attribute::Memory); 1621 for (auto &A : F.args()) 1622 A.removeAttr(llvm::Attribute::WriteOnly); 1623 1624 BasicBlock::iterator InsertPt = F.getEntryBlock().begin(); 1625 IRBuilder<> EntryIRB(&F.getEntryBlock(), InsertPt); 1626 emitPrologue(EntryIRB, 1627 /*WithFrameRecord*/ ClRecordStackHistory != none && 1628 Mapping.WithFrameRecord && 1629 !SInfo.AllocasToInstrument.empty()); 1630 1631 if (!SInfo.AllocasToInstrument.empty()) { 1632 const DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F); 1633 const PostDominatorTree &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F); 1634 const LoopInfo &LI = FAM.getResult<LoopAnalysis>(F); 1635 Value *StackTag = getStackBaseTag(EntryIRB); 1636 Value *UARTag = getUARTag(EntryIRB); 1637 instrumentStack(SInfo, StackTag, UARTag, DT, PDT, LI); 1638 } 1639 1640 // If we split the entry block, move any allocas that were originally in the 1641 // entry block back into the entry block so that they aren't treated as 1642 // dynamic allocas. 1643 if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) { 1644 InsertPt = F.getEntryBlock().begin(); 1645 for (Instruction &I : 1646 llvm::make_early_inc_range(*EntryIRB.GetInsertBlock())) { 1647 if (auto *AI = dyn_cast<AllocaInst>(&I)) 1648 if (isa<ConstantInt>(AI->getArraySize())) 1649 I.moveBefore(F.getEntryBlock(), InsertPt); 1650 } 1651 } 1652 1653 DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); 1654 PostDominatorTree *PDT = FAM.getCachedResult<PostDominatorTreeAnalysis>(F); 1655 LoopInfo *LI = FAM.getCachedResult<LoopAnalysis>(F); 1656 DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy); 1657 for (auto &Operand : OperandsToInstrument) 1658 instrumentMemAccess(Operand, DTU, LI); 1659 DTU.flush(); 1660 1661 if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) { 1662 for (auto *Inst : IntrinToInstrument) 1663 instrumentMemIntrinsic(Inst); 1664 } 1665 1666 ShadowBase = nullptr; 1667 StackBaseTag = nullptr; 1668 CachedFP = nullptr; 1669 } 1670 1671 void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) { 1672 assert(!UsePageAliases); 1673 Constant *Initializer = GV->getInitializer(); 1674 uint64_t SizeInBytes = 1675 M.getDataLayout().getTypeAllocSize(Initializer->getType()); 1676 uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment()); 1677 if (SizeInBytes != NewSize) { 1678 // Pad the initializer out to the next multiple of 16 bytes and add the 1679 // required short granule tag. 1680 std::vector<uint8_t> Init(NewSize - SizeInBytes, 0); 1681 Init.back() = Tag; 1682 Constant *Padding = ConstantDataArray::get(*C, Init); 1683 Initializer = ConstantStruct::getAnon({Initializer, Padding}); 1684 } 1685 1686 auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(), 1687 GlobalValue::ExternalLinkage, Initializer, 1688 GV->getName() + ".hwasan"); 1689 NewGV->copyAttributesFrom(GV); 1690 NewGV->setLinkage(GlobalValue::PrivateLinkage); 1691 NewGV->copyMetadata(GV, 0); 1692 NewGV->setAlignment( 1693 std::max(GV->getAlign().valueOrOne(), Mapping.getObjectAlignment())); 1694 1695 // It is invalid to ICF two globals that have different tags. In the case 1696 // where the size of the global is a multiple of the tag granularity the 1697 // contents of the globals may be the same but the tags (i.e. symbol values) 1698 // may be different, and the symbols are not considered during ICF. In the 1699 // case where the size is not a multiple of the granularity, the short granule 1700 // tags would discriminate two globals with different tags, but there would 1701 // otherwise be nothing stopping such a global from being incorrectly ICF'd 1702 // with an uninstrumented (i.e. tag 0) global that happened to have the short 1703 // granule tag in the last byte. 1704 NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None); 1705 1706 // Descriptor format (assuming little-endian): 1707 // bytes 0-3: relative address of global 1708 // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case 1709 // it isn't, we create multiple descriptors) 1710 // byte 7: tag 1711 auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty); 1712 const uint64_t MaxDescriptorSize = 0xfffff0; 1713 for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes; 1714 DescriptorPos += MaxDescriptorSize) { 1715 auto *Descriptor = 1716 new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage, 1717 nullptr, GV->getName() + ".hwasan.descriptor"); 1718 auto *GVRelPtr = ConstantExpr::getTrunc( 1719 ConstantExpr::getAdd( 1720 ConstantExpr::getSub( 1721 ConstantExpr::getPtrToInt(NewGV, Int64Ty), 1722 ConstantExpr::getPtrToInt(Descriptor, Int64Ty)), 1723 ConstantInt::get(Int64Ty, DescriptorPos)), 1724 Int32Ty); 1725 uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize); 1726 auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24)); 1727 Descriptor->setComdat(NewGV->getComdat()); 1728 Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag})); 1729 Descriptor->setSection("hwasan_globals"); 1730 Descriptor->setMetadata(LLVMContext::MD_associated, 1731 MDNode::get(*C, ValueAsMetadata::get(NewGV))); 1732 appendToCompilerUsed(M, Descriptor); 1733 } 1734 1735 Constant *Aliasee = ConstantExpr::getIntToPtr( 1736 ConstantExpr::getAdd( 1737 ConstantExpr::getPtrToInt(NewGV, Int64Ty), 1738 ConstantInt::get(Int64Ty, uint64_t(Tag) << PointerTagShift)), 1739 GV->getType()); 1740 auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(), 1741 GV->getLinkage(), "", Aliasee, &M); 1742 Alias->setVisibility(GV->getVisibility()); 1743 Alias->takeName(GV); 1744 GV->replaceAllUsesWith(Alias); 1745 GV->eraseFromParent(); 1746 } 1747 1748 void HWAddressSanitizer::instrumentGlobals() { 1749 std::vector<GlobalVariable *> Globals; 1750 for (GlobalVariable &GV : M.globals()) { 1751 if (GV.hasSanitizerMetadata() && GV.getSanitizerMetadata().NoHWAddress) 1752 continue; 1753 1754 if (GV.isDeclarationForLinker() || GV.getName().starts_with("llvm.") || 1755 GV.isThreadLocal()) 1756 continue; 1757 1758 // Common symbols can't have aliases point to them, so they can't be tagged. 1759 if (GV.hasCommonLinkage()) 1760 continue; 1761 1762 // Globals with custom sections may be used in __start_/__stop_ enumeration, 1763 // which would be broken both by adding tags and potentially by the extra 1764 // padding/alignment that we insert. 1765 if (GV.hasSection()) 1766 continue; 1767 1768 Globals.push_back(&GV); 1769 } 1770 1771 MD5 Hasher; 1772 Hasher.update(M.getSourceFileName()); 1773 MD5::MD5Result Hash; 1774 Hasher.final(Hash); 1775 uint8_t Tag = Hash[0]; 1776 1777 assert(TagMaskByte >= 16); 1778 1779 for (GlobalVariable *GV : Globals) { 1780 // Don't allow globals to be tagged with something that looks like a 1781 // short-granule tag, otherwise we lose inter-granule overflow detection, as 1782 // the fast path shadow-vs-address check succeeds. 1783 if (Tag < 16 || Tag > TagMaskByte) 1784 Tag = 16; 1785 instrumentGlobal(GV, Tag++); 1786 } 1787 } 1788 1789 void HWAddressSanitizer::instrumentPersonalityFunctions() { 1790 // We need to untag stack frames as we unwind past them. That is the job of 1791 // the personality function wrapper, which either wraps an existing 1792 // personality function or acts as a personality function on its own. Each 1793 // function that has a personality function or that can be unwound past has 1794 // its personality function changed to a thunk that calls the personality 1795 // function wrapper in the runtime. 1796 MapVector<Constant *, std::vector<Function *>> PersonalityFns; 1797 for (Function &F : M) { 1798 if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress)) 1799 continue; 1800 1801 if (F.hasPersonalityFn()) { 1802 PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F); 1803 } else if (!F.hasFnAttribute(Attribute::NoUnwind)) { 1804 PersonalityFns[nullptr].push_back(&F); 1805 } 1806 } 1807 1808 if (PersonalityFns.empty()) 1809 return; 1810 1811 FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction( 1812 "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty, PtrTy, 1813 PtrTy, PtrTy, PtrTy, PtrTy); 1814 FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy); 1815 FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy); 1816 1817 for (auto &P : PersonalityFns) { 1818 std::string ThunkName = kHwasanPersonalityThunkName; 1819 if (P.first) 1820 ThunkName += ("." + P.first->getName()).str(); 1821 FunctionType *ThunkFnTy = FunctionType::get( 1822 Int32Ty, {Int32Ty, Int32Ty, Int64Ty, PtrTy, PtrTy}, false); 1823 bool IsLocal = P.first && (!isa<GlobalValue>(P.first) || 1824 cast<GlobalValue>(P.first)->hasLocalLinkage()); 1825 auto *ThunkFn = Function::Create(ThunkFnTy, 1826 IsLocal ? GlobalValue::InternalLinkage 1827 : GlobalValue::LinkOnceODRLinkage, 1828 ThunkName, &M); 1829 if (!IsLocal) { 1830 ThunkFn->setVisibility(GlobalValue::HiddenVisibility); 1831 ThunkFn->setComdat(M.getOrInsertComdat(ThunkName)); 1832 } 1833 1834 auto *BB = BasicBlock::Create(*C, "entry", ThunkFn); 1835 IRBuilder<> IRB(BB); 1836 CallInst *WrapperCall = IRB.CreateCall( 1837 HwasanPersonalityWrapper, 1838 {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2), 1839 ThunkFn->getArg(3), ThunkFn->getArg(4), 1840 P.first ? P.first : Constant::getNullValue(PtrTy), 1841 UnwindGetGR.getCallee(), UnwindGetCFA.getCallee()}); 1842 WrapperCall->setTailCall(); 1843 IRB.CreateRet(WrapperCall); 1844 1845 for (Function *F : P.second) 1846 F->setPersonalityFn(ThunkFn); 1847 } 1848 } 1849 1850 void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple, 1851 bool InstrumentWithCalls) { 1852 Scale = kDefaultShadowScale; 1853 if (TargetTriple.isOSFuchsia()) { 1854 // Fuchsia is always PIE, which means that the beginning of the address 1855 // space is always available. 1856 InGlobal = false; 1857 InTls = false; 1858 Offset = 0; 1859 WithFrameRecord = true; 1860 } else if (ClMappingOffset.getNumOccurrences() > 0) { 1861 InGlobal = false; 1862 InTls = false; 1863 Offset = ClMappingOffset; 1864 WithFrameRecord = false; 1865 } else if (ClEnableKhwasan || InstrumentWithCalls) { 1866 InGlobal = false; 1867 InTls = false; 1868 Offset = 0; 1869 WithFrameRecord = false; 1870 } else if (ClWithIfunc) { 1871 InGlobal = true; 1872 InTls = false; 1873 Offset = kDynamicShadowSentinel; 1874 WithFrameRecord = false; 1875 } else if (ClWithTls) { 1876 InGlobal = false; 1877 InTls = true; 1878 Offset = kDynamicShadowSentinel; 1879 WithFrameRecord = true; 1880 } else { 1881 InGlobal = false; 1882 InTls = false; 1883 Offset = kDynamicShadowSentinel; 1884 WithFrameRecord = false; 1885 } 1886 } 1887