//===- HWAddressSanitizer.cpp - memory access error detector --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file is a part of HWAddressSanitizer, an address basic correctness
/// checker based on tagged addressing.
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/RandomNumberGenerator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <limits>
#include <optional>
#include <random>

using namespace llvm;

#define DEBUG_TYPE "hwasan"

const char kHwasanModuleCtorName[] = "hwasan.module_ctor";
const char kHwasanNoteName[] = "hwasan.note";
const char kHwasanInitName[] = "__hwasan_init";
const char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk";

const char kHwasanShadowMemoryDynamicAddress[] =
    "__hwasan_shadow_memory_dynamic_address";

// Access sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;

static const size_t kDefaultShadowScale = 4;
static const uint64_t kDynamicShadowSentinel =
    std::numeric_limits<uint64_t>::max();

static const unsigned kShadowBaseAlignment = 32;

static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("hwasan-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__hwasan_"));

static cl::opt<bool> ClKasanMemIntrinCallbackPrefix(
    "hwasan-kernel-mem-intrinsic-prefix",
    cl::desc("Use prefix for memory intrinsics in KASAN mode"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentWithCalls(
    "hwasan-instrument-with-calls",
    cl::desc("instrument reads and writes with callbacks"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("hwasan-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "hwasan-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval",
                                       cl::desc("instrument byval arguments"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClRecover("hwasan-recover",
              cl::desc("Enable recovery mode (continue-after-error)."),
              cl::Hidden, cl::init(false));

static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
                                       cl::desc("instrument stack (allocas)"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClUseStackSafety("hwasan-use-stack-safety", cl::Hidden, cl::init(true),
                     cl::desc("Use Stack Safety analysis results"),
                     cl::Optional);

static cl::opt<size_t> ClMaxLifetimes(
    "hwasan-max-lifetimes-for-alloca", cl::init(3), cl::ReallyHidden,
    cl::desc("How many lifetime ends to handle for a single alloca."),
    cl::Optional);

static cl::opt<bool>
    ClUseAfterScope("hwasan-use-after-scope",
                    cl::desc("detect use after scope within function"),
                    cl::Hidden, cl::init(true));

static cl::opt<bool> ClGenerateTagsWithCalls(
    "hwasan-generate-tags-with-calls",
    cl::desc("generate new tags with runtime library calls"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
                               cl::Hidden, cl::init(false));

static cl::opt<int> ClMatchAllTag(
    "hwasan-match-all-tag",
    cl::desc("don't report bad accesses via pointers with this tag"),
    cl::Hidden, cl::init(-1));

static cl::opt<bool>
    ClEnableKhwasan("hwasan-kernel",
                    cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
                    cl::Hidden, cl::init(false));

// These flags allow changing the shadow mapping and control how shadow memory
// is accessed. The shadow mapping looks like:
//   Shadow = (Mem >> scale) + offset
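//
// For example (illustrative values), with the default scale of 4 and a fixed
// offset of 0x100000000, the shadow byte for an access at 0x7fff0010 is at
//   (0x7fff0010 >> 4) + 0x100000000 == 0x107fff001.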

static cl::opt<uint64_t>
    ClMappingOffset("hwasan-mapping-offset",
                    cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
                    cl::Hidden, cl::init(0));

static cl::opt<bool>
    ClWithIfunc("hwasan-with-ifunc",
                cl::desc("Access dynamic shadow through an ifunc global on "
                         "platforms that support this"),
                cl::Hidden, cl::init(false));

static cl::opt<bool> ClWithTls(
    "hwasan-with-tls",
    cl::desc("Access dynamic shadow through a thread-local pointer on "
             "platforms that support this"),
    cl::Hidden, cl::init(true));

static cl::opt<int> ClHotPercentileCutoff("hwasan-percentile-cutoff-hot",
                                          cl::desc("Hot percentile cutoff."));

static cl::opt<float>
    ClRandomSkipRate("hwasan-random-rate",
                     cl::desc("Probability value in the range [0.0, 1.0] "
                              "to keep instrumentation of a function."));
STATISTIC(NumTotalFuncs, "Number of total funcs");
STATISTIC(NumInstrumentedFuncs, "Number of instrumented funcs");
STATISTIC(NumNoProfileSummaryFuncs, "Number of funcs without PS");

// Mode for selecting how to insert frame record info into the stack ring
// buffer.
enum RecordStackHistoryMode {
  // Do not record frame record info.
  none,

  // Insert instructions into the prologue for storing into the stack ring
  // buffer directly.
  instr,

  // Add a call to __hwasan_add_frame_record in the runtime.
  libcall,
};

static cl::opt<RecordStackHistoryMode> ClRecordStackHistory(
    "hwasan-record-stack-history",
    cl::desc("Record stack frames with tagged allocations in a thread-local "
             "ring buffer"),
    cl::values(clEnumVal(none, "Do not record stack ring history"),
               clEnumVal(instr, "Insert instructions into the prologue for "
                                "storing into the stack ring buffer directly"),
               clEnumVal(libcall, "Add a call to __hwasan_add_frame_record for "
                                  "storing into the stack ring buffer")),
    cl::Hidden, cl::init(instr));

static cl::opt<bool>
    ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
                              cl::desc("instrument memory intrinsics"),
                              cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentLandingPads("hwasan-instrument-landing-pads",
                            cl::desc("instrument landing pads"), cl::Hidden,
                            cl::init(false));

static cl::opt<bool> ClUseShortGranules(
    "hwasan-use-short-granules",
    cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentPersonalityFunctions(
    "hwasan-instrument-personality-functions",
    cl::desc("instrument personality functions"), cl::Hidden);

static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
                                       cl::desc("inline all checks"),
                                       cl::Hidden, cl::init(false));

static cl::opt<bool> ClInlineFastPathChecks("hwasan-inline-fast-path-checks",
                                            cl::desc("inline fast path checks"),
                                            cl::Hidden, cl::init(false));

// Enabled from clang by "-fsanitize-hwaddress-experimental-aliasing".
static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases",
                                      cl::desc("Use page aliasing in HWASan"),
                                      cl::Hidden, cl::init(false));

namespace {

template <typename T> T optOr(cl::opt<T> &Opt, T Other) {
  return Opt.getNumOccurrences() ? Opt : Other;
}

bool shouldUsePageAliases(const Triple &TargetTriple) {
  return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64;
}

bool shouldInstrumentStack(const Triple &TargetTriple) {
  return !shouldUsePageAliases(TargetTriple) && ClInstrumentStack;
}

bool shouldInstrumentWithCalls(const Triple &TargetTriple) {
  return optOr(ClInstrumentWithCalls, TargetTriple.getArch() == Triple::x86_64);
}

bool mightUseStackSafetyAnalysis(bool DisableOptimization) {
  return optOr(ClUseStackSafety, !DisableOptimization);
}

bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
                                  bool DisableOptimization) {
  return shouldInstrumentStack(TargetTriple) &&
         mightUseStackSafetyAnalysis(DisableOptimization);
}

bool shouldDetectUseAfterScope(const Triple &TargetTriple) {
  return ClUseAfterScope && shouldInstrumentStack(TargetTriple);
}

/// An instrumentation pass implementing detection of addressability bugs
/// using tagged pointers.
class HWAddressSanitizer {
public:
  HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
                     const StackSafetyGlobalInfo *SSI)
      : M(M), SSI(SSI) {
    this->Recover = optOr(ClRecover, Recover);
    this->CompileKernel = optOr(ClEnableKhwasan, CompileKernel);
    this->Rng = ClRandomSkipRate.getNumOccurrences() ? M.createRNG(DEBUG_TYPE)
                                                     : nullptr;

    initializeModule();
  }

  void sanitizeFunction(Function &F, FunctionAnalysisManager &FAM);

private:
  struct ShadowTagCheckInfo {
    Instruction *TagMismatchTerm = nullptr;
    Value *PtrLong = nullptr;
    Value *AddrLong = nullptr;
    Value *PtrTag = nullptr;
    Value *MemTag = nullptr;
  };

  bool selectiveInstrumentationShouldSkip(Function &F,
                                          FunctionAnalysisManager &FAM) const;
  void initializeModule();
  void createHwasanCtorComdat();

  void initializeCallbacks(Module &M);

  Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val);

  Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
  Value *getShadowNonTls(IRBuilder<> &IRB);

  void untagPointerOperand(Instruction *I, Value *Addr);
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);

  int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex);
  ShadowTagCheckInfo insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore,
                                          DomTreeUpdater &DTU, LoopInfo *LI);
  void instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
                                  unsigned AccessSizeIndex,
                                  Instruction *InsertBefore,
                                  DomTreeUpdater &DTU, LoopInfo *LI);
  void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                 unsigned AccessSizeIndex,
                                 Instruction *InsertBefore, DomTreeUpdater &DTU,
                                 LoopInfo *LI);
  bool ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE, MemIntrinsic *MI);
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  bool instrumentMemAccess(InterestingMemoryOperand &O, DomTreeUpdater &DTU,
                           LoopInfo *LI);
  bool ignoreAccessWithoutRemark(Instruction *Inst, Value *Ptr);
  bool ignoreAccess(OptimizationRemarkEmitter &ORE, Instruction *Inst,
                    Value *Ptr);

  void getInterestingMemoryOperands(
      OptimizationRemarkEmitter &ORE, Instruction *I,
      const TargetLibraryInfo &TLI,
      SmallVectorImpl<InterestingMemoryOperand> &Interesting);

  void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
  Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
  Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
  bool instrumentStack(memtag::StackInfo &Info, Value *StackTag, Value *UARTag,
                       const DominatorTree &DT, const PostDominatorTree &PDT,
                       const LoopInfo &LI);
  bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
  Value *getNextTagWithCall(IRBuilder<> &IRB);
  Value *getStackBaseTag(IRBuilder<> &IRB);
  Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, unsigned AllocaNo);
  Value *getUARTag(IRBuilder<> &IRB);

  Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB);
  Value *applyTagMask(IRBuilder<> &IRB, Value *OldTag);
  unsigned retagMask(unsigned AllocaNo);

  void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);

  void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
  void instrumentGlobals();

  Value *getCachedFP(IRBuilder<> &IRB);
  Value *getFrameRecordInfo(IRBuilder<> &IRB);

  void instrumentPersonalityFunctions();

  LLVMContext *C;
  Module &M;
  const StackSafetyGlobalInfo *SSI;
  Triple TargetTriple;
  std::unique_ptr<RandomNumberGenerator> Rng;

  /// This struct defines the shadow mapping using the rule:
  ///   shadow = (mem >> Scale) + Offset.
  /// If InGlobal is true, then
  ///   extern char __hwasan_shadow[];
  ///   shadow = (mem >> Scale) + &__hwasan_shadow
  /// If InTls is true, then
  ///   extern char *__hwasan_tls;
  ///   shadow = (mem >> Scale) + align_up(__hwasan_tls, kShadowBaseAlignment)
  ///
  /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
  /// ring buffer for storing stack allocations on targets that support it.
  struct ShadowMapping {
    uint8_t Scale;
    uint64_t Offset;
    bool InGlobal;
    bool InTls;
    bool WithFrameRecord;

    void init(Triple &TargetTriple, bool InstrumentWithCalls);
    Align getObjectAlignment() const { return Align(1ULL << Scale); }
  };

  ShadowMapping Mapping;

  Type *VoidTy = Type::getVoidTy(M.getContext());
  Type *IntptrTy = M.getDataLayout().getIntPtrType(M.getContext());
  PointerType *PtrTy = PointerType::getUnqual(M.getContext());
  Type *Int8Ty = Type::getInt8Ty(M.getContext());
  Type *Int32Ty = Type::getInt32Ty(M.getContext());
  Type *Int64Ty = Type::getInt64Ty(M.getContext());

  bool CompileKernel;
  bool Recover;
  bool OutlinedChecks;
  bool InlineFastPath;
  bool UseShortGranules;
  bool InstrumentLandingPads;
  bool InstrumentWithCalls;
  bool InstrumentStack;
  bool InstrumentGlobals;
  bool DetectUseAfterScope;
  bool UsePageAliases;
  bool UseMatchAllCallback;

  std::optional<uint8_t> MatchAllTag;

  unsigned PointerTagShift;
  uint64_t TagMaskByte;

  Function *HwasanCtorFunction;

  FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
  FunctionCallee HwasanMemoryAccessCallbackSized[2];

  FunctionCallee HwasanMemmove, HwasanMemcpy, HwasanMemset;
  FunctionCallee HwasanHandleVfork;

  FunctionCallee HwasanTagMemoryFunc;
  FunctionCallee HwasanGenerateTagFunc;
  FunctionCallee HwasanRecordFrameRecordFunc;

  Constant *ShadowGlobal;

  Value *ShadowBase = nullptr;
  Value *StackBaseTag = nullptr;
  Value *CachedFP = nullptr;
  GlobalValue *ThreadPtrGlobal = nullptr;
};

} // end anonymous namespace

PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
                                              ModuleAnalysisManager &MAM) {
  const StackSafetyGlobalInfo *SSI = nullptr;
  auto TargetTriple = llvm::Triple(M.getTargetTriple());
  if (shouldUseStackSafetyAnalysis(TargetTriple, Options.DisableOptimization))
    SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);

  HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
  auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  for (Function &F : M)
    HWASan.sanitizeFunction(F, FAM);

  PreservedAnalyses PA = PreservedAnalyses::none();
  // DominatorTreeAnalysis, PostDominatorTreeAnalysis, and LoopAnalysis
  // are incrementally updated throughout this pass whenever
  // SplitBlockAndInsertIfThen is called.
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<PostDominatorTreeAnalysis>();
  PA.preserve<LoopAnalysis>();
  // GlobalsAA is considered stateless and does not get invalidated unless
  // explicitly invalidated; PreservedAnalyses::none() is not enough.
  // Sanitizers make changes that require GlobalsAA to be invalidated.
  PA.abandon<GlobalsAA>();
  return PA;
}

void HWAddressSanitizerPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  OS << '<';
  if (Options.CompileKernel)
    OS << "kernel;";
  if (Options.Recover)
    OS << "recover";
  OS << '>';
}

void HWAddressSanitizer::createHwasanCtorComdat() {
  std::tie(HwasanCtorFunction, std::ignore) =
      getOrCreateSanitizerCtorAndInitFunctions(
          M, kHwasanModuleCtorName, kHwasanInitName,
          /*InitArgTypes=*/{},
          /*InitArgs=*/{},
          // This callback is invoked when the functions are created the first
          // time. Hook them into the global ctors list in that case:
          [&](Function *Ctor, FunctionCallee) {
            Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
            Ctor->setComdat(CtorComdat);
            appendToGlobalCtors(M, Ctor, 0, Ctor);
          });

  // Create a note that contains pointers to the list of global
  // descriptors. Adding a note to the output file will cause the linker to
  // create a PT_NOTE program header pointing to the note that we can use to
  // find the descriptor list starting from the program headers. A function
  // provided by the runtime initializes the shadow memory for the globals by
  // accessing the descriptor list via the note. The dynamic loader needs to
  // call this function whenever a library is loaded.
  //
  // The reason why we use a note for this instead of a more conventional
  // approach of having a global constructor pass a descriptor list pointer to
  // the runtime is because of an order of initialization problem. With
  // constructors we can encounter the following problematic scenario:
  //
  // 1) library A depends on library B and also interposes one of B's symbols
  // 2) B's constructors are called before A's (as required for correctness)
  // 3) during construction, B accesses one of its "own" globals (actually
  //    interposed by A) and triggers a HWASAN failure due to the
  //    initialization for A not having happened yet
  //
  // Even without interposition it is possible to run into similar situations
  // in cases where two libraries mutually depend on each other.
  //
  // We only need one note per binary, so put everything for the note in a
  // comdat. This needs to be a comdat with an .init_array section to prevent
  // newer versions of lld from discarding the note.
  //
  // Create the note even if we aren't instrumenting globals. This ensures that
  // binaries linked from object files with both instrumented and
  // non-instrumented globals will end up with a note, even if a comdat from an
  // object file with non-instrumented globals is selected. The note is
  // harmless if the runtime doesn't support it, since it will just be ignored.
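  //
  // As a sketch, the note built below is a standard ELF note record:
  //   n_namesz = 8, n_descsz = 8, n_type = NT_LLVM_HWASAN_GLOBALS,
  //   name = "LLVM\0\0\0\0" (8 bytes), desc = two 32-bit note-relative
  //   offsets pointing at __start_hwasan_globals and __stop_hwasan_globals.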
  Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName);

  Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
  auto *Start =
      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
                         nullptr, "__start_hwasan_globals");
  Start->setVisibility(GlobalValue::HiddenVisibility);
  auto *Stop =
      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
                         nullptr, "__stop_hwasan_globals");
  Stop->setVisibility(GlobalValue::HiddenVisibility);

  // Null-terminated so actually 8 bytes, which are required in order to align
  // the note properly.
  auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");

  auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
                                 Int32Ty, Int32Ty);
  auto *Note =
      new GlobalVariable(M, NoteTy, /*isConstant=*/true,
                         GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
  Note->setSection(".note.hwasan.globals");
  Note->setComdat(NoteComdat);
  Note->setAlignment(Align(4));

  // The pointers in the note need to be relative so that the note ends up
  // being placed in rodata, which is the standard location for notes.
  auto CreateRelPtr = [&](Constant *Ptr) {
    return ConstantExpr::getTrunc(
        ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
                             ConstantExpr::getPtrToInt(Note, Int64Ty)),
        Int32Ty);
  };
  Note->setInitializer(ConstantStruct::getAnon(
      {ConstantInt::get(Int32Ty, 8),                           // n_namesz
       ConstantInt::get(Int32Ty, 8),                           // n_descsz
       ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
       Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
  appendToCompilerUsed(M, Note);

  // Create a zero-length global in hwasan_globals so that the linker will
  // always create start and stop symbols.
  auto *Dummy = new GlobalVariable(
      M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
      Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
  Dummy->setSection("hwasan_globals");
  Dummy->setComdat(NoteComdat);
  Dummy->setMetadata(LLVMContext::MD_associated,
                     MDNode::get(*C, ValueAsMetadata::get(Note)));
  appendToCompilerUsed(M, Dummy);
}

/// Module-level initialization.
///
/// Inserts a call to __hwasan_init into the module's constructor list.
void HWAddressSanitizer::initializeModule() {
  LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
  TargetTriple = Triple(M.getTargetTriple());

  // x86_64 currently has two modes:
  // - Intel LAM (default)
  // - pointer aliasing (heap only)
  bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
  UsePageAliases = shouldUsePageAliases(TargetTriple);
  InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple);
  InstrumentStack = shouldInstrumentStack(TargetTriple);
  DetectUseAfterScope = shouldDetectUseAfterScope(TargetTriple);
  PointerTagShift = IsX86_64 ? 57 : 56;
  TagMaskByte = IsX86_64 ? 0x3F : 0xFF;

  Mapping.init(TargetTriple, InstrumentWithCalls);

  C = &(M.getContext());
  IRBuilder<> IRB(*C);

  HwasanCtorFunction = nullptr;

  // Older versions of Android do not have the required runtime support for
  // short granules, global or personality function instrumentation. On other
  // platforms we currently require using the latest version of the runtime.
  bool NewRuntime =
      !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);

  UseShortGranules = optOr(ClUseShortGranules, NewRuntime);
  OutlinedChecks = (TargetTriple.isAArch64() || TargetTriple.isRISCV64()) &&
                   TargetTriple.isOSBinFormatELF() &&
                   !optOr(ClInlineAllChecks, Recover);

  // These platforms may prefer less inlining to reduce binary size.
  InlineFastPath = optOr(ClInlineFastPathChecks, !(TargetTriple.isAndroid() ||
                                                   TargetTriple.isOSFuchsia()));

  if (ClMatchAllTag.getNumOccurrences()) {
    if (ClMatchAllTag != -1) {
      MatchAllTag = ClMatchAllTag & 0xFF;
    }
  } else if (CompileKernel) {
    MatchAllTag = 0xFF;
  }
  UseMatchAllCallback = !CompileKernel && MatchAllTag.has_value();

  // If we don't have personality function support, fall back to landing pads.
  InstrumentLandingPads = optOr(ClInstrumentLandingPads, !NewRuntime);

  InstrumentGlobals =
      !CompileKernel && !UsePageAliases && optOr(ClGlobals, NewRuntime);

  if (!CompileKernel) {
    createHwasanCtorComdat();

    if (InstrumentGlobals)
      instrumentGlobals();

    bool InstrumentPersonalityFunctions =
        optOr(ClInstrumentPersonalityFunctions, NewRuntime);
    if (InstrumentPersonalityFunctions)
      instrumentPersonalityFunctions();
  }

  if (!TargetTriple.isAndroid()) {
    Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
      auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
                                    GlobalValue::ExternalLinkage, nullptr,
                                    "__hwasan_tls", nullptr,
                                    GlobalVariable::InitialExecTLSModel);
      appendToCompilerUsed(M, GV);
      return GV;
    });
    ThreadPtrGlobal = cast<GlobalVariable>(C);
  }
}

void HWAddressSanitizer::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(*C);
  const std::string MatchAllStr = UseMatchAllCallback ? "_match_all" : "";
  FunctionType *HwasanMemoryAccessCallbackSizedFnTy,
      *HwasanMemoryAccessCallbackFnTy, *HwasanMemTransferFnTy,
      *HwasanMemsetFnTy;
  if (UseMatchAllCallback) {
    HwasanMemoryAccessCallbackSizedFnTy =
        FunctionType::get(VoidTy, {IntptrTy, IntptrTy, Int8Ty}, false);
    HwasanMemoryAccessCallbackFnTy =
        FunctionType::get(VoidTy, {IntptrTy, Int8Ty}, false);
    HwasanMemTransferFnTy =
        FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy, Int8Ty}, false);
    HwasanMemsetFnTy =
        FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy, Int8Ty}, false);
  } else {
    HwasanMemoryAccessCallbackSizedFnTy =
        FunctionType::get(VoidTy, {IntptrTy, IntptrTy}, false);
    HwasanMemoryAccessCallbackFnTy =
        FunctionType::get(VoidTy, {IntptrTy}, false);
    HwasanMemTransferFnTy =
        FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy}, false);
    HwasanMemsetFnTy =
        FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy}, false);
  }

  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
    const std::string TypeStr = AccessIsWrite ? "store" : "load";
    const std::string EndingStr = Recover ? "_noabort" : "";

    HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
        ClMemoryAccessCallbackPrefix + TypeStr + "N" + MatchAllStr + EndingStr,
        HwasanMemoryAccessCallbackSizedFnTy);

    for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
         AccessSizeIndex++) {
      HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
          M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr +
                                    itostr(1ULL << AccessSizeIndex) +
                                    MatchAllStr + EndingStr,
                                HwasanMemoryAccessCallbackFnTy);
    }
  }

  const std::string MemIntrinCallbackPrefix =
      (CompileKernel && !ClKasanMemIntrinCallbackPrefix)
          ? std::string("")
          : ClMemoryAccessCallbackPrefix;

  HwasanMemmove = M.getOrInsertFunction(
      MemIntrinCallbackPrefix + "memmove" + MatchAllStr, HwasanMemTransferFnTy);
  HwasanMemcpy = M.getOrInsertFunction(
      MemIntrinCallbackPrefix + "memcpy" + MatchAllStr, HwasanMemTransferFnTy);
  HwasanMemset = M.getOrInsertFunction(
      MemIntrinCallbackPrefix + "memset" + MatchAllStr, HwasanMemsetFnTy);

  HwasanTagMemoryFunc = M.getOrInsertFunction("__hwasan_tag_memory", VoidTy,
                                              PtrTy, Int8Ty, IntptrTy);
  HwasanGenerateTagFunc =
      M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);

  HwasanRecordFrameRecordFunc =
      M.getOrInsertFunction("__hwasan_add_frame_record", VoidTy, Int64Ty);

  ShadowGlobal =
      M.getOrInsertGlobal("__hwasan_shadow", ArrayType::get(Int8Ty, 0));

  HwasanHandleVfork =
      M.getOrInsertFunction("__hwasan_handle_vfork", VoidTy, IntptrTy);
}

Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
  // An empty inline asm with input reg == output reg.
  // An opaque no-op cast, basically.
  // This prevents code bloat as a result of rematerializing trivial
  // definitions such as constants or global addresses at every load and store.
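  // (The asm string below is empty, so no instructions are emitted; the
  // "=r,0" constraint simply ties the output register to the input, which is
  // enough to make the value opaque to the optimizer.)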
  InlineAsm *Asm =
      InlineAsm::get(FunctionType::get(PtrTy, {Val->getType()}, false),
                     StringRef(""), StringRef("=r,0"),
                     /*hasSideEffects=*/false);
  return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
}

Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
  return getOpaqueNoopCast(IRB, ShadowGlobal);
}

Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
  if (Mapping.Offset != kDynamicShadowSentinel)
    return getOpaqueNoopCast(
        IRB, ConstantExpr::getIntToPtr(
                 ConstantInt::get(IntptrTy, Mapping.Offset), PtrTy));

  if (Mapping.InGlobal)
    return getDynamicShadowIfunc(IRB);

  Value *GlobalDynamicAddress =
      IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
          kHwasanShadowMemoryDynamicAddress, PtrTy);
  return IRB.CreateLoad(PtrTy, GlobalDynamicAddress);
}

bool HWAddressSanitizer::ignoreAccessWithoutRemark(Instruction *Inst,
                                                   Value *Ptr) {
  // Do not instrument accesses from different address spaces; we cannot deal
  // with them.
  Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
  if (PtrTy->getPointerAddressSpace() != 0)
    return true;

  // Ignore swifterror addresses.
  // swifterror memory addresses are mem2reg promoted by instruction
  // selection. As such they cannot have regular uses like an instrumentation
  // function and it makes no sense to track them as memory.
  if (Ptr->isSwiftError())
    return true;

  if (findAllocaForValue(Ptr)) {
    if (!InstrumentStack)
      return true;
    if (SSI && SSI->stackAccessIsSafe(*Inst))
      return true;
  }

  if (isa<GlobalVariable>(getUnderlyingObject(Ptr))) {
    if (!InstrumentGlobals)
      return true;
    // TODO: Optimize inbound global accesses, like Asan `instrumentMop`.
  }

  return false;
}

bool HWAddressSanitizer::ignoreAccess(OptimizationRemarkEmitter &ORE,
                                      Instruction *Inst, Value *Ptr) {
  bool Ignored = ignoreAccessWithoutRemark(Inst, Ptr);
  if (Ignored) {
    ORE.emit(
        [&]() { return OptimizationRemark(DEBUG_TYPE, "ignoreAccess", Inst); });
  } else {
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "ignoreAccess", Inst);
    });
  }
  return Ignored;
}

void HWAddressSanitizer::getInterestingMemoryOperands(
    OptimizationRemarkEmitter &ORE, Instruction *I,
    const TargetLibraryInfo &TLI,
    SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
  // Skip memory accesses inserted by another instrumentation.
  if (I->hasMetadata(LLVMContext::MD_nosanitize))
    return;

  // Do not instrument the load fetching the dynamic shadow address.
  if (ShadowBase == I)
    return;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!ClInstrumentReads || ignoreAccess(ORE, I, LI->getPointerOperand()))
      return;
    Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
                             LI->getType(), LI->getAlign());
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!ClInstrumentWrites || ignoreAccess(ORE, I, SI->getPointerOperand()))
      return;
    Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
                             SI->getValueOperand()->getType(), SI->getAlign());
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    if (!ClInstrumentAtomics || ignoreAccess(ORE, I, RMW->getPointerOperand()))
      return;
    Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
                             RMW->getValOperand()->getType(), std::nullopt);
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    if (!ClInstrumentAtomics || ignoreAccess(ORE, I, XCHG->getPointerOperand()))
      return;
    Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
                             XCHG->getCompareOperand()->getType(),
                             std::nullopt);
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
      if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
          ignoreAccess(ORE, I, CI->getArgOperand(ArgNo)))
        continue;
      Type *Ty = CI->getParamByValType(ArgNo);
      Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
    }
    maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
  }
}

static unsigned getPointerOperandIndex(Instruction *I) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I))
    return LI->getPointerOperandIndex();
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getPointerOperandIndex();
  if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
    return RMW->getPointerOperandIndex();
  if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
    return XCHG->getPointerOperandIndex();
  report_fatal_error("Unexpected instruction");
  return -1;
}

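// Maps an access size in bits to its index in the callback tables above;
// e.g. (illustrative) 8 bits -> index 0, 32 bits -> 2, 128 bits -> 4.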
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
  size_t Res = llvm::countr_zero(TypeSize / 8);
  assert(Res < kNumberOfAccessSizes);
  return Res;
}

void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
  if (TargetTriple.isAArch64() || TargetTriple.getArch() == Triple::x86_64 ||
      TargetTriple.isRISCV64())
    return;

  IRBuilder<> IRB(I);
  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
  Value *UntaggedPtr =
      IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
  I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
}

Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
  // Mem >> Scale
  Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
  if (Mapping.Offset == 0)
    return IRB.CreateIntToPtr(Shadow, PtrTy);
  // (Mem >> Scale) + Offset
  return IRB.CreatePtrAdd(ShadowBase, Shadow);
}

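// Packs the parameters of a check into a single integer using the shift
// constants from HWASanAccessInfo. As a sketch, a plain userspace 4-byte
// write with recovery disabled and no match-all tag sets only the is-write
// bit and an access-size index of 2.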
int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
                                          unsigned AccessSizeIndex) {
  return (CompileKernel << HWASanAccessInfo::CompileKernelShift) |
         (MatchAllTag.has_value() << HWASanAccessInfo::HasMatchAllShift) |
         (MatchAllTag.value_or(0) << HWASanAccessInfo::MatchAllShift) |
         (Recover << HWASanAccessInfo::RecoverShift) |
         (IsWrite << HWASanAccessInfo::IsWriteShift) |
         (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
}

HWAddressSanitizer::ShadowTagCheckInfo
HWAddressSanitizer::insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore,
                                         DomTreeUpdater &DTU, LoopInfo *LI) {
  ShadowTagCheckInfo R;

  IRBuilder<> IRB(InsertBefore);

  R.PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
  R.PtrTag =
      IRB.CreateTrunc(IRB.CreateLShr(R.PtrLong, PointerTagShift), Int8Ty);
  R.AddrLong = untagPointer(IRB, R.PtrLong);
  Value *Shadow = memToShadow(R.AddrLong, IRB);
  R.MemTag = IRB.CreateLoad(Int8Ty, Shadow);
  Value *TagMismatch = IRB.CreateICmpNE(R.PtrTag, R.MemTag);

  if (MatchAllTag.has_value()) {
    Value *TagNotIgnored = IRB.CreateICmpNE(
        R.PtrTag, ConstantInt::get(R.PtrTag->getType(), *MatchAllTag));
    TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
  }

  R.TagMismatchTerm = SplitBlockAndInsertIfThen(
      TagMismatch, InsertBefore, false,
      MDBuilder(*C).createUnlikelyBranchWeights(), &DTU, LI);

  return R;
}

void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
                                                    unsigned AccessSizeIndex,
                                                    Instruction *InsertBefore,
                                                    DomTreeUpdater &DTU,
                                                    LoopInfo *LI) {
  assert(!UsePageAliases);
  const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);

  if (InlineFastPath)
    InsertBefore =
        insertShadowTagCheck(Ptr, InsertBefore, DTU, LI).TagMismatchTerm;

  IRBuilder<> IRB(InsertBefore);
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  bool UseFixedShadowIntrinsic = false;
  // The memaccess fixed shadow intrinsic is only supported on AArch64,
  // which allows a 16-bit immediate to be left-shifted by 32.
  // Since kShadowBaseAlignment == 32, and Linux by default will not
  // mmap above 48 bits, practically any valid shadow offset is
  // representable.
  // In particular, an offset of 4TB (1024 << 32) is representable, and
  // ought to be good enough for anybody.
  if (TargetTriple.isAArch64() && Mapping.Offset != kDynamicShadowSentinel) {
    uint16_t OffsetShifted = Mapping.Offset >> 32;
    UseFixedShadowIntrinsic = (uint64_t)OffsetShifted << 32 == Mapping.Offset;
  }

  if (UseFixedShadowIntrinsic)
    IRB.CreateCall(
        Intrinsic::getDeclaration(
            M, UseShortGranules
                   ? Intrinsic::hwasan_check_memaccess_shortgranules_fixedshadow
                   : Intrinsic::hwasan_check_memaccess_fixedshadow),
        {Ptr, ConstantInt::get(Int32Ty, AccessInfo),
         ConstantInt::get(Int64Ty, Mapping.Offset)});
  else
    IRB.CreateCall(Intrinsic::getDeclaration(
                       M, UseShortGranules
                              ? Intrinsic::hwasan_check_memaccess_shortgranules
                              : Intrinsic::hwasan_check_memaccess),
                   {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
}

void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                                   unsigned AccessSizeIndex,
                                                   Instruction *InsertBefore,
                                                   DomTreeUpdater &DTU,
                                                   LoopInfo *LI) {
  assert(!UsePageAliases);
  const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);

  ShadowTagCheckInfo TCI = insertShadowTagCheck(Ptr, InsertBefore, DTU, LI);

  IRBuilder<> IRB(TCI.TagMismatchTerm);
  // A shadow value above 15 cannot be a short granule size, so the tag
  // mismatch is a real error.
  Value *OutOfShortGranuleTagRange =
      IRB.CreateICmpUGT(TCI.MemTag, ConstantInt::get(Int8Ty, 15));
  Instruction *CheckFailTerm = SplitBlockAndInsertIfThen(
      OutOfShortGranuleTagRange, TCI.TagMismatchTerm, !Recover,
      MDBuilder(*C).createUnlikelyBranchWeights(), &DTU, LI);

  IRB.SetInsertPoint(TCI.TagMismatchTerm);
  // Otherwise the shadow holds the number of bytes used in a short granule;
  // the access is out of bounds if its last byte is at or past that size.
  Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(TCI.PtrLong, 15), Int8Ty);
  PtrLowBits = IRB.CreateAdd(
      PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
  Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, TCI.MemTag);
  SplitBlockAndInsertIfThen(PtrLowBitsOOB, TCI.TagMismatchTerm, false,
                            MDBuilder(*C).createUnlikelyBranchWeights(), &DTU,
                            LI, CheckFailTerm->getParent());

  IRB.SetInsertPoint(TCI.TagMismatchTerm);
  // For short granules the real tag is stored in the granule's last byte;
  // compare the pointer tag against it.
  Value *InlineTagAddr = IRB.CreateOr(TCI.AddrLong, 15);
  InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, PtrTy);
  Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
  Value *InlineTagMismatch = IRB.CreateICmpNE(TCI.PtrTag, InlineTag);
  SplitBlockAndInsertIfThen(InlineTagMismatch, TCI.TagMismatchTerm, false,
                            MDBuilder(*C).createUnlikelyBranchWeights(), &DTU,
                            LI, CheckFailTerm->getParent());

  IRB.SetInsertPoint(CheckFailTerm);
  InlineAsm *Asm;
  switch (TargetTriple.getArch()) {
  case Triple::x86_64:
    // The signal handler will find the data address in rdi.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
        "int3\nnopl " +
            itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
            "(%rax)",
        "{rdi}",
        /*hasSideEffects=*/true);
    break;
  case Triple::aarch64:
  case Triple::aarch64_be:
    // The signal handler will find the data address in x0.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
        "brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
        "{x0}",
        /*hasSideEffects=*/true);
    break;
  case Triple::riscv64:
    // The signal handler will find the data address in x10.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
        "ebreak\naddiw x0, x11, " +
            itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
        "{x10}",
        /*hasSideEffects=*/true);
    break;
  default:
    report_fatal_error("unsupported architecture");
  }
  IRB.CreateCall(Asm, TCI.PtrLong);
  if (Recover)
    cast<BranchInst>(CheckFailTerm)
        ->setSuccessor(0, TCI.TagMismatchTerm->getParent());
}

bool HWAddressSanitizer::ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE,
                                            MemIntrinsic *MI) {
  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
    return (!ClInstrumentWrites || ignoreAccess(ORE, MTI, MTI->getDest())) &&
           (!ClInstrumentReads || ignoreAccess(ORE, MTI, MTI->getSource()));
  }
  if (isa<MemSetInst>(MI))
    return !ClInstrumentWrites || ignoreAccess(ORE, MI, MI->getDest());
  return false;
}

void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  if (isa<MemTransferInst>(MI)) {
    SmallVector<Value *, 4> Args{
        MI->getOperand(0), MI->getOperand(1),
        IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)};

    if (UseMatchAllCallback)
      Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
    IRB.CreateCall(isa<MemMoveInst>(MI) ? HwasanMemmove : HwasanMemcpy, Args);
  } else if (isa<MemSetInst>(MI)) {
    SmallVector<Value *, 4> Args{
        MI->getOperand(0),
        IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
        IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)};
    if (UseMatchAllCallback)
      Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
    IRB.CreateCall(HwasanMemset, Args);
  }
  MI->eraseFromParent();
}

bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O,
                                             DomTreeUpdater &DTU,
                                             LoopInfo *LI) {
  Value *Addr = O.getPtr();

  LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");

  if (O.MaybeMask)
    return false; // FIXME

  IRBuilder<> IRB(O.getInsn());
  if (!O.TypeStoreSize.isScalable() && isPowerOf2_64(O.TypeStoreSize) &&
      (O.TypeStoreSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
      (!O.Alignment || *O.Alignment >= Mapping.getObjectAlignment() ||
       *O.Alignment >= O.TypeStoreSize / 8)) {
    size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeStoreSize);
    if (InstrumentWithCalls) {
      SmallVector<Value *, 2> Args{IRB.CreatePointerCast(Addr, IntptrTy)};
      if (UseMatchAllCallback)
        Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
      IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
                     Args);
    } else if (OutlinedChecks) {
      instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(),
                                 DTU, LI);
    } else {
      instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(),
                                DTU, LI);
    }
  } else {
    SmallVector<Value *, 3> Args{
        IRB.CreatePointerCast(Addr, IntptrTy),
        IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, O.TypeStoreSize),
                       ConstantInt::get(IntptrTy, 8))};
    if (UseMatchAllCallback)
      Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
    IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite], Args);
  }
  untagPointerOperand(O.getInsn(), Addr);

  return true;
}

void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
                                   size_t Size) {
  size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
  if (!UseShortGranules)
    Size = AlignedSize;

  Tag = IRB.CreateTrunc(Tag, Int8Ty);
  if (InstrumentWithCalls) {
    IRB.CreateCall(HwasanTagMemoryFunc,
                   {IRB.CreatePointerCast(AI, PtrTy), Tag,
                    ConstantInt::get(IntptrTy, AlignedSize)});
  } else {
    size_t ShadowSize = Size >> Mapping.Scale;
    Value *AddrLong = untagPointer(IRB, IRB.CreatePointerCast(AI, IntptrTy));
    Value *ShadowPtr = memToShadow(AddrLong, IRB);
    // If this memset is not inlined, it will be intercepted in the hwasan
    // runtime library. That's OK, because the interceptor skips the checks if
    // the address is in the shadow region.
    // FIXME: the interceptor is not as fast as real memset. Consider lowering
    // llvm.memset right here into either a sequence of stores, or a call to
    // hwasan_tag_memory.
    if (ShadowSize)
      IRB.CreateMemSet(ShadowPtr, Tag, ShadowSize, Align(1));
    if (Size != AlignedSize) {
      const uint8_t SizeRemainder = Size % Mapping.getObjectAlignment().value();
      IRB.CreateStore(ConstantInt::get(Int8Ty, SizeRemainder),
                      IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
      IRB.CreateStore(
          Tag, IRB.CreateConstGEP1_32(Int8Ty, IRB.CreatePointerCast(AI, PtrTy),
                                      AlignedSize - 1));
    }
  }
}

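// Picks the mask that is XORed into the stack base tag for alloca number
// AllocaNo. On AArch64, for example, retagMask(1) == 128 and
// retagMask(2) == 64, both single-run masks from the table below.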
unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
  if (TargetTriple.getArch() == Triple::x86_64)
    return AllocaNo & TagMaskByte;

  // A list of 8-bit numbers that have at most one run of non-zero bits.
  // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for
  // these masks.
  // The list does not include the value 255, which is used for UAR.
  //
  // Because we are more likely to use earlier elements of this list than
  // later ones, it is sorted in increasing order of probability of collision
  // with a mask allocated (temporally) nearby. The program that generated
  // this list can be found at:
  // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
  static const unsigned FastMasks[] = {
      0,   128, 64, 192, 32,  96,  224, 112, 240, 48, 16,  120,
      248, 56,  24, 8,   124, 252, 60,  28,  12,  4,  126, 254,
      62,  30,  14, 6,   2,   127, 63,  31,  15,  7,  3,   1};
  return FastMasks[AllocaNo % std::size(FastMasks)];
}

Value *HWAddressSanitizer::applyTagMask(IRBuilder<> &IRB, Value *OldTag) {
  if (TagMaskByte == 0xFF)
    return OldTag; // No need to clear the tag byte.
  return IRB.CreateAnd(OldTag,
                       ConstantInt::get(OldTag->getType(), TagMaskByte));
}

Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
  return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
}

Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
  if (ClGenerateTagsWithCalls)
    return nullptr;
  if (StackBaseTag)
    return StackBaseTag;
  // Extract some entropy from the stack pointer for the tags.
  // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
  // between functions).
  Value *FramePointerLong = getCachedFP(IRB);
  Value *StackTag =
      applyTagMask(IRB, IRB.CreateXor(FramePointerLong,
                                      IRB.CreateLShr(FramePointerLong, 20)));
  StackTag->setName("hwasan.stack.base.tag");
  return StackTag;
}

Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
                                        unsigned AllocaNo) {
  if (ClGenerateTagsWithCalls)
    return getNextTagWithCall(IRB);
  return IRB.CreateXor(
      StackTag, ConstantInt::get(StackTag->getType(), retagMask(AllocaNo)));
}

Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB) {
  Value *FramePointerLong = getCachedFP(IRB);
  Value *UARTag =
      applyTagMask(IRB, IRB.CreateLShr(FramePointerLong, PointerTagShift));

  UARTag->setName("hwasan.uar.tag");
  return UARTag;
}

// Add a tag to an address.
Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
                                      Value *PtrLong, Value *Tag) {
  assert(!UsePageAliases);
  Value *TaggedPtrLong;
  if (CompileKernel) {
    // Kernel addresses have 0xFF in the most significant byte.
    Value *ShiftedTag =
        IRB.CreateOr(IRB.CreateShl(Tag, PointerTagShift),
                     ConstantInt::get(IntptrTy, (1ULL << PointerTagShift) - 1));
    TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
  } else {
    // Userspace can simply do OR (tag << PointerTagShift);
    Value *ShiftedTag = IRB.CreateShl(Tag, PointerTagShift);
    TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
  }
  return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
}

// Remove tag from an address.
Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
  assert(!UsePageAliases);
  Value *UntaggedPtrLong;
  if (CompileKernel) {
    // Kernel addresses have 0xFF in the most significant byte.
    UntaggedPtrLong =
        IRB.CreateOr(PtrLong, ConstantInt::get(PtrLong->getType(),
                                               TagMaskByte << PointerTagShift));
  } else {
    // Userspace addresses have 0x00.
    UntaggedPtrLong = IRB.CreateAnd(
        PtrLong, ConstantInt::get(PtrLong->getType(),
                                  ~(TagMaskByte << PointerTagShift)));
  }
  return UntaggedPtrLong;
}

Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB) {
  // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
  // in Bionic's libc/platform/bionic/tls_defines.h.
  constexpr int SanitizerSlot = 6;
  if (TargetTriple.isAArch64() && TargetTriple.isAndroid())
    return memtag::getAndroidSlotPtr(IRB, SanitizerSlot);
  return ThreadPtrGlobal;
}

Value *HWAddressSanitizer::getCachedFP(IRBuilder<> &IRB) {
  if (!CachedFP)
    CachedFP = memtag::getFP(IRB);
  return CachedFP;
}

Value *HWAddressSanitizer::getFrameRecordInfo(IRBuilder<> &IRB) {
  // Prepare ring buffer data.
  Value *PC = memtag::getPC(TargetTriple, IRB);
  Value *FP = getCachedFP(IRB);

  // Mix FP and PC.
  // Assumptions:
  //   PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero)
  //   FP is 0xfffffffffffFFFF0 (4 lower bits are zero)
  // We only really need ~20 lower non-zero bits (FFFF), so we mix like this:
  //   0xFFFFPPPPPPPPPPPP
  //
  // FP works because in AArch64FrameLowering::getFrameIndexReference, we
  // prefer FP-relative offsets for functions compiled with HWASan.
  FP = IRB.CreateShl(FP, 44);
  return IRB.CreateOr(PC, FP);
}


void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
  if (!Mapping.InTls)
    ShadowBase = getShadowNonTls(IRB);
  else if (!WithFrameRecord && TargetTriple.isAndroid())
    ShadowBase = getDynamicShadowIfunc(IRB);

  if (!WithFrameRecord && ShadowBase)
    return;

  Value *SlotPtr = nullptr;
  Value *ThreadLong = nullptr;
  Value *ThreadLongMaybeUntagged = nullptr;

  auto getThreadLongMaybeUntagged = [&]() {
    if (!SlotPtr)
      SlotPtr = getHwasanThreadSlotPtr(IRB);
    if (!ThreadLong)
      ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
    // Extract the address field from ThreadLong. Unnecessary on AArch64 with
    // TBI.
    return TargetTriple.isAArch64() ? ThreadLong
                                    : untagPointer(IRB, ThreadLong);
  };

  if (WithFrameRecord) {
    switch (ClRecordStackHistory) {
    case libcall: {
      // Emit a runtime call into hwasan rather than emitting instructions for
      // recording stack history.
      Value *FrameRecordInfo = getFrameRecordInfo(IRB);
      IRB.CreateCall(HwasanRecordFrameRecordFunc, {FrameRecordInfo});
      break;
    }
    case instr: {
      ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();

      StackBaseTag = IRB.CreateAShr(ThreadLong, 3);

      // Store data to ring buffer.
      Value *FrameRecordInfo = getFrameRecordInfo(IRB);
      Value *RecordPtr =
          IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IRB.getPtrTy(0));
      IRB.CreateStore(FrameRecordInfo, RecordPtr);

      // Update the ring buffer. The top byte of ThreadLong defines the size
      // of the buffer in pages; it must be a power of two, and the start of
      // the buffer must be aligned by twice that much. Therefore wrap-around
      // of the ring buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
      // The use of AShr instead of LShr is due to
      // https://bugs.llvm.org/show_bug.cgi?id=39030
      // The runtime library makes sure not to use the highest bit.
      //
      // Mechanical proof of this address calculation can be found at:
      // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/prove_hwasanwrap.smt2
      //
      // Example of the wrap case for N = 1:
      //   Pointer:  0x01AAAAAAAAAAAFF8
      //           + 0x0000000000000008
      //           = 0x01AAAAAAAAAAB000
      //   WrapMask: 0xFFFFFFFFFFFFF000
      //           & 0x01AAAAAAAAAAA000
      //
      // Then the WrapMask will be a no-op until the next wrap case.
      Value *WrapMask = IRB.CreateXor(
          IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
          ConstantInt::get(IntptrTy, (uint64_t)-1));
      Value *ThreadLongNew = IRB.CreateAnd(
          IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
      IRB.CreateStore(ThreadLongNew, SlotPtr);
      break;
    }
    case none: {
      llvm_unreachable(
          "A stack history recording mode should've been selected.");
    }
    }
  }

  if (!ShadowBase) {
    if (!ThreadLongMaybeUntagged)
      ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();

    // Get the shadow base address by aligning RecordPtr up.
    // Note: this is not correct if the pointer is already aligned.
    // The runtime library will make sure this never happens.
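    // For example (sketch), with kShadowBaseAlignment == 32 a ring buffer
    // pointer of 0x123456789 yields (0x123456789 | 0xffffffff) + 1
    // == 0x200000000 as the shadow base.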
    ShadowBase = IRB.CreateAdd(
        IRB.CreateOr(
            ThreadLongMaybeUntagged,
            ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
        ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
    ShadowBase = IRB.CreateIntToPtr(ShadowBase, PtrTy);
  }
}

bool HWAddressSanitizer::instrumentLandingPads(
    SmallVectorImpl<Instruction *> &LandingPadVec) {
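  // At each landing pad, pass the current stack pointer to the runtime so it
  // can clear stale tags from the now-dead stack region below it (the frames
  // unwound between the throw and this landing pad). The runtime reuses the
  // __hwasan_handle_vfork entry point for this, as vfork leaves the stack in
  // an analogous state.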
  for (auto *LP : LandingPadVec) {
    IRBuilder<> IRB(LP->getNextNonDebugInstruction());
    IRB.CreateCall(
        HwasanHandleVfork,
        {memtag::readRegister(
            IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp" : "sp")});
  }
  return true;
}

bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
                                         Value *StackTag, Value *UARTag,
                                         const DominatorTree &DT,
                                         const PostDominatorTree &PDT,
                                         const LoopInfo &LI) {
  // Ideally, we would calculate the tagged stack base pointer and rewrite
  // all alloca addresses using it. Unfortunately, the offsets are not known
  // yet (unless we use an ASan-style mega-alloca). Instead we keep the base
  // tag in a temp, shift-OR it into each alloca address, and XOR with the
  // retag mask. This generates one extra instruction per alloca use.
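  // A sketch of the rewrite for one alloca, in pseudo-IR (the names and the
  // AArch64 tag shift of 56 are for exposition only):
  //   %x.long   = ptrtoint ptr %x to i64
  //   %x.untag  = and i64 %x.long, 0x00FFFFFFFFFFFFFF  ; untagPointer
  //   %x.tagged = or i64 %x.untag, (Tag << 56)         ; tagPointer
  //   %x.hwasan = inttoptr i64 %x.tagged to ptr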
  unsigned int I = 0;

  for (auto &KV : SInfo.AllocasToInstrument) {
    auto N = I++;
    auto *AI = KV.first;
    memtag::AllocaInfo &Info = KV.second;
    IRBuilder<> IRB(AI->getNextNonDebugInstruction());

    // Replace uses of the alloca with a tagged address.
    Value *Tag = getAllocaTag(IRB, StackTag, N);
    Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
    Value *AINoTagLong = untagPointer(IRB, AILong);
    Value *Replacement = tagPointer(IRB, AI->getType(), AINoTagLong, Tag);
    std::string Name =
        AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
    Replacement->setName(Name + ".hwasan");

    size_t Size = memtag::getAllocaSizeInBytes(*AI);
    size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());

    Value *AICast = IRB.CreatePointerCast(AI, PtrTy);

    auto HandleLifetime = [&](IntrinsicInst *II) {
      // Set the lifetime intrinsic to cover the whole alloca. This reduces
      // the set of assumptions we need to make about the lifetime. Without
      // this we would need to ensure that we can track the lifetime pointer
      // to a constant offset from the alloca, and would still need to change
      // the size to include the extra alignment we use for the untagging to
      // make the size consistent.
      //
      // The check for standard lifetime below makes sure that we have
      // exactly one set of start / end in any execution (i.e. the ends are
      // not reachable from each other), so this will not cause any problems.
      II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize));
      II->setArgOperand(1, AICast);
    };
    llvm::for_each(Info.LifetimeStart, HandleLifetime);
    llvm::for_each(Info.LifetimeEnd, HandleLifetime);

    AI->replaceUsesWithIf(Replacement, [AICast, AILong](const Use &U) {
      auto *User = U.getUser();
      return User != AILong && User != AICast &&
             !memtag::isLifetimeIntrinsic(User);
    });

    memtag::annotateDebugRecords(Info, retagMask(N));

    auto TagEnd = [&](Instruction *Node) {
      IRB.SetInsertPoint(Node);
      // When untagging, use the `AlignedSize` because we need to reset the
      // tags for the entire alloca to the original. If we used `Size` here,
      // we would keep the last granule tagged, and store zero in the last
      // byte of the last granule, due to how short granules are implemented.
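      // E.g. Size = 13 rounds up to AlignedSize = 16, so the full granule is
      // retagged rather than just its first 13 bytes.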
      tagAlloca(IRB, AI, UARTag, AlignedSize);
    };
    // Calls to functions that may return twice (e.g. setjmp) confuse the
    // postdominator analysis, and can cause us to leave memory tagged after
    // the function returns. Work around this by always untagging at every
    // return statement if return_twice functions are called.
    bool StandardLifetime =
        !SInfo.CallsReturnTwice &&
        SInfo.UnrecognizedLifetimes.empty() &&
        memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, &DT,
                                   &LI, ClMaxLifetimes);
    if (DetectUseAfterScope && StandardLifetime) {
      IntrinsicInst *Start = Info.LifetimeStart[0];
      IRB.SetInsertPoint(Start->getNextNode());
      tagAlloca(IRB, AI, Tag, Size);
      if (!memtag::forAllReachableExits(DT, PDT, LI, Start, Info.LifetimeEnd,
                                        SInfo.RetVec, TagEnd)) {
        for (auto *End : Info.LifetimeEnd)
          End->eraseFromParent();
      }
    } else {
      tagAlloca(IRB, AI, Tag, Size);
      for (auto *RI : SInfo.RetVec)
        TagEnd(RI);
      // We inserted tagging outside of the lifetimes, so we have to remove
      // them.
      for (auto &II : Info.LifetimeStart)
        II->eraseFromParent();
      for (auto &II : Info.LifetimeEnd)
        II->eraseFromParent();
    }
    memtag::alignAndPadAlloca(Info, Mapping.getObjectAlignment());
  }
  for (auto &I : SInfo.UnrecognizedLifetimes)
    I->eraseFromParent();
  return true;
}

static void emitRemark(const Function &F, OptimizationRemarkEmitter &ORE,
                       bool Skip) {
  if (Skip) {
    ORE.emit([&]() {
      return OptimizationRemark(DEBUG_TYPE, "Skip", &F)
             << "Skipped: F=" << ore::NV("Function", &F);
    });
  } else {
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "Sanitize", &F)
             << "Sanitized: F=" << ore::NV("Function", &F);
    });
  }
}

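// Decide whether to skip instrumenting F. Two knobs drive this: a random
// skip rate, under which each function is kept with the given probability
// (this takes precedence when set), and a hot-function percentile cutoff,
// under which functions that profile data places in the hottest N-th
// percentile of the call graph are skipped. For example, a random rate of
// 0.5 keeps roughly half of the eligible functions; without profile data the
// percentile cutoff keeps everything.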
bool HWAddressSanitizer::selectiveInstrumentationShouldSkip(
    Function &F, FunctionAnalysisManager &FAM) const {
  bool Skip = [&]() {
    if (ClRandomSkipRate.getNumOccurrences()) {
      std::bernoulli_distribution D(ClRandomSkipRate);
      return !D(*Rng);
    }
    if (!ClHotPercentileCutoff.getNumOccurrences())
      return false;
    auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
    ProfileSummaryInfo *PSI =
        MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
    if (!PSI || !PSI->hasProfileSummary()) {
      ++NumNoProfileSummaryFuncs;
      return false;
    }
    return PSI->isFunctionHotInCallGraphNthPercentile(
        ClHotPercentileCutoff, &F, FAM.getResult<BlockFrequencyAnalysis>(F));
  }();
  emitRemark(F, FAM.getResult<OptimizationRemarkEmitterAnalysis>(F), Skip);
  return Skip;
}

void HWAddressSanitizer::sanitizeFunction(Function &F,
                                          FunctionAnalysisManager &FAM) {
  if (&F == HwasanCtorFunction)
    return;

  if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
    return;

  if (F.empty())
    return;

  NumTotalFuncs++;

  OptimizationRemarkEmitter &ORE =
      FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);

  if (selectiveInstrumentationShouldSkip(F, FAM))
    return;

  NumInstrumentedFuncs++;

  LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");

  SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
  SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
  SmallVector<Instruction *, 8> LandingPadVec;
  const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);

  memtag::StackInfoBuilder SIB(SSI);
  for (auto &Inst : instructions(F)) {
    if (InstrumentStack) {
      SIB.visit(Inst);
    }

    if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
      LandingPadVec.push_back(&Inst);

    getInterestingMemoryOperands(ORE, &Inst, TLI, OperandsToInstrument);

    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
      if (!ignoreMemIntrinsic(ORE, MI))
        IntrinToInstrument.push_back(MI);
  }

  memtag::StackInfo &SInfo = SIB.get();

  initializeCallbacks(*F.getParent());

  if (!LandingPadVec.empty())
    instrumentLandingPads(LandingPadVec);

  if (SInfo.AllocasToInstrument.empty() && F.hasPersonalityFn() &&
      F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
    // __hwasan_personality_thunk is a no-op for functions without an
    // instrumented stack, so we can drop it.
    F.setPersonalityFn(nullptr);
  }

  if (SInfo.AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
      IntrinToInstrument.empty())
    return;

  assert(!ShadowBase);

  // Remove memory attributes that are about to become invalid.
  // HWASan checks read from shadow memory, which invalidates
  // memory(argmem: *). Short granule checks on function arguments read from
  // the argument memory (last byte of the granule), which invalidates
  // writeonly.
  F.removeFnAttr(llvm::Attribute::Memory);
  for (auto &A : F.args())
    A.removeAttr(llvm::Attribute::WriteOnly);

  BasicBlock::iterator InsertPt = F.getEntryBlock().begin();
  IRBuilder<> EntryIRB(&F.getEntryBlock(), InsertPt);
  emitPrologue(EntryIRB,
               /*WithFrameRecord*/ ClRecordStackHistory != none &&
                   Mapping.WithFrameRecord &&
                   !SInfo.AllocasToInstrument.empty());

  if (!SInfo.AllocasToInstrument.empty()) {
    const DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
    const PostDominatorTree &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
    const LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
    Value *StackTag = getStackBaseTag(EntryIRB);
    Value *UARTag = getUARTag(EntryIRB);
    instrumentStack(SInfo, StackTag, UARTag, DT, PDT, LI);
  }

  // If we split the entry block, move any allocas that were originally in
  // the entry block back into the entry block so that they aren't treated as
  // dynamic allocas.
  if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
    InsertPt = F.getEntryBlock().begin();
    for (Instruction &I :
         llvm::make_early_inc_range(*EntryIRB.GetInsertBlock())) {
      if (auto *AI = dyn_cast<AllocaInst>(&I))
        if (isa<ConstantInt>(AI->getArraySize()))
          I.moveBefore(F.getEntryBlock(), InsertPt);
    }
  }

  DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
  PostDominatorTree *PDT = FAM.getCachedResult<PostDominatorTreeAnalysis>(F);
  LoopInfo *LI = FAM.getCachedResult<LoopAnalysis>(F);
  DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
  for (auto &Operand : OperandsToInstrument)
    instrumentMemAccess(Operand, DTU, LI);
  DTU.flush();

  if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
    for (auto *Inst : IntrinToInstrument)
      instrumentMemIntrinsic(Inst);
  }

  ShadowBase = nullptr;
  StackBaseTag = nullptr;
  CachedFP = nullptr;
}

void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
  assert(!UsePageAliases);
  Constant *Initializer = GV->getInitializer();
  uint64_t SizeInBytes =
      M.getDataLayout().getTypeAllocSize(Initializer->getType());
  uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
  if (SizeInBytes != NewSize) {
    // Pad the initializer out to the next multiple of 16 bytes and add the
    // required short granule tag.
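    // E.g. a 13-byte global gets 3 bytes of padding, with the final padding
    // byte holding the granule's tag: Init = {0x00, 0x00, Tag}.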
    std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
    Init.back() = Tag;
    Constant *Padding = ConstantDataArray::get(*C, Init);
    Initializer = ConstantStruct::getAnon({Initializer, Padding});
  }

  auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
                                   GlobalValue::ExternalLinkage, Initializer,
                                   GV->getName() + ".hwasan");
  NewGV->copyAttributesFrom(GV);
  NewGV->setLinkage(GlobalValue::PrivateLinkage);
  NewGV->copyMetadata(GV, 0);
  NewGV->setAlignment(
      std::max(GV->getAlign().valueOrOne(), Mapping.getObjectAlignment()));

  // It is invalid to ICF two globals that have different tags. In the case
  // where the size of the global is a multiple of the tag granularity the
  // contents of the globals may be the same but the tags (i.e. symbol
  // values) may be different, and the symbols are not considered during ICF.
  // In the case where the size is not a multiple of the granularity, the
  // short granule tags would discriminate two globals with different tags,
  // but there would otherwise be nothing stopping such a global from being
  // incorrectly ICF'd with an uninstrumented (i.e. tag 0) global that
  // happened to have the short granule tag in the last byte.
  NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);

  // Descriptor format (assuming little-endian):
  // bytes 0-3: relative address of global
  // bytes 4-6: size of global (16MB ought to be enough for anyone, but in
  // case it isn't, we create multiple descriptors)
  // byte 7: tag
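  // For instance (illustrative values only), a 32-byte global that starts
  // 0x100 bytes after its descriptor and is tagged 0x2A is encoded as the
  // two 32-bit words {0x00000100, 0x2A000020}: the size in the low 24 bits
  // of the second word and the tag in its top byte.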
  auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
  const uint64_t MaxDescriptorSize = 0xfffff0;
  for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
       DescriptorPos += MaxDescriptorSize) {
    auto *Descriptor =
        new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
                           nullptr, GV->getName() + ".hwasan.descriptor");
    auto *GVRelPtr = ConstantExpr::getTrunc(
        ConstantExpr::getAdd(
            ConstantExpr::getSub(
                ConstantExpr::getPtrToInt(NewGV, Int64Ty),
                ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
            ConstantInt::get(Int64Ty, DescriptorPos)),
        Int32Ty);
    uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
    auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
    Descriptor->setComdat(NewGV->getComdat());
    Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
    Descriptor->setSection("hwasan_globals");
    Descriptor->setMetadata(LLVMContext::MD_associated,
                            MDNode::get(*C, ValueAsMetadata::get(NewGV)));
    appendToCompilerUsed(M, Descriptor);
  }

  Constant *Aliasee = ConstantExpr::getIntToPtr(
      ConstantExpr::getAdd(
          ConstantExpr::getPtrToInt(NewGV, Int64Ty),
          ConstantInt::get(Int64Ty, uint64_t(Tag) << PointerTagShift)),
      GV->getType());
  auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
                                    GV->getLinkage(), "", Aliasee, &M);
  Alias->setVisibility(GV->getVisibility());
  Alias->takeName(GV);
  GV->replaceAllUsesWith(Alias);
  GV->eraseFromParent();
}

void HWAddressSanitizer::instrumentGlobals() {
  std::vector<GlobalVariable *> Globals;
  for (GlobalVariable &GV : M.globals()) {
    if (GV.hasSanitizerMetadata() && GV.getSanitizerMetadata().NoHWAddress)
      continue;

    if (GV.isDeclarationForLinker() || GV.getName().starts_with("llvm.") ||
        GV.isThreadLocal())
      continue;

    // Common symbols can't have aliases pointing to them, so they can't be
    // tagged.
    if (GV.hasCommonLinkage())
      continue;

    // Globals with custom sections may be used in __start_/__stop_
    // enumeration, which would be broken both by adding tags and potentially
    // by the extra padding/alignment that we insert.
    if (GV.hasSection())
      continue;

    Globals.push_back(&GV);
  }

  MD5 Hasher;
  Hasher.update(M.getSourceFileName());
  MD5::MD5Result Hash;
  Hasher.final(Hash);
  uint8_t Tag = Hash[0];

  assert(TagMaskByte >= 16);

  for (GlobalVariable *GV : Globals) {
    // Don't allow globals to be tagged with something that looks like a
    // short-granule tag, otherwise we lose inter-granule overflow detection,
    // as the fast path shadow-vs-address check succeeds.
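    // E.g. with TagMaskByte == 0xFF, successive globals receive tags 16, 17,
    // ..., 255, then wrap back to 16; tags below 16 would look like
    // short-granule size markers in shadow memory.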
    if (Tag < 16 || Tag > TagMaskByte)
      Tag = 16;
    instrumentGlobal(GV, Tag++);
  }
}

void HWAddressSanitizer::instrumentPersonalityFunctions() {
  // We need to untag stack frames as we unwind past them. That is the job of
  // the personality function wrapper, which either wraps an existing
  // personality function or acts as a personality function on its own. Each
  // function that has a personality function or that can be unwound past has
  // its personality function changed to a thunk that calls the personality
  // function wrapper in the runtime.
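  // E.g. a function using __gxx_personality_v0 is repointed at a generated
  // thunk named __hwasan_personality_thunk.__gxx_personality_v0 (see
  // ThunkName below), which tail-calls __hwasan_personality_wrapper with the
  // original personality function passed as an extra argument.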
  MapVector<Constant *, std::vector<Function *>> PersonalityFns;
  for (Function &F : M) {
    if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
      continue;

    if (F.hasPersonalityFn()) {
      PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F);
    } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
      PersonalityFns[nullptr].push_back(&F);
    }
  }

  if (PersonalityFns.empty())
    return;

  FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
      "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty,
      PtrTy, PtrTy, PtrTy, PtrTy, PtrTy);
  FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
  FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);

  for (auto &P : PersonalityFns) {
    std::string ThunkName = kHwasanPersonalityThunkName;
    if (P.first)
      ThunkName += ("." + P.first->getName()).str();
    FunctionType *ThunkFnTy = FunctionType::get(
        Int32Ty, {Int32Ty, Int32Ty, Int64Ty, PtrTy, PtrTy}, false);
    bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
                               cast<GlobalValue>(P.first)->hasLocalLinkage());
    auto *ThunkFn = Function::Create(ThunkFnTy,
                                     IsLocal ? GlobalValue::InternalLinkage
                                             : GlobalValue::LinkOnceODRLinkage,
                                     ThunkName, &M);
    if (!IsLocal) {
      ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
      ThunkFn->setComdat(M.getOrInsertComdat(ThunkName));
    }

    auto *BB = BasicBlock::Create(*C, "entry", ThunkFn);
    IRBuilder<> IRB(BB);
    CallInst *WrapperCall = IRB.CreateCall(
        HwasanPersonalityWrapper,
        {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
         ThunkFn->getArg(3), ThunkFn->getArg(4),
         P.first ? P.first : Constant::getNullValue(PtrTy),
         UnwindGetGR.getCallee(), UnwindGetCFA.getCallee()});
    WrapperCall->setTailCall();
    IRB.CreateRet(WrapperCall);

    for (Function *F : P.second)
      F->setPersonalityFn(ThunkFn);
  }
}

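// Select the shadow mapping configuration. The branches below are tried in
// order and the first match wins; in summary:
//   Fuchsia:                          Offset = 0,               frame record
//   ClMappingOffset set:              Offset = ClMappingOffset, no frame record
//   ClEnableKhwasan/InstrumentWithCalls: Offset = 0,            no frame record
//   ClWithIfunc:                      dynamic via ifunc,        no frame record
//   ClWithTls:                        dynamic via TLS,          frame record
//   otherwise:                        dynamic (sentinel),       no frame record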
void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple,
                                             bool InstrumentWithCalls) {
  Scale = kDefaultShadowScale;
  if (TargetTriple.isOSFuchsia()) {
    // Fuchsia is always PIE, which means that the beginning of the address
    // space is always available.
    InGlobal = false;
    InTls = false;
    Offset = 0;
    WithFrameRecord = true;
  } else if (ClMappingOffset.getNumOccurrences() > 0) {
    InGlobal = false;
    InTls = false;
    Offset = ClMappingOffset;
    WithFrameRecord = false;
  } else if (ClEnableKhwasan || InstrumentWithCalls) {
    InGlobal = false;
    InTls = false;
    Offset = 0;
    WithFrameRecord = false;
  } else if (ClWithIfunc) {
    InGlobal = true;
    InTls = false;
    Offset = kDynamicShadowSentinel;
    WithFrameRecord = false;
  } else if (ClWithTls) {
    InGlobal = false;
    InTls = true;
    Offset = kDynamicShadowSentinel;
    WithFrameRecord = true;
  } else {
    InGlobal = false;
    InTls = false;
    Offset = kDynamicShadowSentinel;
    WithFrameRecord = false;
  }
}