xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //===- HWAddressSanitizer.cpp - memory access error detector --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of HWAddressSanitizer, an address basic correctness
11 /// checker based on tagged addressing.
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Analysis/BlockFrequencyInfo.h"
22 #include "llvm/Analysis/DomTreeUpdater.h"
23 #include "llvm/Analysis/GlobalsModRef.h"
24 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
25 #include "llvm/Analysis/PostDominators.h"
26 #include "llvm/Analysis/ProfileSummaryInfo.h"
27 #include "llvm/Analysis/StackSafetyAnalysis.h"
28 #include "llvm/Analysis/TargetLibraryInfo.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/BinaryFormat/Dwarf.h"
31 #include "llvm/BinaryFormat/ELF.h"
32 #include "llvm/IR/Attributes.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/Constant.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DebugInfoMetadata.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Dominators.h"
40 #include "llvm/IR/Function.h"
41 #include "llvm/IR/IRBuilder.h"
42 #include "llvm/IR/InlineAsm.h"
43 #include "llvm/IR/InstIterator.h"
44 #include "llvm/IR/Instruction.h"
45 #include "llvm/IR/Instructions.h"
46 #include "llvm/IR/IntrinsicInst.h"
47 #include "llvm/IR/Intrinsics.h"
48 #include "llvm/IR/LLVMContext.h"
49 #include "llvm/IR/MDBuilder.h"
50 #include "llvm/IR/Module.h"
51 #include "llvm/IR/Type.h"
52 #include "llvm/IR/Value.h"
53 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/CommandLine.h"
55 #include "llvm/Support/Debug.h"
56 #include "llvm/Support/MD5.h"
57 #include "llvm/Support/RandomNumberGenerator.h"
58 #include "llvm/Support/raw_ostream.h"
59 #include "llvm/TargetParser/Triple.h"
60 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
61 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
62 #include "llvm/Transforms/Utils/Local.h"
63 #include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
64 #include "llvm/Transforms/Utils/ModuleUtils.h"
65 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
66 #include <optional>
67 #include <random>
68 
69 using namespace llvm;
70 
71 #define DEBUG_TYPE "hwasan"
72 
// Names of the module constructor, the ELF note, and the runtime entry points
// referenced by the instrumentation.
constexpr char kHwasanModuleCtorName[] = "hwasan.module_ctor";
constexpr char kHwasanNoteName[] = "hwasan.note";
constexpr char kHwasanInitName[] = "__hwasan_init";
constexpr char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk";

// Name of the global that holds the dynamically chosen shadow base address.
constexpr char kHwasanShadowMemoryDynamicAddress[] =
    "__hwasan_shadow_memory_dynamic_address";

// Access sizes are powers of two: 1, 2, 4, 8, 16.
static constexpr size_t kNumberOfAccessSizes = 5;

// Default log2 of the number of application bytes per shadow byte.
static constexpr size_t kDefaultShadowScale = 4;

// Offset value meaning "the shadow base is not a compile-time constant".
static constexpr uint64_t kDynamicShadowSentinel =
    std::numeric_limits<uint64_t>::max();

static constexpr unsigned kShadowBaseAlignment = 32;
89 
90 static cl::opt<std::string>
91     ClMemoryAccessCallbackPrefix("hwasan-memory-access-callback-prefix",
92                                  cl::desc("Prefix for memory access callbacks"),
93                                  cl::Hidden, cl::init("__hwasan_"));
94 
95 static cl::opt<bool> ClKasanMemIntrinCallbackPrefix(
96     "hwasan-kernel-mem-intrinsic-prefix",
97     cl::desc("Use prefix for memory intrinsics in KASAN mode"), cl::Hidden,
98     cl::init(false));
99 
100 static cl::opt<bool> ClInstrumentWithCalls(
101     "hwasan-instrument-with-calls",
102     cl::desc("instrument reads and writes with callbacks"), cl::Hidden,
103     cl::init(false));
104 
105 static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
106                                        cl::desc("instrument read instructions"),
107                                        cl::Hidden, cl::init(true));
108 
109 static cl::opt<bool>
110     ClInstrumentWrites("hwasan-instrument-writes",
111                        cl::desc("instrument write instructions"), cl::Hidden,
112                        cl::init(true));
113 
114 static cl::opt<bool> ClInstrumentAtomics(
115     "hwasan-instrument-atomics",
116     cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
117     cl::init(true));
118 
119 static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval",
120                                        cl::desc("instrument byval arguments"),
121                                        cl::Hidden, cl::init(true));
122 
123 static cl::opt<bool>
124     ClRecover("hwasan-recover",
125               cl::desc("Enable recovery mode (continue-after-error)."),
126               cl::Hidden, cl::init(false));
127 
128 static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
129                                        cl::desc("instrument stack (allocas)"),
130                                        cl::Hidden, cl::init(true));
131 
132 static cl::opt<bool>
133     ClUseStackSafety("hwasan-use-stack-safety", cl::Hidden, cl::init(true),
134                      cl::Hidden, cl::desc("Use Stack Safety analysis results"),
135                      cl::Optional);
136 
137 static cl::opt<size_t> ClMaxLifetimes(
138     "hwasan-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
139     cl::ReallyHidden,
140     cl::desc("How many lifetime ends to handle for a single alloca."),
141     cl::Optional);
142 
143 static cl::opt<bool>
144     ClUseAfterScope("hwasan-use-after-scope",
145                     cl::desc("detect use after scope within function"),
146                     cl::Hidden, cl::init(true));
147 
148 static cl::opt<bool> ClGenerateTagsWithCalls(
149     "hwasan-generate-tags-with-calls",
150     cl::desc("generate new tags with runtime library calls"), cl::Hidden,
151     cl::init(false));
152 
153 static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
154                                cl::Hidden, cl::init(false));
155 
156 static cl::opt<int> ClMatchAllTag(
157     "hwasan-match-all-tag",
158     cl::desc("don't report bad accesses via pointers with this tag"),
159     cl::Hidden, cl::init(-1));
160 
161 static cl::opt<bool>
162     ClEnableKhwasan("hwasan-kernel",
163                     cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
164                     cl::Hidden, cl::init(false));
165 
166 // These flags allow to change the shadow mapping and control how shadow memory
167 // is accessed. The shadow mapping looks like:
168 //    Shadow = (Mem >> scale) + offset
169 
170 static cl::opt<uint64_t>
171     ClMappingOffset("hwasan-mapping-offset",
172                     cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
173                     cl::Hidden, cl::init(0));
174 
175 static cl::opt<bool>
176     ClWithIfunc("hwasan-with-ifunc",
177                 cl::desc("Access dynamic shadow through an ifunc global on "
178                          "platforms that support this"),
179                 cl::Hidden, cl::init(false));
180 
181 static cl::opt<bool> ClWithTls(
182     "hwasan-with-tls",
183     cl::desc("Access dynamic shadow through an thread-local pointer on "
184              "platforms that support this"),
185     cl::Hidden, cl::init(true));
186 
187 static cl::opt<int> ClHotPercentileCutoff("hwasan-percentile-cutoff-hot",
188                                           cl::desc("Hot percentile cuttoff."));
189 
190 static cl::opt<float>
191     ClRandomSkipRate("hwasan-random-rate",
192                      cl::desc("Probability value in the range [0.0, 1.0] "
193                               "to keep instrumentation of a function."));
194 
195 STATISTIC(NumTotalFuncs, "Number of total funcs");
196 STATISTIC(NumInstrumentedFuncs, "Number of instrumented funcs");
197 STATISTIC(NumNoProfileSummaryFuncs, "Number of funcs without PS");
198 
// Selects how frame record information is written to the stack ring buffer.
// The enumerator names double as the textual values accepted by
// -hwasan-record-stack-history, so they must not be renamed or scoped.
enum RecordStackHistoryMode {
  none,    // Frame record info is not recorded at all.
  instr,   // The prologue stores into the stack ring buffer directly.
  libcall, // The prologue calls __hwasan_add_frame_record in the runtime.
};
212 
213 static cl::opt<RecordStackHistoryMode> ClRecordStackHistory(
214     "hwasan-record-stack-history",
215     cl::desc("Record stack frames with tagged allocations in a thread-local "
216              "ring buffer"),
217     cl::values(clEnumVal(none, "Do not record stack ring history"),
218                clEnumVal(instr, "Insert instructions into the prologue for "
219                                 "storing into the stack ring buffer directly"),
220                clEnumVal(libcall, "Add a call to __hwasan_add_frame_record for "
221                                   "storing into the stack ring buffer")),
222     cl::Hidden, cl::init(instr));
223 
224 static cl::opt<bool>
225     ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
226                               cl::desc("instrument memory intrinsics"),
227                               cl::Hidden, cl::init(true));
228 
229 static cl::opt<bool>
230     ClInstrumentLandingPads("hwasan-instrument-landing-pads",
231                             cl::desc("instrument landing pads"), cl::Hidden,
232                             cl::init(false));
233 
234 static cl::opt<bool> ClUseShortGranules(
235     "hwasan-use-short-granules",
236     cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
237     cl::init(false));
238 
239 static cl::opt<bool> ClInstrumentPersonalityFunctions(
240     "hwasan-instrument-personality-functions",
241     cl::desc("instrument personality functions"), cl::Hidden);
242 
243 static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
244                                        cl::desc("inline all checks"),
245                                        cl::Hidden, cl::init(false));
246 
247 static cl::opt<bool> ClInlineFastPathChecks("hwasan-inline-fast-path-checks",
248                                             cl::desc("inline all checks"),
249                                             cl::Hidden, cl::init(false));
250 
251 // Enabled from clang by "-fsanitize-hwaddress-experimental-aliasing".
252 static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases",
253                                       cl::desc("Use page aliasing in HWASan"),
254                                       cl::Hidden, cl::init(false));
255 
256 namespace {
257 
258 template <typename T> T optOr(cl::opt<T> &Opt, T Other) {
259   return Opt.getNumOccurrences() ? Opt : Other;
260 }
261 
262 bool shouldUsePageAliases(const Triple &TargetTriple) {
263   return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64;
264 }
265 
266 bool shouldInstrumentStack(const Triple &TargetTriple) {
267   return !shouldUsePageAliases(TargetTriple) && ClInstrumentStack;
268 }
269 
270 bool shouldInstrumentWithCalls(const Triple &TargetTriple) {
271   return optOr(ClInstrumentWithCalls, TargetTriple.getArch() == Triple::x86_64);
272 }
273 
274 bool mightUseStackSafetyAnalysis(bool DisableOptimization) {
275   return optOr(ClUseStackSafety, !DisableOptimization);
276 }
277 
278 bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
279                                   bool DisableOptimization) {
280   return shouldInstrumentStack(TargetTriple) &&
281          mightUseStackSafetyAnalysis(DisableOptimization);
282 }
283 
284 bool shouldDetectUseAfterScope(const Triple &TargetTriple) {
285   return ClUseAfterScope && shouldInstrumentStack(TargetTriple);
286 }
287 
288 /// An instrumentation pass implementing detection of addressability bugs
289 /// using tagged pointers.
290 class HWAddressSanitizer {
291 public:
292   HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
293                      const StackSafetyGlobalInfo *SSI)
294       : M(M), SSI(SSI) {
295     this->Recover = optOr(ClRecover, Recover);
296     this->CompileKernel = optOr(ClEnableKhwasan, CompileKernel);
297     this->Rng = ClRandomSkipRate.getNumOccurrences() ? M.createRNG(DEBUG_TYPE)
298                                                      : nullptr;
299 
300     initializeModule();
301   }
302 
303   void sanitizeFunction(Function &F, FunctionAnalysisManager &FAM);
304 
305 private:
306   struct ShadowTagCheckInfo {
307     Instruction *TagMismatchTerm = nullptr;
308     Value *PtrLong = nullptr;
309     Value *AddrLong = nullptr;
310     Value *PtrTag = nullptr;
311     Value *MemTag = nullptr;
312   };
313 
314   bool selectiveInstrumentationShouldSkip(Function &F,
315                                           FunctionAnalysisManager &FAM) const;
316   void initializeModule();
317   void createHwasanCtorComdat();
318 
319   void initializeCallbacks(Module &M);
320 
321   Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val);
322 
323   Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
324   Value *getShadowNonTls(IRBuilder<> &IRB);
325 
326   void untagPointerOperand(Instruction *I, Value *Addr);
327   Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
328 
329   int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex);
330   ShadowTagCheckInfo insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore,
331                                           DomTreeUpdater &DTU, LoopInfo *LI);
332   void instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
333                                   unsigned AccessSizeIndex,
334                                   Instruction *InsertBefore,
335                                   DomTreeUpdater &DTU, LoopInfo *LI);
336   void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
337                                  unsigned AccessSizeIndex,
338                                  Instruction *InsertBefore, DomTreeUpdater &DTU,
339                                  LoopInfo *LI);
340   bool ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE, MemIntrinsic *MI);
341   void instrumentMemIntrinsic(MemIntrinsic *MI);
342   bool instrumentMemAccess(InterestingMemoryOperand &O, DomTreeUpdater &DTU,
343                            LoopInfo *LI);
344   bool ignoreAccessWithoutRemark(Instruction *Inst, Value *Ptr);
345   bool ignoreAccess(OptimizationRemarkEmitter &ORE, Instruction *Inst,
346                     Value *Ptr);
347 
348   void getInterestingMemoryOperands(
349       OptimizationRemarkEmitter &ORE, Instruction *I,
350       const TargetLibraryInfo &TLI,
351       SmallVectorImpl<InterestingMemoryOperand> &Interesting);
352 
353   void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
354   Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
355   Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
356   bool instrumentStack(memtag::StackInfo &Info, Value *StackTag, Value *UARTag,
357                        const DominatorTree &DT, const PostDominatorTree &PDT,
358                        const LoopInfo &LI);
359   bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
360   Value *getNextTagWithCall(IRBuilder<> &IRB);
361   Value *getStackBaseTag(IRBuilder<> &IRB);
362   Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, unsigned AllocaNo);
363   Value *getUARTag(IRBuilder<> &IRB);
364 
365   Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB);
366   Value *applyTagMask(IRBuilder<> &IRB, Value *OldTag);
367   unsigned retagMask(unsigned AllocaNo);
368 
369   void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
370 
371   void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
372   void instrumentGlobals();
373 
374   Value *getCachedFP(IRBuilder<> &IRB);
375   Value *getFrameRecordInfo(IRBuilder<> &IRB);
376 
377   void instrumentPersonalityFunctions();
378 
379   LLVMContext *C;
380   Module &M;
381   const StackSafetyGlobalInfo *SSI;
382   Triple TargetTriple;
383   std::unique_ptr<RandomNumberGenerator> Rng;
384 
385   /// This struct defines the shadow mapping using the rule:
386   ///   shadow = (mem >> Scale) + Offset.
387   /// If InGlobal is true, then
388   ///   extern char __hwasan_shadow[];
389   ///   shadow = (mem >> Scale) + &__hwasan_shadow
390   /// If InTls is true, then
391   ///   extern char *__hwasan_tls;
392   ///   shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
393   ///
394   /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
395   /// ring buffer for storing stack allocations on targets that support it.
396   struct ShadowMapping {
397     uint8_t Scale;
398     uint64_t Offset;
399     bool InGlobal;
400     bool InTls;
401     bool WithFrameRecord;
402 
403     void init(Triple &TargetTriple, bool InstrumentWithCalls);
404     Align getObjectAlignment() const { return Align(1ULL << Scale); }
405   };
406 
407   ShadowMapping Mapping;
408 
409   Type *VoidTy = Type::getVoidTy(M.getContext());
410   Type *IntptrTy = M.getDataLayout().getIntPtrType(M.getContext());
411   PointerType *PtrTy = PointerType::getUnqual(M.getContext());
412   Type *Int8Ty = Type::getInt8Ty(M.getContext());
413   Type *Int32Ty = Type::getInt32Ty(M.getContext());
414   Type *Int64Ty = Type::getInt64Ty(M.getContext());
415 
416   bool CompileKernel;
417   bool Recover;
418   bool OutlinedChecks;
419   bool InlineFastPath;
420   bool UseShortGranules;
421   bool InstrumentLandingPads;
422   bool InstrumentWithCalls;
423   bool InstrumentStack;
424   bool InstrumentGlobals;
425   bool DetectUseAfterScope;
426   bool UsePageAliases;
427   bool UseMatchAllCallback;
428 
429   std::optional<uint8_t> MatchAllTag;
430 
431   unsigned PointerTagShift;
432   uint64_t TagMaskByte;
433 
434   Function *HwasanCtorFunction;
435 
436   FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
437   FunctionCallee HwasanMemoryAccessCallbackSized[2];
438 
439   FunctionCallee HwasanMemmove, HwasanMemcpy, HwasanMemset;
440   FunctionCallee HwasanHandleVfork;
441 
442   FunctionCallee HwasanTagMemoryFunc;
443   FunctionCallee HwasanGenerateTagFunc;
444   FunctionCallee HwasanRecordFrameRecordFunc;
445 
446   Constant *ShadowGlobal;
447 
448   Value *ShadowBase = nullptr;
449   Value *StackBaseTag = nullptr;
450   Value *CachedFP = nullptr;
451   GlobalValue *ThreadPtrGlobal = nullptr;
452 };
453 
454 } // end anonymous namespace
455 
456 PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
457                                               ModuleAnalysisManager &MAM) {
458   const StackSafetyGlobalInfo *SSI = nullptr;
459   auto TargetTriple = llvm::Triple(M.getTargetTriple());
460   if (shouldUseStackSafetyAnalysis(TargetTriple, Options.DisableOptimization))
461     SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);
462 
463   HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
464   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
465   for (Function &F : M)
466     HWASan.sanitizeFunction(F, FAM);
467 
468   PreservedAnalyses PA = PreservedAnalyses::none();
469   // DominatorTreeAnalysis, PostDominatorTreeAnalysis, and LoopAnalysis
470   // are incrementally updated throughout this pass whenever
471   // SplitBlockAndInsertIfThen is called.
472   PA.preserve<DominatorTreeAnalysis>();
473   PA.preserve<PostDominatorTreeAnalysis>();
474   PA.preserve<LoopAnalysis>();
475   // GlobalsAA is considered stateless and does not get invalidated unless
476   // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
477   // make changes that require GlobalsAA to be invalidated.
478   PA.abandon<GlobalsAA>();
479   return PA;
480 }
481 void HWAddressSanitizerPass::printPipeline(
482     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
483   static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline(
484       OS, MapClassName2PassName);
485   OS << '<';
486   if (Options.CompileKernel)
487     OS << "kernel;";
488   if (Options.Recover)
489     OS << "recover";
490   OS << '>';
491 }
492 
493 void HWAddressSanitizer::createHwasanCtorComdat() {
494   std::tie(HwasanCtorFunction, std::ignore) =
495       getOrCreateSanitizerCtorAndInitFunctions(
496           M, kHwasanModuleCtorName, kHwasanInitName,
497           /*InitArgTypes=*/{},
498           /*InitArgs=*/{},
499           // This callback is invoked when the functions are created the first
500           // time. Hook them into the global ctors list in that case:
501           [&](Function *Ctor, FunctionCallee) {
502             Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
503             Ctor->setComdat(CtorComdat);
504             appendToGlobalCtors(M, Ctor, 0, Ctor);
505           });
506 
507   // Create a note that contains pointers to the list of global
508   // descriptors. Adding a note to the output file will cause the linker to
509   // create a PT_NOTE program header pointing to the note that we can use to
510   // find the descriptor list starting from the program headers. A function
511   // provided by the runtime initializes the shadow memory for the globals by
512   // accessing the descriptor list via the note. The dynamic loader needs to
513   // call this function whenever a library is loaded.
514   //
515   // The reason why we use a note for this instead of a more conventional
516   // approach of having a global constructor pass a descriptor list pointer to
517   // the runtime is because of an order of initialization problem. With
518   // constructors we can encounter the following problematic scenario:
519   //
520   // 1) library A depends on library B and also interposes one of B's symbols
521   // 2) B's constructors are called before A's (as required for correctness)
522   // 3) during construction, B accesses one of its "own" globals (actually
523   //    interposed by A) and triggers a HWASAN failure due to the initialization
524   //    for A not having happened yet
525   //
526   // Even without interposition it is possible to run into similar situations in
527   // cases where two libraries mutually depend on each other.
528   //
529   // We only need one note per binary, so put everything for the note in a
530   // comdat. This needs to be a comdat with an .init_array section to prevent
531   // newer versions of lld from discarding the note.
532   //
533   // Create the note even if we aren't instrumenting globals. This ensures that
534   // binaries linked from object files with both instrumented and
535   // non-instrumented globals will end up with a note, even if a comdat from an
536   // object file with non-instrumented globals is selected. The note is harmless
537   // if the runtime doesn't support it, since it will just be ignored.
538   Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
539 
540   Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
541   auto *Start =
542       new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
543                          nullptr, "__start_hwasan_globals");
544   Start->setVisibility(GlobalValue::HiddenVisibility);
545   auto *Stop =
546       new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
547                          nullptr, "__stop_hwasan_globals");
548   Stop->setVisibility(GlobalValue::HiddenVisibility);
549 
550   // Null-terminated so actually 8 bytes, which are required in order to align
551   // the note properly.
552   auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");
553 
554   auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
555                                  Int32Ty, Int32Ty);
556   auto *Note =
557       new GlobalVariable(M, NoteTy, /*isConstant=*/true,
558                          GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
559   Note->setSection(".note.hwasan.globals");
560   Note->setComdat(NoteComdat);
561   Note->setAlignment(Align(4));
562 
563   // The pointers in the note need to be relative so that the note ends up being
564   // placed in rodata, which is the standard location for notes.
565   auto CreateRelPtr = [&](Constant *Ptr) {
566     return ConstantExpr::getTrunc(
567         ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
568                              ConstantExpr::getPtrToInt(Note, Int64Ty)),
569         Int32Ty);
570   };
571   Note->setInitializer(ConstantStruct::getAnon(
572       {ConstantInt::get(Int32Ty, 8),                           // n_namesz
573        ConstantInt::get(Int32Ty, 8),                           // n_descsz
574        ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
575        Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
576   appendToCompilerUsed(M, Note);
577 
578   // Create a zero-length global in hwasan_globals so that the linker will
579   // always create start and stop symbols.
580   auto *Dummy = new GlobalVariable(
581       M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
582       Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
583   Dummy->setSection("hwasan_globals");
584   Dummy->setComdat(NoteComdat);
585   Dummy->setMetadata(LLVMContext::MD_associated,
586                      MDNode::get(*C, ValueAsMetadata::get(Note)));
587   appendToCompilerUsed(M, Dummy);
588 }
589 
590 /// Module-level initialization.
591 ///
592 /// inserts a call to __hwasan_init to the module's constructor list.
593 void HWAddressSanitizer::initializeModule() {
594   LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
595   TargetTriple = Triple(M.getTargetTriple());
596 
597   // x86_64 currently has two modes:
598   // - Intel LAM (default)
599   // - pointer aliasing (heap only)
600   bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
601   UsePageAliases = shouldUsePageAliases(TargetTriple);
602   InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple);
603   InstrumentStack = shouldInstrumentStack(TargetTriple);
604   DetectUseAfterScope = shouldDetectUseAfterScope(TargetTriple);
605   PointerTagShift = IsX86_64 ? 57 : 56;
606   TagMaskByte = IsX86_64 ? 0x3F : 0xFF;
607 
608   Mapping.init(TargetTriple, InstrumentWithCalls);
609 
610   C = &(M.getContext());
611   IRBuilder<> IRB(*C);
612 
613   HwasanCtorFunction = nullptr;
614 
615   // Older versions of Android do not have the required runtime support for
616   // short granules, global or personality function instrumentation. On other
617   // platforms we currently require using the latest version of the runtime.
618   bool NewRuntime =
619       !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);
620 
621   UseShortGranules = optOr(ClUseShortGranules, NewRuntime);
622   OutlinedChecks = (TargetTriple.isAArch64() || TargetTriple.isRISCV64()) &&
623                    TargetTriple.isOSBinFormatELF() &&
624                    !optOr(ClInlineAllChecks, Recover);
625 
626   // These platforms may prefer less inlining to reduce binary size.
627   InlineFastPath = optOr(ClInlineFastPathChecks, !(TargetTriple.isAndroid() ||
628                                                    TargetTriple.isOSFuchsia()));
629 
630   if (ClMatchAllTag.getNumOccurrences()) {
631     if (ClMatchAllTag != -1) {
632       MatchAllTag = ClMatchAllTag & 0xFF;
633     }
634   } else if (CompileKernel) {
635     MatchAllTag = 0xFF;
636   }
637   UseMatchAllCallback = !CompileKernel && MatchAllTag.has_value();
638 
639   // If we don't have personality function support, fall back to landing pads.
640   InstrumentLandingPads = optOr(ClInstrumentLandingPads, !NewRuntime);
641 
642   InstrumentGlobals =
643       !CompileKernel && !UsePageAliases && optOr(ClGlobals, NewRuntime);
644 
645   if (!CompileKernel) {
646     createHwasanCtorComdat();
647 
648     if (InstrumentGlobals)
649       instrumentGlobals();
650 
651     bool InstrumentPersonalityFunctions =
652         optOr(ClInstrumentPersonalityFunctions, NewRuntime);
653     if (InstrumentPersonalityFunctions)
654       instrumentPersonalityFunctions();
655   }
656 
657   if (!TargetTriple.isAndroid()) {
658     Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
659       auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
660                                     GlobalValue::ExternalLinkage, nullptr,
661                                     "__hwasan_tls", nullptr,
662                                     GlobalVariable::InitialExecTLSModel);
663       appendToCompilerUsed(M, GV);
664       return GV;
665     });
666     ThreadPtrGlobal = cast<GlobalVariable>(C);
667   }
668 }
669 
670 void HWAddressSanitizer::initializeCallbacks(Module &M) {
671   IRBuilder<> IRB(*C);
672   const std::string MatchAllStr = UseMatchAllCallback ? "_match_all" : "";
673   FunctionType *HwasanMemoryAccessCallbackSizedFnTy,
674       *HwasanMemoryAccessCallbackFnTy, *HwasanMemTransferFnTy,
675       *HwasanMemsetFnTy;
676   if (UseMatchAllCallback) {
677     HwasanMemoryAccessCallbackSizedFnTy =
678         FunctionType::get(VoidTy, {IntptrTy, IntptrTy, Int8Ty}, false);
679     HwasanMemoryAccessCallbackFnTy =
680         FunctionType::get(VoidTy, {IntptrTy, Int8Ty}, false);
681     HwasanMemTransferFnTy =
682         FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy, Int8Ty}, false);
683     HwasanMemsetFnTy =
684         FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy, Int8Ty}, false);
685   } else {
686     HwasanMemoryAccessCallbackSizedFnTy =
687         FunctionType::get(VoidTy, {IntptrTy, IntptrTy}, false);
688     HwasanMemoryAccessCallbackFnTy =
689         FunctionType::get(VoidTy, {IntptrTy}, false);
690     HwasanMemTransferFnTy =
691         FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy}, false);
692     HwasanMemsetFnTy =
693         FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy}, false);
694   }
695 
696   for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
697     const std::string TypeStr = AccessIsWrite ? "store" : "load";
698     const std::string EndingStr = Recover ? "_noabort" : "";
699 
700     HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
701         ClMemoryAccessCallbackPrefix + TypeStr + "N" + MatchAllStr + EndingStr,
702         HwasanMemoryAccessCallbackSizedFnTy);
703 
704     for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
705          AccessSizeIndex++) {
706       HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
707           M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr +
708                                     itostr(1ULL << AccessSizeIndex) +
709                                     MatchAllStr + EndingStr,
710                                 HwasanMemoryAccessCallbackFnTy);
711     }
712   }
713 
714   const std::string MemIntrinCallbackPrefix =
715       (CompileKernel && !ClKasanMemIntrinCallbackPrefix)
716           ? std::string("")
717           : ClMemoryAccessCallbackPrefix;
718 
719   HwasanMemmove = M.getOrInsertFunction(
720       MemIntrinCallbackPrefix + "memmove" + MatchAllStr, HwasanMemTransferFnTy);
721   HwasanMemcpy = M.getOrInsertFunction(
722       MemIntrinCallbackPrefix + "memcpy" + MatchAllStr, HwasanMemTransferFnTy);
723   HwasanMemset = M.getOrInsertFunction(
724       MemIntrinCallbackPrefix + "memset" + MatchAllStr, HwasanMemsetFnTy);
725 
726   HwasanTagMemoryFunc = M.getOrInsertFunction("__hwasan_tag_memory", VoidTy,
727                                               PtrTy, Int8Ty, IntptrTy);
728   HwasanGenerateTagFunc =
729       M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
730 
731   HwasanRecordFrameRecordFunc =
732       M.getOrInsertFunction("__hwasan_add_frame_record", VoidTy, Int64Ty);
733 
734   ShadowGlobal =
735       M.getOrInsertGlobal("__hwasan_shadow", ArrayType::get(Int8Ty, 0));
736 
737   HwasanHandleVfork =
738       M.getOrInsertFunction("__hwasan_handle_vfork", VoidTy, IntptrTy);
739 }
740 
741 Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
742   // An empty inline asm with input reg == output reg.
743   // An opaque no-op cast, basically.
744   // This prevents code bloat as a result of rematerializing trivial definitions
745   // such as constants or global addresses at every load and store.
746   InlineAsm *Asm =
747       InlineAsm::get(FunctionType::get(PtrTy, {Val->getType()}, false),
748                      StringRef(""), StringRef("=r,0"),
749                      /*hasSideEffects=*/false);
750   return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
751 }
752 
753 Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
754   return getOpaqueNoopCast(IRB, ShadowGlobal);
755 }
756 
757 Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
758   if (Mapping.Offset != kDynamicShadowSentinel)
759     return getOpaqueNoopCast(
760         IRB, ConstantExpr::getIntToPtr(
761                  ConstantInt::get(IntptrTy, Mapping.Offset), PtrTy));
762 
763   if (Mapping.InGlobal)
764     return getDynamicShadowIfunc(IRB);
765 
766   Value *GlobalDynamicAddress =
767       IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
768           kHwasanShadowMemoryDynamicAddress, PtrTy);
769   return IRB.CreateLoad(PtrTy, GlobalDynamicAddress);
770 }
771 
772 bool HWAddressSanitizer::ignoreAccessWithoutRemark(Instruction *Inst,
773                                                    Value *Ptr) {
774   // Do not instrument accesses from different address spaces; we cannot deal
775   // with them.
776   Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
777   if (PtrTy->getPointerAddressSpace() != 0)
778     return true;
779 
780   // Ignore swifterror addresses.
781   // swifterror memory addresses are mem2reg promoted by instruction
782   // selection. As such they cannot have regular uses like an instrumentation
783   // function and it makes no sense to track them as memory.
784   if (Ptr->isSwiftError())
785     return true;
786 
787   if (findAllocaForValue(Ptr)) {
788     if (!InstrumentStack)
789       return true;
790     if (SSI && SSI->stackAccessIsSafe(*Inst))
791       return true;
792   }
793 
794   if (isa<GlobalVariable>(getUnderlyingObject(Ptr))) {
795     if (!InstrumentGlobals)
796       return true;
797     // TODO: Optimize inbound global accesses, like Asan `instrumentMop`.
798   }
799 
800   return false;
801 }
802 
803 bool HWAddressSanitizer::ignoreAccess(OptimizationRemarkEmitter &ORE,
804                                       Instruction *Inst, Value *Ptr) {
805   bool Ignored = ignoreAccessWithoutRemark(Inst, Ptr);
806   if (Ignored) {
807     ORE.emit(
808         [&]() { return OptimizationRemark(DEBUG_TYPE, "ignoreAccess", Inst); });
809   } else {
810     ORE.emit([&]() {
811       return OptimizationRemarkMissed(DEBUG_TYPE, "ignoreAccess", Inst);
812     });
813   }
814   return Ignored;
815 }
816 
817 void HWAddressSanitizer::getInterestingMemoryOperands(
818     OptimizationRemarkEmitter &ORE, Instruction *I,
819     const TargetLibraryInfo &TLI,
820     SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
821   // Skip memory accesses inserted by another instrumentation.
822   if (I->hasMetadata(LLVMContext::MD_nosanitize))
823     return;
824 
825   // Do not instrument the load fetching the dynamic shadow address.
826   if (ShadowBase == I)
827     return;
828 
829   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
830     if (!ClInstrumentReads || ignoreAccess(ORE, I, LI->getPointerOperand()))
831       return;
832     Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
833                              LI->getType(), LI->getAlign());
834   } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
835     if (!ClInstrumentWrites || ignoreAccess(ORE, I, SI->getPointerOperand()))
836       return;
837     Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
838                              SI->getValueOperand()->getType(), SI->getAlign());
839   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
840     if (!ClInstrumentAtomics || ignoreAccess(ORE, I, RMW->getPointerOperand()))
841       return;
842     Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
843                              RMW->getValOperand()->getType(), std::nullopt);
844   } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
845     if (!ClInstrumentAtomics || ignoreAccess(ORE, I, XCHG->getPointerOperand()))
846       return;
847     Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
848                              XCHG->getCompareOperand()->getType(),
849                              std::nullopt);
850   } else if (auto *CI = dyn_cast<CallInst>(I)) {
851     for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
852       if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
853           ignoreAccess(ORE, I, CI->getArgOperand(ArgNo)))
854         continue;
855       Type *Ty = CI->getParamByValType(ArgNo);
856       Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
857     }
858     maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
859   }
860 }
861 
862 static unsigned getPointerOperandIndex(Instruction *I) {
863   if (LoadInst *LI = dyn_cast<LoadInst>(I))
864     return LI->getPointerOperandIndex();
865   if (StoreInst *SI = dyn_cast<StoreInst>(I))
866     return SI->getPointerOperandIndex();
867   if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
868     return RMW->getPointerOperandIndex();
869   if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
870     return XCHG->getPointerOperandIndex();
871   report_fatal_error("Unexpected instruction");
872   return -1;
873 }
874 
875 static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
876   size_t Res = llvm::countr_zero(TypeSize / 8);
877   assert(Res < kNumberOfAccessSizes);
878   return Res;
879 }
880 
881 void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
882   if (TargetTriple.isAArch64() || TargetTriple.getArch() == Triple::x86_64 ||
883       TargetTriple.isRISCV64())
884     return;
885 
886   IRBuilder<> IRB(I);
887   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
888   Value *UntaggedPtr =
889       IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
890   I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
891 }
892 
893 Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
894   // Mem >> Scale
895   Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
896   if (Mapping.Offset == 0)
897     return IRB.CreateIntToPtr(Shadow, PtrTy);
898   // (Mem >> Scale) + Offset
899   return IRB.CreatePtrAdd(ShadowBase, Shadow);
900 }
901 
902 int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
903                                           unsigned AccessSizeIndex) {
904   return (CompileKernel << HWASanAccessInfo::CompileKernelShift) |
905          (MatchAllTag.has_value() << HWASanAccessInfo::HasMatchAllShift) |
906          (MatchAllTag.value_or(0) << HWASanAccessInfo::MatchAllShift) |
907          (Recover << HWASanAccessInfo::RecoverShift) |
908          (IsWrite << HWASanAccessInfo::IsWriteShift) |
909          (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
910 }
911 
912 HWAddressSanitizer::ShadowTagCheckInfo
913 HWAddressSanitizer::insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore,
914                                          DomTreeUpdater &DTU, LoopInfo *LI) {
915   ShadowTagCheckInfo R;
916 
917   IRBuilder<> IRB(InsertBefore);
918 
919   R.PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
920   R.PtrTag =
921       IRB.CreateTrunc(IRB.CreateLShr(R.PtrLong, PointerTagShift), Int8Ty);
922   R.AddrLong = untagPointer(IRB, R.PtrLong);
923   Value *Shadow = memToShadow(R.AddrLong, IRB);
924   R.MemTag = IRB.CreateLoad(Int8Ty, Shadow);
925   Value *TagMismatch = IRB.CreateICmpNE(R.PtrTag, R.MemTag);
926 
927   if (MatchAllTag.has_value()) {
928     Value *TagNotIgnored = IRB.CreateICmpNE(
929         R.PtrTag, ConstantInt::get(R.PtrTag->getType(), *MatchAllTag));
930     TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
931   }
932 
933   R.TagMismatchTerm = SplitBlockAndInsertIfThen(
934       TagMismatch, InsertBefore, false,
935       MDBuilder(*C).createUnlikelyBranchWeights(), &DTU, LI);
936 
937   return R;
938 }
939 
940 void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
941                                                     unsigned AccessSizeIndex,
942                                                     Instruction *InsertBefore,
943                                                     DomTreeUpdater &DTU,
944                                                     LoopInfo *LI) {
945   assert(!UsePageAliases);
946   const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
947 
948   if (InlineFastPath)
949     InsertBefore =
950         insertShadowTagCheck(Ptr, InsertBefore, DTU, LI).TagMismatchTerm;
951 
952   IRBuilder<> IRB(InsertBefore);
953   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
954   bool useFixedShadowIntrinsic = false;
955   // The memaccess fixed shadow intrinsic is only supported on AArch64,
956   // which allows a 16-bit immediate to be left-shifted by 32.
957   // Since kShadowBaseAlignment == 32, and Linux by default will not
958   // mmap above 48-bits, practically any valid shadow offset is
959   // representable.
960   // In particular, an offset of 4TB (1024 << 32) is representable, and
961   // ought to be good enough for anybody.
962   if (TargetTriple.isAArch64() && Mapping.Offset != kDynamicShadowSentinel) {
963     uint16_t offset_shifted = Mapping.Offset >> 32;
964     useFixedShadowIntrinsic = (uint64_t)offset_shifted << 32 == Mapping.Offset;
965   }
966 
967   if (useFixedShadowIntrinsic)
968     IRB.CreateCall(
969         Intrinsic::getDeclaration(
970             M, UseShortGranules
971                    ? Intrinsic::hwasan_check_memaccess_shortgranules_fixedshadow
972                    : Intrinsic::hwasan_check_memaccess_fixedshadow),
973         {Ptr, ConstantInt::get(Int32Ty, AccessInfo),
974          ConstantInt::get(Int64Ty, Mapping.Offset)});
975   else
976     IRB.CreateCall(Intrinsic::getDeclaration(
977                        M, UseShortGranules
978                               ? Intrinsic::hwasan_check_memaccess_shortgranules
979                               : Intrinsic::hwasan_check_memaccess),
980                    {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
981 }
982 
983 void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
984                                                    unsigned AccessSizeIndex,
985                                                    Instruction *InsertBefore,
986                                                    DomTreeUpdater &DTU,
987                                                    LoopInfo *LI) {
988   assert(!UsePageAliases);
989   const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
990 
991   ShadowTagCheckInfo TCI = insertShadowTagCheck(Ptr, InsertBefore, DTU, LI);
992 
993   IRBuilder<> IRB(TCI.TagMismatchTerm);
994   Value *OutOfShortGranuleTagRange =
995       IRB.CreateICmpUGT(TCI.MemTag, ConstantInt::get(Int8Ty, 15));
996   Instruction *CheckFailTerm = SplitBlockAndInsertIfThen(
997       OutOfShortGranuleTagRange, TCI.TagMismatchTerm, !Recover,
998       MDBuilder(*C).createUnlikelyBranchWeights(), &DTU, LI);
999 
1000   IRB.SetInsertPoint(TCI.TagMismatchTerm);
1001   Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(TCI.PtrLong, 15), Int8Ty);
1002   PtrLowBits = IRB.CreateAdd(
1003       PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
1004   Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, TCI.MemTag);
1005   SplitBlockAndInsertIfThen(PtrLowBitsOOB, TCI.TagMismatchTerm, false,
1006                             MDBuilder(*C).createUnlikelyBranchWeights(), &DTU,
1007                             LI, CheckFailTerm->getParent());
1008 
1009   IRB.SetInsertPoint(TCI.TagMismatchTerm);
1010   Value *InlineTagAddr = IRB.CreateOr(TCI.AddrLong, 15);
1011   InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, PtrTy);
1012   Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
1013   Value *InlineTagMismatch = IRB.CreateICmpNE(TCI.PtrTag, InlineTag);
1014   SplitBlockAndInsertIfThen(InlineTagMismatch, TCI.TagMismatchTerm, false,
1015                             MDBuilder(*C).createUnlikelyBranchWeights(), &DTU,
1016                             LI, CheckFailTerm->getParent());
1017 
1018   IRB.SetInsertPoint(CheckFailTerm);
1019   InlineAsm *Asm;
1020   switch (TargetTriple.getArch()) {
1021   case Triple::x86_64:
1022     // The signal handler will find the data address in rdi.
1023     Asm = InlineAsm::get(
1024         FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
1025         "int3\nnopl " +
1026             itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
1027             "(%rax)",
1028         "{rdi}",
1029         /*hasSideEffects=*/true);
1030     break;
1031   case Triple::aarch64:
1032   case Triple::aarch64_be:
1033     // The signal handler will find the data address in x0.
1034     Asm = InlineAsm::get(
1035         FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
1036         "brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
1037         "{x0}",
1038         /*hasSideEffects=*/true);
1039     break;
1040   case Triple::riscv64:
1041     // The signal handler will find the data address in x10.
1042     Asm = InlineAsm::get(
1043         FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
1044         "ebreak\naddiw x0, x11, " +
1045             itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
1046         "{x10}",
1047         /*hasSideEffects=*/true);
1048     break;
1049   default:
1050     report_fatal_error("unsupported architecture");
1051   }
1052   IRB.CreateCall(Asm, TCI.PtrLong);
1053   if (Recover)
1054     cast<BranchInst>(CheckFailTerm)
1055         ->setSuccessor(0, TCI.TagMismatchTerm->getParent());
1056 }
1057 
1058 bool HWAddressSanitizer::ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE,
1059                                             MemIntrinsic *MI) {
1060   if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1061     return (!ClInstrumentWrites || ignoreAccess(ORE, MTI, MTI->getDest())) &&
1062            (!ClInstrumentReads || ignoreAccess(ORE, MTI, MTI->getSource()));
1063   }
1064   if (isa<MemSetInst>(MI))
1065     return !ClInstrumentWrites || ignoreAccess(ORE, MI, MI->getDest());
1066   return false;
1067 }
1068 
1069 void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
1070   IRBuilder<> IRB(MI);
1071   if (isa<MemTransferInst>(MI)) {
1072     SmallVector<Value *, 4> Args{
1073         MI->getOperand(0), MI->getOperand(1),
1074         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)};
1075 
1076     if (UseMatchAllCallback)
1077       Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
1078     IRB.CreateCall(isa<MemMoveInst>(MI) ? HwasanMemmove : HwasanMemcpy, Args);
1079   } else if (isa<MemSetInst>(MI)) {
1080     SmallVector<Value *, 4> Args{
1081         MI->getOperand(0),
1082         IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
1083         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)};
1084     if (UseMatchAllCallback)
1085       Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
1086     IRB.CreateCall(HwasanMemset, Args);
1087   }
1088   MI->eraseFromParent();
1089 }
1090 
1091 bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O,
1092                                              DomTreeUpdater &DTU,
1093                                              LoopInfo *LI) {
1094   Value *Addr = O.getPtr();
1095 
1096   LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");
1097 
1098   if (O.MaybeMask)
1099     return false; // FIXME
1100 
1101   IRBuilder<> IRB(O.getInsn());
1102   if (!O.TypeStoreSize.isScalable() && isPowerOf2_64(O.TypeStoreSize) &&
1103       (O.TypeStoreSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
1104       (!O.Alignment || *O.Alignment >= Mapping.getObjectAlignment() ||
1105        *O.Alignment >= O.TypeStoreSize / 8)) {
1106     size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeStoreSize);
1107     if (InstrumentWithCalls) {
1108       SmallVector<Value *, 2> Args{IRB.CreatePointerCast(Addr, IntptrTy)};
1109       if (UseMatchAllCallback)
1110         Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
1111       IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
1112                      Args);
1113     } else if (OutlinedChecks) {
1114       instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(),
1115                                  DTU, LI);
1116     } else {
1117       instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(),
1118                                 DTU, LI);
1119     }
1120   } else {
1121     SmallVector<Value *, 3> Args{
1122         IRB.CreatePointerCast(Addr, IntptrTy),
1123         IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, O.TypeStoreSize),
1124                        ConstantInt::get(IntptrTy, 8))};
1125     if (UseMatchAllCallback)
1126       Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
1127     IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite], Args);
1128   }
1129   untagPointerOperand(O.getInsn(), Addr);
1130 
1131   return true;
1132 }
1133 
1134 void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
1135                                    size_t Size) {
1136   size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
1137   if (!UseShortGranules)
1138     Size = AlignedSize;
1139 
1140   Tag = IRB.CreateTrunc(Tag, Int8Ty);
1141   if (InstrumentWithCalls) {
1142     IRB.CreateCall(HwasanTagMemoryFunc,
1143                    {IRB.CreatePointerCast(AI, PtrTy), Tag,
1144                     ConstantInt::get(IntptrTy, AlignedSize)});
1145   } else {
1146     size_t ShadowSize = Size >> Mapping.Scale;
1147     Value *AddrLong = untagPointer(IRB, IRB.CreatePointerCast(AI, IntptrTy));
1148     Value *ShadowPtr = memToShadow(AddrLong, IRB);
1149     // If this memset is not inlined, it will be intercepted in the hwasan
1150     // runtime library. That's OK, because the interceptor skips the checks if
1151     // the address is in the shadow region.
1152     // FIXME: the interceptor is not as fast as real memset. Consider lowering
1153     // llvm.memset right here into either a sequence of stores, or a call to
1154     // hwasan_tag_memory.
1155     if (ShadowSize)
1156       IRB.CreateMemSet(ShadowPtr, Tag, ShadowSize, Align(1));
1157     if (Size != AlignedSize) {
1158       const uint8_t SizeRemainder = Size % Mapping.getObjectAlignment().value();
1159       IRB.CreateStore(ConstantInt::get(Int8Ty, SizeRemainder),
1160                       IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
1161       IRB.CreateStore(
1162           Tag, IRB.CreateConstGEP1_32(Int8Ty, IRB.CreatePointerCast(AI, PtrTy),
1163                                       AlignedSize - 1));
1164     }
1165   }
1166 }
1167 
1168 unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
1169   if (TargetTriple.getArch() == Triple::x86_64)
1170     return AllocaNo & TagMaskByte;
1171 
1172   // A list of 8-bit numbers that have at most one run of non-zero bits.
1173   // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
1174   // masks.
1175   // The list does not include the value 255, which is used for UAR.
1176   //
1177   // Because we are more likely to use earlier elements of this list than later
1178   // ones, it is sorted in increasing order of probability of collision with a
1179   // mask allocated (temporally) nearby. The program that generated this list
1180   // can be found at:
1181   // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
1182   static const unsigned FastMasks[] = {
1183       0,   128, 64, 192, 32,  96,  224, 112, 240, 48, 16,  120,
1184       248, 56,  24, 8,   124, 252, 60,  28,  12,  4,  126, 254,
1185       62,  30,  14, 6,   2,   127, 63,  31,  15,  7,  3,   1};
1186   return FastMasks[AllocaNo % std::size(FastMasks)];
1187 }
1188 
1189 Value *HWAddressSanitizer::applyTagMask(IRBuilder<> &IRB, Value *OldTag) {
1190   if (TagMaskByte == 0xFF)
1191     return OldTag; // No need to clear the tag byte.
1192   return IRB.CreateAnd(OldTag,
1193                        ConstantInt::get(OldTag->getType(), TagMaskByte));
1194 }
1195 
1196 Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
1197   return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
1198 }
1199 
1200 Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
1201   if (ClGenerateTagsWithCalls)
1202     return nullptr;
1203   if (StackBaseTag)
1204     return StackBaseTag;
1205   // Extract some entropy from the stack pointer for the tags.
1206   // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
1207   // between functions).
1208   Value *FramePointerLong = getCachedFP(IRB);
1209   Value *StackTag =
1210       applyTagMask(IRB, IRB.CreateXor(FramePointerLong,
1211                                       IRB.CreateLShr(FramePointerLong, 20)));
1212   StackTag->setName("hwasan.stack.base.tag");
1213   return StackTag;
1214 }
1215 
1216 Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
1217                                         unsigned AllocaNo) {
1218   if (ClGenerateTagsWithCalls)
1219     return getNextTagWithCall(IRB);
1220   return IRB.CreateXor(
1221       StackTag, ConstantInt::get(StackTag->getType(), retagMask(AllocaNo)));
1222 }
1223 
1224 Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB) {
1225   Value *FramePointerLong = getCachedFP(IRB);
1226   Value *UARTag =
1227       applyTagMask(IRB, IRB.CreateLShr(FramePointerLong, PointerTagShift));
1228 
1229   UARTag->setName("hwasan.uar.tag");
1230   return UARTag;
1231 }
1232 
1233 // Add a tag to an address.
1234 Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
1235                                       Value *PtrLong, Value *Tag) {
1236   assert(!UsePageAliases);
1237   Value *TaggedPtrLong;
1238   if (CompileKernel) {
1239     // Kernel addresses have 0xFF in the most significant byte.
1240     Value *ShiftedTag =
1241         IRB.CreateOr(IRB.CreateShl(Tag, PointerTagShift),
1242                      ConstantInt::get(IntptrTy, (1ULL << PointerTagShift) - 1));
1243     TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
1244   } else {
1245     // Userspace can simply do OR (tag << PointerTagShift);
1246     Value *ShiftedTag = IRB.CreateShl(Tag, PointerTagShift);
1247     TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
1248   }
1249   return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
1250 }
1251 
1252 // Remove tag from an address.
1253 Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
1254   assert(!UsePageAliases);
1255   Value *UntaggedPtrLong;
1256   if (CompileKernel) {
1257     // Kernel addresses have 0xFF in the most significant byte.
1258     UntaggedPtrLong =
1259         IRB.CreateOr(PtrLong, ConstantInt::get(PtrLong->getType(),
1260                                                TagMaskByte << PointerTagShift));
1261   } else {
1262     // Userspace addresses have 0x00.
1263     UntaggedPtrLong = IRB.CreateAnd(
1264         PtrLong, ConstantInt::get(PtrLong->getType(),
1265                                   ~(TagMaskByte << PointerTagShift)));
1266   }
1267   return UntaggedPtrLong;
1268 }
1269 
1270 Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB) {
1271   // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
1272   // in Bionic's libc/platform/bionic/tls_defines.h.
1273   constexpr int SanitizerSlot = 6;
1274   if (TargetTriple.isAArch64() && TargetTriple.isAndroid())
1275     return memtag::getAndroidSlotPtr(IRB, SanitizerSlot);
1276   return ThreadPtrGlobal;
1277 }
1278 
1279 Value *HWAddressSanitizer::getCachedFP(IRBuilder<> &IRB) {
1280   if (!CachedFP)
1281     CachedFP = memtag::getFP(IRB);
1282   return CachedFP;
1283 }
1284 
1285 Value *HWAddressSanitizer::getFrameRecordInfo(IRBuilder<> &IRB) {
1286   // Prepare ring buffer data.
1287   Value *PC = memtag::getPC(TargetTriple, IRB);
1288   Value *FP = getCachedFP(IRB);
1289 
1290   // Mix FP and PC.
1291   // Assumptions:
1292   // PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero)
1293   // FP is 0xfffffffffffFFFF0  (4 lower bits are zero)
1294   // We only really need ~20 lower non-zero bits (FFFF), so we mix like this:
1295   //       0xFFFFPPPPPPPPPPPP
1296   //
1297   // FP works because in AArch64FrameLowering::getFrameIndexReference, we
1298   // prefer FP-relative offsets for functions compiled with HWASan.
1299   FP = IRB.CreateShl(FP, 44);
1300   return IRB.CreateOr(PC, FP);
1301 }
1302 
1303 void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
1304   if (!Mapping.InTls)
1305     ShadowBase = getShadowNonTls(IRB);
1306   else if (!WithFrameRecord && TargetTriple.isAndroid())
1307     ShadowBase = getDynamicShadowIfunc(IRB);
1308 
1309   if (!WithFrameRecord && ShadowBase)
1310     return;
1311 
1312   Value *SlotPtr = nullptr;
1313   Value *ThreadLong = nullptr;
1314   Value *ThreadLongMaybeUntagged = nullptr;
1315 
1316   auto getThreadLongMaybeUntagged = [&]() {
1317     if (!SlotPtr)
1318       SlotPtr = getHwasanThreadSlotPtr(IRB);
1319     if (!ThreadLong)
1320       ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
1321     // Extract the address field from ThreadLong. Unnecessary on AArch64 with
1322     // TBI.
1323     return TargetTriple.isAArch64() ? ThreadLong
1324                                     : untagPointer(IRB, ThreadLong);
1325   };
1326 
1327   if (WithFrameRecord) {
1328     switch (ClRecordStackHistory) {
1329     case libcall: {
1330       // Emit a runtime call into hwasan rather than emitting instructions for
1331       // recording stack history.
1332       Value *FrameRecordInfo = getFrameRecordInfo(IRB);
1333       IRB.CreateCall(HwasanRecordFrameRecordFunc, {FrameRecordInfo});
1334       break;
1335     }
1336     case instr: {
1337       ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();
1338 
1339       StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
1340 
1341       // Store data to ring buffer.
1342       Value *FrameRecordInfo = getFrameRecordInfo(IRB);
1343       Value *RecordPtr =
1344           IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IRB.getPtrTy(0));
1345       IRB.CreateStore(FrameRecordInfo, RecordPtr);
1346 
1347       // Update the ring buffer. Top byte of ThreadLong defines the size of the
1348       // buffer in pages, it must be a power of two, and the start of the buffer
1349       // must be aligned by twice that much. Therefore wrap around of the ring
1350       // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
1351       // The use of AShr instead of LShr is due to
1352       //   https://bugs.llvm.org/show_bug.cgi?id=39030
1353       // Runtime library makes sure not to use the highest bit.
1354       //
1355       // Mechanical proof of this address calculation can be found at:
1356       // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/prove_hwasanwrap.smt2
1357       //
1358       // Example of the wrap case for N = 1
1359       // Pointer:   0x01AAAAAAAAAAAFF8
1360       //                     +
1361       //            0x0000000000000008
1362       //                     =
1363       //            0x01AAAAAAAAAAB000
1364       //                     &
1365       // WrapMask:  0xFFFFFFFFFFFFF000
1366       //                     =
1367       //            0x01AAAAAAAAAAA000
1368       //
1369       // Then the WrapMask will be a no-op until the next wrap case.
1370       Value *WrapMask = IRB.CreateXor(
1371           IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
1372           ConstantInt::get(IntptrTy, (uint64_t)-1));
1373       Value *ThreadLongNew = IRB.CreateAnd(
1374           IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
1375       IRB.CreateStore(ThreadLongNew, SlotPtr);
1376       break;
1377     }
1378     case none: {
1379       llvm_unreachable(
1380           "A stack history recording mode should've been selected.");
1381     }
1382     }
1383   }
1384 
1385   if (!ShadowBase) {
1386     if (!ThreadLongMaybeUntagged)
1387       ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();
1388 
1389     // Get shadow base address by aligning RecordPtr up.
1390     // Note: this is not correct if the pointer is already aligned.
1391     // Runtime library will make sure this never happens.
1392     ShadowBase = IRB.CreateAdd(
1393         IRB.CreateOr(
1394             ThreadLongMaybeUntagged,
1395             ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
1396         ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
1397     ShadowBase = IRB.CreateIntToPtr(ShadowBase, PtrTy);
1398   }
1399 }
1400 
1401 bool HWAddressSanitizer::instrumentLandingPads(
1402     SmallVectorImpl<Instruction *> &LandingPadVec) {
1403   for (auto *LP : LandingPadVec) {
1404     IRBuilder<> IRB(LP->getNextNonDebugInstruction());
1405     IRB.CreateCall(
1406         HwasanHandleVfork,
1407         {memtag::readRegister(
1408             IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp" : "sp")});
1409   }
1410   return true;
1411 }
1412 
/// Rewrites every alloca in \p SInfo.AllocasToInstrument so that its uses go
/// through a tagged pointer, and inserts the code that tags the memory when
/// it becomes live and re-tags it with \p UARTag when it dies.
///
/// \param SInfo    Allocas, lifetime markers and return instructions collected
///                 for the function being instrumented.
/// \param StackTag Base tag passed to getAllocaTag() for each alloca.
/// \param UARTag   Tag written over the aligned alloca at the chosen exits.
/// \param DT,PDT,LI Analyses forwarded to memtag::isStandardLifetime() and
///                 memtag::forAllReachableExits().
/// \returns Always true.
1413 bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
1414                                          Value *StackTag, Value *UARTag,
1415                                          const DominatorTree &DT,
1416                                          const PostDominatorTree &PDT,
1417                                          const LoopInfo &LI) {
1418   // Ideally, we want to calculate tagged stack base pointer, and rewrite all
1419   // alloca addresses using that. Unfortunately, offsets are not known yet
1420   // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
1421   // temp, shift-OR it into each alloca address and xor with the retag mask.
1422   // This generates one extra instruction per alloca use.
1423   unsigned int I = 0;
1424
1425   for (auto &KV : SInfo.AllocasToInstrument) {
       // N is the zero-based index of this alloca. It is passed to
       // getAllocaTag() and retagMask() and is used in the fallback name.
1426     auto N = I++;
1427     auto *AI = KV.first;
1428     memtag::AllocaInfo &Info = KV.second;
1429     IRBuilder<> IRB(AI->getNextNonDebugInstruction());
1430
1431     // Replace uses of the alloca with tagged address.
1432     Value *Tag = getAllocaTag(IRB, StackTag, N);
1433     Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
1434     Value *AINoTagLong = untagPointer(IRB, AILong);
1435     Value *Replacement = tagPointer(IRB, AI->getType(), AINoTagLong, Tag);
       // Unnamed allocas get the synthetic name "alloca.<N>".
1436     std::string Name =
1437         AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
1438     Replacement->setName(Name + ".hwasan");
1439
       // Size is the alloca's own size; AlignedSize rounds it up to the
       // mapping's object alignment.
1440     size_t Size = memtag::getAllocaSizeInBytes(*AI);
1441     size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
1442
1443     Value *AICast = IRB.CreatePointerCast(AI, PtrTy);
1444
1445     auto HandleLifetime = [&](IntrinsicInst *II) {
1446       // Set the lifetime intrinsic to cover the whole alloca. This reduces the
1447       // set of assumptions we need to make about the lifetime. Without this we
1448       // would need to ensure that we can track the lifetime pointer to a
1449       // constant offset from the alloca, and would still need to change the
1450       // size to include the extra alignment we use for the untagging to make
1451       // the size consistent.
1452       //
1453       // The check for standard lifetime below makes sure that we have exactly
1454       // one set of start / end in any execution (i.e. the ends are not
1455       // reachable from each other), so this will not cause any problems.
1456       II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize));
1457       II->setArgOperand(1, AICast);
1458     };
1459     llvm::for_each(Info.LifetimeStart, HandleLifetime);
1460     llvm::for_each(Info.LifetimeEnd, HandleLifetime);
1461
       // Keep the raw alloca as the operand of the two casts created above
       // (AILong feeds the tag computation, AICast feeds the lifetime
       // intrinsics) and of lifetime intrinsics; every other use is redirected
       // to the tagged pointer.
1462     AI->replaceUsesWithIf(Replacement, [AICast, AILong](const Use &U) {
1463       auto *User = U.getUser();
1464       return User != AILong && User != AICast &&
1465              !memtag::isLifetimeIntrinsic(User);
1466     });
1467
1468     memtag::annotateDebugRecords(Info, retagMask(N));
1469
       // Callback that re-tags the whole aligned alloca with UARTag right
       // before Node. Note that it moves IRB's insert point.
1470     auto TagEnd = [&](Instruction *Node) {
1471       IRB.SetInsertPoint(Node);
1472       // When untagging, use the `AlignedSize` because we need to set the tags
1473       // for the entire alloca to original. If we used `Size` here, we would
1474       // keep the last granule tagged, and store zero in the last byte of the
1475       // last granule, due to how short granules are implemented.
1476       tagAlloca(IRB, AI, UARTag, AlignedSize);
1477     };
1478     // Calls to functions that may return twice (e.g. setjmp) confuse the
1479     // postdominator analysis, and will lead us to keep memory tagged after
1480     // function return. Work around this by always untagging at every return
1481     // statement if return_twice functions are called.
1482     bool StandardLifetime =
1483         !SInfo.CallsReturnTwice &&
1484         SInfo.UnrecognizedLifetimes.empty() &&
1485         memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, &DT,
1486                                    &LI, ClMaxLifetimes);
1487     if (DetectUseAfterScope && StandardLifetime) {
         // Scoped tagging: tag right after the first lifetime start and let
         // forAllReachableExits() place the untagging through TagEnd.
1488       IntrinsicInst *Start = Info.LifetimeStart[0];
1489       IRB.SetInsertPoint(Start->getNextNode());
1490       tagAlloca(IRB, AI, Tag, Size);
         // NOTE(review): a false result presumably means the untagging could
         // not be placed on the lifetime ends alone; the ends are dropped in
         // that case. Confirm against memtag::forAllReachableExits().
1491       if (!memtag::forAllReachableExits(DT, PDT, LI, Start, Info.LifetimeEnd,
1492                                         SInfo.RetVec, TagEnd)) {
1493         for (auto *End : Info.LifetimeEnd)
1494           End->eraseFromParent();
1495       }
1496     } else {
         // Function-wide tagging: tag at the builder's current position (it
         // was created right after the alloca) and untag before every
         // instruction in SInfo.RetVec.
1497       tagAlloca(IRB, AI, Tag, Size);
1498       for (auto *RI : SInfo.RetVec)
1499         TagEnd(RI);
1500       // We inserted tagging outside of the lifetimes, so we have to remove
1501       // them.
1502       for (auto &II : Info.LifetimeStart)
1503         II->eraseFromParent();
1504       for (auto &II : Info.LifetimeEnd)
1505         II->eraseFromParent();
1506     }
1507     memtag::alignAndPadAlloca(Info, Mapping.getObjectAlignment());
1508   }
     // Lifetime markers recorded as unrecognized are removed unconditionally.
1509   for (auto &I : SInfo.UnrecognizedLifetimes)
1510     I->eraseFromParent();
1511   return true;
1512 }
1513 
1514 static void emitRemark(const Function &F, OptimizationRemarkEmitter &ORE,
1515                        bool Skip) {
1516   if (Skip) {
1517     ORE.emit([&]() {
1518       return OptimizationRemark(DEBUG_TYPE, "Skip", &F)
1519              << "Skipped: F=" << ore::NV("Function", &F);
1520     });
1521   } else {
1522     ORE.emit([&]() {
1523       return OptimizationRemarkMissed(DEBUG_TYPE, "Sanitize", &F)
1524              << "Sanitized: F=" << ore::NV("Function", &F);
1525     });
1526   }
1527 }
1528 
1529 bool HWAddressSanitizer::selectiveInstrumentationShouldSkip(
1530     Function &F, FunctionAnalysisManager &FAM) const {
1531   bool Skip = [&]() {
1532     if (ClRandomSkipRate.getNumOccurrences()) {
1533       std::bernoulli_distribution D(ClRandomSkipRate);
1534       return !D(*Rng);
1535     }
1536     if (!ClHotPercentileCutoff.getNumOccurrences())
1537       return false;
1538     auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1539     ProfileSummaryInfo *PSI =
1540         MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
1541     if (!PSI || !PSI->hasProfileSummary()) {
1542       ++NumNoProfileSummaryFuncs;
1543       return false;
1544     }
1545     return PSI->isFunctionHotInCallGraphNthPercentile(
1546         ClHotPercentileCutoff, &F, FAM.getResult<BlockFrequencyAnalysis>(F));
1547   }();
1548   emitRemark(F, FAM.getResult<OptimizationRemarkEmitterAnalysis>(F), Skip);
1549   return Skip;
1550 }
1551 
/// Instruments a single function.
///
/// Collects the interesting memory operands, the memory intrinsics, the
/// landing pads and (when InstrumentStack is set) the stack information of
/// \p F, then emits the prologue and the instrumentation for each of them.
///
/// Nothing is done for HwasanCtorFunction itself, for functions without the
/// SanitizeHWAddress attribute, for empty functions, and for functions that
/// selectiveInstrumentationShouldSkip() rejects.
///
/// \param F   Function to instrument; it is modified in place.
/// \param FAM Source of the function-level analyses used here.
1552 void HWAddressSanitizer::sanitizeFunction(Function &F,
1553                                           FunctionAnalysisManager &FAM) {
1554   if (&F == HwasanCtorFunction)
1555     return;
1556
1557   if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
1558     return;
1559
1560   if (F.empty())
1561     return;
1562
     // Counted before the selective-instrumentation decision, so this
     // statistic includes functions that end up being skipped.
1563   NumTotalFuncs++;
1564
1565   OptimizationRemarkEmitter &ORE =
1566       FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1567
1568   if (selectiveInstrumentationShouldSkip(F, FAM))
1569     return;
1570
1571   NumInstrumentedFuncs++;
1572
1573   LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");
1574
1575   SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
1576   SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
1577   SmallVector<Instruction *, 8> LandingPadVec;
1578   const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
1579
     // Single pass over the function that only collects work items; no IR is
     // modified until the loop is finished.
1580   memtag::StackInfoBuilder SIB(SSI);
1581   for (auto &Inst : instructions(F)) {
1582     if (InstrumentStack) {
1583       SIB.visit(Inst);
1584     }
1585
1586     if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
1587       LandingPadVec.push_back(&Inst);
1588
1589     getInterestingMemoryOperands(ORE, &Inst, TLI, OperandsToInstrument);
1590
1591     if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
1592       if (!ignoreMemIntrinsic(ORE, MI))
1593         IntrinToInstrument.push_back(MI);
1594   }
1595
1596   memtag::StackInfo &SInfo = SIB.get();
1597
1598   initializeCallbacks(*F.getParent());
1599
1600   if (!LandingPadVec.empty())
1601     instrumentLandingPads(LandingPadVec);
1602
1603   if (SInfo.AllocasToInstrument.empty() && F.hasPersonalityFn() &&
1604       F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
1605     // __hwasan_personality_thunk is a no-op for functions without an
1606     // instrumented stack, so we can drop it.
1607     F.setPersonalityFn(nullptr);
1608   }
1609
     // Nothing left to instrument. Landing pads and the personality function
     // were already handled above, so they do not keep us here.
1610   if (SInfo.AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
1611       IntrinToInstrument.empty())
1612     return;
1613
     // ShadowBase is cleared at the end of this function, so it must still be
     // null when the next function is processed.
1614   assert(!ShadowBase);
1615
1616   // Remove memory attributes that are about to become invalid.
1617   // HWASan checks read from shadow, which invalidates memory(argmem: *)
1618   // Short granule checks on function arguments read from the argument memory
1619   // (last byte of the granule), which invalidates writeonly.
1620   F.removeFnAttr(llvm::Attribute::Memory);
1621   for (auto &A : F.args())
1622     A.removeAttr(llvm::Attribute::WriteOnly);
1623
     // The prologue is built at the very start of the entry block. A frame
     // record is requested only when stack history recording is enabled, the
     // shadow mapping supports it, and there is at least one alloca to
     // instrument.
1624   BasicBlock::iterator InsertPt = F.getEntryBlock().begin();
1625   IRBuilder<> EntryIRB(&F.getEntryBlock(), InsertPt);
1626   emitPrologue(EntryIRB,
1627                /*WithFrameRecord*/ ClRecordStackHistory != none &&
1628                    Mapping.WithFrameRecord &&
1629                    !SInfo.AllocasToInstrument.empty());
1630
1631   if (!SInfo.AllocasToInstrument.empty()) {
1632     const DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1633     const PostDominatorTree &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
1634     const LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
1635     Value *StackTag = getStackBaseTag(EntryIRB);
1636     Value *UARTag = getUARTag(EntryIRB);
1637     instrumentStack(SInfo, StackTag, UARTag, DT, PDT, LI);
1638   }
1639
1640   // If we split the entry block, move any allocas that were originally in the
1641   // entry block back into the entry block so that they aren't treated as
1642   // dynamic allocas.
1643   if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
1644     InsertPt = F.getEntryBlock().begin();
1645     for (Instruction &I :
1646          llvm::make_early_inc_range(*EntryIRB.GetInsertBlock())) {
1647       if (auto *AI = dyn_cast<AllocaInst>(&I))
1648         if (isa<ConstantInt>(AI->getArraySize()))
1649           I.moveBefore(F.getEntryBlock(), InsertPt);
1650     }
1651   }
1652
     // Only analyses that are already cached are handed to the updater
     // (getCachedResult); nothing is computed here just to be kept up to date.
1653   DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
1654   PostDominatorTree *PDT = FAM.getCachedResult<PostDominatorTreeAnalysis>(F);
1655   LoopInfo *LI = FAM.getCachedResult<LoopAnalysis>(F);
1656   DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
1657   for (auto &Operand : OperandsToInstrument)
1658     instrumentMemAccess(Operand, DTU, LI);
     // The updater uses the Lazy strategy, so apply the queued updates now.
1659   DTU.flush();
1660
1661   if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
1662     for (auto *Inst : IntrinToInstrument)
1663       instrumentMemIntrinsic(Inst);
1664   }
1665
     // Clear the per-function values so that the assert(!ShadowBase) above
     // holds for the next function.
1666   ShadowBase = nullptr;
1667   StackBaseTag = nullptr;
1668   CachedFP = nullptr;
1669 }
1670 
1671 void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
1672   assert(!UsePageAliases);
1673   Constant *Initializer = GV->getInitializer();
1674   uint64_t SizeInBytes =
1675       M.getDataLayout().getTypeAllocSize(Initializer->getType());
1676   uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
1677   if (SizeInBytes != NewSize) {
1678     // Pad the initializer out to the next multiple of 16 bytes and add the
1679     // required short granule tag.
1680     std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
1681     Init.back() = Tag;
1682     Constant *Padding = ConstantDataArray::get(*C, Init);
1683     Initializer = ConstantStruct::getAnon({Initializer, Padding});
1684   }
1685 
1686   auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
1687                                    GlobalValue::ExternalLinkage, Initializer,
1688                                    GV->getName() + ".hwasan");
1689   NewGV->copyAttributesFrom(GV);
1690   NewGV->setLinkage(GlobalValue::PrivateLinkage);
1691   NewGV->copyMetadata(GV, 0);
1692   NewGV->setAlignment(
1693       std::max(GV->getAlign().valueOrOne(), Mapping.getObjectAlignment()));
1694 
1695   // It is invalid to ICF two globals that have different tags. In the case
1696   // where the size of the global is a multiple of the tag granularity the
1697   // contents of the globals may be the same but the tags (i.e. symbol values)
1698   // may be different, and the symbols are not considered during ICF. In the
1699   // case where the size is not a multiple of the granularity, the short granule
1700   // tags would discriminate two globals with different tags, but there would
1701   // otherwise be nothing stopping such a global from being incorrectly ICF'd
1702   // with an uninstrumented (i.e. tag 0) global that happened to have the short
1703   // granule tag in the last byte.
1704   NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
1705 
1706   // Descriptor format (assuming little-endian):
1707   // bytes 0-3: relative address of global
1708   // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case
1709   // it isn't, we create multiple descriptors)
1710   // byte 7: tag
1711   auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
1712   const uint64_t MaxDescriptorSize = 0xfffff0;
1713   for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
1714        DescriptorPos += MaxDescriptorSize) {
1715     auto *Descriptor =
1716         new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
1717                            nullptr, GV->getName() + ".hwasan.descriptor");
1718     auto *GVRelPtr = ConstantExpr::getTrunc(
1719         ConstantExpr::getAdd(
1720             ConstantExpr::getSub(
1721                 ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1722                 ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
1723             ConstantInt::get(Int64Ty, DescriptorPos)),
1724         Int32Ty);
1725     uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
1726     auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
1727     Descriptor->setComdat(NewGV->getComdat());
1728     Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
1729     Descriptor->setSection("hwasan_globals");
1730     Descriptor->setMetadata(LLVMContext::MD_associated,
1731                             MDNode::get(*C, ValueAsMetadata::get(NewGV)));
1732     appendToCompilerUsed(M, Descriptor);
1733   }
1734 
1735   Constant *Aliasee = ConstantExpr::getIntToPtr(
1736       ConstantExpr::getAdd(
1737           ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1738           ConstantInt::get(Int64Ty, uint64_t(Tag) << PointerTagShift)),
1739       GV->getType());
1740   auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
1741                                     GV->getLinkage(), "", Aliasee, &M);
1742   Alias->setVisibility(GV->getVisibility());
1743   Alias->takeName(GV);
1744   GV->replaceAllUsesWith(Alias);
1745   GV->eraseFromParent();
1746 }
1747 
1748 void HWAddressSanitizer::instrumentGlobals() {
1749   std::vector<GlobalVariable *> Globals;
1750   for (GlobalVariable &GV : M.globals()) {
1751     if (GV.hasSanitizerMetadata() && GV.getSanitizerMetadata().NoHWAddress)
1752       continue;
1753 
1754     if (GV.isDeclarationForLinker() || GV.getName().starts_with("llvm.") ||
1755         GV.isThreadLocal())
1756       continue;
1757 
1758     // Common symbols can't have aliases point to them, so they can't be tagged.
1759     if (GV.hasCommonLinkage())
1760       continue;
1761 
1762     // Globals with custom sections may be used in __start_/__stop_ enumeration,
1763     // which would be broken both by adding tags and potentially by the extra
1764     // padding/alignment that we insert.
1765     if (GV.hasSection())
1766       continue;
1767 
1768     Globals.push_back(&GV);
1769   }
1770 
1771   MD5 Hasher;
1772   Hasher.update(M.getSourceFileName());
1773   MD5::MD5Result Hash;
1774   Hasher.final(Hash);
1775   uint8_t Tag = Hash[0];
1776 
1777   assert(TagMaskByte >= 16);
1778 
1779   for (GlobalVariable *GV : Globals) {
1780     // Don't allow globals to be tagged with something that looks like a
1781     // short-granule tag, otherwise we lose inter-granule overflow detection, as
1782     // the fast path shadow-vs-address check succeeds.
1783     if (Tag < 16 || Tag > TagMaskByte)
1784       Tag = 16;
1785     instrumentGlobal(GV, Tag++);
1786   }
1787 }
1788 
1789 void HWAddressSanitizer::instrumentPersonalityFunctions() {
1790   // We need to untag stack frames as we unwind past them. That is the job of
1791   // the personality function wrapper, which either wraps an existing
1792   // personality function or acts as a personality function on its own. Each
1793   // function that has a personality function or that can be unwound past has
1794   // its personality function changed to a thunk that calls the personality
1795   // function wrapper in the runtime.
1796   MapVector<Constant *, std::vector<Function *>> PersonalityFns;
1797   for (Function &F : M) {
1798     if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
1799       continue;
1800 
1801     if (F.hasPersonalityFn()) {
1802       PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F);
1803     } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
1804       PersonalityFns[nullptr].push_back(&F);
1805     }
1806   }
1807 
1808   if (PersonalityFns.empty())
1809     return;
1810 
1811   FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
1812       "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty, PtrTy,
1813       PtrTy, PtrTy, PtrTy, PtrTy);
1814   FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
1815   FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);
1816 
1817   for (auto &P : PersonalityFns) {
1818     std::string ThunkName = kHwasanPersonalityThunkName;
1819     if (P.first)
1820       ThunkName += ("." + P.first->getName()).str();
1821     FunctionType *ThunkFnTy = FunctionType::get(
1822         Int32Ty, {Int32Ty, Int32Ty, Int64Ty, PtrTy, PtrTy}, false);
1823     bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
1824                                cast<GlobalValue>(P.first)->hasLocalLinkage());
1825     auto *ThunkFn = Function::Create(ThunkFnTy,
1826                                      IsLocal ? GlobalValue::InternalLinkage
1827                                              : GlobalValue::LinkOnceODRLinkage,
1828                                      ThunkName, &M);
1829     if (!IsLocal) {
1830       ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
1831       ThunkFn->setComdat(M.getOrInsertComdat(ThunkName));
1832     }
1833 
1834     auto *BB = BasicBlock::Create(*C, "entry", ThunkFn);
1835     IRBuilder<> IRB(BB);
1836     CallInst *WrapperCall = IRB.CreateCall(
1837         HwasanPersonalityWrapper,
1838         {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
1839          ThunkFn->getArg(3), ThunkFn->getArg(4),
1840          P.first ? P.first : Constant::getNullValue(PtrTy),
1841          UnwindGetGR.getCallee(), UnwindGetCFA.getCallee()});
1842     WrapperCall->setTailCall();
1843     IRB.CreateRet(WrapperCall);
1844 
1845     for (Function *F : P.second)
1846       F->setPersonalityFn(ThunkFn);
1847   }
1848 }
1849 
1850 void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple,
1851                                              bool InstrumentWithCalls) {
1852   Scale = kDefaultShadowScale;
1853   if (TargetTriple.isOSFuchsia()) {
1854     // Fuchsia is always PIE, which means that the beginning of the address
1855     // space is always available.
1856     InGlobal = false;
1857     InTls = false;
1858     Offset = 0;
1859     WithFrameRecord = true;
1860   } else if (ClMappingOffset.getNumOccurrences() > 0) {
1861     InGlobal = false;
1862     InTls = false;
1863     Offset = ClMappingOffset;
1864     WithFrameRecord = false;
1865   } else if (ClEnableKhwasan || InstrumentWithCalls) {
1866     InGlobal = false;
1867     InTls = false;
1868     Offset = 0;
1869     WithFrameRecord = false;
1870   } else if (ClWithIfunc) {
1871     InGlobal = true;
1872     InTls = false;
1873     Offset = kDynamicShadowSentinel;
1874     WithFrameRecord = false;
1875   } else if (ClWithTls) {
1876     InGlobal = false;
1877     InTls = true;
1878     Offset = kDynamicShadowSentinel;
1879     WithFrameRecord = true;
1880   } else {
1881     InGlobal = false;
1882     InTls = false;
1883     Offset = kDynamicShadowSentinel;
1884     WithFrameRecord = false;
1885   }
1886 }
1887