xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp (revision 5b56413d04e608379c9a306373554a8e4d321bc0)
1 //===- HWAddressSanitizer.cpp - memory access error detector --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// This file is a part of HWAddressSanitizer, a basic address correctness
/// checker based on tagged addressing.
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Analysis/DomTreeUpdater.h"
21 #include "llvm/Analysis/GlobalsModRef.h"
22 #include "llvm/Analysis/PostDominators.h"
23 #include "llvm/Analysis/StackSafetyAnalysis.h"
24 #include "llvm/Analysis/TargetLibraryInfo.h"
25 #include "llvm/Analysis/ValueTracking.h"
26 #include "llvm/BinaryFormat/Dwarf.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/IR/Attributes.h"
29 #include "llvm/IR/BasicBlock.h"
30 #include "llvm/IR/Constant.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/DataLayout.h"
33 #include "llvm/IR/DebugInfoMetadata.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/Dominators.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/IRBuilder.h"
38 #include "llvm/IR/InlineAsm.h"
39 #include "llvm/IR/InstIterator.h"
40 #include "llvm/IR/Instruction.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/Intrinsics.h"
44 #include "llvm/IR/LLVMContext.h"
45 #include "llvm/IR/MDBuilder.h"
46 #include "llvm/IR/Module.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/Value.h"
49 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/Support/raw_ostream.h"
53 #include "llvm/TargetParser/Triple.h"
54 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
55 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
56 #include "llvm/Transforms/Utils/Local.h"
57 #include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
58 #include "llvm/Transforms/Utils/ModuleUtils.h"
59 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
60 #include <optional>
61 
62 using namespace llvm;
63 
64 #define DEBUG_TYPE "hwasan"
65 
// Name of the module constructor function; the same string is also used as the
// name of the comdat that holds the constructor and the globals note.
const char kHwasanModuleCtorName[] = "hwasan.module_ctor";
// Name of the private global that holds the ELF note describing the globals.
const char kHwasanNoteName[] = "hwasan.note";
// Name passed as the init function when the module constructor is created.
const char kHwasanInitName[] = "__hwasan_init";
const char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk";

// Name of the global that is loaded to obtain the shadow base when the mapping
// offset is dynamic and the shadow is not accessed through the ifunc global.
const char kHwasanShadowMemoryDynamicAddress[] =
    "__hwasan_shadow_memory_dynamic_address";

// Access sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;

static const size_t kDefaultShadowScale = 4;
// Value of ShadowMapping::Offset meaning that the offset is not a fixed
// constant and must be obtained dynamically.
static const uint64_t kDynamicShadowSentinel =
    std::numeric_limits<uint64_t>::max();

static const unsigned kShadowBaseAlignment = 32;
82 
// Prefix prepended to the names of the load/store callbacks and (unless
// suppressed in kernel mode, see below) of the memmove/memcpy/memset callbacks.
static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("hwasan-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__hwasan_"));

// In kernel mode the memory intrinsic callbacks get an empty prefix unless this
// flag is set.
static cl::opt<bool> ClKasanMemIntrinCallbackPrefix(
    "hwasan-kernel-mem-intrinsic-prefix",
    cl::desc("Use prefix for memory intrinsics in KASAN mode"), cl::Hidden,
    cl::init(false));

// When this flag is not given on the command line, instrumenting with calls is
// enabled for x86_64 only (see shouldInstrumentWithCalls).
static cl::opt<bool> ClInstrumentWithCalls(
    "hwasan-instrument-with-calls",
    cl::desc("instrument reads and writes with callbacks"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("hwasan-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "hwasan-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval",
                                       cl::desc("instrument byval arguments"),
                                       cl::Hidden, cl::init(true));

// When given on the command line, this overrides the Recover value passed to
// the HWAddressSanitizer constructor.
static cl::opt<bool>
    ClRecover("hwasan-recover",
              cl::desc("Enable recovery mode (continue-after-error)."),
              cl::Hidden, cl::init(false));

// Stack instrumentation is additionally disabled whenever page aliasing is in
// use (see shouldInstrumentStack).
static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
                                       cl::desc("instrument stack (allocas)"),
                                       cl::Hidden, cl::init(true));
124 
125 static cl::opt<bool>
126     ClUseStackSafety("hwasan-use-stack-safety", cl::Hidden, cl::init(true),
127                      cl::Hidden, cl::desc("Use Stack Safety analysis results"),
128                      cl::Optional);
129 
130 static cl::opt<size_t> ClMaxLifetimes(
131     "hwasan-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
132     cl::ReallyHidden,
133     cl::desc("How many lifetime ends to handle for a single alloca."),
134     cl::Optional);
135 
// Only takes effect when stack instrumentation is enabled (see
// shouldDetectUseAfterScope).
static cl::opt<bool>
    ClUseAfterScope("hwasan-use-after-scope",
                    cl::desc("detect use after scope within function"),
                    cl::Hidden, cl::init(true));

static cl::opt<bool> ClGenerateTagsWithCalls(
    "hwasan-generate-tags-with-calls",
    cl::desc("generate new tags with runtime library calls"), cl::Hidden,
    cl::init(false));

// When this flag is not given on the command line, initializeModule() decides
// based on whether the target is considered to have a new enough runtime.
static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
                               cl::Hidden, cl::init(false));

// -1 means "no match-all tag"; any other value is truncated to its low 8 bits.
// When the flag is not given at all, kernel mode uses 0xFF.
static cl::opt<int> ClMatchAllTag(
    "hwasan-match-all-tag",
    cl::desc("don't report bad accesses via pointers with this tag"),
    cl::Hidden, cl::init(-1));

// When given on the command line, this overrides the CompileKernel value
// passed to the HWAddressSanitizer constructor.
static cl::opt<bool>
    ClEnableKhwasan("hwasan-kernel",
                    cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
                    cl::Hidden, cl::init(false));

// These flags allow changing the shadow mapping and control how shadow memory
// is accessed. The shadow mapping looks like:
//    Shadow = (Mem >> scale) + offset

static cl::opt<uint64_t>
    ClMappingOffset("hwasan-mapping-offset",
                    cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
                    cl::Hidden, cl::init(0));

static cl::opt<bool>
    ClWithIfunc("hwasan-with-ifunc",
                cl::desc("Access dynamic shadow through an ifunc global on "
                         "platforms that support this"),
                cl::Hidden, cl::init(false));

static cl::opt<bool> ClWithTls(
    "hwasan-with-tls",
    cl::desc("Access dynamic shadow through an thread-local pointer on "
             "platforms that support this"),
    cl::Hidden, cl::init(true));
179 
// Mode for selecting how to insert frame record info into the stack ring
// buffer. The enumerators are referenced by name in the clEnumVal list of
// ClRecordStackHistory below.
enum RecordStackHistoryMode {
  // Do not record frame record info.
  none,

  // Insert instructions into the prologue for storing into the stack ring
  // buffer directly.
  instr,

  // Add a call to __hwasan_add_frame_record in the runtime.
  libcall,
};

// Selects the RecordStackHistoryMode; the default is `instr`.
static cl::opt<RecordStackHistoryMode> ClRecordStackHistory(
    "hwasan-record-stack-history",
    cl::desc("Record stack frames with tagged allocations in a thread-local "
             "ring buffer"),
    cl::values(clEnumVal(none, "Do not record stack ring history"),
               clEnumVal(instr, "Insert instructions into the prologue for "
                                "storing into the stack ring buffer directly"),
               clEnumVal(libcall, "Add a call to __hwasan_add_frame_record for "
                                  "storing into the stack ring buffer")),
    cl::Hidden, cl::init(instr));
204 
static cl::opt<bool>
    ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
                              cl::desc("instrument memory intrinsics"),
                              cl::Hidden, cl::init(true));

// When this flag is not given on the command line, initializeModule() enables
// landing pad instrumentation only if the runtime is not considered new enough
// for personality function support.
static cl::opt<bool>
    ClInstrumentLandingPads("hwasan-instrument-landing-pads",
                            cl::desc("instrument landing pads"), cl::Hidden,
                            cl::init(false));

// When this flag is not given on the command line, initializeModule() enables
// short granules if the runtime is considered new enough.
static cl::opt<bool> ClUseShortGranules(
    "hwasan-use-short-granules",
    cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
    cl::init(false));

// No explicit cl::init here. When the flag is not given on the command line,
// initializeModule() enables this if the runtime is considered new enough.
static cl::opt<bool> ClInstrumentPersonalityFunctions(
    "hwasan-instrument-personality-functions",
    cl::desc("instrument personality functions"), cl::Hidden);

// When given on the command line, outlined checks are used iff this flag is
// false (and the target supports them); otherwise outlining is chosen unless
// recovery mode is on. See the OutlinedChecks computation in
// initializeModule().
static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
                                       cl::desc("inline all checks"),
                                       cl::Hidden, cl::init(false));
227 
228 static cl::opt<bool> ClInlineFastPathChecks("hwasan-inline-fast-path-checks",
229                                             cl::desc("inline all checks"),
230                                             cl::Hidden, cl::init(false));
231 
// Enabled from clang by "-fsanitize-hwaddress-experimental-aliasing".
// Only honored when the target architecture is x86_64 (see
// shouldUsePageAliases).
static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases",
                                      cl::desc("Use page aliasing in HWASan"),
                                      cl::Hidden, cl::init(false));
236 
237 namespace {
238 
239 bool shouldUsePageAliases(const Triple &TargetTriple) {
240   return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64;
241 }
242 
243 bool shouldInstrumentStack(const Triple &TargetTriple) {
244   return !shouldUsePageAliases(TargetTriple) && ClInstrumentStack;
245 }
246 
247 bool shouldInstrumentWithCalls(const Triple &TargetTriple) {
248   return ClInstrumentWithCalls.getNumOccurrences()
249              ? ClInstrumentWithCalls
250              : TargetTriple.getArch() == Triple::x86_64;
251 }
252 
253 bool mightUseStackSafetyAnalysis(bool DisableOptimization) {
254   return ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
255                                               : !DisableOptimization;
256 }
257 
258 bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
259                                   bool DisableOptimization) {
260   return shouldInstrumentStack(TargetTriple) &&
261          mightUseStackSafetyAnalysis(DisableOptimization);
262 }
263 
264 bool shouldDetectUseAfterScope(const Triple &TargetTriple) {
265   return ClUseAfterScope && shouldInstrumentStack(TargetTriple);
266 }
267 
/// An instrumentation pass implementing detection of addressability bugs
/// using tagged pointers.
///
/// The constructor resolves the kernel/recover settings and then immediately
/// runs initializeModule() on the module it was given.
class HWAddressSanitizer {
public:
  /// \param M the module to instrument.
  /// \param CompileKernel requested kernel mode; replaced by the value of
  ///        -hwasan-kernel when that flag appears on the command line.
  /// \param Recover requested recovery mode; replaced by the value of
  ///        -hwasan-recover when that flag appears on the command line.
  /// \param SSI stack safety results; may be null.
  HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
                     const StackSafetyGlobalInfo *SSI)
      : M(M), SSI(SSI) {
    this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
    this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0
                              ? ClEnableKhwasan
                              : CompileKernel;

    // Recover and CompileKernel must be set before this call: both are read
    // during module initialization.
    initializeModule();
  }

  /// Called once for every function of the module by
  /// HWAddressSanitizerPass::run.
  void sanitizeFunction(Function &F, FunctionAnalysisManager &FAM);

private:
  /// Values produced by insertShadowTagCheck(); all members default to null.
  struct ShadowTagCheckInfo {
    Instruction *TagMismatchTerm = nullptr;
    Value *PtrLong = nullptr;
    Value *AddrLong = nullptr;
    Value *PtrTag = nullptr;
    Value *MemTag = nullptr;
  };
  /// Replaces the stored stack safety results pointer.
  void setSSI(const StackSafetyGlobalInfo *S) { SSI = S; }

  // Module-level setup. initializeModule() is run from the constructor and
  // calls createHwasanCtorComdat() when not compiling for the kernel.
  void initializeModule();
  void createHwasanCtorComdat();

  /// Declares the runtime callbacks and the __hwasan_shadow global in \p M and
  /// caches them in the FunctionCallee / ShadowGlobal members below.
  void initializeCallbacks(Module &M);

  /// Passes \p Val through an empty inline asm so that it becomes opaque.
  Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val);

  // Helpers that produce the shadow base when it is not taken from TLS.
  Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
  Value *getShadowNonTls(IRBuilder<> &IRB);

  void untagPointerOperand(Instruction *I, Value *Addr);
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);

  int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex);
  ShadowTagCheckInfo insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore,
                                          DomTreeUpdater &DTU, LoopInfo *LI);
  void instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
                                  unsigned AccessSizeIndex,
                                  Instruction *InsertBefore,
                                  DomTreeUpdater &DTU, LoopInfo *LI);
  void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                 unsigned AccessSizeIndex,
                                 Instruction *InsertBefore, DomTreeUpdater &DTU,
                                 LoopInfo *LI);
  bool ignoreMemIntrinsic(MemIntrinsic *MI);
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  bool instrumentMemAccess(InterestingMemoryOperand &O, DomTreeUpdater &DTU,
                           LoopInfo *LI);
  bool ignoreAccess(Instruction *Inst, Value *Ptr);
  void getInterestingMemoryOperands(
      Instruction *I, const TargetLibraryInfo &TLI,
      SmallVectorImpl<InterestingMemoryOperand> &Interesting);

  void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
  Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
  Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
  bool instrumentStack(memtag::StackInfo &Info, Value *StackTag, Value *UARTag,
                       const DominatorTree &DT, const PostDominatorTree &PDT,
                       const LoopInfo &LI);
  Value *readRegister(IRBuilder<> &IRB, StringRef Name);
  bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
  Value *getNextTagWithCall(IRBuilder<> &IRB);
  Value *getStackBaseTag(IRBuilder<> &IRB);
  Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, unsigned AllocaNo);
  Value *getUARTag(IRBuilder<> &IRB);

  Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
  Value *applyTagMask(IRBuilder<> &IRB, Value *OldTag);
  unsigned retagMask(unsigned AllocaNo);

  void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);

  void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
  void instrumentGlobals();

  Value *getPC(IRBuilder<> &IRB);
  Value *getSP(IRBuilder<> &IRB);
  Value *getFrameRecordInfo(IRBuilder<> &IRB);

  void instrumentPersonalityFunctions();

  // Set in initializeModule() to the context of M.
  LLVMContext *C;
  Module &M;
  const StackSafetyGlobalInfo *SSI;
  // Set in initializeModule() from the module's target triple.
  Triple TargetTriple;

  /// This struct defines the shadow mapping using the rule:
  ///   shadow = (mem >> Scale) + Offset.
  /// If InGlobal is true, then
  ///   extern char __hwasan_shadow[];
  ///   shadow = (mem >> Scale) + &__hwasan_shadow
  /// If InTls is true, then
  ///   extern char *__hwasan_tls;
  ///   shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
  ///
  /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
  /// ring buffer for storing stack allocations on targets that support it.
  struct ShadowMapping {
    uint8_t Scale;
    uint64_t Offset;
    bool InGlobal;
    bool InTls;
    bool WithFrameRecord;

    void init(Triple &TargetTriple, bool InstrumentWithCalls);
    // Objects are aligned to one shadow granule, i.e. 2^Scale bytes.
    Align getObjectAlignment() const { return Align(1ULL << Scale); }
  };

  ShadowMapping Mapping;

  // Cached types. VoidTy and Int64Ty are initialized here from M's context
  // (M is declared above them); the rest are assigned in initializeModule().
  Type *VoidTy = Type::getVoidTy(M.getContext());
  Type *IntptrTy;
  PointerType *PtrTy;
  Type *Int8Ty;
  Type *Int32Ty;
  Type *Int64Ty = Type::getInt64Ty(M.getContext());

  // CompileKernel and Recover are resolved in the constructor; the remaining
  // flags are computed in initializeModule().
  bool CompileKernel;
  bool Recover;
  bool OutlinedChecks;
  bool InlineFastPath;
  bool UseShortGranules;
  bool InstrumentLandingPads;
  bool InstrumentWithCalls;
  bool InstrumentStack;
  bool DetectUseAfterScope;
  bool UsePageAliases;
  // True when a match-all tag is set and we are not compiling for the kernel;
  // selects the "_match_all" callback variants in initializeCallbacks().
  bool UseMatchAllCallback;

  std::optional<uint8_t> MatchAllTag;

  // 57 / 0x3F on x86_64 and 56 / 0xFF elsewhere (see initializeModule()).
  unsigned PointerTagShift;
  uint64_t TagMaskByte;

  // Reset to nullptr in initializeModule(); assigned by
  // createHwasanCtorComdat() when that runs.
  Function *HwasanCtorFunction;

  // Indexed as [IsWrite][AccessSizeIndex]; the access size in bytes is
  // 1 << AccessSizeIndex.
  FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
  // Indexed as [IsWrite]; the "N" (explicit size) callback variants.
  FunctionCallee HwasanMemoryAccessCallbackSized[2];

  FunctionCallee HwasanMemmove, HwasanMemcpy, HwasanMemset;
  FunctionCallee HwasanHandleVfork;

  FunctionCallee HwasanTagMemoryFunc;
  FunctionCallee HwasanGenerateTagFunc;
  FunctionCallee HwasanRecordFrameRecordFunc;

  // The __hwasan_shadow global; assigned in initializeCallbacks().
  Constant *ShadowGlobal;

  Value *ShadowBase = nullptr;
  Value *StackBaseTag = nullptr;
  Value *CachedSP = nullptr;
  // The __hwasan_tls global; only assigned on non-Android targets.
  GlobalValue *ThreadPtrGlobal = nullptr;
};
428 
429 } // end anonymous namespace
430 
431 PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
432                                               ModuleAnalysisManager &MAM) {
433   const StackSafetyGlobalInfo *SSI = nullptr;
434   auto TargetTriple = llvm::Triple(M.getTargetTriple());
435   if (shouldUseStackSafetyAnalysis(TargetTriple, Options.DisableOptimization))
436     SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);
437 
438   HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
439   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
440   for (Function &F : M)
441     HWASan.sanitizeFunction(F, FAM);
442 
443   PreservedAnalyses PA = PreservedAnalyses::none();
444   // DominatorTreeAnalysis, PostDominatorTreeAnalysis, and LoopAnalysis
445   // are incrementally updated throughout this pass whenever
446   // SplitBlockAndInsertIfThen is called.
447   PA.preserve<DominatorTreeAnalysis>();
448   PA.preserve<PostDominatorTreeAnalysis>();
449   PA.preserve<LoopAnalysis>();
450   // GlobalsAA is considered stateless and does not get invalidated unless
451   // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
452   // make changes that require GlobalsAA to be invalidated.
453   PA.abandon<GlobalsAA>();
454   return PA;
455 }
456 void HWAddressSanitizerPass::printPipeline(
457     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
458   static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline(
459       OS, MapClassName2PassName);
460   OS << '<';
461   if (Options.CompileKernel)
462     OS << "kernel;";
463   if (Options.Recover)
464     OS << "recover";
465   OS << '>';
466 }
467 
468 void HWAddressSanitizer::createHwasanCtorComdat() {
469   std::tie(HwasanCtorFunction, std::ignore) =
470       getOrCreateSanitizerCtorAndInitFunctions(
471           M, kHwasanModuleCtorName, kHwasanInitName,
472           /*InitArgTypes=*/{},
473           /*InitArgs=*/{},
474           // This callback is invoked when the functions are created the first
475           // time. Hook them into the global ctors list in that case:
476           [&](Function *Ctor, FunctionCallee) {
477             Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
478             Ctor->setComdat(CtorComdat);
479             appendToGlobalCtors(M, Ctor, 0, Ctor);
480           });
481 
482   // Create a note that contains pointers to the list of global
483   // descriptors. Adding a note to the output file will cause the linker to
484   // create a PT_NOTE program header pointing to the note that we can use to
485   // find the descriptor list starting from the program headers. A function
486   // provided by the runtime initializes the shadow memory for the globals by
487   // accessing the descriptor list via the note. The dynamic loader needs to
488   // call this function whenever a library is loaded.
489   //
490   // The reason why we use a note for this instead of a more conventional
491   // approach of having a global constructor pass a descriptor list pointer to
492   // the runtime is because of an order of initialization problem. With
493   // constructors we can encounter the following problematic scenario:
494   //
495   // 1) library A depends on library B and also interposes one of B's symbols
496   // 2) B's constructors are called before A's (as required for correctness)
497   // 3) during construction, B accesses one of its "own" globals (actually
498   //    interposed by A) and triggers a HWASAN failure due to the initialization
499   //    for A not having happened yet
500   //
501   // Even without interposition it is possible to run into similar situations in
502   // cases where two libraries mutually depend on each other.
503   //
504   // We only need one note per binary, so put everything for the note in a
505   // comdat. This needs to be a comdat with an .init_array section to prevent
506   // newer versions of lld from discarding the note.
507   //
508   // Create the note even if we aren't instrumenting globals. This ensures that
509   // binaries linked from object files with both instrumented and
510   // non-instrumented globals will end up with a note, even if a comdat from an
511   // object file with non-instrumented globals is selected. The note is harmless
512   // if the runtime doesn't support it, since it will just be ignored.
513   Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
514 
515   Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
516   auto *Start =
517       new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
518                          nullptr, "__start_hwasan_globals");
519   Start->setVisibility(GlobalValue::HiddenVisibility);
520   auto *Stop =
521       new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
522                          nullptr, "__stop_hwasan_globals");
523   Stop->setVisibility(GlobalValue::HiddenVisibility);
524 
525   // Null-terminated so actually 8 bytes, which are required in order to align
526   // the note properly.
527   auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");
528 
529   auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
530                                  Int32Ty, Int32Ty);
531   auto *Note =
532       new GlobalVariable(M, NoteTy, /*isConstant=*/true,
533                          GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
534   Note->setSection(".note.hwasan.globals");
535   Note->setComdat(NoteComdat);
536   Note->setAlignment(Align(4));
537 
538   // The pointers in the note need to be relative so that the note ends up being
539   // placed in rodata, which is the standard location for notes.
540   auto CreateRelPtr = [&](Constant *Ptr) {
541     return ConstantExpr::getTrunc(
542         ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
543                              ConstantExpr::getPtrToInt(Note, Int64Ty)),
544         Int32Ty);
545   };
546   Note->setInitializer(ConstantStruct::getAnon(
547       {ConstantInt::get(Int32Ty, 8),                           // n_namesz
548        ConstantInt::get(Int32Ty, 8),                           // n_descsz
549        ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
550        Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
551   appendToCompilerUsed(M, Note);
552 
553   // Create a zero-length global in hwasan_globals so that the linker will
554   // always create start and stop symbols.
555   auto *Dummy = new GlobalVariable(
556       M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
557       Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
558   Dummy->setSection("hwasan_globals");
559   Dummy->setComdat(NoteComdat);
560   Dummy->setMetadata(LLVMContext::MD_associated,
561                      MDNode::get(*C, ValueAsMetadata::get(Note)));
562   appendToCompilerUsed(M, Dummy);
563 }
564 
565 /// Module-level initialization.
566 ///
567 /// inserts a call to __hwasan_init to the module's constructor list.
568 void HWAddressSanitizer::initializeModule() {
569   LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
570   auto &DL = M.getDataLayout();
571 
572   TargetTriple = Triple(M.getTargetTriple());
573 
574   // x86_64 currently has two modes:
575   // - Intel LAM (default)
576   // - pointer aliasing (heap only)
577   bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
578   UsePageAliases = shouldUsePageAliases(TargetTriple);
579   InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple);
580   InstrumentStack = shouldInstrumentStack(TargetTriple);
581   DetectUseAfterScope = shouldDetectUseAfterScope(TargetTriple);
582   PointerTagShift = IsX86_64 ? 57 : 56;
583   TagMaskByte = IsX86_64 ? 0x3F : 0xFF;
584 
585   Mapping.init(TargetTriple, InstrumentWithCalls);
586 
587   C = &(M.getContext());
588   IRBuilder<> IRB(*C);
589   IntptrTy = IRB.getIntPtrTy(DL);
590   PtrTy = IRB.getPtrTy();
591   Int8Ty = IRB.getInt8Ty();
592   Int32Ty = IRB.getInt32Ty();
593 
594   HwasanCtorFunction = nullptr;
595 
596   // Older versions of Android do not have the required runtime support for
597   // short granules, global or personality function instrumentation. On other
598   // platforms we currently require using the latest version of the runtime.
599   bool NewRuntime =
600       !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);
601 
602   UseShortGranules =
603       ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime;
604   OutlinedChecks =
605       (TargetTriple.isAArch64() || TargetTriple.isRISCV64()) &&
606       TargetTriple.isOSBinFormatELF() &&
607       (ClInlineAllChecks.getNumOccurrences() ? !ClInlineAllChecks : !Recover);
608 
609   InlineFastPath =
610       (ClInlineFastPathChecks.getNumOccurrences()
611            ? ClInlineFastPathChecks
612            : !(TargetTriple.isAndroid() ||
613                TargetTriple.isOSFuchsia())); // These platforms may prefer less
614                                              // inlining to reduce binary size.
615 
616   if (ClMatchAllTag.getNumOccurrences()) {
617     if (ClMatchAllTag != -1) {
618       MatchAllTag = ClMatchAllTag & 0xFF;
619     }
620   } else if (CompileKernel) {
621     MatchAllTag = 0xFF;
622   }
623   UseMatchAllCallback = !CompileKernel && MatchAllTag.has_value();
624 
625   // If we don't have personality function support, fall back to landing pads.
626   InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences()
627                               ? ClInstrumentLandingPads
628                               : !NewRuntime;
629 
630   if (!CompileKernel) {
631     createHwasanCtorComdat();
632     bool InstrumentGlobals =
633         ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime;
634 
635     if (InstrumentGlobals && !UsePageAliases)
636       instrumentGlobals();
637 
638     bool InstrumentPersonalityFunctions =
639         ClInstrumentPersonalityFunctions.getNumOccurrences()
640             ? ClInstrumentPersonalityFunctions
641             : NewRuntime;
642     if (InstrumentPersonalityFunctions)
643       instrumentPersonalityFunctions();
644   }
645 
646   if (!TargetTriple.isAndroid()) {
647     Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
648       auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
649                                     GlobalValue::ExternalLinkage, nullptr,
650                                     "__hwasan_tls", nullptr,
651                                     GlobalVariable::InitialExecTLSModel);
652       appendToCompilerUsed(M, GV);
653       return GV;
654     });
655     ThreadPtrGlobal = cast<GlobalVariable>(C);
656   }
657 }
658 
659 void HWAddressSanitizer::initializeCallbacks(Module &M) {
660   IRBuilder<> IRB(*C);
661   const std::string MatchAllStr = UseMatchAllCallback ? "_match_all" : "";
662   FunctionType *HwasanMemoryAccessCallbackSizedFnTy,
663       *HwasanMemoryAccessCallbackFnTy, *HwasanMemTransferFnTy,
664       *HwasanMemsetFnTy;
665   if (UseMatchAllCallback) {
666     HwasanMemoryAccessCallbackSizedFnTy =
667         FunctionType::get(VoidTy, {IntptrTy, IntptrTy, Int8Ty}, false);
668     HwasanMemoryAccessCallbackFnTy =
669         FunctionType::get(VoidTy, {IntptrTy, Int8Ty}, false);
670     HwasanMemTransferFnTy =
671         FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy, Int8Ty}, false);
672     HwasanMemsetFnTy =
673         FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy, Int8Ty}, false);
674   } else {
675     HwasanMemoryAccessCallbackSizedFnTy =
676         FunctionType::get(VoidTy, {IntptrTy, IntptrTy}, false);
677     HwasanMemoryAccessCallbackFnTy =
678         FunctionType::get(VoidTy, {IntptrTy}, false);
679     HwasanMemTransferFnTy =
680         FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy}, false);
681     HwasanMemsetFnTy =
682         FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy}, false);
683   }
684 
685   for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
686     const std::string TypeStr = AccessIsWrite ? "store" : "load";
687     const std::string EndingStr = Recover ? "_noabort" : "";
688 
689     HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
690         ClMemoryAccessCallbackPrefix + TypeStr + "N" + MatchAllStr + EndingStr,
691         HwasanMemoryAccessCallbackSizedFnTy);
692 
693     for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
694          AccessSizeIndex++) {
695       HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
696           M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr +
697                                     itostr(1ULL << AccessSizeIndex) +
698                                     MatchAllStr + EndingStr,
699                                 HwasanMemoryAccessCallbackFnTy);
700     }
701   }
702 
703   const std::string MemIntrinCallbackPrefix =
704       (CompileKernel && !ClKasanMemIntrinCallbackPrefix)
705           ? std::string("")
706           : ClMemoryAccessCallbackPrefix;
707 
708   HwasanMemmove = M.getOrInsertFunction(
709       MemIntrinCallbackPrefix + "memmove" + MatchAllStr, HwasanMemTransferFnTy);
710   HwasanMemcpy = M.getOrInsertFunction(
711       MemIntrinCallbackPrefix + "memcpy" + MatchAllStr, HwasanMemTransferFnTy);
712   HwasanMemset = M.getOrInsertFunction(
713       MemIntrinCallbackPrefix + "memset" + MatchAllStr, HwasanMemsetFnTy);
714 
715   HwasanTagMemoryFunc = M.getOrInsertFunction("__hwasan_tag_memory", VoidTy,
716                                               PtrTy, Int8Ty, IntptrTy);
717   HwasanGenerateTagFunc =
718       M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
719 
720   HwasanRecordFrameRecordFunc =
721       M.getOrInsertFunction("__hwasan_add_frame_record", VoidTy, Int64Ty);
722 
723   ShadowGlobal =
724       M.getOrInsertGlobal("__hwasan_shadow", ArrayType::get(Int8Ty, 0));
725 
726   HwasanHandleVfork =
727       M.getOrInsertFunction("__hwasan_handle_vfork", VoidTy, IntptrTy);
728 }
729 
730 Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
731   // An empty inline asm with input reg == output reg.
732   // An opaque no-op cast, basically.
733   // This prevents code bloat as a result of rematerializing trivial definitions
734   // such as constants or global addresses at every load and store.
735   InlineAsm *Asm =
736       InlineAsm::get(FunctionType::get(PtrTy, {Val->getType()}, false),
737                      StringRef(""), StringRef("=r,0"),
738                      /*hasSideEffects=*/false);
739   return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
740 }
741 
742 Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
743   return getOpaqueNoopCast(IRB, ShadowGlobal);
744 }
745 
746 Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
747   if (Mapping.Offset != kDynamicShadowSentinel)
748     return getOpaqueNoopCast(
749         IRB, ConstantExpr::getIntToPtr(
750                  ConstantInt::get(IntptrTy, Mapping.Offset), PtrTy));
751 
752   if (Mapping.InGlobal)
753     return getDynamicShadowIfunc(IRB);
754 
755   Value *GlobalDynamicAddress =
756       IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
757           kHwasanShadowMemoryDynamicAddress, PtrTy);
758   return IRB.CreateLoad(PtrTy, GlobalDynamicAddress);
759 }
760 
761 bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
762   // Do not instrument accesses from different address spaces; we cannot deal
763   // with them.
764   Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
765   if (PtrTy->getPointerAddressSpace() != 0)
766     return true;
767 
768   // Ignore swifterror addresses.
769   // swifterror memory addresses are mem2reg promoted by instruction
770   // selection. As such they cannot have regular uses like an instrumentation
771   // function and it makes no sense to track them as memory.
772   if (Ptr->isSwiftError())
773     return true;
774 
775   if (findAllocaForValue(Ptr)) {
776     if (!InstrumentStack)
777       return true;
778     if (SSI && SSI->stackAccessIsSafe(*Inst))
779       return true;
780   }
781   return false;
782 }
783 
784 void HWAddressSanitizer::getInterestingMemoryOperands(
785     Instruction *I, const TargetLibraryInfo &TLI,
786     SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
787   // Skip memory accesses inserted by another instrumentation.
788   if (I->hasMetadata(LLVMContext::MD_nosanitize))
789     return;
790 
791   // Do not instrument the load fetching the dynamic shadow address.
792   if (ShadowBase == I)
793     return;
794 
795   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
796     if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand()))
797       return;
798     Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
799                              LI->getType(), LI->getAlign());
800   } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
801     if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand()))
802       return;
803     Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
804                              SI->getValueOperand()->getType(), SI->getAlign());
805   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
806     if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand()))
807       return;
808     Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
809                              RMW->getValOperand()->getType(), std::nullopt);
810   } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
811     if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand()))
812       return;
813     Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
814                              XCHG->getCompareOperand()->getType(),
815                              std::nullopt);
816   } else if (auto *CI = dyn_cast<CallInst>(I)) {
817     for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
818       if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
819           ignoreAccess(I, CI->getArgOperand(ArgNo)))
820         continue;
821       Type *Ty = CI->getParamByValType(ArgNo);
822       Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
823     }
824     maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
825   }
826 }
827 
828 static unsigned getPointerOperandIndex(Instruction *I) {
829   if (LoadInst *LI = dyn_cast<LoadInst>(I))
830     return LI->getPointerOperandIndex();
831   if (StoreInst *SI = dyn_cast<StoreInst>(I))
832     return SI->getPointerOperandIndex();
833   if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
834     return RMW->getPointerOperandIndex();
835   if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
836     return XCHG->getPointerOperandIndex();
837   report_fatal_error("Unexpected instruction");
838   return -1;
839 }
840 
841 static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
842   size_t Res = llvm::countr_zero(TypeSize / 8);
843   assert(Res < kNumberOfAccessSizes);
844   return Res;
845 }
846 
847 void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
848   if (TargetTriple.isAArch64() || TargetTriple.getArch() == Triple::x86_64 ||
849       TargetTriple.isRISCV64())
850     return;
851 
852   IRBuilder<> IRB(I);
853   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
854   Value *UntaggedPtr =
855       IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
856   I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
857 }
858 
859 Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
860   // Mem >> Scale
861   Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
862   if (Mapping.Offset == 0)
863     return IRB.CreateIntToPtr(Shadow, PtrTy);
864   // (Mem >> Scale) + Offset
865   return IRB.CreatePtrAdd(ShadowBase, Shadow);
866 }
867 
868 int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
869                                           unsigned AccessSizeIndex) {
870   return (CompileKernel << HWASanAccessInfo::CompileKernelShift) |
871          (MatchAllTag.has_value() << HWASanAccessInfo::HasMatchAllShift) |
872          (MatchAllTag.value_or(0) << HWASanAccessInfo::MatchAllShift) |
873          (Recover << HWASanAccessInfo::RecoverShift) |
874          (IsWrite << HWASanAccessInfo::IsWriteShift) |
875          (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
876 }
877 
878 HWAddressSanitizer::ShadowTagCheckInfo
879 HWAddressSanitizer::insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore,
880                                          DomTreeUpdater &DTU, LoopInfo *LI) {
881   ShadowTagCheckInfo R;
882 
883   IRBuilder<> IRB(InsertBefore);
884 
885   R.PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
886   R.PtrTag =
887       IRB.CreateTrunc(IRB.CreateLShr(R.PtrLong, PointerTagShift), Int8Ty);
888   R.AddrLong = untagPointer(IRB, R.PtrLong);
889   Value *Shadow = memToShadow(R.AddrLong, IRB);
890   R.MemTag = IRB.CreateLoad(Int8Ty, Shadow);
891   Value *TagMismatch = IRB.CreateICmpNE(R.PtrTag, R.MemTag);
892 
893   if (MatchAllTag.has_value()) {
894     Value *TagNotIgnored = IRB.CreateICmpNE(
895         R.PtrTag, ConstantInt::get(R.PtrTag->getType(), *MatchAllTag));
896     TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
897   }
898 
899   R.TagMismatchTerm = SplitBlockAndInsertIfThen(
900       TagMismatch, InsertBefore, false,
901       MDBuilder(*C).createBranchWeights(1, 100000), &DTU, LI);
902 
903   return R;
904 }
905 
906 void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
907                                                     unsigned AccessSizeIndex,
908                                                     Instruction *InsertBefore,
909                                                     DomTreeUpdater &DTU,
910                                                     LoopInfo *LI) {
911   assert(!UsePageAliases);
912   const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
913 
914   if (InlineFastPath)
915     InsertBefore =
916         insertShadowTagCheck(Ptr, InsertBefore, DTU, LI).TagMismatchTerm;
917 
918   IRBuilder<> IRB(InsertBefore);
919   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
920   IRB.CreateCall(Intrinsic::getDeclaration(
921                      M, UseShortGranules
922                             ? Intrinsic::hwasan_check_memaccess_shortgranules
923                             : Intrinsic::hwasan_check_memaccess),
924                  {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
925 }
926 
/// Emits the complete tag check inline before \p InsertBefore: the shadow-tag
/// comparison produced by insertShadowTagCheck(), followed by the
/// short-granule checks, and an architecture-specific trap sequence in the
/// failure block.
///
/// \param Ptr             The pointer being accessed.
/// \param IsWrite         Whether the access is a write; encoded into the
///                        trap immediate via getAccessInfo().
/// \param AccessSizeIndex Index such that the access is
///                        (1 << AccessSizeIndex) bytes wide.
/// \param InsertBefore    The instruction the check is placed in front of.
/// \param DTU, LI         Forwarded to the block-splitting utilities.
void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                                   unsigned AccessSizeIndex,
                                                   Instruction *InsertBefore,
                                                   DomTreeUpdater &DTU,
                                                   LoopInfo *LI) {
  assert(!UsePageAliases);
  const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);

  ShadowTagCheckInfo TCI = insertShadowTagCheck(Ptr, InsertBefore, DTU, LI);

  // Everything below is emitted inside the tag-mismatch block. First check: a
  // memory tag above 15 goes straight to the failure block. That block ends
  // in `unreachable` unless Recover is set.
  IRBuilder<> IRB(TCI.TagMismatchTerm);
  Value *OutOfShortGranuleTagRange =
      IRB.CreateICmpUGT(TCI.MemTag, ConstantInt::get(Int8Ty, 15));
  Instruction *CheckFailTerm = SplitBlockAndInsertIfThen(
      OutOfShortGranuleTagRange, TCI.TagMismatchTerm, !Recover,
      MDBuilder(*C).createBranchWeights(1, 100000), &DTU, LI);

  // Second check: take the low four bits of the pointer, add the offset of
  // the last accessed byte, and fail if the result is >= the memory tag. The
  // existing failure block is reused as the branch target.
  IRB.SetInsertPoint(TCI.TagMismatchTerm);
  Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(TCI.PtrLong, 15), Int8Ty);
  PtrLowBits = IRB.CreateAdd(
      PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
  Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, TCI.MemTag);
  SplitBlockAndInsertIfThen(PtrLowBitsOOB, TCI.TagMismatchTerm, false,
                            MDBuilder(*C).createBranchWeights(1, 100000), &DTU,
                            LI, CheckFailTerm->getParent());

  // Third check: load the byte at (untagged address | 15) and fail if it
  // differs from the pointer tag.
  IRB.SetInsertPoint(TCI.TagMismatchTerm);
  Value *InlineTagAddr = IRB.CreateOr(TCI.AddrLong, 15);
  InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, PtrTy);
  Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
  Value *InlineTagMismatch = IRB.CreateICmpNE(TCI.PtrTag, InlineTag);
  SplitBlockAndInsertIfThen(InlineTagMismatch, TCI.TagMismatchTerm, false,
                            MDBuilder(*C).createBranchWeights(1, 100000), &DTU,
                            LI, CheckFailTerm->getParent());

  // Failure block: emit a trap as inline asm. The runtime-relevant bits of
  // AccessInfo are folded into the instruction's immediate, and the register
  // constraint pins the pointer value to the register named in each case.
  IRB.SetInsertPoint(CheckFailTerm);
  InlineAsm *Asm;
  switch (TargetTriple.getArch()) {
  case Triple::x86_64:
    // The signal handler will find the data address in rdi.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
        "int3\nnopl " +
            itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
            "(%rax)",
        "{rdi}",
        /*hasSideEffects=*/true);
    break;
  case Triple::aarch64:
  case Triple::aarch64_be:
    // The signal handler will find the data address in x0.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
        "brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
        "{x0}",
        /*hasSideEffects=*/true);
    break;
  case Triple::riscv64:
    // The signal handler will find the data address in x10.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
        "ebreak\naddiw x0, x11, " +
            itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
        "{x10}",
        /*hasSideEffects=*/true);
    break;
  default:
    report_fatal_error("unsupported architecture");
  }
  IRB.CreateCall(Asm, TCI.PtrLong);
  // In recover mode the failure block does not end the program; redirect its
  // branch to the block holding TagMismatchTerm.
  if (Recover)
    cast<BranchInst>(CheckFailTerm)
        ->setSuccessor(0, TCI.TagMismatchTerm->getParent());
}
1001 
1002 bool HWAddressSanitizer::ignoreMemIntrinsic(MemIntrinsic *MI) {
1003   if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1004     return (!ClInstrumentWrites || ignoreAccess(MTI, MTI->getDest())) &&
1005            (!ClInstrumentReads || ignoreAccess(MTI, MTI->getSource()));
1006   }
1007   if (isa<MemSetInst>(MI))
1008     return !ClInstrumentWrites || ignoreAccess(MI, MI->getDest());
1009   return false;
1010 }
1011 
1012 void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
1013   IRBuilder<> IRB(MI);
1014   if (isa<MemTransferInst>(MI)) {
1015     SmallVector<Value *, 4> Args{
1016         MI->getOperand(0), MI->getOperand(1),
1017         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)};
1018 
1019     if (UseMatchAllCallback)
1020       Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
1021     IRB.CreateCall(isa<MemMoveInst>(MI) ? HwasanMemmove : HwasanMemcpy, Args);
1022   } else if (isa<MemSetInst>(MI)) {
1023     SmallVector<Value *, 4> Args{
1024         MI->getOperand(0),
1025         IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
1026         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)};
1027     if (UseMatchAllCallback)
1028       Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
1029     IRB.CreateCall(HwasanMemset, Args);
1030   }
1031   MI->eraseFromParent();
1032 }
1033 
1034 bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O,
1035                                              DomTreeUpdater &DTU,
1036                                              LoopInfo *LI) {
1037   Value *Addr = O.getPtr();
1038 
1039   LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");
1040 
1041   if (O.MaybeMask)
1042     return false; // FIXME
1043 
1044   IRBuilder<> IRB(O.getInsn());
1045   if (!O.TypeStoreSize.isScalable() && isPowerOf2_64(O.TypeStoreSize) &&
1046       (O.TypeStoreSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
1047       (!O.Alignment || *O.Alignment >= Mapping.getObjectAlignment() ||
1048        *O.Alignment >= O.TypeStoreSize / 8)) {
1049     size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeStoreSize);
1050     if (InstrumentWithCalls) {
1051       SmallVector<Value *, 2> Args{IRB.CreatePointerCast(Addr, IntptrTy)};
1052       if (UseMatchAllCallback)
1053         Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
1054       IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
1055                      Args);
1056     } else if (OutlinedChecks) {
1057       instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(),
1058                                  DTU, LI);
1059     } else {
1060       instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(),
1061                                 DTU, LI);
1062     }
1063   } else {
1064     SmallVector<Value *, 3> Args{
1065         IRB.CreatePointerCast(Addr, IntptrTy),
1066         IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, O.TypeStoreSize),
1067                        ConstantInt::get(IntptrTy, 8))};
1068     if (UseMatchAllCallback)
1069       Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
1070     IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite], Args);
1071   }
1072   untagPointerOperand(O.getInsn(), Addr);
1073 
1074   return true;
1075 }
1076 
1077 void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
1078                                    size_t Size) {
1079   size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
1080   if (!UseShortGranules)
1081     Size = AlignedSize;
1082 
1083   Tag = IRB.CreateTrunc(Tag, Int8Ty);
1084   if (InstrumentWithCalls) {
1085     IRB.CreateCall(HwasanTagMemoryFunc,
1086                    {IRB.CreatePointerCast(AI, PtrTy), Tag,
1087                     ConstantInt::get(IntptrTy, AlignedSize)});
1088   } else {
1089     size_t ShadowSize = Size >> Mapping.Scale;
1090     Value *AddrLong = untagPointer(IRB, IRB.CreatePointerCast(AI, IntptrTy));
1091     Value *ShadowPtr = memToShadow(AddrLong, IRB);
1092     // If this memset is not inlined, it will be intercepted in the hwasan
1093     // runtime library. That's OK, because the interceptor skips the checks if
1094     // the address is in the shadow region.
1095     // FIXME: the interceptor is not as fast as real memset. Consider lowering
1096     // llvm.memset right here into either a sequence of stores, or a call to
1097     // hwasan_tag_memory.
1098     if (ShadowSize)
1099       IRB.CreateMemSet(ShadowPtr, Tag, ShadowSize, Align(1));
1100     if (Size != AlignedSize) {
1101       const uint8_t SizeRemainder = Size % Mapping.getObjectAlignment().value();
1102       IRB.CreateStore(ConstantInt::get(Int8Ty, SizeRemainder),
1103                       IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
1104       IRB.CreateStore(
1105           Tag, IRB.CreateConstGEP1_32(Int8Ty, IRB.CreatePointerCast(AI, PtrTy),
1106                                       AlignedSize - 1));
1107     }
1108   }
1109 }
1110 
1111 unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
1112   if (TargetTriple.getArch() == Triple::x86_64)
1113     return AllocaNo & TagMaskByte;
1114 
1115   // A list of 8-bit numbers that have at most one run of non-zero bits.
1116   // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
1117   // masks.
1118   // The list does not include the value 255, which is used for UAR.
1119   //
1120   // Because we are more likely to use earlier elements of this list than later
1121   // ones, it is sorted in increasing order of probability of collision with a
1122   // mask allocated (temporally) nearby. The program that generated this list
1123   // can be found at:
1124   // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
1125   static const unsigned FastMasks[] = {
1126       0,   128, 64, 192, 32,  96,  224, 112, 240, 48, 16,  120,
1127       248, 56,  24, 8,   124, 252, 60,  28,  12,  4,  126, 254,
1128       62,  30,  14, 6,   2,   127, 63,  31,  15,  7,  3,   1};
1129   return FastMasks[AllocaNo % std::size(FastMasks)];
1130 }
1131 
1132 Value *HWAddressSanitizer::applyTagMask(IRBuilder<> &IRB, Value *OldTag) {
1133   if (TagMaskByte == 0xFF)
1134     return OldTag; // No need to clear the tag byte.
1135   return IRB.CreateAnd(OldTag,
1136                        ConstantInt::get(OldTag->getType(), TagMaskByte));
1137 }
1138 
1139 Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
1140   return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
1141 }
1142 
1143 Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
1144   if (ClGenerateTagsWithCalls)
1145     return nullptr;
1146   if (StackBaseTag)
1147     return StackBaseTag;
1148   // Extract some entropy from the stack pointer for the tags.
1149   // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
1150   // between functions).
1151   Value *StackPointerLong = getSP(IRB);
1152   Value *StackTag =
1153       applyTagMask(IRB, IRB.CreateXor(StackPointerLong,
1154                                       IRB.CreateLShr(StackPointerLong, 20)));
1155   StackTag->setName("hwasan.stack.base.tag");
1156   return StackTag;
1157 }
1158 
1159 Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
1160                                         unsigned AllocaNo) {
1161   if (ClGenerateTagsWithCalls)
1162     return getNextTagWithCall(IRB);
1163   return IRB.CreateXor(
1164       StackTag, ConstantInt::get(StackTag->getType(), retagMask(AllocaNo)));
1165 }
1166 
1167 Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB) {
1168   Value *StackPointerLong = getSP(IRB);
1169   Value *UARTag =
1170       applyTagMask(IRB, IRB.CreateLShr(StackPointerLong, PointerTagShift));
1171 
1172   UARTag->setName("hwasan.uar.tag");
1173   return UARTag;
1174 }
1175 
1176 // Add a tag to an address.
1177 Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
1178                                       Value *PtrLong, Value *Tag) {
1179   assert(!UsePageAliases);
1180   Value *TaggedPtrLong;
1181   if (CompileKernel) {
1182     // Kernel addresses have 0xFF in the most significant byte.
1183     Value *ShiftedTag =
1184         IRB.CreateOr(IRB.CreateShl(Tag, PointerTagShift),
1185                      ConstantInt::get(IntptrTy, (1ULL << PointerTagShift) - 1));
1186     TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
1187   } else {
1188     // Userspace can simply do OR (tag << PointerTagShift);
1189     Value *ShiftedTag = IRB.CreateShl(Tag, PointerTagShift);
1190     TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
1191   }
1192   return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
1193 }
1194 
1195 // Remove tag from an address.
1196 Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
1197   assert(!UsePageAliases);
1198   Value *UntaggedPtrLong;
1199   if (CompileKernel) {
1200     // Kernel addresses have 0xFF in the most significant byte.
1201     UntaggedPtrLong =
1202         IRB.CreateOr(PtrLong, ConstantInt::get(PtrLong->getType(),
1203                                                TagMaskByte << PointerTagShift));
1204   } else {
1205     // Userspace addresses have 0x00.
1206     UntaggedPtrLong = IRB.CreateAnd(
1207         PtrLong, ConstantInt::get(PtrLong->getType(),
1208                                   ~(TagMaskByte << PointerTagShift)));
1209   }
1210   return UntaggedPtrLong;
1211 }
1212 
1213 Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
1214   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
1215   if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) {
1216     // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
1217     // in Bionic's libc/private/bionic_tls.h.
1218     Function *ThreadPointerFunc =
1219         Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
1220     return IRB.CreateConstGEP1_32(Int8Ty, IRB.CreateCall(ThreadPointerFunc),
1221                                   0x30);
1222   }
1223   if (ThreadPtrGlobal)
1224     return ThreadPtrGlobal;
1225 
1226   return nullptr;
1227 }
1228 
1229 Value *HWAddressSanitizer::getPC(IRBuilder<> &IRB) {
1230   if (TargetTriple.getArch() == Triple::aarch64)
1231     return readRegister(IRB, "pc");
1232   return IRB.CreatePtrToInt(IRB.GetInsertBlock()->getParent(), IntptrTy);
1233 }
1234 
1235 Value *HWAddressSanitizer::getSP(IRBuilder<> &IRB) {
1236   if (!CachedSP) {
1237     // FIXME: use addressofreturnaddress (but implement it in aarch64 backend
1238     // first).
1239     Function *F = IRB.GetInsertBlock()->getParent();
1240     Module *M = F->getParent();
1241     auto *GetStackPointerFn = Intrinsic::getDeclaration(
1242         M, Intrinsic::frameaddress,
1243         IRB.getPtrTy(M->getDataLayout().getAllocaAddrSpace()));
1244     CachedSP = IRB.CreatePtrToInt(
1245         IRB.CreateCall(GetStackPointerFn, {Constant::getNullValue(Int32Ty)}),
1246         IntptrTy);
1247   }
1248   return CachedSP;
1249 }
1250 
1251 Value *HWAddressSanitizer::getFrameRecordInfo(IRBuilder<> &IRB) {
1252   // Prepare ring buffer data.
1253   Value *PC = getPC(IRB);
1254   Value *SP = getSP(IRB);
1255 
1256   // Mix SP and PC.
1257   // Assumptions:
1258   // PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero)
1259   // SP is 0xsssssssssssSSSS0  (4 lower bits are zero)
1260   // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
1261   //       0xSSSSPPPPPPPPPPPP
1262   SP = IRB.CreateShl(SP, 44);
1263   return IRB.CreateOr(PC, SP);
1264 }
1265 
/// Emits function-entry code at \p IRB's insertion point. It establishes
/// ShadowBase and, when \p WithFrameRecord is set, records the current frame
/// in the stack history using the mode selected by ClRecordStackHistory
/// (a runtime call, or an inline store into the per-thread ring buffer).
void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
  // Pick a shadow base that does not depend on the thread slot when one is
  // available.
  if (!Mapping.InTls)
    ShadowBase = getShadowNonTls(IRB);
  else if (!WithFrameRecord && TargetTriple.isAndroid())
    ShadowBase = getDynamicShadowIfunc(IRB);

  // Nothing else to emit when no frame record is wanted and the shadow base
  // is already known.
  if (!WithFrameRecord && ShadowBase)
    return;

  Value *SlotPtr = nullptr;
  Value *ThreadLong = nullptr;
  Value *ThreadLongMaybeUntagged = nullptr;

  // Loads the thread slot on first use, filling in SlotPtr and ThreadLong as
  // a side effect; code further down reads both of them directly.
  auto getThreadLongMaybeUntagged = [&]() {
    if (!SlotPtr)
      SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
    if (!ThreadLong)
      ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
    // Extract the address field from ThreadLong. Unnecessary on AArch64 with
    // TBI.
    return TargetTriple.isAArch64() ? ThreadLong
                                    : untagPointer(IRB, ThreadLong);
  };

  if (WithFrameRecord) {
    switch (ClRecordStackHistory) {
    case libcall: {
      // Emit a runtime call into hwasan rather than emitting instructions for
      // recording stack history.
      Value *FrameRecordInfo = getFrameRecordInfo(IRB);
      IRB.CreateCall(HwasanRecordFrameRecordFunc, {FrameRecordInfo});
      break;
    }
    case instr: {
      ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();

      // The stack base tag is taken from the thread slot value.
      StackBaseTag = IRB.CreateAShr(ThreadLong, 3);

      // Store data to ring buffer.
      Value *FrameRecordInfo = getFrameRecordInfo(IRB);
      Value *RecordPtr =
          IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IRB.getPtrTy(0));
      IRB.CreateStore(FrameRecordInfo, RecordPtr);

      // Update the ring buffer. Top byte of ThreadLong defines the size of the
      // buffer in pages, it must be a power of two, and the start of the buffer
      // must be aligned by twice that much. Therefore wrap around of the ring
      // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
      // The use of AShr instead of LShr is due to
      //   https://bugs.llvm.org/show_bug.cgi?id=39030
      // Runtime library makes sure not to use the highest bit.
      Value *WrapMask = IRB.CreateXor(
          IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
          ConstantInt::get(IntptrTy, (uint64_t)-1));
      // Advance by 8 bytes, apply the wrap mask, and write the new value back
      // to the thread slot.
      Value *ThreadLongNew = IRB.CreateAnd(
          IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
      IRB.CreateStore(ThreadLongNew, SlotPtr);
      break;
    }
    case none: {
      llvm_unreachable(
          "A stack history recording mode should've been selected.");
    }
    }
  }

  if (!ShadowBase) {
    // The thread slot may not have been loaded above (for example in the
    // libcall mode); load it now if so.
    if (!ThreadLongMaybeUntagged)
      ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();

    // Get shadow base address by aligning RecordPtr up.
    // Note: this is not correct if the pointer is already aligned.
    // Runtime library will make sure this never happens.
    ShadowBase = IRB.CreateAdd(
        IRB.CreateOr(
            ThreadLongMaybeUntagged,
            ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
        ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
    ShadowBase = IRB.CreateIntToPtr(ShadowBase, PtrTy);
  }
}
1347 
1348 Value *HWAddressSanitizer::readRegister(IRBuilder<> &IRB, StringRef Name) {
1349   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
1350   Function *ReadRegister =
1351       Intrinsic::getDeclaration(M, Intrinsic::read_register, IntptrTy);
1352   MDNode *MD = MDNode::get(*C, {MDString::get(*C, Name)});
1353   Value *Args[] = {MetadataAsValue::get(*C, MD)};
1354   return IRB.CreateCall(ReadRegister, Args);
1355 }
1356 
1357 bool HWAddressSanitizer::instrumentLandingPads(
1358     SmallVectorImpl<Instruction *> &LandingPadVec) {
1359   for (auto *LP : LandingPadVec) {
1360     IRBuilder<> IRB(LP->getNextNode());
1361     IRB.CreateCall(
1362         HwasanHandleVfork,
1363         {readRegister(IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp"
1364                                                                       : "sp")});
1365   }
1366   return true;
1367 }
1368 
1369 static bool isLifetimeIntrinsic(Value *V) {
1370   auto *II = dyn_cast<IntrinsicInst>(V);
1371   return II && II->isLifetimeStartOrEnd();
1372 }
1373 
1374 bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
1375                                          Value *StackTag, Value *UARTag,
1376                                          const DominatorTree &DT,
1377                                          const PostDominatorTree &PDT,
1378                                          const LoopInfo &LI) {
1379   // Ideally, we want to calculate tagged stack base pointer, and rewrite all
1380   // alloca addresses using that. Unfortunately, offsets are not known yet
1381   // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
1382   // temp, shift-OR it into each alloca address and xor with the retag mask.
1383   // This generates one extra instruction per alloca use.
1384   unsigned int I = 0;
1385 
1386   for (auto &KV : SInfo.AllocasToInstrument) {
1387     auto N = I++;
1388     auto *AI = KV.first;
1389     memtag::AllocaInfo &Info = KV.second;
1390     IRBuilder<> IRB(AI->getNextNode());
1391 
1392     // Replace uses of the alloca with tagged address.
1393     Value *Tag = getAllocaTag(IRB, StackTag, N);
1394     Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
1395     Value *AINoTagLong = untagPointer(IRB, AILong);
1396     Value *Replacement = tagPointer(IRB, AI->getType(), AINoTagLong, Tag);
1397     std::string Name =
1398         AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
1399     Replacement->setName(Name + ".hwasan");
1400 
1401     size_t Size = memtag::getAllocaSizeInBytes(*AI);
1402     size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
1403 
1404     Value *AICast = IRB.CreatePointerCast(AI, PtrTy);
1405 
1406     auto HandleLifetime = [&](IntrinsicInst *II) {
1407       // Set the lifetime intrinsic to cover the whole alloca. This reduces the
1408       // set of assumptions we need to make about the lifetime. Without this we
1409       // would need to ensure that we can track the lifetime pointer to a
1410       // constant offset from the alloca, and would still need to change the
1411       // size to include the extra alignment we use for the untagging to make
1412       // the size consistent.
1413       //
1414       // The check for standard lifetime below makes sure that we have exactly
1415       // one set of start / end in any execution (i.e. the ends are not
1416       // reachable from each other), so this will not cause any problems.
1417       II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize));
1418       II->setArgOperand(1, AICast);
1419     };
1420     llvm::for_each(Info.LifetimeStart, HandleLifetime);
1421     llvm::for_each(Info.LifetimeEnd, HandleLifetime);
1422 
1423     AI->replaceUsesWithIf(Replacement, [AICast, AILong](const Use &U) {
1424       auto *User = U.getUser();
1425       return User != AILong && User != AICast && !isLifetimeIntrinsic(User);
1426     });
1427 
1428     for (auto *DDI : Info.DbgVariableIntrinsics) {
1429       // Prepend "tag_offset, N" to the dwarf expression.
1430       // Tag offset logically applies to the alloca pointer, and it makes sense
1431       // to put it at the beginning of the expression.
1432       SmallVector<uint64_t, 8> NewOps = {dwarf::DW_OP_LLVM_tag_offset,
1433                                          retagMask(N)};
1434       for (size_t LocNo = 0; LocNo < DDI->getNumVariableLocationOps(); ++LocNo)
1435         if (DDI->getVariableLocationOp(LocNo) == AI)
1436           DDI->setExpression(DIExpression::appendOpsToArg(DDI->getExpression(),
1437                                                           NewOps, LocNo));
1438     }
1439 
1440     auto TagEnd = [&](Instruction *Node) {
1441       IRB.SetInsertPoint(Node);
1442       // When untagging, use the `AlignedSize` because we need to set the tags
1443       // for the entire alloca to original. If we used `Size` here, we would
1444       // keep the last granule tagged, and store zero in the last byte of the
1445       // last granule, due to how short granules are implemented.
1446       tagAlloca(IRB, AI, UARTag, AlignedSize);
1447     };
1448     // Calls to functions that may return twice (e.g. setjmp) confuse the
1449     // postdominator analysis, and will leave us to keep memory tagged after
1450     // function return. Work around this by always untagging at every return
1451     // statement if return_twice functions are called.
1452     bool StandardLifetime =
1453         SInfo.UnrecognizedLifetimes.empty() &&
1454         memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, &DT,
1455                                    &LI, ClMaxLifetimes) &&
1456         !SInfo.CallsReturnTwice;
1457     if (DetectUseAfterScope && StandardLifetime) {
1458       IntrinsicInst *Start = Info.LifetimeStart[0];
1459       IRB.SetInsertPoint(Start->getNextNode());
1460       tagAlloca(IRB, AI, Tag, Size);
1461       if (!memtag::forAllReachableExits(DT, PDT, LI, Start, Info.LifetimeEnd,
1462                                         SInfo.RetVec, TagEnd)) {
1463         for (auto *End : Info.LifetimeEnd)
1464           End->eraseFromParent();
1465       }
1466     } else {
1467       tagAlloca(IRB, AI, Tag, Size);
1468       for (auto *RI : SInfo.RetVec)
1469         TagEnd(RI);
1470       // We inserted tagging outside of the lifetimes, so we have to remove
1471       // them.
1472       for (auto &II : Info.LifetimeStart)
1473         II->eraseFromParent();
1474       for (auto &II : Info.LifetimeEnd)
1475         II->eraseFromParent();
1476     }
1477     memtag::alignAndPadAlloca(Info, Mapping.getObjectAlignment());
1478   }
1479   for (auto &I : SInfo.UnrecognizedLifetimes)
1480     I->eraseFromParent();
1481   return true;
1482 }
1483 
1484 void HWAddressSanitizer::sanitizeFunction(Function &F,
1485                                           FunctionAnalysisManager &FAM) {
1486   if (&F == HwasanCtorFunction)
1487     return;
1488 
1489   if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
1490     return;
1491 
1492   LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");
1493 
1494   SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
1495   SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
1496   SmallVector<Instruction *, 8> LandingPadVec;
1497   const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
1498 
1499   memtag::StackInfoBuilder SIB(SSI);
1500   for (auto &Inst : instructions(F)) {
1501     if (InstrumentStack) {
1502       SIB.visit(Inst);
1503     }
1504 
1505     if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
1506       LandingPadVec.push_back(&Inst);
1507 
1508     getInterestingMemoryOperands(&Inst, TLI, OperandsToInstrument);
1509 
1510     if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
1511       if (!ignoreMemIntrinsic(MI))
1512         IntrinToInstrument.push_back(MI);
1513   }
1514 
1515   memtag::StackInfo &SInfo = SIB.get();
1516 
1517   initializeCallbacks(*F.getParent());
1518 
1519   if (!LandingPadVec.empty())
1520     instrumentLandingPads(LandingPadVec);
1521 
1522   if (SInfo.AllocasToInstrument.empty() && F.hasPersonalityFn() &&
1523       F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
1524     // __hwasan_personality_thunk is a no-op for functions without an
1525     // instrumented stack, so we can drop it.
1526     F.setPersonalityFn(nullptr);
1527   }
1528 
1529   if (SInfo.AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
1530       IntrinToInstrument.empty())
1531     return;
1532 
1533   assert(!ShadowBase);
1534 
1535   Instruction *InsertPt = &*F.getEntryBlock().begin();
1536   IRBuilder<> EntryIRB(InsertPt);
1537   emitPrologue(EntryIRB,
1538                /*WithFrameRecord*/ ClRecordStackHistory != none &&
1539                    Mapping.WithFrameRecord &&
1540                    !SInfo.AllocasToInstrument.empty());
1541 
1542   if (!SInfo.AllocasToInstrument.empty()) {
1543     const DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1544     const PostDominatorTree &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
1545     const LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
1546     Value *StackTag = getStackBaseTag(EntryIRB);
1547     Value *UARTag = getUARTag(EntryIRB);
1548     instrumentStack(SInfo, StackTag, UARTag, DT, PDT, LI);
1549   }
1550 
1551   // If we split the entry block, move any allocas that were originally in the
1552   // entry block back into the entry block so that they aren't treated as
1553   // dynamic allocas.
1554   if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
1555     InsertPt = &*F.getEntryBlock().begin();
1556     for (Instruction &I :
1557          llvm::make_early_inc_range(*EntryIRB.GetInsertBlock())) {
1558       if (auto *AI = dyn_cast<AllocaInst>(&I))
1559         if (isa<ConstantInt>(AI->getArraySize()))
1560           I.moveBefore(InsertPt);
1561     }
1562   }
1563 
1564   DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
1565   PostDominatorTree *PDT = FAM.getCachedResult<PostDominatorTreeAnalysis>(F);
1566   LoopInfo *LI = FAM.getCachedResult<LoopAnalysis>(F);
1567   DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
1568   for (auto &Operand : OperandsToInstrument)
1569     instrumentMemAccess(Operand, DTU, LI);
1570   DTU.flush();
1571 
1572   if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
1573     for (auto *Inst : IntrinToInstrument)
1574       instrumentMemIntrinsic(Inst);
1575   }
1576 
1577   ShadowBase = nullptr;
1578   StackBaseTag = nullptr;
1579   CachedSP = nullptr;
1580 }
1581 
1582 void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
1583   assert(!UsePageAliases);
1584   Constant *Initializer = GV->getInitializer();
1585   uint64_t SizeInBytes =
1586       M.getDataLayout().getTypeAllocSize(Initializer->getType());
1587   uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
1588   if (SizeInBytes != NewSize) {
1589     // Pad the initializer out to the next multiple of 16 bytes and add the
1590     // required short granule tag.
1591     std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
1592     Init.back() = Tag;
1593     Constant *Padding = ConstantDataArray::get(*C, Init);
1594     Initializer = ConstantStruct::getAnon({Initializer, Padding});
1595   }
1596 
1597   auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
1598                                    GlobalValue::ExternalLinkage, Initializer,
1599                                    GV->getName() + ".hwasan");
1600   NewGV->copyAttributesFrom(GV);
1601   NewGV->setLinkage(GlobalValue::PrivateLinkage);
1602   NewGV->copyMetadata(GV, 0);
1603   NewGV->setAlignment(
1604       std::max(GV->getAlign().valueOrOne(), Mapping.getObjectAlignment()));
1605 
1606   // It is invalid to ICF two globals that have different tags. In the case
1607   // where the size of the global is a multiple of the tag granularity the
1608   // contents of the globals may be the same but the tags (i.e. symbol values)
1609   // may be different, and the symbols are not considered during ICF. In the
1610   // case where the size is not a multiple of the granularity, the short granule
1611   // tags would discriminate two globals with different tags, but there would
1612   // otherwise be nothing stopping such a global from being incorrectly ICF'd
1613   // with an uninstrumented (i.e. tag 0) global that happened to have the short
1614   // granule tag in the last byte.
1615   NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
1616 
1617   // Descriptor format (assuming little-endian):
1618   // bytes 0-3: relative address of global
1619   // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case
1620   // it isn't, we create multiple descriptors)
1621   // byte 7: tag
1622   auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
1623   const uint64_t MaxDescriptorSize = 0xfffff0;
1624   for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
1625        DescriptorPos += MaxDescriptorSize) {
1626     auto *Descriptor =
1627         new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
1628                            nullptr, GV->getName() + ".hwasan.descriptor");
1629     auto *GVRelPtr = ConstantExpr::getTrunc(
1630         ConstantExpr::getAdd(
1631             ConstantExpr::getSub(
1632                 ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1633                 ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
1634             ConstantInt::get(Int64Ty, DescriptorPos)),
1635         Int32Ty);
1636     uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
1637     auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
1638     Descriptor->setComdat(NewGV->getComdat());
1639     Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
1640     Descriptor->setSection("hwasan_globals");
1641     Descriptor->setMetadata(LLVMContext::MD_associated,
1642                             MDNode::get(*C, ValueAsMetadata::get(NewGV)));
1643     appendToCompilerUsed(M, Descriptor);
1644   }
1645 
1646   Constant *Aliasee = ConstantExpr::getIntToPtr(
1647       ConstantExpr::getAdd(
1648           ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1649           ConstantInt::get(Int64Ty, uint64_t(Tag) << PointerTagShift)),
1650       GV->getType());
1651   auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
1652                                     GV->getLinkage(), "", Aliasee, &M);
1653   Alias->setVisibility(GV->getVisibility());
1654   Alias->takeName(GV);
1655   GV->replaceAllUsesWith(Alias);
1656   GV->eraseFromParent();
1657 }
1658 
1659 void HWAddressSanitizer::instrumentGlobals() {
1660   std::vector<GlobalVariable *> Globals;
1661   for (GlobalVariable &GV : M.globals()) {
1662     if (GV.hasSanitizerMetadata() && GV.getSanitizerMetadata().NoHWAddress)
1663       continue;
1664 
1665     if (GV.isDeclarationForLinker() || GV.getName().starts_with("llvm.") ||
1666         GV.isThreadLocal())
1667       continue;
1668 
1669     // Common symbols can't have aliases point to them, so they can't be tagged.
1670     if (GV.hasCommonLinkage())
1671       continue;
1672 
1673     // Globals with custom sections may be used in __start_/__stop_ enumeration,
1674     // which would be broken both by adding tags and potentially by the extra
1675     // padding/alignment that we insert.
1676     if (GV.hasSection())
1677       continue;
1678 
1679     Globals.push_back(&GV);
1680   }
1681 
1682   MD5 Hasher;
1683   Hasher.update(M.getSourceFileName());
1684   MD5::MD5Result Hash;
1685   Hasher.final(Hash);
1686   uint8_t Tag = Hash[0];
1687 
1688   assert(TagMaskByte >= 16);
1689 
1690   for (GlobalVariable *GV : Globals) {
1691     // Don't allow globals to be tagged with something that looks like a
1692     // short-granule tag, otherwise we lose inter-granule overflow detection, as
1693     // the fast path shadow-vs-address check succeeds.
1694     if (Tag < 16 || Tag > TagMaskByte)
1695       Tag = 16;
1696     instrumentGlobal(GV, Tag++);
1697   }
1698 }
1699 
1700 void HWAddressSanitizer::instrumentPersonalityFunctions() {
1701   // We need to untag stack frames as we unwind past them. That is the job of
1702   // the personality function wrapper, which either wraps an existing
1703   // personality function or acts as a personality function on its own. Each
1704   // function that has a personality function or that can be unwound past has
1705   // its personality function changed to a thunk that calls the personality
1706   // function wrapper in the runtime.
1707   MapVector<Constant *, std::vector<Function *>> PersonalityFns;
1708   for (Function &F : M) {
1709     if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
1710       continue;
1711 
1712     if (F.hasPersonalityFn()) {
1713       PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F);
1714     } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
1715       PersonalityFns[nullptr].push_back(&F);
1716     }
1717   }
1718 
1719   if (PersonalityFns.empty())
1720     return;
1721 
1722   FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
1723       "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty, PtrTy,
1724       PtrTy, PtrTy, PtrTy, PtrTy);
1725   FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
1726   FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);
1727 
1728   for (auto &P : PersonalityFns) {
1729     std::string ThunkName = kHwasanPersonalityThunkName;
1730     if (P.first)
1731       ThunkName += ("." + P.first->getName()).str();
1732     FunctionType *ThunkFnTy = FunctionType::get(
1733         Int32Ty, {Int32Ty, Int32Ty, Int64Ty, PtrTy, PtrTy}, false);
1734     bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
1735                                cast<GlobalValue>(P.first)->hasLocalLinkage());
1736     auto *ThunkFn = Function::Create(ThunkFnTy,
1737                                      IsLocal ? GlobalValue::InternalLinkage
1738                                              : GlobalValue::LinkOnceODRLinkage,
1739                                      ThunkName, &M);
1740     if (!IsLocal) {
1741       ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
1742       ThunkFn->setComdat(M.getOrInsertComdat(ThunkName));
1743     }
1744 
1745     auto *BB = BasicBlock::Create(*C, "entry", ThunkFn);
1746     IRBuilder<> IRB(BB);
1747     CallInst *WrapperCall = IRB.CreateCall(
1748         HwasanPersonalityWrapper,
1749         {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
1750          ThunkFn->getArg(3), ThunkFn->getArg(4),
1751          P.first ? P.first : Constant::getNullValue(PtrTy),
1752          UnwindGetGR.getCallee(), UnwindGetCFA.getCallee()});
1753     WrapperCall->setTailCall();
1754     IRB.CreateRet(WrapperCall);
1755 
1756     for (Function *F : P.second)
1757       F->setPersonalityFn(ThunkFn);
1758   }
1759 }
1760 
1761 void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple,
1762                                              bool InstrumentWithCalls) {
1763   Scale = kDefaultShadowScale;
1764   if (TargetTriple.isOSFuchsia()) {
1765     // Fuchsia is always PIE, which means that the beginning of the address
1766     // space is always available.
1767     InGlobal = false;
1768     InTls = false;
1769     Offset = 0;
1770     WithFrameRecord = true;
1771   } else if (ClMappingOffset.getNumOccurrences() > 0) {
1772     InGlobal = false;
1773     InTls = false;
1774     Offset = ClMappingOffset;
1775     WithFrameRecord = false;
1776   } else if (ClEnableKhwasan || InstrumentWithCalls) {
1777     InGlobal = false;
1778     InTls = false;
1779     Offset = 0;
1780     WithFrameRecord = false;
1781   } else if (ClWithIfunc) {
1782     InGlobal = true;
1783     InTls = false;
1784     Offset = kDynamicShadowSentinel;
1785     WithFrameRecord = false;
1786   } else if (ClWithTls) {
1787     InGlobal = false;
1788     InTls = true;
1789     Offset = kDynamicShadowSentinel;
1790     WithFrameRecord = true;
1791   } else {
1792     InGlobal = false;
1793     InTls = false;
1794     Offset = kDynamicShadowSentinel;
1795     WithFrameRecord = false;
1796   }
1797 }
1798