//===- HWAddressSanitizer.cpp - memory access error detector --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file is a part of HWAddressSanitizer, a basic correctness checker
/// for memory addresses based on tagged addressing.
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "hwasan"

const char kHwasanModuleCtorName[] = "hwasan.module_ctor";
const char kHwasanNoteName[] = "hwasan.note";
const char kHwasanInitName[] = "__hwasan_init";
const char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk";

const char kHwasanShadowMemoryDynamicAddress[] =
    "__hwasan_shadow_memory_dynamic_address";

// Access sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;

static const size_t kDefaultShadowScale = 4;
static const uint64_t kDynamicShadowSentinel =
    std::numeric_limits<uint64_t>::max();

static const unsigned kShadowBaseAlignment = 32;

static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("hwasan-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__hwasan_"));

static cl::opt<bool> ClKasanMemIntrinCallbackPrefix(
    "hwasan-kernel-mem-intrinsic-prefix",
    cl::desc("Use prefix for memory intrinsics in KASAN mode"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentWithCalls(
    "hwasan-instrument-with-calls",
    cl::desc("instrument reads and writes with callbacks"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("hwasan-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "hwasan-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval",
                                       cl::desc("instrument byval arguments"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClRecover("hwasan-recover",
              cl::desc("Enable recovery mode (continue-after-error)."),
              cl::Hidden, cl::init(false));

static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
                                       cl::desc("instrument stack (allocas)"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClUseStackSafety("hwasan-use-stack-safety", cl::Hidden, cl::init(true),
                     cl::desc("Use Stack Safety analysis results"),
                     cl::Optional);

static cl::opt<size_t> ClMaxLifetimes(
    "hwasan-max-lifetimes-for-alloca", cl::init(3), cl::ReallyHidden,
    cl::desc("How many lifetime ends to handle for a single alloca."),
    cl::Optional);

static cl::opt<bool>
    ClUseAfterScope("hwasan-use-after-scope",
                    cl::desc("detect use after scope within function"),
                    cl::Hidden, cl::init(false));

static cl::opt<bool> ClGenerateTagsWithCalls(
    "hwasan-generate-tags-with-calls",
    cl::desc("generate new tags with runtime library calls"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
                               cl::Hidden, cl::init(false));

static cl::opt<int> ClMatchAllTag(
    "hwasan-match-all-tag",
    cl::desc("don't report bad accesses via pointers with this tag"),
    cl::Hidden, cl::init(-1));

static cl::opt<bool>
    ClEnableKhwasan("hwasan-kernel",
                    cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
                    cl::Hidden, cl::init(false));

// These flags allow changing the shadow mapping and control how shadow memory
// is accessed. The shadow mapping looks like:
//    Shadow = (Mem >> scale) + offset
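//
// For example (illustrative numbers only): with the default scale of 4,
// i.e. 16-byte granules, and a fixed offset, the shadow byte for the
// granule containing address 0x5500001000 lives at
// offset + (0x5500001000 >> 4).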

static cl::opt<uint64_t>
    ClMappingOffset("hwasan-mapping-offset",
                    cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
                    cl::Hidden, cl::init(0));

static cl::opt<bool>
    ClWithIfunc("hwasan-with-ifunc",
                cl::desc("Access dynamic shadow through an ifunc global on "
                         "platforms that support this"),
                cl::Hidden, cl::init(false));

static cl::opt<bool> ClWithTls(
    "hwasan-with-tls",
    cl::desc("Access dynamic shadow through a thread-local pointer on "
             "platforms that support this"),
    cl::Hidden, cl::init(true));

// Mode for selecting how to insert frame record info into the stack ring
// buffer.
enum RecordStackHistoryMode {
  // Do not record frame record info.
  none,

  // Insert instructions into the prologue for storing into the stack ring
  // buffer directly.
  instr,

  // Add a call to __hwasan_add_frame_record in the runtime.
  libcall,
};

static cl::opt<RecordStackHistoryMode> ClRecordStackHistory(
    "hwasan-record-stack-history",
    cl::desc("Record stack frames with tagged allocations in a thread-local "
             "ring buffer"),
    cl::values(clEnumVal(none, "Do not record stack ring history"),
               clEnumVal(instr, "Insert instructions into the prologue for "
                                "storing into the stack ring buffer directly"),
               clEnumVal(libcall, "Add a call to __hwasan_add_frame_record for "
                                  "storing into the stack ring buffer")),
    cl::Hidden, cl::init(instr));

static cl::opt<bool>
    ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
                              cl::desc("instrument memory intrinsics"),
                              cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentLandingPads("hwasan-instrument-landing-pads",
                            cl::desc("instrument landing pads"), cl::Hidden,
                            cl::init(false));

static cl::opt<bool> ClUseShortGranules(
    "hwasan-use-short-granules",
    cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentPersonalityFunctions(
    "hwasan-instrument-personality-functions",
    cl::desc("instrument personality functions"), cl::Hidden);

static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
                                       cl::desc("inline all checks"),
                                       cl::Hidden, cl::init(false));

// Enabled from clang by "-fsanitize-hwaddress-experimental-aliasing".
static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases",
                                      cl::desc("Use page aliasing in HWASan"),
                                      cl::Hidden, cl::init(false));

namespace {

bool shouldUsePageAliases(const Triple &TargetTriple) {
  return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64;
}

bool shouldInstrumentStack(const Triple &TargetTriple) {
  return !shouldUsePageAliases(TargetTriple) && ClInstrumentStack;
}

bool shouldInstrumentWithCalls(const Triple &TargetTriple) {
  return ClInstrumentWithCalls.getNumOccurrences()
             ? ClInstrumentWithCalls
             : TargetTriple.getArch() == Triple::x86_64;
}

bool mightUseStackSafetyAnalysis(bool DisableOptimization) {
  return ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
                                              : !DisableOptimization;
}

bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
                                  bool DisableOptimization) {
  return shouldInstrumentStack(TargetTriple) &&
         mightUseStackSafetyAnalysis(DisableOptimization);
}

bool shouldDetectUseAfterScope(const Triple &TargetTriple) {
  return ClUseAfterScope && shouldInstrumentStack(TargetTriple);
}

/// An instrumentation pass implementing detection of addressability bugs
/// using tagged pointers.
class HWAddressSanitizer {
public:
  HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
                     const StackSafetyGlobalInfo *SSI)
      : M(M), SSI(SSI) {
    this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
    this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0
                              ? ClEnableKhwasan
                              : CompileKernel;

    initializeModule();
  }

  void setSSI(const StackSafetyGlobalInfo *S) { SSI = S; }

  void sanitizeFunction(Function &F, FunctionAnalysisManager &FAM);
  void initializeModule();
  void createHwasanCtorComdat();

  void initializeCallbacks(Module &M);

  Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val);

  Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
  Value *getShadowNonTls(IRBuilder<> &IRB);

  void untagPointerOperand(Instruction *I, Value *Addr);
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);

  int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex);
  void instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
                                  unsigned AccessSizeIndex,
                                  Instruction *InsertBefore);
  void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                 unsigned AccessSizeIndex,
                                 Instruction *InsertBefore);
  bool ignoreMemIntrinsic(MemIntrinsic *MI);
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  bool instrumentMemAccess(InterestingMemoryOperand &O);
  bool ignoreAccess(Instruction *Inst, Value *Ptr);
  void getInterestingMemoryOperands(
      Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting);

  void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
  Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
  Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
  bool instrumentStack(memtag::StackInfo &Info, Value *StackTag, Value *UARTag,
                       const DominatorTree &DT, const PostDominatorTree &PDT,
                       const LoopInfo &LI);
  Value *readRegister(IRBuilder<> &IRB, StringRef Name);
  bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
  Value *getNextTagWithCall(IRBuilder<> &IRB);
  Value *getStackBaseTag(IRBuilder<> &IRB);
  Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, unsigned AllocaNo);
  Value *getUARTag(IRBuilder<> &IRB);

  Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
  Value *applyTagMask(IRBuilder<> &IRB, Value *OldTag);
  unsigned retagMask(unsigned AllocaNo);

  void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);

  void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
  void instrumentGlobals();

  Value *getPC(IRBuilder<> &IRB);
  Value *getSP(IRBuilder<> &IRB);
  Value *getFrameRecordInfo(IRBuilder<> &IRB);

  void instrumentPersonalityFunctions();

private:
  LLVMContext *C;
  Module &M;
  const StackSafetyGlobalInfo *SSI;
  Triple TargetTriple;

  /// This struct defines the shadow mapping using the rule:
  ///   shadow = (mem >> Scale) + Offset.
  /// If InGlobal is true, then
  ///   extern char __hwasan_shadow[];
  ///   shadow = (mem >> Scale) + &__hwasan_shadow
  /// If InTls is true, then
  ///   extern char *__hwasan_tls;
  ///   shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
  ///
  /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
  /// ring buffer for storing stack allocations on targets that support it.
  struct ShadowMapping {
    uint8_t Scale;
    uint64_t Offset;
    bool InGlobal;
    bool InTls;
    bool WithFrameRecord;

    void init(Triple &TargetTriple, bool InstrumentWithCalls);
    Align getObjectAlignment() const { return Align(1ULL << Scale); }
  };

  ShadowMapping Mapping;

  Type *VoidTy = Type::getVoidTy(M.getContext());
  Type *IntptrTy;
  Type *Int8PtrTy;
  Type *Int8Ty;
  Type *Int32Ty;
  Type *Int64Ty = Type::getInt64Ty(M.getContext());

  bool CompileKernel;
  bool Recover;
  bool OutlinedChecks;
  bool UseShortGranules;
  bool InstrumentLandingPads;
  bool InstrumentWithCalls;
  bool InstrumentStack;
  bool DetectUseAfterScope;
  bool UsePageAliases;
  bool UseMatchAllCallback;

  std::optional<uint8_t> MatchAllTag;

  unsigned PointerTagShift;
  uint64_t TagMaskByte;

  Function *HwasanCtorFunction;

  FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
  FunctionCallee HwasanMemoryAccessCallbackSized[2];

  FunctionCallee HwasanMemmove, HwasanMemcpy, HwasanMemset;
  FunctionCallee HwasanHandleVfork;

  FunctionCallee HwasanTagMemoryFunc;
  FunctionCallee HwasanGenerateTagFunc;
  FunctionCallee HwasanRecordFrameRecordFunc;

  Constant *ShadowGlobal;

  Value *ShadowBase = nullptr;
  Value *StackBaseTag = nullptr;
  Value *CachedSP = nullptr;
  GlobalValue *ThreadPtrGlobal = nullptr;
};

} // end anonymous namespace

PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
                                              ModuleAnalysisManager &MAM) {
  const StackSafetyGlobalInfo *SSI = nullptr;
  auto TargetTriple = llvm::Triple(M.getTargetTriple());
  if (shouldUseStackSafetyAnalysis(TargetTriple, Options.DisableOptimization))
    SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);

  HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
  auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  for (Function &F : M)
    HWASan.sanitizeFunction(F, FAM);

  PreservedAnalyses PA = PreservedAnalyses::none();
  // GlobalsAA is considered stateless and does not get invalidated unless
  // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
  // make changes that require GlobalsAA to be invalidated.
  PA.abandon<GlobalsAA>();
  return PA;
}

void HWAddressSanitizerPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  OS << '<';
  if (Options.CompileKernel)
    OS << "kernel;";
  if (Options.Recover)
    OS << "recover";
  OS << '>';
}

void HWAddressSanitizer::createHwasanCtorComdat() {
  std::tie(HwasanCtorFunction, std::ignore) =
      getOrCreateSanitizerCtorAndInitFunctions(
          M, kHwasanModuleCtorName, kHwasanInitName,
          /*InitArgTypes=*/{},
          /*InitArgs=*/{},
          // This callback is invoked when the functions are created the first
          // time. Hook them into the global ctors list in that case:
          [&](Function *Ctor, FunctionCallee) {
            Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
            Ctor->setComdat(CtorComdat);
            appendToGlobalCtors(M, Ctor, 0, Ctor);
          });

  // Create a note that contains pointers to the list of global
  // descriptors. Adding a note to the output file will cause the linker to
  // create a PT_NOTE program header pointing to the note that we can use to
  // find the descriptor list starting from the program headers. A function
  // provided by the runtime initializes the shadow memory for the globals by
  // accessing the descriptor list via the note. The dynamic loader needs to
  // call this function whenever a library is loaded.
  //
  // The reason why we use a note for this instead of a more conventional
  // approach of having a global constructor pass a descriptor list pointer to
  // the runtime is because of an order of initialization problem. With
  // constructors we can encounter the following problematic scenario:
  //
  // 1) library A depends on library B and also interposes one of B's symbols
  // 2) B's constructors are called before A's (as required for correctness)
  // 3) during construction, B accesses one of its "own" globals (actually
  //    interposed by A) and triggers a HWASAN failure due to the initialization
  //    for A not having happened yet
  //
  // Even without interposition it is possible to run into similar situations in
  // cases where two libraries mutually depend on each other.
  //
  // We only need one note per binary, so put everything for the note in a
  // comdat. This needs to be a comdat with an .init_array section to prevent
  // newer versions of lld from discarding the note.
  //
  // Create the note even if we aren't instrumenting globals. This ensures that
  // binaries linked from object files with both instrumented and
  // non-instrumented globals will end up with a note, even if a comdat from an
  // object file with non-instrumented globals is selected. The note is harmless
  // if the runtime doesn't support it, since it will just be ignored.
  Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName);

  Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
  auto *Start =
      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
                         nullptr, "__start_hwasan_globals");
  Start->setVisibility(GlobalValue::HiddenVisibility);
  auto *Stop =
      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
                         nullptr, "__stop_hwasan_globals");
  Stop->setVisibility(GlobalValue::HiddenVisibility);

  // The implicit null terminator makes the name 8 bytes, which is required to
  // keep the note properly aligned.
  auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");

  auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
                                 Int32Ty, Int32Ty);
  auto *Note =
      new GlobalVariable(M, NoteTy, /*isConstant=*/true,
                         GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
  Note->setSection(".note.hwasan.globals");
  Note->setComdat(NoteComdat);
  Note->setAlignment(Align(4));

  // The pointers in the note need to be relative so that the note ends up being
  // placed in rodata, which is the standard location for notes.
  auto CreateRelPtr = [&](Constant *Ptr) {
    return ConstantExpr::getTrunc(
        ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
                             ConstantExpr::getPtrToInt(Note, Int64Ty)),
        Int32Ty);
  };
  Note->setInitializer(ConstantStruct::getAnon(
      {ConstantInt::get(Int32Ty, 8),                           // n_namesz
       ConstantInt::get(Int32Ty, 8),                           // n_descsz
       ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
       Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
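  // For reference, the resulting note is laid out roughly as follows (byte
  // offsets are illustrative and assume the usual ELF note encoding):
  //   0x00  n_namesz = 8
  //   0x04  n_descsz = 8
  //   0x08  n_type   = NT_LLVM_HWASAN_GLOBALS
  //   0x0c  "LLVM" plus 4 bytes of padding
  //   0x14  descriptor: 32-bit note-relative pointers to the start/stop symbols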
  appendToCompilerUsed(M, Note);

  // Create a zero-length global in hwasan_globals so that the linker will
  // always create start and stop symbols.
  auto *Dummy = new GlobalVariable(
      M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
      Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
  Dummy->setSection("hwasan_globals");
  Dummy->setComdat(NoteComdat);
  Dummy->setMetadata(LLVMContext::MD_associated,
                     MDNode::get(*C, ValueAsMetadata::get(Note)));
  appendToCompilerUsed(M, Dummy);
}

/// Module-level initialization.
///
/// Inserts a call to __hwasan_init into the module's constructor list.
void HWAddressSanitizer::initializeModule() {
  LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
  auto &DL = M.getDataLayout();

  TargetTriple = Triple(M.getTargetTriple());

  // x86_64 currently has two modes:
  // - Intel LAM (default)
  // - pointer aliasing (heap only)
  bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
  UsePageAliases = shouldUsePageAliases(TargetTriple);
  InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple);
  InstrumentStack = shouldInstrumentStack(TargetTriple);
  DetectUseAfterScope = shouldDetectUseAfterScope(TargetTriple);
  PointerTagShift = IsX86_64 ? 57 : 56;
  TagMaskByte = IsX86_64 ? 0x3F : 0xFF;

  Mapping.init(TargetTriple, InstrumentWithCalls);

  C = &(M.getContext());
  IRBuilder<> IRB(*C);
  IntptrTy = IRB.getIntPtrTy(DL);
  Int8PtrTy = IRB.getInt8PtrTy();
  Int8Ty = IRB.getInt8Ty();
  Int32Ty = IRB.getInt32Ty();

  HwasanCtorFunction = nullptr;

  // Older versions of Android do not have the required runtime support for
  // short granules, globals, or personality function instrumentation. On other
  // platforms we currently require using the latest version of the runtime.
  bool NewRuntime =
      !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);

  UseShortGranules =
      ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime;
  OutlinedChecks =
      (TargetTriple.isAArch64() || TargetTriple.isRISCV64()) &&
      TargetTriple.isOSBinFormatELF() &&
      (ClInlineAllChecks.getNumOccurrences() ? !ClInlineAllChecks : !Recover);

  if (ClMatchAllTag.getNumOccurrences()) {
    if (ClMatchAllTag != -1) {
      MatchAllTag = ClMatchAllTag & 0xFF;
    }
  } else if (CompileKernel) {
    MatchAllTag = 0xFF;
  }
  UseMatchAllCallback = !CompileKernel && MatchAllTag.has_value();

  // If we don't have personality function support, fall back to landing pads.
  InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences()
                              ? ClInstrumentLandingPads
                              : !NewRuntime;

  if (!CompileKernel) {
    createHwasanCtorComdat();
    bool InstrumentGlobals =
        ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime;

    if (InstrumentGlobals && !UsePageAliases)
      instrumentGlobals();

    bool InstrumentPersonalityFunctions =
        ClInstrumentPersonalityFunctions.getNumOccurrences()
            ? ClInstrumentPersonalityFunctions
            : NewRuntime;
    if (InstrumentPersonalityFunctions)
      instrumentPersonalityFunctions();
  }

  if (!TargetTriple.isAndroid()) {
    Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
      auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
                                    GlobalValue::ExternalLinkage, nullptr,
                                    "__hwasan_tls", nullptr,
                                    GlobalVariable::InitialExecTLSModel);
      appendToCompilerUsed(M, GV);
      return GV;
    });
    ThreadPtrGlobal = cast<GlobalVariable>(C);
  }
}

void HWAddressSanitizer::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(*C);
  const std::string MatchAllStr = UseMatchAllCallback ? "_match_all" : "";
  FunctionType *HwasanMemoryAccessCallbackSizedFnTy,
      *HwasanMemoryAccessCallbackFnTy, *HwasanMemTransferFnTy,
      *HwasanMemsetFnTy;
  if (UseMatchAllCallback) {
    HwasanMemoryAccessCallbackSizedFnTy =
        FunctionType::get(VoidTy, {IntptrTy, IntptrTy, Int8Ty}, false);
    HwasanMemoryAccessCallbackFnTy =
        FunctionType::get(VoidTy, {IntptrTy, Int8Ty}, false);
    HwasanMemTransferFnTy = FunctionType::get(
        Int8PtrTy, {Int8PtrTy, Int8PtrTy, IntptrTy, Int8Ty}, false);
    HwasanMemsetFnTy = FunctionType::get(
        Int8PtrTy, {Int8PtrTy, Int32Ty, IntptrTy, Int8Ty}, false);
  } else {
    HwasanMemoryAccessCallbackSizedFnTy =
        FunctionType::get(VoidTy, {IntptrTy, IntptrTy}, false);
    HwasanMemoryAccessCallbackFnTy =
        FunctionType::get(VoidTy, {IntptrTy}, false);
    HwasanMemTransferFnTy =
        FunctionType::get(Int8PtrTy, {Int8PtrTy, Int8PtrTy, IntptrTy}, false);
    HwasanMemsetFnTy =
        FunctionType::get(Int8PtrTy, {Int8PtrTy, Int32Ty, IntptrTy}, false);
  }

  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
    const std::string TypeStr = AccessIsWrite ? "store" : "load";
    const std::string EndingStr = Recover ? "_noabort" : "";

    HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
        ClMemoryAccessCallbackPrefix + TypeStr + "N" + MatchAllStr + EndingStr,
        HwasanMemoryAccessCallbackSizedFnTy);

    for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
         AccessSizeIndex++) {
      HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
          M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr +
                                    itostr(1ULL << AccessSizeIndex) +
                                    MatchAllStr + EndingStr,
                                HwasanMemoryAccessCallbackFnTy);
    }
  }
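  // The callbacks requested above follow the naming pattern
  // <prefix><load|store><1|2|4|8|16|N>[_match_all][_noabort]. For example,
  // with the default prefix this yields "__hwasan_load4", and in recover
  // mode with a match-all tag, "__hwasan_storeN_match_all_noabort".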

  const std::string MemIntrinCallbackPrefix =
      (CompileKernel && !ClKasanMemIntrinCallbackPrefix)
          ? std::string("")
          : ClMemoryAccessCallbackPrefix;

  HwasanMemmove = M.getOrInsertFunction(
      MemIntrinCallbackPrefix + "memmove" + MatchAllStr, HwasanMemTransferFnTy);
  HwasanMemcpy = M.getOrInsertFunction(
      MemIntrinCallbackPrefix + "memcpy" + MatchAllStr, HwasanMemTransferFnTy);
  HwasanMemset = M.getOrInsertFunction(
      MemIntrinCallbackPrefix + "memset" + MatchAllStr, HwasanMemsetFnTy);

  HwasanTagMemoryFunc = M.getOrInsertFunction("__hwasan_tag_memory", VoidTy,
                                              Int8PtrTy, Int8Ty, IntptrTy);
  HwasanGenerateTagFunc =
      M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);

  HwasanRecordFrameRecordFunc =
      M.getOrInsertFunction("__hwasan_add_frame_record", VoidTy, Int64Ty);

  ShadowGlobal =
      M.getOrInsertGlobal("__hwasan_shadow", ArrayType::get(Int8Ty, 0));

  HwasanHandleVfork =
      M.getOrInsertFunction("__hwasan_handle_vfork", VoidTy, IntptrTy);
}

Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
  // An empty inline asm with input reg == output reg.
  // An opaque no-op cast, basically.
  // This prevents code bloat as a result of rematerializing trivial definitions
  // such as constants or global addresses at every load and store.
  InlineAsm *Asm =
      InlineAsm::get(FunctionType::get(Int8PtrTy, {Val->getType()}, false),
                     StringRef(""), StringRef("=r,0"),
                     /*hasSideEffects=*/false);
  return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
}

Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
  return getOpaqueNoopCast(IRB, ShadowGlobal);
}

Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
  if (Mapping.Offset != kDynamicShadowSentinel)
    return getOpaqueNoopCast(
        IRB, ConstantExpr::getIntToPtr(
                 ConstantInt::get(IntptrTy, Mapping.Offset), Int8PtrTy));

  if (Mapping.InGlobal)
    return getDynamicShadowIfunc(IRB);

  Value *GlobalDynamicAddress =
      IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
          kHwasanShadowMemoryDynamicAddress, Int8PtrTy);
  return IRB.CreateLoad(Int8PtrTy, GlobalDynamicAddress);
}

bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
  // Do not instrument accesses from different address spaces; we cannot deal
  // with them.
  Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
  if (PtrTy->getPointerAddressSpace() != 0)
    return true;

  // Ignore swifterror addresses.
  // swifterror memory addresses are mem2reg promoted by instruction
  // selection. As such they cannot have regular uses like an instrumentation
  // function and it makes no sense to track them as memory.
  if (Ptr->isSwiftError())
    return true;

  if (findAllocaForValue(Ptr)) {
    if (!InstrumentStack)
      return true;
    if (SSI && SSI->stackAccessIsSafe(*Inst))
      return true;
  }
  return false;
}

void HWAddressSanitizer::getInterestingMemoryOperands(
    Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
  // Skip memory accesses inserted by another instrumentation.
  if (I->hasMetadata(LLVMContext::MD_nosanitize))
    return;

  // Do not instrument the load fetching the dynamic shadow address.
  if (ShadowBase == I)
    return;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand()))
      return;
    Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
                             LI->getType(), LI->getAlign());
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand()))
      return;
    Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
                             SI->getValueOperand()->getType(), SI->getAlign());
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand()))
      return;
    Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
                             RMW->getValOperand()->getType(), std::nullopt);
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand()))
      return;
    Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
                             XCHG->getCompareOperand()->getType(),
                             std::nullopt);
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
      if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
          ignoreAccess(I, CI->getArgOperand(ArgNo)))
        continue;
      Type *Ty = CI->getParamByValType(ArgNo);
      Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
    }
  }
}

static unsigned getPointerOperandIndex(Instruction *I) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I))
    return LI->getPointerOperandIndex();
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getPointerOperandIndex();
  if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
    return RMW->getPointerOperandIndex();
  if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
    return XCHG->getPointerOperandIndex();
  report_fatal_error("Unexpected instruction");
  return -1;
}

static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
  size_t Res = llvm::countr_zero(TypeSize / 8);
  assert(Res < kNumberOfAccessSizes);
  return Res;
}
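// For instance, a 4-byte access (TypeSize == 32 bits) maps to index 2, since
// 4 == 1 << 2; the index selects among the kNumberOfAccessSizes callback and
// check variants.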

void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
  if (TargetTriple.isAArch64() || TargetTriple.getArch() == Triple::x86_64 ||
      TargetTriple.isRISCV64())
    return;

  IRBuilder<> IRB(I);
  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
  Value *UntaggedPtr =
      IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
  I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
}

Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
  // Mem >> Scale
  Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
  if (Mapping.Offset == 0)
    return IRB.CreateIntToPtr(Shadow, Int8PtrTy);
  // (Mem >> Scale) + Offset
  return IRB.CreateGEP(Int8Ty, ShadowBase, Shadow);
}
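// Concretely (illustrative): with Scale == 4, the shadow byte for the 16-byte
// granule containing untagged address A is located at ShadowBase + (A >> 4).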

int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
                                          unsigned AccessSizeIndex) {
  return (CompileKernel << HWASanAccessInfo::CompileKernelShift) |
         (MatchAllTag.has_value() << HWASanAccessInfo::HasMatchAllShift) |
         (MatchAllTag.value_or(0) << HWASanAccessInfo::MatchAllShift) |
         (Recover << HWASanAccessInfo::RecoverShift) |
         (IsWrite << HWASanAccessInfo::IsWriteShift) |
         (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
}
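// For example, a recoverable 8-byte store packs AccessSizeIndex == 3 at
// AccessSizeShift and sets the IsWrite and Recover bits; the runtime and the
// outlined/inline checks decode these same fields from the resulting constant.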

void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
                                                    unsigned AccessSizeIndex,
                                                    Instruction *InsertBefore) {
  assert(!UsePageAliases);
  const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
  IRBuilder<> IRB(InsertBefore);
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
  IRB.CreateCall(Intrinsic::getDeclaration(
                     M, UseShortGranules
                            ? Intrinsic::hwasan_check_memaccess_shortgranules
                            : Intrinsic::hwasan_check_memaccess),
                 {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
}

void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                                   unsigned AccessSizeIndex,
                                                   Instruction *InsertBefore) {
  assert(!UsePageAliases);
  const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
  IRBuilder<> IRB(InsertBefore);

  Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
  Value *PtrTag =
      IRB.CreateTrunc(IRB.CreateLShr(PtrLong, PointerTagShift), Int8Ty);
  Value *AddrLong = untagPointer(IRB, PtrLong);
  Value *Shadow = memToShadow(AddrLong, IRB);
  Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow);
  Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);

  if (MatchAllTag.has_value()) {
    Value *TagNotIgnored = IRB.CreateICmpNE(
        PtrTag, ConstantInt::get(PtrTag->getType(), *MatchAllTag));
    TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
  }

  Instruction *CheckTerm =
      SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false,
                                MDBuilder(*C).createBranchWeights(1, 100000));

  IRB.SetInsertPoint(CheckTerm);
  Value *OutOfShortGranuleTagRange =
      IRB.CreateICmpUGT(MemTag, ConstantInt::get(Int8Ty, 15));
  Instruction *CheckFailTerm =
      SplitBlockAndInsertIfThen(OutOfShortGranuleTagRange, CheckTerm, !Recover,
                                MDBuilder(*C).createBranchWeights(1, 100000));

  IRB.SetInsertPoint(CheckTerm);
  Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(PtrLong, 15), Int8Ty);
  PtrLowBits = IRB.CreateAdd(
      PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
  Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, MemTag);
  SplitBlockAndInsertIfThen(PtrLowBitsOOB, CheckTerm, false,
                            MDBuilder(*C).createBranchWeights(1, 100000),
                            (DomTreeUpdater *)nullptr, nullptr,
                            CheckFailTerm->getParent());

  IRB.SetInsertPoint(CheckTerm);
  Value *InlineTagAddr = IRB.CreateOr(AddrLong, 15);
  InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, Int8PtrTy);
  Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
  Value *InlineTagMismatch = IRB.CreateICmpNE(PtrTag, InlineTag);
  SplitBlockAndInsertIfThen(InlineTagMismatch, CheckTerm, false,
                            MDBuilder(*C).createBranchWeights(1, 100000),
                            (DomTreeUpdater *)nullptr, nullptr,
                            CheckFailTerm->getParent());

  IRB.SetInsertPoint(CheckFailTerm);
  InlineAsm *Asm;
  switch (TargetTriple.getArch()) {
  case Triple::x86_64:
    // The signal handler will find the data address in rdi.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {PtrLong->getType()}, false),
        "int3\nnopl " +
            itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
            "(%rax)",
        "{rdi}",
        /*hasSideEffects=*/true);
    break;
  case Triple::aarch64:
  case Triple::aarch64_be:
    // The signal handler will find the data address in x0.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {PtrLong->getType()}, false),
        "brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
        "{x0}",
        /*hasSideEffects=*/true);
    break;
  case Triple::riscv64:
    // The signal handler will find the data address in x10.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {PtrLong->getType()}, false),
        "ebreak\naddiw x0, x11, " +
            itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
        "{x10}",
        /*hasSideEffects=*/true);
    break;
  default:
    report_fatal_error("unsupported architecture");
  }
  IRB.CreateCall(Asm, PtrLong);
  if (Recover)
    cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent());
}

bool HWAddressSanitizer::ignoreMemIntrinsic(MemIntrinsic *MI) {
  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
    return (!ClInstrumentWrites || ignoreAccess(MTI, MTI->getDest())) &&
           (!ClInstrumentReads || ignoreAccess(MTI, MTI->getSource()));
  }
  if (isa<MemSetInst>(MI))
    return !ClInstrumentWrites || ignoreAccess(MI, MI->getDest());
  return false;
}

void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  if (isa<MemTransferInst>(MI)) {
    if (UseMatchAllCallback) {
      IRB.CreateCall(
          isa<MemMoveInst>(MI) ? HwasanMemmove : HwasanMemcpy,
          {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
           IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
           IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false),
           ConstantInt::get(Int8Ty, *MatchAllTag)});
    } else {
      IRB.CreateCall(
          isa<MemMoveInst>(MI) ? HwasanMemmove : HwasanMemcpy,
          {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
           IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
           IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
    }
  } else if (isa<MemSetInst>(MI)) {
    if (UseMatchAllCallback) {
      IRB.CreateCall(
          HwasanMemset,
          {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
           IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
           IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false),
           ConstantInt::get(Int8Ty, *MatchAllTag)});
    } else {
      IRB.CreateCall(
          HwasanMemset,
          {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
           IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
           IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
    }
  }
  MI->eraseFromParent();
}

bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) {
  Value *Addr = O.getPtr();

  LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");

  if (O.MaybeMask)
    return false; // FIXME

  IRBuilder<> IRB(O.getInsn());
  if (!O.TypeStoreSize.isScalable() && isPowerOf2_64(O.TypeStoreSize) &&
      (O.TypeStoreSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
      (!O.Alignment || *O.Alignment >= Mapping.getObjectAlignment() ||
       *O.Alignment >= O.TypeStoreSize / 8)) {
    size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeStoreSize);
    if (InstrumentWithCalls) {
      if (UseMatchAllCallback) {
        IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
                       {IRB.CreatePointerCast(Addr, IntptrTy),
                        ConstantInt::get(Int8Ty, *MatchAllTag)});
      } else {
        IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
                       IRB.CreatePointerCast(Addr, IntptrTy));
      }
    } else if (OutlinedChecks) {
      instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
    } else {
      instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
    }
  } else {
    if (UseMatchAllCallback) {
      IRB.CreateCall(
          HwasanMemoryAccessCallbackSized[O.IsWrite],
          {IRB.CreatePointerCast(Addr, IntptrTy),
           IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, O.TypeStoreSize),
                          ConstantInt::get(IntptrTy, 8)),
           ConstantInt::get(Int8Ty, *MatchAllTag)});
    } else {
      IRB.CreateCall(
          HwasanMemoryAccessCallbackSized[O.IsWrite],
          {IRB.CreatePointerCast(Addr, IntptrTy),
           IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, O.TypeStoreSize),
                          ConstantInt::get(IntptrTy, 8))});
    }
  }
  untagPointerOperand(O.getInsn(), Addr);

  return true;
}

void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
                                   size_t Size) {
  size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
  if (!UseShortGranules)
    Size = AlignedSize;

  Tag = IRB.CreateTrunc(Tag, Int8Ty);
  if (InstrumentWithCalls) {
    IRB.CreateCall(HwasanTagMemoryFunc,
                   {IRB.CreatePointerCast(AI, Int8PtrTy), Tag,
                    ConstantInt::get(IntptrTy, AlignedSize)});
  } else {
    size_t ShadowSize = Size >> Mapping.Scale;
    Value *AddrLong = untagPointer(IRB, IRB.CreatePointerCast(AI, IntptrTy));
    Value *ShadowPtr = memToShadow(AddrLong, IRB);
    // If this memset is not inlined, it will be intercepted in the hwasan
    // runtime library. That's OK, because the interceptor skips the checks if
    // the address is in the shadow region.
    // FIXME: the interceptor is not as fast as real memset. Consider lowering
    // llvm.memset right here into either a sequence of stores, or a call to
    // hwasan_tag_memory.
    if (ShadowSize)
      IRB.CreateMemSet(ShadowPtr, Tag, ShadowSize, Align(1));
    if (Size != AlignedSize) {
      const uint8_t SizeRemainder = Size % Mapping.getObjectAlignment().value();
      IRB.CreateStore(ConstantInt::get(Int8Ty, SizeRemainder),
                      IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
      IRB.CreateStore(Tag, IRB.CreateConstGEP1_32(
                               Int8Ty, IRB.CreatePointerCast(AI, Int8PtrTy),
                               AlignedSize - 1));
    }
  }
}
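// Worked example (illustrative): tagging a 20-byte alloca with short granules
// enabled rounds AlignedSize up to 32; the first shadow byte gets the tag,
// the second gets the short granule size 4 (20 % 16), and the tag itself is
// stored in the last byte of the 32-byte object.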

unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
  if (TargetTriple.getArch() == Triple::x86_64)
    return AllocaNo & TagMaskByte;

  // A list of 8-bit numbers that have at most one run of non-zero bits.
  // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
  // masks.
  // The list does not include the value 255, which is used for UAR.
  //
  // Because we are more likely to use earlier elements of this list than later
  // ones, it is sorted in increasing order of probability of collision with a
  // mask allocated (temporally) nearby. The program that generated this list
  // can be found at:
  // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
  static const unsigned FastMasks[] = {
      0,   128, 64, 192, 32,  96,  224, 112, 240, 48, 16,  120,
      248, 56,  24, 8,   124, 252, 60,  28,  12,  4,  126, 254,
      62,  30,  14, 6,   2,   127, 63,  31,  15,  7,  3,   1};
  return FastMasks[AllocaNo % std::size(FastMasks)];
}
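// For example, 192 == 0b11000000 has a single run of set bits, so on AArch64
// x = x ^ (192ULL << 56) can be encoded as one EOR-immediate instruction.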

Value *HWAddressSanitizer::applyTagMask(IRBuilder<> &IRB, Value *OldTag) {
  if (TagMaskByte == 0xFF)
    return OldTag; // No need to clear the tag byte.
  return IRB.CreateAnd(OldTag,
                       ConstantInt::get(OldTag->getType(), TagMaskByte));
}

Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
  return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
}

Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
  if (ClGenerateTagsWithCalls)
    return nullptr;
  if (StackBaseTag)
    return StackBaseTag;
  // Extract some entropy from the stack pointer for the tags.
  // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
  // between functions).
  Value *StackPointerLong = getSP(IRB);
  Value *StackTag =
      applyTagMask(IRB, IRB.CreateXor(StackPointerLong,
                                      IRB.CreateLShr(StackPointerLong, 20)));
  StackTag->setName("hwasan.stack.base.tag");
  return StackTag;
}

Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
                                        unsigned AllocaNo) {
  if (ClGenerateTagsWithCalls)
    return getNextTagWithCall(IRB);
  return IRB.CreateXor(
      StackTag, ConstantInt::get(StackTag->getType(), retagMask(AllocaNo)));
}

Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB) {
  Value *StackPointerLong = getSP(IRB);
  Value *UARTag =
      applyTagMask(IRB, IRB.CreateLShr(StackPointerLong, PointerTagShift));

  UARTag->setName("hwasan.uar.tag");
  return UARTag;
}

// Add a tag to an address.
Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
                                      Value *PtrLong, Value *Tag) {
  assert(!UsePageAliases);
  Value *TaggedPtrLong;
  if (CompileKernel) {
    // Kernel addresses have 0xFF in the most significant byte.
    Value *ShiftedTag =
        IRB.CreateOr(IRB.CreateShl(Tag, PointerTagShift),
                     ConstantInt::get(IntptrTy, (1ULL << PointerTagShift) - 1));
    TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
  } else {
    // Userspace can simply do OR (tag << PointerTagShift).
    Value *ShiftedTag = IRB.CreateShl(Tag, PointerTagShift);
    TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
  }
  return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
}
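// E.g. in userspace with PointerTagShift == 56, applying tag 0x2a to the
// (illustrative) pointer 0x00007fffffffe000 yields 0x2a007fffffffe000.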

// Remove tag from an address.
Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
  assert(!UsePageAliases);
  Value *UntaggedPtrLong;
  if (CompileKernel) {
    // Kernel addresses have 0xFF in the most significant byte.
    UntaggedPtrLong =
        IRB.CreateOr(PtrLong, ConstantInt::get(PtrLong->getType(),
                                               TagMaskByte << PointerTagShift));
  } else {
    // Userspace addresses have 0x00.
    UntaggedPtrLong = IRB.CreateAnd(
        PtrLong, ConstantInt::get(PtrLong->getType(),
                                  ~(TagMaskByte << PointerTagShift)));
  }
  return UntaggedPtrLong;
}

Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) {
    // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
    // in Bionic's libc/private/bionic_tls.h.
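    // (TLS_SLOT_SANITIZER is believed to be slot 6, which is where the
    // 0x30 == 6 * 8 byte offset below comes from.)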
    Function *ThreadPointerFunc =
        Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
    Value *SlotPtr = IRB.CreatePointerCast(
        IRB.CreateConstGEP1_32(Int8Ty, IRB.CreateCall(ThreadPointerFunc), 0x30),
        Ty->getPointerTo(0));
    return SlotPtr;
  }
  if (ThreadPtrGlobal)
    return ThreadPtrGlobal;

  return nullptr;
}

Value *HWAddressSanitizer::getPC(IRBuilder<> &IRB) {
  if (TargetTriple.getArch() == Triple::aarch64)
    return readRegister(IRB, "pc");
  return IRB.CreatePtrToInt(IRB.GetInsertBlock()->getParent(), IntptrTy);
}

Value *HWAddressSanitizer::getSP(IRBuilder<> &IRB) {
  if (!CachedSP) {
    // FIXME: use addressofreturnaddress (but implement it in aarch64 backend
    // first).
    Function *F = IRB.GetInsertBlock()->getParent();
    Module *M = F->getParent();
    auto *GetStackPointerFn = Intrinsic::getDeclaration(
        M, Intrinsic::frameaddress,
        IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
    CachedSP = IRB.CreatePtrToInt(
        IRB.CreateCall(GetStackPointerFn, {Constant::getNullValue(Int32Ty)}),
        IntptrTy);
  }
  return CachedSP;
}

Value *HWAddressSanitizer::getFrameRecordInfo(IRBuilder<> &IRB) {
  // Prepare ring buffer data.
  Value *PC = getPC(IRB);
  Value *SP = getSP(IRB);

  // Mix SP and PC.
  // Assumptions:
  // PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero)
  // SP is 0xsssssssssssSSSS0  (4 lower bits are zero)
  // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
  //       0xSSSSPPPPPPPPPPPP
  SP = IRB.CreateShl(SP, 44);
  return IRB.CreateOr(PC, SP);
}

void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
  if (!Mapping.InTls)
    ShadowBase = getShadowNonTls(IRB);
  else if (!WithFrameRecord && TargetTriple.isAndroid())
    ShadowBase = getDynamicShadowIfunc(IRB);

  if (!WithFrameRecord && ShadowBase)
    return;

  Value *SlotPtr = nullptr;
  Value *ThreadLong = nullptr;
  Value *ThreadLongMaybeUntagged = nullptr;

  auto getThreadLongMaybeUntagged = [&]() {
    if (!SlotPtr)
      SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
    if (!ThreadLong)
      ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
    // Extract the address field from ThreadLong. Unnecessary on AArch64 with
    // TBI.
    return TargetTriple.isAArch64() ? ThreadLong
                                    : untagPointer(IRB, ThreadLong);
  };

  if (WithFrameRecord) {
    switch (ClRecordStackHistory) {
    case libcall: {
      // Emit a runtime call into hwasan rather than emitting instructions for
      // recording stack history.
      Value *FrameRecordInfo = getFrameRecordInfo(IRB);
      IRB.CreateCall(HwasanRecordFrameRecordFunc, {FrameRecordInfo});
      break;
    }
    case instr: {
      ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();

      StackBaseTag = IRB.CreateAShr(ThreadLong, 3);

      // Store data to ring buffer.
      Value *FrameRecordInfo = getFrameRecordInfo(IRB);
      Value *RecordPtr = IRB.CreateIntToPtr(ThreadLongMaybeUntagged,
                                            IntptrTy->getPointerTo(0));
      IRB.CreateStore(FrameRecordInfo, RecordPtr);

      // Update the ring buffer. Top byte of ThreadLong defines the size of the
      // buffer in pages, it must be a power of two, and the start of the buffer
      // must be aligned by twice that much. Therefore wrap around of the ring
      // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
      // The use of AShr instead of LShr is due to
      //   https://bugs.llvm.org/show_bug.cgi?id=39030
      // Runtime library makes sure not to use the highest bit.
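      // Illustrative example: a top byte of 2 means a 2-page (0x2000-byte)
      // ring buffer, giving WrapMask == ~0x2000. When the incremented pointer
      // reaches the buffer end, the AND clears the overflow bit, wrapping the
      // write position back to the (0x4000-aligned) buffer start.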
      Value *WrapMask = IRB.CreateXor(
          IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
          ConstantInt::get(IntptrTy, (uint64_t)-1));
      Value *ThreadLongNew = IRB.CreateAnd(
          IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
      IRB.CreateStore(ThreadLongNew, SlotPtr);
      break;
    }
    case none: {
      llvm_unreachable(
          "A stack history recording mode should've been selected.");
    }
    }
  }

  if (!ShadowBase) {
    if (!ThreadLongMaybeUntagged)
      ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();

    // Get the shadow base address by aligning ThreadLongMaybeUntagged up.
    // Note: this is not correct if the value is already aligned; the runtime
    // library makes sure that never happens.
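    // With kShadowBaseAlignment == 32, this computes
    //   ShadowBase = (ThreadLongMaybeUntagged | 0xffffffff) + 1,
    // i.e. the value rounded up to the next 2^32 boundary.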
    ShadowBase = IRB.CreateAdd(
        IRB.CreateOr(
            ThreadLongMaybeUntagged,
            ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
        ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
    ShadowBase = IRB.CreateIntToPtr(ShadowBase, Int8PtrTy);
  }
}

Value *HWAddressSanitizer::readRegister(IRBuilder<> &IRB, StringRef Name) {
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Function *ReadRegister =
      Intrinsic::getDeclaration(M, Intrinsic::read_register, IntptrTy);
  MDNode *MD = MDNode::get(*C, {MDString::get(*C, Name)});
  Value *Args[] = {MetadataAsValue::get(*C, MD)};
  return IRB.CreateCall(ReadRegister, Args);
}
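
// For Name == "sp" on a 64-bit target this emits roughly
//   %sp = call i64 @llvm.read_register.i64(metadata !"sp")
// (a sketch; the integer type follows IntptrTy).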

bool HWAddressSanitizer::instrumentLandingPads(
    SmallVectorImpl<Instruction *> &LandingPadVec) {
  for (auto *LP : LandingPadVec) {
    IRBuilder<> IRB(LP->getNextNode());
    IRB.CreateCall(
        HwasanHandleVfork,
        {readRegister(IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp"
                                                                      : "sp")});
  }
  return true;
}

static bool isLifetimeIntrinsic(Value *V) {
  auto *II = dyn_cast<IntrinsicInst>(V);
  return II && II->isLifetimeStartOrEnd();
}

bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
                                         Value *StackTag, Value *UARTag,
                                         const DominatorTree &DT,
                                         const PostDominatorTree &PDT,
                                         const LoopInfo &LI) {
  // Ideally, we would calculate the tagged stack base pointer and rewrite all
  // alloca addresses using that. Unfortunately, the offsets are not known yet
  // (unless we use an ASan-style mega-alloca). Instead we keep the base tag
  // in a temp, shift-OR it into each alloca address, and xor it with the
  // retag mask. This generates one extra instruction per alloca use.
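  // A sketch of the per-alloca rewrite this loop performs (illustrative;
  // untagPointer/tagPointer emit the exact target-dependent sequence, with
  // an 8-bit tag in bits 56..63 on AArch64):
  //   %a.long   = ptrtoint ptr %a to i64      ; alloca address
  //   %a.notag  = <clear any old tag of %a.long>
  //   %a.tag    = xor i64 %stack.base.tag, <retagMask(N)>
  //   %a.hwasan = inttoptr (or i64 %a.notag, (%a.tag << 56)) to ptr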
  unsigned int I = 0;

  for (auto &KV : SInfo.AllocasToInstrument) {
    auto N = I++;
    auto *AI = KV.first;
    memtag::AllocaInfo &Info = KV.second;
    IRBuilder<> IRB(AI->getNextNode());

    // Replace uses of the alloca with the tagged address.
    Value *Tag = getAllocaTag(IRB, StackTag, N);
    Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
    Value *AINoTagLong = untagPointer(IRB, AILong);
    Value *Replacement = tagPointer(IRB, AI->getType(), AINoTagLong, Tag);
    std::string Name =
        AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
    Replacement->setName(Name + ".hwasan");

    size_t Size = memtag::getAllocaSizeInBytes(*AI);
    size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());

    Value *AICast = IRB.CreatePointerCast(AI, Int8PtrTy);

    auto HandleLifetime = [&](IntrinsicInst *II) {
      // Set the lifetime intrinsic to cover the whole alloca. This reduces
      // the set of assumptions we need to make about the lifetime. Without
      // this we would need to ensure that we can track the lifetime pointer
      // to a constant offset from the alloca, and would still need to change
      // the size to include the extra alignment we use for the untagging to
      // make the size consistent.
      //
      // The check for standard lifetime below makes sure that we have exactly
      // one set of start / end in any execution (i.e. the ends are not
      // reachable from each other), so this will not cause any problems.
      II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize));
      II->setArgOperand(1, AICast);
    };
    llvm::for_each(Info.LifetimeStart, HandleLifetime);
    llvm::for_each(Info.LifetimeEnd, HandleLifetime);

    AI->replaceUsesWithIf(Replacement, [AICast, AILong](const Use &U) {
      auto *User = U.getUser();
      return User != AILong && User != AICast && !isLifetimeIntrinsic(User);
    });

    for (auto *DDI : Info.DbgVariableIntrinsics) {
      // Prepend "tag_offset, N" to the dwarf expression.
      // Tag offset logically applies to the alloca pointer, and it makes
      // sense to put it at the beginning of the expression.
      SmallVector<uint64_t, 8> NewOps = {dwarf::DW_OP_LLVM_tag_offset,
                                         retagMask(N)};
      for (size_t LocNo = 0; LocNo < DDI->getNumVariableLocationOps(); ++LocNo)
        if (DDI->getVariableLocationOp(LocNo) == AI)
          DDI->setExpression(DIExpression::appendOpsToArg(DDI->getExpression(),
                                                          NewOps, LocNo));
    }

    auto TagEnd = [&](Instruction *Node) {
      IRB.SetInsertPoint(Node);
      // When untagging, use `AlignedSize` because we need to set the tags for
      // the entire alloca back to the original. If we used `Size` here, we
      // would keep the last granule tagged and store zero in the last byte of
      // the last granule, due to how short granules are implemented.
      tagAlloca(IRB, AI, UARTag, AlignedSize);
    };
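
    // Example: a 20-byte alloca has AlignedSize == 32. While live, granule 0
    // (16 bytes) has its shadow set to the pointer tag, and granule 1 is a
    // short granule: its shadow holds 4 (the number of accessible bytes) and
    // the tag itself sits in the granule's last memory byte. Retagging
    // AlignedSize == 32 bytes at exit resets both granules completely.
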
    // Calls to functions that may return twice (e.g. setjmp) confuse the
    // postdominator analysis and would leave memory tagged after the function
    // returns. Work around this by always untagging at every return statement
    // if return_twice functions are called.
    bool StandardLifetime =
        SInfo.UnrecognizedLifetimes.empty() &&
        memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, &DT,
                                   &LI, ClMaxLifetimes) &&
        !SInfo.CallsReturnTwice;
    if (DetectUseAfterScope && StandardLifetime) {
      IntrinsicInst *Start = Info.LifetimeStart[0];
      IRB.SetInsertPoint(Start->getNextNode());
      tagAlloca(IRB, AI, Tag, Size);
      if (!memtag::forAllReachableExits(DT, PDT, LI, Start, Info.LifetimeEnd,
                                        SInfo.RetVec, TagEnd)) {
        for (auto *End : Info.LifetimeEnd)
          End->eraseFromParent();
      }
    } else {
      tagAlloca(IRB, AI, Tag, Size);
      for (auto *RI : SInfo.RetVec)
        TagEnd(RI);
      // We inserted tagging outside of the lifetimes, so we have to remove
      // the lifetime intrinsics.
      for (auto &II : Info.LifetimeStart)
        II->eraseFromParent();
      for (auto &II : Info.LifetimeEnd)
        II->eraseFromParent();
    }
    memtag::alignAndPadAlloca(Info, Mapping.getObjectAlignment());
  }
  for (auto &I : SInfo.UnrecognizedLifetimes)
    I->eraseFromParent();
  return true;
}

void HWAddressSanitizer::sanitizeFunction(Function &F,
                                          FunctionAnalysisManager &FAM) {
  if (&F == HwasanCtorFunction)
    return;

  if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
    return;

  LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");

  SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
  SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
  SmallVector<Instruction *, 8> LandingPadVec;

  memtag::StackInfoBuilder SIB(SSI);
  for (auto &Inst : instructions(F)) {
    if (InstrumentStack) {
      SIB.visit(Inst);
    }

    if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
      LandingPadVec.push_back(&Inst);

    getInterestingMemoryOperands(&Inst, OperandsToInstrument);

    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
      if (!ignoreMemIntrinsic(MI))
        IntrinToInstrument.push_back(MI);
  }

  memtag::StackInfo &SInfo = SIB.get();

  initializeCallbacks(*F.getParent());

  if (!LandingPadVec.empty())
    instrumentLandingPads(LandingPadVec);

  if (SInfo.AllocasToInstrument.empty() && F.hasPersonalityFn() &&
      F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
    // __hwasan_personality_thunk is a no-op for functions without an
    // instrumented stack, so we can drop it.
    F.setPersonalityFn(nullptr);
  }

  if (SInfo.AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
      IntrinToInstrument.empty())
    return;

  assert(!ShadowBase);

  Instruction *InsertPt = &*F.getEntryBlock().begin();
  IRBuilder<> EntryIRB(InsertPt);
  emitPrologue(EntryIRB,
               /*WithFrameRecord*/ ClRecordStackHistory != none &&
                   Mapping.WithFrameRecord &&
                   !SInfo.AllocasToInstrument.empty());

  if (!SInfo.AllocasToInstrument.empty()) {
    const DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
    const PostDominatorTree &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
    const LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
    Value *StackTag = getStackBaseTag(EntryIRB);
    Value *UARTag = getUARTag(EntryIRB);
    instrumentStack(SInfo, StackTag, UARTag, DT, PDT, LI);
  }

  // If we split the entry block, move any allocas that were originally in the
  // entry block back into the entry block so that they aren't treated as
  // dynamic allocas.
  if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
    InsertPt = &*F.getEntryBlock().begin();
    for (Instruction &I :
         llvm::make_early_inc_range(*EntryIRB.GetInsertBlock())) {
      if (auto *AI = dyn_cast<AllocaInst>(&I))
        if (isa<ConstantInt>(AI->getArraySize()))
          I.moveBefore(InsertPt);
    }
  }

  for (auto &Operand : OperandsToInstrument)
    instrumentMemAccess(Operand);

  if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
    for (auto *Inst : IntrinToInstrument)
      instrumentMemIntrinsic(Inst);
  }

  ShadowBase = nullptr;
  StackBaseTag = nullptr;
  CachedSP = nullptr;
}

void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
  assert(!UsePageAliases);
  Constant *Initializer = GV->getInitializer();
  uint64_t SizeInBytes =
      M.getDataLayout().getTypeAllocSize(Initializer->getType());
  uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
  if (SizeInBytes != NewSize) {
    // Pad the initializer out to the next granule boundary (a multiple of 16
    // bytes by default) and add the required short granule tag.
    std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
    Init.back() = Tag;
    Constant *Padding = ConstantDataArray::get(*C, Init);
    Initializer = ConstantStruct::getAnon({Initializer, Padding});
  }

  auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
                                   GlobalValue::ExternalLinkage, Initializer,
                                   GV->getName() + ".hwasan");
  NewGV->copyAttributesFrom(GV);
  NewGV->setLinkage(GlobalValue::PrivateLinkage);
  NewGV->copyMetadata(GV, 0);
  NewGV->setAlignment(
      std::max(GV->getAlign().valueOrOne(), Mapping.getObjectAlignment()));

  // It is invalid to ICF two globals that have different tags. In the case
  // where the size of the global is a multiple of the tag granularity, the
  // contents of the globals may be the same but the tags (i.e. symbol values)
  // may be different, and the symbols are not considered during ICF. In the
  // case where the size is not a multiple of the granularity, the short
  // granule tags would discriminate two globals with different tags, but
  // there would otherwise be nothing stopping such a global from being
  // incorrectly ICF'd with an uninstrumented (i.e. tag 0) global that
  // happened to have the short granule tag in the last byte.
  NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);

  // Descriptor format (assuming little-endian):
  // bytes 0-3: relative address of global
  // bytes 4-6: size of global (16MB ought to be enough for anyone, but in
  // case it isn't, we create multiple descriptors)
  // byte 7: tag
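  // For example, a 1 KiB global with tag 0x21 gets a single descriptor whose
  // second word is 0x21000400: Size == 0x400 in the low 24 bits and the tag
  // in the top byte.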
  auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
  const uint64_t MaxDescriptorSize = 0xfffff0;
  for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
       DescriptorPos += MaxDescriptorSize) {
    auto *Descriptor =
        new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
                           nullptr, GV->getName() + ".hwasan.descriptor");
    auto *GVRelPtr = ConstantExpr::getTrunc(
        ConstantExpr::getAdd(
            ConstantExpr::getSub(
                ConstantExpr::getPtrToInt(NewGV, Int64Ty),
                ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
            ConstantInt::get(Int64Ty, DescriptorPos)),
        Int32Ty);
    uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
    auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
    Descriptor->setComdat(NewGV->getComdat());
    Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
    Descriptor->setSection("hwasan_globals");
    Descriptor->setMetadata(LLVMContext::MD_associated,
                            MDNode::get(*C, ValueAsMetadata::get(NewGV)));
    appendToCompilerUsed(M, Descriptor);
  }

  Constant *Aliasee = ConstantExpr::getIntToPtr(
      ConstantExpr::getAdd(
          ConstantExpr::getPtrToInt(NewGV, Int64Ty),
          ConstantInt::get(Int64Ty, uint64_t(Tag) << PointerTagShift)),
      GV->getType());
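  // E.g. with Tag == 0x21 and PointerTagShift == 56 (AArch64), the alias
  // resolves to the address of NewGV plus 0x2100000000000000, baking the tag
  // into the symbol's top byte.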
  auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
                                    GV->getLinkage(), "", Aliasee, &M);
  Alias->setVisibility(GV->getVisibility());
  Alias->takeName(GV);
  GV->replaceAllUsesWith(Alias);
  GV->eraseFromParent();
}

void HWAddressSanitizer::instrumentGlobals() {
  std::vector<GlobalVariable *> Globals;
  for (GlobalVariable &GV : M.globals()) {
    if (GV.hasSanitizerMetadata() && GV.getSanitizerMetadata().NoHWAddress)
      continue;

    if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") ||
        GV.isThreadLocal())
      continue;

    // Common symbols can't have aliases pointing to them, so they can't be
    // tagged.
    if (GV.hasCommonLinkage())
      continue;

    // Globals with custom sections may be used in __start_/__stop_
    // enumeration, which would be broken both by adding tags and potentially
    // by the extra padding/alignment that we insert.
    if (GV.hasSection())
      continue;

    Globals.push_back(&GV);
  }

  MD5 Hasher;
  Hasher.update(M.getSourceFileName());
  MD5::MD5Result Hash;
  Hasher.final(Hash);
  uint8_t Tag = Hash[0];

  assert(TagMaskByte >= 16);
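  // Tags are thus a deterministic function of the source file name, so the
  // same translation unit gets the same tag sequence on every build: e.g. if
  // Hash[0] is 0x07, the loop below remaps it into [16, TagMaskByte] and tags
  // the globals 16, 17, 18, ...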

  for (GlobalVariable *GV : Globals) {
    // Don't allow globals to be tagged with something that looks like a
    // short-granule tag; otherwise we would lose inter-granule overflow
    // detection, as the fast-path shadow-vs-address check would succeed.
    if (Tag < 16 || Tag > TagMaskByte)
      Tag = 16;
    instrumentGlobal(GV, Tag++);
  }
}

void HWAddressSanitizer::instrumentPersonalityFunctions() {
  // We need to untag stack frames as we unwind past them. That is the job of
  // the personality function wrapper, which either wraps an existing
  // personality function or acts as a personality function on its own. Each
  // function that has a personality function or that can be unwound past has
  // its personality function changed to a thunk that calls the personality
  // function wrapper in the runtime.
  MapVector<Constant *, std::vector<Function *>> PersonalityFns;
  for (Function &F : M) {
    if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
      continue;

    if (F.hasPersonalityFn()) {
      PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F);
    } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
      PersonalityFns[nullptr].push_back(&F);
    }
  }

  if (PersonalityFns.empty())
    return;

  FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
      "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty,
      Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy);
  FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
  FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);

  for (auto &P : PersonalityFns) {
    std::string ThunkName = kHwasanPersonalityThunkName;
    if (P.first)
      ThunkName += ("." + P.first->getName()).str();
    FunctionType *ThunkFnTy = FunctionType::get(
        Int32Ty, {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int8PtrTy}, false);
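    // The thunk type mirrors the C personality routine signature, roughly
    //   int thunk(int version, int actions, uint64_t exception_class,
    //             _Unwind_Exception *eo, _Unwind_Context *ctx);
    // the wrapper additionally receives the real personality function (or
    // null) and the _Unwind_GetGR/_Unwind_GetCFA helpers it needs to walk
    // and untag frames.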
    bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
                               cast<GlobalValue>(P.first)->hasLocalLinkage());
    auto *ThunkFn = Function::Create(ThunkFnTy,
                                     IsLocal ? GlobalValue::InternalLinkage
                                             : GlobalValue::LinkOnceODRLinkage,
                                     ThunkName, &M);
    if (!IsLocal) {
      ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
      ThunkFn->setComdat(M.getOrInsertComdat(ThunkName));
    }

    auto *BB = BasicBlock::Create(*C, "entry", ThunkFn);
    IRBuilder<> IRB(BB);
    CallInst *WrapperCall = IRB.CreateCall(
        HwasanPersonalityWrapper,
        {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
         ThunkFn->getArg(3), ThunkFn->getArg(4),
         P.first ? IRB.CreateBitCast(P.first, Int8PtrTy)
                 : Constant::getNullValue(Int8PtrTy),
         IRB.CreateBitCast(UnwindGetGR.getCallee(), Int8PtrTy),
         IRB.CreateBitCast(UnwindGetCFA.getCallee(), Int8PtrTy)});
    WrapperCall->setTailCall();
    IRB.CreateRet(WrapperCall);

    for (Function *F : P.second)
      F->setPersonalityFn(ThunkFn);
  }
}

void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple,
                                             bool InstrumentWithCalls) {
  Scale = kDefaultShadowScale;
  if (TargetTriple.isOSFuchsia()) {
    // Fuchsia is always PIE, which means that the beginning of the address
    // space is always available.
    InGlobal = false;
    InTls = false;
    Offset = 0;
    WithFrameRecord = true;
  } else if (ClMappingOffset.getNumOccurrences() > 0) {
    InGlobal = false;
    InTls = false;
    Offset = ClMappingOffset;
    WithFrameRecord = false;
  } else if (ClEnableKhwasan || InstrumentWithCalls) {
    InGlobal = false;
    InTls = false;
    Offset = 0;
    WithFrameRecord = false;
  } else if (ClWithIfunc) {
    InGlobal = true;
    InTls = false;
    Offset = kDynamicShadowSentinel;
    WithFrameRecord = false;
  } else if (ClWithTls) {
    InGlobal = false;
    InTls = true;
    Offset = kDynamicShadowSentinel;
    WithFrameRecord = true;
  } else {
    InGlobal = false;
    InTls = false;
    Offset = kDynamicShadowSentinel;
    WithFrameRecord = false;
  }
}
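
// To summarize the precedence above: Fuchsia first, then an explicit
// ClMappingOffset, then kernel or call-based instrumentation, then
// ClWithIfunc, then ClWithTls; in all remaining cases the shadow base is
// dynamic (Offset == kDynamicShadowSentinel with neither ifunc nor TLS) and
// is discovered at runtime.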