xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp (revision 3dd5524264095ed8612c28908e13f80668eff2f9)
//===- HWAddressSanitizer.cpp - detector of memory access errors ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of HWAddressSanitizer, an address basic correctness
11 /// checker based on tagged addressing.
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Triple.h"
21 #include "llvm/Analysis/PostDominators.h"
22 #include "llvm/Analysis/StackSafetyAnalysis.h"
23 #include "llvm/Analysis/ValueTracking.h"
24 #include "llvm/BinaryFormat/Dwarf.h"
25 #include "llvm/BinaryFormat/ELF.h"
26 #include "llvm/IR/Attributes.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constant.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/DebugInfoMetadata.h"
32 #include "llvm/IR/DerivedTypes.h"
33 #include "llvm/IR/Dominators.h"
34 #include "llvm/IR/Function.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/InlineAsm.h"
37 #include "llvm/IR/InstIterator.h"
38 #include "llvm/IR/Instruction.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/IntrinsicInst.h"
41 #include "llvm/IR/Intrinsics.h"
42 #include "llvm/IR/LLVMContext.h"
43 #include "llvm/IR/MDBuilder.h"
44 #include "llvm/IR/Module.h"
45 #include "llvm/IR/Type.h"
46 #include "llvm/IR/Value.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/CommandLine.h"
49 #include "llvm/Support/Debug.h"
50 #include "llvm/Support/raw_ostream.h"
51 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
52 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
53 #include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
54 #include "llvm/Transforms/Utils/ModuleUtils.h"
55 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
56 
57 using namespace llvm;
58 
59 #define DEBUG_TYPE "hwasan"
60 
// Name of the module constructor function. createHwasanCtorComdat() also uses
// it as the name of the comdat holding the constructor, the note and the dummy
// global.
const char kHwasanModuleCtorName[] = "hwasan.module_ctor";
// Name of the private global that holds the ELF note created in
// createHwasanCtorComdat().
const char kHwasanNoteName[] = "hwasan.note";
// Passed as the init-function name when the module constructor is created.
const char kHwasanInitName[] = "__hwasan_init";
const char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk";

// Global loaded by getShadowNonTls() when the shadow offset is dynamic and the
// shadow is not accessed through the __hwasan_shadow global.
const char kHwasanShadowMemoryDynamicAddress[] =
    "__hwasan_shadow_memory_dynamic_address";

// Access sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;

static const size_t kDefaultShadowScale = 4;
// Value of ShadowMapping::Offset meaning "not a compile-time constant";
// getShadowNonTls() only materializes the offset directly when it differs from
// this sentinel.
static const uint64_t kDynamicShadowSentinel =
    std::numeric_limits<uint64_t>::max();

static const unsigned kShadowBaseAlignment = 32;
77 
// Prefix prepended to the load/store callback names built in
// initializeCallbacks(); it is also used for the memmove/memcpy/memset
// replacements unless kernel mode suppresses the prefix.
static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("hwasan-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__hwasan_"));

// In kernel mode the memory intrinsic replacements are unprefixed unless this
// flag is set (see initializeCallbacks()).
static cl::opt<bool> ClKasanMemIntrinCallbackPrefix(
    "hwasan-kernel-mem-intrinsic-prefix",
    cl::desc("Use prefix for memory intrinsics in KASAN mode"), cl::Hidden,
    cl::init(false));

// Note: shouldInstrumentWithCalls() also returns true for every x86_64 target,
// regardless of this flag.
static cl::opt<bool> ClInstrumentWithCalls(
    "hwasan-instrument-with-calls",
    cl::desc("instrument reads and writes with callbacks"), cl::Hidden,
    cl::init(false));

// The next four flags gate which instruction kinds
// getInterestingMemoryOperands() reports: loads, stores, atomic
// rmw/cmpxchg, and byval call arguments respectively.
static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("hwasan-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "hwasan-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval",
                                       cl::desc("instrument byval arguments"),
                                       cl::Hidden, cl::init(true));

// When given on the command line, this overrides the Recover argument passed
// to the HWAddressSanitizer constructor.
static cl::opt<bool>
    ClRecover("hwasan-recover",
              cl::desc("Enable recovery mode (continue-after-error)."),
              cl::Hidden, cl::init(false));

// Stack instrumentation is additionally disabled whenever page aliasing is in
// use (see shouldInstrumentStack()).
static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
                                       cl::desc("instrument stack (allocas)"),
                                       cl::Hidden, cl::init(true));
119 
120 static cl::opt<bool>
121     ClUseStackSafety("hwasan-use-stack-safety", cl::Hidden, cl::init(true),
122                      cl::Hidden, cl::desc("Use Stack Safety analysis results"),
123                      cl::Optional);
124 
125 static cl::opt<size_t> ClMaxLifetimes(
126     "hwasan-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
127     cl::ReallyHidden,
128     cl::desc("How many lifetime ends to handle for a single alloca."),
129     cl::Optional);
130 
// Only effective when the stack is instrumented at all (see
// shouldDetectUseAfterScope()).
static cl::opt<bool>
    ClUseAfterScope("hwasan-use-after-scope",
                    cl::desc("detect use after scope within function"),
                    cl::Hidden, cl::init(false));

static cl::opt<bool> ClUARRetagToZero(
    "hwasan-uar-retag-to-zero",
    cl::desc("Clear alloca tags before returning from the function to allow "
             "non-instrumented and instrumented function calls mix. When set "
             "to false, allocas are retagged before returning from the "
             "function to detect use after return."),
    cl::Hidden, cl::init(true));

static cl::opt<bool> ClGenerateTagsWithCalls(
    "hwasan-generate-tags-with-calls",
    cl::desc("generate new tags with runtime library calls"), cl::Hidden,
    cl::init(false));

// When not given on the command line, initializeModule() instruments globals
// only if the target is considered to have a new enough runtime; the initial
// value below is then not consulted.
static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
                               cl::Hidden, cl::init(false));

// -1 disables the match-all tag when passed explicitly. When the flag is not
// given, initializeModule() uses 0xFF as the match-all tag in kernel mode and
// none otherwise.
static cl::opt<int> ClMatchAllTag(
    "hwasan-match-all-tag",
    cl::desc("don't report bad accesses via pointers with this tag"),
    cl::Hidden, cl::init(-1));

// When given on the command line, this overrides the CompileKernel argument
// passed to the HWAddressSanitizer constructor.
static cl::opt<bool>
    ClEnableKhwasan("hwasan-kernel",
                    cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
                    cl::Hidden, cl::init(false));

// These flags allow changing the shadow mapping and control how shadow memory
// is accessed. The shadow mapping looks like:
//    Shadow = (Mem >> scale) + offset

static cl::opt<uint64_t>
    ClMappingOffset("hwasan-mapping-offset",
                    cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
                    cl::Hidden, cl::init(0));

static cl::opt<bool>
    ClWithIfunc("hwasan-with-ifunc",
                cl::desc("Access dynamic shadow through an ifunc global on "
                         "platforms that support this"),
                cl::Hidden, cl::init(false));

static cl::opt<bool> ClWithTls(
    "hwasan-with-tls",
    cl::desc("Access dynamic shadow through an thread-local pointer on "
             "platforms that support this"),
    cl::Hidden, cl::init(true));
182 
// Mode for selecting how to insert frame record info into the stack ring
// buffer.
// The enumerator names double as the accepted command-line values, because
// clEnumVal below stringifies them; renaming an enumerator changes the option
// syntax.
enum RecordStackHistoryMode {
  // Do not record frame record info.
  none,

  // Insert instructions into the prologue for storing into the stack ring
  // buffer directly.
  instr,

  // Add a call to __hwasan_add_frame_record in the runtime.
  libcall,
};

// Selects the frame-record strategy; defaults to inline instructions (instr).
static cl::opt<RecordStackHistoryMode> ClRecordStackHistory(
    "hwasan-record-stack-history",
    cl::desc("Record stack frames with tagged allocations in a thread-local "
             "ring buffer"),
    cl::values(clEnumVal(none, "Do not record stack ring history"),
               clEnumVal(instr, "Insert instructions into the prologue for "
                                "storing into the stack ring buffer directly"),
               clEnumVal(libcall, "Add a call to __hwasan_add_frame_record for "
                                  "storing into the stack ring buffer")),
    cl::Hidden, cl::init(instr));
207 
static cl::opt<bool>
    ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
                              cl::desc("instrument memory intrinsics"),
                              cl::Hidden, cl::init(true));

// When not given on the command line, initializeModule() enables landing pad
// instrumentation exactly when the runtime is not considered new enough for
// personality function support.
static cl::opt<bool>
    ClInstrumentLandingPads("hwasan-instrument-landing-pads",
                            cl::desc("instrument landing pads"), cl::Hidden,
                            cl::init(false));

// When not given on the command line, initializeModule() enables short
// granules exactly when the runtime is considered new enough.
static cl::opt<bool> ClUseShortGranules(
    "hwasan-use-short-granules",
    cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
    cl::init(false));

// No cl::init here: initializeModule() only reads the value when the flag was
// given explicitly and otherwise derives the setting from the runtime version.
static cl::opt<bool> ClInstrumentPersonalityFunctions(
    "hwasan-instrument-personality-functions",
    cl::desc("instrument personality functions"), cl::Hidden);

// When given explicitly, outlined checks are used iff this flag is false (and
// the target is AArch64 ELF); when absent the decision follows !Recover.
static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
                                       cl::desc("inline all checks"),
                                       cl::Hidden, cl::init(false));

// Enabled from clang by "-fsanitize-hwaddress-experimental-aliasing".
// Only honoured on x86_64 (see shouldUsePageAliases()).
static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases",
                                      cl::desc("Use page aliasing in HWASan"),
                                      cl::Hidden, cl::init(false));
235 
236 namespace {
237 
238 bool shouldUsePageAliases(const Triple &TargetTriple) {
239   return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64;
240 }
241 
242 bool shouldInstrumentStack(const Triple &TargetTriple) {
243   return !shouldUsePageAliases(TargetTriple) && ClInstrumentStack;
244 }
245 
246 bool shouldInstrumentWithCalls(const Triple &TargetTriple) {
247   return ClInstrumentWithCalls || TargetTriple.getArch() == Triple::x86_64;
248 }
249 
250 bool mightUseStackSafetyAnalysis(bool DisableOptimization) {
251   return ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
252                                               : !DisableOptimization;
253 }
254 
255 bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
256                                   bool DisableOptimization) {
257   return shouldInstrumentStack(TargetTriple) &&
258          mightUseStackSafetyAnalysis(DisableOptimization);
259 }
260 
261 bool shouldDetectUseAfterScope(const Triple &TargetTriple) {
262   return ClUseAfterScope && shouldInstrumentStack(TargetTriple);
263 }
264 
/// An instrumentation pass implementing detection of addressability bugs
/// using tagged pointers.
///
/// One instance is created per module. The constructor already performs
/// module-level setup (see initializeModule()); functions are then processed
/// one at a time through sanitizeFunction().
class HWAddressSanitizer {
public:
  /// \param M             module to instrument.
  /// \param CompileKernel requested kernel mode; overridden by -hwasan-kernel
  ///                      when that flag is given on the command line.
  /// \param Recover       requested recovery mode; overridden by
  ///                      -hwasan-recover when that flag is given.
  /// \param SSI           stack safety results, or nullptr when unavailable.
  HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
                     const StackSafetyGlobalInfo *SSI)
      : M(M), SSI(SSI) {
    this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
    this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0
                              ? ClEnableKhwasan
                              : CompileKernel;

    // Must run after Recover/CompileKernel are set: initializeModule() reads
    // both.
    initializeModule();
  }

  void setSSI(const StackSafetyGlobalInfo *S) { SSI = S; }

  bool sanitizeFunction(Function &F, FunctionAnalysisManager &FAM);
  void initializeModule();
  void createHwasanCtorComdat();

  void initializeCallbacks(Module &M);

  Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val);

  Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
  Value *getShadowNonTls(IRBuilder<> &IRB);

  void untagPointerOperand(Instruction *I, Value *Addr);
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);

  int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex);
  void instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
                                  unsigned AccessSizeIndex,
                                  Instruction *InsertBefore);
  void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                 unsigned AccessSizeIndex,
                                 Instruction *InsertBefore);
  bool ignoreMemIntrinsic(MemIntrinsic *MI);
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  bool instrumentMemAccess(InterestingMemoryOperand &O);
  bool ignoreAccess(Instruction *Inst, Value *Ptr);
  void getInterestingMemoryOperands(
      Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting);

  bool isInterestingAlloca(const AllocaInst &AI);
  void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
  Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
  Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
  bool instrumentStack(memtag::StackInfo &Info, Value *StackTag,
                       const DominatorTree &DT, const PostDominatorTree &PDT,
                       const LoopInfo &LI);
  Value *readRegister(IRBuilder<> &IRB, StringRef Name);
  bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
  Value *getNextTagWithCall(IRBuilder<> &IRB);
  Value *getStackBaseTag(IRBuilder<> &IRB);
  Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI,
                      unsigned AllocaNo);
  Value *getUARTag(IRBuilder<> &IRB, Value *StackTag);

  Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
  Value *applyTagMask(IRBuilder<> &IRB, Value *OldTag);
  unsigned retagMask(unsigned AllocaNo);

  void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);

  void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
  void instrumentGlobals();

  Value *getPC(IRBuilder<> &IRB);
  Value *getSP(IRBuilder<> &IRB);
  Value *getFrameRecordInfo(IRBuilder<> &IRB);

  void instrumentPersonalityFunctions();

private:
  // Set in initializeModule(); not valid before that call.
  LLVMContext *C;
  Module &M;
  const StackSafetyGlobalInfo *SSI;
  Triple TargetTriple;
  FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset;
  FunctionCallee HWAsanHandleVfork;

  /// This struct defines the shadow mapping using the rule:
  ///   shadow = (mem >> Scale) + Offset.
  /// If InGlobal is true, then
  ///   extern char __hwasan_shadow[];
  ///   shadow = (mem >> Scale) + &__hwasan_shadow
  /// If InTls is true, then
  ///   extern char *__hwasan_tls;
  ///   shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
  ///
  /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
  /// ring buffer for storing stack allocations on targets that support it.
  ///
  /// NOTE(review): the InTls formula names __hwasan_shadow although the
  /// declaration directly above it is __hwasan_tls — confirm which symbol is
  /// intended.
  struct ShadowMapping {
    int Scale;
    uint64_t Offset;
    bool InGlobal;
    bool InTls;
    bool WithFrameRecord;

    void init(Triple &TargetTriple, bool InstrumentWithCalls);
    uint64_t getObjectAlignment() const { return 1ULL << Scale; }
  };

  ShadowMapping Mapping;

  // VoidTy and Int64Ty are initialized here from M's context; the remaining
  // cached types are assigned in initializeModule().
  Type *VoidTy = Type::getVoidTy(M.getContext());
  Type *IntptrTy;
  Type *Int8PtrTy;
  Type *Int8Ty;
  Type *Int32Ty;
  Type *Int64Ty = Type::getInt64Ty(M.getContext());

  // CompileKernel and Recover are set by the constructor; the other mode
  // flags below are computed in initializeModule().
  bool CompileKernel;
  bool Recover;
  bool OutlinedChecks;
  bool UseShortGranules;
  bool InstrumentLandingPads;
  bool InstrumentWithCalls;
  bool InstrumentStack;
  bool DetectUseAfterScope;
  bool UsePageAliases;

  bool HasMatchAllTag = false;
  uint8_t MatchAllTag = 0;

  // Both are 57 / 0x3F on x86_64 and 56 / 0xFF elsewhere (see
  // initializeModule()).
  unsigned PointerTagShift;
  uint64_t TagMaskByte;

  Function *HwasanCtorFunction;

  // Indexed by [IsWrite][AccessSizeIndex]; populated in initializeCallbacks().
  FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
  // Indexed by [IsWrite]; callbacks taking an explicit size argument.
  FunctionCallee HwasanMemoryAccessCallbackSized[2];

  FunctionCallee HwasanTagMemoryFunc;
  FunctionCallee HwasanGenerateTagFunc;
  FunctionCallee HwasanRecordFrameRecordFunc;

  Constant *ShadowGlobal;

  // getInterestingMemoryOperands() skips the instruction equal to ShadowBase
  // so that the shadow base load itself is not instrumented.
  Value *ShadowBase = nullptr;
  Value *StackBaseTag = nullptr;
  Value *CachedSP = nullptr;
  // The __hwasan_tls global; only set on non-Android targets.
  GlobalValue *ThreadPtrGlobal = nullptr;
};
411 
412 } // end anonymous namespace
413 
414 PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
415                                               ModuleAnalysisManager &MAM) {
416   const StackSafetyGlobalInfo *SSI = nullptr;
417   auto TargetTriple = llvm::Triple(M.getTargetTriple());
418   if (shouldUseStackSafetyAnalysis(TargetTriple, Options.DisableOptimization))
419     SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);
420 
421   HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
422   bool Modified = false;
423   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
424   for (Function &F : M)
425     Modified |= HWASan.sanitizeFunction(F, FAM);
426   if (Modified)
427     return PreservedAnalyses::none();
428   return PreservedAnalyses::all();
429 }
430 void HWAddressSanitizerPass::printPipeline(
431     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
432   static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline(
433       OS, MapClassName2PassName);
434   OS << "<";
435   if (Options.CompileKernel)
436     OS << "kernel;";
437   if (Options.Recover)
438     OS << "recover";
439   OS << ">";
440 }
441 
/// Creates (or reuses) the module constructor and emits the ELF note plus the
/// helper globals that describe the hwasan_globals section.
///
/// Everything created here is placed in the comdat named
/// kHwasanModuleCtorName. The note and the dummy global are appended to
/// llvm.compiler.used.
void HWAddressSanitizer::createHwasanCtorComdat() {
  std::tie(HwasanCtorFunction, std::ignore) =
      getOrCreateSanitizerCtorAndInitFunctions(
          M, kHwasanModuleCtorName, kHwasanInitName,
          /*InitArgTypes=*/{},
          /*InitArgs=*/{},
          // This callback is invoked when the functions are created the first
          // time. Hook them into the global ctors list in that case:
          [&](Function *Ctor, FunctionCallee) {
            Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
            Ctor->setComdat(CtorComdat);
            appendToGlobalCtors(M, Ctor, 0, Ctor);
          });

  // Create a note that contains pointers to the list of global
  // descriptors. Adding a note to the output file will cause the linker to
  // create a PT_NOTE program header pointing to the note that we can use to
  // find the descriptor list starting from the program headers. A function
  // provided by the runtime initializes the shadow memory for the globals by
  // accessing the descriptor list via the note. The dynamic loader needs to
  // call this function whenever a library is loaded.
  //
  // The reason why we use a note for this instead of a more conventional
  // approach of having a global constructor pass a descriptor list pointer to
  // the runtime is because of an order of initialization problem. With
  // constructors we can encounter the following problematic scenario:
  //
  // 1) library A depends on library B and also interposes one of B's symbols
  // 2) B's constructors are called before A's (as required for correctness)
  // 3) during construction, B accesses one of its "own" globals (actually
  //    interposed by A) and triggers a HWASAN failure due to the initialization
  //    for A not having happened yet
  //
  // Even without interposition it is possible to run into similar situations in
  // cases where two libraries mutually depend on each other.
  //
  // We only need one note per binary, so put everything for the note in a
  // comdat. This needs to be a comdat with an .init_array section to prevent
  // newer versions of lld from discarding the note.
  //
  // Create the note even if we aren't instrumenting globals. This ensures that
  // binaries linked from object files with both instrumented and
  // non-instrumented globals will end up with a note, even if a comdat from an
  // object file with non-instrumented globals is selected. The note is harmless
  // if the runtime doesn't support it, since it will just be ignored.
  Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName);

  // Declarations (no initializer) of the hidden, DSO-local start/stop symbols
  // that bound the hwasan_globals section.
  Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
  auto Start =
      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
                         nullptr, "__start_hwasan_globals");
  Start->setVisibility(GlobalValue::HiddenVisibility);
  Start->setDSOLocal(true);
  auto Stop =
      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
                         nullptr, "__stop_hwasan_globals");
  Stop->setVisibility(GlobalValue::HiddenVisibility);
  Stop->setDSOLocal(true);

  // Null-terminated so actually 8 bytes, which are required in order to align
  // the note properly.
  auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");

  // Note layout: n_namesz, n_descsz, n_type, name, then two 32-bit relative
  // pointers (start and stop of the globals list) as the descriptor.
  auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
                                 Int32Ty, Int32Ty);
  auto *Note =
      new GlobalVariable(M, NoteTy, /*isConstant=*/true,
                         GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
  Note->setSection(".note.hwasan.globals");
  Note->setComdat(NoteComdat);
  Note->setAlignment(Align(4));
  Note->setDSOLocal(true);

  // The pointers in the note need to be relative so that the note ends up being
  // placed in rodata, which is the standard location for notes.
  // Each relative pointer is (Ptr - &Note) computed in 64 bits and truncated
  // to 32 bits.
  auto CreateRelPtr = [&](Constant *Ptr) {
    return ConstantExpr::getTrunc(
        ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
                             ConstantExpr::getPtrToInt(Note, Int64Ty)),
        Int32Ty);
  };
  Note->setInitializer(ConstantStruct::getAnon(
      {ConstantInt::get(Int32Ty, 8),                           // n_namesz
       ConstantInt::get(Int32Ty, 8),                           // n_descsz
       ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
       Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
  appendToCompilerUsed(M, Note);

  // Create a zero-length global in hwasan_globals so that the linker will
  // always create start and stop symbols.
  auto Dummy = new GlobalVariable(
      M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
      Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
  Dummy->setSection("hwasan_globals");
  Dummy->setComdat(NoteComdat);
  // Tie the dummy global to the note through !associated metadata.
  Dummy->setMetadata(LLVMContext::MD_associated,
                     MDNode::get(*C, ValueAsMetadata::get(Note)));
  appendToCompilerUsed(M, Dummy);
}
541 
542 /// Module-level initialization.
543 ///
544 /// inserts a call to __hwasan_init to the module's constructor list.
545 void HWAddressSanitizer::initializeModule() {
546   LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
547   auto &DL = M.getDataLayout();
548 
549   TargetTriple = Triple(M.getTargetTriple());
550 
551   // x86_64 currently has two modes:
552   // - Intel LAM (default)
553   // - pointer aliasing (heap only)
554   bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
555   UsePageAliases = shouldUsePageAliases(TargetTriple);
556   InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple);
557   InstrumentStack = shouldInstrumentStack(TargetTriple);
558   DetectUseAfterScope = shouldDetectUseAfterScope(TargetTriple);
559   PointerTagShift = IsX86_64 ? 57 : 56;
560   TagMaskByte = IsX86_64 ? 0x3F : 0xFF;
561 
562   Mapping.init(TargetTriple, InstrumentWithCalls);
563 
564   C = &(M.getContext());
565   IRBuilder<> IRB(*C);
566   IntptrTy = IRB.getIntPtrTy(DL);
567   Int8PtrTy = IRB.getInt8PtrTy();
568   Int8Ty = IRB.getInt8Ty();
569   Int32Ty = IRB.getInt32Ty();
570 
571   HwasanCtorFunction = nullptr;
572 
573   // Older versions of Android do not have the required runtime support for
574   // short granules, global or personality function instrumentation. On other
575   // platforms we currently require using the latest version of the runtime.
576   bool NewRuntime =
577       !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);
578 
579   UseShortGranules =
580       ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime;
581   OutlinedChecks =
582       TargetTriple.isAArch64() && TargetTriple.isOSBinFormatELF() &&
583       (ClInlineAllChecks.getNumOccurrences() ? !ClInlineAllChecks : !Recover);
584 
585   if (ClMatchAllTag.getNumOccurrences()) {
586     if (ClMatchAllTag != -1) {
587       HasMatchAllTag = true;
588       MatchAllTag = ClMatchAllTag & 0xFF;
589     }
590   } else if (CompileKernel) {
591     HasMatchAllTag = true;
592     MatchAllTag = 0xFF;
593   }
594 
595   // If we don't have personality function support, fall back to landing pads.
596   InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences()
597                               ? ClInstrumentLandingPads
598                               : !NewRuntime;
599 
600   if (!CompileKernel) {
601     createHwasanCtorComdat();
602     bool InstrumentGlobals =
603         ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime;
604 
605     if (InstrumentGlobals && !UsePageAliases)
606       instrumentGlobals();
607 
608     bool InstrumentPersonalityFunctions =
609         ClInstrumentPersonalityFunctions.getNumOccurrences()
610             ? ClInstrumentPersonalityFunctions
611             : NewRuntime;
612     if (InstrumentPersonalityFunctions)
613       instrumentPersonalityFunctions();
614   }
615 
616   if (!TargetTriple.isAndroid()) {
617     Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
618       auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
619                                     GlobalValue::ExternalLinkage, nullptr,
620                                     "__hwasan_tls", nullptr,
621                                     GlobalVariable::InitialExecTLSModel);
622       appendToCompilerUsed(M, GV);
623       return GV;
624     });
625     ThreadPtrGlobal = cast<GlobalVariable>(C);
626   }
627 }
628 
629 void HWAddressSanitizer::initializeCallbacks(Module &M) {
630   IRBuilder<> IRB(*C);
631   for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
632     const std::string TypeStr = AccessIsWrite ? "store" : "load";
633     const std::string EndingStr = Recover ? "_noabort" : "";
634 
635     HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
636         ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
637         FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false));
638 
639     for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
640          AccessSizeIndex++) {
641       HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
642           M.getOrInsertFunction(
643               ClMemoryAccessCallbackPrefix + TypeStr +
644                   itostr(1ULL << AccessSizeIndex) + EndingStr,
645               FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false));
646     }
647   }
648 
649   HwasanTagMemoryFunc = M.getOrInsertFunction(
650       "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy);
651   HwasanGenerateTagFunc =
652       M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
653 
654   HwasanRecordFrameRecordFunc = M.getOrInsertFunction(
655       "__hwasan_add_frame_record", IRB.getVoidTy(), Int64Ty);
656 
657   ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
658                                      ArrayType::get(IRB.getInt8Ty(), 0));
659 
660   const std::string MemIntrinCallbackPrefix =
661       (CompileKernel && !ClKasanMemIntrinCallbackPrefix)
662           ? std::string("")
663           : ClMemoryAccessCallbackPrefix;
664   HWAsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
665                                         IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
666                                         IRB.getInt8PtrTy(), IntptrTy);
667   HWAsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
668                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
669                                        IRB.getInt8PtrTy(), IntptrTy);
670   HWAsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
671                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
672                                        IRB.getInt32Ty(), IntptrTy);
673 
674   HWAsanHandleVfork =
675       M.getOrInsertFunction("__hwasan_handle_vfork", IRB.getVoidTy(), IntptrTy);
676 }
677 
678 Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
679   // An empty inline asm with input reg == output reg.
680   // An opaque no-op cast, basically.
681   // This prevents code bloat as a result of rematerializing trivial definitions
682   // such as constants or global addresses at every load and store.
683   InlineAsm *Asm =
684       InlineAsm::get(FunctionType::get(Int8PtrTy, {Val->getType()}, false),
685                      StringRef(""), StringRef("=r,0"),
686                      /*hasSideEffects=*/false);
687   return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
688 }
689 
690 Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
691   return getOpaqueNoopCast(IRB, ShadowGlobal);
692 }
693 
694 Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
695   if (Mapping.Offset != kDynamicShadowSentinel)
696     return getOpaqueNoopCast(
697         IRB, ConstantExpr::getIntToPtr(
698                  ConstantInt::get(IntptrTy, Mapping.Offset), Int8PtrTy));
699 
700   if (Mapping.InGlobal) {
701     return getDynamicShadowIfunc(IRB);
702   } else {
703     Value *GlobalDynamicAddress =
704         IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
705             kHwasanShadowMemoryDynamicAddress, Int8PtrTy);
706     return IRB.CreateLoad(Int8PtrTy, GlobalDynamicAddress);
707   }
708 }
709 
710 bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
711   // Do not instrument acesses from different address spaces; we cannot deal
712   // with them.
713   Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
714   if (PtrTy->getPointerAddressSpace() != 0)
715     return true;
716 
717   // Ignore swifterror addresses.
718   // swifterror memory addresses are mem2reg promoted by instruction
719   // selection. As such they cannot have regular uses like an instrumentation
720   // function and it makes no sense to track them as memory.
721   if (Ptr->isSwiftError())
722     return true;
723 
724   if (findAllocaForValue(Ptr)) {
725     if (!InstrumentStack)
726       return true;
727     if (SSI && SSI->stackAccessIsSafe(*Inst))
728       return true;
729   }
730   return false;
731 }
732 
733 void HWAddressSanitizer::getInterestingMemoryOperands(
734     Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
735   // Skip memory accesses inserted by another instrumentation.
736   if (I->hasMetadata(LLVMContext::MD_nosanitize))
737     return;
738 
739   // Do not instrument the load fetching the dynamic shadow address.
740   if (ShadowBase == I)
741     return;
742 
743   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
744     if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand()))
745       return;
746     Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
747                              LI->getType(), LI->getAlign());
748   } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
749     if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand()))
750       return;
751     Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
752                              SI->getValueOperand()->getType(), SI->getAlign());
753   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
754     if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand()))
755       return;
756     Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
757                              RMW->getValOperand()->getType(), None);
758   } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
759     if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand()))
760       return;
761     Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
762                              XCHG->getCompareOperand()->getType(), None);
763   } else if (auto CI = dyn_cast<CallInst>(I)) {
764     for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
765       if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
766           ignoreAccess(I, CI->getArgOperand(ArgNo)))
767         continue;
768       Type *Ty = CI->getParamByValType(ArgNo);
769       Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
770     }
771   }
772 }
773 
774 static unsigned getPointerOperandIndex(Instruction *I) {
775   if (LoadInst *LI = dyn_cast<LoadInst>(I))
776     return LI->getPointerOperandIndex();
777   if (StoreInst *SI = dyn_cast<StoreInst>(I))
778     return SI->getPointerOperandIndex();
779   if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
780     return RMW->getPointerOperandIndex();
781   if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
782     return XCHG->getPointerOperandIndex();
783   report_fatal_error("Unexpected instruction");
784   return -1;
785 }
786 
787 static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
788   size_t Res = countTrailingZeros(TypeSize / 8);
789   assert(Res < kNumberOfAccessSizes);
790   return Res;
791 }
792 
793 void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
794   if (TargetTriple.isAArch64() || TargetTriple.getArch() == Triple::x86_64)
795     return;
796 
797   IRBuilder<> IRB(I);
798   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
799   Value *UntaggedPtr =
800       IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
801   I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
802 }
803 
804 Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
805   // Mem >> Scale
806   Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
807   if (Mapping.Offset == 0)
808     return IRB.CreateIntToPtr(Shadow, Int8PtrTy);
809   // (Mem >> Scale) + Offset
810   return IRB.CreateGEP(Int8Ty, ShadowBase, Shadow);
811 }
812 
813 int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
814                                           unsigned AccessSizeIndex) {
815   return (CompileKernel << HWASanAccessInfo::CompileKernelShift) +
816          (HasMatchAllTag << HWASanAccessInfo::HasMatchAllShift) +
817          (MatchAllTag << HWASanAccessInfo::MatchAllShift) +
818          (Recover << HWASanAccessInfo::RecoverShift) +
819          (IsWrite << HWASanAccessInfo::IsWriteShift) +
820          (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
821 }
822 
823 void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
824                                                     unsigned AccessSizeIndex,
825                                                     Instruction *InsertBefore) {
826   assert(!UsePageAliases);
827   const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
828   IRBuilder<> IRB(InsertBefore);
829   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
830   Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
831   IRB.CreateCall(Intrinsic::getDeclaration(
832                      M, UseShortGranules
833                             ? Intrinsic::hwasan_check_memaccess_shortgranules
834                             : Intrinsic::hwasan_check_memaccess),
835                  {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
836 }
837 
838 void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
839                                                    unsigned AccessSizeIndex,
840                                                    Instruction *InsertBefore) {
841   assert(!UsePageAliases);
842   const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
843   IRBuilder<> IRB(InsertBefore);
844 
845   Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
846   Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, PointerTagShift),
847                                   IRB.getInt8Ty());
848   Value *AddrLong = untagPointer(IRB, PtrLong);
849   Value *Shadow = memToShadow(AddrLong, IRB);
850   Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow);
851   Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);
852 
853   if (HasMatchAllTag) {
854     Value *TagNotIgnored = IRB.CreateICmpNE(
855         PtrTag, ConstantInt::get(PtrTag->getType(), MatchAllTag));
856     TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
857   }
858 
859   Instruction *CheckTerm =
860       SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false,
861                                 MDBuilder(*C).createBranchWeights(1, 100000));
862 
863   IRB.SetInsertPoint(CheckTerm);
864   Value *OutOfShortGranuleTagRange =
865       IRB.CreateICmpUGT(MemTag, ConstantInt::get(Int8Ty, 15));
866   Instruction *CheckFailTerm =
867       SplitBlockAndInsertIfThen(OutOfShortGranuleTagRange, CheckTerm, !Recover,
868                                 MDBuilder(*C).createBranchWeights(1, 100000));
869 
870   IRB.SetInsertPoint(CheckTerm);
871   Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(PtrLong, 15), Int8Ty);
872   PtrLowBits = IRB.CreateAdd(
873       PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
874   Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, MemTag);
875   SplitBlockAndInsertIfThen(PtrLowBitsOOB, CheckTerm, false,
876                             MDBuilder(*C).createBranchWeights(1, 100000),
877                             (DomTreeUpdater *)nullptr, nullptr,
878                             CheckFailTerm->getParent());
879 
880   IRB.SetInsertPoint(CheckTerm);
881   Value *InlineTagAddr = IRB.CreateOr(AddrLong, 15);
882   InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, Int8PtrTy);
883   Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
884   Value *InlineTagMismatch = IRB.CreateICmpNE(PtrTag, InlineTag);
885   SplitBlockAndInsertIfThen(InlineTagMismatch, CheckTerm, false,
886                             MDBuilder(*C).createBranchWeights(1, 100000),
887                             (DomTreeUpdater *)nullptr, nullptr,
888                             CheckFailTerm->getParent());
889 
890   IRB.SetInsertPoint(CheckFailTerm);
891   InlineAsm *Asm;
892   switch (TargetTriple.getArch()) {
893   case Triple::x86_64:
894     // The signal handler will find the data address in rdi.
895     Asm = InlineAsm::get(
896         FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
897         "int3\nnopl " +
898             itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
899             "(%rax)",
900         "{rdi}",
901         /*hasSideEffects=*/true);
902     break;
903   case Triple::aarch64:
904   case Triple::aarch64_be:
905     // The signal handler will find the data address in x0.
906     Asm = InlineAsm::get(
907         FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
908         "brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
909         "{x0}",
910         /*hasSideEffects=*/true);
911     break;
912   default:
913     report_fatal_error("unsupported architecture");
914   }
915   IRB.CreateCall(Asm, PtrLong);
916   if (Recover)
917     cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent());
918 }
919 
920 bool HWAddressSanitizer::ignoreMemIntrinsic(MemIntrinsic *MI) {
921   if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
922     return (!ClInstrumentWrites || ignoreAccess(MTI, MTI->getDest())) &&
923            (!ClInstrumentReads || ignoreAccess(MTI, MTI->getSource()));
924   }
925   if (isa<MemSetInst>(MI))
926     return !ClInstrumentWrites || ignoreAccess(MI, MI->getDest());
927   return false;
928 }
929 
930 void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
931   IRBuilder<> IRB(MI);
932   if (isa<MemTransferInst>(MI)) {
933     IRB.CreateCall(
934         isa<MemMoveInst>(MI) ? HWAsanMemmove : HWAsanMemcpy,
935         {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
936          IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
937          IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
938   } else if (isa<MemSetInst>(MI)) {
939     IRB.CreateCall(
940         HWAsanMemset,
941         {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
942          IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
943          IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
944   }
945   MI->eraseFromParent();
946 }
947 
948 bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) {
949   Value *Addr = O.getPtr();
950 
951   LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");
952 
953   if (O.MaybeMask)
954     return false; // FIXME
955 
956   IRBuilder<> IRB(O.getInsn());
957   if (isPowerOf2_64(O.TypeSize) &&
958       (O.TypeSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
959       (!O.Alignment || *O.Alignment >= (1ULL << Mapping.Scale) ||
960        *O.Alignment >= O.TypeSize / 8)) {
961     size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeSize);
962     if (InstrumentWithCalls) {
963       IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
964                      IRB.CreatePointerCast(Addr, IntptrTy));
965     } else if (OutlinedChecks) {
966       instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
967     } else {
968       instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
969     }
970   } else {
971     IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite],
972                    {IRB.CreatePointerCast(Addr, IntptrTy),
973                     ConstantInt::get(IntptrTy, O.TypeSize / 8)});
974   }
975   untagPointerOperand(O.getInsn(), Addr);
976 
977   return true;
978 }
979 
980 void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
981                                    size_t Size) {
982   size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
983   if (!UseShortGranules)
984     Size = AlignedSize;
985 
986   Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
987   if (InstrumentWithCalls) {
988     IRB.CreateCall(HwasanTagMemoryFunc,
989                    {IRB.CreatePointerCast(AI, Int8PtrTy), JustTag,
990                     ConstantInt::get(IntptrTy, AlignedSize)});
991   } else {
992     size_t ShadowSize = Size >> Mapping.Scale;
993     Value *ShadowPtr = memToShadow(IRB.CreatePointerCast(AI, IntptrTy), IRB);
994     // If this memset is not inlined, it will be intercepted in the hwasan
995     // runtime library. That's OK, because the interceptor skips the checks if
996     // the address is in the shadow region.
997     // FIXME: the interceptor is not as fast as real memset. Consider lowering
998     // llvm.memset right here into either a sequence of stores, or a call to
999     // hwasan_tag_memory.
1000     if (ShadowSize)
1001       IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, Align(1));
1002     if (Size != AlignedSize) {
1003       IRB.CreateStore(
1004           ConstantInt::get(Int8Ty, Size % Mapping.getObjectAlignment()),
1005           IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
1006       IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32(
1007                                    Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy),
1008                                    AlignedSize - 1));
1009     }
1010   }
1011 }
1012 
1013 unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
1014   if (TargetTriple.getArch() == Triple::x86_64)
1015     return AllocaNo & TagMaskByte;
1016 
1017   // A list of 8-bit numbers that have at most one run of non-zero bits.
1018   // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
1019   // masks.
1020   // The list does not include the value 255, which is used for UAR.
1021   //
1022   // Because we are more likely to use earlier elements of this list than later
1023   // ones, it is sorted in increasing order of probability of collision with a
1024   // mask allocated (temporally) nearby. The program that generated this list
1025   // can be found at:
1026   // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
1027   static unsigned FastMasks[] = {0,  128, 64,  192, 32,  96,  224, 112, 240,
1028                                  48, 16,  120, 248, 56,  24,  8,   124, 252,
1029                                  60, 28,  12,  4,   126, 254, 62,  30,  14,
1030                                  6,  2,   127, 63,  31,  15,  7,   3,   1};
1031   return FastMasks[AllocaNo % (sizeof(FastMasks) / sizeof(FastMasks[0]))];
1032 }
1033 
1034 Value *HWAddressSanitizer::applyTagMask(IRBuilder<> &IRB, Value *OldTag) {
1035   if (TargetTriple.getArch() == Triple::x86_64) {
1036     Constant *TagMask = ConstantInt::get(IntptrTy, TagMaskByte);
1037     Value *NewTag = IRB.CreateAnd(OldTag, TagMask);
1038     return NewTag;
1039   }
1040   // aarch64 uses 8-bit tags, so no mask is needed.
1041   return OldTag;
1042 }
1043 
1044 Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
1045   return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
1046 }
1047 
1048 Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
1049   if (ClGenerateTagsWithCalls)
1050     return getNextTagWithCall(IRB);
1051   if (StackBaseTag)
1052     return StackBaseTag;
1053   // Extract some entropy from the stack pointer for the tags.
1054   // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
1055   // between functions).
1056   Value *StackPointerLong = getSP(IRB);
1057   Value *StackTag =
1058       applyTagMask(IRB, IRB.CreateXor(StackPointerLong,
1059                                       IRB.CreateLShr(StackPointerLong, 20)));
1060   StackTag->setName("hwasan.stack.base.tag");
1061   return StackTag;
1062 }
1063 
1064 Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
1065                                         AllocaInst *AI, unsigned AllocaNo) {
1066   if (ClGenerateTagsWithCalls)
1067     return getNextTagWithCall(IRB);
1068   return IRB.CreateXor(StackTag,
1069                        ConstantInt::get(IntptrTy, retagMask(AllocaNo)));
1070 }
1071 
1072 Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB, Value *StackTag) {
1073   if (ClUARRetagToZero)
1074     return ConstantInt::get(IntptrTy, 0);
1075   if (ClGenerateTagsWithCalls)
1076     return getNextTagWithCall(IRB);
1077   return IRB.CreateXor(StackTag, ConstantInt::get(IntptrTy, TagMaskByte));
1078 }
1079 
1080 // Add a tag to an address.
1081 Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
1082                                       Value *PtrLong, Value *Tag) {
1083   assert(!UsePageAliases);
1084   Value *TaggedPtrLong;
1085   if (CompileKernel) {
1086     // Kernel addresses have 0xFF in the most significant byte.
1087     Value *ShiftedTag =
1088         IRB.CreateOr(IRB.CreateShl(Tag, PointerTagShift),
1089                      ConstantInt::get(IntptrTy, (1ULL << PointerTagShift) - 1));
1090     TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
1091   } else {
1092     // Userspace can simply do OR (tag << PointerTagShift);
1093     Value *ShiftedTag = IRB.CreateShl(Tag, PointerTagShift);
1094     TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
1095   }
1096   return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
1097 }
1098 
1099 // Remove tag from an address.
1100 Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
1101   assert(!UsePageAliases);
1102   Value *UntaggedPtrLong;
1103   if (CompileKernel) {
1104     // Kernel addresses have 0xFF in the most significant byte.
1105     UntaggedPtrLong =
1106         IRB.CreateOr(PtrLong, ConstantInt::get(PtrLong->getType(),
1107                                                0xFFULL << PointerTagShift));
1108   } else {
1109     // Userspace addresses have 0x00.
1110     UntaggedPtrLong =
1111         IRB.CreateAnd(PtrLong, ConstantInt::get(PtrLong->getType(),
1112                                                 ~(0xFFULL << PointerTagShift)));
1113   }
1114   return UntaggedPtrLong;
1115 }
1116 
1117 Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
1118   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
1119   if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) {
1120     // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
1121     // in Bionic's libc/private/bionic_tls.h.
1122     Function *ThreadPointerFunc =
1123         Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
1124     Value *SlotPtr = IRB.CreatePointerCast(
1125         IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
1126                                IRB.CreateCall(ThreadPointerFunc), 0x30),
1127         Ty->getPointerTo(0));
1128     return SlotPtr;
1129   }
1130   if (ThreadPtrGlobal)
1131     return ThreadPtrGlobal;
1132 
1133   return nullptr;
1134 }
1135 
1136 Value *HWAddressSanitizer::getPC(IRBuilder<> &IRB) {
1137   if (TargetTriple.getArch() == Triple::aarch64)
1138     return readRegister(IRB, "pc");
1139   else
1140     return IRB.CreatePtrToInt(IRB.GetInsertBlock()->getParent(), IntptrTy);
1141 }
1142 
1143 Value *HWAddressSanitizer::getSP(IRBuilder<> &IRB) {
1144   if (!CachedSP) {
1145     // FIXME: use addressofreturnaddress (but implement it in aarch64 backend
1146     // first).
1147     Function *F = IRB.GetInsertBlock()->getParent();
1148     Module *M = F->getParent();
1149     auto GetStackPointerFn = Intrinsic::getDeclaration(
1150         M, Intrinsic::frameaddress,
1151         IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
1152     CachedSP = IRB.CreatePtrToInt(
1153         IRB.CreateCall(GetStackPointerFn,
1154                        {Constant::getNullValue(IRB.getInt32Ty())}),
1155         IntptrTy);
1156   }
1157   return CachedSP;
1158 }
1159 
1160 Value *HWAddressSanitizer::getFrameRecordInfo(IRBuilder<> &IRB) {
1161   // Prepare ring buffer data.
1162   Value *PC = getPC(IRB);
1163   Value *SP = getSP(IRB);
1164 
1165   // Mix SP and PC.
1166   // Assumptions:
1167   // PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero)
1168   // SP is 0xsssssssssssSSSS0  (4 lower bits are zero)
1169   // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
1170   //       0xSSSSPPPPPPPPPPPP
1171   SP = IRB.CreateShl(SP, 44);
1172   return IRB.CreateOr(PC, SP);
1173 }
1174 
1175 void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
1176   if (!Mapping.InTls)
1177     ShadowBase = getShadowNonTls(IRB);
1178   else if (!WithFrameRecord && TargetTriple.isAndroid())
1179     ShadowBase = getDynamicShadowIfunc(IRB);
1180 
1181   if (!WithFrameRecord && ShadowBase)
1182     return;
1183 
1184   Value *SlotPtr = nullptr;
1185   Value *ThreadLong = nullptr;
1186   Value *ThreadLongMaybeUntagged = nullptr;
1187 
1188   auto getThreadLongMaybeUntagged = [&]() {
1189     if (!SlotPtr)
1190       SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
1191     if (!ThreadLong)
1192       ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
1193     // Extract the address field from ThreadLong. Unnecessary on AArch64 with
1194     // TBI.
1195     return TargetTriple.isAArch64() ? ThreadLong
1196                                     : untagPointer(IRB, ThreadLong);
1197   };
1198 
1199   if (WithFrameRecord) {
1200     switch (ClRecordStackHistory) {
1201     case libcall: {
1202       // Emit a runtime call into hwasan rather than emitting instructions for
1203       // recording stack history.
1204       Value *FrameRecordInfo = getFrameRecordInfo(IRB);
1205       IRB.CreateCall(HwasanRecordFrameRecordFunc, {FrameRecordInfo});
1206       break;
1207     }
1208     case instr: {
1209       ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();
1210 
1211       StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
1212 
1213       // Store data to ring buffer.
1214       Value *FrameRecordInfo = getFrameRecordInfo(IRB);
1215       Value *RecordPtr = IRB.CreateIntToPtr(ThreadLongMaybeUntagged,
1216                                             IntptrTy->getPointerTo(0));
1217       IRB.CreateStore(FrameRecordInfo, RecordPtr);
1218 
1219       // Update the ring buffer. Top byte of ThreadLong defines the size of the
1220       // buffer in pages, it must be a power of two, and the start of the buffer
1221       // must be aligned by twice that much. Therefore wrap around of the ring
1222       // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
1223       // The use of AShr instead of LShr is due to
1224       //   https://bugs.llvm.org/show_bug.cgi?id=39030
1225       // Runtime library makes sure not to use the highest bit.
1226       Value *WrapMask = IRB.CreateXor(
1227           IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
1228           ConstantInt::get(IntptrTy, (uint64_t)-1));
1229       Value *ThreadLongNew = IRB.CreateAnd(
1230           IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
1231       IRB.CreateStore(ThreadLongNew, SlotPtr);
1232       break;
1233     }
1234     case none: {
1235       llvm_unreachable(
1236           "A stack history recording mode should've been selected.");
1237     }
1238     }
1239   }
1240 
1241   if (!ShadowBase) {
1242     if (!ThreadLongMaybeUntagged)
1243       ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();
1244 
1245     // Get shadow base address by aligning RecordPtr up.
1246     // Note: this is not correct if the pointer is already aligned.
1247     // Runtime library will make sure this never happens.
1248     ShadowBase = IRB.CreateAdd(
1249         IRB.CreateOr(
1250             ThreadLongMaybeUntagged,
1251             ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
1252         ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
1253     ShadowBase = IRB.CreateIntToPtr(ShadowBase, Int8PtrTy);
1254   }
1255 }
1256 
1257 Value *HWAddressSanitizer::readRegister(IRBuilder<> &IRB, StringRef Name) {
1258   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
1259   Function *ReadRegister =
1260       Intrinsic::getDeclaration(M, Intrinsic::read_register, IntptrTy);
1261   MDNode *MD = MDNode::get(*C, {MDString::get(*C, Name)});
1262   Value *Args[] = {MetadataAsValue::get(*C, MD)};
1263   return IRB.CreateCall(ReadRegister, Args);
1264 }
1265 
1266 bool HWAddressSanitizer::instrumentLandingPads(
1267     SmallVectorImpl<Instruction *> &LandingPadVec) {
1268   for (auto *LP : LandingPadVec) {
1269     IRBuilder<> IRB(LP->getNextNode());
1270     IRB.CreateCall(
1271         HWAsanHandleVfork,
1272         {readRegister(IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp"
1273                                                                       : "sp")});
1274   }
1275   return true;
1276 }
1277 
1278 static bool isLifetimeIntrinsic(Value *V) {
1279   auto *II = dyn_cast<IntrinsicInst>(V);
1280   return II && II->isLifetimeStartOrEnd();
1281 }
1282 
1283 bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
1284                                          Value *StackTag,
1285                                          const DominatorTree &DT,
1286                                          const PostDominatorTree &PDT,
1287                                          const LoopInfo &LI) {
1288   // Ideally, we want to calculate tagged stack base pointer, and rewrite all
1289   // alloca addresses using that. Unfortunately, offsets are not known yet
1290   // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
1291   // temp, shift-OR it into each alloca address and xor with the retag mask.
1292   // This generates one extra instruction per alloca use.
1293   unsigned int I = 0;
1294 
1295   for (auto &KV : SInfo.AllocasToInstrument) {
1296     auto N = I++;
1297     auto *AI = KV.first;
1298     memtag::AllocaInfo &Info = KV.second;
1299     IRBuilder<> IRB(AI->getNextNode());
1300 
1301     // Replace uses of the alloca with tagged address.
1302     Value *Tag = getAllocaTag(IRB, StackTag, AI, N);
1303     Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
1304     Value *Replacement = tagPointer(IRB, AI->getType(), AILong, Tag);
1305     std::string Name =
1306         AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
1307     Replacement->setName(Name + ".hwasan");
1308 
1309     size_t Size = memtag::getAllocaSizeInBytes(*AI);
1310     size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
1311 
1312     Value *AICast = IRB.CreatePointerCast(AI, Int8PtrTy);
1313 
1314     auto HandleLifetime = [&](IntrinsicInst *II) {
1315       // Set the lifetime intrinsic to cover the whole alloca. This reduces the
1316       // set of assumptions we need to make about the lifetime. Without this we
1317       // would need to ensure that we can track the lifetime pointer to a
1318       // constant offset from the alloca, and would still need to change the
1319       // size to include the extra alignment we use for the untagging to make
1320       // the size consistent.
1321       //
1322       // The check for standard lifetime below makes sure that we have exactly
1323       // one set of start / end in any execution (i.e. the ends are not
1324       // reachable from each other), so this will not cause any problems.
1325       II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize));
1326       II->setArgOperand(1, AICast);
1327     };
1328     llvm::for_each(Info.LifetimeStart, HandleLifetime);
1329     llvm::for_each(Info.LifetimeEnd, HandleLifetime);
1330 
1331     AI->replaceUsesWithIf(Replacement, [AICast, AILong](Use &U) {
1332       auto *User = U.getUser();
1333       return User != AILong && User != AICast && !isLifetimeIntrinsic(User);
1334     });
1335 
1336     for (auto *DDI : Info.DbgVariableIntrinsics) {
1337       // Prepend "tag_offset, N" to the dwarf expression.
1338       // Tag offset logically applies to the alloca pointer, and it makes sense
1339       // to put it at the beginning of the expression.
1340       SmallVector<uint64_t, 8> NewOps = {dwarf::DW_OP_LLVM_tag_offset,
1341                                          retagMask(N)};
1342       for (size_t LocNo = 0; LocNo < DDI->getNumVariableLocationOps(); ++LocNo)
1343         if (DDI->getVariableLocationOp(LocNo) == AI)
1344           DDI->setExpression(DIExpression::appendOpsToArg(DDI->getExpression(),
1345                                                           NewOps, LocNo));
1346     }
1347 
1348     auto TagEnd = [&](Instruction *Node) {
1349       IRB.SetInsertPoint(Node);
1350       Value *UARTag = getUARTag(IRB, StackTag);
1351       // When untagging, use the `AlignedSize` because we need to set the tags
1352       // for the entire alloca to zero. If we used `Size` here, we would
1353       // keep the last granule tagged, and store zero in the last byte of the
1354       // last granule, due to how short granules are implemented.
1355       tagAlloca(IRB, AI, UARTag, AlignedSize);
1356     };
1357     // Calls to functions that may return twice (e.g. setjmp) confuse the
1358     // postdominator analysis, and will leave us to keep memory tagged after
1359     // function return. Work around this by always untagging at every return
1360     // statement if return_twice functions are called.
1361     bool StandardLifetime =
1362         SInfo.UnrecognizedLifetimes.empty() &&
1363         memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, &DT,
1364                                    &LI, ClMaxLifetimes) &&
1365         !SInfo.CallsReturnTwice;
1366     if (DetectUseAfterScope && StandardLifetime) {
1367       IntrinsicInst *Start = Info.LifetimeStart[0];
1368       IRB.SetInsertPoint(Start->getNextNode());
1369       tagAlloca(IRB, AI, Tag, Size);
1370       if (!memtag::forAllReachableExits(DT, PDT, LI, Start, Info.LifetimeEnd,
1371                                         SInfo.RetVec, TagEnd)) {
1372         for (auto *End : Info.LifetimeEnd)
1373           End->eraseFromParent();
1374       }
1375     } else {
1376       tagAlloca(IRB, AI, Tag, Size);
1377       for (auto *RI : SInfo.RetVec)
1378         TagEnd(RI);
1379       // We inserted tagging outside of the lifetimes, so we have to remove
1380       // them.
1381       for (auto &II : Info.LifetimeStart)
1382         II->eraseFromParent();
1383       for (auto &II : Info.LifetimeEnd)
1384         II->eraseFromParent();
1385     }
1386     memtag::alignAndPadAlloca(Info, Align(Mapping.getObjectAlignment()));
1387   }
1388   for (auto &I : SInfo.UnrecognizedLifetimes)
1389     I->eraseFromParent();
1390   return true;
1391 }
1392 
1393 bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
1394   return (AI.getAllocatedType()->isSized() &&
1395           // FIXME: instrument dynamic allocas, too
1396           AI.isStaticAlloca() &&
1397           // alloca() may be called with 0 size, ignore it.
1398           memtag::getAllocaSizeInBytes(AI) > 0 &&
1399           // We are only interested in allocas not promotable to registers.
1400           // Promotable allocas are common under -O0.
1401           !isAllocaPromotable(&AI) &&
1402           // inalloca allocas are not treated as static, and we don't want
1403           // dynamic alloca instrumentation for them as well.
1404           !AI.isUsedWithInAlloca() &&
1405           // swifterror allocas are register promoted by ISel
1406           !AI.isSwiftError()) &&
1407          // safe allocas are not interesting
1408          !(SSI && SSI->isSafe(AI));
1409 }
1410 
1411 bool HWAddressSanitizer::sanitizeFunction(Function &F,
1412                                           FunctionAnalysisManager &FAM) {
1413   if (&F == HwasanCtorFunction)
1414     return false;
1415 
1416   if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
1417     return false;
1418 
1419   LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");
1420 
1421   SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
1422   SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
1423   SmallVector<Instruction *, 8> LandingPadVec;
1424 
1425   memtag::StackInfoBuilder SIB(
1426       [this](const AllocaInst &AI) { return isInterestingAlloca(AI); });
1427   for (auto &Inst : instructions(F)) {
1428     if (InstrumentStack) {
1429       SIB.visit(Inst);
1430     }
1431 
1432     if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
1433       LandingPadVec.push_back(&Inst);
1434 
1435     getInterestingMemoryOperands(&Inst, OperandsToInstrument);
1436 
1437     if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
1438       if (!ignoreMemIntrinsic(MI))
1439         IntrinToInstrument.push_back(MI);
1440   }
1441 
1442   memtag::StackInfo &SInfo = SIB.get();
1443 
1444   initializeCallbacks(*F.getParent());
1445 
1446   bool Changed = false;
1447 
1448   if (!LandingPadVec.empty())
1449     Changed |= instrumentLandingPads(LandingPadVec);
1450 
1451   if (SInfo.AllocasToInstrument.empty() && F.hasPersonalityFn() &&
1452       F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
1453     // __hwasan_personality_thunk is a no-op for functions without an
1454     // instrumented stack, so we can drop it.
1455     F.setPersonalityFn(nullptr);
1456     Changed = true;
1457   }
1458 
1459   if (SInfo.AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
1460       IntrinToInstrument.empty())
1461     return Changed;
1462 
1463   assert(!ShadowBase);
1464 
1465   Instruction *InsertPt = &*F.getEntryBlock().begin();
1466   IRBuilder<> EntryIRB(InsertPt);
1467   emitPrologue(EntryIRB,
1468                /*WithFrameRecord*/ ClRecordStackHistory != none &&
1469                    Mapping.WithFrameRecord &&
1470                    !SInfo.AllocasToInstrument.empty());
1471 
1472   if (!SInfo.AllocasToInstrument.empty()) {
1473     const DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1474     const PostDominatorTree &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
1475     const LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
1476     Value *StackTag =
1477         ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
1478     instrumentStack(SInfo, StackTag, DT, PDT, LI);
1479   }
1480 
1481   // If we split the entry block, move any allocas that were originally in the
1482   // entry block back into the entry block so that they aren't treated as
1483   // dynamic allocas.
1484   if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
1485     InsertPt = &*F.getEntryBlock().begin();
1486     for (Instruction &I :
1487          llvm::make_early_inc_range(*EntryIRB.GetInsertBlock())) {
1488       if (auto *AI = dyn_cast<AllocaInst>(&I))
1489         if (isa<ConstantInt>(AI->getArraySize()))
1490           I.moveBefore(InsertPt);
1491     }
1492   }
1493 
1494   for (auto &Operand : OperandsToInstrument)
1495     instrumentMemAccess(Operand);
1496 
1497   if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
1498     for (auto Inst : IntrinToInstrument)
1499       instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
1500   }
1501 
1502   ShadowBase = nullptr;
1503   StackBaseTag = nullptr;
1504   CachedSP = nullptr;
1505 
1506   return true;
1507 }
1508 
1509 void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
1510   assert(!UsePageAliases);
1511   Constant *Initializer = GV->getInitializer();
1512   uint64_t SizeInBytes =
1513       M.getDataLayout().getTypeAllocSize(Initializer->getType());
1514   uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
1515   if (SizeInBytes != NewSize) {
1516     // Pad the initializer out to the next multiple of 16 bytes and add the
1517     // required short granule tag.
1518     std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
1519     Init.back() = Tag;
1520     Constant *Padding = ConstantDataArray::get(*C, Init);
1521     Initializer = ConstantStruct::getAnon({Initializer, Padding});
1522   }
1523 
1524   auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
1525                                    GlobalValue::ExternalLinkage, Initializer,
1526                                    GV->getName() + ".hwasan");
1527   NewGV->copyAttributesFrom(GV);
1528   NewGV->setLinkage(GlobalValue::PrivateLinkage);
1529   NewGV->copyMetadata(GV, 0);
1530   NewGV->setAlignment(
1531       MaybeAlign(std::max(GV->getAlignment(), Mapping.getObjectAlignment())));
1532 
1533   // It is invalid to ICF two globals that have different tags. In the case
1534   // where the size of the global is a multiple of the tag granularity the
1535   // contents of the globals may be the same but the tags (i.e. symbol values)
1536   // may be different, and the symbols are not considered during ICF. In the
1537   // case where the size is not a multiple of the granularity, the short granule
1538   // tags would discriminate two globals with different tags, but there would
1539   // otherwise be nothing stopping such a global from being incorrectly ICF'd
1540   // with an uninstrumented (i.e. tag 0) global that happened to have the short
1541   // granule tag in the last byte.
1542   NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
1543 
1544   // Descriptor format (assuming little-endian):
1545   // bytes 0-3: relative address of global
1546   // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case
1547   // it isn't, we create multiple descriptors)
1548   // byte 7: tag
1549   auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
1550   const uint64_t MaxDescriptorSize = 0xfffff0;
1551   for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
1552        DescriptorPos += MaxDescriptorSize) {
1553     auto *Descriptor =
1554         new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
1555                            nullptr, GV->getName() + ".hwasan.descriptor");
1556     auto *GVRelPtr = ConstantExpr::getTrunc(
1557         ConstantExpr::getAdd(
1558             ConstantExpr::getSub(
1559                 ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1560                 ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
1561             ConstantInt::get(Int64Ty, DescriptorPos)),
1562         Int32Ty);
1563     uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
1564     auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
1565     Descriptor->setComdat(NewGV->getComdat());
1566     Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
1567     Descriptor->setSection("hwasan_globals");
1568     Descriptor->setMetadata(LLVMContext::MD_associated,
1569                             MDNode::get(*C, ValueAsMetadata::get(NewGV)));
1570     appendToCompilerUsed(M, Descriptor);
1571   }
1572 
1573   Constant *Aliasee = ConstantExpr::getIntToPtr(
1574       ConstantExpr::getAdd(
1575           ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1576           ConstantInt::get(Int64Ty, uint64_t(Tag) << PointerTagShift)),
1577       GV->getType());
1578   auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
1579                                     GV->getLinkage(), "", Aliasee, &M);
1580   Alias->setVisibility(GV->getVisibility());
1581   Alias->takeName(GV);
1582   GV->replaceAllUsesWith(Alias);
1583   GV->eraseFromParent();
1584 }
1585 
1586 void HWAddressSanitizer::instrumentGlobals() {
1587   std::vector<GlobalVariable *> Globals;
1588   for (GlobalVariable &GV : M.globals()) {
1589     if (GV.hasSanitizerMetadata() && GV.getSanitizerMetadata().NoHWAddress)
1590       continue;
1591 
1592     if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") ||
1593         GV.isThreadLocal())
1594       continue;
1595 
1596     // Common symbols can't have aliases point to them, so they can't be tagged.
1597     if (GV.hasCommonLinkage())
1598       continue;
1599 
1600     // Globals with custom sections may be used in __start_/__stop_ enumeration,
1601     // which would be broken both by adding tags and potentially by the extra
1602     // padding/alignment that we insert.
1603     if (GV.hasSection())
1604       continue;
1605 
1606     Globals.push_back(&GV);
1607   }
1608 
1609   MD5 Hasher;
1610   Hasher.update(M.getSourceFileName());
1611   MD5::MD5Result Hash;
1612   Hasher.final(Hash);
1613   uint8_t Tag = Hash[0];
1614 
1615   for (GlobalVariable *GV : Globals) {
1616     Tag &= TagMaskByte;
1617     // Skip tag 0 in order to avoid collisions with untagged memory.
1618     if (Tag == 0)
1619       Tag = 1;
1620     instrumentGlobal(GV, Tag++);
1621   }
1622 }
1623 
1624 void HWAddressSanitizer::instrumentPersonalityFunctions() {
1625   // We need to untag stack frames as we unwind past them. That is the job of
1626   // the personality function wrapper, which either wraps an existing
1627   // personality function or acts as a personality function on its own. Each
1628   // function that has a personality function or that can be unwound past has
1629   // its personality function changed to a thunk that calls the personality
1630   // function wrapper in the runtime.
1631   MapVector<Constant *, std::vector<Function *>> PersonalityFns;
1632   for (Function &F : M) {
1633     if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
1634       continue;
1635 
1636     if (F.hasPersonalityFn()) {
1637       PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F);
1638     } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
1639       PersonalityFns[nullptr].push_back(&F);
1640     }
1641   }
1642 
1643   if (PersonalityFns.empty())
1644     return;
1645 
1646   FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
1647       "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty,
1648       Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy);
1649   FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
1650   FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);
1651 
1652   for (auto &P : PersonalityFns) {
1653     std::string ThunkName = kHwasanPersonalityThunkName;
1654     if (P.first)
1655       ThunkName += ("." + P.first->getName()).str();
1656     FunctionType *ThunkFnTy = FunctionType::get(
1657         Int32Ty, {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int8PtrTy}, false);
1658     bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
1659                                cast<GlobalValue>(P.first)->hasLocalLinkage());
1660     auto *ThunkFn = Function::Create(ThunkFnTy,
1661                                      IsLocal ? GlobalValue::InternalLinkage
1662                                              : GlobalValue::LinkOnceODRLinkage,
1663                                      ThunkName, &M);
1664     if (!IsLocal) {
1665       ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
1666       ThunkFn->setComdat(M.getOrInsertComdat(ThunkName));
1667     }
1668 
1669     auto *BB = BasicBlock::Create(*C, "entry", ThunkFn);
1670     IRBuilder<> IRB(BB);
1671     CallInst *WrapperCall = IRB.CreateCall(
1672         HwasanPersonalityWrapper,
1673         {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
1674          ThunkFn->getArg(3), ThunkFn->getArg(4),
1675          P.first ? IRB.CreateBitCast(P.first, Int8PtrTy)
1676                  : Constant::getNullValue(Int8PtrTy),
1677          IRB.CreateBitCast(UnwindGetGR.getCallee(), Int8PtrTy),
1678          IRB.CreateBitCast(UnwindGetCFA.getCallee(), Int8PtrTy)});
1679     WrapperCall->setTailCall();
1680     IRB.CreateRet(WrapperCall);
1681 
1682     for (Function *F : P.second)
1683       F->setPersonalityFn(ThunkFn);
1684   }
1685 }
1686 
1687 void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple,
1688                                              bool InstrumentWithCalls) {
1689   Scale = kDefaultShadowScale;
1690   if (TargetTriple.isOSFuchsia()) {
1691     // Fuchsia is always PIE, which means that the beginning of the address
1692     // space is always available.
1693     InGlobal = false;
1694     InTls = false;
1695     Offset = 0;
1696     WithFrameRecord = true;
1697   } else if (ClMappingOffset.getNumOccurrences() > 0) {
1698     InGlobal = false;
1699     InTls = false;
1700     Offset = ClMappingOffset;
1701     WithFrameRecord = false;
1702   } else if (ClEnableKhwasan || InstrumentWithCalls) {
1703     InGlobal = false;
1704     InTls = false;
1705     Offset = 0;
1706     WithFrameRecord = false;
1707   } else if (ClWithIfunc) {
1708     InGlobal = true;
1709     InTls = false;
1710     Offset = kDynamicShadowSentinel;
1711     WithFrameRecord = false;
1712   } else if (ClWithTls) {
1713     InGlobal = false;
1714     InTls = true;
1715     Offset = kDynamicShadowSentinel;
1716     WithFrameRecord = true;
1717   } else {
1718     InGlobal = false;
1719     InTls = false;
1720     Offset = kDynamicShadowSentinel;
1721     WithFrameRecord = false;
1722   }
1723 }
1724