xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp (revision 1323ec571215a77ddd21294f0871979d5ad6b992)
1 //===- HWAddressSanitizer.cpp - memory access error detector ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of HWAddressSanitizer, an address sanity checker
11 /// based on tagged addressing.
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/Triple.h"
20 #include "llvm/Analysis/StackSafetyAnalysis.h"
21 #include "llvm/BinaryFormat/ELF.h"
22 #include "llvm/IR/Attributes.h"
23 #include "llvm/IR/BasicBlock.h"
24 #include "llvm/IR/Constant.h"
25 #include "llvm/IR/Constants.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/DebugInfoMetadata.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/InlineAsm.h"
32 #include "llvm/IR/InstVisitor.h"
33 #include "llvm/IR/Instruction.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/IntrinsicInst.h"
36 #include "llvm/IR/Intrinsics.h"
37 #include "llvm/IR/LLVMContext.h"
38 #include "llvm/IR/MDBuilder.h"
39 #include "llvm/IR/Module.h"
40 #include "llvm/IR/Type.h"
41 #include "llvm/IR/Value.h"
42 #include "llvm/InitializePasses.h"
43 #include "llvm/Pass.h"
44 #include "llvm/Support/Casting.h"
45 #include "llvm/Support/CommandLine.h"
46 #include "llvm/Support/Debug.h"
47 #include "llvm/Support/raw_ostream.h"
48 #include "llvm/Transforms/Instrumentation.h"
49 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
50 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
51 #include "llvm/Transforms/Utils/ModuleUtils.h"
52 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
53 #include <sstream>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "hwasan"
58 
59 const char kHwasanModuleCtorName[] = "hwasan.module_ctor";
60 const char kHwasanNoteName[] = "hwasan.note";
61 const char kHwasanInitName[] = "__hwasan_init";
62 const char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk";
63 
64 const char kHwasanShadowMemoryDynamicAddress[] =
65     "__hwasan_shadow_memory_dynamic_address";
66 
67 // Access sizes are powers of two: 1, 2, 4, 8, 16.
68 static const size_t kNumberOfAccessSizes = 5;
69 
70 static const size_t kDefaultShadowScale = 4;
71 static const uint64_t kDynamicShadowSentinel =
72     std::numeric_limits<uint64_t>::max();
73 
74 static const unsigned kShadowBaseAlignment = 32;
75 
76 static cl::opt<std::string>
77     ClMemoryAccessCallbackPrefix("hwasan-memory-access-callback-prefix",
78                                  cl::desc("Prefix for memory access callbacks"),
79                                  cl::Hidden, cl::init("__hwasan_"));
80 
81 static cl::opt<bool> ClInstrumentWithCalls(
82     "hwasan-instrument-with-calls",
83     cl::desc("instrument reads and writes with callbacks"), cl::Hidden,
84     cl::init(false));
85 
86 static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
87                                        cl::desc("instrument read instructions"),
88                                        cl::Hidden, cl::init(true));
89 
90 static cl::opt<bool>
91     ClInstrumentWrites("hwasan-instrument-writes",
92                        cl::desc("instrument write instructions"), cl::Hidden,
93                        cl::init(true));
94 
95 static cl::opt<bool> ClInstrumentAtomics(
96     "hwasan-instrument-atomics",
97     cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
98     cl::init(true));
99 
100 static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval",
101                                        cl::desc("instrument byval arguments"),
102                                        cl::Hidden, cl::init(true));
103 
104 static cl::opt<bool>
105     ClRecover("hwasan-recover",
106               cl::desc("Enable recovery mode (continue-after-error)."),
107               cl::Hidden, cl::init(false));
108 
109 static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
110                                        cl::desc("instrument stack (allocas)"),
111                                        cl::Hidden, cl::init(true));
112 
113 static cl::opt<bool>
114     ClUseStackSafety("hwasan-use-stack-safety", cl::Hidden, cl::init(true),
115                      cl::desc("Use Stack Safety analysis results"),
116                      cl::Optional);
117 
118 static cl::opt<bool> ClUARRetagToZero(
119     "hwasan-uar-retag-to-zero",
120     cl::desc("Clear alloca tags before returning from the function to allow "
121              "mixing instrumented and non-instrumented function calls. When set "
122              "to false, allocas are retagged before returning from the "
123              "function to detect use after return."),
124     cl::Hidden, cl::init(true));
125 
126 static cl::opt<bool> ClGenerateTagsWithCalls(
127     "hwasan-generate-tags-with-calls",
128     cl::desc("generate new tags with runtime library calls"), cl::Hidden,
129     cl::init(false));
130 
131 static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
132                                cl::Hidden, cl::init(false), cl::ZeroOrMore);
133 
134 static cl::opt<int> ClMatchAllTag(
135     "hwasan-match-all-tag",
136     cl::desc("don't report bad accesses via pointers with this tag"),
137     cl::Hidden, cl::init(-1));
138 
139 static cl::opt<bool>
140     ClEnableKhwasan("hwasan-kernel",
141                     cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
142                     cl::Hidden, cl::init(false));
143 
144 // These flags allow changing the shadow mapping and control how shadow memory
145 // is accessed. The shadow mapping looks like:
146 //    Shadow = (Mem >> scale) + offset
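// For example (illustrative numbers): with the default scale of 4 and a zero
// offset, an access to address 0x1000 is checked against the shadow byte at
// 0x1000 >> 4 == 0x100.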
147 
148 static cl::opt<uint64_t>
149     ClMappingOffset("hwasan-mapping-offset",
150                     cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
151                     cl::Hidden, cl::init(0));
152 
153 static cl::opt<bool>
154     ClWithIfunc("hwasan-with-ifunc",
155                 cl::desc("Access dynamic shadow through an ifunc global on "
156                          "platforms that support this"),
157                 cl::Hidden, cl::init(false));
158 
159 static cl::opt<bool> ClWithTls(
160     "hwasan-with-tls",
161     cl::desc("Access dynamic shadow through a thread-local pointer on "
162              "platforms that support this"),
163     cl::Hidden, cl::init(true));
164 
165 static cl::opt<bool>
166     ClRecordStackHistory("hwasan-record-stack-history",
167                          cl::desc("Record stack frames with tagged allocations "
168                                   "in a thread-local ring buffer"),
169                          cl::Hidden, cl::init(true));
170 static cl::opt<bool>
171     ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
172                               cl::desc("instrument memory intrinsics"),
173                               cl::Hidden, cl::init(true));
174 
175 static cl::opt<bool>
176     ClInstrumentLandingPads("hwasan-instrument-landing-pads",
177                             cl::desc("instrument landing pads"), cl::Hidden,
178                             cl::init(false), cl::ZeroOrMore);
179 
180 static cl::opt<bool> ClUseShortGranules(
181     "hwasan-use-short-granules",
182     cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
183     cl::init(false), cl::ZeroOrMore);
184 
185 static cl::opt<bool> ClInstrumentPersonalityFunctions(
186     "hwasan-instrument-personality-functions",
187     cl::desc("instrument personality functions"), cl::Hidden, cl::init(false),
188     cl::ZeroOrMore);
189 
190 static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
191                                        cl::desc("inline all checks"),
192                                        cl::Hidden, cl::init(false));
193 
194 // Enabled from clang by "-fsanitize-hwaddress-experimental-aliasing".
195 static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases",
196                                       cl::desc("Use page aliasing in HWASan"),
197                                       cl::Hidden, cl::init(false));
198 
199 namespace {
200 
201 bool shouldUsePageAliases(const Triple &TargetTriple) {
202   return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64;
203 }
204 
205 bool shouldInstrumentStack(const Triple &TargetTriple) {
206   return !shouldUsePageAliases(TargetTriple) && ClInstrumentStack;
207 }
208 
209 bool shouldInstrumentWithCalls(const Triple &TargetTriple) {
210   return ClInstrumentWithCalls || TargetTriple.getArch() == Triple::x86_64;
211 }
212 
213 bool mightUseStackSafetyAnalysis(bool DisableOptimization) {
214   return ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
215                                               : !DisableOptimization;
216 }
217 
218 bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
219                                   bool DisableOptimization) {
220   return shouldInstrumentStack(TargetTriple) &&
221          mightUseStackSafetyAnalysis(DisableOptimization);
222 }
223 /// An instrumentation pass implementing detection of addressability bugs
224 /// using tagged pointers.
225 class HWAddressSanitizer {
226 public:
227   HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
228                      const StackSafetyGlobalInfo *SSI)
229       : M(M), SSI(SSI) {
230     this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
231     this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0
232                               ? ClEnableKhwasan
233                               : CompileKernel;
234 
235     initializeModule();
236   }
237 
238   void setSSI(const StackSafetyGlobalInfo *S) { SSI = S; }
239 
240   bool sanitizeFunction(Function &F);
241   void initializeModule();
242   void createHwasanCtorComdat();
243 
244   void initializeCallbacks(Module &M);
245 
246   Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val);
247 
248   Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
249   Value *getShadowNonTls(IRBuilder<> &IRB);
250 
251   void untagPointerOperand(Instruction *I, Value *Addr);
252   Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
253   void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
254                                  unsigned AccessSizeIndex,
255                                  Instruction *InsertBefore);
256   void instrumentMemIntrinsic(MemIntrinsic *MI);
257   bool instrumentMemAccess(InterestingMemoryOperand &O);
258   bool ignoreAccess(Value *Ptr);
259   void getInterestingMemoryOperands(
260       Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting);
261 
262   bool isInterestingAlloca(const AllocaInst &AI);
263   bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
264   Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
265   Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
266   bool instrumentStack(
267       SmallVectorImpl<AllocaInst *> &Allocas,
268       DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
269       SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
270   Value *readRegister(IRBuilder<> &IRB, StringRef Name);
271   bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
272   Value *getNextTagWithCall(IRBuilder<> &IRB);
273   Value *getStackBaseTag(IRBuilder<> &IRB);
274   Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI,
275                       unsigned AllocaNo);
276   Value *getUARTag(IRBuilder<> &IRB, Value *StackTag);
277 
278   Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
279   Value *applyTagMask(IRBuilder<> &IRB, Value *OldTag);
280   unsigned retagMask(unsigned AllocaNo);
281 
282   void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
283 
284   void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
285   void instrumentGlobals();
286 
287   void instrumentPersonalityFunctions();
288 
289 private:
290   LLVMContext *C;
291   Module &M;
292   const StackSafetyGlobalInfo *SSI;
293   Triple TargetTriple;
294   FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset;
295   FunctionCallee HWAsanHandleVfork;
296 
297   /// This struct defines the shadow mapping using the rule:
298   ///   shadow = (mem >> Scale) + Offset.
299   /// If InGlobal is true, then
300   ///   extern char __hwasan_shadow[];
301   ///   shadow = (mem >> Scale) + &__hwasan_shadow
302   /// If InTls is true, then
303   ///   extern char *__hwasan_tls;
304   ///   shadow = (mem >> Scale) + align_up(__hwasan_tls, kShadowBaseAlignment)
305   ///
306   /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
307   /// ring buffer for storing stack allocations on targets that support it.
308   struct ShadowMapping {
309     int Scale;
310     uint64_t Offset;
311     bool InGlobal;
312     bool InTls;
313     bool WithFrameRecord;
314 
315     void init(Triple &TargetTriple, bool InstrumentWithCalls);
316     unsigned getObjectAlignment() const { return 1U << Scale; }
317   };
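  // With the default Scale of 4, each shadow byte describes a 16-byte
  // granule, so getObjectAlignment() returns 16.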
318   ShadowMapping Mapping;
319 
320   Type *VoidTy = Type::getVoidTy(M.getContext());
321   Type *IntptrTy;
322   Type *Int8PtrTy;
323   Type *Int8Ty;
324   Type *Int32Ty;
325   Type *Int64Ty = Type::getInt64Ty(M.getContext());
326 
327   bool CompileKernel;
328   bool Recover;
329   bool OutlinedChecks;
330   bool UseShortGranules;
331   bool InstrumentLandingPads;
332   bool InstrumentWithCalls;
333   bool InstrumentStack;
334   bool UsePageAliases;
335 
336   bool HasMatchAllTag = false;
337   uint8_t MatchAllTag = 0;
338 
339   unsigned PointerTagShift;
340   uint64_t TagMaskByte;
341 
342   Function *HwasanCtorFunction;
343 
344   FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
345   FunctionCallee HwasanMemoryAccessCallbackSized[2];
346 
347   FunctionCallee HwasanTagMemoryFunc;
348   FunctionCallee HwasanGenerateTagFunc;
349 
350   Constant *ShadowGlobal;
351 
352   Value *ShadowBase = nullptr;
353   Value *StackBaseTag = nullptr;
354   GlobalValue *ThreadPtrGlobal = nullptr;
355 };
356 
357 class HWAddressSanitizerLegacyPass : public FunctionPass {
358 public:
359   // Pass identification, replacement for typeid.
360   static char ID;
361 
362   explicit HWAddressSanitizerLegacyPass(bool CompileKernel = false,
363                                         bool Recover = false,
364                                         bool DisableOptimization = false)
365       : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover),
366         DisableOptimization(DisableOptimization) {
367     initializeHWAddressSanitizerLegacyPassPass(
368         *PassRegistry::getPassRegistry());
369   }
370 
371   StringRef getPassName() const override { return "HWAddressSanitizer"; }
372 
373   bool doInitialization(Module &M) override {
374     HWASan = std::make_unique<HWAddressSanitizer>(M, CompileKernel, Recover,
375                                                   /*SSI=*/nullptr);
376     return true;
377   }
378 
379   bool runOnFunction(Function &F) override {
380     if (shouldUseStackSafetyAnalysis(Triple(F.getParent()->getTargetTriple()),
381                                      DisableOptimization)) {
382       // We cannot call getAnalysis in doInitialization, that would cause a
383       // crash as the required analyses are not initialized yet.
384       HWASan->setSSI(
385           &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult());
386     }
387     return HWASan->sanitizeFunction(F);
388   }
389 
390   bool doFinalization(Module &M) override {
391     HWASan.reset();
392     return false;
393   }
394 
395   void getAnalysisUsage(AnalysisUsage &AU) const override {
396     // This is an over-estimation: even when building for an architecture
397     // that doesn't allow stack tagging, we will still load the analysis.
398     // This is done so that we don't need to plumb TargetTriple all the
399     // way down to here.
400     if (mightUseStackSafetyAnalysis(DisableOptimization))
401       AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
402   }
403 
404 private:
405   std::unique_ptr<HWAddressSanitizer> HWASan;
406   bool CompileKernel;
407   bool Recover;
408   bool DisableOptimization;
409 };
410 
411 } // end anonymous namespace
412 
413 char HWAddressSanitizerLegacyPass::ID = 0;
414 
415 INITIALIZE_PASS_BEGIN(
416     HWAddressSanitizerLegacyPass, "hwasan",
417     "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
418     false)
419 INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
420 INITIALIZE_PASS_END(
421     HWAddressSanitizerLegacyPass, "hwasan",
422     "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
423     false)
424 
425 FunctionPass *
426 llvm::createHWAddressSanitizerLegacyPassPass(bool CompileKernel, bool Recover,
427                                              bool DisableOptimization) {
428   assert(!CompileKernel || Recover);
429   return new HWAddressSanitizerLegacyPass(CompileKernel, Recover,
430                                           DisableOptimization);
431 }
432 
433 HWAddressSanitizerPass::HWAddressSanitizerPass(bool CompileKernel, bool Recover,
434                                                bool DisableOptimization)
435     : CompileKernel(CompileKernel), Recover(Recover),
436       DisableOptimization(DisableOptimization) {}
437 
438 PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
439                                               ModuleAnalysisManager &MAM) {
440   const StackSafetyGlobalInfo *SSI = nullptr;
441   if (shouldUseStackSafetyAnalysis(llvm::Triple(M.getTargetTriple()),
442                                    DisableOptimization))
443     SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);
444   HWAddressSanitizer HWASan(M, CompileKernel, Recover, SSI);
445   bool Modified = false;
446   for (Function &F : M)
447     Modified |= HWASan.sanitizeFunction(F);
448   if (Modified)
449     return PreservedAnalyses::none();
450   return PreservedAnalyses::all();
451 }
452 
453 void HWAddressSanitizer::createHwasanCtorComdat() {
454   std::tie(HwasanCtorFunction, std::ignore) =
455       getOrCreateSanitizerCtorAndInitFunctions(
456           M, kHwasanModuleCtorName, kHwasanInitName,
457           /*InitArgTypes=*/{},
458           /*InitArgs=*/{},
459           // This callback is invoked when the functions are created the first
460           // time. Hook them into the global ctors list in that case:
461           [&](Function *Ctor, FunctionCallee) {
462             Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
463             Ctor->setComdat(CtorComdat);
464             appendToGlobalCtors(M, Ctor, 0, Ctor);
465           });
466 
467   // Create a note that contains pointers to the list of global
468   // descriptors. Adding a note to the output file will cause the linker to
469   // create a PT_NOTE program header pointing to the note that we can use to
470   // find the descriptor list starting from the program headers. A function
471   // provided by the runtime initializes the shadow memory for the globals by
472   // accessing the descriptor list via the note. The dynamic loader needs to
473   // call this function whenever a library is loaded.
474   //
475   // The reason why we use a note for this instead of a more conventional
476   // approach of having a global constructor pass a descriptor list pointer to
477   // the runtime is because of an order of initialization problem. With
478   // constructors we can encounter the following problematic scenario:
479   //
480   // 1) library A depends on library B and also interposes one of B's symbols
481   // 2) B's constructors are called before A's (as required for correctness)
482   // 3) during construction, B accesses one of its "own" globals (actually
483   //    interposed by A) and triggers a HWASAN failure due to the initialization
484   //    for A not having happened yet
485   //
486   // Even without interposition it is possible to run into similar situations in
487   // cases where two libraries mutually depend on each other.
488   //
489   // We only need one note per binary, so put everything for the note in a
490   // comdat. This needs to be a comdat with an .init_array section to prevent
491   // newer versions of lld from discarding the note.
492   //
493   // Create the note even if we aren't instrumenting globals. This ensures that
494   // binaries linked from object files with both instrumented and
495   // non-instrumented globals will end up with a note, even if a comdat from an
496   // object file with non-instrumented globals is selected. The note is harmless
497   // if the runtime doesn't support it, since it will just be ignored.
498   Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
499 
500   Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
501   auto Start =
502       new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
503                          nullptr, "__start_hwasan_globals");
504   Start->setVisibility(GlobalValue::HiddenVisibility);
505   Start->setDSOLocal(true);
506   auto Stop =
507       new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
508                          nullptr, "__stop_hwasan_globals");
509   Stop->setVisibility(GlobalValue::HiddenVisibility);
510   Stop->setDSOLocal(true);
511 
512   // The name is null-terminated, so it is actually 8 bytes, which is
513   // required in order to align the note properly.
514   auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");
515 
516   auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
517                                  Int32Ty, Int32Ty);
518   auto *Note =
519       new GlobalVariable(M, NoteTy, /*isConstant=*/true,
520                          GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
521   Note->setSection(".note.hwasan.globals");
522   Note->setComdat(NoteComdat);
523   Note->setAlignment(Align(4));
524   Note->setDSOLocal(true);
525 
526   // The pointers in the note need to be relative so that the note ends up being
527   // placed in rodata, which is the standard location for notes.
528   auto CreateRelPtr = [&](Constant *Ptr) {
529     return ConstantExpr::getTrunc(
530         ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
531                              ConstantExpr::getPtrToInt(Note, Int64Ty)),
532         Int32Ty);
533   };
534   Note->setInitializer(ConstantStruct::getAnon(
535       {ConstantInt::get(Int32Ty, 8),                           // n_namesz
536        ConstantInt::get(Int32Ty, 8),                           // n_descsz
537        ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
538        Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
539   appendToCompilerUsed(M, Note);
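  // The resulting note follows the standard ELF note layout (byte offsets
  // shown for illustration only):
  //   +0  n_namesz == 8
  //   +4  n_descsz == 8
  //   +8  n_type   == NT_LLVM_HWASAN_GLOBALS
  //   +12 name     == "LLVM" padded with NULs to 8 bytes
  //   +20 desc     == two 32-bit note-relative pointers,
  //                   __start_hwasan_globals and __stop_hwasan_globals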
540 
541   // Create a zero-length global in hwasan_globals so that the linker will
542   // always create start and stop symbols.
543   auto Dummy = new GlobalVariable(
544       M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
545       Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
546   Dummy->setSection("hwasan_globals");
547   Dummy->setComdat(NoteComdat);
548   Dummy->setMetadata(LLVMContext::MD_associated,
549                      MDNode::get(*C, ValueAsMetadata::get(Note)));
550   appendToCompilerUsed(M, Dummy);
551 }
552 
553 /// Module-level initialization.
554 ///
555 /// Inserts a call to __hwasan_init into the module's constructor list.
556 void HWAddressSanitizer::initializeModule() {
557   LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
558   auto &DL = M.getDataLayout();
559 
560   TargetTriple = Triple(M.getTargetTriple());
561 
562   // x86_64 currently has two modes:
563   // - Intel LAM (default)
564   // - pointer aliasing (heap only)
565   bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
566   UsePageAliases = shouldUsePageAliases(TargetTriple);
567   InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple);
568   InstrumentStack = shouldInstrumentStack(TargetTriple);
569   PointerTagShift = IsX86_64 ? 57 : 56;
570   TagMaskByte = IsX86_64 ? 0x3F : 0xFF;
571 
572   Mapping.init(TargetTriple, InstrumentWithCalls);
573 
574   C = &(M.getContext());
575   IRBuilder<> IRB(*C);
576   IntptrTy = IRB.getIntPtrTy(DL);
577   Int8PtrTy = IRB.getInt8PtrTy();
578   Int8Ty = IRB.getInt8Ty();
579   Int32Ty = IRB.getInt32Ty();
580 
581   HwasanCtorFunction = nullptr;
582 
583   // Older versions of Android do not have the required runtime support for
584   // short granules, global, or personality function instrumentation. On other
585   // platforms we currently require using the latest version of the runtime.
586   bool NewRuntime =
587       !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);
588 
589   UseShortGranules =
590       ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime;
591   OutlinedChecks =
592       TargetTriple.isAArch64() && TargetTriple.isOSBinFormatELF() &&
593       (ClInlineAllChecks.getNumOccurrences() ? !ClInlineAllChecks : !Recover);
594 
595   if (ClMatchAllTag.getNumOccurrences()) {
596     if (ClMatchAllTag != -1) {
597       HasMatchAllTag = true;
598       MatchAllTag = ClMatchAllTag & 0xFF;
599     }
600   } else if (CompileKernel) {
601     HasMatchAllTag = true;
602     MatchAllTag = 0xFF;
603   }
604 
605   // If we don't have personality function support, fall back to landing pads.
606   InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences()
607                               ? ClInstrumentLandingPads
608                               : !NewRuntime;
609 
610   if (!CompileKernel) {
611     createHwasanCtorComdat();
612     bool InstrumentGlobals =
613         ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime;
614 
615     if (InstrumentGlobals && !UsePageAliases)
616       instrumentGlobals();
617 
618     bool InstrumentPersonalityFunctions =
619         ClInstrumentPersonalityFunctions.getNumOccurrences()
620             ? ClInstrumentPersonalityFunctions
621             : NewRuntime;
622     if (InstrumentPersonalityFunctions)
623       instrumentPersonalityFunctions();
624   }
625 
626   if (!TargetTriple.isAndroid()) {
627     Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
628       auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
629                                     GlobalValue::ExternalLinkage, nullptr,
630                                     "__hwasan_tls", nullptr,
631                                     GlobalVariable::InitialExecTLSModel);
632       appendToCompilerUsed(M, GV);
633       return GV;
634     });
635     ThreadPtrGlobal = cast<GlobalVariable>(C);
636   }
637 }
638 
639 void HWAddressSanitizer::initializeCallbacks(Module &M) {
640   IRBuilder<> IRB(*C);
641   for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
642     const std::string TypeStr = AccessIsWrite ? "store" : "load";
643     const std::string EndingStr = Recover ? "_noabort" : "";
644 
645     HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
646         ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
647         FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false));
648 
649     for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
650          AccessSizeIndex++) {
651       HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
652           M.getOrInsertFunction(
653               ClMemoryAccessCallbackPrefix + TypeStr +
654                   itostr(1ULL << AccessSizeIndex) + EndingStr,
655               FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false));
656     }
657   }
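  // The loop above ends up declaring callbacks such as __hwasan_load4,
  // __hwasan_store16 and __hwasan_loadN, with a "_noabort" suffix appended in
  // recovery mode (names assume the default callback prefix).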
658 
659   HwasanTagMemoryFunc = M.getOrInsertFunction(
660       "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy);
661   HwasanGenerateTagFunc =
662       M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
663 
664   ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
665                                      ArrayType::get(IRB.getInt8Ty(), 0));
666 
667   const std::string MemIntrinCallbackPrefix =
668       CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
669   HWAsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
670                                         IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
671                                         IRB.getInt8PtrTy(), IntptrTy);
672   HWAsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
673                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
674                                        IRB.getInt8PtrTy(), IntptrTy);
675   HWAsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
676                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
677                                        IRB.getInt32Ty(), IntptrTy);
678 
679   HWAsanHandleVfork =
680       M.getOrInsertFunction("__hwasan_handle_vfork", IRB.getVoidTy(), IntptrTy);
681 }
682 
683 Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
684   // An empty inline asm with input reg == output reg.
685   // An opaque no-op cast, basically.
686   // This prevents code bloat as a result of rematerializing trivial definitions
687   // such as constants or global addresses at every load and store.
688   InlineAsm *Asm =
689       InlineAsm::get(FunctionType::get(Int8PtrTy, {Val->getType()}, false),
690                      StringRef(""), StringRef("=r,0"),
691                      /*hasSideEffects=*/false);
692   return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
693 }
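// The generated IR is roughly (sketch, for the ifunc case):
//   %.hwasan.shadow = call i8* asm "", "=r,0"(i8* @__hwasan_shadow)
// The optimizer cannot see through the empty asm, so the shadow base is
// computed once rather than rematerialized at every access.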
694 
695 Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
696   return getOpaqueNoopCast(IRB, ShadowGlobal);
697 }
698 
699 Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
700   if (Mapping.Offset != kDynamicShadowSentinel)
701     return getOpaqueNoopCast(
702         IRB, ConstantExpr::getIntToPtr(
703                  ConstantInt::get(IntptrTy, Mapping.Offset), Int8PtrTy));
704 
705   if (Mapping.InGlobal) {
706     return getDynamicShadowIfunc(IRB);
707   } else {
708     Value *GlobalDynamicAddress =
709         IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
710             kHwasanShadowMemoryDynamicAddress, Int8PtrTy);
711     return IRB.CreateLoad(Int8PtrTy, GlobalDynamicAddress);
712   }
713 }
714 
715 bool HWAddressSanitizer::ignoreAccess(Value *Ptr) {
716   // Do not instrument accesses from different address spaces; we cannot deal
717   // with them.
718   Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
719   if (PtrTy->getPointerAddressSpace() != 0)
720     return true;
721 
722   // Ignore swifterror addresses.
723   // swifterror memory addresses are mem2reg promoted by instruction
724   // selection. As such they cannot have regular uses like an instrumentation
725   // function and it makes no sense to track them as memory.
726   if (Ptr->isSwiftError())
727     return true;
728 
729   return false;
730 }
731 
732 void HWAddressSanitizer::getInterestingMemoryOperands(
733     Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
734   // Skip memory accesses inserted by another instrumentation.
735   if (I->hasMetadata("nosanitize"))
736     return;
737 
738   // Do not instrument the load fetching the dynamic shadow address.
739   if (ShadowBase == I)
740     return;
741 
742   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
743     if (!ClInstrumentReads || ignoreAccess(LI->getPointerOperand()))
744       return;
745     Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
746                              LI->getType(), LI->getAlign());
747   } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
748     if (!ClInstrumentWrites || ignoreAccess(SI->getPointerOperand()))
749       return;
750     Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
751                              SI->getValueOperand()->getType(), SI->getAlign());
752   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
753     if (!ClInstrumentAtomics || ignoreAccess(RMW->getPointerOperand()))
754       return;
755     Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
756                              RMW->getValOperand()->getType(), None);
757   } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
758     if (!ClInstrumentAtomics || ignoreAccess(XCHG->getPointerOperand()))
759       return;
760     Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
761                              XCHG->getCompareOperand()->getType(), None);
762   } else if (auto CI = dyn_cast<CallInst>(I)) {
763     for (unsigned ArgNo = 0; ArgNo < CI->getNumArgOperands(); ArgNo++) {
764       if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
765           ignoreAccess(CI->getArgOperand(ArgNo)))
766         continue;
767       Type *Ty = CI->getParamByValType(ArgNo);
768       Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
769     }
770   }
771 }
772 
773 static unsigned getPointerOperandIndex(Instruction *I) {
774   if (LoadInst *LI = dyn_cast<LoadInst>(I))
775     return LI->getPointerOperandIndex();
776   if (StoreInst *SI = dyn_cast<StoreInst>(I))
777     return SI->getPointerOperandIndex();
778   if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
779     return RMW->getPointerOperandIndex();
780   if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
781     return XCHG->getPointerOperandIndex();
782   report_fatal_error("Unexpected instruction");
783   return -1;
784 }
785 
786 static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
787   size_t Res = countTrailingZeros(TypeSize / 8);
788   assert(Res < kNumberOfAccessSizes);
789   return Res;
790 }
791 
792 void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
793   if (TargetTriple.isAArch64() || TargetTriple.getArch() == Triple::x86_64)
794     return;
795 
796   IRBuilder<> IRB(I);
797   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
798   Value *UntaggedPtr =
799       IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
800   I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
801 }
802 
803 Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
804   // Mem >> Scale
805   Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
806   if (Mapping.Offset == 0)
807     return IRB.CreateIntToPtr(Shadow, Int8PtrTy);
808   // (Mem >> Scale) + Offset
809   return IRB.CreateGEP(Int8Ty, ShadowBase, Shadow);
810 }
811 
812 void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
813                                                    unsigned AccessSizeIndex,
814                                                    Instruction *InsertBefore) {
815   assert(!UsePageAliases);
816   const int64_t AccessInfo =
817       (CompileKernel << HWASanAccessInfo::CompileKernelShift) +
818       (HasMatchAllTag << HWASanAccessInfo::HasMatchAllShift) +
819       (MatchAllTag << HWASanAccessInfo::MatchAllShift) +
820       (Recover << HWASanAccessInfo::RecoverShift) +
821       (IsWrite << HWASanAccessInfo::IsWriteShift) +
822       (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
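  // For example (illustrative): a recoverable 4-byte userspace read packs
  // Recover == 1, IsWrite == 0 and AccessSizeIndex == 2 into AccessInfo; the
  // runtime later decodes these fields from the trap immediate emitted below.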
823   IRBuilder<> IRB(InsertBefore);
824 
825   if (OutlinedChecks) {
826     Module *M = IRB.GetInsertBlock()->getParent()->getParent();
827     Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
828     IRB.CreateCall(Intrinsic::getDeclaration(
829                        M, UseShortGranules
830                               ? Intrinsic::hwasan_check_memaccess_shortgranules
831                               : Intrinsic::hwasan_check_memaccess),
832                    {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
833     return;
834   }
835 
836   Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
837   Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, PointerTagShift),
838                                   IRB.getInt8Ty());
839   Value *AddrLong = untagPointer(IRB, PtrLong);
840   Value *Shadow = memToShadow(AddrLong, IRB);
841   Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow);
842   Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);
843 
844   if (HasMatchAllTag) {
845     Value *TagNotIgnored = IRB.CreateICmpNE(
846         PtrTag, ConstantInt::get(PtrTag->getType(), MatchAllTag));
847     TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
848   }
849 
850   Instruction *CheckTerm =
851       SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false,
852                                 MDBuilder(*C).createBranchWeights(1, 100000));
853 
854   IRB.SetInsertPoint(CheckTerm);
855   Value *OutOfShortGranuleTagRange =
856       IRB.CreateICmpUGT(MemTag, ConstantInt::get(Int8Ty, 15));
857   Instruction *CheckFailTerm =
858       SplitBlockAndInsertIfThen(OutOfShortGranuleTagRange, CheckTerm, !Recover,
859                                 MDBuilder(*C).createBranchWeights(1, 100000));
860 
861   IRB.SetInsertPoint(CheckTerm);
862   Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(PtrLong, 15), Int8Ty);
863   PtrLowBits = IRB.CreateAdd(
864       PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
865   Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, MemTag);
866   SplitBlockAndInsertIfThen(PtrLowBitsOOB, CheckTerm, false,
867                             MDBuilder(*C).createBranchWeights(1, 100000),
868                             (DomTreeUpdater *)nullptr, nullptr,
869                             CheckFailTerm->getParent());
870 
871   IRB.SetInsertPoint(CheckTerm);
872   Value *InlineTagAddr = IRB.CreateOr(AddrLong, 15);
873   InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, Int8PtrTy);
874   Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
875   Value *InlineTagMismatch = IRB.CreateICmpNE(PtrTag, InlineTag);
876   SplitBlockAndInsertIfThen(InlineTagMismatch, CheckTerm, false,
877                             MDBuilder(*C).createBranchWeights(1, 100000),
878                             (DomTreeUpdater *)nullptr, nullptr,
879                             CheckFailTerm->getParent());
880 
881   IRB.SetInsertPoint(CheckFailTerm);
882   InlineAsm *Asm;
883   switch (TargetTriple.getArch()) {
884   case Triple::x86_64:
885     // The signal handler will find the data address in rdi.
886     Asm = InlineAsm::get(
887         FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
888         "int3\nnopl " +
889             itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
890             "(%rax)",
891         "{rdi}",
892         /*hasSideEffects=*/true);
893     break;
894   case Triple::aarch64:
895   case Triple::aarch64_be:
896     // The signal handler will find the data address in x0.
897     Asm = InlineAsm::get(
898         FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
899         "brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
900         "{x0}",
901         /*hasSideEffects=*/true);
902     break;
903   default:
904     report_fatal_error("unsupported architecture");
905   }
906   IRB.CreateCall(Asm, PtrLong);
907   if (Recover)
908     cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent());
909 }
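// In pseudocode, the inline check built above is roughly:
//
//   if (PtrTag != *Shadow) {
//     if (*Shadow > 15)
//       fail();                              // genuine tag mismatch
//     if ((Ptr & 15) + AccessSize > *Shadow)
//       fail();                              // overruns the short granule
//     if (PtrTag != Mem[Ptr | 15])
//       fail();                              // real tag is in the last byte
//   }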
910 
911 void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
912   IRBuilder<> IRB(MI);
913   if (isa<MemTransferInst>(MI)) {
914     IRB.CreateCall(
915         isa<MemMoveInst>(MI) ? HWAsanMemmove : HWAsanMemcpy,
916         {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
917          IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
918          IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
919   } else if (isa<MemSetInst>(MI)) {
920     IRB.CreateCall(
921         HWAsanMemset,
922         {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
923          IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
924          IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
925   }
926   MI->eraseFromParent();
927 }
928 
929 bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) {
930   Value *Addr = O.getPtr();
931 
932   LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");
933 
934   if (O.MaybeMask)
935     return false; // FIXME
936 
937   IRBuilder<> IRB(O.getInsn());
938   if (isPowerOf2_64(O.TypeSize) &&
939       (O.TypeSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
940       (!O.Alignment || *O.Alignment >= (1ULL << Mapping.Scale) ||
941        *O.Alignment >= O.TypeSize / 8)) {
942     size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeSize);
943     if (InstrumentWithCalls) {
944       IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
945                      IRB.CreatePointerCast(Addr, IntptrTy));
946     } else {
947       instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
948     }
949   } else {
950     IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite],
951                    {IRB.CreatePointerCast(Addr, IntptrTy),
952                     ConstantInt::get(IntptrTy, O.TypeSize / 8)});
953   }
954   untagPointerOperand(O.getInsn(), Addr);
955 
956   return true;
957 }
958 
959 static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) {
960   uint64_t ArraySize = 1;
961   if (AI.isArrayAllocation()) {
962     const ConstantInt *CI = dyn_cast<ConstantInt>(AI.getArraySize());
963     assert(CI && "non-constant array size");
964     ArraySize = CI->getZExtValue();
965   }
966   Type *Ty = AI.getAllocatedType();
967   uint64_t SizeInBytes = AI.getModule()->getDataLayout().getTypeAllocSize(Ty);
968   return SizeInBytes * ArraySize;
969 }
970 
971 bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
972                                    size_t Size) {
973   size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
974   if (!UseShortGranules)
975     Size = AlignedSize;
976 
977   Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
978   if (InstrumentWithCalls) {
979     IRB.CreateCall(HwasanTagMemoryFunc,
980                    {IRB.CreatePointerCast(AI, Int8PtrTy), JustTag,
981                     ConstantInt::get(IntptrTy, AlignedSize)});
982   } else {
983     size_t ShadowSize = Size >> Mapping.Scale;
984     Value *ShadowPtr = memToShadow(IRB.CreatePointerCast(AI, IntptrTy), IRB);
985     // If this memset is not inlined, it will be intercepted in the hwasan
986     // runtime library. That's OK, because the interceptor skips the checks if
987     // the address is in the shadow region.
988     // FIXME: the interceptor is not as fast as real memset. Consider lowering
989     // llvm.memset right here into either a sequence of stores, or a call to
990     // hwasan_tag_memory.
991     if (ShadowSize)
992       IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, Align(1));
993     if (Size != AlignedSize) {
994       IRB.CreateStore(
995           ConstantInt::get(Int8Ty, Size % Mapping.getObjectAlignment()),
996           IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
997       IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32(
998                                    Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy),
999                                    AlignedSize - 1));
1000     }
1001   }
1002   return true;
1003 }
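// Example of the short-granule encoding above (illustrative): tagging a
// 20-byte alloca with tag T and 16-byte granules gives AlignedSize == 32 and:
//   shadow[0] == T   // first granule fully accessible
//   shadow[1] == 4   // second granule: only 20 % 16 == 4 bytes are valid
//   mem[31]   == T   // the real tag lives in the granule's last byte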
1004 
1005 unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
1006   if (TargetTriple.getArch() == Triple::x86_64)
1007     return AllocaNo & TagMaskByte;
1008 
1009   // A list of 8-bit numbers that have at most one run of non-zero bits.
1010   // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
1011   // masks.
1012   // The list does not include the value 255, which is used for UAR.
1013   //
1014   // Because we are more likely to use earlier elements of this list than later
1015   // ones, it is sorted in increasing order of probability of collision with a
1016   // mask allocated (temporally) nearby. The program that generated this list
1017   // can be found at:
1018   // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
1019   static unsigned FastMasks[] = {0,  128, 64,  192, 32,  96,  224, 112, 240,
1020                                  48, 16,  120, 248, 56,  24,  8,   124, 252,
1021                                  60, 28,  12,  4,   126, 254, 62,  30,  14,
1022                                  6,  2,   127, 63,  31,  15,  7,   3,   1};
1023   return FastMasks[AllocaNo % (sizeof(FastMasks) / sizeof(FastMasks[0]))];
1024 }
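// For instance, 192 == 0b11000000 has a single run of set bits, so on AArch64
// "x ^= 192ULL << 56" can be encoded as a single EOR with a logical immediate.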
1025 
1026 Value *HWAddressSanitizer::applyTagMask(IRBuilder<> &IRB, Value *OldTag) {
1027   if (TargetTriple.getArch() == Triple::x86_64) {
1028     Constant *TagMask = ConstantInt::get(IntptrTy, TagMaskByte);
1029     Value *NewTag = IRB.CreateAnd(OldTag, TagMask);
1030     return NewTag;
1031   }
1032   // aarch64 uses 8-bit tags, so no mask is needed.
1033   return OldTag;
1034 }
1035 
1036 Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
1037   return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
1038 }
1039 
1040 Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
1041   if (ClGenerateTagsWithCalls)
1042     return getNextTagWithCall(IRB);
1043   if (StackBaseTag)
1044     return StackBaseTag;
1045   // FIXME: use addressofreturnaddress (but implement it in aarch64 backend
1046   // first).
1047   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
1048   auto GetStackPointerFn = Intrinsic::getDeclaration(
1049       M, Intrinsic::frameaddress,
1050       IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
1051   Value *StackPointer = IRB.CreateCall(
1052       GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())});
1053 
1054   // Extract some entropy from the stack pointer for the tags.
1055   // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
1056   // between functions).
1057   Value *StackPointerLong = IRB.CreatePointerCast(StackPointer, IntptrTy);
1058   Value *StackTag =
1059       applyTagMask(IRB, IRB.CreateXor(StackPointerLong,
1060                                       IRB.CreateLShr(StackPointerLong, 20)));
1061   StackTag->setName("hwasan.stack.base.tag");
1062   return StackTag;
1063 }
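// Illustrative example (hypothetical SP value): for SP == 0x00007ffd12345670,
// SP ^ (SP >> 20) folds ASLR-randomized bits 20..27 into the low byte, and it
// is that low byte which ultimately becomes the stack base tag.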
1064 
1065 Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
1066                                         AllocaInst *AI, unsigned AllocaNo) {
1067   if (ClGenerateTagsWithCalls)
1068     return getNextTagWithCall(IRB);
1069   return IRB.CreateXor(StackTag,
1070                        ConstantInt::get(IntptrTy, retagMask(AllocaNo)));
1071 }
1072 
1073 Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB, Value *StackTag) {
1074   if (ClUARRetagToZero)
1075     return ConstantInt::get(IntptrTy, 0);
1076   if (ClGenerateTagsWithCalls)
1077     return getNextTagWithCall(IRB);
1078   return IRB.CreateXor(StackTag, ConstantInt::get(IntptrTy, TagMaskByte));
1079 }
1080 
1081 // Add a tag to an address.
1082 Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
1083                                       Value *PtrLong, Value *Tag) {
1084   assert(!UsePageAliases);
1085   Value *TaggedPtrLong;
1086   if (CompileKernel) {
1087     // Kernel addresses have 0xFF in the most significant byte.
1088     Value *ShiftedTag =
1089         IRB.CreateOr(IRB.CreateShl(Tag, PointerTagShift),
1090                      ConstantInt::get(IntptrTy, (1ULL << PointerTagShift) - 1));
1091     TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
1092   } else {
1093     // Userspace can simply OR in (tag << PointerTagShift).
1094     Value *ShiftedTag = IRB.CreateShl(Tag, PointerTagShift);
1095     TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
1096   }
1097   return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
1098 }
1099 
1100 // Remove tag from an address.
1101 Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
1102   assert(!UsePageAliases);
1103   Value *UntaggedPtrLong;
1104   if (CompileKernel) {
1105     // Kernel addresses have 0xFF in the most significant byte.
1106     UntaggedPtrLong =
1107         IRB.CreateOr(PtrLong, ConstantInt::get(PtrLong->getType(),
1108                                                0xFFULL << PointerTagShift));
1109   } else {
1110     // Userspace addresses have 0x00.
1111     UntaggedPtrLong =
1112         IRB.CreateAnd(PtrLong, ConstantInt::get(PtrLong->getType(),
1113                                                 ~(0xFFULL << PointerTagShift)));
1114   }
1115   return UntaggedPtrLong;
1116 }
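// Worked userspace example (illustrative values): with PointerTagShift == 56,
// tagging 0x00007fffdeadbeef with tag 0x2a yields 0x2a007fffdeadbeef, and
// untagging masks the top byte back off:
//   tagged   = ptr | (0x2aULL << 56);
//   untagged = tagged & ~(0xffULL << 56);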
1117 
1118 Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
1119   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
1120   if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) {
1121     // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
1122     // in Bionic's libc/private/bionic_tls.h.
1123     Function *ThreadPointerFunc =
1124         Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
1125     Value *SlotPtr = IRB.CreatePointerCast(
1126         IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
1127                                IRB.CreateCall(ThreadPointerFunc), 0x30),
1128         Ty->getPointerTo(0));
1129     return SlotPtr;
1130   }
1131   if (ThreadPtrGlobal)
1132     return ThreadPtrGlobal;
1133 
1134   return nullptr;
1135 }
1136 
1137 void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
1138   if (!Mapping.InTls)
1139     ShadowBase = getShadowNonTls(IRB);
1140   else if (!WithFrameRecord && TargetTriple.isAndroid())
1141     ShadowBase = getDynamicShadowIfunc(IRB);
1142 
1143   if (!WithFrameRecord && ShadowBase)
1144     return;
1145 
1146   Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
1147   assert(SlotPtr);
1148 
1149   Value *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
1150   // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI.
1151   Value *ThreadLongMaybeUntagged =
1152       TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
1153 
1154   if (WithFrameRecord) {
1155     Function *F = IRB.GetInsertBlock()->getParent();
1156     StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
1157 
1158     // Prepare ring buffer data.
1159     Value *PC;
1160     if (TargetTriple.getArch() == Triple::aarch64)
1161       PC = readRegister(IRB, "pc");
1162     else
1163       PC = IRB.CreatePtrToInt(F, IntptrTy);
1164     Module *M = F->getParent();
1165     auto GetStackPointerFn = Intrinsic::getDeclaration(
1166         M, Intrinsic::frameaddress,
1167         IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
1168     Value *SP = IRB.CreatePtrToInt(
1169         IRB.CreateCall(GetStackPointerFn,
1170                        {Constant::getNullValue(IRB.getInt32Ty())}),
1171         IntptrTy);
1172     // Mix SP and PC.
1173     // Assumptions:
1174     // PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero)
1175     // SP is 0xsssssssssssSSSS0  (4 lower bits are zero)
1176     // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
1177     //       0xSSSSPPPPPPPPPPPP
1178     SP = IRB.CreateShl(SP, 44);
1179 
1180     // Store data to ring buffer.
1181     Value *RecordPtr =
1182         IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0));
1183     IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr);
1184 
1185     // Update the ring buffer. Top byte of ThreadLong defines the size of the
1186     // buffer in pages, it must be a power of two, and the start of the buffer
1187     // must be aligned by twice that much. Therefore wrap around of the ring
1188     // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
1189     // The use of AShr instead of LShr is due to
1190     //   https://bugs.llvm.org/show_bug.cgi?id=39030
1191     // Runtime library makes sure not to use the highest bit.
1192     Value *WrapMask = IRB.CreateXor(
1193         IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
1194         ConstantInt::get(IntptrTy, (uint64_t)-1));
1195     Value *ThreadLongNew = IRB.CreateAnd(
1196         IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
1197     IRB.CreateStore(ThreadLongNew, SlotPtr);
1198   }
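  // Worked example for the wrap logic above (illustrative): if the top byte
  // of ThreadLong is 2, the ring buffer is 2 pages == 0x2000 bytes and its
  // start is 0x4000-aligned, so:
  //   WrapMask      == ~(2 << 12) == ~0x2000
  //   ThreadLongNew == (ThreadLong + 8) & ~0x2000
  // The AND clears bit 13 exactly when the pointer steps past the end of the
  // buffer, snapping it back to the start.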
1199 
1200   if (!ShadowBase) {
1201     // Get shadow base address by aligning RecordPtr up.
1202     // Note: this is not correct if the pointer is already aligned.
1203     // The runtime library makes sure this never happens.
1204     ShadowBase = IRB.CreateAdd(
1205         IRB.CreateOr(
1206             ThreadLongMaybeUntagged,
1207             ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
1208         ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
1209     ShadowBase = IRB.CreateIntToPtr(ShadowBase, Int8PtrTy);
1210   }
1211 }
1212 
1213 Value *HWAddressSanitizer::readRegister(IRBuilder<> &IRB, StringRef Name) {
1214   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
1215   Function *ReadRegister =
1216       Intrinsic::getDeclaration(M, Intrinsic::read_register, IntptrTy);
1217   MDNode *MD = MDNode::get(*C, {MDString::get(*C, Name)});
1218   Value *Args[] = {MetadataAsValue::get(*C, MD)};
1219   return IRB.CreateCall(ReadRegister, Args);
1220 }
1221 
1222 bool HWAddressSanitizer::instrumentLandingPads(
1223     SmallVectorImpl<Instruction *> &LandingPadVec) {
1224   for (auto *LP : LandingPadVec) {
1225     IRBuilder<> IRB(LP->getNextNode());
1226     IRB.CreateCall(
1227         HWAsanHandleVfork,
1228         {readRegister(IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp"
1229                                                                       : "sp")});
1230   }
1231   return true;
1232 }
1233 
1234 bool HWAddressSanitizer::instrumentStack(
1235     SmallVectorImpl<AllocaInst *> &Allocas,
1236     DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
1237     SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) {
1238   // Ideally, we want to calculate a tagged stack base pointer, and rewrite all
1239   // alloca addresses using that. Unfortunately, offsets are not known yet
1240   // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
1241   // temp, shift-OR it into each alloca address and xor with the retag mask.
1242   // This generates one extra instruction per alloca use.
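  // Sketch of the rewrite for alloca number N (hypothetical IR names):
  //   %ailong   = ptrtoint %alloca
  //   %tag      = xor StackTag, retagMask(N)
  //   %replaced = inttoptr (or %ailong, shl(%tag, PointerTagShift))
  // All uses of %alloca are rewritten to %replaced, the backing memory is
  // tagged on entry, and it is re-tagged with the UAR tag before each return.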
1243   for (unsigned N = 0; N < Allocas.size(); ++N) {
1244     auto *AI = Allocas[N];
1245     IRBuilder<> IRB(AI->getNextNode());
1246 
1247     // Replace uses of the alloca with tagged address.
1248     Value *Tag = getAllocaTag(IRB, StackTag, AI, N);
1249     Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
1250     Value *Replacement = tagPointer(IRB, AI->getType(), AILong, Tag);
1251     std::string Name =
1252         AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
1253     Replacement->setName(Name + ".hwasan");
1254 
1255     AI->replaceUsesWithIf(Replacement,
1256                           [AILong](Use &U) { return U.getUser() != AILong; });
1257 
1258     for (auto *DDI : AllocaDbgMap.lookup(AI)) {
1259       // Prepend "tag_offset, N" to the dwarf expression.
1260       // Tag offset logically applies to the alloca pointer, and it makes sense
1261       // to put it at the beginning of the expression.
      SmallVector<uint64_t, 8> NewOps = {dwarf::DW_OP_LLVM_tag_offset,
                                         retagMask(N)};
      for (size_t LocNo = 0; LocNo < DDI->getNumVariableLocationOps(); ++LocNo)
        if (DDI->getVariableLocationOp(LocNo) == AI)
          DDI->setExpression(DIExpression::appendOpsToArg(DDI->getExpression(),
                                                          NewOps, LocNo));
    }

    size_t Size = getAllocaSizeInBytes(*AI);
    tagAlloca(IRB, AI, Tag, Size);

    for (auto *RI : RetVec) {
      IRB.SetInsertPoint(RI);

      // Re-tag the alloca's memory with the special UAR (use-after-return)
      // tag so that accesses through dangling pointers are caught.
      Value *UARTag = getUARTag(IRB, StackTag);
      tagAlloca(IRB, AI, UARTag, alignTo(Size, Mapping.getObjectAlignment()));
    }
  }

  return true;
}

bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
  return (AI.getAllocatedType()->isSized() &&
          // FIXME: instrument dynamic allocas, too
          AI.isStaticAlloca() &&
          // alloca() may be called with 0 size; ignore it.
          getAllocaSizeInBytes(AI) > 0 &&
          // We are only interested in allocas not promotable to registers.
          // Promotable allocas are common under -O0.
          !isAllocaPromotable(&AI) &&
          // inalloca allocas are not treated as static, and we don't want
          // dynamic alloca instrumentation for them either.
          !AI.isUsedWithInAlloca() &&
          // swifterror allocas are register promoted by ISel.
          !AI.isSwiftError()) &&
         // Safe allocas are not interesting.
         !(SSI && SSI->isSafe(AI));
}

bool HWAddressSanitizer::sanitizeFunction(Function &F) {
  if (&F == HwasanCtorFunction)
    return false;

  if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
    return false;

  LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");

  SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
  SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
  SmallVector<AllocaInst *, 8> AllocasToInstrument;
  SmallVector<Instruction *, 8> RetVec;
  SmallVector<Instruction *, 8> LandingPadVec;
  DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> AllocaDbgMap;
  for (auto &BB : F) {
    for (auto &Inst : BB) {
      if (InstrumentStack)
        if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
          if (isInterestingAlloca(*AI))
            AllocasToInstrument.push_back(AI);
          continue;
        }

      if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst) ||
          isa<CleanupReturnInst>(Inst))
        RetVec.push_back(&Inst);

      if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst)) {
        for (Value *V : DVI->location_ops()) {
          if (auto *Alloca = dyn_cast_or_null<AllocaInst>(V))
            if (!AllocaDbgMap.count(Alloca) ||
                AllocaDbgMap[Alloca].back() != DVI)
              AllocaDbgMap[Alloca].push_back(DVI);
        }
      }

      if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
        LandingPadVec.push_back(&Inst);

      getInterestingMemoryOperands(&Inst, OperandsToInstrument);

      if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
        IntrinToInstrument.push_back(MI);
    }
  }

  initializeCallbacks(*F.getParent());

  bool Changed = false;

  if (!LandingPadVec.empty())
    Changed |= instrumentLandingPads(LandingPadVec);

  if (AllocasToInstrument.empty() && F.hasPersonalityFn() &&
      F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
    // __hwasan_personality_thunk is a no-op for functions without an
    // instrumented stack, so we can drop it.
    F.setPersonalityFn(nullptr);
    Changed = true;
  }

  if (AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
      IntrinToInstrument.empty())
    return Changed;

  assert(!ShadowBase);

  Instruction *InsertPt = &*F.getEntryBlock().begin();
  IRBuilder<> EntryIRB(InsertPt);
  emitPrologue(EntryIRB,
               /*WithFrameRecord=*/ClRecordStackHistory &&
                   Mapping.WithFrameRecord && !AllocasToInstrument.empty());

  if (!AllocasToInstrument.empty()) {
    Value *StackTag =
        ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
    instrumentStack(AllocasToInstrument, AllocaDbgMap, RetVec, StackTag);
  }
  // Pad and align each of the allocas that we instrumented so that small,
  // uninteresting allocas cannot hide in an instrumented alloca's padding and
  // so that we have enough space to store real tags for short granules.
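  // For example, with the default 16-byte granule a 20-byte alloca is rebuilt
  // below as an alloca of { <original type>, [12 x i8] }, i.e. padded out to
  // 32 bytes (illustrative).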
  DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap;
  for (AllocaInst *AI : AllocasToInstrument) {
    uint64_t Size = getAllocaSizeInBytes(*AI);
    uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
    AI->setAlignment(
        Align(std::max(AI->getAlignment(), Mapping.getObjectAlignment())));
    if (Size != AlignedSize) {
      Type *AllocatedType = AI->getAllocatedType();
      if (AI->isArrayAllocation()) {
        uint64_t ArraySize =
            cast<ConstantInt>(AI->getArraySize())->getZExtValue();
        AllocatedType = ArrayType::get(AllocatedType, ArraySize);
      }
      Type *TypeWithPadding = StructType::get(
          AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size));
      auto *NewAI = new AllocaInst(
          TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI);
      NewAI->takeName(AI);
      NewAI->setAlignment(AI->getAlign());
      NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
      NewAI->setSwiftError(AI->isSwiftError());
      NewAI->copyMetadata(*AI);
      auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI);
      AI->replaceAllUsesWith(Bitcast);
      AllocaToPaddedAllocaMap[AI] = NewAI;
    }
  }

  if (!AllocaToPaddedAllocaMap.empty()) {
    for (auto &BB : F) {
      for (auto &Inst : BB) {
        if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst)) {
          SmallDenseSet<Value *> LocationOps(DVI->location_ops().begin(),
                                             DVI->location_ops().end());
          for (Value *V : LocationOps) {
            if (auto *AI = dyn_cast_or_null<AllocaInst>(V)) {
              if (auto *NewAI = AllocaToPaddedAllocaMap.lookup(AI))
                DVI->replaceVariableLocationOp(V, NewAI);
            }
          }
        }
      }
    }
    for (auto &P : AllocaToPaddedAllocaMap)
      P.first->eraseFromParent();
  }

  // If we split the entry block, move any allocas that were originally in the
  // entry block back into the entry block so that they aren't treated as
  // dynamic allocas.
  if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
    InsertPt = &*F.getEntryBlock().begin();
    for (auto II = EntryIRB.GetInsertBlock()->begin(),
              IE = EntryIRB.GetInsertBlock()->end();
         II != IE;) {
      Instruction *I = &*II++;
      if (auto *AI = dyn_cast<AllocaInst>(I))
        if (isa<ConstantInt>(AI->getArraySize()))
          I->moveBefore(InsertPt);
    }
  }

  for (auto &Operand : OperandsToInstrument)
    instrumentMemAccess(Operand);

  if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
    // IntrinToInstrument already holds MemIntrinsic pointers, so no cast is
    // needed here.
    for (auto *MI : IntrinToInstrument)
      instrumentMemIntrinsic(MI);
  }

  ShadowBase = nullptr;
  StackBaseTag = nullptr;

  return true;
}

void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
  assert(!UsePageAliases);
  Constant *Initializer = GV->getInitializer();
  uint64_t SizeInBytes =
      M.getDataLayout().getTypeAllocSize(Initializer->getType());
  uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
  if (SizeInBytes != NewSize) {
    // Pad the initializer out to the next multiple of 16 bytes and add the
    // required short granule tag.
    std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
    Init.back() = Tag;
    Constant *Padding = ConstantDataArray::get(*C, Init);
    Initializer = ConstantStruct::getAnon({Initializer, Padding});
  }

  auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
                                   GlobalValue::ExternalLinkage, Initializer,
                                   GV->getName() + ".hwasan");
  NewGV->copyAttributesFrom(GV);
  NewGV->setLinkage(GlobalValue::PrivateLinkage);
  NewGV->copyMetadata(GV, 0);
  NewGV->setAlignment(
      MaybeAlign(std::max(GV->getAlignment(), Mapping.getObjectAlignment())));

  // It is invalid to ICF two globals that have different tags. In the case
  // where the size of the global is a multiple of the tag granularity the
  // contents of the globals may be the same but the tags (i.e. symbol values)
  // may be different, and the symbols are not considered during ICF. In the
  // case where the size is not a multiple of the granularity, the short
  // granule tags would discriminate two globals with different tags, but
  // there would otherwise be nothing stopping such a global from being
  // incorrectly ICF'd with an uninstrumented (i.e. tag 0) global that
  // happened to have the short granule tag in the last byte.
  NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);

  // Descriptor format (assuming little-endian):
  // bytes 0-3: relative address of global
  // bytes 4-6: size of global (16MB ought to be enough for anyone, but in
  // case it isn't, we create multiple descriptors)
  // byte 7: tag
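  // E.g. a 40-byte global with tag 0x2a produces one descriptor whose second
  // word is 0x2a000028: size 0x000028 in the low 24 bits and the tag in the
  // top byte, matching the SizeAndTag computation below (illustrative).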
  auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
  const uint64_t MaxDescriptorSize = 0xfffff0;
  for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
       DescriptorPos += MaxDescriptorSize) {
    auto *Descriptor =
        new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
                           nullptr, GV->getName() + ".hwasan.descriptor");
    auto *GVRelPtr = ConstantExpr::getTrunc(
        ConstantExpr::getAdd(
            ConstantExpr::getSub(
                ConstantExpr::getPtrToInt(NewGV, Int64Ty),
                ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
            ConstantInt::get(Int64Ty, DescriptorPos)),
        Int32Ty);
    uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
    auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
    Descriptor->setComdat(NewGV->getComdat());
    Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
    Descriptor->setSection("hwasan_globals");
    Descriptor->setMetadata(LLVMContext::MD_associated,
                            MDNode::get(*C, ValueAsMetadata::get(NewGV)));
    appendToCompilerUsed(M, Descriptor);
  }

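  // The original symbol becomes an alias whose value is the new global's
  // address with the tag folded into the high bits; e.g. with Tag == 0x2a
  // and a PointerTagShift of 56 (the AArch64 TBI case), the alias points at
  // NewGV + 0x2a00000000000000 (illustrative).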
  Constant *Aliasee = ConstantExpr::getIntToPtr(
      ConstantExpr::getAdd(
          ConstantExpr::getPtrToInt(NewGV, Int64Ty),
          ConstantInt::get(Int64Ty, uint64_t(Tag) << PointerTagShift)),
      GV->getType());
  auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
                                    GV->getLinkage(), "", Aliasee, &M);
  Alias->setVisibility(GV->getVisibility());
  Alias->takeName(GV);
  GV->replaceAllUsesWith(Alias);
  GV->eraseFromParent();
}

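// Each entry in !llvm.asan.globals has five operands; a node looks roughly
// like (illustrative):
//   !0 = !{i32* @g, null, !"g", i1 false, i1 true}
// where operand 4 is the "is excluded" flag tested below.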
static DenseSet<GlobalVariable *> getExcludedGlobals(Module &M) {
  NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
  if (!Globals)
    return DenseSet<GlobalVariable *>();
  DenseSet<GlobalVariable *> Excluded(Globals->getNumOperands());
  for (auto MDN : Globals->operands()) {
    // Metadata node contains the global and the fields of "Entry".
    assert(MDN->getNumOperands() == 5);
    auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0));
    // The optimizer may optimize away a global entirely.
    if (!V)
      continue;
    auto *StrippedV = V->stripPointerCasts();
    auto *GV = dyn_cast<GlobalVariable>(StrippedV);
    if (!GV)
      continue;
    ConstantInt *IsExcluded = mdconst::extract<ConstantInt>(MDN->getOperand(4));
    if (IsExcluded->isOne())
      Excluded.insert(GV);
  }
  return Excluded;
}

void HWAddressSanitizer::instrumentGlobals() {
  std::vector<GlobalVariable *> Globals;
  auto ExcludedGlobals = getExcludedGlobals(M);
  for (GlobalVariable &GV : M.globals()) {
    if (ExcludedGlobals.count(&GV))
      continue;

    if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") ||
        GV.isThreadLocal())
      continue;

    // Common symbols can't have aliases point to them, so they can't be
    // tagged.
    if (GV.hasCommonLinkage())
      continue;

    // Globals with custom sections may be used in __start_/__stop_
    // enumeration, which would be broken both by adding tags and potentially
    // by the extra padding/alignment that we insert.
    if (GV.hasSection())
      continue;

    Globals.push_back(&GV);
  }

  MD5 Hasher;
  Hasher.update(M.getSourceFileName());
  MD5::MD5Result Hash;
  Hasher.final(Hash);
  uint8_t Tag = Hash[0] & TagMaskByte;

  for (GlobalVariable *GV : Globals) {
    // Skip tag 0 in order to avoid collisions with untagged memory.
    if (Tag == 0)
      Tag = 1;
    instrumentGlobal(GV, Tag++);
  }
}

void HWAddressSanitizer::instrumentPersonalityFunctions() {
  // We need to untag stack frames as we unwind past them. That is the job of
  // the personality function wrapper, which either wraps an existing
  // personality function or acts as a personality function on its own. Each
  // function that has a personality function or that can be unwound past has
  // its personality function changed to a thunk that calls the personality
  // function wrapper in the runtime.
  MapVector<Constant *, std::vector<Function *>> PersonalityFns;
  for (Function &F : M) {
    if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
      continue;

    if (F.hasPersonalityFn()) {
      PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F);
    } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
      PersonalityFns[nullptr].push_back(&F);
    }
  }

  if (PersonalityFns.empty())
    return;

  FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
      "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty,
      Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy);
  FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
  FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);
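  // Note: _Unwind_GetGR and _Unwind_GetCFA are declared here with a dummy
  // void() type; only their addresses are taken (bitcast to i8* below) and
  // forwarded to the runtime wrapper, which knows their real signatures.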

  for (auto &P : PersonalityFns) {
    std::string ThunkName = kHwasanPersonalityThunkName;
    if (P.first)
      ThunkName += ("." + P.first->getName()).str();
    FunctionType *ThunkFnTy = FunctionType::get(
        Int32Ty, {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int8PtrTy}, false);
    bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
                               cast<GlobalValue>(P.first)->hasLocalLinkage());
    auto *ThunkFn = Function::Create(ThunkFnTy,
                                     IsLocal ? GlobalValue::InternalLinkage
                                             : GlobalValue::LinkOnceODRLinkage,
                                     ThunkName, &M);
    if (!IsLocal) {
      ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
      ThunkFn->setComdat(M.getOrInsertComdat(ThunkName));
    }

    auto *BB = BasicBlock::Create(*C, "entry", ThunkFn);
    IRBuilder<> IRB(BB);
    CallInst *WrapperCall = IRB.CreateCall(
        HwasanPersonalityWrapper,
        {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
         ThunkFn->getArg(3), ThunkFn->getArg(4),
         P.first ? IRB.CreateBitCast(P.first, Int8PtrTy)
                 : Constant::getNullValue(Int8PtrTy),
         IRB.CreateBitCast(UnwindGetGR.getCallee(), Int8PtrTy),
         IRB.CreateBitCast(UnwindGetCFA.getCallee(), Int8PtrTy)});
    WrapperCall->setTailCall();
    IRB.CreateRet(WrapperCall);

    for (Function *F : P.second)
      F->setPersonalityFn(ThunkFn);
  }
}

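// Chooses how shadow memory is addressed. With the default scale of 4, every
// 16 bytes of application memory map to one shadow byte, i.e. (a sketch):
//   Shadow = (Addr >> Scale) + ShadowBase
// where ShadowBase is either the fixed Offset selected below or found
// dynamically (kDynamicShadowSentinel) via an ifunc or a TLS slot.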
void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple,
                                             bool InstrumentWithCalls) {
  Scale = kDefaultShadowScale;
  if (TargetTriple.isOSFuchsia()) {
    // Fuchsia is always PIE, which means that the beginning of the address
    // space is always available.
    InGlobal = false;
    InTls = false;
    Offset = 0;
    WithFrameRecord = true;
  } else if (ClMappingOffset.getNumOccurrences() > 0) {
    InGlobal = false;
    InTls = false;
    Offset = ClMappingOffset;
    WithFrameRecord = false;
  } else if (ClEnableKhwasan || InstrumentWithCalls) {
    InGlobal = false;
    InTls = false;
    Offset = 0;
    WithFrameRecord = false;
  } else if (ClWithIfunc) {
    InGlobal = true;
    InTls = false;
    Offset = kDynamicShadowSentinel;
    WithFrameRecord = false;
  } else if (ClWithTls) {
    InGlobal = false;
    InTls = true;
    Offset = kDynamicShadowSentinel;
    WithFrameRecord = true;
  } else {
    InGlobal = false;
    InTls = false;
    Offset = kDynamicShadowSentinel;
    WithFrameRecord = false;
  }
}