//===- HWAddressSanitizer.cpp - detector of memory access errors ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file is a part of HWAddressSanitizer, a basic correctness checker
/// for memory addresses, based on tagged addressing.
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "hwasan"

const char kHwasanModuleCtorName[] = "hwasan.module_ctor";
const char kHwasanNoteName[] = "hwasan.note";
const char kHwasanInitName[] = "__hwasan_init";
const char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk";

const char kHwasanShadowMemoryDynamicAddress[] =
    "__hwasan_shadow_memory_dynamic_address";

// Access sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;
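// For example, a 4-byte access uses AccessSizeIndex 2 (1 << 2 == 4 bytes) and,
// in callback mode, is checked by __hwasan_load4 / __hwasan_store4 (see
// initializeCallbacks() below).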

static const size_t kDefaultShadowScale = 4;
static const uint64_t kDynamicShadowSentinel =
    std::numeric_limits<uint64_t>::max();

static const unsigned kShadowBaseAlignment = 32;

static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("hwasan-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__hwasan_"));

static cl::opt<bool> ClKasanMemIntrinCallbackPrefix(
    "hwasan-kernel-mem-intrinsic-prefix",
    cl::desc("Use prefix for memory intrinsics in KASAN mode"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentWithCalls(
    "hwasan-instrument-with-calls",
    cl::desc("instrument reads and writes with callbacks"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("hwasan-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "hwasan-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval",
                                       cl::desc("instrument byval arguments"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClRecover("hwasan-recover",
              cl::desc("Enable recovery mode (continue-after-error)."),
              cl::Hidden, cl::init(false));

static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
                                       cl::desc("instrument stack (allocas)"),
                                       cl::Hidden, cl::init(true));
static cl::opt<bool>
    ClUseStackSafety("hwasan-use-stack-safety", cl::Hidden, cl::init(true),
                     cl::desc("Use Stack Safety analysis results"),
                     cl::Optional);

static cl::opt<size_t> ClMaxLifetimes(
    "hwasan-max-lifetimes-for-alloca", cl::init(3), cl::ReallyHidden,
    cl::desc("How many lifetime ends to handle for a single alloca."),
    cl::Optional);

static cl::opt<bool>
    ClUseAfterScope("hwasan-use-after-scope",
                    cl::desc("detect use after scope within function"),
                    cl::Hidden, cl::init(false));

static cl::opt<bool> ClUARRetagToZero(
    "hwasan-uar-retag-to-zero",
    cl::desc("Clear alloca tags before returning from the function to allow "
             "mixing instrumented and non-instrumented function calls. When "
             "set to false, allocas are retagged before returning from the "
             "function to detect use after return."),
    cl::Hidden, cl::init(true));

static cl::opt<bool> ClGenerateTagsWithCalls(
    "hwasan-generate-tags-with-calls",
    cl::desc("generate new tags with runtime library calls"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
                               cl::Hidden, cl::init(false));

static cl::opt<int> ClMatchAllTag(
    "hwasan-match-all-tag",
    cl::desc("don't report bad accesses via pointers with this tag"),
    cl::Hidden, cl::init(-1));

static cl::opt<bool>
    ClEnableKhwasan("hwasan-kernel",
                    cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
                    cl::Hidden, cl::init(false));

// These flags allow changing the shadow mapping and control how shadow memory
// is accessed. The shadow mapping looks like:
//    Shadow = (Mem >> scale) + offset
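//
// Worked example (illustrative values): with the default scale of 4 and a
// zero offset, the 16-byte granule at address 0x1000 is covered by the single
// shadow byte at address 0x100 (0x1000 >> 4); each shadow byte describes one
// granule.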

static cl::opt<uint64_t>
    ClMappingOffset("hwasan-mapping-offset",
                    cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
                    cl::Hidden, cl::init(0));

static cl::opt<bool>
    ClWithIfunc("hwasan-with-ifunc",
                cl::desc("Access dynamic shadow through an ifunc global on "
                         "platforms that support this"),
                cl::Hidden, cl::init(false));

static cl::opt<bool> ClWithTls(
    "hwasan-with-tls",
    cl::desc("Access dynamic shadow through a thread-local pointer on "
             "platforms that support this"),
    cl::Hidden, cl::init(true));

// Mode for selecting how to insert frame record info into the stack ring
// buffer.
enum RecordStackHistoryMode {
  // Do not record frame record info.
  none,

  // Insert instructions into the prologue for storing into the stack ring
  // buffer directly.
  instr,

  // Add a call to __hwasan_add_frame_record in the runtime.
  libcall,
};

static cl::opt<RecordStackHistoryMode> ClRecordStackHistory(
    "hwasan-record-stack-history",
    cl::desc("Record stack frames with tagged allocations in a thread-local "
             "ring buffer"),
    cl::values(clEnumVal(none, "Do not record stack ring history"),
               clEnumVal(instr, "Insert instructions into the prologue for "
                                "storing into the stack ring buffer directly"),
               clEnumVal(libcall, "Add a call to __hwasan_add_frame_record for "
                                  "storing into the stack ring buffer")),
    cl::Hidden, cl::init(instr));

static cl::opt<bool>
    ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
                              cl::desc("instrument memory intrinsics"),
                              cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentLandingPads("hwasan-instrument-landing-pads",
                            cl::desc("instrument landing pads"), cl::Hidden,
                            cl::init(false));

static cl::opt<bool> ClUseShortGranules(
    "hwasan-use-short-granules",
    cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentPersonalityFunctions(
    "hwasan-instrument-personality-functions",
    cl::desc("instrument personality functions"), cl::Hidden);

static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
                                       cl::desc("inline all checks"),
                                       cl::Hidden, cl::init(false));

// Enabled from clang by "-fsanitize-hwaddress-experimental-aliasing".
static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases",
                                      cl::desc("Use page aliasing in HWASan"),
                                      cl::Hidden, cl::init(false));

namespace {

bool shouldUsePageAliases(const Triple &TargetTriple) {
  return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64;
}

bool shouldInstrumentStack(const Triple &TargetTriple) {
  return !shouldUsePageAliases(TargetTriple) && ClInstrumentStack;
}

bool shouldInstrumentWithCalls(const Triple &TargetTriple) {
  return ClInstrumentWithCalls || TargetTriple.getArch() == Triple::x86_64;
}

bool mightUseStackSafetyAnalysis(bool DisableOptimization) {
  return ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
                                              : !DisableOptimization;
}

bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
                                  bool DisableOptimization) {
  return shouldInstrumentStack(TargetTriple) &&
         mightUseStackSafetyAnalysis(DisableOptimization);
}

bool shouldDetectUseAfterScope(const Triple &TargetTriple) {
  return ClUseAfterScope && shouldInstrumentStack(TargetTriple);
}

/// An instrumentation pass implementing detection of addressability bugs
/// using tagged pointers.
class HWAddressSanitizer {
public:
  HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
                     const StackSafetyGlobalInfo *SSI)
      : M(M), SSI(SSI) {
    this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
    this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0
                              ? ClEnableKhwasan
                              : CompileKernel;

    initializeModule();
  }

  void setSSI(const StackSafetyGlobalInfo *S) { SSI = S; }

  bool sanitizeFunction(Function &F, FunctionAnalysisManager &FAM);
  void initializeModule();
  void createHwasanCtorComdat();

  void initializeCallbacks(Module &M);

  Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val);

  Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
  Value *getShadowNonTls(IRBuilder<> &IRB);

  void untagPointerOperand(Instruction *I, Value *Addr);
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);

  int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex);
  void instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
                                  unsigned AccessSizeIndex,
                                  Instruction *InsertBefore);
  void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                 unsigned AccessSizeIndex,
                                 Instruction *InsertBefore);
  bool ignoreMemIntrinsic(MemIntrinsic *MI);
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  bool instrumentMemAccess(InterestingMemoryOperand &O);
  bool ignoreAccess(Instruction *Inst, Value *Ptr);
  void getInterestingMemoryOperands(
      Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting);

  void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
  Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
  Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
  bool instrumentStack(memtag::StackInfo &Info, Value *StackTag,
                       const DominatorTree &DT, const PostDominatorTree &PDT,
                       const LoopInfo &LI);
  Value *readRegister(IRBuilder<> &IRB, StringRef Name);
  bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
  Value *getNextTagWithCall(IRBuilder<> &IRB);
  Value *getStackBaseTag(IRBuilder<> &IRB);
  Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI,
                      unsigned AllocaNo);
  Value *getUARTag(IRBuilder<> &IRB, Value *StackTag);

  Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
  Value *applyTagMask(IRBuilder<> &IRB, Value *OldTag);
  unsigned retagMask(unsigned AllocaNo);

  void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);

  void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
  void instrumentGlobals();

  Value *getPC(IRBuilder<> &IRB);
  Value *getSP(IRBuilder<> &IRB);
  Value *getFrameRecordInfo(IRBuilder<> &IRB);

  void instrumentPersonalityFunctions();

private:
  LLVMContext *C;
  Module &M;
  const StackSafetyGlobalInfo *SSI;
  Triple TargetTriple;
  FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset;
  FunctionCallee HWAsanHandleVfork;

  /// This struct defines the shadow mapping using the rule:
  ///   shadow = (mem >> Scale) + Offset.
  /// If InGlobal is true, then
  ///   extern char __hwasan_shadow[];
  ///   shadow = (mem >> Scale) + &__hwasan_shadow
  /// If InTls is true, then
  ///   extern char *__hwasan_tls;
  ///   shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
  ///
  /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
  /// ring buffer for storing stack allocations on targets that support it.
  struct ShadowMapping {
    uint8_t Scale;
    uint64_t Offset;
    bool InGlobal;
    bool InTls;
    bool WithFrameRecord;

    void init(Triple &TargetTriple, bool InstrumentWithCalls);
    Align getObjectAlignment() const { return Align(1ULL << Scale); }
  };

  ShadowMapping Mapping;

  Type *VoidTy = Type::getVoidTy(M.getContext());
  Type *IntptrTy;
  Type *Int8PtrTy;
  Type *Int8Ty;
  Type *Int32Ty;
  Type *Int64Ty = Type::getInt64Ty(M.getContext());

  bool CompileKernel;
  bool Recover;
  bool OutlinedChecks;
  bool UseShortGranules;
  bool InstrumentLandingPads;
  bool InstrumentWithCalls;
  bool InstrumentStack;
  bool DetectUseAfterScope;
  bool UsePageAliases;

  std::optional<uint8_t> MatchAllTag;

  unsigned PointerTagShift;
  uint64_t TagMaskByte;

  Function *HwasanCtorFunction;

  FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
  FunctionCallee HwasanMemoryAccessCallbackSized[2];

  FunctionCallee HwasanTagMemoryFunc;
  FunctionCallee HwasanGenerateTagFunc;
  FunctionCallee HwasanRecordFrameRecordFunc;

  Constant *ShadowGlobal;

  Value *ShadowBase = nullptr;
  Value *StackBaseTag = nullptr;
  Value *CachedSP = nullptr;
  GlobalValue *ThreadPtrGlobal = nullptr;
};

} // end anonymous namespace

PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
                                              ModuleAnalysisManager &MAM) {
  const StackSafetyGlobalInfo *SSI = nullptr;
  auto TargetTriple = llvm::Triple(M.getTargetTriple());
  if (shouldUseStackSafetyAnalysis(TargetTriple, Options.DisableOptimization))
    SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);

  HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
  bool Modified = false;
  auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  for (Function &F : M)
    Modified |= HWASan.sanitizeFunction(F, FAM);
  if (!Modified)
    return PreservedAnalyses::all();

  PreservedAnalyses PA = PreservedAnalyses::none();
  // GlobalsAA is considered stateless and does not get invalidated unless
  // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
  // make changes that require GlobalsAA to be invalidated.
  PA.abandon<GlobalsAA>();
  return PA;
}

void HWAddressSanitizerPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  OS << "<";
  if (Options.CompileKernel)
    OS << "kernel;";
  if (Options.Recover)
    OS << "recover";
  OS << ">";
}

void HWAddressSanitizer::createHwasanCtorComdat() {
  std::tie(HwasanCtorFunction, std::ignore) =
      getOrCreateSanitizerCtorAndInitFunctions(
          M, kHwasanModuleCtorName, kHwasanInitName,
          /*InitArgTypes=*/{},
          /*InitArgs=*/{},
          // This callback is invoked when the functions are created the first
          // time. Hook them into the global ctors list in that case:
          [&](Function *Ctor, FunctionCallee) {
            Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
            Ctor->setComdat(CtorComdat);
            appendToGlobalCtors(M, Ctor, 0, Ctor);
          });

  // Create a note that contains pointers to the list of global
  // descriptors. Adding a note to the output file will cause the linker to
  // create a PT_NOTE program header pointing to the note that we can use to
  // find the descriptor list starting from the program headers. A function
  // provided by the runtime initializes the shadow memory for the globals by
  // accessing the descriptor list via the note. The dynamic loader needs to
  // call this function whenever a library is loaded.
  //
  // We use a note for this instead of the more conventional approach of
  // having a global constructor pass a descriptor list pointer to the
  // runtime because of an order-of-initialization problem. With
  // constructors we can encounter the following problematic scenario:
  //
  // 1) library A depends on library B and also interposes one of B's symbols
  // 2) B's constructors are called before A's (as required for correctness)
  // 3) during construction, B accesses one of its "own" globals (actually
  //    interposed by A) and triggers a HWASAN failure due to the initialization
  //    for A not having happened yet
  //
  // Even without interposition it is possible to run into similar situations in
  // cases where two libraries mutually depend on each other.
  //
  // We only need one note per binary, so put everything for the note in a
  // comdat. This needs to be a comdat with an .init_array section to prevent
  // newer versions of lld from discarding the note.
  //
  // Create the note even if we aren't instrumenting globals. This ensures that
  // binaries linked from object files with both instrumented and
  // non-instrumented globals will end up with a note, even if a comdat from an
  // object file with non-instrumented globals is selected. The note is harmless
  // if the runtime doesn't support it, since it will just be ignored.
  Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName);

  Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
  auto *Start =
      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
                         nullptr, "__start_hwasan_globals");
  Start->setVisibility(GlobalValue::HiddenVisibility);
  auto *Stop =
      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
                         nullptr, "__stop_hwasan_globals");
  Stop->setVisibility(GlobalValue::HiddenVisibility);

  // Null-terminated so actually 8 bytes, which are required in order to align
  // the note properly.
  auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");

  auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
                                 Int32Ty, Int32Ty);
  auto *Note =
      new GlobalVariable(M, NoteTy, /*isConstant=*/true,
                         GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
  Note->setSection(".note.hwasan.globals");
  Note->setComdat(NoteComdat);
  Note->setAlignment(Align(4));

  // The pointers in the note need to be relative so that the note ends up being
  // placed in rodata, which is the standard location for notes.
  auto CreateRelPtr = [&](Constant *Ptr) {
    return ConstantExpr::getTrunc(
        ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
                             ConstantExpr::getPtrToInt(Note, Int64Ty)),
        Int32Ty);
  };
  Note->setInitializer(ConstantStruct::getAnon(
      {ConstantInt::get(Int32Ty, 8),                           // n_namesz
       ConstantInt::get(Int32Ty, 8),                           // n_descsz
       ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
       Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
  appendToCompilerUsed(M, Note);
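
  // For reference, the initializer above lays out a standard ELF note:
  // n_namesz == 8 (the NUL-padded "LLVM" name), n_descsz == 8 (the two 32-bit
  // relative pointers to the start/stop section symbols), and
  // n_type == NT_LLVM_HWASAN_GLOBALS.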

  // Create a zero-length global in hwasan_globals so that the linker will
  // always create start and stop symbols.
  auto *Dummy = new GlobalVariable(
      M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
      Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
  Dummy->setSection("hwasan_globals");
  Dummy->setComdat(NoteComdat);
  Dummy->setMetadata(LLVMContext::MD_associated,
                     MDNode::get(*C, ValueAsMetadata::get(Note)));
  appendToCompilerUsed(M, Dummy);
}

/// Module-level initialization.
///
/// Inserts a call to __hwasan_init into the module's constructor list.
void HWAddressSanitizer::initializeModule() {
  LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
  auto &DL = M.getDataLayout();

  TargetTriple = Triple(M.getTargetTriple());

  // x86_64 currently has two modes:
  // - Intel LAM (default)
  // - pointer aliasing (heap only)
  bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
  UsePageAliases = shouldUsePageAliases(TargetTriple);
  InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple);
  InstrumentStack = shouldInstrumentStack(TargetTriple);
  DetectUseAfterScope = shouldDetectUseAfterScope(TargetTriple);
  PointerTagShift = IsX86_64 ? 57 : 56;
  TagMaskByte = IsX86_64 ? 0x3F : 0xFF;

  Mapping.init(TargetTriple, InstrumentWithCalls);

  C = &(M.getContext());
  IRBuilder<> IRB(*C);
  IntptrTy = IRB.getIntPtrTy(DL);
  Int8PtrTy = IRB.getInt8PtrTy();
  Int8Ty = IRB.getInt8Ty();
  Int32Ty = IRB.getInt32Ty();

  HwasanCtorFunction = nullptr;

  // Older versions of Android do not have the required runtime support for
  // short granules, global or personality function instrumentation. On other
  // platforms we currently require using the latest version of the runtime.
  bool NewRuntime =
      !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);

  UseShortGranules =
      ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime;
  OutlinedChecks =
      (TargetTriple.isAArch64() || TargetTriple.isRISCV64()) &&
      TargetTriple.isOSBinFormatELF() &&
      (ClInlineAllChecks.getNumOccurrences() ? !ClInlineAllChecks : !Recover);

  if (ClMatchAllTag.getNumOccurrences()) {
    if (ClMatchAllTag != -1) {
      MatchAllTag = ClMatchAllTag & 0xFF;
    }
  } else if (CompileKernel) {
    MatchAllTag = 0xFF;
  }

  // If we don't have personality function support, fall back to landing pads.
  InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences()
                              ? ClInstrumentLandingPads
                              : !NewRuntime;

  if (!CompileKernel) {
    createHwasanCtorComdat();
    bool InstrumentGlobals =
        ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime;

    if (InstrumentGlobals && !UsePageAliases)
      instrumentGlobals();

    bool InstrumentPersonalityFunctions =
        ClInstrumentPersonalityFunctions.getNumOccurrences()
            ? ClInstrumentPersonalityFunctions
            : NewRuntime;
    if (InstrumentPersonalityFunctions)
      instrumentPersonalityFunctions();
  }

  if (!TargetTriple.isAndroid()) {
    Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
      auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
                                    GlobalValue::ExternalLinkage, nullptr,
                                    "__hwasan_tls", nullptr,
                                    GlobalVariable::InitialExecTLSModel);
      appendToCompilerUsed(M, GV);
      return GV;
    });
    ThreadPtrGlobal = cast<GlobalVariable>(C);
  }
}

void HWAddressSanitizer::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(*C);
  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
    const std::string TypeStr = AccessIsWrite ? "store" : "load";
    const std::string EndingStr = Recover ? "_noabort" : "";

    HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
        ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
        FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false));

    for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
         AccessSizeIndex++) {
      HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
          M.getOrInsertFunction(
              ClMemoryAccessCallbackPrefix + TypeStr +
                  itostr(1ULL << AccessSizeIndex) + EndingStr,
              FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false));
    }
  }

  HwasanTagMemoryFunc = M.getOrInsertFunction(
      "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy);
  HwasanGenerateTagFunc =
      M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);

  HwasanRecordFrameRecordFunc = M.getOrInsertFunction(
      "__hwasan_add_frame_record", IRB.getVoidTy(), Int64Ty);

  ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
                                     ArrayType::get(IRB.getInt8Ty(), 0));

  const std::string MemIntrinCallbackPrefix =
      (CompileKernel && !ClKasanMemIntrinCallbackPrefix)
          ? std::string("")
          : ClMemoryAccessCallbackPrefix;
  HWAsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                                        IRB.getInt8PtrTy(), IntptrTy);
  HWAsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
                                       IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                                       IRB.getInt8PtrTy(), IntptrTy);
  HWAsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
                                       IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                                       IRB.getInt32Ty(), IntptrTy);

  HWAsanHandleVfork =
      M.getOrInsertFunction("__hwasan_handle_vfork", IRB.getVoidTy(), IntptrTy);
}

Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
  // An empty inline asm with input reg == output reg.
  // An opaque no-op cast, basically.
  // This prevents code bloat as a result of rematerializing trivial definitions
  // such as constants or global addresses at every load and store.
  InlineAsm *Asm =
      InlineAsm::get(FunctionType::get(Int8PtrTy, {Val->getType()}, false),
                     StringRef(""), StringRef("=r,0"),
                     /*hasSideEffects=*/false);
  return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
}
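
// For reference, the helper above emits IR along these lines (illustrative):
//   %.hwasan.shadow = call i8* asm "", "=r,0"(i8* @__hwasan_shadow)
// The "=r,0" constraint ties the output register to input operand 0, so the
// base stays live in a register instead of being rematerialized at every use.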

Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
  return getOpaqueNoopCast(IRB, ShadowGlobal);
}

Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
  if (Mapping.Offset != kDynamicShadowSentinel)
    return getOpaqueNoopCast(
        IRB, ConstantExpr::getIntToPtr(
                 ConstantInt::get(IntptrTy, Mapping.Offset), Int8PtrTy));

  if (Mapping.InGlobal)
    return getDynamicShadowIfunc(IRB);

  Value *GlobalDynamicAddress =
      IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
          kHwasanShadowMemoryDynamicAddress, Int8PtrTy);
  return IRB.CreateLoad(Int8PtrTy, GlobalDynamicAddress);
}

bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
  // Do not instrument accesses from different address spaces; we cannot deal
  // with them.
  Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
  if (PtrTy->getPointerAddressSpace() != 0)
    return true;

  // Ignore swifterror addresses.
  // swifterror memory addresses are mem2reg promoted by instruction
  // selection. As such they cannot have regular uses like an instrumentation
  // function and it makes no sense to track them as memory.
  if (Ptr->isSwiftError())
    return true;

  if (findAllocaForValue(Ptr)) {
    if (!InstrumentStack)
      return true;
    if (SSI && SSI->stackAccessIsSafe(*Inst))
      return true;
  }
  return false;
}

void HWAddressSanitizer::getInterestingMemoryOperands(
    Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
  // Skip memory accesses inserted by another instrumentation.
  if (I->hasMetadata(LLVMContext::MD_nosanitize))
    return;

  // Do not instrument the load fetching the dynamic shadow address.
  if (ShadowBase == I)
    return;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand()))
      return;
    Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
                             LI->getType(), LI->getAlign());
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand()))
      return;
    Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
                             SI->getValueOperand()->getType(), SI->getAlign());
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand()))
      return;
    Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
                             RMW->getValOperand()->getType(), std::nullopt);
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand()))
      return;
    Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
                             XCHG->getCompareOperand()->getType(),
                             std::nullopt);
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
      if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
          ignoreAccess(I, CI->getArgOperand(ArgNo)))
        continue;
      Type *Ty = CI->getParamByValType(ArgNo);
      Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
    }
  }
}

static unsigned getPointerOperandIndex(Instruction *I) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I))
    return LI->getPointerOperandIndex();
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getPointerOperandIndex();
  if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
    return RMW->getPointerOperandIndex();
  if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
    return XCHG->getPointerOperandIndex();
  report_fatal_error("Unexpected instruction");
  return -1;
}

static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
  size_t Res = countTrailingZeros(TypeSize / 8);
  assert(Res < kNumberOfAccessSizes);
  return Res;
}
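
// Worked example (TypeSize is in bits): a 64-bit access is 8 bytes wide, and
// countTrailingZeros(8) == 3, so it selects callback index 3 (e.g.
// __hwasan_load8); a 128-bit access maps to index 4, the largest supported.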

void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
  if (TargetTriple.isAArch64() || TargetTriple.getArch() == Triple::x86_64 ||
      TargetTriple.isRISCV64())
    return;

  IRBuilder<> IRB(I);
  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
  Value *UntaggedPtr =
      IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
  I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
}

Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
  // Mem >> Scale
  Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
  if (Mapping.Offset == 0)
    return IRB.CreateIntToPtr(Shadow, Int8PtrTy);
  // (Mem >> Scale) + Offset
  return IRB.CreateGEP(Int8Ty, ShadowBase, Shadow);
}

int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
                                          unsigned AccessSizeIndex) {
  return (CompileKernel << HWASanAccessInfo::CompileKernelShift) |
         (MatchAllTag.has_value() << HWASanAccessInfo::HasMatchAllShift) |
         (MatchAllTag.value_or(0) << HWASanAccessInfo::MatchAllShift) |
         (Recover << HWASanAccessInfo::RecoverShift) |
         (IsWrite << HWASanAccessInfo::IsWriteShift) |
         (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
}
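
// For example, a 4-byte write in recover mode packs AccessSizeIndex == 2,
// IsWrite == 1 and Recover == 1 into one constant that the check and the
// runtime both decode; the exact bit positions are the HWASanAccessInfo shift
// constants defined in AddressSanitizerCommon.h.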

void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
                                                    unsigned AccessSizeIndex,
                                                    Instruction *InsertBefore) {
  assert(!UsePageAliases);
  const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
  IRBuilder<> IRB(InsertBefore);
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
  IRB.CreateCall(Intrinsic::getDeclaration(
                     M, UseShortGranules
                            ? Intrinsic::hwasan_check_memaccess_shortgranules
                            : Intrinsic::hwasan_check_memaccess),
                 {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
}

void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                                   unsigned AccessSizeIndex,
                                                   Instruction *InsertBefore) {
  assert(!UsePageAliases);
  const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
  IRBuilder<> IRB(InsertBefore);

  Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
  Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, PointerTagShift),
                                  IRB.getInt8Ty());
  Value *AddrLong = untagPointer(IRB, PtrLong);
  Value *Shadow = memToShadow(AddrLong, IRB);
  Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow);
  Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);

  if (MatchAllTag.has_value()) {
    Value *TagNotIgnored = IRB.CreateICmpNE(
        PtrTag, ConstantInt::get(PtrTag->getType(), *MatchAllTag));
    TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
  }

  Instruction *CheckTerm =
      SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false,
                                MDBuilder(*C).createBranchWeights(1, 100000));

  IRB.SetInsertPoint(CheckTerm);
  Value *OutOfShortGranuleTagRange =
      IRB.CreateICmpUGT(MemTag, ConstantInt::get(Int8Ty, 15));
  Instruction *CheckFailTerm =
      SplitBlockAndInsertIfThen(OutOfShortGranuleTagRange, CheckTerm, !Recover,
                                MDBuilder(*C).createBranchWeights(1, 100000));

  IRB.SetInsertPoint(CheckTerm);
  Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(PtrLong, 15), Int8Ty);
  PtrLowBits = IRB.CreateAdd(
      PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
  Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, MemTag);
  SplitBlockAndInsertIfThen(PtrLowBitsOOB, CheckTerm, false,
                            MDBuilder(*C).createBranchWeights(1, 100000),
                            (DomTreeUpdater *)nullptr, nullptr,
                            CheckFailTerm->getParent());
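
  // Short-granule example for the check above: a shadow byte of 5 means only
  // the first 5 bytes of the granule are addressable. A 2-byte access at
  // granule offset 3 computes PtrLowBits = 3 + (2 - 1) = 4 < 5 and passes;
  // the same access at offset 4 computes 5 >= 5 and takes the failure branch.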

  IRB.SetInsertPoint(CheckTerm);
  Value *InlineTagAddr = IRB.CreateOr(AddrLong, 15);
  InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, Int8PtrTy);
  Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
  Value *InlineTagMismatch = IRB.CreateICmpNE(PtrTag, InlineTag);
  SplitBlockAndInsertIfThen(InlineTagMismatch, CheckTerm, false,
                            MDBuilder(*C).createBranchWeights(1, 100000),
                            (DomTreeUpdater *)nullptr, nullptr,
                            CheckFailTerm->getParent());

  IRB.SetInsertPoint(CheckFailTerm);
  InlineAsm *Asm;
  switch (TargetTriple.getArch()) {
  case Triple::x86_64:
    // The signal handler will find the data address in rdi.
    Asm = InlineAsm::get(
        FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
        "int3\nnopl " +
            itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
            "(%rax)",
        "{rdi}",
        /*hasSideEffects=*/true);
    break;
  case Triple::aarch64:
  case Triple::aarch64_be:
    // The signal handler will find the data address in x0.
    Asm = InlineAsm::get(
        FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
        "brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
        "{x0}",
        /*hasSideEffects=*/true);
    break;
  case Triple::riscv64:
    // The signal handler will find the data address in x10.
    Asm = InlineAsm::get(
        FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
        "ebreak\naddiw x0, x11, " +
            itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
        "{x10}",
        /*hasSideEffects=*/true);
    break;
  default:
    report_fatal_error("unsupported architecture");
  }
  IRB.CreateCall(Asm, PtrLong);
  if (Recover)
    cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent());
}

bool HWAddressSanitizer::ignoreMemIntrinsic(MemIntrinsic *MI) {
  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
    return (!ClInstrumentWrites || ignoreAccess(MTI, MTI->getDest())) &&
           (!ClInstrumentReads || ignoreAccess(MTI, MTI->getSource()));
  }
  if (isa<MemSetInst>(MI))
    return !ClInstrumentWrites || ignoreAccess(MI, MI->getDest());
  return false;
}

void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  if (isa<MemTransferInst>(MI)) {
    IRB.CreateCall(
        isa<MemMoveInst>(MI) ? HWAsanMemmove : HWAsanMemcpy,
        {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
         IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
  } else if (isa<MemSetInst>(MI)) {
    IRB.CreateCall(
        HWAsanMemset,
        {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
  }
  MI->eraseFromParent();
}

bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) {
  Value *Addr = O.getPtr();

  LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");

  if (O.MaybeMask)
    return false; // FIXME

  IRBuilder<> IRB(O.getInsn());
  if (isPowerOf2_64(O.TypeSize) &&
      (O.TypeSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
      (!O.Alignment || *O.Alignment >= Mapping.getObjectAlignment() ||
       *O.Alignment >= O.TypeSize / 8)) {
    size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeSize);
    if (InstrumentWithCalls) {
      IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
                     IRB.CreatePointerCast(Addr, IntptrTy));
    } else if (OutlinedChecks) {
      instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
    } else {
      instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
    }
  } else {
    IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite],
                   {IRB.CreatePointerCast(Addr, IntptrTy),
                    ConstantInt::get(IntptrTy, O.TypeSize / 8)});
  }
  untagPointerOperand(O.getInsn(), Addr);

  return true;
}

void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
                                   size_t Size) {
  size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
  if (!UseShortGranules)
    Size = AlignedSize;

  Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
  if (InstrumentWithCalls) {
    IRB.CreateCall(HwasanTagMemoryFunc,
                   {IRB.CreatePointerCast(AI, Int8PtrTy), JustTag,
                    ConstantInt::get(IntptrTy, AlignedSize)});
  } else {
    size_t ShadowSize = Size >> Mapping.Scale;
    Value *ShadowPtr = memToShadow(IRB.CreatePointerCast(AI, IntptrTy), IRB);
    // If this memset is not inlined, it will be intercepted in the hwasan
    // runtime library. That's OK, because the interceptor skips the checks if
    // the address is in the shadow region.
    // FIXME: the interceptor is not as fast as real memset. Consider lowering
    // llvm.memset right here into either a sequence of stores, or a call to
    // hwasan_tag_memory.
    if (ShadowSize)
      IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, Align(1));
    if (Size != AlignedSize) {
      const uint8_t SizeRemainder = Size % Mapping.getObjectAlignment().value();
      IRB.CreateStore(ConstantInt::get(Int8Ty, SizeRemainder),
                      IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
      IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32(
                                   Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy),
                                   AlignedSize - 1));
    }
  }
}
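
// Worked example for tagAlloca with short granules: a 20-byte alloca with
// 16-byte granules gives AlignedSize == 32. One full shadow byte (20 >> 4)
// receives the tag, the following shadow byte receives the short-granule size
// 20 % 16 == 4, and the tag is also stored in the alloca's last padding byte
// (offset 31) for the inline short-granule check to compare against.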

unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
  if (TargetTriple.getArch() == Triple::x86_64)
    return AllocaNo & TagMaskByte;

  // A list of 8-bit numbers that have at most one run of non-zero bits.
  // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
  // masks.
  // The list does not include the value 255, which is used for UAR.
  //
  // Because we are more likely to use earlier elements of this list than later
  // ones, it is sorted in increasing order of probability of collision with a
  // mask allocated (temporally) nearby. The program that generated this list
  // can be found at:
  // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
  static unsigned FastMasks[] = {0,  128, 64,  192, 32,  96,  224, 112, 240,
                                 48, 16,  120, 248, 56,  24,  8,   124, 252,
                                 60, 28,  12,  4,   126, 254, 62,  30,  14,
                                 6,  2,   127, 63,  31,  15,  7,   3,   1};
  return FastMasks[AllocaNo % std::size(FastMasks)];
}
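
// For example, the second alloca in a function (AllocaNo == 1) is retagged
// with FastMasks[1] == 128 (0b10000000), a single-run mask whose xor against
// the base tag can be encoded as one immediate instruction on AArch64.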

Value *HWAddressSanitizer::applyTagMask(IRBuilder<> &IRB, Value *OldTag) {
  if (TargetTriple.getArch() == Triple::x86_64) {
    Constant *TagMask = ConstantInt::get(IntptrTy, TagMaskByte);
    Value *NewTag = IRB.CreateAnd(OldTag, TagMask);
    return NewTag;
  }
  // aarch64 uses 8-bit tags, so no mask is needed.
  return OldTag;
}

Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
  return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
}

Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
  if (ClGenerateTagsWithCalls)
    return getNextTagWithCall(IRB);
  if (StackBaseTag)
    return StackBaseTag;
  // Extract some entropy from the stack pointer for the tags.
  // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
  // between functions).
  Value *StackPointerLong = getSP(IRB);
  Value *StackTag =
      applyTagMask(IRB, IRB.CreateXor(StackPointerLong,
                                      IRB.CreateLShr(StackPointerLong, 20)));
  StackTag->setName("hwasan.stack.base.tag");
  return StackTag;
}

Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
                                        AllocaInst *AI, unsigned AllocaNo) {
  if (ClGenerateTagsWithCalls)
    return getNextTagWithCall(IRB);
  return IRB.CreateXor(StackTag,
                       ConstantInt::get(IntptrTy, retagMask(AllocaNo)));
}

Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB, Value *StackTag) {
  if (ClUARRetagToZero)
    return ConstantInt::get(IntptrTy, 0);
  if (ClGenerateTagsWithCalls)
    return getNextTagWithCall(IRB);
  return IRB.CreateXor(StackTag, ConstantInt::get(IntptrTy, TagMaskByte));
}

// Add a tag to an address.
Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
                                      Value *PtrLong, Value *Tag) {
  assert(!UsePageAliases);
  Value *TaggedPtrLong;
  if (CompileKernel) {
    // Kernel addresses have 0xFF in the most significant byte.
    Value *ShiftedTag =
        IRB.CreateOr(IRB.CreateShl(Tag, PointerTagShift),
                     ConstantInt::get(IntptrTy, (1ULL << PointerTagShift) - 1));
    TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
  } else {
    // Userspace can simply do OR (tag << PointerTagShift).
    Value *ShiftedTag = IRB.CreateShl(Tag, PointerTagShift);
    TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
  }
  return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
}
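
// Illustrative values: in userspace, PtrLong == 0x00007fffdeadbeef with tag
// 0x2a becomes 0x2a007fffdeadbeef. In kernel mode, the AND against
// (Tag << Shift) | ((1 << Shift) - 1) rewrites the 0xFF top byte to the tag
// while leaving the low PointerTagShift bits untouched.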

// Remove tag from an address.
Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
  assert(!UsePageAliases);
  Value *UntaggedPtrLong;
  if (CompileKernel) {
    // Kernel addresses have 0xFF in the most significant byte.
    UntaggedPtrLong =
        IRB.CreateOr(PtrLong, ConstantInt::get(PtrLong->getType(),
                                               0xFFULL << PointerTagShift));
  } else {
    // Userspace addresses have 0x00.
    UntaggedPtrLong =
        IRB.CreateAnd(PtrLong, ConstantInt::get(PtrLong->getType(),
                                                ~(0xFFULL << PointerTagShift)));
  }
  return UntaggedPtrLong;
}
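
// Untagging mirrors tagging: userspace clears the top byte back to 0x00,
// while the kernel ORs 0xFF back in, restoring a canonical kernel address.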

Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) {
    // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
    // in Bionic's libc/private/bionic_tls.h.
    Function *ThreadPointerFunc =
        Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
    Value *SlotPtr = IRB.CreatePointerCast(
        IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
                               IRB.CreateCall(ThreadPointerFunc), 0x30),
        Ty->getPointerTo(0));
    return SlotPtr;
  }
  if (ThreadPtrGlobal)
    return ThreadPtrGlobal;

  return nullptr;
}

Value *HWAddressSanitizer::getPC(IRBuilder<> &IRB) {
  if (TargetTriple.getArch() == Triple::aarch64)
    return readRegister(IRB, "pc");
  return IRB.CreatePtrToInt(IRB.GetInsertBlock()->getParent(), IntptrTy);
}

Value *HWAddressSanitizer::getSP(IRBuilder<> &IRB) {
  if (!CachedSP) {
    // FIXME: use addressofreturnaddress (but implement it in aarch64 backend
    // first).
    Function *F = IRB.GetInsertBlock()->getParent();
    Module *M = F->getParent();
    auto *GetStackPointerFn = Intrinsic::getDeclaration(
        M, Intrinsic::frameaddress,
        IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
    CachedSP = IRB.CreatePtrToInt(
        IRB.CreateCall(GetStackPointerFn,
                       {Constant::getNullValue(IRB.getInt32Ty())}),
        IntptrTy);
  }
  return CachedSP;
}

Value *HWAddressSanitizer::getFrameRecordInfo(IRBuilder<> &IRB) {
  // Prepare ring buffer data.
  Value *PC = getPC(IRB);
  Value *SP = getSP(IRB);

  // Mix SP and PC.
  // Assumptions:
  // PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero)
  // SP is 0xsssssssssssSSSS0  (4 lower bits are zero)
  // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
  //       0xSSSSPPPPPPPPPPPP
  SP = IRB.CreateShl(SP, 44);
  return IRB.CreateOr(PC, SP);
}
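
// For example (illustrative values): PC == 0x0000aaaabbbbcccc and
// SP == 0x0000007fffe40010 give SP << 44 == 0x4001000000000000, so the frame
// record becomes 0x4001aaaabbbbcccc: SP entropy on top, the full PC below.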

void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
  if (!Mapping.InTls)
    ShadowBase = getShadowNonTls(IRB);
  else if (!WithFrameRecord && TargetTriple.isAndroid())
    ShadowBase = getDynamicShadowIfunc(IRB);

  if (!WithFrameRecord && ShadowBase)
    return;

  Value *SlotPtr = nullptr;
  Value *ThreadLong = nullptr;
  Value *ThreadLongMaybeUntagged = nullptr;

  auto getThreadLongMaybeUntagged = [&]() {
    if (!SlotPtr)
      SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
    if (!ThreadLong)
      ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
    // Extract the address field from ThreadLong. Unnecessary on AArch64 with
    // TBI.
    return TargetTriple.isAArch64() ? ThreadLong
                                    : untagPointer(IRB, ThreadLong);
  };

  if (WithFrameRecord) {
    switch (ClRecordStackHistory) {
    case libcall: {
      // Emit a runtime call into hwasan rather than emitting instructions for
      // recording stack history.
      Value *FrameRecordInfo = getFrameRecordInfo(IRB);
      IRB.CreateCall(HwasanRecordFrameRecordFunc, {FrameRecordInfo});
      break;
    }
    case instr: {
      ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();

      StackBaseTag = IRB.CreateAShr(ThreadLong, 3);

      // Store data to ring buffer.
      Value *FrameRecordInfo = getFrameRecordInfo(IRB);
      Value *RecordPtr = IRB.CreateIntToPtr(ThreadLongMaybeUntagged,
                                            IntptrTy->getPointerTo(0));
      IRB.CreateStore(FrameRecordInfo, RecordPtr);

      // Update the ring buffer. The top byte of ThreadLong defines the size of
      // the buffer in pages; it must be a power of two, and the start of the
      // buffer must be aligned by twice that much. Therefore wrap-around of
      // the ring buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
      // The use of AShr instead of LShr is due to
      //   https://bugs.llvm.org/show_bug.cgi?id=39030
      // The runtime library makes sure not to use the highest bit.
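      //
      // Worked example: a top byte of 1 denotes a one-page (4096-byte) ring
      // buffer whose base is 8192-byte aligned. After the slot at base + 4088
      // is written, ThreadLong + 8 sets bit 12; the AND below clears it and
      // the write pointer wraps back to the buffer base.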
      Value *WrapMask = IRB.CreateXor(
          IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
          ConstantInt::get(IntptrTy, (uint64_t)-1));
      Value *ThreadLongNew = IRB.CreateAnd(
          IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
      IRB.CreateStore(ThreadLongNew, SlotPtr);
      break;
    }
    case none: {
      llvm_unreachable(
          "A stack history recording mode should've been selected.");
    }
    }
  }

  if (!ShadowBase) {
    if (!ThreadLongMaybeUntagged)
      ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();

    // Get the shadow base address by aligning ThreadLongMaybeUntagged up.
    // Note: this is not correct if the pointer is already aligned.
    // The runtime library makes sure this never happens.
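    //
    // The OR-then-add idiom rounds up to the next multiple of
    // 2^kShadowBaseAlignment, e.g. (0x5500000001 | 0xffffffff) + 1 ==
    // 0x5600000000; an already-aligned value would be bumped by a full step,
    // which is why the runtime must never store one here.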
    ShadowBase = IRB.CreateAdd(
        IRB.CreateOr(
            ThreadLongMaybeUntagged,
            ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
        ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
    ShadowBase = IRB.CreateIntToPtr(ShadowBase, Int8PtrTy);
  }
}

Value *HWAddressSanitizer::readRegister(IRBuilder<> &IRB, StringRef Name) {
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Function *ReadRegister =
      Intrinsic::getDeclaration(M, Intrinsic::read_register, IntptrTy);
  MDNode *MD = MDNode::get(*C, {MDString::get(*C, Name)});
  Value *Args[] = {MetadataAsValue::get(*C, MD)};
  return IRB.CreateCall(ReadRegister, Args);
}

bool HWAddressSanitizer::instrumentLandingPads(
    SmallVectorImpl<Instruction *> &LandingPadVec) {
  for (auto *LP : LandingPadVec) {
    IRBuilder<> IRB(LP->getNextNode());
    IRB.CreateCall(
        HWAsanHandleVfork,
        {readRegister(IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp"
                                                                      : "sp")});
  }
  return true;
}

static bool isLifetimeIntrinsic(Value *V) {
  auto *II = dyn_cast<IntrinsicInst>(V);
  return II && II->isLifetimeStartOrEnd();
}

bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
                                         Value *StackTag,
                                         const DominatorTree &DT,
                                         const PostDominatorTree &PDT,
                                         const LoopInfo &LI) {
  // Ideally, we want to calculate a tagged stack base pointer and rewrite
  // all alloca addresses using that. Unfortunately, offsets are not known
  // yet (unless we use an ASan-style mega-alloca). Instead we keep the base
  // tag in a temp, XOR it with the per-alloca retag mask, and shift-OR the
  // result into each alloca address. This generates one extra instruction
  // per alloca use.
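  // A sketch of the rewrite for alloca number N (assuming a tag shift of 56,
  // as on AArch64; names are illustrative):
  //   %tag      = xor i64 %stack.base.tag, RETAG_MASK_N
  //   %x.long   = ptrtoint ptr %x to i64
  //   %shifted  = shl i64 %tag, 56
  //   %tagged   = or i64 %x.long, %shifted
  //   %x.hwasan = inttoptr i64 %tagged to ptr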
1305   unsigned int I = 0;
1306 
1307   for (auto &KV : SInfo.AllocasToInstrument) {
1308     auto N = I++;
1309     auto *AI = KV.first;
1310     memtag::AllocaInfo &Info = KV.second;
1311     IRBuilder<> IRB(AI->getNextNode());
1312 
1313     // Replace uses of the alloca with tagged address.
1314     Value *Tag = getAllocaTag(IRB, StackTag, AI, N);
1315     Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
1316     Value *Replacement = tagPointer(IRB, AI->getType(), AILong, Tag);
1317     std::string Name =
1318         AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
1319     Replacement->setName(Name + ".hwasan");
1320 
1321     size_t Size = memtag::getAllocaSizeInBytes(*AI);
1322     size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
1323 
1324     Value *AICast = IRB.CreatePointerCast(AI, Int8PtrTy);
1325 
1326     auto HandleLifetime = [&](IntrinsicInst *II) {
      // Set the lifetime intrinsic to cover the whole alloca. This reduces
      // the set of assumptions we need to make about the lifetime. Without
      // this, we would need to ensure that the lifetime pointer can be
      // tracked to a constant offset from the alloca, and we would still
      // need to grow the size to cover the extra alignment we add for
      // untagging, to keep the sizes consistent.
1333       //
1334       // The check for standard lifetime below makes sure that we have exactly
1335       // one set of start / end in any execution (i.e. the ends are not
1336       // reachable from each other), so this will not cause any problems.
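      // For example (a sketch, with 16-byte granules): a 20-byte alloca has
      // AlignedSize == 32, so
      //   call void @llvm.lifetime.start.p0(i64 20, ptr %x)
      // becomes
      //   call void @llvm.lifetime.start.p0(i64 32, ptr %x)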
1337       II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize));
1338       II->setArgOperand(1, AICast);
1339     };
1340     llvm::for_each(Info.LifetimeStart, HandleLifetime);
1341     llvm::for_each(Info.LifetimeEnd, HandleLifetime);
1342 
1343     AI->replaceUsesWithIf(Replacement, [AICast, AILong](Use &U) {
1344       auto *User = U.getUser();
1345       return User != AILong && User != AICast && !isLifetimeIntrinsic(User);
1346     });
1347 
1348     for (auto *DDI : Info.DbgVariableIntrinsics) {
1349       // Prepend "tag_offset, N" to the dwarf expression.
1350       // Tag offset logically applies to the alloca pointer, and it makes sense
1351       // to put it at the beginning of the expression.
1352       SmallVector<uint64_t, 8> NewOps = {dwarf::DW_OP_LLVM_tag_offset,
1353                                          retagMask(N)};
1354       for (size_t LocNo = 0; LocNo < DDI->getNumVariableLocationOps(); ++LocNo)
1355         if (DDI->getVariableLocationOp(LocNo) == AI)
1356           DDI->setExpression(DIExpression::appendOpsToArg(DDI->getExpression(),
1357                                                           NewOps, LocNo));
1358     }
1359 
1360     auto TagEnd = [&](Instruction *Node) {
1361       IRB.SetInsertPoint(Node);
1362       Value *UARTag = getUARTag(IRB, StackTag);
1363       // When untagging, use the `AlignedSize` because we need to set the tags
1364       // for the entire alloca to zero. If we used `Size` here, we would
1365       // keep the last granule tagged, and store zero in the last byte of the
1366       // last granule, due to how short granules are implemented.
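      // E.g. (a sketch, with 16-byte granules): for Size == 20 and
      // AlignedSize == 32, untagging only 20 bytes would leave the second
      // granule's shadow holding the short-granule length 4 and write the
      // tag into that granule's last byte, instead of clearing both granules.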
1367       tagAlloca(IRB, AI, UARTag, AlignedSize);
1368     };
    // Calls to functions that may return twice (e.g. setjmp) confuse the
    // postdominator analysis and can cause us to leave memory tagged after
    // the function returns. Work around this by always untagging at every
    // return statement if any returns_twice function is called.
1373     bool StandardLifetime =
1374         SInfo.UnrecognizedLifetimes.empty() &&
1375         memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, &DT,
1376                                    &LI, ClMaxLifetimes) &&
1377         !SInfo.CallsReturnTwice;
1378     if (DetectUseAfterScope && StandardLifetime) {
1379       IntrinsicInst *Start = Info.LifetimeStart[0];
1380       IRB.SetInsertPoint(Start->getNextNode());
1381       tagAlloca(IRB, AI, Tag, Size);
1382       if (!memtag::forAllReachableExits(DT, PDT, LI, Start, Info.LifetimeEnd,
1383                                         SInfo.RetVec, TagEnd)) {
1384         for (auto *End : Info.LifetimeEnd)
1385           End->eraseFromParent();
1386       }
1387     } else {
1388       tagAlloca(IRB, AI, Tag, Size);
1389       for (auto *RI : SInfo.RetVec)
1390         TagEnd(RI);
1391       // We inserted tagging outside of the lifetimes, so we have to remove
1392       // them.
1393       for (auto &II : Info.LifetimeStart)
1394         II->eraseFromParent();
1395       for (auto &II : Info.LifetimeEnd)
1396         II->eraseFromParent();
1397     }
1398     memtag::alignAndPadAlloca(Info, Mapping.getObjectAlignment());
1399   }
1400   for (auto &I : SInfo.UnrecognizedLifetimes)
1401     I->eraseFromParent();
1402   return true;
1403 }
1404 
1405 bool HWAddressSanitizer::sanitizeFunction(Function &F,
1406                                           FunctionAnalysisManager &FAM) {
1407   if (&F == HwasanCtorFunction)
1408     return false;
1409 
1410   if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
1411     return false;
1412 
1413   LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");
1414 
1415   SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
1416   SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
1417   SmallVector<Instruction *, 8> LandingPadVec;
1418 
1419   memtag::StackInfoBuilder SIB(SSI);
1420   for (auto &Inst : instructions(F)) {
1421     if (InstrumentStack) {
1422       SIB.visit(Inst);
1423     }
1424 
1425     if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
1426       LandingPadVec.push_back(&Inst);
1427 
1428     getInterestingMemoryOperands(&Inst, OperandsToInstrument);
1429 
1430     if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
1431       if (!ignoreMemIntrinsic(MI))
1432         IntrinToInstrument.push_back(MI);
1433   }
1434 
1435   memtag::StackInfo &SInfo = SIB.get();
1436 
1437   initializeCallbacks(*F.getParent());
1438 
1439   bool Changed = false;
1440 
1441   if (!LandingPadVec.empty())
1442     Changed |= instrumentLandingPads(LandingPadVec);
1443 
1444   if (SInfo.AllocasToInstrument.empty() && F.hasPersonalityFn() &&
1445       F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
1446     // __hwasan_personality_thunk is a no-op for functions without an
1447     // instrumented stack, so we can drop it.
1448     F.setPersonalityFn(nullptr);
1449     Changed = true;
1450   }
1451 
1452   if (SInfo.AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
1453       IntrinToInstrument.empty())
1454     return Changed;
1455 
1456   assert(!ShadowBase);
1457 
1458   Instruction *InsertPt = &*F.getEntryBlock().begin();
1459   IRBuilder<> EntryIRB(InsertPt);
1460   emitPrologue(EntryIRB,
1461                /*WithFrameRecord*/ ClRecordStackHistory != none &&
1462                    Mapping.WithFrameRecord &&
1463                    !SInfo.AllocasToInstrument.empty());
1464 
1465   if (!SInfo.AllocasToInstrument.empty()) {
1466     const DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1467     const PostDominatorTree &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
1468     const LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
1469     Value *StackTag =
1470         ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
1471     instrumentStack(SInfo, StackTag, DT, PDT, LI);
1472   }
1473 
1474   // If we split the entry block, move any allocas that were originally in the
1475   // entry block back into the entry block so that they aren't treated as
1476   // dynamic allocas.
1477   if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
1478     InsertPt = &*F.getEntryBlock().begin();
1479     for (Instruction &I :
1480          llvm::make_early_inc_range(*EntryIRB.GetInsertBlock())) {
1481       if (auto *AI = dyn_cast<AllocaInst>(&I))
1482         if (isa<ConstantInt>(AI->getArraySize()))
1483           I.moveBefore(InsertPt);
1484     }
1485   }
1486 
1487   for (auto &Operand : OperandsToInstrument)
1488     instrumentMemAccess(Operand);
1489 
1490   if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
1491     for (auto *Inst : IntrinToInstrument)
1492       instrumentMemIntrinsic(Inst);
1493   }
1494 
1495   ShadowBase = nullptr;
1496   StackBaseTag = nullptr;
1497   CachedSP = nullptr;
1498 
1499   return true;
1500 }
1501 
1502 void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
1503   assert(!UsePageAliases);
1504   Constant *Initializer = GV->getInitializer();
1505   uint64_t SizeInBytes =
1506       M.getDataLayout().getTypeAllocSize(Initializer->getType());
1507   uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
1508   if (SizeInBytes != NewSize) {
1509     // Pad the initializer out to the next multiple of 16 bytes and add the
1510     // required short granule tag.
1511     std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
1512     Init.back() = Tag;
1513     Constant *Padding = ConstantDataArray::get(*C, Init);
1514     Initializer = ConstantStruct::getAnon({Initializer, Padding});
1515   }
1516 
1517   auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
1518                                    GlobalValue::ExternalLinkage, Initializer,
1519                                    GV->getName() + ".hwasan");
1520   NewGV->copyAttributesFrom(GV);
1521   NewGV->setLinkage(GlobalValue::PrivateLinkage);
1522   NewGV->copyMetadata(GV, 0);
1523   NewGV->setAlignment(
1524       std::max(GV->getAlign().valueOrOne(), Mapping.getObjectAlignment()));
1525 
1526   // It is invalid to ICF two globals that have different tags. In the case
1527   // where the size of the global is a multiple of the tag granularity the
1528   // contents of the globals may be the same but the tags (i.e. symbol values)
1529   // may be different, and the symbols are not considered during ICF. In the
1530   // case where the size is not a multiple of the granularity, the short granule
1531   // tags would discriminate two globals with different tags, but there would
1532   // otherwise be nothing stopping such a global from being incorrectly ICF'd
1533   // with an uninstrumented (i.e. tag 0) global that happened to have the short
1534   // granule tag in the last byte.
1535   NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
1536 
1537   // Descriptor format (assuming little-endian):
1538   // bytes 0-3: relative address of global
1539   // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case
1540   // it isn't, we create multiple descriptors)
1541   // byte 7: tag
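  // E.g. (a sketch): a 0x64-byte global with tag 0x2a gets one descriptor
  // whose second word is 0x2a000064. A 0x1400000-byte (20 MB) global gets
  // two descriptors, covering 0xfffff0 and 0x400010 bytes respectively.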
1542   auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
1543   const uint64_t MaxDescriptorSize = 0xfffff0;
1544   for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
1545        DescriptorPos += MaxDescriptorSize) {
1546     auto *Descriptor =
1547         new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
1548                            nullptr, GV->getName() + ".hwasan.descriptor");
1549     auto *GVRelPtr = ConstantExpr::getTrunc(
1550         ConstantExpr::getAdd(
1551             ConstantExpr::getSub(
1552                 ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1553                 ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
1554             ConstantInt::get(Int64Ty, DescriptorPos)),
1555         Int32Ty);
1556     uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
1557     auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
1558     Descriptor->setComdat(NewGV->getComdat());
1559     Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
1560     Descriptor->setSection("hwasan_globals");
1561     Descriptor->setMetadata(LLVMContext::MD_associated,
1562                             MDNode::get(*C, ValueAsMetadata::get(NewGV)));
1563     appendToCompilerUsed(M, Descriptor);
1564   }
1565 
1566   Constant *Aliasee = ConstantExpr::getIntToPtr(
1567       ConstantExpr::getAdd(
1568           ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1569           ConstantInt::get(Int64Ty, uint64_t(Tag) << PointerTagShift)),
1570       GV->getType());
1571   auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
1572                                     GV->getLinkage(), "", Aliasee, &M);
1573   Alias->setVisibility(GV->getVisibility());
1574   Alias->takeName(GV);
1575   GV->replaceAllUsesWith(Alias);
1576   GV->eraseFromParent();
1577 }
1578 
1579 void HWAddressSanitizer::instrumentGlobals() {
1580   std::vector<GlobalVariable *> Globals;
1581   for (GlobalVariable &GV : M.globals()) {
1582     if (GV.hasSanitizerMetadata() && GV.getSanitizerMetadata().NoHWAddress)
1583       continue;
1584 
1585     if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") ||
1586         GV.isThreadLocal())
1587       continue;
1588 
    // Common symbols can't have aliases pointing to them, so they can't be
    // tagged.
1590     if (GV.hasCommonLinkage())
1591       continue;
1592 
1593     // Globals with custom sections may be used in __start_/__stop_ enumeration,
1594     // which would be broken both by adding tags and potentially by the extra
1595     // padding/alignment that we insert.
1596     if (GV.hasSection())
1597       continue;
1598 
1599     Globals.push_back(&GV);
1600   }
1601 
1602   MD5 Hasher;
1603   Hasher.update(M.getSourceFileName());
1604   MD5::MD5Result Hash;
1605   Hasher.final(Hash);
1606   uint8_t Tag = Hash[0];
1607 
1608   for (GlobalVariable *GV : Globals) {
1609     Tag &= TagMaskByte;
1610     // Skip tag 0 in order to avoid collisions with untagged memory.
1611     if (Tag == 0)
1612       Tag = 1;
1613     instrumentGlobal(GV, Tag++);
1614   }
1615 }
1616 
1617 void HWAddressSanitizer::instrumentPersonalityFunctions() {
1618   // We need to untag stack frames as we unwind past them. That is the job of
1619   // the personality function wrapper, which either wraps an existing
1620   // personality function or acts as a personality function on its own. Each
1621   // function that has a personality function or that can be unwound past has
1622   // its personality function changed to a thunk that calls the personality
1623   // function wrapper in the runtime.
1624   MapVector<Constant *, std::vector<Function *>> PersonalityFns;
1625   for (Function &F : M) {
1626     if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
1627       continue;
1628 
1629     if (F.hasPersonalityFn()) {
1630       PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F);
1631     } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
1632       PersonalityFns[nullptr].push_back(&F);
1633     }
1634   }
1635 
1636   if (PersonalityFns.empty())
1637     return;
1638 
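  // The wrapper takes the five standard personality-routine arguments
  // (version, actions, exception class, exception object, context), followed
  // by the real personality function (or null) and the addresses of
  // _Unwind_GetGR and _Unwind_GetCFA, which the runtime uses to locate and
  // untag the frames being unwound. (Argument meanings per the hwasan
  // runtime's __hwasan_personality_wrapper.)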
1639   FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
1640       "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty,
1641       Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy);
1642   FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
1643   FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);
1644 
1645   for (auto &P : PersonalityFns) {
1646     std::string ThunkName = kHwasanPersonalityThunkName;
1647     if (P.first)
1648       ThunkName += ("." + P.first->getName()).str();
1649     FunctionType *ThunkFnTy = FunctionType::get(
1650         Int32Ty, {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int8PtrTy}, false);
1651     bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
1652                                cast<GlobalValue>(P.first)->hasLocalLinkage());
1653     auto *ThunkFn = Function::Create(ThunkFnTy,
1654                                      IsLocal ? GlobalValue::InternalLinkage
1655                                              : GlobalValue::LinkOnceODRLinkage,
1656                                      ThunkName, &M);
1657     if (!IsLocal) {
1658       ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
1659       ThunkFn->setComdat(M.getOrInsertComdat(ThunkName));
1660     }
1661 
1662     auto *BB = BasicBlock::Create(*C, "entry", ThunkFn);
1663     IRBuilder<> IRB(BB);
1664     CallInst *WrapperCall = IRB.CreateCall(
1665         HwasanPersonalityWrapper,
1666         {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
1667          ThunkFn->getArg(3), ThunkFn->getArg(4),
1668          P.first ? IRB.CreateBitCast(P.first, Int8PtrTy)
1669                  : Constant::getNullValue(Int8PtrTy),
1670          IRB.CreateBitCast(UnwindGetGR.getCallee(), Int8PtrTy),
1671          IRB.CreateBitCast(UnwindGetCFA.getCallee(), Int8PtrTy)});
1672     WrapperCall->setTailCall();
1673     IRB.CreateRet(WrapperCall);
1674 
1675     for (Function *F : P.second)
1676       F->setPersonalityFn(ThunkFn);
1677   }
1678 }
1679 
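// A summary of the configurations selected below (a sketch; rows are in
// priority order):
//   Fuchsia:                 fixed offset 0,          frame record on
//   -hwasan-mapping-offset:  that fixed offset,       frame record off
//   kasan / outlined checks: fixed offset 0,          frame record off
//   ifunc:                   shadow base in a global, frame record off
//   TLS:                     dynamic shadow base,     frame record on
//   fallback:                dynamic shadow base,     frame record off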
1680 void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple,
1681                                              bool InstrumentWithCalls) {
1682   Scale = kDefaultShadowScale;
1683   if (TargetTriple.isOSFuchsia()) {
1684     // Fuchsia is always PIE, which means that the beginning of the address
1685     // space is always available.
1686     InGlobal = false;
1687     InTls = false;
1688     Offset = 0;
1689     WithFrameRecord = true;
1690   } else if (ClMappingOffset.getNumOccurrences() > 0) {
1691     InGlobal = false;
1692     InTls = false;
1693     Offset = ClMappingOffset;
1694     WithFrameRecord = false;
1695   } else if (ClEnableKhwasan || InstrumentWithCalls) {
1696     InGlobal = false;
1697     InTls = false;
1698     Offset = 0;
1699     WithFrameRecord = false;
1700   } else if (ClWithIfunc) {
1701     InGlobal = true;
1702     InTls = false;
1703     Offset = kDynamicShadowSentinel;
1704     WithFrameRecord = false;
1705   } else if (ClWithTls) {
1706     InGlobal = false;
1707     InTls = true;
1708     Offset = kDynamicShadowSentinel;
1709     WithFrameRecord = true;
1710   } else {
1711     InGlobal = false;
1712     InTls = false;
1713     Offset = kDynamicShadowSentinel;
1714     WithFrameRecord = false;
1715   }
1716 }
1717