//===- HWAddressSanitizer.cpp - memory access error detector --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file is a part of HWAddressSanitizer, a basic address correctness
/// checker based on tagged addressing.
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/RandomNumberGenerator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <optional>
#include <random>

using namespace llvm;

#define DEBUG_TYPE "hwasan"

const char kHwasanModuleCtorName[] = "hwasan.module_ctor";
const char kHwasanNoteName[] = "hwasan.note";
const char kHwasanInitName[] = "__hwasan_init";
const char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk";

const char kHwasanShadowMemoryDynamicAddress[] =
    "__hwasan_shadow_memory_dynamic_address";
// Access sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;

static const size_t kDefaultShadowScale = 4;
static const uint64_t kDynamicShadowSentinel =
    std::numeric_limits<uint64_t>::max();

static const unsigned kShadowBaseAlignment = 32;

static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("hwasan-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__hwasan_"));

static cl::opt<bool> ClKasanMemIntrinCallbackPrefix(
    "hwasan-kernel-mem-intrinsic-prefix",
    cl::desc("Use prefix for memory intrinsics in KASAN mode"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentWithCalls(
    "hwasan-instrument-with-calls",
    cl::desc("instrument reads and writes with callbacks"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("hwasan-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "hwasan-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval",
                                       cl::desc("instrument byval arguments"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClRecover("hwasan-recover",
              cl::desc("Enable recovery mode (continue-after-error)."),
              cl::Hidden, cl::init(false));

static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
                                       cl::desc("instrument stack (allocas)"),
                                       cl::Hidden, cl::init(true));
static cl::opt<bool>
    ClUseStackSafety("hwasan-use-stack-safety", cl::Hidden, cl::init(true),
                     cl::desc("Use Stack Safety analysis results"),
                     cl::Optional);

static cl::opt<size_t> ClMaxLifetimes(
    "hwasan-max-lifetimes-for-alloca", cl::ReallyHidden, cl::init(3),
    cl::desc("How many lifetime ends to handle for a single alloca."),
    cl::Optional);

static cl::opt<bool>
    ClUseAfterScope("hwasan-use-after-scope",
                    cl::desc("detect use after scope within function"),
                    cl::Hidden, cl::init(true));

static cl::opt<bool> ClGenerateTagsWithCalls(
    "hwasan-generate-tags-with-calls",
    cl::desc("generate new tags with runtime library calls"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
                               cl::Hidden, cl::init(false));

static cl::opt<int> ClMatchAllTag(
    "hwasan-match-all-tag",
    cl::desc("don't report bad accesses via pointers with this tag"),
    cl::Hidden, cl::init(-1));

static cl::opt<bool>
    ClEnableKhwasan("hwasan-kernel",
                    cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
                    cl::Hidden, cl::init(false));

// These flags allow changing the shadow mapping and control how shadow memory
// is accessed. The shadow mapping looks like:
//    Shadow = (Mem >> scale) + offset
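//
// A worked example with illustrative values (not the defaults of any
// particular platform): with scale == 4 and offset == 0, the sixteen
// application addresses 0x1000..0x100f all map to the single shadow byte at
// 0x100, i.e. one shadow byte describes one 16-byte granule.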

static cl::opt<uint64_t>
    ClMappingOffset("hwasan-mapping-offset",
                    cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
                    cl::Hidden, cl::init(0));

static cl::opt<bool>
    ClWithIfunc("hwasan-with-ifunc",
                cl::desc("Access dynamic shadow through an ifunc global on "
                         "platforms that support this"),
                cl::Hidden, cl::init(false));

static cl::opt<bool> ClWithTls(
    "hwasan-with-tls",
    cl::desc("Access dynamic shadow through a thread-local pointer on "
             "platforms that support this"),
    cl::Hidden, cl::init(true));

static cl::opt<int> ClHotPercentileCutoff("hwasan-percentile-cutoff-hot",
                                          cl::desc("Hot percentile cutoff."));

static cl::opt<float>
    ClRandomSkipRate("hwasan-random-rate",
                     cl::desc("Probability value in the range [0.0, 1.0] "
                              "to keep instrumentation of a function."));

STATISTIC(NumTotalFuncs, "Number of total funcs");
STATISTIC(NumInstrumentedFuncs, "Number of instrumented funcs");
STATISTIC(NumNoProfileSummaryFuncs, "Number of funcs without PS");

// Mode for selecting how to insert frame record info into the stack ring
// buffer.
enum RecordStackHistoryMode {
  // Do not record frame record info.
  none,

  // Insert instructions into the prologue for storing into the stack ring
  // buffer directly.
  instr,

  // Add a call to __hwasan_add_frame_record in the runtime.
  libcall,
};

static cl::opt<RecordStackHistoryMode> ClRecordStackHistory(
    "hwasan-record-stack-history",
    cl::desc("Record stack frames with tagged allocations in a thread-local "
             "ring buffer"),
    cl::values(clEnumVal(none, "Do not record stack ring history"),
               clEnumVal(instr, "Insert instructions into the prologue for "
                                "storing into the stack ring buffer directly"),
               clEnumVal(libcall, "Add a call to __hwasan_add_frame_record for "
                                  "storing into the stack ring buffer")),
    cl::Hidden, cl::init(instr));

static cl::opt<bool>
    ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
                              cl::desc("instrument memory intrinsics"),
                              cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentLandingPads("hwasan-instrument-landing-pads",
                            cl::desc("instrument landing pads"), cl::Hidden,
                            cl::init(false));

static cl::opt<bool> ClUseShortGranules(
    "hwasan-use-short-granules",
    cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentPersonalityFunctions(
    "hwasan-instrument-personality-functions",
    cl::desc("instrument personality functions"), cl::Hidden);

static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
                                       cl::desc("inline all checks"),
                                       cl::Hidden, cl::init(false));

static cl::opt<bool> ClInlineFastPathChecks("hwasan-inline-fast-path-checks",
                                            cl::desc("inline fast path checks"),
                                            cl::Hidden, cl::init(false));

// Enabled from clang by "-fsanitize-hwaddress-experimental-aliasing".
static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases",
                                      cl::desc("Use page aliasing in HWASan"),
                                      cl::Hidden, cl::init(false));

namespace {

template <typename T> T optOr(cl::opt<T> &Opt, T Other) {
  return Opt.getNumOccurrences() ? Opt : Other;
}

bool shouldUsePageAliases(const Triple &TargetTriple) {
  return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64;
}

bool shouldInstrumentStack(const Triple &TargetTriple) {
  return !shouldUsePageAliases(TargetTriple) && ClInstrumentStack;
}

bool shouldInstrumentWithCalls(const Triple &TargetTriple) {
  return optOr(ClInstrumentWithCalls, TargetTriple.getArch() == Triple::x86_64);
}

bool mightUseStackSafetyAnalysis(bool DisableOptimization) {
  return optOr(ClUseStackSafety, !DisableOptimization);
}

bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
                                  bool DisableOptimization) {
  return shouldInstrumentStack(TargetTriple) &&
         mightUseStackSafetyAnalysis(DisableOptimization);
}

bool shouldDetectUseAfterScope(const Triple &TargetTriple) {
  return ClUseAfterScope && shouldInstrumentStack(TargetTriple);
}

/// An instrumentation pass implementing detection of addressability bugs
/// using tagged pointers.
class HWAddressSanitizer {
public:
  HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
                     const StackSafetyGlobalInfo *SSI)
      : M(M), SSI(SSI) {
    this->Recover = optOr(ClRecover, Recover);
    this->CompileKernel = optOr(ClEnableKhwasan, CompileKernel);
    this->Rng = ClRandomSkipRate.getNumOccurrences() ? M.createRNG(DEBUG_TYPE)
                                                     : nullptr;

    initializeModule();
  }

  void sanitizeFunction(Function &F, FunctionAnalysisManager &FAM);

private:
  struct ShadowTagCheckInfo {
    Instruction *TagMismatchTerm = nullptr;
    Value *PtrLong = nullptr;
    Value *AddrLong = nullptr;
    Value *PtrTag = nullptr;
    Value *MemTag = nullptr;
  };

  bool selectiveInstrumentationShouldSkip(Function &F,
                                          FunctionAnalysisManager &FAM) const;
  void initializeModule();
  void createHwasanCtorComdat();

  void initializeCallbacks(Module &M);

  Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val);

  Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
  Value *getShadowNonTls(IRBuilder<> &IRB);

  void untagPointerOperand(Instruction *I, Value *Addr);
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);

  int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex);
  ShadowTagCheckInfo insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore,
                                          DomTreeUpdater &DTU, LoopInfo *LI);
  void instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
                                  unsigned AccessSizeIndex,
                                  Instruction *InsertBefore,
                                  DomTreeUpdater &DTU, LoopInfo *LI);
  void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                 unsigned AccessSizeIndex,
                                 Instruction *InsertBefore, DomTreeUpdater &DTU,
                                 LoopInfo *LI);
  bool ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE, MemIntrinsic *MI);
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  bool instrumentMemAccess(InterestingMemoryOperand &O, DomTreeUpdater &DTU,
                           LoopInfo *LI);
  bool ignoreAccessWithoutRemark(Instruction *Inst, Value *Ptr);
  bool ignoreAccess(OptimizationRemarkEmitter &ORE, Instruction *Inst,
                    Value *Ptr);

  void getInterestingMemoryOperands(
      OptimizationRemarkEmitter &ORE, Instruction *I,
      const TargetLibraryInfo &TLI,
      SmallVectorImpl<InterestingMemoryOperand> &Interesting);

  void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
  Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
  Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
  bool instrumentStack(memtag::StackInfo &Info, Value *StackTag, Value *UARTag,
                       const DominatorTree &DT, const PostDominatorTree &PDT,
                       const LoopInfo &LI);
  bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
  Value *getNextTagWithCall(IRBuilder<> &IRB);
  Value *getStackBaseTag(IRBuilder<> &IRB);
  Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, unsigned AllocaNo);
  Value *getUARTag(IRBuilder<> &IRB);

  Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB);
  Value *applyTagMask(IRBuilder<> &IRB, Value *OldTag);
  unsigned retagMask(unsigned AllocaNo);

  void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);

  void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
  void instrumentGlobals();

  Value *getCachedFP(IRBuilder<> &IRB);
  Value *getFrameRecordInfo(IRBuilder<> &IRB);

  void instrumentPersonalityFunctions();

  LLVMContext *C;
  Module &M;
  const StackSafetyGlobalInfo *SSI;
  Triple TargetTriple;
  std::unique_ptr<RandomNumberGenerator> Rng;

  /// This struct defines the shadow mapping using the rule:
  ///   shadow = (mem >> Scale) + Offset.
  /// If InGlobal is true, then
  ///   extern char __hwasan_shadow[];
  ///   shadow = (mem >> Scale) + &__hwasan_shadow
  /// If InTls is true, then
  ///   extern char *__hwasan_tls;
  ///   shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
  ///
  /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
  /// ring buffer for storing stack allocations on targets that support it.
  struct ShadowMapping {
    uint8_t Scale;
    uint64_t Offset;
    bool InGlobal;
    bool InTls;
    bool WithFrameRecord;

    void init(Triple &TargetTriple, bool InstrumentWithCalls);
    Align getObjectAlignment() const { return Align(1ULL << Scale); }
  };

  ShadowMapping Mapping;

  Type *VoidTy = Type::getVoidTy(M.getContext());
  Type *IntptrTy = M.getDataLayout().getIntPtrType(M.getContext());
  PointerType *PtrTy = PointerType::getUnqual(M.getContext());
  Type *Int8Ty = Type::getInt8Ty(M.getContext());
  Type *Int32Ty = Type::getInt32Ty(M.getContext());
  Type *Int64Ty = Type::getInt64Ty(M.getContext());

  bool CompileKernel;
  bool Recover;
  bool OutlinedChecks;
  bool InlineFastPath;
  bool UseShortGranules;
  bool InstrumentLandingPads;
  bool InstrumentWithCalls;
  bool InstrumentStack;
  bool InstrumentGlobals;
  bool DetectUseAfterScope;
  bool UsePageAliases;
  bool UseMatchAllCallback;

  std::optional<uint8_t> MatchAllTag;

  unsigned PointerTagShift;
  uint64_t TagMaskByte;

  Function *HwasanCtorFunction;

  FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
  FunctionCallee HwasanMemoryAccessCallbackSized[2];

  FunctionCallee HwasanMemmove, HwasanMemcpy, HwasanMemset;
  FunctionCallee HwasanHandleVfork;

  FunctionCallee HwasanTagMemoryFunc;
  FunctionCallee HwasanGenerateTagFunc;
  FunctionCallee HwasanRecordFrameRecordFunc;

  Constant *ShadowGlobal;

  Value *ShadowBase = nullptr;
  Value *StackBaseTag = nullptr;
  Value *CachedFP = nullptr;
  GlobalValue *ThreadPtrGlobal = nullptr;
};

} // end anonymous namespace

PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
                                              ModuleAnalysisManager &MAM) {
  const StackSafetyGlobalInfo *SSI = nullptr;
  auto TargetTriple = llvm::Triple(M.getTargetTriple());
  if (shouldUseStackSafetyAnalysis(TargetTriple, Options.DisableOptimization))
    SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);

  HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
  auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  for (Function &F : M)
    HWASan.sanitizeFunction(F, FAM);

  PreservedAnalyses PA = PreservedAnalyses::none();
  // DominatorTreeAnalysis, PostDominatorTreeAnalysis, and LoopAnalysis
  // are incrementally updated throughout this pass whenever
  // SplitBlockAndInsertIfThen is called.
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<PostDominatorTreeAnalysis>();
  PA.preserve<LoopAnalysis>();
  // GlobalsAA is considered stateless and does not get invalidated unless
  // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
  // make changes that require GlobalsAA to be invalidated.
  PA.abandon<GlobalsAA>();
  return PA;
}

void HWAddressSanitizerPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  OS << '<';
  if (Options.CompileKernel)
    OS << "kernel;";
  if (Options.Recover)
    OS << "recover";
  OS << '>';
}

void HWAddressSanitizer::createHwasanCtorComdat() {
  std::tie(HwasanCtorFunction, std::ignore) =
      getOrCreateSanitizerCtorAndInitFunctions(
          M, kHwasanModuleCtorName, kHwasanInitName,
          /*InitArgTypes=*/{},
          /*InitArgs=*/{},
          // This callback is invoked when the functions are created the first
          // time. Hook them into the global ctors list in that case:
          [&](Function *Ctor, FunctionCallee) {
            Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
            Ctor->setComdat(CtorComdat);
            appendToGlobalCtors(M, Ctor, 0, Ctor);
          });

  // Create a note that contains pointers to the list of global
  // descriptors. Adding a note to the output file will cause the linker to
  // create a PT_NOTE program header pointing to the note that we can use to
  // find the descriptor list starting from the program headers. A function
  // provided by the runtime initializes the shadow memory for the globals by
  // accessing the descriptor list via the note. The dynamic loader needs to
  // call this function whenever a library is loaded.
  //
  // We use a note for this instead of the more conventional approach of
  // having a global constructor pass a descriptor list pointer to the
  // runtime because of an initialization-order problem. With constructors
  // we can encounter the following problematic scenario:
  //
  // 1) library A depends on library B and also interposes one of B's symbols
  // 2) B's constructors are called before A's (as required for correctness)
  // 3) during construction, B accesses one of its "own" globals (actually
  //    interposed by A) and triggers a HWASAN failure due to the initialization
  //    for A not having happened yet
  //
  // Even without interposition it is possible to run into similar situations in
  // cases where two libraries mutually depend on each other.
  //
  // We only need one note per binary, so put everything for the note in a
  // comdat. This needs to be a comdat with an .init_array section to prevent
  // newer versions of lld from discarding the note.
  //
  // Create the note even if we aren't instrumenting globals. This ensures that
  // binaries linked from object files with both instrumented and
  // non-instrumented globals will end up with a note, even if a comdat from an
  // object file with non-instrumented globals is selected. The note is harmless
  // if the runtime doesn't support it, since it will just be ignored.
  Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName);

  Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
  auto *Start =
      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
                         nullptr, "__start_hwasan_globals");
  Start->setVisibility(GlobalValue::HiddenVisibility);
  auto *Stop =
      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
                         nullptr, "__stop_hwasan_globals");
  Stop->setVisibility(GlobalValue::HiddenVisibility);

  // Null-terminated so actually 8 bytes, which are required in order to align
  // the note properly.
  auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");

  auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
                                 Int32Ty, Int32Ty);
  auto *Note =
      new GlobalVariable(M, NoteTy, /*isConstant=*/true,
                         GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
  Note->setSection(".note.hwasan.globals");
  Note->setComdat(NoteComdat);
  Note->setAlignment(Align(4));

  // The pointers in the note need to be relative so that the note ends up being
  // placed in rodata, which is the standard location for notes.
  auto CreateRelPtr = [&](Constant *Ptr) {
    return ConstantExpr::getTrunc(
        ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
                             ConstantExpr::getPtrToInt(Note, Int64Ty)),
        Int32Ty);
  };
  Note->setInitializer(ConstantStruct::getAnon(
      {ConstantInt::get(Int32Ty, 8),                           // n_namesz
       ConstantInt::get(Int32Ty, 8),                           // n_descsz
       ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
       Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
  appendToCompilerUsed(M, Note);

  // Create a zero-length global in hwasan_globals so that the linker will
  // always create start and stop symbols.
  auto *Dummy = new GlobalVariable(
      M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
      Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
  Dummy->setSection("hwasan_globals");
  Dummy->setComdat(NoteComdat);
  Dummy->setMetadata(LLVMContext::MD_associated,
                     MDNode::get(*C, ValueAsMetadata::get(Note)));
  appendToCompilerUsed(M, Dummy);
}

/// Module-level initialization.
///
/// Inserts a call to __hwasan_init into the module's constructor list.
void HWAddressSanitizer::initializeModule() {
  LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
  TargetTriple = Triple(M.getTargetTriple());

  // x86_64 currently has two modes:
  // - Intel LAM (default)
  // - pointer aliasing (heap only)
  bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
  UsePageAliases = shouldUsePageAliases(TargetTriple);
  InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple);
  InstrumentStack = shouldInstrumentStack(TargetTriple);
  DetectUseAfterScope = shouldDetectUseAfterScope(TargetTriple);
  PointerTagShift = IsX86_64 ? 57 : 56;
  TagMaskByte = IsX86_64 ? 0x3F : 0xFF;

  Mapping.init(TargetTriple, InstrumentWithCalls);

  C = &(M.getContext());
  IRBuilder<> IRB(*C);

  HwasanCtorFunction = nullptr;

  // Older versions of Android do not have the required runtime support for
  // short granules, global or personality function instrumentation. On other
  // platforms we currently require using the latest version of the runtime.
  bool NewRuntime =
      !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);

  UseShortGranules = optOr(ClUseShortGranules, NewRuntime);
  OutlinedChecks = (TargetTriple.isAArch64() || TargetTriple.isRISCV64()) &&
                   TargetTriple.isOSBinFormatELF() &&
                   !optOr(ClInlineAllChecks, Recover);

  // These platforms may prefer less inlining to reduce binary size.
  InlineFastPath = optOr(ClInlineFastPathChecks, !(TargetTriple.isAndroid() ||
                                                   TargetTriple.isOSFuchsia()));

  if (ClMatchAllTag.getNumOccurrences()) {
    if (ClMatchAllTag != -1) {
      MatchAllTag = ClMatchAllTag & 0xFF;
    }
  } else if (CompileKernel) {
    MatchAllTag = 0xFF;
  }
  UseMatchAllCallback = !CompileKernel && MatchAllTag.has_value();

  // If we don't have personality function support, fall back to landing pads.
  InstrumentLandingPads = optOr(ClInstrumentLandingPads, !NewRuntime);

  InstrumentGlobals =
      !CompileKernel && !UsePageAliases && optOr(ClGlobals, NewRuntime);

  if (!CompileKernel) {
    createHwasanCtorComdat();

    if (InstrumentGlobals)
      instrumentGlobals();

    bool InstrumentPersonalityFunctions =
        optOr(ClInstrumentPersonalityFunctions, NewRuntime);
    if (InstrumentPersonalityFunctions)
      instrumentPersonalityFunctions();
  }

  if (!TargetTriple.isAndroid()) {
    Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
      auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
                                    GlobalValue::ExternalLinkage, nullptr,
                                    "__hwasan_tls", nullptr,
                                    GlobalVariable::InitialExecTLSModel);
      appendToCompilerUsed(M, GV);
      return GV;
    });
    ThreadPtrGlobal = cast<GlobalVariable>(C);
  }
}

void HWAddressSanitizer::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(*C);
  const std::string MatchAllStr = UseMatchAllCallback ? "_match_all" : "";
  FunctionType *HwasanMemoryAccessCallbackSizedFnTy,
      *HwasanMemoryAccessCallbackFnTy, *HwasanMemTransferFnTy,
      *HwasanMemsetFnTy;
  if (UseMatchAllCallback) {
    HwasanMemoryAccessCallbackSizedFnTy =
        FunctionType::get(VoidTy, {IntptrTy, IntptrTy, Int8Ty}, false);
    HwasanMemoryAccessCallbackFnTy =
        FunctionType::get(VoidTy, {IntptrTy, Int8Ty}, false);
    HwasanMemTransferFnTy =
        FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy, Int8Ty}, false);
    HwasanMemsetFnTy =
        FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy, Int8Ty}, false);
  } else {
    HwasanMemoryAccessCallbackSizedFnTy =
        FunctionType::get(VoidTy, {IntptrTy, IntptrTy}, false);
    HwasanMemoryAccessCallbackFnTy =
        FunctionType::get(VoidTy, {IntptrTy}, false);
    HwasanMemTransferFnTy =
        FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy}, false);
    HwasanMemsetFnTy =
        FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy}, false);
  }

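  // For example, with the default "__hwasan_" prefix and Recover enabled,
  // the 4-byte load callback constructed below resolves to
  // "__hwasan_load4_noabort" and the sized variant to "__hwasan_loadN_noabort".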
  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
    const std::string TypeStr = AccessIsWrite ? "store" : "load";
    const std::string EndingStr = Recover ? "_noabort" : "";

    HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
        ClMemoryAccessCallbackPrefix + TypeStr + "N" + MatchAllStr + EndingStr,
        HwasanMemoryAccessCallbackSizedFnTy);

    for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
         AccessSizeIndex++) {
      HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
          M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr +
                                    itostr(1ULL << AccessSizeIndex) +
                                    MatchAllStr + EndingStr,
                                HwasanMemoryAccessCallbackFnTy);
    }
  }

  const std::string MemIntrinCallbackPrefix =
      (CompileKernel && !ClKasanMemIntrinCallbackPrefix)
          ? std::string("")
          : ClMemoryAccessCallbackPrefix;

  HwasanMemmove = M.getOrInsertFunction(
      MemIntrinCallbackPrefix + "memmove" + MatchAllStr, HwasanMemTransferFnTy);
  HwasanMemcpy = M.getOrInsertFunction(
      MemIntrinCallbackPrefix + "memcpy" + MatchAllStr, HwasanMemTransferFnTy);
  HwasanMemset = M.getOrInsertFunction(
      MemIntrinCallbackPrefix + "memset" + MatchAllStr, HwasanMemsetFnTy);

  HwasanTagMemoryFunc = M.getOrInsertFunction("__hwasan_tag_memory", VoidTy,
                                              PtrTy, Int8Ty, IntptrTy);
  HwasanGenerateTagFunc =
      M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);

  HwasanRecordFrameRecordFunc =
      M.getOrInsertFunction("__hwasan_add_frame_record", VoidTy, Int64Ty);

  ShadowGlobal =
      M.getOrInsertGlobal("__hwasan_shadow", ArrayType::get(Int8Ty, 0));

  HwasanHandleVfork =
      M.getOrInsertFunction("__hwasan_handle_vfork", VoidTy, IntptrTy);
}

Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
  // An empty inline asm with input reg == output reg.
  // An opaque no-op cast, basically.
  // This prevents code bloat as a result of rematerializing trivial definitions
  // such as constants or global addresses at every load and store.
  InlineAsm *Asm =
      InlineAsm::get(FunctionType::get(PtrTy, {Val->getType()}, false),
                     StringRef(""), StringRef("=r,0"),
                     /*hasSideEffects=*/false);
  return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
}

Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
  return getOpaqueNoopCast(IRB, ShadowGlobal);
}

Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
  if (Mapping.Offset != kDynamicShadowSentinel)
    return getOpaqueNoopCast(
        IRB, ConstantExpr::getIntToPtr(
                 ConstantInt::get(IntptrTy, Mapping.Offset), PtrTy));

  if (Mapping.InGlobal)
    return getDynamicShadowIfunc(IRB);

  Value *GlobalDynamicAddress =
      IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
          kHwasanShadowMemoryDynamicAddress, PtrTy);
  return IRB.CreateLoad(PtrTy, GlobalDynamicAddress);
}

bool HWAddressSanitizer::ignoreAccessWithoutRemark(Instruction *Inst,
                                                   Value *Ptr) {
  // Do not instrument accesses from different address spaces; we cannot deal
  // with them.
  Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
  if (PtrTy->getPointerAddressSpace() != 0)
    return true;

  // Ignore swifterror addresses.
  // swifterror memory addresses are mem2reg promoted by instruction
  // selection. As such they cannot have regular uses like an instrumentation
  // function and it makes no sense to track them as memory.
  if (Ptr->isSwiftError())
    return true;

  if (findAllocaForValue(Ptr)) {
    if (!InstrumentStack)
      return true;
    if (SSI && SSI->stackAccessIsSafe(*Inst))
      return true;
  }

  if (isa<GlobalVariable>(getUnderlyingObject(Ptr))) {
    if (!InstrumentGlobals)
      return true;
    // TODO: Optimize inbound global accesses, like Asan `instrumentMop`.
  }

  return false;
}

bool HWAddressSanitizer::ignoreAccess(OptimizationRemarkEmitter &ORE,
                                      Instruction *Inst, Value *Ptr) {
  bool Ignored = ignoreAccessWithoutRemark(Inst, Ptr);
  if (Ignored) {
    ORE.emit(
        [&]() { return OptimizationRemark(DEBUG_TYPE, "ignoreAccess", Inst); });
  } else {
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "ignoreAccess", Inst);
    });
  }
  return Ignored;
}

void HWAddressSanitizer::getInterestingMemoryOperands(
    OptimizationRemarkEmitter &ORE, Instruction *I,
    const TargetLibraryInfo &TLI,
    SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
  // Skip memory accesses inserted by another instrumentation.
  if (I->hasMetadata(LLVMContext::MD_nosanitize))
    return;

  // Do not instrument the load fetching the dynamic shadow address.
  if (ShadowBase == I)
    return;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!ClInstrumentReads || ignoreAccess(ORE, I, LI->getPointerOperand()))
      return;
    Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
                             LI->getType(), LI->getAlign());
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!ClInstrumentWrites || ignoreAccess(ORE, I, SI->getPointerOperand()))
      return;
    Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
                             SI->getValueOperand()->getType(), SI->getAlign());
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    if (!ClInstrumentAtomics || ignoreAccess(ORE, I, RMW->getPointerOperand()))
      return;
    Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
                             RMW->getValOperand()->getType(), std::nullopt);
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    if (!ClInstrumentAtomics || ignoreAccess(ORE, I, XCHG->getPointerOperand()))
      return;
    Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
                             XCHG->getCompareOperand()->getType(),
                             std::nullopt);
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
      if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
          ignoreAccess(ORE, I, CI->getArgOperand(ArgNo)))
        continue;
      Type *Ty = CI->getParamByValType(ArgNo);
      Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
    }
    maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
  }
}

static unsigned getPointerOperandIndex(Instruction *I) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I))
    return LI->getPointerOperandIndex();
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getPointerOperandIndex();
  if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
    return RMW->getPointerOperandIndex();
  if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
    return XCHG->getPointerOperandIndex();
  report_fatal_error("Unexpected instruction");
  return -1;
}

static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
  size_t Res = llvm::countr_zero(TypeSize / 8);
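  // Worked examples: TypeSize == 8 bits (1 byte) gives index 0, TypeSize ==
  // 64 gives index 3, and TypeSize == 128 gives index 4, the largest
  // supported access size of 16 bytes.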
  assert(Res < kNumberOfAccessSizes);
  return Res;
}

void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
  if (TargetTriple.isAArch64() || TargetTriple.getArch() == Triple::x86_64 ||
      TargetTriple.isRISCV64())
    return;

  IRBuilder<> IRB(I);
  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
  Value *UntaggedPtr =
      IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
  I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
}

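// A rough sketch of the IR emitted below for the offset-based mapping
// (illustrative value names, assuming Scale == 4):
//   %shadow.off = lshr i64 %mem, 4
//   %shadow.ptr = getelementptr i8, ptr %shadow.base, i64 %shadow.off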
Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
  // Mem >> Scale
  Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
  if (Mapping.Offset == 0)
    return IRB.CreateIntToPtr(Shadow, PtrTy);
  // (Mem >> Scale) + Offset
  return IRB.CreatePtrAdd(ShadowBase, Shadow);
}

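// Pack the static parameters of a check into a single integer that the
// runtime and the outlined-check intrinsics decode again: for example, a
// write sets the bit at IsWriteShift and the log2 of the access size
// occupies the field at AccessSizeShift (symbolic shift names as defined by
// HWASanAccessInfo).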
int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
                                          unsigned AccessSizeIndex) {
  return (CompileKernel << HWASanAccessInfo::CompileKernelShift) |
         (MatchAllTag.has_value() << HWASanAccessInfo::HasMatchAllShift) |
         (MatchAllTag.value_or(0) << HWASanAccessInfo::MatchAllShift) |
         (Recover << HWASanAccessInfo::RecoverShift) |
         (IsWrite << HWASanAccessInfo::IsWriteShift) |
         (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
}

HWAddressSanitizer::ShadowTagCheckInfo
HWAddressSanitizer::insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore,
                                         DomTreeUpdater &DTU, LoopInfo *LI) {
  ShadowTagCheckInfo R;

  IRBuilder<> IRB(InsertBefore);

  R.PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
  R.PtrTag =
      IRB.CreateTrunc(IRB.CreateLShr(R.PtrLong, PointerTagShift), Int8Ty);
  R.AddrLong = untagPointer(IRB, R.PtrLong);
  Value *Shadow = memToShadow(R.AddrLong, IRB);
  R.MemTag = IRB.CreateLoad(Int8Ty, Shadow);
  Value *TagMismatch = IRB.CreateICmpNE(R.PtrTag, R.MemTag);

  if (MatchAllTag.has_value()) {
    Value *TagNotIgnored = IRB.CreateICmpNE(
        R.PtrTag, ConstantInt::get(R.PtrTag->getType(), *MatchAllTag));
    TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
  }

  R.TagMismatchTerm = SplitBlockAndInsertIfThen(
      TagMismatch, InsertBefore, false,
      MDBuilder(*C).createUnlikelyBranchWeights(), &DTU, LI);

  return R;
}

void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
                                                    unsigned AccessSizeIndex,
                                                    Instruction *InsertBefore,
                                                    DomTreeUpdater &DTU,
                                                    LoopInfo *LI) {
  assert(!UsePageAliases);
  const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);

  if (InlineFastPath)
    InsertBefore =
        insertShadowTagCheck(Ptr, InsertBefore, DTU, LI).TagMismatchTerm;

  IRBuilder<> IRB(InsertBefore);
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  bool useFixedShadowIntrinsic = false;
  // The memaccess fixed shadow intrinsic is only supported on AArch64,
  // which allows a 16-bit immediate to be left-shifted by 32.
  // Since kShadowBaseAlignment == 32, and Linux by default will not
  // mmap above 48-bits, practically any valid shadow offset is
  // representable.
  // In particular, an offset of 4TB (1024 << 32) is representable, and
  // ought to be good enough for anybody.
  if (TargetTriple.isAArch64() && Mapping.Offset != kDynamicShadowSentinel) {
    uint16_t offset_shifted = Mapping.Offset >> 32;
    useFixedShadowIntrinsic = (uint64_t)offset_shifted << 32 == Mapping.Offset;
  }

  if (useFixedShadowIntrinsic)
    IRB.CreateCall(
        Intrinsic::getDeclaration(
            M, UseShortGranules
                   ? Intrinsic::hwasan_check_memaccess_shortgranules_fixedshadow
                   : Intrinsic::hwasan_check_memaccess_fixedshadow),
        {Ptr, ConstantInt::get(Int32Ty, AccessInfo),
         ConstantInt::get(Int64Ty, Mapping.Offset)});
  else
    IRB.CreateCall(Intrinsic::getDeclaration(
                       M, UseShortGranules
                              ? Intrinsic::hwasan_check_memaccess_shortgranules
                              : Intrinsic::hwasan_check_memaccess),
                   {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
}

void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                                   unsigned AccessSizeIndex,
                                                   Instruction *InsertBefore,
                                                   DomTreeUpdater &DTU,
                                                   LoopInfo *LI) {
  assert(!UsePageAliases);
  const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);

  ShadowTagCheckInfo TCI = insertShadowTagCheck(Ptr, InsertBefore, DTU, LI);

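  // Short granule slow path, sketched with illustrative values: a memory tag
  // in [1, 15] marks a partially addressable granule. For a 13-byte object
  // the shadow byte holds 13 and the real tag lives in the granule's last
  // byte, so the checks below verify both that the access ends below the
  // short granule size and that the pointer tag matches that in-granule tag.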
  IRBuilder<> IRB(TCI.TagMismatchTerm);
  Value *OutOfShortGranuleTagRange =
      IRB.CreateICmpUGT(TCI.MemTag, ConstantInt::get(Int8Ty, 15));
  Instruction *CheckFailTerm = SplitBlockAndInsertIfThen(
      OutOfShortGranuleTagRange, TCI.TagMismatchTerm, !Recover,
      MDBuilder(*C).createUnlikelyBranchWeights(), &DTU, LI);

  IRB.SetInsertPoint(TCI.TagMismatchTerm);
  Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(TCI.PtrLong, 15), Int8Ty);
  PtrLowBits = IRB.CreateAdd(
      PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
  Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, TCI.MemTag);
  SplitBlockAndInsertIfThen(PtrLowBitsOOB, TCI.TagMismatchTerm, false,
                            MDBuilder(*C).createUnlikelyBranchWeights(), &DTU,
                            LI, CheckFailTerm->getParent());

  IRB.SetInsertPoint(TCI.TagMismatchTerm);
  Value *InlineTagAddr = IRB.CreateOr(TCI.AddrLong, 15);
  InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, PtrTy);
  Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
  Value *InlineTagMismatch = IRB.CreateICmpNE(TCI.PtrTag, InlineTag);
  SplitBlockAndInsertIfThen(InlineTagMismatch, TCI.TagMismatchTerm, false,
                            MDBuilder(*C).createUnlikelyBranchWeights(), &DTU,
                            LI, CheckFailTerm->getParent());

  IRB.SetInsertPoint(CheckFailTerm);
  InlineAsm *Asm;
  switch (TargetTriple.getArch()) {
  case Triple::x86_64:
    // The signal handler will find the data address in rdi.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
        "int3\nnopl " +
            itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
            "(%rax)",
        "{rdi}",
        /*hasSideEffects=*/true);
    break;
  case Triple::aarch64:
  case Triple::aarch64_be:
    // The signal handler will find the data address in x0.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
        "brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
        "{x0}",
        /*hasSideEffects=*/true);
    break;
  case Triple::riscv64:
    // The signal handler will find the data address in x10.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
        "ebreak\naddiw x0, x11, " +
            itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
        "{x10}",
        /*hasSideEffects=*/true);
    break;
  default:
    report_fatal_error("unsupported architecture");
  }
  IRB.CreateCall(Asm, TCI.PtrLong);
  if (Recover)
    cast<BranchInst>(CheckFailTerm)
        ->setSuccessor(0, TCI.TagMismatchTerm->getParent());
}

bool HWAddressSanitizer::ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE,
                                            MemIntrinsic *MI) {
  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
    return (!ClInstrumentWrites || ignoreAccess(ORE, MTI, MTI->getDest())) &&
           (!ClInstrumentReads || ignoreAccess(ORE, MTI, MTI->getSource()));
  }
  if (isa<MemSetInst>(MI))
    return !ClInstrumentWrites || ignoreAccess(ORE, MI, MI->getDest());
  return false;
}

void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  if (isa<MemTransferInst>(MI)) {
    SmallVector<Value *, 4> Args{
        MI->getOperand(0), MI->getOperand(1),
        IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)};

    if (UseMatchAllCallback)
      Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
    IRB.CreateCall(isa<MemMoveInst>(MI) ? HwasanMemmove : HwasanMemcpy, Args);
  } else if (isa<MemSetInst>(MI)) {
    SmallVector<Value *, 4> Args{
        MI->getOperand(0),
        IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
        IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)};
    if (UseMatchAllCallback)
      Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
    IRB.CreateCall(HwasanMemset, Args);
  }
  MI->eraseFromParent();
}

bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O,
                                             DomTreeUpdater &DTU,
                                             LoopInfo *LI) {
  Value *Addr = O.getPtr();

  LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");

  if (O.MaybeMask)
    return false; // FIXME

  IRBuilder<> IRB(O.getInsn());
  if (!O.TypeStoreSize.isScalable() && isPowerOf2_64(O.TypeStoreSize) &&
      (O.TypeStoreSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
      (!O.Alignment || *O.Alignment >= Mapping.getObjectAlignment() ||
       *O.Alignment >= O.TypeStoreSize / 8)) {
    size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeStoreSize);
    if (InstrumentWithCalls) {
      SmallVector<Value *, 2> Args{IRB.CreatePointerCast(Addr, IntptrTy)};
      if (UseMatchAllCallback)
        Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
      IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
                     Args);
    } else if (OutlinedChecks) {
      instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(),
                                 DTU, LI);
    } else {
      instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(),
                                DTU, LI);
    }
  } else {
    SmallVector<Value *, 3> Args{
        IRB.CreatePointerCast(Addr, IntptrTy),
        IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, O.TypeStoreSize),
                       ConstantInt::get(IntptrTy, 8))};
    if (UseMatchAllCallback)
      Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
    IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite], Args);
  }
  untagPointerOperand(O.getInsn(), Addr);

  return true;
}

void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
                                   size_t Size) {
  size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
  if (!UseShortGranules)
    Size = AlignedSize;

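  // Worked example with illustrative numbers: a 13-byte alloca rounds up to
  // AlignedSize == 16. With short granules enabled, the granule's shadow
  // byte receives the short granule size 13, and the real tag is stored in
  // the granule's own last byte (offset AlignedSize - 1 below).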
  Tag = IRB.CreateTrunc(Tag, Int8Ty);
  if (InstrumentWithCalls) {
    IRB.CreateCall(HwasanTagMemoryFunc,
                   {IRB.CreatePointerCast(AI, PtrTy), Tag,
                    ConstantInt::get(IntptrTy, AlignedSize)});
  } else {
    size_t ShadowSize = Size >> Mapping.Scale;
    Value *AddrLong = untagPointer(IRB, IRB.CreatePointerCast(AI, IntptrTy));
    Value *ShadowPtr = memToShadow(AddrLong, IRB);
    // If this memset is not inlined, it will be intercepted in the hwasan
    // runtime library. That's OK, because the interceptor skips the checks if
    // the address is in the shadow region.
    // FIXME: the interceptor is not as fast as real memset. Consider lowering
    // llvm.memset right here into either a sequence of stores, or a call to
    // hwasan_tag_memory.
    if (ShadowSize)
      IRB.CreateMemSet(ShadowPtr, Tag, ShadowSize, Align(1));
    if (Size != AlignedSize) {
      const uint8_t SizeRemainder = Size % Mapping.getObjectAlignment().value();
      IRB.CreateStore(ConstantInt::get(Int8Ty, SizeRemainder),
                      IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
      IRB.CreateStore(
          Tag, IRB.CreateConstGEP1_32(Int8Ty, IRB.CreatePointerCast(AI, PtrTy),
                                      AlignedSize - 1));
    }
  }
}

unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
  if (TargetTriple.getArch() == Triple::x86_64)
    return AllocaNo & TagMaskByte;

  // A list of 8-bit numbers that have at most one run of non-zero bits.
  // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
  // masks.
  // The list does not include the value 255, which is used for UAR.
  //
  // Because we are more likely to use earlier elements of this list than later
  // ones, it is sorted in increasing order of probability of collision with a
  // mask allocated (temporally) nearby. The program that generated this list
  // can be found at:
  // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
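  //
  // For example, 192 == 0b11000000 has a single run of set bits, so on
  // AArch64 the retag "x ^ (192 << 56)" folds into one EOR with an encodable
  // logical immediate; a value like 0b10100000 (two runs) would not qualify.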
  static const unsigned FastMasks[] = {
      0,   128, 64, 192, 32,  96,  224, 112, 240, 48, 16,  120,
      248, 56,  24, 8,   124, 252, 60,  28,  12,  4,  126, 254,
      62,  30,  14, 6,   2,   127, 63,  31,  15,  7,  3,   1};
  return FastMasks[AllocaNo % std::size(FastMasks)];
}

Value *HWAddressSanitizer::applyTagMask(IRBuilder<> &IRB, Value *OldTag) {
  if (TagMaskByte == 0xFF)
    return OldTag; // No need to clear the tag byte.
  return IRB.CreateAnd(OldTag,
                       ConstantInt::get(OldTag->getType(), TagMaskByte));
}

Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
  return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
}

1200  Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
1201    if (ClGenerateTagsWithCalls)
1202      return nullptr;
1203    if (StackBaseTag)
1204      return StackBaseTag;
1205    // Extract some entropy from the frame pointer for the tags.
1206    // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
1207    // between functions).
1208    Value *FramePointerLong = getCachedFP(IRB);
1209    Value *StackTag =
1210        applyTagMask(IRB, IRB.CreateXor(FramePointerLong,
1211                                        IRB.CreateLShr(FramePointerLong, 20)));
1212    StackTag->setName("hwasan.stack.base.tag");
1213    return StackTag;
1214  }
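
// Editor's sketch (not part of the original source): for a frame pointer
// like FP = 0x0000007FABCD1230, FP >> 20 brings the ASLR-randomized middle
// bits down into the low byte range, so FP ^ (FP >> 20) mixes per-function
// frame bits with ASLR entropy; applyTagMask then clears any bits that must
// not appear in a tag.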
1215  
1216  Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
1217                                          unsigned AllocaNo) {
1218    if (ClGenerateTagsWithCalls)
1219      return getNextTagWithCall(IRB);
1220    return IRB.CreateXor(
1221        StackTag, ConstantInt::get(StackTag->getType(), retagMask(AllocaNo)));
1222  }
1223  
1224  Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB) {
1225    Value *FramePointerLong = getCachedFP(IRB);
1226    Value *UARTag =
1227        applyTagMask(IRB, IRB.CreateLShr(FramePointerLong, PointerTagShift));
1228  
1229    UARTag->setName("hwasan.uar.tag");
1230    return UARTag;
1231  }
1232  
1233  // Add a tag to an address.
1234  Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
1235                                        Value *PtrLong, Value *Tag) {
1236    assert(!UsePageAliases);
1237    Value *TaggedPtrLong;
1238    if (CompileKernel) {
1239      // Kernel addresses have 0xFF in the most significant byte.
1240      Value *ShiftedTag =
1241          IRB.CreateOr(IRB.CreateShl(Tag, PointerTagShift),
1242                       ConstantInt::get(IntptrTy, (1ULL << PointerTagShift) - 1));
1243      TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
1244    } else {
1245      // Userspace can simply do OR (tag << PointerTagShift);
1246      Value *ShiftedTag = IRB.CreateShl(Tag, PointerTagShift);
1247      TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
1248    }
1249    return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
1250  }
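
// Editor's worked example (not part of the original source; assumes
// PointerTagShift == 56, as on AArch64). Tagging 0x0000AAAABBBBCCCC with
// tag 0x2A in userspace ORs in the shifted tag:
//
//   0x0000AAAABBBBCCCC | (0x2AULL << 56) == 0x2A00AAAABBBBCCCC
//
// For a kernel address such as 0xFFFFFFC012345678 (top byte 0xFF), ANDing
// with (0x2AULL << 56) | 0x00FFFFFFFFFFFFFF rewrites the 0xFF byte:
//
//   0xFFFFFFC012345678 & 0x2AFFFFFFFFFFFFFF == 0x2AFFFFC012345678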
1251  
1252  // Remove tag from an address.
1253  Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
1254    assert(!UsePageAliases);
1255    Value *UntaggedPtrLong;
1256    if (CompileKernel) {
1257      // Kernel addresses have 0xFF in the most significant byte.
1258      UntaggedPtrLong =
1259          IRB.CreateOr(PtrLong, ConstantInt::get(PtrLong->getType(),
1260                                                 TagMaskByte << PointerTagShift));
1261    } else {
1262      // Userspace addresses have 0x00 in the most significant byte.
1263      UntaggedPtrLong = IRB.CreateAnd(
1264          PtrLong, ConstantInt::get(PtrLong->getType(),
1265                                    ~(TagMaskByte << PointerTagShift)));
1266    }
1267    return UntaggedPtrLong;
1268  }
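
// Editor's worked example (not part of the original source; assumes
// PointerTagShift == 56 and TagMaskByte == 0xFF). Untagging inverts the
// examples above:
//
//   kernel:    0x2AFFFFC012345678 | 0xFF00000000000000 == 0xFFFFFFC012345678
//   userspace: 0x2A00AAAABBBBCCCC & 0x00FFFFFFFFFFFFFF == 0x0000AAAABBBBCCCC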
1269  
1270  Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB) {
1271    // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
1272    // in Bionic's libc/platform/bionic/tls_defines.h.
1273    constexpr int SanitizerSlot = 6;
1274    if (TargetTriple.isAArch64() && TargetTriple.isAndroid())
1275      return memtag::getAndroidSlotPtr(IRB, SanitizerSlot);
1276    return ThreadPtrGlobal;
1277  }
1278  
1279  Value *HWAddressSanitizer::getCachedFP(IRBuilder<> &IRB) {
1280    if (!CachedFP)
1281      CachedFP = memtag::getFP(IRB);
1282    return CachedFP;
1283  }
1284  
1285  Value *HWAddressSanitizer::getFrameRecordInfo(IRBuilder<> &IRB) {
1286    // Prepare ring buffer data.
1287    Value *PC = memtag::getPC(TargetTriple, IRB);
1288    Value *FP = getCachedFP(IRB);
1289  
1290    // Mix FP and PC.
1291    // Assumptions:
1292    // PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero)
1293    // FP is 0xfffffffffffFFFF0  (4 lower bits are zero)
1294    // We only really need ~20 lower non-zero bits (FFFF), so we mix like this:
1295    //       0xFFFFPPPPPPPPPPPP
1296    //
1297    // FP works because in AArch64FrameLowering::getFrameIndexReference, we
1298    // prefer FP-relative offsets for functions compiled with HWASan.
1299    FP = IRB.CreateShl(FP, 44);
1300    return IRB.CreateOr(PC, FP);
1301  }
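
// Editor's worked example (not part of the original source): with
//
//   FP = 0x0000FFFFC003A5F0   (interesting low bits: 0x3A5F0)
//   PC = 0x0000123456789ABC
//
// the record is
//
//   (FP << 44) | PC == 0x3A5F000000000000 | 0x0000123456789ABC
//                   == 0x3A5F123456789ABC
//
// so the runtime can recover PC as record & ((1ULL << 48) - 1) and the
// frame-identifying FP bits 4..19 as record >> 48.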
1302  
1303  void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
1304    if (!Mapping.InTls)
1305      ShadowBase = getShadowNonTls(IRB);
1306    else if (!WithFrameRecord && TargetTriple.isAndroid())
1307      ShadowBase = getDynamicShadowIfunc(IRB);
1308  
1309    if (!WithFrameRecord && ShadowBase)
1310      return;
1311  
1312    Value *SlotPtr = nullptr;
1313    Value *ThreadLong = nullptr;
1314    Value *ThreadLongMaybeUntagged = nullptr;
1315  
1316    auto getThreadLongMaybeUntagged = [&]() {
1317      if (!SlotPtr)
1318        SlotPtr = getHwasanThreadSlotPtr(IRB);
1319      if (!ThreadLong)
1320        ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
1321      // Extract the address field from ThreadLong. Unnecessary on AArch64 with
1322      // TBI.
1323      return TargetTriple.isAArch64() ? ThreadLong
1324                                      : untagPointer(IRB, ThreadLong);
1325    };
1326  
1327    if (WithFrameRecord) {
1328      switch (ClRecordStackHistory) {
1329      case libcall: {
1330        // Emit a runtime call into hwasan rather than emitting instructions for
1331        // recording stack history.
1332        Value *FrameRecordInfo = getFrameRecordInfo(IRB);
1333        IRB.CreateCall(HwasanRecordFrameRecordFunc, {FrameRecordInfo});
1334        break;
1335      }
1336      case instr: {
1337        ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();
1338  
1339        StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
1340  
1341        // Store data to ring buffer.
1342        Value *FrameRecordInfo = getFrameRecordInfo(IRB);
1343        Value *RecordPtr =
1344            IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IRB.getPtrTy(0));
1345        IRB.CreateStore(FrameRecordInfo, RecordPtr);
1346  
1347      // Update the ring buffer. The top byte of ThreadLong defines the size of
1348      // the buffer in pages; it must be a power of two, and the start of the
1349      // buffer must be aligned by twice that much. Therefore wrap-around of the
1350      // ring buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
1351      // The use of AShr instead of LShr is due to
1352      //   https://bugs.llvm.org/show_bug.cgi?id=39030
1353      // The runtime library makes sure not to use the highest bit.
1354        //
1355        // Mechanical proof of this address calculation can be found at:
1356        // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/prove_hwasanwrap.smt2
1357        //
1358        // Example of the wrap case for N = 1
1359        // Pointer:   0x01AAAAAAAAAAAFF8
1360        //                     +
1361        //            0x0000000000000008
1362        //                     =
1363        //            0x01AAAAAAAAAAB000
1364        //                     &
1365        // WrapMask:  0xFFFFFFFFFFFFF000
1366        //                     =
1367        //            0x01AAAAAAAAAAA000
1368        //
1369        // Then the WrapMask will be a no-op until the next wrap case.
1370        Value *WrapMask = IRB.CreateXor(
1371            IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
1372            ConstantInt::get(IntptrTy, (uint64_t)-1));
1373        Value *ThreadLongNew = IRB.CreateAnd(
1374            IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
1375        IRB.CreateStore(ThreadLongNew, SlotPtr);
1376        break;
1377      }
1378      case none: {
1379        llvm_unreachable(
1380            "A stack history recording mode should've been selected.");
1381      }
1382      }
1383    }
1384  
1385    if (!ShadowBase) {
1386      if (!ThreadLongMaybeUntagged)
1387        ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();
1388  
1389      // Get the shadow base address by aligning RecordPtr up.
1390      // Note: this is not correct if the pointer is already aligned.
1391      // The runtime library makes sure this never happens.
1392      ShadowBase = IRB.CreateAdd(
1393          IRB.CreateOr(
1394              ThreadLongMaybeUntagged,
1395              ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
1396          ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
1397      ShadowBase = IRB.CreateIntToPtr(ShadowBase, PtrTy);
1398    }
1399  }
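
// Editor's worked example of the align-up above (not part of the original
// source; assumes kShadowBaseAlignment == 32). For
// ThreadLongMaybeUntagged == 0x00005A5A00001238:
//
//   (0x00005A5A00001238 | 0xFFFFFFFF) + 1 == 0x00005A5B00000000
//
// i.e. the shadow base is the next 2^32-aligned address above the ring
// buffer pointer; an already-aligned pointer would be bumped a full 2^32
// further, which is the case the runtime rules out.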
1400  
1401  bool HWAddressSanitizer::instrumentLandingPads(
1402      SmallVectorImpl<Instruction *> &LandingPadVec) {
1403    for (auto *LP : LandingPadVec) {
1404      IRBuilder<> IRB(LP->getNextNonDebugInstruction());
1405      IRB.CreateCall(
1406          HwasanHandleVfork,
1407          {memtag::readRegister(
1408              IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp" : "sp")});
1409    }
1410    return true;
1411  }
1412  
1413  bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
1414                                           Value *StackTag, Value *UARTag,
1415                                           const DominatorTree &DT,
1416                                           const PostDominatorTree &PDT,
1417                                           const LoopInfo &LI) {
1418    // Ideally, we would calculate a tagged stack base pointer and rewrite all
1419    // alloca addresses using it. Unfortunately, offsets are not known yet
1420    // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
1421    // temp, shift-OR it into each alloca address, and xor with the retag mask.
1422    // This generates one extra instruction per alloca use.
1423    unsigned int I = 0;
1424  
1425    for (auto &KV : SInfo.AllocasToInstrument) {
1426      auto N = I++;
1427      auto *AI = KV.first;
1428      memtag::AllocaInfo &Info = KV.second;
1429      IRBuilder<> IRB(AI->getNextNonDebugInstruction());
1430  
1431      // Replace uses of the alloca with tagged address.
1432      Value *Tag = getAllocaTag(IRB, StackTag, N);
1433      Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
1434      Value *AINoTagLong = untagPointer(IRB, AILong);
1435      Value *Replacement = tagPointer(IRB, AI->getType(), AINoTagLong, Tag);
1436      std::string Name =
1437          AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
1438      Replacement->setName(Name + ".hwasan");
1439  
1440      size_t Size = memtag::getAllocaSizeInBytes(*AI);
1441      size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
1442  
1443      Value *AICast = IRB.CreatePointerCast(AI, PtrTy);
1444  
1445      auto HandleLifetime = [&](IntrinsicInst *II) {
1446        // Set the lifetime intrinsic to cover the whole alloca. This reduces the
1447        // set of assumptions we need to make about the lifetime. Without this we
1448        // would need to ensure that we can track the lifetime pointer to a
1449        // constant offset from the alloca, and would still need to change the
1450        // size to include the extra alignment we use for the untagging to make
1451        // the size consistent.
1452        //
1453        // The check for standard lifetime below makes sure that we have exactly
1454        // one set of start / end in any execution (i.e. the ends are not
1455        // reachable from each other), so this will not cause any problems.
1456        II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize));
1457        II->setArgOperand(1, AICast);
1458      };
1459      llvm::for_each(Info.LifetimeStart, HandleLifetime);
1460      llvm::for_each(Info.LifetimeEnd, HandleLifetime);
1461  
1462      AI->replaceUsesWithIf(Replacement, [AICast, AILong](const Use &U) {
1463        auto *User = U.getUser();
1464        return User != AILong && User != AICast &&
1465               !memtag::isLifetimeIntrinsic(User);
1466      });
1467  
1468      memtag::annotateDebugRecords(Info, retagMask(N));
1469  
1470      auto TagEnd = [&](Instruction *Node) {
1471        IRB.SetInsertPoint(Node);
1472        // When untagging, use the `AlignedSize` because we need to set the tags
1473        // for the entire alloca to original. If we used `Size` here, we would
1474        // keep the last granule tagged, and store zero in the last byte of the
1475        // last granule, due to how short granules are implemented.
1476        tagAlloca(IRB, AI, UARTag, AlignedSize);
1477      };
1478      // Calls to functions that may return twice (e.g. setjmp) confuse the
1479      // postdominator analysis, and could leave memory tagged after the
1480      // function returns. Work around this by always untagging at every return
1481      // statement if return_twice functions are called.
1482      bool StandardLifetime =
1483          !SInfo.CallsReturnTwice &&
1484          SInfo.UnrecognizedLifetimes.empty() &&
1485          memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, &DT,
1486                                     &LI, ClMaxLifetimes);
1487      if (DetectUseAfterScope && StandardLifetime) {
1488        IntrinsicInst *Start = Info.LifetimeStart[0];
1489        IRB.SetInsertPoint(Start->getNextNode());
1490        tagAlloca(IRB, AI, Tag, Size);
1491        if (!memtag::forAllReachableExits(DT, PDT, LI, Start, Info.LifetimeEnd,
1492                                          SInfo.RetVec, TagEnd)) {
1493          for (auto *End : Info.LifetimeEnd)
1494            End->eraseFromParent();
1495        }
1496      } else {
1497        tagAlloca(IRB, AI, Tag, Size);
1498        for (auto *RI : SInfo.RetVec)
1499          TagEnd(RI);
1500        // We inserted tagging outside of the lifetimes, so we have to remove
1501        // them.
1502        for (auto &II : Info.LifetimeStart)
1503          II->eraseFromParent();
1504        for (auto &II : Info.LifetimeEnd)
1505          II->eraseFromParent();
1506      }
1507      memtag::alignAndPadAlloca(Info, Mapping.getObjectAlignment());
1508    }
1509    for (auto &I : SInfo.UnrecognizedLifetimes)
1510      I->eraseFromParent();
1511    return true;
1512  }
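
// Editor's sketch of the per-alloca rewrite (hypothetical IR, not from the
// original source). For the first alloca (N == 0) with base tag %stack.tag:
//
//   %a = alloca i32, align 16
//   ; conceptually becomes:
//   %a.long   = ptrtoint ptr %a to i64        ; AILong
//   %a.notag  = <untagPointer(%a.long)>       ; clear any existing tag bits
//   %a.hwasan = <tagPointer(%a.notag, %stack.tag ^ retagMask(0))>
//
// All uses except lifetime intrinsics and the casts above are redirected to
// %a.hwasan; the granules are tagged at lifetime start (or entry) and
// retagged with the UAR tag on every reachable exit.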
1513  
1514  static void emitRemark(const Function &F, OptimizationRemarkEmitter &ORE,
1515                         bool Skip) {
1516    if (Skip) {
1517      ORE.emit([&]() {
1518        return OptimizationRemark(DEBUG_TYPE, "Skip", &F)
1519               << "Skipped: F=" << ore::NV("Function", &F);
1520      });
1521    } else {
1522      ORE.emit([&]() {
1523        return OptimizationRemarkMissed(DEBUG_TYPE, "Sanitize", &F)
1524               << "Sanitized: F=" << ore::NV("Function", &F);
1525      });
1526    }
1527  }
1528  
1529  bool HWAddressSanitizer::selectiveInstrumentationShouldSkip(
1530      Function &F, FunctionAnalysisManager &FAM) const {
1531    bool Skip = [&]() {
1532      if (ClRandomSkipRate.getNumOccurrences()) {
1533        std::bernoulli_distribution D(ClRandomSkipRate);
1534        return !D(*Rng);
1535      }
1536      if (!ClHotPercentileCutoff.getNumOccurrences())
1537        return false;
1538      auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1539      ProfileSummaryInfo *PSI =
1540          MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
1541      if (!PSI || !PSI->hasProfileSummary()) {
1542        ++NumNoProfileSummaryFuncs;
1543        return false;
1544      }
1545      return PSI->isFunctionHotInCallGraphNthPercentile(
1546          ClHotPercentileCutoff, &F, FAM.getResult<BlockFrequencyAnalysis>(F));
1547    }();
1548    emitRemark(F, FAM.getResult<OptimizationRemarkEmitterAnalysis>(F), Skip);
1549    return Skip;
1550  }
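
// Editor's note on the sampling above (not part of the original source):
// std::bernoulli_distribution D(R) yields true with probability R, so
// `!D(*Rng)` skips a function with probability 1 - R; the configured rate is
// therefore the fraction of functions that keep instrumentation. The
// profile-based path instead skips functions that the profile summary
// considers hot at the configured percentile cutoff.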
1551  
1552  void HWAddressSanitizer::sanitizeFunction(Function &F,
1553                                            FunctionAnalysisManager &FAM) {
1554    if (&F == HwasanCtorFunction)
1555      return;
1556  
1557    if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
1558      return;
1559  
1560    if (F.empty())
1561      return;
1562  
1563    NumTotalFuncs++;
1564  
1565    OptimizationRemarkEmitter &ORE =
1566        FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1567  
1568    if (selectiveInstrumentationShouldSkip(F, FAM))
1569      return;
1570  
1571    NumInstrumentedFuncs++;
1572  
1573    LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");
1574  
1575    SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
1576    SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
1577    SmallVector<Instruction *, 8> LandingPadVec;
1578    const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
1579  
1580    memtag::StackInfoBuilder SIB(SSI);
1581    for (auto &Inst : instructions(F)) {
1582      if (InstrumentStack) {
1583        SIB.visit(Inst);
1584      }
1585  
1586      if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
1587        LandingPadVec.push_back(&Inst);
1588  
1589      getInterestingMemoryOperands(ORE, &Inst, TLI, OperandsToInstrument);
1590  
1591      if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
1592        if (!ignoreMemIntrinsic(ORE, MI))
1593          IntrinToInstrument.push_back(MI);
1594    }
1595  
1596    memtag::StackInfo &SInfo = SIB.get();
1597  
1598    initializeCallbacks(*F.getParent());
1599  
1600    if (!LandingPadVec.empty())
1601      instrumentLandingPads(LandingPadVec);
1602  
1603    if (SInfo.AllocasToInstrument.empty() && F.hasPersonalityFn() &&
1604        F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
1605      // __hwasan_personality_thunk is a no-op for functions without an
1606      // instrumented stack, so we can drop it.
1607      F.setPersonalityFn(nullptr);
1608    }
1609  
1610    if (SInfo.AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
1611        IntrinToInstrument.empty())
1612      return;
1613  
1614    assert(!ShadowBase);
1615  
1616    // Remove memory attributes that are about to become invalid.
1617    // HWASan checks read from shadow, which invalidates memory(argmem: *).
1618    // Short granule checks on function arguments read from the argument memory
1619    // (last byte of the granule), which invalidates writeonly.
1620    F.removeFnAttr(llvm::Attribute::Memory);
1621    for (auto &A : F.args())
1622      A.removeAttr(llvm::Attribute::WriteOnly);
1623  
1624    BasicBlock::iterator InsertPt = F.getEntryBlock().begin();
1625    IRBuilder<> EntryIRB(&F.getEntryBlock(), InsertPt);
1626    emitPrologue(EntryIRB,
1627                 /*WithFrameRecord*/ ClRecordStackHistory != none &&
1628                     Mapping.WithFrameRecord &&
1629                     !SInfo.AllocasToInstrument.empty());
1630  
1631    if (!SInfo.AllocasToInstrument.empty()) {
1632      const DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1633      const PostDominatorTree &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
1634      const LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
1635      Value *StackTag = getStackBaseTag(EntryIRB);
1636      Value *UARTag = getUARTag(EntryIRB);
1637      instrumentStack(SInfo, StackTag, UARTag, DT, PDT, LI);
1638    }
1639  
1640    // If we split the entry block, move any allocas that were originally in the
1641    // entry block back into the entry block so that they aren't treated as
1642    // dynamic allocas.
1643    if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
1644      InsertPt = F.getEntryBlock().begin();
1645      for (Instruction &I :
1646           llvm::make_early_inc_range(*EntryIRB.GetInsertBlock())) {
1647        if (auto *AI = dyn_cast<AllocaInst>(&I))
1648          if (isa<ConstantInt>(AI->getArraySize()))
1649            I.moveBefore(F.getEntryBlock(), InsertPt);
1650      }
1651    }
1652  
1653    DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
1654    PostDominatorTree *PDT = FAM.getCachedResult<PostDominatorTreeAnalysis>(F);
1655    LoopInfo *LI = FAM.getCachedResult<LoopAnalysis>(F);
1656    DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
1657    for (auto &Operand : OperandsToInstrument)
1658      instrumentMemAccess(Operand, DTU, LI);
1659    DTU.flush();
1660  
1661    if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
1662      for (auto *Inst : IntrinToInstrument)
1663        instrumentMemIntrinsic(Inst);
1664    }
1665  
1666    ShadowBase = nullptr;
1667    StackBaseTag = nullptr;
1668    CachedFP = nullptr;
1669  }
1670  
1671  void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
1672    assert(!UsePageAliases);
1673    Constant *Initializer = GV->getInitializer();
1674    uint64_t SizeInBytes =
1675        M.getDataLayout().getTypeAllocSize(Initializer->getType());
1676    uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
1677    if (SizeInBytes != NewSize) {
1678      // Pad the initializer out to the next multiple of 16 bytes and add the
1679      // required short granule tag.
1680      std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
1681      Init.back() = Tag;
1682      Constant *Padding = ConstantDataArray::get(*C, Init);
1683      Initializer = ConstantStruct::getAnon({Initializer, Padding});
1684    }
1685  
1686    auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
1687                                     GlobalValue::ExternalLinkage, Initializer,
1688                                     GV->getName() + ".hwasan");
1689    NewGV->copyAttributesFrom(GV);
1690    NewGV->setLinkage(GlobalValue::PrivateLinkage);
1691    NewGV->copyMetadata(GV, 0);
1692    NewGV->setAlignment(
1693        std::max(GV->getAlign().valueOrOne(), Mapping.getObjectAlignment()));
1694  
1695    // It is invalid to ICF two globals that have different tags. In the case
1696    // where the size of the global is a multiple of the tag granularity the
1697    // contents of the globals may be the same but the tags (i.e. symbol values)
1698    // may be different, and the symbols are not considered during ICF. In the
1699    // case where the size is not a multiple of the granularity, the short granule
1700    // tags would discriminate two globals with different tags, but there would
1701    // otherwise be nothing stopping such a global from being incorrectly ICF'd
1702    // with an uninstrumented (i.e. tag 0) global that happened to have the short
1703    // granule tag in the last byte.
1704    NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
1705  
1706    // Descriptor format (assuming little-endian):
1707    // bytes 0-3: relative address of global
1708    // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case
1709    // it isn't, we create multiple descriptors)
1710    // byte 7: tag
1711    auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
1712    const uint64_t MaxDescriptorSize = 0xfffff0;
1713    for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
1714         DescriptorPos += MaxDescriptorSize) {
1715      auto *Descriptor =
1716          new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
1717                             nullptr, GV->getName() + ".hwasan.descriptor");
1718      auto *GVRelPtr = ConstantExpr::getTrunc(
1719          ConstantExpr::getAdd(
1720              ConstantExpr::getSub(
1721                  ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1722                  ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
1723              ConstantInt::get(Int64Ty, DescriptorPos)),
1724          Int32Ty);
1725      uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
1726      auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
1727      Descriptor->setComdat(NewGV->getComdat());
1728      Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
1729      Descriptor->setSection("hwasan_globals");
1730      Descriptor->setMetadata(LLVMContext::MD_associated,
1731                              MDNode::get(*C, ValueAsMetadata::get(NewGV)));
1732      appendToCompilerUsed(M, Descriptor);
1733    }
1734  
1735    Constant *Aliasee = ConstantExpr::getIntToPtr(
1736        ConstantExpr::getAdd(
1737            ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1738            ConstantInt::get(Int64Ty, uint64_t(Tag) << PointerTagShift)),
1739        GV->getType());
1740    auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
1741                                      GV->getLinkage(), "", Aliasee, &M);
1742    Alias->setVisibility(GV->getVisibility());
1743    Alias->takeName(GV);
1744    GV->replaceAllUsesWith(Alias);
1745    GV->eraseFromParent();
1746  }
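
// Editor's worked example (not part of the original source; assumes 16-byte
// granules and PointerTagShift == 56). For a 17-byte global g with tag 0x11:
//
//   padded size        = alignTo(17, 16) == 32, last padding byte == 0x11
//   descriptor field 2 = 17 | (0x11 << 24) == 0x11000011   // size | tag<<24
//   alias for g        = &g.hwasan + (0x11ULL << 56)       // tagged symbol
//
// References to g then carry the tag in the pointer's top byte, matching
// the shadow that the runtime initializes from the hwasan_globals section.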
1747  
1748  void HWAddressSanitizer::instrumentGlobals() {
1749    std::vector<GlobalVariable *> Globals;
1750    for (GlobalVariable &GV : M.globals()) {
1751      if (GV.hasSanitizerMetadata() && GV.getSanitizerMetadata().NoHWAddress)
1752        continue;
1753  
1754      if (GV.isDeclarationForLinker() || GV.getName().starts_with("llvm.") ||
1755          GV.isThreadLocal())
1756        continue;
1757  
1758      // Common symbols can't have aliases point to them, so they can't be tagged.
1759      if (GV.hasCommonLinkage())
1760        continue;
1761  
1762      // Globals with custom sections may be used in __start_/__stop_ enumeration,
1763      // which would be broken both by adding tags and potentially by the extra
1764      // padding/alignment that we insert.
1765      if (GV.hasSection())
1766        continue;
1767  
1768      Globals.push_back(&GV);
1769    }
1770  
1771    MD5 Hasher;
1772    Hasher.update(M.getSourceFileName());
1773    MD5::MD5Result Hash;
1774    Hasher.final(Hash);
1775    uint8_t Tag = Hash[0];
1776  
1777    assert(TagMaskByte >= 16);
1778  
1779    for (GlobalVariable *GV : Globals) {
1780      // Don't allow globals to be tagged with something that looks like a
1781      // short-granule tag; otherwise we lose inter-granule overflow detection,
1782      // as the fast-path shadow-vs-address check succeeds.
1783      if (Tag < 16 || Tag > TagMaskByte)
1784        Tag = 16;
1785      instrumentGlobal(GV, Tag++);
1786    }
1787  }
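
// Editor's sketch (not part of the original source): if Hash[0] == 0x07, the
// first global is retagged to 16 (values below 16 would read as short-granule
// sizes), and subsequent globals receive 17, 18, ..., wrapping back to 16
// once the counter exceeds TagMaskByte.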
1788  
1789  void HWAddressSanitizer::instrumentPersonalityFunctions() {
1790    // We need to untag stack frames as we unwind past them. That is the job of
1791    // the personality function wrapper, which either wraps an existing
1792    // personality function or acts as a personality function on its own. Each
1793    // function that has a personality function or that can be unwound past has
1794    // its personality function changed to a thunk that calls the personality
1795    // function wrapper in the runtime.
1796    MapVector<Constant *, std::vector<Function *>> PersonalityFns;
1797    for (Function &F : M) {
1798      if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
1799        continue;
1800  
1801      if (F.hasPersonalityFn()) {
1802        PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F);
1803      } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
1804        PersonalityFns[nullptr].push_back(&F);
1805      }
1806    }
1807  
1808    if (PersonalityFns.empty())
1809      return;
1810  
1811    FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
1812        "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty, PtrTy,
1813        PtrTy, PtrTy, PtrTy, PtrTy);
1814    FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
1815    FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);
1816  
1817    for (auto &P : PersonalityFns) {
1818      std::string ThunkName = kHwasanPersonalityThunkName;
1819      if (P.first)
1820        ThunkName += ("." + P.first->getName()).str();
1821      FunctionType *ThunkFnTy = FunctionType::get(
1822          Int32Ty, {Int32Ty, Int32Ty, Int64Ty, PtrTy, PtrTy}, false);
1823      bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
1824                                 cast<GlobalValue>(P.first)->hasLocalLinkage());
1825      auto *ThunkFn = Function::Create(ThunkFnTy,
1826                                       IsLocal ? GlobalValue::InternalLinkage
1827                                               : GlobalValue::LinkOnceODRLinkage,
1828                                       ThunkName, &M);
1829      if (!IsLocal) {
1830        ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
1831        ThunkFn->setComdat(M.getOrInsertComdat(ThunkName));
1832      }
1833  
1834      auto *BB = BasicBlock::Create(*C, "entry", ThunkFn);
1835      IRBuilder<> IRB(BB);
1836      CallInst *WrapperCall = IRB.CreateCall(
1837          HwasanPersonalityWrapper,
1838          {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
1839           ThunkFn->getArg(3), ThunkFn->getArg(4),
1840           P.first ? P.first : Constant::getNullValue(PtrTy),
1841           UnwindGetGR.getCallee(), UnwindGetCFA.getCallee()});
1842      WrapperCall->setTailCall();
1843      IRB.CreateRet(WrapperCall);
1844  
1845      for (Function *F : P.second)
1846        F->setPersonalityFn(ThunkFn);
1847    }
1848  }
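
// Editor's sketch of a generated thunk, written as C for readability
// (not part of the original source; names are illustrative):
//
//   int __hwasan_personality_thunk_gxx(int version, int actions,
//                                      uint64_t exception_class,
//                                      void *exception_object, void *ctx) {
//     return __hwasan_personality_wrapper(
//         version, actions, exception_class, exception_object, ctx,
//         /*personality=*/&__gxx_personality_v0,
//         /*get_gr=*/&_Unwind_GetGR, /*get_cfa=*/&_Unwind_GetCFA);
//   }
//
// The wrapper untags the frames being unwound past and then defers to the
// wrapped personality function (or acts as one when none was present).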
1849  
1850  void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple,
1851                                               bool InstrumentWithCalls) {
1852    Scale = kDefaultShadowScale;
1853    if (TargetTriple.isOSFuchsia()) {
1854      // Fuchsia is always PIE, which means that the beginning of the address
1855      // space is always available.
1856      InGlobal = false;
1857      InTls = false;
1858      Offset = 0;
1859      WithFrameRecord = true;
1860    } else if (ClMappingOffset.getNumOccurrences() > 0) {
1861      InGlobal = false;
1862      InTls = false;
1863      Offset = ClMappingOffset;
1864      WithFrameRecord = false;
1865    } else if (ClEnableKhwasan || InstrumentWithCalls) {
1866      InGlobal = false;
1867      InTls = false;
1868      Offset = 0;
1869      WithFrameRecord = false;
1870    } else if (ClWithIfunc) {
1871      InGlobal = true;
1872      InTls = false;
1873      Offset = kDynamicShadowSentinel;
1874      WithFrameRecord = false;
1875    } else if (ClWithTls) {
1876      InGlobal = false;
1877      InTls = true;
1878      Offset = kDynamicShadowSentinel;
1879      WithFrameRecord = true;
1880    } else {
1881      InGlobal = false;
1882      InTls = false;
1883      Offset = kDynamicShadowSentinel;
1884      WithFrameRecord = false;
1885    }
1886  }
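
// Editor's summary of the configurations above (not part of the original
// source):
//   Fuchsia                    : shadow at offset 0, frame records on
//   explicit ClMappingOffset   : fixed shadow offset, no frame records
//   khwasan or outlined checks : shadow at offset 0, no frame records
//   ClWithIfunc                : shadow base resolved via ifunc (InGlobal)
//   ClWithTls                  : dynamic shadow, frame records via TLS slot
//   otherwise                  : dynamic shadow, no frame records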
1887