xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===- AArch64StackTagging.cpp - Stack tagging in IR --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
#include <cassert>
#include <iterator>
#include <memory>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "aarch64-stack-tagging"

static cl::opt<bool> ClMergeInit(
    "stack-tagging-merge-init", cl::Hidden, cl::init(true),
    cl::desc("merge stack variable initializers with tagging when possible"));

static cl::opt<bool>
    ClUseStackSafety("stack-tagging-use-stack-safety", cl::Hidden,
                     cl::init(true),
                     cl::desc("Use Stack Safety analysis results"));

static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
                                     cl::init(40), cl::Hidden);

static cl::opt<unsigned>
    ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit", cl::init(272),
                         cl::Hidden);

static cl::opt<size_t> ClMaxLifetimes(
    "stack-tagging-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
    cl::ReallyHidden,
    cl::desc("How many lifetime ends to handle for a single alloca."),
    cl::Optional);

// Mode for selecting how to insert frame record info into the stack ring
// buffer.
enum StackTaggingRecordStackHistoryMode {
  // Do not record frame record info.
  none,

  // Insert instructions into the prologue for storing into the stack ring
  // buffer directly.
  instr,
};

static cl::opt<StackTaggingRecordStackHistoryMode> ClRecordStackHistory(
    "stack-tagging-record-stack-history",
    cl::desc("Record stack frames with tagged allocations in a thread-local "
             "ring buffer"),
    cl::values(clEnumVal(none, "Do not record stack ring history"),
               clEnumVal(instr, "Insert instructions into the prologue for "
                                "storing into the stack ring buffer")),
    cl::Hidden, cl::init(none));

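// MTE assigns memory tags at 16-byte granule granularity, so allocas are
// padded and aligned to this size before tagging.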
static const Align kTagGranuleSize = Align(16);

namespace {

class InitializerBuilder {
  uint64_t Size;
  const DataLayout *DL;
  Value *BasePtr;
  Function *SetTagFn;
  Function *SetTagZeroFn;
  Function *StgpFn;

  // List of initializers sorted by start offset.
  struct Range {
    uint64_t Start, End;
    Instruction *Inst;
  };
  SmallVector<Range, 4> Ranges;
  // 8-aligned offset => 8-byte initializer
  // Missing keys are zero initialized.
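  // For example, a 4-byte store to alloca bytes [10, 14) contributes only to
  // Out[8], occupying byte lanes 2..5 of that little-endian 64-bit word.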
  std::map<uint64_t, Value *> Out;

public:
  InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr,
                     Function *SetTagFn, Function *SetTagZeroFn,
                     Function *StgpFn)
      : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn),
        SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}

  bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
    auto I =
        llvm::lower_bound(Ranges, Start, [](const Range &LHS, uint64_t RHS) {
          return LHS.End <= RHS;
        });
    if (I != Ranges.end() && End > I->Start) {
      // Overlap - bail.
      return false;
    }
    Ranges.insert(I, {Start, End, Inst});
    return true;
  }

  bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) {
    int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType());
    if (!addRange(Offset, Offset + StoreSize, SI))
      return false;
    IRBuilder<> IRB(SI);
    applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0));
    return true;
  }

  bool addMemSet(uint64_t Offset, MemSetInst *MSI) {
    uint64_t StoreSize = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    if (!addRange(Offset, Offset + StoreSize, MSI))
      return false;
    IRBuilder<> IRB(MSI);
    applyMemSet(IRB, Offset, Offset + StoreSize,
                cast<ConstantInt>(MSI->getValue()));
    return true;
  }

  void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End,
                   ConstantInt *V) {
    // Out[] does not distinguish between zero and undef, and we already know
    // that this memset does not overlap with any other initializer. Nothing to
    // do for memset(0).
    if (V->isZero())
      return;
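    // Build one 64-bit constant per 8-aligned word, masking the replicated
    // byte pattern down to the lanes this memset actually covers. E.g. a
    // memset of 0xAB over [3, 10) produces Out[0] |= 0xABABABABAB000000 and
    // Out[8] |= 0xABAB (little-endian byte lanes).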
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      uint64_t Cst = 0x0101010101010101UL;
      int LowBits = Offset < Start ? (Start - Offset) * 8 : 0;
      if (LowBits)
        Cst = (Cst >> LowBits) << LowBits;
      int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0;
      if (HighBits)
        Cst = (Cst << HighBits) >> HighBits;
      ConstantInt *C =
          ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue());

      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = C;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, C);
      }
    }
  }

  // Take a 64-bit slice of the value starting at the given offset (in bytes).
  // Offset can be negative. Pad with zeroes on both sides when necessary.
  Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) {
    if (Offset > 0) {
      V = IRB.CreateLShr(V, Offset * 8);
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    } else if (Offset < 0) {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
      V = IRB.CreateShl(V, -Offset * 8);
    } else {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    }
    return V;
  }

  void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End,
                  Value *StoredValue) {
    StoredValue = flatten(IRB, StoredValue);
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      Value *V = sliceValue(IRB, StoredValue, Offset - Start);
      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = V;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, V);
      }
    }
  }

  void generate(IRBuilder<> &IRB) {
    LLVM_DEBUG(dbgs() << "Combined initializer\n");
    // No initializers => the entire allocation is undef.
    if (Ranges.empty()) {
      emitUndef(IRB, 0, Size);
      return;
    }

    // Look through the 8-byte initializer list 16 bytes at a time;
    // if one of the two 8-byte halves is non-zero and non-undef, emit STGP.
    // Otherwise, emit zeroes up to the next available item.
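    // STGP stores a pair of 8-byte values and sets the MTE tag for the
    // 16-byte granule in a single instruction.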
    uint64_t LastOffset = 0;
    for (uint64_t Offset = 0; Offset < Size; Offset += 16) {
      auto I1 = Out.find(Offset);
      auto I2 = Out.find(Offset + 8);
      if (I1 == Out.end() && I2 == Out.end())
        continue;

      if (Offset > LastOffset)
        emitZeroes(IRB, LastOffset, Offset - LastOffset);

      Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I1->second;
      Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I2->second;
      emitPair(IRB, Offset, Store1, Store2);
      LastOffset = Offset + 16;
    }

    // memset(0) does not update Out[], therefore the tail can be either undef
    // or zero.
    if (LastOffset < Size)
      emitZeroes(IRB, LastOffset, Size - LastOffset);

    for (const auto &R : Ranges) {
      R.Inst->eraseFromParent();
    }
  }

  void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
                      << ") zero\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(SetTagZeroFn,
                   {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
                      << ") undef\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + 16 << "):\n");
    LLVM_DEBUG(dbgs() << "    " << *A << "\n    " << *B << "\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(StgpFn, {Ptr, A, B});
  }

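  // Convert the stored value to an integer of the same store size so it can
  // be shifted and OR-combined into the 64-bit words of Out[].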
  Value *flatten(IRBuilder<> &IRB, Value *V) {
    if (V->getType()->isIntegerTy())
      return V;
    // vector of pointers -> vector of ints
    if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) {
      LLVMContext &Ctx = IRB.getContext();
      Type *EltTy = VecTy->getElementType();
      if (EltTy->isPointerTy()) {
        uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
        auto *NewTy = FixedVectorType::get(
            IntegerType::get(Ctx, EltSize),
            cast<FixedVectorType>(VecTy)->getNumElements());
        V = IRB.CreatePointerCast(V, NewTy);
      }
    }
    return IRB.CreateBitOrPointerCast(
        V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8));
  }
};

class AArch64StackTagging : public FunctionPass {
  const bool MergeInit;
  const bool UseStackSafety;

public:
  static char ID; // Pass ID, replacement for typeid

  AArch64StackTagging(bool IsOptNone = false)
      : FunctionPass(ID),
        MergeInit(ClMergeInit.getNumOccurrences() ? ClMergeInit : !IsOptNone),
        UseStackSafety(ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
                                                            : !IsOptNone) {
    initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
  }

  void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
                 uint64_t Size);
  void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);

  Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
                                   uint64_t Size, InitializerBuilder &IB);

  Instruction *insertBaseTaggedPointer(
      const Module &M,
      const MapVector<AllocaInst *, memtag::AllocaInfo> &Allocas,
      const DominatorTree *DT);
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AArch64 Stack Tagging"; }

private:
  Function *F = nullptr;
  Function *SetTagFunc = nullptr;
  const DataLayout *DL = nullptr;
  AAResults *AA = nullptr;
  const StackSafetyGlobalInfo *SSI = nullptr;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    if (UseStackSafety)
      AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
    if (MergeInit)
      AU.addRequired<AAResultsWrapperPass>();
  }
};

} // end anonymous namespace

char AArch64StackTagging::ID = 0;

INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                    false, false)

FunctionPass *llvm::createAArch64StackTaggingPass(bool IsOptNone) {
  return new AArch64StackTagging(IsOptNone);
}

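// Scan forward from StartInst, folding simple stores and constant memsets at
// constant offsets from StartPtr into the InitializerBuilder. Stops at the
// scan limit, at a terminator, or at any other instruction that may access
// the alloca. Returns the last merged instruction (or StartInst if none).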
Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
                                                      Value *StartPtr,
                                                      uint64_t Size,
                                                      InitializerBuilder &IB) {
  MemoryLocation AllocaLoc{StartPtr, Size};
  Instruction *LastInst = StartInst;
  BasicBlock::iterator BI(StartInst);

  unsigned Count = 0;
  for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) {
    if (!isa<DbgInfoIntrinsic>(*BI))
      ++Count;

    if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc)))
      continue;

    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out.  We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
      if (!NextStore->isSimple())
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, *DL);
      if (!Offset)
        break;

      if (!IB.addStore(*Offset, NextStore, DL))
        break;
      LastInst = NextStore;
    } else {
      MemSetInst *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
        break;

      if (!isa<ConstantInt>(MSI->getValue()))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          MSI->getDest()->getPointerOffsetFrom(StartPtr, *DL);
      if (!Offset)
        break;

      if (!IB.addMemSet(*Offset, MSI))
        break;
      LastInst = MSI;
    }
  }
  return LastInst;
}

void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                    Value *Ptr, uint64_t Size) {
  auto SetTagZeroFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag_zero);
  auto StgpFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_stgp);

  InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc);
  bool LittleEndian =
      Triple(AI->getModule()->getTargetTriple()).isLittleEndian();
  // Current implementation of initializer merging assumes little endianness.
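  // (The 64-bit words assembled by InitializerBuilder map byte lanes to
  // memory order only on little-endian targets.)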
  if (MergeInit && !F->hasOptNone() && LittleEndian &&
      Size < ClMergeInitSizeLimit) {
    LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
                      << ", size = " << Size << "\n");
    InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
  }

  IRBuilder<> IRB(InsertBefore);
  IB.generate(IRB);
}

void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                      uint64_t Size) {
  IRBuilder<> IRB(InsertBefore);
  IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getPtrTy()),
                              ConstantInt::get(IRB.getInt64Ty(), Size)});
}

Instruction *AArch64StackTagging::insertBaseTaggedPointer(
    const Module &M,
    const MapVector<AllocaInst *, memtag::AllocaInfo> &AllocasToInstrument,
    const DominatorTree *DT) {
  BasicBlock *PrologueBB = nullptr;
  // Try sinking IRG as deep as possible to avoid hurting shrink wrap.
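  // That is, emit it in the nearest common dominator of all blocks that
  // contain a tagged alloca, rather than unconditionally in the entry block.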
  for (auto &I : AllocasToInstrument) {
    const memtag::AllocaInfo &Info = I.second;
    AllocaInst *AI = Info.AI;
    if (!PrologueBB) {
      PrologueBB = AI->getParent();
      continue;
    }
    PrologueBB = DT->findNearestCommonDominator(PrologueBB, AI->getParent());
  }
  assert(PrologueBB);

  IRBuilder<> IRB(&PrologueBB->front());
  Function *IRG_SP =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_irg_sp);
  Instruction *Base =
      IRB.CreateCall(IRG_SP, {Constant::getNullValue(IRB.getInt64Ty())});
  Base->setName("basetag");
  auto TargetTriple = Triple(M.getTargetTriple());
  // This is not a stable ABI for now, so only allow in dev builds with API
  // level 10000.
  // The ThreadLong format is the same as with HWASan, but the entries for
  // stack MTE take two slots (16 bytes).
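  // Each record is two pointer-sized slots, { PC, FP | base tag }, with the
  // IRG-produced tag sitting in bits 56..59, written at the current ring
  // buffer position.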
  if (ClRecordStackHistory == instr && TargetTriple.isAndroid() &&
      TargetTriple.isAArch64() && !TargetTriple.isAndroidVersionLT(10000) &&
      !AllocasToInstrument.empty()) {
    constexpr int StackMteSlot = -3;
    constexpr uint64_t TagMask = 0xFULL << 56;

    auto *IntptrTy = IRB.getIntPtrTy(M.getDataLayout());
    Value *SlotPtr = memtag::getAndroidSlotPtr(IRB, StackMteSlot);
    auto *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
    Value *FP = memtag::getFP(IRB);
    Value *Tag = IRB.CreateAnd(IRB.CreatePtrToInt(Base, IntptrTy), TagMask);
    Value *TaggedFP = IRB.CreateOr(FP, Tag);
    Value *PC = memtag::getPC(TargetTriple, IRB);
    Value *RecordPtr = IRB.CreateIntToPtr(ThreadLong, IRB.getPtrTy(0));
    IRB.CreateStore(PC, RecordPtr);
    IRB.CreateStore(TaggedFP, IRB.CreateConstGEP1_64(IntptrTy, RecordPtr, 1));
    // Update the ring buffer. Top byte of ThreadLong defines the size of the
    // buffer in pages, it must be a power of two, and the start of the buffer
    // must be aligned by twice that much. Therefore wrap around of the ring
    // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
    // The use of AShr instead of LShr is due to
    //   https://bugs.llvm.org/show_bug.cgi?id=39030
    // Runtime library makes sure not to use the highest bit.
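    // For example, with a 2-page (8 KiB) ring buffer the top byte is 2, so
    // WrapMask == ~0x2000; advancing the pointer by 16 and masking wraps it
    // back to the (0x4000-aligned) buffer start once it steps past the end.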
    Value *WrapMask = IRB.CreateXor(
        IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
        ConstantInt::get(IntptrTy, (uint64_t)-1));
    Value *ThreadLongNew = IRB.CreateAnd(
        IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 16)), WrapMask);
    IRB.CreateStore(ThreadLongNew, SlotPtr);
  }
  return Base;
}

// FIXME: check for MTE extension
bool AArch64StackTagging::runOnFunction(Function &Fn) {
  if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
    return false;

  if (UseStackSafety)
    SSI = &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult();
  F = &Fn;
  DL = &Fn.getDataLayout();
  if (MergeInit)
    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  memtag::StackInfoBuilder SIB(SSI);
  for (Instruction &I : instructions(F))
    SIB.visit(I);
  memtag::StackInfo &SInfo = SIB.get();

  if (SInfo.AllocasToInstrument.empty())
    return false;

  std::unique_ptr<DominatorTree> DeleteDT;
  DominatorTree *DT = nullptr;
  if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
    DT = &P->getDomTree();

  if (DT == nullptr) {
    DeleteDT = std::make_unique<DominatorTree>(*F);
    DT = DeleteDT.get();
  }

  std::unique_ptr<PostDominatorTree> DeletePDT;
  PostDominatorTree *PDT = nullptr;
  if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>())
    PDT = &P->getPostDomTree();

  if (PDT == nullptr) {
    DeletePDT = std::make_unique<PostDominatorTree>(*F);
    PDT = DeletePDT.get();
  }

  std::unique_ptr<LoopInfo> DeleteLI;
  LoopInfo *LI = nullptr;
  if (auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>()) {
    LI = &LIWP->getLoopInfo();
  } else {
    DeleteLI = std::make_unique<LoopInfo>(*DT);
    LI = DeleteLI.get();
  }

  SetTagFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);

  Instruction *Base =
      insertBaseTaggedPointer(*Fn.getParent(), SInfo.AllocasToInstrument, DT);

  int NextTag = 0;
  for (auto &I : SInfo.AllocasToInstrument) {
    memtag::AllocaInfo &Info = I.second;
    assert(Info.AI && SIB.isInterestingAlloca(*Info.AI));
    memtag::alignAndPadAlloca(Info, kTagGranuleSize);
    AllocaInst *AI = Info.AI;
    int Tag = NextTag;
    NextTag = (NextTag + 1) % 16;
    // Replace alloca with tagp(alloca).
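    // Tags are handed out round-robin; aarch64.tagp derives each alloca's tag
    // from the IRG-produced base pointer's tag plus this small offset (mod 16).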
    IRBuilder<> IRB(Info.AI->getNextNode());
    Function *TagP = Intrinsic::getDeclaration(
        F->getParent(), Intrinsic::aarch64_tagp, {Info.AI->getType()});
    Instruction *TagPCall =
        IRB.CreateCall(TagP, {Constant::getNullValue(Info.AI->getType()), Base,
                              ConstantInt::get(IRB.getInt64Ty(), Tag)});
    if (Info.AI->hasName())
      TagPCall->setName(Info.AI->getName() + ".tag");
    // Does not replace metadata, so we don't have to handle DbgVariableRecords.
    Info.AI->replaceUsesWithIf(TagPCall, [&](const Use &U) {
      return !memtag::isLifetimeIntrinsic(U.getUser());
    });
    TagPCall->setOperand(0, Info.AI);

    // Calls to functions that may return twice (e.g. setjmp) confuse the
    // postdominator analysis, and will leave us to keep memory tagged after
    // function return. Work around this by always untagging at every return
    // statement if return_twice functions are called.
    bool StandardLifetime =
        !SInfo.CallsReturnTwice &&
        SInfo.UnrecognizedLifetimes.empty() &&
        memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, DT, LI,
                                   ClMaxLifetimes);
    if (StandardLifetime) {
      IntrinsicInst *Start = Info.LifetimeStart[0];
      uint64_t Size =
          cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
      Size = alignTo(Size, kTagGranuleSize);
      tagAlloca(AI, Start->getNextNode(), TagPCall, Size);

      auto TagEnd = [&](Instruction *Node) { untagAlloca(AI, Node, Size); };
      if (!DT || !PDT ||
          !memtag::forAllReachableExits(*DT, *PDT, *LI, Start, Info.LifetimeEnd,
                                        SInfo.RetVec, TagEnd)) {
        for (auto *End : Info.LifetimeEnd)
          End->eraseFromParent();
      }
    } else {
      uint64_t Size = *Info.AI->getAllocationSize(*DL);
      Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getPtrTy());
      tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size);
      for (auto *RI : SInfo.RetVec) {
        untagAlloca(AI, RI, Size);
      }
      // We may have inserted tag/untag outside of any lifetime interval.
      // Remove all lifetime intrinsics for this alloca.
      for (auto *II : Info.LifetimeStart)
        II->eraseFromParent();
      for (auto *II : Info.LifetimeEnd)
        II->eraseFromParent();
    }

    memtag::annotateDebugRecords(Info, static_cast<unsigned long>(Tag));
  }

  // If we have instrumented at least one alloca, all unrecognized lifetime
  // intrinsics have to go.
  for (auto *I : SInfo.UnrecognizedLifetimes)
    I->eraseFromParent();

  return true;
}
649