xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AArch64.h"
10 #include "AArch64InstrInfo.h"
11 #include "AArch64MachineFunctionInfo.h"
12 #include "llvm/ADT/SetVector.h"
13 #include "llvm/ADT/Statistic.h"
14 #include "llvm/CodeGen/MachineFrameInfo.h"
15 #include "llvm/CodeGen/MachineFunction.h"
16 #include "llvm/CodeGen/MachineFunctionPass.h"
17 #include "llvm/CodeGen/MachineInstrBuilder.h"
18 #include "llvm/CodeGen/MachineRegisterInfo.h"
19 #include "llvm/CodeGen/MachineTraceMetrics.h"
20 #include "llvm/CodeGen/Passes.h"
21 #include "llvm/CodeGen/TargetInstrInfo.h"
22 #include "llvm/CodeGen/TargetRegisterInfo.h"
23 #include "llvm/CodeGen/TargetSubtargetInfo.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"
31 
// Policy for when tagged-slot accesses may be rewritten into plain
// (unchecked) loads/stores; selected via -stack-tagging-unchecked-ld-st.
enum UncheckedLdStMode {
  UncheckedNever,  // never apply the rewrite
  UncheckedSafe,   // apply only when the target is provably within range
  UncheckedAlways  // apply unconditionally
};
33 
// Command-line override for the unchecked load/store rewrite; the "safe"
// default defers to mayUseUncheckedLoadStore()'s frame-size analysis.
static cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
    "stack-tagging-unchecked-ld-st", cl::Hidden, cl::init(UncheckedSafe),
    cl::desc(
        "Unconditionally apply unchecked-ld-st optimization (even for large "
        "stack frames, or in the presence of variable sized allocas)."),
    cl::values(
        clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
        clEnumValN(
            UncheckedSafe, "safe",
            "apply unchecked-ld-st when the target is definitely within range"),
        clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
45 
// Command-line toggle for the first-slot optimization implemented in
// findFirstSlotCandidate() (pinning one tagged slot to offset 0 so its
// address needs no ADDG).
static cl::opt<bool>
    ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
                cl::desc("Apply first slot optimization for stack tagging "
                         "(eliminate ADDG Rt, Rn, 0, 0)."));
50 
namespace {

// Pre-register-allocation pass that optimizes stack-tagging code:
//  - rewrites loads/stores based on tagged slot addresses into frame-index
//    (unchecked) forms when the whole frame is provably reachable from SP, and
//  - picks one tagged slot to sit at offset 0 from the tagged base pointer so
//    that its TAGPstack can be replaced by a plain COPY.
class AArch64StackTaggingPreRA : public MachineFunctionPass {
  MachineFunction *MF;
  AArch64FunctionInfo *AFI;
  MachineFrameInfo *MFI;
  MachineRegisterInfo *MRI;
  const AArch64RegisterInfo *TRI;
  const AArch64InstrInfo *TII;

  // All TAGPstack instructions found in the current function.
  SmallVector<MachineInstr*, 16> ReTags;

public:
  static char ID;
  AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {}

  bool mayUseUncheckedLoadStore();
  void uncheckUsesOf(unsigned TaggedReg, int FI);
  void uncheckLoadsAndStores();
  std::optional<int> findFirstSlotCandidate();

  bool runOnMachineFunction(MachineFunction &Func) override;
  StringRef getPassName() const override {
    return "AArch64 Stack Tagging PreRA";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Only instructions/operands are rewritten; the CFG is never modified.
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
} // end anonymous namespace
83 
// Pass identity and registration with the LLVM pass infrastructure.
char AArch64StackTaggingPreRA::ID = 0;

INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
                      "AArch64 Stack Tagging PreRA Pass", false, false)
INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
                    "AArch64 Stack Tagging PreRA Pass", false, false)
90 
// Factory function for this pass.
FunctionPass *llvm::createAArch64StackTaggingPreRAPass() {
  return new AArch64StackTaggingPreRA();
}
94 
// Returns true for the load/store opcodes whose base-register operand may be
// replaced with a frame index by the unchecked-ld-st rewrite (see
// uncheckUsesOf).
static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
  switch (Opcode) {
  // Loads.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:

  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:

  // Sign-extending loads.
  case AArch64::LDRSHWui:
  case AArch64::LDRSHXui:

  case AArch64::LDRSBWui:
  case AArch64::LDRSBXui:

  case AArch64::LDRSWui:

  // Stores.
  case AArch64::STRBBui:
  case AArch64::STRHHui:
  case AArch64::STRWui:
  case AArch64::STRXui:

  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:

  // Load pairs.
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::LDPSi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:

  case AArch64::LDPSWi:

  // Store pairs.
  case AArch64::STPWi:
  case AArch64::STPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
    return true;
  default:
    return false;
  }
}
145 
mayUseUncheckedLoadStore()146 bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
147   if (ClUncheckedLdSt == UncheckedNever)
148     return false;
149   else if (ClUncheckedLdSt == UncheckedAlways)
150     return true;
151 
152   // This estimate can be improved if we had harder guarantees about stack frame
153   // layout. With LocalStackAllocation we can estimate SP offset to any
154   // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged
155   // objects ahead of non-tagged ones, but that's not always desirable.
156   //
157   // Underestimating SP offset here may require the use of LDG to materialize
158   // the tagged address of the stack slot, along with a scratch register
159   // allocation (post-regalloc!).
160   //
161   // For now we do the safe thing here and require that the entire stack frame
162   // is within range of the shortest of the unchecked instructions.
163   unsigned FrameSize = 0;
164   for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
165     FrameSize += MFI->getObjectSize(i);
166   bool EntireFrameReachableFromSP = FrameSize < 0xf00;
167   return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
168 }
169 
uncheckUsesOf(unsigned TaggedReg,int FI)170 void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
171   for (MachineInstr &UseI :
172        llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) {
173     if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) {
174       // FI operand is always the one before the immediate offset.
175       unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1;
176       if (UseI.getOperand(OpIdx).isReg() &&
177           UseI.getOperand(OpIdx).getReg() == TaggedReg) {
178         UseI.getOperand(OpIdx).ChangeToFrameIndex(FI);
179         UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
180       }
181     } else if (UseI.isCopy() && UseI.getOperand(0).getReg().isVirtual()) {
182       uncheckUsesOf(UseI.getOperand(0).getReg(), FI);
183     }
184   }
185 }
186 
uncheckLoadsAndStores()187 void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
188   for (auto *I : ReTags) {
189     Register TaggedReg = I->getOperand(0).getReg();
190     int FI = I->getOperand(1).getIndex();
191     uncheckUsesOf(TaggedReg, FI);
192   }
193 }
194 
195 namespace {
196 struct SlotWithTag {
197   int FI;
198   int Tag;
SlotWithTag__anon88be9b980211::SlotWithTag199   SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
SlotWithTag__anon88be9b980211::SlotWithTag200   explicit SlotWithTag(const MachineInstr &MI)
201       : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
operator ==__anon88be9b980211::SlotWithTag202   bool operator==(const SlotWithTag &Other) const {
203     return FI == Other.FI && Tag == Other.Tag;
204   }
205 };
206 } // namespace
207 
namespace llvm {
// DenseMap key traits for SlotWithTag. The empty/tombstone sentinels use
// negative (FI, Tag) pairs, which cannot collide with real slots ((-1, -1) is
// already used by callers as the "no slot" marker, so -2/-3 are chosen here).
template <> struct DenseMapInfo<SlotWithTag> {
  static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
  static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
  static unsigned getHashValue(const SlotWithTag &V) {
    return hash_combine(DenseMapInfo<int>::getHashValue(V.FI),
                        DenseMapInfo<int>::getHashValue(V.Tag));
  }
  static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
    return A == B;
  }
};
} // namespace llvm
221 
isSlotPreAllocated(MachineFrameInfo * MFI,int FI)222 static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
223   return MFI->getUseLocalStackAllocationBlock() &&
224          MFI->isObjectPreAllocated(FI);
225 }
226 
// Pin one of the tagged slots to offset 0 from the tagged base pointer.
// This would make its address available in a virtual register (IRG's def), as
// opposed to requiring an ADDG instruction to materialize. This effectively
// eliminates a vreg (by replacing it with direct uses of IRG, which is usually
// live almost everywhere anyway), and therefore needs to happen before
// regalloc.
//
// Returns the frame index of the chosen slot, or std::nullopt when the
// optimization is disabled or no suitable slot exists.
std::optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
  // Find the best (FI, Tag) pair to pin to offset 0.
  // Looking at the possible uses of a tagged address, the advantage of pinning
  // is:
  // - COPY to physical register.
  //   Does not matter, this would trade a MOV instruction for an ADDG.
  // - ST*G matter, but those mostly appear near the function prologue where all
  //   the tagged addresses need to be materialized anyway; also, counting ST*G
  //   uses would overweight large allocas that require more than one ST*G
  //   instruction.
  // - Load/Store instructions in the address operand do not require a tagged
  //   pointer, so they also do not benefit. These operands have already been
  //   eliminated (see uncheckLoadsAndStores) so all remaining load/store
  //   instructions count.
  // - Any other instruction may benefit from being pinned to offset 0.
  LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
  if (!ClFirstSlot)
    return std::nullopt;

  DenseMap<SlotWithTag, int> RetagScore;
  SlotWithTag MaxScoreST{-1, -1};
  int MaxScore = -1;
  for (auto *I : ReTags) {
    SlotWithTag ST{*I};
    // Pre-allocated slots already have a fixed position and cannot be pinned.
    if (isSlotPreAllocated(MFI, ST.FI))
      continue;

    Register RetagReg = I->getOperand(0).getReg();
    if (!RetagReg.isVirtual())
      continue;

    // Count "interesting" uses reachable from the retagged register,
    // following virtual-register copies transitively via a worklist.
    int Score = 0;
    SmallVector<Register, 8> WorkList;
    WorkList.push_back(RetagReg);

    while (!WorkList.empty()) {
      Register UseReg = WorkList.pop_back_val();
      for (auto &UseI : MRI->use_instructions(UseReg)) {
        unsigned Opcode = UseI.getOpcode();
        // Tag-store instructions do not benefit from pinning (see above).
        if (Opcode == AArch64::STGi || Opcode == AArch64::ST2Gi ||
            Opcode == AArch64::STZGi || Opcode == AArch64::STZ2Gi ||
            Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
            Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
            Opcode == AArch64::STZGloop_wback)
          continue;
        if (UseI.isCopy()) {
          Register DstReg = UseI.getOperand(0).getReg();
          if (DstReg.isVirtual())
            WorkList.push_back(DstReg);
          continue;
        }
        LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of "
                          << printReg(UseReg) << " in " << UseI << "\n");
        Score++;
      }
    }

    // Accumulate per (FI, Tag) pair: the same slot may appear in more than
    // one TAGPstack. Ties are broken toward the larger frame index.
    int TotalScore = RetagScore[ST] += Score;
    if (TotalScore > MaxScore ||
        (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
      MaxScore = TotalScore;
      MaxScoreST = ST;
    }
  }

  // FI stayed -1: every candidate was rejected above.
  if (MaxScoreST.FI < 0)
    return std::nullopt;

  // If FI's tag is already 0, we are done.
  if (MaxScoreST.Tag == 0)
    return MaxScoreST.FI;

  // Otherwise, find a random victim pair (FI, Tag) where Tag == 0.
  SlotWithTag SwapST{-1, -1};
  for (auto *I : ReTags) {
    SlotWithTag ST{*I};
    if (ST.Tag == 0) {
      SwapST = ST;
      break;
    }
  }

  // Swap tags between the victim and the highest scoring pair.
  // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for
  // the highest score slot without changing anything else.
  for (auto *&I : ReTags) {
    SlotWithTag ST{*I};
    MachineOperand &TagOp = I->getOperand(4);
    if (ST == MaxScoreST) {
      TagOp.setImm(0);
    } else if (ST == SwapST) {
      TagOp.setImm(MaxScoreST.Tag);
    }
  }
  return MaxScoreST.FI;
}
329 
// Pass driver: collect TAGPstack instructions, disable SSP on tagged slots,
// apply the unchecked-ld-st and first-slot optimizations.
bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
  MF = &Func;
  MRI = &MF->getRegInfo();
  AFI = MF->getInfo<AArch64FunctionInfo>();
  TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF->getSubtarget().getRegisterInfo());
  MFI = &MF->getFrameInfo();
  ReTags.clear();

  // The copy-chasing in uncheckUsesOf/findFirstSlotCandidate relies on SSA.
  assert(MRI->isSSA());

  LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
                    << "********** Function: " << MF->getName() << '\n');

  // Collect all TAGPstack instructions and the frame slots they tag.
  SmallSetVector<int, 8> TaggedSlots;
  for (auto &BB : *MF) {
    for (auto &I : BB) {
      if (I.getOpcode() == AArch64::TAGPstack) {
        ReTags.push_back(&I);
        int FI = I.getOperand(1).getIndex();
        TaggedSlots.insert(FI);
        // There should be no offsets in TAGP yet.
        assert(I.getOperand(2).getImm() == 0);
      }
    }
  }

  // Take over from SSP. It does nothing for tagged slots, and should not really
  // have been enabled in the first place.
  for (int FI : TaggedSlots)
    MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None);

  if (ReTags.empty())
    return false;

  if (mayUseUncheckedLoadStore())
    uncheckLoadsAndStores();

  // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
  // If the base tagged pointer is set up to the address of this slot,
  // the ADDG instruction can be eliminated.
  std::optional<int> BaseSlot = findFirstSlotCandidate();
  if (BaseSlot)
    AFI->setTaggedBasePointerIndex(*BaseSlot);

  // The pinned slot's TAGPstack now produces exactly its base operand's
  // value, so replace it with a plain COPY and delete it.
  for (auto *I : ReTags) {
    int FI = I->getOperand(1).getIndex();
    int Tag = I->getOperand(4).getImm();
    Register Base = I->getOperand(3).getReg();
    if (Tag == 0 && FI == BaseSlot) {
      BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
              I->getOperand(0).getReg())
          .addReg(Base);
      I->eraseFromParent();
    }
  }

  return true;
}
390