xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp (revision 5b56413d04e608379c9a306373554a8e4d321bc0)
1 //===- AArch64SpeculationHardening.cpp - Harden Against Missspeculation  --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass to insert code to mitigate against side channel
10 // vulnerabilities that may happen under control flow miss-speculation.
11 //
12 // The pass implements tracking of control flow miss-speculation into a "taint"
13 // register. That taint register can then be used to mask off registers with
14 // sensitive data when executing under miss-speculation, a.k.a. "transient
15 // execution".
16 // This pass is aimed at mitigating against SpectreV1-style vulnerabilities.
17 //
18 // It also implements speculative load hardening, i.e. using the taint register
19 // to automatically mask off loaded data.
20 //
21 // As a possible follow-on improvement, also an intrinsics-based approach as
22 // explained at https://lwn.net/Articles/759423/ could be implemented on top of
23 // the current design.
24 //
25 // For AArch64, the following implementation choices are made to implement the
26 // tracking of control flow miss-speculation into a taint register:
27 // Some of these are different than the implementation choices made in
28 // the similar pass implemented in X86SpeculativeLoadHardening.cpp, as
29 // the instruction set characteristics result in different trade-offs.
30 // - The speculation hardening is done after register allocation. With a
31 //   relative abundance of registers, one register is reserved (X16) to be
32 //   the taint register. X16 is expected to not clash with other register
33 //   reservation mechanisms with very high probability because:
34 //   . The AArch64 ABI doesn't guarantee X16 to be retained across any call.
35 //   . The only way to request X16 to be used as a programmer is through
36 //     inline assembly. In the rare case a function explicitly demands to
37 //     use X16/W16, this pass falls back to hardening against speculation
38 //     by inserting a DSB SYS/ISB barrier pair which will prevent control
39 //     flow speculation.
40 // - It is easy to insert mask operations at this late stage as we have
41 //   mask operations available that don't set flags.
42 // - The taint variable contains all-ones when no miss-speculation is detected,
43 //   and contains all-zeros when miss-speculation is detected. Therefore, when
44 //   masking, an AND instruction (which only changes the register to be masked,
45 //   no other side effects) can easily be inserted anywhere that's needed.
46 // - The tracking of miss-speculation is done by using a data-flow conditional
47 //   select instruction (CSEL) to evaluate the flags that were also used to
48 //   make conditional branch direction decisions. Speculation of the CSEL
49 //   instruction can be limited with a CSDB instruction - so the combination of
50 //   CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL
51 //   aren't speculated. When conditional branch direction gets miss-speculated,
52 //   the semantics of the inserted CSEL instruction is such that the taint
53 //   register will contain all zero bits.
54 //   One key requirement for this to work is that the conditional branch is
55 //   followed by an execution of the CSEL instruction, where the CSEL
56 //   instruction needs to use the same flags status as the conditional branch.
57 //   This means that the conditional branches must not be implemented as one
58 //   of the AArch64 conditional branches that do not use the flags as input
59 //   (CB(N)Z and TB(N)Z). This is implemented by ensuring in the instruction
60 //   selectors to not produce these instructions when speculation hardening
61 //   is enabled. This pass will assert if it does encounter such an instruction.
62 // - On function call boundaries, the miss-speculation state is transferred from
63 //   the taint register X16 to be encoded in the SP register as value 0.
64 //
65 // For the aspect of automatically hardening loads, using the taint register,
66 // (a.k.a. speculative load hardening, see
67 //  https://llvm.org/docs/SpeculativeLoadHardening.html), the following
68 // implementation choices are made for AArch64:
69 //   - Many of the optimizations described at
70 //     https://llvm.org/docs/SpeculativeLoadHardening.html to harden fewer
71 //     loads haven't been implemented yet - but for some of them there are
72 //     FIXMEs in the code.
73 //   - loads that load into general purpose (X or W) registers get hardened by
74 //     masking the loaded data. For loads that load into other registers, the
75 //     address loaded from gets hardened. It is expected that hardening the
76 //     loaded data may be more efficient; but masking data in registers other
77 //     than X or W is not easy and may result in being slower than just
78 //     hardening the X address register loaded from.
79 //   - On AArch64, CSDB instructions are inserted between the masking of the
80 //     register and its first use, to ensure there's no non-control-flow
81 //     speculation that might undermine the hardening mechanism.
82 //
83 // Future extensions/improvements could be:
84 // - Implement this functionality using full speculation barriers, akin to the
85 //   x86-slh-lfence option. This may be more useful for the intrinsics-based
86 //   approach than for the SLH approach to masking.
87 //   Note that this pass already inserts the full speculation barriers if the
88 //   function for some niche reason makes use of X16/W16.
89 // - no indirect branch misprediction gets protected/instrumented; but this
90 //   could be done for some indirect branches, such as switch jump tables.
91 //===----------------------------------------------------------------------===//
92 
93 #include "AArch64InstrInfo.h"
94 #include "AArch64Subtarget.h"
95 #include "Utils/AArch64BaseInfo.h"
96 #include "llvm/ADT/BitVector.h"
97 #include "llvm/ADT/SmallVector.h"
98 #include "llvm/CodeGen/MachineBasicBlock.h"
99 #include "llvm/CodeGen/MachineFunction.h"
100 #include "llvm/CodeGen/MachineFunctionPass.h"
101 #include "llvm/CodeGen/MachineInstr.h"
102 #include "llvm/CodeGen/MachineInstrBuilder.h"
103 #include "llvm/CodeGen/MachineOperand.h"
104 #include "llvm/CodeGen/MachineRegisterInfo.h"
105 #include "llvm/CodeGen/RegisterScavenging.h"
106 #include "llvm/IR/DebugLoc.h"
107 #include "llvm/Pass.h"
108 #include "llvm/Support/CodeGen.h"
109 #include "llvm/Support/Debug.h"
110 #include "llvm/Target/TargetMachine.h"
111 #include <cassert>
112 
113 using namespace llvm;
114 
115 #define DEBUG_TYPE "aarch64-speculation-hardening"
116 
117 #define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass"
118 
119 static cl::opt<bool> HardenLoads("aarch64-slh-loads", cl::Hidden,
120                                  cl::desc("Sanitize loads from memory."),
121                                  cl::init(true));
122 
123 namespace {
124 
125 class AArch64SpeculationHardening : public MachineFunctionPass {
126 public:
127   const TargetInstrInfo *TII;
128   const TargetRegisterInfo *TRI;
129 
130   static char ID;
131 
132   AArch64SpeculationHardening() : MachineFunctionPass(ID) {
133     initializeAArch64SpeculationHardeningPass(*PassRegistry::getPassRegistry());
134   }
135 
136   bool runOnMachineFunction(MachineFunction &Fn) override;
137 
138   StringRef getPassName() const override {
139     return AARCH64_SPECULATION_HARDENING_NAME;
140   }
141 
142 private:
143   unsigned MisspeculatingTaintReg;
144   unsigned MisspeculatingTaintReg32Bit;
145   bool UseControlFlowSpeculationBarrier;
146   BitVector RegsNeedingCSDBBeforeUse;
147   BitVector RegsAlreadyMasked;
148 
149   bool functionUsesHardeningRegister(MachineFunction &MF) const;
150   bool instrumentControlFlow(MachineBasicBlock &MBB,
151                              bool &UsesFullSpeculationBarrier);
152   bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
153                                MachineBasicBlock *&FBB,
154                                AArch64CC::CondCode &CondCode) const;
155   void insertTrackingCode(MachineBasicBlock &SplitEdgeBB,
156                           AArch64CC::CondCode &CondCode, DebugLoc DL) const;
157   void insertSPToRegTaintPropagation(MachineBasicBlock &MBB,
158                                      MachineBasicBlock::iterator MBBI) const;
159   void insertRegToSPTaintPropagation(MachineBasicBlock &MBB,
160                                      MachineBasicBlock::iterator MBBI,
161                                      unsigned TmpReg) const;
162   void insertFullSpeculationBarrier(MachineBasicBlock &MBB,
163                                     MachineBasicBlock::iterator MBBI,
164                                     DebugLoc DL) const;
165 
166   bool slhLoads(MachineBasicBlock &MBB);
167   bool makeGPRSpeculationSafe(MachineBasicBlock &MBB,
168                               MachineBasicBlock::iterator MBBI,
169                               MachineInstr &MI, unsigned Reg);
170   bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB,
171                                         bool UsesFullSpeculationBarrier);
172   bool expandSpeculationSafeValue(MachineBasicBlock &MBB,
173                                   MachineBasicBlock::iterator MBBI,
174                                   bool UsesFullSpeculationBarrier);
175   bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
176                   DebugLoc DL);
177 };
178 
179 } // end anonymous namespace
180 
181 char AArch64SpeculationHardening::ID = 0;
182 
183 INITIALIZE_PASS(AArch64SpeculationHardening, "aarch64-speculation-hardening",
184                 AARCH64_SPECULATION_HARDENING_NAME, false, false)
185 
186 bool AArch64SpeculationHardening::endsWithCondControlFlow(
187     MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
188     AArch64CC::CondCode &CondCode) const {
189   SmallVector<MachineOperand, 1> analyzeBranchCondCode;
190   if (TII->analyzeBranch(MBB, TBB, FBB, analyzeBranchCondCode, false))
191     return false;
192 
193   // Ignore if the BB ends in an unconditional branch/fall-through.
194   if (analyzeBranchCondCode.empty())
195     return false;
196 
197   // If the BB ends with a single conditional branch, FBB will be set to
198   // nullptr (see API docs for TII->analyzeBranch). For the rest of the
199   // analysis we want the FBB block to be set always.
200   assert(TBB != nullptr);
201   if (FBB == nullptr)
202     FBB = MBB.getFallThrough();
203 
204   // If both the true and the false condition jump to the same basic block,
205   // there isn't need for any protection - whether the branch is speculated
206   // correctly or not, we end up executing the architecturally correct code.
207   if (TBB == FBB)
208     return false;
209 
210   assert(MBB.succ_size() == 2);
211   // translate analyzeBranchCondCode to CondCode.
212   assert(analyzeBranchCondCode.size() == 1 && "unknown Cond array format");
213   CondCode = AArch64CC::CondCode(analyzeBranchCondCode[0].getImm());
214   return true;
215 }
216 
217 void AArch64SpeculationHardening::insertFullSpeculationBarrier(
218     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
219     DebugLoc DL) const {
220   // A full control flow speculation barrier consists of (DSB SYS + ISB)
221   BuildMI(MBB, MBBI, DL, TII->get(AArch64::DSB)).addImm(0xf);
222   BuildMI(MBB, MBBI, DL, TII->get(AArch64::ISB)).addImm(0xf);
223 }
224 
225 void AArch64SpeculationHardening::insertTrackingCode(
226     MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode,
227     DebugLoc DL) const {
228   if (UseControlFlowSpeculationBarrier) {
229     insertFullSpeculationBarrier(SplitEdgeBB, SplitEdgeBB.begin(), DL);
230   } else {
231     BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr))
232         .addDef(MisspeculatingTaintReg)
233         .addUse(MisspeculatingTaintReg)
234         .addUse(AArch64::XZR)
235         .addImm(CondCode);
236     SplitEdgeBB.addLiveIn(AArch64::NZCV);
237   }
238 }
239 
240 bool AArch64SpeculationHardening::instrumentControlFlow(
241     MachineBasicBlock &MBB, bool &UsesFullSpeculationBarrier) {
242   LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB);
243 
244   bool Modified = false;
245   MachineBasicBlock *TBB = nullptr;
246   MachineBasicBlock *FBB = nullptr;
247   AArch64CC::CondCode CondCode;
248 
249   if (!endsWithCondControlFlow(MBB, TBB, FBB, CondCode)) {
250     LLVM_DEBUG(dbgs() << "... doesn't end with CondControlFlow\n");
251   } else {
252     // Now insert:
253     // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, cond" on the True edge and
254     // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, Invertcond" on the False
255     // edge.
256     AArch64CC::CondCode InvCondCode = AArch64CC::getInvertedCondCode(CondCode);
257 
258     MachineBasicBlock *SplitEdgeTBB = MBB.SplitCriticalEdge(TBB, *this);
259     MachineBasicBlock *SplitEdgeFBB = MBB.SplitCriticalEdge(FBB, *this);
260 
261     assert(SplitEdgeTBB != nullptr);
262     assert(SplitEdgeFBB != nullptr);
263 
264     DebugLoc DL;
265     if (MBB.instr_end() != MBB.instr_begin())
266       DL = (--MBB.instr_end())->getDebugLoc();
267 
268     insertTrackingCode(*SplitEdgeTBB, CondCode, DL);
269     insertTrackingCode(*SplitEdgeFBB, InvCondCode, DL);
270 
271     LLVM_DEBUG(dbgs() << "SplitEdgeTBB: " << *SplitEdgeTBB << "\n");
272     LLVM_DEBUG(dbgs() << "SplitEdgeFBB: " << *SplitEdgeFBB << "\n");
273     Modified = true;
274   }
275 
276   // Perform correct code generation around function calls and before returns.
277   // The below variables record the return/terminator instructions and the call
278   // instructions respectively; including which register is available as a
279   // temporary register just before the recorded instructions.
280   SmallVector<std::pair<MachineInstr *, unsigned>, 4> ReturnInstructions;
281   SmallVector<std::pair<MachineInstr *, unsigned>, 4> CallInstructions;
282   // if a temporary register is not available for at least one of the
283   // instructions for which we need to transfer taint to the stack pointer, we
284   // need to insert a full speculation barrier.
285   // TmpRegisterNotAvailableEverywhere tracks that condition.
286   bool TmpRegisterNotAvailableEverywhere = false;
287 
288   RegScavenger RS;
289   RS.enterBasicBlockEnd(MBB);
290 
291   for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin(); ) {
292     MachineInstr &MI = *--I;
293     if (!MI.isReturn() && !MI.isCall())
294       continue;
295 
296     // The RegScavenger represents registers available *after* the MI
297     // instruction pointed to by RS.getCurrentPosition().
298     // We need to have a register that is available *before* the MI is executed.
299     if (I == MBB.begin())
300       RS.enterBasicBlock(MBB);
301     else
302       RS.backward(I);
303     // FIXME: The below just finds *a* unused register. Maybe code could be
304     // optimized more if this looks for the register that isn't used for the
305     // longest time around this place, to enable more scheduling freedom. Not
306     // sure if that would actually result in a big performance difference
307     // though. Maybe RegisterScavenger::findSurvivorBackwards has some logic
308     // already to do this - but it's unclear if that could easily be used here.
309     Register TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass);
310     LLVM_DEBUG(dbgs() << "RS finds "
311                       << ((TmpReg == 0) ? "no register " : "register ");
312                if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " ";
313                dbgs() << "to be available at MI " << MI);
314     if (TmpReg == 0)
315       TmpRegisterNotAvailableEverywhere = true;
316     if (MI.isReturn())
317       ReturnInstructions.push_back({&MI, TmpReg});
318     else if (MI.isCall())
319       CallInstructions.push_back({&MI, TmpReg});
320   }
321 
322   if (TmpRegisterNotAvailableEverywhere) {
323     // When a temporary register is not available everywhere in this basic
324     // basic block where a propagate-taint-to-sp operation is needed, just
325     // emit a full speculation barrier at the start of this basic block, which
326     // renders the taint/speculation tracking in this basic block unnecessary.
327     insertFullSpeculationBarrier(MBB, MBB.begin(),
328                                  (MBB.begin())->getDebugLoc());
329     UsesFullSpeculationBarrier = true;
330     Modified = true;
331   } else {
332     for (auto MI_Reg : ReturnInstructions) {
333       assert(MI_Reg.second != 0);
334       LLVM_DEBUG(
335           dbgs()
336           << " About to insert Reg to SP taint propagation with temp register "
337           << printReg(MI_Reg.second, TRI)
338           << " on instruction: " << *MI_Reg.first);
339       insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
340       Modified = true;
341     }
342 
343     for (auto MI_Reg : CallInstructions) {
344       assert(MI_Reg.second != 0);
345       LLVM_DEBUG(dbgs() << " About to insert Reg to SP and back taint "
346                            "propagation with temp register "
347                         << printReg(MI_Reg.second, TRI)
348                         << " around instruction: " << *MI_Reg.first);
349       // Just after the call:
350       insertSPToRegTaintPropagation(
351           MBB, std::next((MachineBasicBlock::iterator)MI_Reg.first));
352       // Just before the call:
353       insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
354       Modified = true;
355     }
356   }
357   return Modified;
358 }
359 
360 void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
361     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
362   // If full control flow speculation barriers are used, emit a control flow
363   // barrier to block potential miss-speculation in flight coming in to this
364   // function.
365   if (UseControlFlowSpeculationBarrier) {
366     insertFullSpeculationBarrier(MBB, MBBI, DebugLoc());
367     return;
368   }
369 
370   // CMP   SP, #0   === SUBS   xzr, SP, #0
371   BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
372       .addDef(AArch64::XZR)
373       .addUse(AArch64::SP)
374       .addImm(0)
375       .addImm(0); // no shift
376   // CSETM x16, NE  === CSINV  x16, xzr, xzr, EQ
377   BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
378       .addDef(MisspeculatingTaintReg)
379       .addUse(AArch64::XZR)
380       .addUse(AArch64::XZR)
381       .addImm(AArch64CC::EQ);
382 }
383 
384 void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
385     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
386     unsigned TmpReg) const {
387   // If full control flow speculation barriers are used, there will not be
388   // miss-speculation when returning from this function, and therefore, also
389   // no need to encode potential miss-speculation into the stack pointer.
390   if (UseControlFlowSpeculationBarrier)
391     return;
392 
393   // mov   Xtmp, SP  === ADD  Xtmp, SP, #0
394   BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
395       .addDef(TmpReg)
396       .addUse(AArch64::SP)
397       .addImm(0)
398       .addImm(0); // no shift
399   // and   Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, #0
400   BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
401       .addDef(TmpReg, RegState::Renamable)
402       .addUse(TmpReg, RegState::Kill | RegState::Renamable)
403       .addUse(MisspeculatingTaintReg, RegState::Kill)
404       .addImm(0);
405   // mov   SP, Xtmp === ADD SP, Xtmp, #0
406   BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
407       .addDef(AArch64::SP)
408       .addUse(TmpReg, RegState::Kill)
409       .addImm(0)
410       .addImm(0); // no shift
411 }
412 
413 bool AArch64SpeculationHardening::functionUsesHardeningRegister(
414     MachineFunction &MF) const {
415   for (MachineBasicBlock &MBB : MF) {
416     for (MachineInstr &MI : MBB) {
417       // treat function calls specially, as the hardening register does not
418       // need to remain live across function calls.
419       if (MI.isCall())
420         continue;
421       if (MI.readsRegister(MisspeculatingTaintReg, TRI) ||
422           MI.modifiesRegister(MisspeculatingTaintReg, TRI))
423         return true;
424     }
425   }
426   return false;
427 }
428 
429 // Make GPR register Reg speculation-safe by putting it through the
430 // SpeculationSafeValue pseudo instruction, if we can't prove that
431 // the value in the register has already been hardened.
432 bool AArch64SpeculationHardening::makeGPRSpeculationSafe(
433     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineInstr &MI,
434     unsigned Reg) {
435   assert(AArch64::GPR32allRegClass.contains(Reg) ||
436          AArch64::GPR64allRegClass.contains(Reg));
437 
438   // Loads cannot directly load a value into the SP (nor WSP).
439   // Therefore, if Reg is SP or WSP, it is because the instruction loads from
440   // the stack through the stack pointer.
441   //
442   // Since the stack pointer is never dynamically controllable, don't harden it.
443   if (Reg == AArch64::SP || Reg == AArch64::WSP)
444     return false;
445 
446   // Do not harden the register again if already hardened before.
447   if (RegsAlreadyMasked[Reg])
448     return false;
449 
450   const bool Is64Bit = AArch64::GPR64allRegClass.contains(Reg);
451   LLVM_DEBUG(dbgs() << "About to harden register : " << Reg << "\n");
452   BuildMI(MBB, MBBI, MI.getDebugLoc(),
453           TII->get(Is64Bit ? AArch64::SpeculationSafeValueX
454                            : AArch64::SpeculationSafeValueW))
455       .addDef(Reg)
456       .addUse(Reg);
457   RegsAlreadyMasked.set(Reg);
458   return true;
459 }
460 
461 bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) {
462   bool Modified = false;
463 
464   LLVM_DEBUG(dbgs() << "slhLoads running on MBB: " << MBB);
465 
466   RegsAlreadyMasked.reset();
467 
468   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
469   MachineBasicBlock::iterator NextMBBI;
470   for (; MBBI != E; MBBI = NextMBBI) {
471     MachineInstr &MI = *MBBI;
472     NextMBBI = std::next(MBBI);
473     // Only harden loaded values or addresses used in loads.
474     if (!MI.mayLoad())
475       continue;
476 
477     LLVM_DEBUG(dbgs() << "About to harden: " << MI);
478 
479     // For general purpose register loads, harden the registers loaded into.
480     // For other loads, harden the address loaded from.
481     // Masking the loaded value is expected to result in less performance
482     // overhead, as the load can still execute speculatively in comparison to
483     // when the address loaded from gets masked. However, masking is only
484     // easy to do efficiently on GPR registers, so for loads into non-GPR
485     // registers (e.g. floating point loads), mask the address loaded from.
486     bool AllDefsAreGPR = llvm::all_of(MI.defs(), [&](MachineOperand &Op) {
487       return Op.isReg() && (AArch64::GPR32allRegClass.contains(Op.getReg()) ||
488                             AArch64::GPR64allRegClass.contains(Op.getReg()));
489     });
490     // FIXME: it might be a worthwhile optimization to not mask loaded
491     // values if all the registers involved in address calculation are already
492     // hardened, leading to this load not able to execute on a miss-speculated
493     // path.
494     bool HardenLoadedData = AllDefsAreGPR;
495     bool HardenAddressLoadedFrom = !HardenLoadedData;
496 
497     // First remove registers from AlreadyMaskedRegisters if their value is
498     // updated by this instruction - it makes them contain a new value that is
499     // not guaranteed to already have been masked.
500     for (MachineOperand Op : MI.defs())
501       for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
502         RegsAlreadyMasked.reset(*AI);
503 
504     // FIXME: loads from the stack with an immediate offset from the stack
505     // pointer probably shouldn't be hardened, which could result in a
506     // significant optimization. See section "Don’t check loads from
507     // compile-time constant stack offsets", in
508     // https://llvm.org/docs/SpeculativeLoadHardening.html
509 
510     if (HardenLoadedData)
511       for (auto Def : MI.defs()) {
512         if (Def.isDead())
513           // Do not mask a register that is not used further.
514           continue;
515         // FIXME: For pre/post-increment addressing modes, the base register
516         // used in address calculation is also defined by this instruction.
517         // It might be a worthwhile optimization to not harden that
518         // base register increment/decrement when the increment/decrement is
519         // an immediate.
520         Modified |= makeGPRSpeculationSafe(MBB, NextMBBI, MI, Def.getReg());
521       }
522 
523     if (HardenAddressLoadedFrom)
524       for (auto Use : MI.uses()) {
525         if (!Use.isReg())
526           continue;
527         Register Reg = Use.getReg();
528         // Some loads of floating point data have implicit defs/uses on a
529         // super register of that floating point data. Some examples:
530         // $s0 = LDRSui $sp, 22, implicit-def $q0
531         // $q0 = LD1i64 $q0, 1, renamable $x0
532         // We need to filter out these uses for non-GPR register which occur
533         // because the load partially fills a non-GPR register with the loaded
534         // data. Just skipping all non-GPR registers is safe (for now) as all
535         // AArch64 load instructions only use GPR registers to perform the
536         // address calculation. FIXME: However that might change once we can
537         // produce SVE gather instructions.
538         if (!(AArch64::GPR32allRegClass.contains(Reg) ||
539               AArch64::GPR64allRegClass.contains(Reg)))
540           continue;
541         Modified |= makeGPRSpeculationSafe(MBB, MBBI, MI, Reg);
542       }
543   }
544   return Modified;
545 }
546 
547 /// \brief If MBBI references a pseudo instruction that should be expanded
548 /// here, do the expansion and return true. Otherwise return false.
549 bool AArch64SpeculationHardening::expandSpeculationSafeValue(
550     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
551     bool UsesFullSpeculationBarrier) {
552   MachineInstr &MI = *MBBI;
553   unsigned Opcode = MI.getOpcode();
554   bool Is64Bit = true;
555 
556   switch (Opcode) {
557   default:
558     break;
559   case AArch64::SpeculationSafeValueW:
560     Is64Bit = false;
561     [[fallthrough]];
562   case AArch64::SpeculationSafeValueX:
563     // Just remove the SpeculationSafe pseudo's if control flow
564     // miss-speculation isn't happening because we're already inserting barriers
565     // to guarantee that.
566     if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) {
567       Register DstReg = MI.getOperand(0).getReg();
568       Register SrcReg = MI.getOperand(1).getReg();
569       // Mark this register and all its aliasing registers as needing to be
570       // value speculation hardened before its next use, by using a CSDB
571       // barrier instruction.
572       for (MachineOperand Op : MI.defs())
573         for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
574           RegsNeedingCSDBBeforeUse.set(*AI);
575 
576       // Mask off with taint state.
577       BuildMI(MBB, MBBI, MI.getDebugLoc(),
578               Is64Bit ? TII->get(AArch64::ANDXrs) : TII->get(AArch64::ANDWrs))
579           .addDef(DstReg)
580           .addUse(SrcReg, RegState::Kill)
581           .addUse(Is64Bit ? MisspeculatingTaintReg
582                           : MisspeculatingTaintReg32Bit)
583           .addImm(0);
584     }
585     MI.eraseFromParent();
586     return true;
587   }
588   return false;
589 }
590 
591 bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB,
592                                              MachineBasicBlock::iterator MBBI,
593                                              DebugLoc DL) {
594   assert(!UseControlFlowSpeculationBarrier && "No need to insert CSDBs when "
595                                               "control flow miss-speculation "
596                                               "is already blocked");
597   // insert data value speculation barrier (CSDB)
598   BuildMI(MBB, MBBI, DL, TII->get(AArch64::HINT)).addImm(0x14);
599   RegsNeedingCSDBBeforeUse.reset();
600   return true;
601 }
602 
603 bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
604     MachineBasicBlock &MBB, bool UsesFullSpeculationBarrier) {
605   bool Modified = false;
606 
607   RegsNeedingCSDBBeforeUse.reset();
608 
609   // The following loop iterates over all instructions in the basic block,
610   // and performs 2 operations:
611   // 1. Insert a CSDB at this location if needed.
612   // 2. Expand the SpeculationSafeValuePseudo if the current instruction is
613   // one.
614   //
615   // The insertion of the CSDB is done as late as possible (i.e. just before
616   // the use of a masked register), in the hope that that will reduce the
617   // total number of CSDBs in a block when there are multiple masked registers
618   // in the block.
619   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
620   DebugLoc DL;
621   while (MBBI != E) {
622     MachineInstr &MI = *MBBI;
623     DL = MI.getDebugLoc();
624     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
625 
626     // First check if a CSDB needs to be inserted due to earlier registers
627     // that were masked and that are used by the next instruction.
628     // Also emit the barrier on any potential control flow changes.
629     bool NeedToEmitBarrier = false;
630     if (RegsNeedingCSDBBeforeUse.any() && (MI.isCall() || MI.isTerminator()))
631       NeedToEmitBarrier = true;
632     if (!NeedToEmitBarrier)
633       for (MachineOperand Op : MI.uses())
634         if (Op.isReg() && RegsNeedingCSDBBeforeUse[Op.getReg()]) {
635           NeedToEmitBarrier = true;
636           break;
637         }
638 
639     if (NeedToEmitBarrier && !UsesFullSpeculationBarrier)
640       Modified |= insertCSDB(MBB, MBBI, DL);
641 
642     Modified |=
643         expandSpeculationSafeValue(MBB, MBBI, UsesFullSpeculationBarrier);
644 
645     MBBI = NMBBI;
646   }
647 
648   if (RegsNeedingCSDBBeforeUse.any() && !UsesFullSpeculationBarrier)
649     Modified |= insertCSDB(MBB, MBBI, DL);
650 
651   return Modified;
652 }
653 
654 bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
655   if (!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
656     return false;
657 
658   MisspeculatingTaintReg = AArch64::X16;
659   MisspeculatingTaintReg32Bit = AArch64::W16;
660   TII = MF.getSubtarget().getInstrInfo();
661   TRI = MF.getSubtarget().getRegisterInfo();
662   RegsNeedingCSDBBeforeUse.resize(TRI->getNumRegs());
663   RegsAlreadyMasked.resize(TRI->getNumRegs());
664   UseControlFlowSpeculationBarrier = functionUsesHardeningRegister(MF);
665 
666   bool Modified = false;
667 
668   // Step 1: Enable automatic insertion of SpeculationSafeValue.
669   if (HardenLoads) {
670     LLVM_DEBUG(
671         dbgs() << "***** AArch64SpeculationHardening - automatic insertion of "
672                   "SpeculationSafeValue intrinsics *****\n");
673     for (auto &MBB : MF)
674       Modified |= slhLoads(MBB);
675   }
676 
677   // 2. Add instrumentation code to function entry and exits.
678   LLVM_DEBUG(
679       dbgs()
680       << "***** AArch64SpeculationHardening - track control flow *****\n");
681 
682   SmallVector<MachineBasicBlock *, 2> EntryBlocks;
683   EntryBlocks.push_back(&MF.front());
684   for (const LandingPadInfo &LPI : MF.getLandingPads())
685     EntryBlocks.push_back(LPI.LandingPadBlock);
686   for (auto *Entry : EntryBlocks)
687     insertSPToRegTaintPropagation(
688         *Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
689 
690   // 3. Add instrumentation code to every basic block.
691   for (auto &MBB : MF) {
692     bool UsesFullSpeculationBarrier = false;
693     Modified |= instrumentControlFlow(MBB, UsesFullSpeculationBarrier);
694     Modified |=
695         lowerSpeculationSafeValuePseudos(MBB, UsesFullSpeculationBarrier);
696   }
697 
698   return Modified;
699 }
700 
701 /// \brief Returns an instance of the pseudo instruction expansion pass.
702 FunctionPass *llvm::createAArch64SpeculationHardeningPass() {
703   return new AArch64SpeculationHardening();
704 }
705