1 //===- AArch64SpeculationHardening.cpp - Harden Against Misspeculation ---===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a pass to insert code to mitigate against side channel 10 // vulnerabilities that may happen under control flow miss-speculation. 11 // 12 // The pass implements tracking of control flow miss-speculation into a "taint" 13 // register. That taint register can then be used to mask off registers with 14 // sensitive data when executing under miss-speculation, a.k.a. "transient 15 // execution". 16 // This pass is aimed at mitigating against SpectreV1-style vulnerabilities. 17 // 18 // It also implements speculative load hardening, i.e. using the taint register 19 // to automatically mask off loaded data. 20 // 21 // As a possible follow-on improvement, also an intrinsics-based approach as 22 // explained at https://lwn.net/Articles/759423/ could be implemented on top of 23 // the current design. 24 // 25 // For AArch64, the following implementation choices are made to implement the 26 // tracking of control flow miss-speculation into a taint register: 27 // Some of these are different than the implementation choices made in 28 // the similar pass implemented in X86SpeculativeLoadHardening.cpp, as 29 // the instruction set characteristics result in different trade-offs. 30 // - The speculation hardening is done after register allocation. With a 31 // relative abundance of registers, one register is reserved (X16) to be 32 // the taint register. X16 is expected to not clash with other register 33 // reservation mechanisms with very high probability because: 34 // . The AArch64 ABI doesn't guarantee X16 to be retained across any call. 35 // .
The only way to request X16 to be used as a programmer is through 36 // inline assembly. In the rare case a function explicitly demands to 37 // use X16/W16, this pass falls back to hardening against speculation 38 // by inserting a DSB SYS/ISB barrier pair which will prevent control 39 // flow speculation. 40 // - It is easy to insert mask operations at this late stage as we have 41 // mask operations available that don't set flags. 42 // - The taint variable contains all-ones when no miss-speculation is detected, 43 // and contains all-zeros when miss-speculation is detected. Therefore, when 44 // masking, an AND instruction (which only changes the register to be masked, 45 // no other side effects) can easily be inserted anywhere that's needed. 46 // - The tracking of miss-speculation is done by using a data-flow conditional 47 // select instruction (CSEL) to evaluate the flags that were also used to 48 // make conditional branch direction decisions. Speculation of the CSEL 49 // instruction can be limited with a CSDB instruction - so the combination of 50 // CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL 51 // aren't speculated. When conditional branch direction gets miss-speculated, 52 // the semantics of the inserted CSEL instruction is such that the taint 53 // register will contain all zero bits. 54 // One key requirement for this to work is that the conditional branch is 55 // followed by an execution of the CSEL instruction, where the CSEL 56 // instruction needs to use the same flags status as the conditional branch. 57 // This means that the conditional branches must not be implemented as one 58 // of the AArch64 conditional branches that do not use the flags as input 59 // (CB(N)Z and TB(N)Z). This is implemented by ensuring in the instruction 60 // selectors to not produce these instructions when speculation hardening 61 // is enabled. This pass will assert if it does encounter such an instruction. 
62 // - On function call boundaries, the miss-speculation state is transferred from 63 // the taint register X16 to be encoded in the SP register as value 0. 64 // 65 // For the aspect of automatically hardening loads, using the taint register, 66 // (a.k.a. speculative load hardening, see 67 // https://llvm.org/docs/SpeculativeLoadHardening.html), the following 68 // implementation choices are made for AArch64: 69 // - Many of the optimizations described at 70 // https://llvm.org/docs/SpeculativeLoadHardening.html to harden fewer 71 // loads haven't been implemented yet - but for some of them there are 72 // FIXMEs in the code. 73 // - loads that load into general purpose (X or W) registers get hardened by 74 // masking the loaded data. For loads that load into other registers, the 75 // address loaded from gets hardened. It is expected that hardening the 76 // loaded data may be more efficient; but masking data in registers other 77 // than X or W is not easy and may result in being slower than just 78 // hardening the X address register loaded from. 79 // - On AArch64, CSDB instructions are inserted between the masking of the 80 // register and its first use, to ensure there's no non-control-flow 81 // speculation that might undermine the hardening mechanism. 82 // 83 // Future extensions/improvements could be: 84 // - Implement this functionality using full speculation barriers, akin to the 85 // x86-slh-lfence option. This may be more useful for the intrinsics-based 86 // approach than for the SLH approach to masking. 87 // Note that this pass already inserts the full speculation barriers if the 88 // function for some niche reason makes use of X16/W16. 89 // - no indirect branch misprediction gets protected/instrumented; but this 90 // could be done for some indirect branches, such as switch jump tables. 
91 //===----------------------------------------------------------------------===// 92 93 #include "AArch64InstrInfo.h" 94 #include "AArch64Subtarget.h" 95 #include "Utils/AArch64BaseInfo.h" 96 #include "llvm/ADT/BitVector.h" 97 #include "llvm/ADT/SmallVector.h" 98 #include "llvm/CodeGen/MachineBasicBlock.h" 99 #include "llvm/CodeGen/MachineFunction.h" 100 #include "llvm/CodeGen/MachineFunctionPass.h" 101 #include "llvm/CodeGen/MachineInstr.h" 102 #include "llvm/CodeGen/MachineInstrBuilder.h" 103 #include "llvm/CodeGen/MachineOperand.h" 104 #include "llvm/CodeGen/MachineRegisterInfo.h" 105 #include "llvm/CodeGen/RegisterScavenging.h" 106 #include "llvm/IR/DebugLoc.h" 107 #include "llvm/Pass.h" 108 #include "llvm/Support/CodeGen.h" 109 #include "llvm/Support/Debug.h" 110 #include "llvm/Target/TargetMachine.h" 111 #include <cassert> 112 113 using namespace llvm; 114 115 #define DEBUG_TYPE "aarch64-speculation-hardening" 116 117 #define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass" 118 119 static cl::opt<bool> HardenLoads("aarch64-slh-loads", cl::Hidden, 120 cl::desc("Sanitize loads from memory."), 121 cl::init(true)); 122 123 namespace { 124 125 class AArch64SpeculationHardening : public MachineFunctionPass { 126 public: 127 const TargetInstrInfo *TII; 128 const TargetRegisterInfo *TRI; 129 130 static char ID; 131 132 AArch64SpeculationHardening() : MachineFunctionPass(ID) { 133 initializeAArch64SpeculationHardeningPass(*PassRegistry::getPassRegistry()); 134 } 135 136 bool runOnMachineFunction(MachineFunction &Fn) override; 137 138 StringRef getPassName() const override { 139 return AARCH64_SPECULATION_HARDENING_NAME; 140 } 141 142 private: 143 unsigned MisspeculatingTaintReg; 144 unsigned MisspeculatingTaintReg32Bit; 145 bool UseControlFlowSpeculationBarrier; 146 BitVector RegsNeedingCSDBBeforeUse; 147 BitVector RegsAlreadyMasked; 148 149 bool functionUsesHardeningRegister(MachineFunction &MF) const; 150 bool 
instrumentControlFlow(MachineBasicBlock &MBB, 151 bool &UsesFullSpeculationBarrier); 152 bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, 153 MachineBasicBlock *&FBB, 154 AArch64CC::CondCode &CondCode) const; 155 void insertTrackingCode(MachineBasicBlock &SplitEdgeBB, 156 AArch64CC::CondCode &CondCode, DebugLoc DL) const; 157 void insertSPToRegTaintPropagation(MachineBasicBlock &MBB, 158 MachineBasicBlock::iterator MBBI) const; 159 void insertRegToSPTaintPropagation(MachineBasicBlock &MBB, 160 MachineBasicBlock::iterator MBBI, 161 unsigned TmpReg) const; 162 void insertFullSpeculationBarrier(MachineBasicBlock &MBB, 163 MachineBasicBlock::iterator MBBI, 164 DebugLoc DL) const; 165 166 bool slhLoads(MachineBasicBlock &MBB); 167 bool makeGPRSpeculationSafe(MachineBasicBlock &MBB, 168 MachineBasicBlock::iterator MBBI, 169 MachineInstr &MI, unsigned Reg); 170 bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB, 171 bool UsesFullSpeculationBarrier); 172 bool expandSpeculationSafeValue(MachineBasicBlock &MBB, 173 MachineBasicBlock::iterator MBBI, 174 bool UsesFullSpeculationBarrier); 175 bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 176 DebugLoc DL); 177 }; 178 179 } // end anonymous namespace 180 181 char AArch64SpeculationHardening::ID = 0; 182 183 INITIALIZE_PASS(AArch64SpeculationHardening, "aarch64-speculation-hardening", 184 AARCH64_SPECULATION_HARDENING_NAME, false, false) 185 186 bool AArch64SpeculationHardening::endsWithCondControlFlow( 187 MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, 188 AArch64CC::CondCode &CondCode) const { 189 SmallVector<MachineOperand, 1> analyzeBranchCondCode; 190 if (TII->analyzeBranch(MBB, TBB, FBB, analyzeBranchCondCode, false)) 191 return false; 192 193 // Ignore if the BB ends in an unconditional branch/fall-through. 
194 if (analyzeBranchCondCode.empty()) 195 return false; 196 197 // If the BB ends with a single conditional branch, FBB will be set to 198 // nullptr (see API docs for TII->analyzeBranch). For the rest of the 199 // analysis we want the FBB block to be set always. 200 assert(TBB != nullptr); 201 if (FBB == nullptr) 202 FBB = MBB.getFallThrough(); 203 204 // If both the true and the false condition jump to the same basic block, 205 // there isn't need for any protection - whether the branch is speculated 206 // correctly or not, we end up executing the architecturally correct code. 207 if (TBB == FBB) 208 return false; 209 210 assert(MBB.succ_size() == 2); 211 // translate analyzeBranchCondCode to CondCode. 212 assert(analyzeBranchCondCode.size() == 1 && "unknown Cond array format"); 213 CondCode = AArch64CC::CondCode(analyzeBranchCondCode[0].getImm()); 214 return true; 215 } 216 217 void AArch64SpeculationHardening::insertFullSpeculationBarrier( 218 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 219 DebugLoc DL) const { 220 // A full control flow speculation barrier consists of (DSB SYS + ISB) 221 BuildMI(MBB, MBBI, DL, TII->get(AArch64::DSB)).addImm(0xf); 222 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ISB)).addImm(0xf); 223 } 224 225 void AArch64SpeculationHardening::insertTrackingCode( 226 MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode, 227 DebugLoc DL) const { 228 if (UseControlFlowSpeculationBarrier) { 229 insertFullSpeculationBarrier(SplitEdgeBB, SplitEdgeBB.begin(), DL); 230 } else { 231 BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr)) 232 .addDef(MisspeculatingTaintReg) 233 .addUse(MisspeculatingTaintReg) 234 .addUse(AArch64::XZR) 235 .addImm(CondCode); 236 SplitEdgeBB.addLiveIn(AArch64::NZCV); 237 } 238 } 239 240 bool AArch64SpeculationHardening::instrumentControlFlow( 241 MachineBasicBlock &MBB, bool &UsesFullSpeculationBarrier) { 242 LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB); 
243 244 bool Modified = false; 245 MachineBasicBlock *TBB = nullptr; 246 MachineBasicBlock *FBB = nullptr; 247 AArch64CC::CondCode CondCode; 248 249 if (!endsWithCondControlFlow(MBB, TBB, FBB, CondCode)) { 250 LLVM_DEBUG(dbgs() << "... doesn't end with CondControlFlow\n"); 251 } else { 252 // Now insert: 253 // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, cond" on the True edge and 254 // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, Invertcond" on the False 255 // edge. 256 AArch64CC::CondCode InvCondCode = AArch64CC::getInvertedCondCode(CondCode); 257 258 MachineBasicBlock *SplitEdgeTBB = MBB.SplitCriticalEdge(TBB, *this); 259 MachineBasicBlock *SplitEdgeFBB = MBB.SplitCriticalEdge(FBB, *this); 260 261 assert(SplitEdgeTBB != nullptr); 262 assert(SplitEdgeFBB != nullptr); 263 264 DebugLoc DL; 265 if (MBB.instr_end() != MBB.instr_begin()) 266 DL = (--MBB.instr_end())->getDebugLoc(); 267 268 insertTrackingCode(*SplitEdgeTBB, CondCode, DL); 269 insertTrackingCode(*SplitEdgeFBB, InvCondCode, DL); 270 271 LLVM_DEBUG(dbgs() << "SplitEdgeTBB: " << *SplitEdgeTBB << "\n"); 272 LLVM_DEBUG(dbgs() << "SplitEdgeFBB: " << *SplitEdgeFBB << "\n"); 273 Modified = true; 274 } 275 276 // Perform correct code generation around function calls and before returns. 277 // The below variables record the return/terminator instructions and the call 278 // instructions respectively; including which register is available as a 279 // temporary register just before the recorded instructions. 280 SmallVector<std::pair<MachineInstr *, unsigned>, 4> ReturnInstructions; 281 SmallVector<std::pair<MachineInstr *, unsigned>, 4> CallInstructions; 282 // if a temporary register is not available for at least one of the 283 // instructions for which we need to transfer taint to the stack pointer, we 284 // need to insert a full speculation barrier. 285 // TmpRegisterNotAvailableEverywhere tracks that condition. 
286 bool TmpRegisterNotAvailableEverywhere = false; 287 288 RegScavenger RS; 289 RS.enterBasicBlockEnd(MBB); 290 291 for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin(); ) { 292 MachineInstr &MI = *--I; 293 if (!MI.isReturn() && !MI.isCall()) 294 continue; 295 296 // The RegScavenger represents registers available *after* the MI 297 // instruction pointed to by RS.getCurrentPosition(). 298 // We need to have a register that is available *before* the MI is executed. 299 if (I == MBB.begin()) 300 RS.enterBasicBlock(MBB); 301 else 302 RS.backward(std::prev(I)); 303 // FIXME: The below just finds *a* unused register. Maybe code could be 304 // optimized more if this looks for the register that isn't used for the 305 // longest time around this place, to enable more scheduling freedom. Not 306 // sure if that would actually result in a big performance difference 307 // though. Maybe RegisterScavenger::findSurvivorBackwards has some logic 308 // already to do this - but it's unclear if that could easily be used here. 309 Register TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass); 310 LLVM_DEBUG(dbgs() << "RS finds " 311 << ((TmpReg == 0) ? "no register " : "register "); 312 if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " "; 313 dbgs() << "to be available at MI " << MI); 314 if (TmpReg == 0) 315 TmpRegisterNotAvailableEverywhere = true; 316 if (MI.isReturn()) 317 ReturnInstructions.push_back({&MI, TmpReg}); 318 else if (MI.isCall()) 319 CallInstructions.push_back({&MI, TmpReg}); 320 } 321 322 if (TmpRegisterNotAvailableEverywhere) { 323 // When a temporary register is not available everywhere in this basic 324 // basic block where a propagate-taint-to-sp operation is needed, just 325 // emit a full speculation barrier at the start of this basic block, which 326 // renders the taint/speculation tracking in this basic block unnecessary. 
327 insertFullSpeculationBarrier(MBB, MBB.begin(), 328 (MBB.begin())->getDebugLoc()); 329 UsesFullSpeculationBarrier = true; 330 Modified = true; 331 } else { 332 for (auto MI_Reg : ReturnInstructions) { 333 assert(MI_Reg.second != 0); 334 LLVM_DEBUG( 335 dbgs() 336 << " About to insert Reg to SP taint propagation with temp register " 337 << printReg(MI_Reg.second, TRI) 338 << " on instruction: " << *MI_Reg.first); 339 insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second); 340 Modified = true; 341 } 342 343 for (auto MI_Reg : CallInstructions) { 344 assert(MI_Reg.second != 0); 345 LLVM_DEBUG(dbgs() << " About to insert Reg to SP and back taint " 346 "propagation with temp register " 347 << printReg(MI_Reg.second, TRI) 348 << " around instruction: " << *MI_Reg.first); 349 // Just after the call: 350 insertSPToRegTaintPropagation( 351 MBB, std::next((MachineBasicBlock::iterator)MI_Reg.first)); 352 // Just before the call: 353 insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second); 354 Modified = true; 355 } 356 } 357 return Modified; 358 } 359 360 void AArch64SpeculationHardening::insertSPToRegTaintPropagation( 361 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { 362 // If full control flow speculation barriers are used, emit a control flow 363 // barrier to block potential miss-speculation in flight coming in to this 364 // function. 
365 if (UseControlFlowSpeculationBarrier) { 366 insertFullSpeculationBarrier(MBB, MBBI, DebugLoc()); 367 return; 368 } 369 370 // CMP SP, #0 === SUBS xzr, SP, #0 371 BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri)) 372 .addDef(AArch64::XZR) 373 .addUse(AArch64::SP) 374 .addImm(0) 375 .addImm(0); // no shift 376 // CSETM x16, NE === CSINV x16, xzr, xzr, EQ 377 BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr)) 378 .addDef(MisspeculatingTaintReg) 379 .addUse(AArch64::XZR) 380 .addUse(AArch64::XZR) 381 .addImm(AArch64CC::EQ); 382 } 383 384 void AArch64SpeculationHardening::insertRegToSPTaintPropagation( 385 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 386 unsigned TmpReg) const { 387 // If full control flow speculation barriers are used, there will not be 388 // miss-speculation when returning from this function, and therefore, also 389 // no need to encode potential miss-speculation into the stack pointer. 390 if (UseControlFlowSpeculationBarrier) 391 return; 392 393 // mov Xtmp, SP === ADD Xtmp, SP, #0 394 BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri)) 395 .addDef(TmpReg) 396 .addUse(AArch64::SP) 397 .addImm(0) 398 .addImm(0); // no shift 399 // and Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, #0 400 BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs)) 401 .addDef(TmpReg, RegState::Renamable) 402 .addUse(TmpReg, RegState::Kill | RegState::Renamable) 403 .addUse(MisspeculatingTaintReg, RegState::Kill) 404 .addImm(0); 405 // mov SP, Xtmp === ADD SP, Xtmp, #0 406 BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri)) 407 .addDef(AArch64::SP) 408 .addUse(TmpReg, RegState::Kill) 409 .addImm(0) 410 .addImm(0); // no shift 411 } 412 413 bool AArch64SpeculationHardening::functionUsesHardeningRegister( 414 MachineFunction &MF) const { 415 for (MachineBasicBlock &MBB : MF) { 416 for (MachineInstr &MI : MBB) { 417 // treat function calls specially, as the hardening register does not 418 // need to remain live across 
function calls. 419 if (MI.isCall()) 420 continue; 421 if (MI.readsRegister(MisspeculatingTaintReg, TRI) || 422 MI.modifiesRegister(MisspeculatingTaintReg, TRI)) 423 return true; 424 } 425 } 426 return false; 427 } 428 429 // Make GPR register Reg speculation-safe by putting it through the 430 // SpeculationSafeValue pseudo instruction, if we can't prove that 431 // the value in the register has already been hardened. 432 bool AArch64SpeculationHardening::makeGPRSpeculationSafe( 433 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineInstr &MI, 434 unsigned Reg) { 435 assert(AArch64::GPR32allRegClass.contains(Reg) || 436 AArch64::GPR64allRegClass.contains(Reg)); 437 438 // Loads cannot directly load a value into the SP (nor WSP). 439 // Therefore, if Reg is SP or WSP, it is because the instruction loads from 440 // the stack through the stack pointer. 441 // 442 // Since the stack pointer is never dynamically controllable, don't harden it. 443 if (Reg == AArch64::SP || Reg == AArch64::WSP) 444 return false; 445 446 // Do not harden the register again if already hardened before. 447 if (RegsAlreadyMasked[Reg]) 448 return false; 449 450 const bool Is64Bit = AArch64::GPR64allRegClass.contains(Reg); 451 LLVM_DEBUG(dbgs() << "About to harden register : " << Reg << "\n"); 452 BuildMI(MBB, MBBI, MI.getDebugLoc(), 453 TII->get(Is64Bit ? 
AArch64::SpeculationSafeValueX 454 : AArch64::SpeculationSafeValueW)) 455 .addDef(Reg) 456 .addUse(Reg); 457 RegsAlreadyMasked.set(Reg); 458 return true; 459 } 460 461 bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) { 462 bool Modified = false; 463 464 LLVM_DEBUG(dbgs() << "slhLoads running on MBB: " << MBB); 465 466 RegsAlreadyMasked.reset(); 467 468 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 469 MachineBasicBlock::iterator NextMBBI; 470 for (; MBBI != E; MBBI = NextMBBI) { 471 MachineInstr &MI = *MBBI; 472 NextMBBI = std::next(MBBI); 473 // Only harden loaded values or addresses used in loads. 474 if (!MI.mayLoad()) 475 continue; 476 477 LLVM_DEBUG(dbgs() << "About to harden: " << MI); 478 479 // For general purpose register loads, harden the registers loaded into. 480 // For other loads, harden the address loaded from. 481 // Masking the loaded value is expected to result in less performance 482 // overhead, as the load can still execute speculatively in comparison to 483 // when the address loaded from gets masked. However, masking is only 484 // easy to do efficiently on GPR registers, so for loads into non-GPR 485 // registers (e.g. floating point loads), mask the address loaded from. 486 bool AllDefsAreGPR = llvm::all_of(MI.defs(), [&](MachineOperand &Op) { 487 return Op.isReg() && (AArch64::GPR32allRegClass.contains(Op.getReg()) || 488 AArch64::GPR64allRegClass.contains(Op.getReg())); 489 }); 490 // FIXME: it might be a worthwhile optimization to not mask loaded 491 // values if all the registers involved in address calculation are already 492 // hardened, leading to this load not able to execute on a miss-speculated 493 // path. 
494 bool HardenLoadedData = AllDefsAreGPR; 495 bool HardenAddressLoadedFrom = !HardenLoadedData; 496 497 // First remove registers from AlreadyMaskedRegisters if their value is 498 // updated by this instruction - it makes them contain a new value that is 499 // not guaranteed to already have been masked. 500 for (MachineOperand Op : MI.defs()) 501 for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI) 502 RegsAlreadyMasked.reset(*AI); 503 504 // FIXME: loads from the stack with an immediate offset from the stack 505 // pointer probably shouldn't be hardened, which could result in a 506 // significant optimization. See section "Don’t check loads from 507 // compile-time constant stack offsets", in 508 // https://llvm.org/docs/SpeculativeLoadHardening.html 509 510 if (HardenLoadedData) 511 for (auto Def : MI.defs()) { 512 if (Def.isDead()) 513 // Do not mask a register that is not used further. 514 continue; 515 // FIXME: For pre/post-increment addressing modes, the base register 516 // used in address calculation is also defined by this instruction. 517 // It might be a worthwhile optimization to not harden that 518 // base register increment/decrement when the increment/decrement is 519 // an immediate. 520 Modified |= makeGPRSpeculationSafe(MBB, NextMBBI, MI, Def.getReg()); 521 } 522 523 if (HardenAddressLoadedFrom) 524 for (auto Use : MI.uses()) { 525 if (!Use.isReg()) 526 continue; 527 Register Reg = Use.getReg(); 528 // Some loads of floating point data have implicit defs/uses on a 529 // super register of that floating point data. Some examples: 530 // $s0 = LDRSui $sp, 22, implicit-def $q0 531 // $q0 = LD1i64 $q0, 1, renamable $x0 532 // We need to filter out these uses for non-GPR register which occur 533 // because the load partially fills a non-GPR register with the loaded 534 // data. 
Just skipping all non-GPR registers is safe (for now) as all 535 // AArch64 load instructions only use GPR registers to perform the 536 // address calculation. FIXME: However that might change once we can 537 // produce SVE gather instructions. 538 if (!(AArch64::GPR32allRegClass.contains(Reg) || 539 AArch64::GPR64allRegClass.contains(Reg))) 540 continue; 541 Modified |= makeGPRSpeculationSafe(MBB, MBBI, MI, Reg); 542 } 543 } 544 return Modified; 545 } 546 547 /// \brief If MBBI references a pseudo instruction that should be expanded 548 /// here, do the expansion and return true. Otherwise return false. 549 bool AArch64SpeculationHardening::expandSpeculationSafeValue( 550 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 551 bool UsesFullSpeculationBarrier) { 552 MachineInstr &MI = *MBBI; 553 unsigned Opcode = MI.getOpcode(); 554 bool Is64Bit = true; 555 556 switch (Opcode) { 557 default: 558 break; 559 case AArch64::SpeculationSafeValueW: 560 Is64Bit = false; 561 [[fallthrough]]; 562 case AArch64::SpeculationSafeValueX: 563 // Just remove the SpeculationSafe pseudo's if control flow 564 // miss-speculation isn't happening because we're already inserting barriers 565 // to guarantee that. 566 if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) { 567 Register DstReg = MI.getOperand(0).getReg(); 568 Register SrcReg = MI.getOperand(1).getReg(); 569 // Mark this register and all its aliasing registers as needing to be 570 // value speculation hardened before its next use, by using a CSDB 571 // barrier instruction. 572 for (MachineOperand Op : MI.defs()) 573 for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI) 574 RegsNeedingCSDBBeforeUse.set(*AI); 575 576 // Mask off with taint state. 577 BuildMI(MBB, MBBI, MI.getDebugLoc(), 578 Is64Bit ? TII->get(AArch64::ANDXrs) : TII->get(AArch64::ANDWrs)) 579 .addDef(DstReg) 580 .addUse(SrcReg, RegState::Kill) 581 .addUse(Is64Bit ? 
MisspeculatingTaintReg 582 : MisspeculatingTaintReg32Bit) 583 .addImm(0); 584 } 585 MI.eraseFromParent(); 586 return true; 587 } 588 return false; 589 } 590 591 bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB, 592 MachineBasicBlock::iterator MBBI, 593 DebugLoc DL) { 594 assert(!UseControlFlowSpeculationBarrier && "No need to insert CSDBs when " 595 "control flow miss-speculation " 596 "is already blocked"); 597 // insert data value speculation barrier (CSDB) 598 BuildMI(MBB, MBBI, DL, TII->get(AArch64::HINT)).addImm(0x14); 599 RegsNeedingCSDBBeforeUse.reset(); 600 return true; 601 } 602 603 bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos( 604 MachineBasicBlock &MBB, bool UsesFullSpeculationBarrier) { 605 bool Modified = false; 606 607 RegsNeedingCSDBBeforeUse.reset(); 608 609 // The following loop iterates over all instructions in the basic block, 610 // and performs 2 operations: 611 // 1. Insert a CSDB at this location if needed. 612 // 2. Expand the SpeculationSafeValuePseudo if the current instruction is 613 // one. 614 // 615 // The insertion of the CSDB is done as late as possible (i.e. just before 616 // the use of a masked register), in the hope that that will reduce the 617 // total number of CSDBs in a block when there are multiple masked registers 618 // in the block. 619 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 620 DebugLoc DL; 621 while (MBBI != E) { 622 MachineInstr &MI = *MBBI; 623 DL = MI.getDebugLoc(); 624 MachineBasicBlock::iterator NMBBI = std::next(MBBI); 625 626 // First check if a CSDB needs to be inserted due to earlier registers 627 // that were masked and that are used by the next instruction. 628 // Also emit the barrier on any potential control flow changes. 
629 bool NeedToEmitBarrier = false; 630 if (RegsNeedingCSDBBeforeUse.any() && (MI.isCall() || MI.isTerminator())) 631 NeedToEmitBarrier = true; 632 if (!NeedToEmitBarrier) 633 for (MachineOperand Op : MI.uses()) 634 if (Op.isReg() && RegsNeedingCSDBBeforeUse[Op.getReg()]) { 635 NeedToEmitBarrier = true; 636 break; 637 } 638 639 if (NeedToEmitBarrier && !UsesFullSpeculationBarrier) 640 Modified |= insertCSDB(MBB, MBBI, DL); 641 642 Modified |= 643 expandSpeculationSafeValue(MBB, MBBI, UsesFullSpeculationBarrier); 644 645 MBBI = NMBBI; 646 } 647 648 if (RegsNeedingCSDBBeforeUse.any() && !UsesFullSpeculationBarrier) 649 Modified |= insertCSDB(MBB, MBBI, DL); 650 651 return Modified; 652 } 653 654 bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) { 655 if (!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening)) 656 return false; 657 658 MisspeculatingTaintReg = AArch64::X16; 659 MisspeculatingTaintReg32Bit = AArch64::W16; 660 TII = MF.getSubtarget().getInstrInfo(); 661 TRI = MF.getSubtarget().getRegisterInfo(); 662 RegsNeedingCSDBBeforeUse.resize(TRI->getNumRegs()); 663 RegsAlreadyMasked.resize(TRI->getNumRegs()); 664 UseControlFlowSpeculationBarrier = functionUsesHardeningRegister(MF); 665 666 bool Modified = false; 667 668 // Step 1: Enable automatic insertion of SpeculationSafeValue. 669 if (HardenLoads) { 670 LLVM_DEBUG( 671 dbgs() << "***** AArch64SpeculationHardening - automatic insertion of " 672 "SpeculationSafeValue intrinsics *****\n"); 673 for (auto &MBB : MF) 674 Modified |= slhLoads(MBB); 675 } 676 677 // 2. Add instrumentation code to function entry and exits. 
678 LLVM_DEBUG( 679 dbgs() 680 << "***** AArch64SpeculationHardening - track control flow *****\n"); 681 682 SmallVector<MachineBasicBlock *, 2> EntryBlocks; 683 EntryBlocks.push_back(&MF.front()); 684 for (const LandingPadInfo &LPI : MF.getLandingPads()) 685 EntryBlocks.push_back(LPI.LandingPadBlock); 686 for (auto *Entry : EntryBlocks) 687 insertSPToRegTaintPropagation( 688 *Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin())); 689 690 // 3. Add instrumentation code to every basic block. 691 for (auto &MBB : MF) { 692 bool UsesFullSpeculationBarrier = false; 693 Modified |= instrumentControlFlow(MBB, UsesFullSpeculationBarrier); 694 Modified |= 695 lowerSpeculationSafeValuePseudos(MBB, UsesFullSpeculationBarrier); 696 } 697 698 return Modified; 699 } 700 701 /// \brief Returns an instance of the pseudo instruction expansion pass. 702 FunctionPass *llvm::createAArch64SpeculationHardeningPass() { 703 return new AArch64SpeculationHardening(); 704 } 705