//===- AArch64SLSHardening.cpp - Harden Straight Line Missspeculation -----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file contains a pass to insert code to mitigate against side channel // vulnerabilities that may happen under straight line miss-speculation. // //===----------------------------------------------------------------------===// #include "AArch64InstrInfo.h" #include "AArch64Subtarget.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/IndirectThunks.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" #include using namespace llvm; #define DEBUG_TYPE "aarch64-sls-hardening" #define AARCH64_SLS_HARDENING_NAME "AArch64 sls hardening pass" namespace { class AArch64SLSHardening : public MachineFunctionPass { public: const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const AArch64Subtarget *ST; static char ID; AArch64SLSHardening() : MachineFunctionPass(ID) { initializeAArch64SLSHardeningPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &Fn) override; StringRef getPassName() const override { return AARCH64_SLS_HARDENING_NAME; } private: bool hardenReturnsAndBRs(MachineBasicBlock &MBB) const; bool hardenBLRs(MachineBasicBlock &MBB) const; MachineBasicBlock &ConvertBLRToBL(MachineBasicBlock &MBB, MachineBasicBlock::iterator) const; }; } // end anonymous namespace char AArch64SLSHardening::ID = 0; INITIALIZE_PASS(AArch64SLSHardening, "aarch64-sls-hardening", AARCH64_SLS_HARDENING_NAME, false, false) static void insertSpeculationBarrier(const AArch64Subtarget *ST, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, bool AlwaysUseISBDSB = false) { assert(MBBI != MBB.begin() && "Must not insert SpeculationBarrierEndBB as only instruction in MBB."); assert(std::prev(MBBI)->isBarrier() && "SpeculationBarrierEndBB must only follow unconditional control flow " "instructions."); assert(std::prev(MBBI)->isTerminator() && "SpeculationBarrierEndBB must only follow terminators."); const TargetInstrInfo *TII = ST->getInstrInfo(); unsigned BarrierOpc = ST->hasSB() && !AlwaysUseISBDSB ? AArch64::SpeculationBarrierSBEndBB : AArch64::SpeculationBarrierISBDSBEndBB; if (MBBI == MBB.end() || (MBBI->getOpcode() != AArch64::SpeculationBarrierSBEndBB && MBBI->getOpcode() != AArch64::SpeculationBarrierISBDSBEndBB)) BuildMI(MBB, MBBI, DL, TII->get(BarrierOpc)); } bool AArch64SLSHardening::runOnMachineFunction(MachineFunction &MF) { ST = &MF.getSubtarget(); TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); bool Modified = false; for (auto &MBB : MF) { Modified |= hardenReturnsAndBRs(MBB); Modified |= hardenBLRs(MBB); } return Modified; } static bool isBLR(const MachineInstr &MI) { switch (MI.getOpcode()) { case AArch64::BLR: case AArch64::BLRNoIP: return true; case AArch64::BLRAA: case AArch64::BLRAB: case AArch64::BLRAAZ: case AArch64::BLRABZ: llvm_unreachable("Currently, LLVM's code generator does not support " "producing BLRA* instructions. Therefore, there's no " "support in this pass for those instructions."); } return false; } bool AArch64SLSHardening::hardenReturnsAndBRs(MachineBasicBlock &MBB) const { if (!ST->hardenSlsRetBr()) return false; bool Modified = false; MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(), E = MBB.end(); MachineBasicBlock::iterator NextMBBI; for (; MBBI != E; MBBI = NextMBBI) { MachineInstr &MI = *MBBI; NextMBBI = std::next(MBBI); if (MI.isReturn() || isIndirectBranchOpcode(MI.getOpcode())) { assert(MI.isTerminator()); insertSpeculationBarrier(ST, MBB, std::next(MBBI), MI.getDebugLoc()); Modified = true; } } return Modified; } static const char SLSBLRNamePrefix[] = "__llvm_slsblr_thunk_"; static const struct ThunkNameAndReg { const char* Name; Register Reg; } SLSBLRThunks[] = { { "__llvm_slsblr_thunk_x0", AArch64::X0}, { "__llvm_slsblr_thunk_x1", AArch64::X1}, { "__llvm_slsblr_thunk_x2", AArch64::X2}, { "__llvm_slsblr_thunk_x3", AArch64::X3}, { "__llvm_slsblr_thunk_x4", AArch64::X4}, { "__llvm_slsblr_thunk_x5", AArch64::X5}, { "__llvm_slsblr_thunk_x6", AArch64::X6}, { "__llvm_slsblr_thunk_x7", AArch64::X7}, { "__llvm_slsblr_thunk_x8", AArch64::X8}, { "__llvm_slsblr_thunk_x9", AArch64::X9}, { "__llvm_slsblr_thunk_x10", AArch64::X10}, { "__llvm_slsblr_thunk_x11", AArch64::X11}, { "__llvm_slsblr_thunk_x12", AArch64::X12}, { "__llvm_slsblr_thunk_x13", AArch64::X13}, { "__llvm_slsblr_thunk_x14", AArch64::X14}, { "__llvm_slsblr_thunk_x15", AArch64::X15}, // X16 and X17 are deliberately missing, as the mitigation requires those // register to not be used in BLR. See comment in ConvertBLRToBL for more // details. { "__llvm_slsblr_thunk_x18", AArch64::X18}, { "__llvm_slsblr_thunk_x19", AArch64::X19}, { "__llvm_slsblr_thunk_x20", AArch64::X20}, { "__llvm_slsblr_thunk_x21", AArch64::X21}, { "__llvm_slsblr_thunk_x22", AArch64::X22}, { "__llvm_slsblr_thunk_x23", AArch64::X23}, { "__llvm_slsblr_thunk_x24", AArch64::X24}, { "__llvm_slsblr_thunk_x25", AArch64::X25}, { "__llvm_slsblr_thunk_x26", AArch64::X26}, { "__llvm_slsblr_thunk_x27", AArch64::X27}, { "__llvm_slsblr_thunk_x28", AArch64::X28}, { "__llvm_slsblr_thunk_x29", AArch64::FP}, // X30 is deliberately missing, for similar reasons as X16 and X17 are // missing. { "__llvm_slsblr_thunk_x31", AArch64::XZR}, }; namespace { struct SLSBLRThunkInserter : ThunkInserter { const char *getThunkPrefix() { return SLSBLRNamePrefix; } bool mayUseThunk(const MachineFunction &MF) { ComdatThunks &= !MF.getSubtarget().hardenSlsNoComdat(); // FIXME: This could also check if there are any BLRs in the function // to more accurately reflect if a thunk will be needed. return MF.getSubtarget().hardenSlsBlr(); } void insertThunks(MachineModuleInfo &MMI); void populateThunk(MachineFunction &MF); private: bool ComdatThunks = true; }; } // namespace void SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI) { // FIXME: It probably would be possible to filter which thunks to produce // based on which registers are actually used in BLR instructions in this // function. But would that be a worthwhile optimization? for (auto T : SLSBLRThunks) createThunkFunction(MMI, T.Name, ComdatThunks); } void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) { // FIXME: How to better communicate Register number, rather than through // name and lookup table? assert(MF.getName().startswith(getThunkPrefix())); auto ThunkIt = llvm::find_if( SLSBLRThunks, [&MF](auto T) { return T.Name == MF.getName(); }); assert(ThunkIt != std::end(SLSBLRThunks)); Register ThunkReg = ThunkIt->Reg; const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); assert (MF.size() == 1); MachineBasicBlock *Entry = &MF.front(); Entry->clear(); // These thunks need to consist of the following instructions: // __llvm_slsblr_thunk_xN: // BR xN // barrierInsts Entry->addLiveIn(ThunkReg); // MOV X16, ThunkReg == ORR X16, XZR, ThunkReg, LSL #0 BuildMI(Entry, DebugLoc(), TII->get(AArch64::ORRXrs), AArch64::X16) .addReg(AArch64::XZR) .addReg(ThunkReg) .addImm(0); BuildMI(Entry, DebugLoc(), TII->get(AArch64::BR)).addReg(AArch64::X16); // Make sure the thunks do not make use of the SB extension in case there is // a function somewhere that will call to it that for some reason disabled // the SB extension locally on that function, even though it's enabled for // the module otherwise. Therefore set AlwaysUseISBSDB to true. insertSpeculationBarrier(&MF.getSubtarget(), *Entry, Entry->end(), DebugLoc(), true /*AlwaysUseISBDSB*/); } MachineBasicBlock & AArch64SLSHardening::ConvertBLRToBL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { // Transform a BLR to a BL as follows: // Before: // |-----------------------------| // | ... | // | instI | // | BLR xN | // | instJ | // | ... | // |-----------------------------| // // After: // |-----------------------------| // | ... | // | instI | // | BL __llvm_slsblr_thunk_xN | // | instJ | // | ... | // |-----------------------------| // // __llvm_slsblr_thunk_xN: // |-----------------------------| // | BR xN | // | barrierInsts | // |-----------------------------| // // The __llvm_slsblr_thunk_xN thunks are created by the SLSBLRThunkInserter. // This function merely needs to transform BLR xN into BL // __llvm_slsblr_thunk_xN. // // Since linkers are allowed to clobber X16 and X17 on function calls, the // above mitigation only works if the original BLR instruction was not // BLR X16 nor BLR X17. Code generation before must make sure that no BLR // X16|X17 was produced if the mitigation is enabled. MachineInstr &BLR = *MBBI; assert(isBLR(BLR)); unsigned BLOpcode; Register Reg; bool RegIsKilled; switch (BLR.getOpcode()) { case AArch64::BLR: case AArch64::BLRNoIP: BLOpcode = AArch64::BL; Reg = BLR.getOperand(0).getReg(); assert(Reg != AArch64::X16 && Reg != AArch64::X17 && Reg != AArch64::LR); RegIsKilled = BLR.getOperand(0).isKill(); break; case AArch64::BLRAA: case AArch64::BLRAB: case AArch64::BLRAAZ: case AArch64::BLRABZ: llvm_unreachable("BLRA instructions cannot yet be produced by LLVM, " "therefore there is no need to support them for now."); default: llvm_unreachable("unhandled BLR"); } DebugLoc DL = BLR.getDebugLoc(); // If we'd like to support also BLRAA and BLRAB instructions, we'd need // a lot more different kind of thunks. // For example, a // // BLRAA xN, xM // // instruction probably would need to be transformed to something like: // // BL __llvm_slsblraa_thunk_x_x // // __llvm_slsblraa_thunk_x_x: // BRAA x, x // barrierInsts // // Given that about 30 different values of N are possible and about 30 // different values of M are possible in the above, with the current way // of producing indirect thunks, we'd be producing about 30 times 30, i.e. // about 900 thunks (where most might not be actually called). This would // multiply further by two to support both BLRAA and BLRAB variants of those // instructions. // If we'd want to support this, we'd probably need to look into a different // way to produce thunk functions, based on which variants are actually // needed, rather than producing all possible variants. // So far, LLVM does never produce BLRA* instructions, so let's leave this // for the future when LLVM can start producing BLRA* instructions. MachineFunction &MF = *MBBI->getMF(); MCContext &Context = MBB.getParent()->getContext(); auto ThunkIt = llvm::find_if(SLSBLRThunks, [Reg](auto T) { return T.Reg == Reg; }); assert (ThunkIt != std::end(SLSBLRThunks)); MCSymbol *Sym = Context.getOrCreateSymbol(ThunkIt->Name); MachineInstr *BL = BuildMI(MBB, MBBI, DL, TII->get(BLOpcode)).addSym(Sym); // Now copy the implicit operands from BLR to BL and copy other necessary // info. // However, both BLR and BL instructions implictly use SP and implicitly // define LR. Blindly copying implicit operands would result in SP and LR // operands to be present multiple times. While this may not be too much of // an issue, let's avoid that for cleanliness, by removing those implicit // operands from the BL created above before we copy over all implicit // operands from the BLR. int ImpLROpIdx = -1; int ImpSPOpIdx = -1; for (unsigned OpIdx = BL->getNumExplicitOperands(); OpIdx < BL->getNumOperands(); OpIdx++) { MachineOperand Op = BL->getOperand(OpIdx); if (!Op.isReg()) continue; if (Op.getReg() == AArch64::LR && Op.isDef()) ImpLROpIdx = OpIdx; if (Op.getReg() == AArch64::SP && !Op.isDef()) ImpSPOpIdx = OpIdx; } assert(ImpLROpIdx != -1); assert(ImpSPOpIdx != -1); int FirstOpIdxToRemove = std::max(ImpLROpIdx, ImpSPOpIdx); int SecondOpIdxToRemove = std::min(ImpLROpIdx, ImpSPOpIdx); BL->removeOperand(FirstOpIdxToRemove); BL->removeOperand(SecondOpIdxToRemove); // Now copy over the implicit operands from the original BLR BL->copyImplicitOps(MF, BLR); MF.moveCallSiteInfo(&BLR, BL); // Also add the register called in the BLR as being used in the called thunk. BL->addOperand(MachineOperand::CreateReg(Reg, false /*isDef*/, true /*isImp*/, RegIsKilled /*isKill*/)); // Remove BLR instruction MBB.erase(MBBI); return MBB; } bool AArch64SLSHardening::hardenBLRs(MachineBasicBlock &MBB) const { if (!ST->hardenSlsBlr()) return false; bool Modified = false; MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); MachineBasicBlock::iterator NextMBBI; for (; MBBI != E; MBBI = NextMBBI) { MachineInstr &MI = *MBBI; NextMBBI = std::next(MBBI); if (isBLR(MI)) { ConvertBLRToBL(MBB, MBBI); Modified = true; } } return Modified; } FunctionPass *llvm::createAArch64SLSHardeningPass() { return new AArch64SLSHardening(); } namespace { class AArch64IndirectThunks : public MachineFunctionPass { public: static char ID; AArch64IndirectThunks() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "AArch64 Indirect Thunks"; } bool doInitialization(Module &M) override; bool runOnMachineFunction(MachineFunction &MF) override; private: std::tuple TIs; // FIXME: When LLVM moves to C++17, these can become folds template static void initTIs(Module &M, std::tuple &ThunkInserters) { (void)std::initializer_list{ (std::get(ThunkInserters).init(M), 0)...}; } template static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF, std::tuple &ThunkInserters) { bool Modified = false; (void)std::initializer_list{ Modified |= std::get(ThunkInserters).run(MMI, MF)...}; return Modified; } }; } // end anonymous namespace char AArch64IndirectThunks::ID = 0; FunctionPass *llvm::createAArch64IndirectThunks() { return new AArch64IndirectThunks(); } bool AArch64IndirectThunks::doInitialization(Module &M) { initTIs(M, TIs); return false; } bool AArch64IndirectThunks::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << getPassName() << '\n'); auto &MMI = getAnalysis().getMMI(); return runTIs(MMI, MF, TIs); }