//==- X86IndirectThunks.cpp - Construct indirect call/jump thunks for x86 --=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Pass that injects MI thunks used to lower indirect calls in a way that
/// prevents speculation on some x86 processors, mitigating security
/// vulnerabilities due to targeted speculative execution and side channels
/// such as CVE-2017-5715.
///
/// Currently supported thunks include:
/// - Retpoline -- A RET-implemented trampoline that lowers indirect calls
/// - LVI Thunk -- A CALL/JMP-implemented thunk that forces load serialization
///   before making an indirect call/jump
///
/// Note that this is implemented as a MachineFunctionPass rather than a
/// ModulePass because ModulePasses at this point in the LLVM X86 pipeline
/// serialize all transformations, which can consume lots of memory.
///
/// TODO(chandlerc): All of this code could use better comments and
/// documentation.
///
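/// As a caller-side illustration (a sketch of the lowering, not code emitted
/// by this file): with retpoline enabled on x86-64, an indirect call such as
/// `callq *%rax` is lowered to
///   movq %rax, %r11
///   callq __llvm_retpoline_r11
/// so that the only indirect branch actually executed is the one inside the
/// thunk.
///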
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "x86-retpoline-thunks"

static const char RetpolineNamePrefix[] = "__llvm_retpoline_";
static const char R11RetpolineName[] = "__llvm_retpoline_r11";
static const char EAXRetpolineName[] = "__llvm_retpoline_eax";
static const char ECXRetpolineName[] = "__llvm_retpoline_ecx";
static const char EDXRetpolineName[] = "__llvm_retpoline_edx";
static const char EDIRetpolineName[] = "__llvm_retpoline_edi";

static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_";
static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11";

namespace {
// Shared boilerplate for the concrete thunk inserters below. Uses CRTP so
// that calls to the hooks resolve statically to the derived class.
template <typename Derived> class ThunkInserter {
  Derived &getDerived() { return *static_cast<Derived *>(this); }

protected:
  bool InsertedThunks;
  void doInitialization(Module &M) {}
  void createThunkFunction(MachineModuleInfo &MMI, StringRef Name);

public:
  void init(Module &M) {
    InsertedThunks = false;
    getDerived().doInitialization(M);
  }
  // Returns `true` if `MMI` or `MF` was modified.
  bool run(MachineModuleInfo &MMI, MachineFunction &MF);
};

struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> {
  const char *getThunkPrefix() { return RetpolineNamePrefix; }
  bool mayUseThunk(const MachineFunction &MF) {
    const auto &STI = MF.getSubtarget<X86Subtarget>();
    return (STI.useRetpolineIndirectCalls() ||
            STI.useRetpolineIndirectBranches()) &&
           !STI.useRetpolineExternalThunk();
  }
  void insertThunks(MachineModuleInfo &MMI);
  void populateThunk(MachineFunction &MF);
};

struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> {
  const char *getThunkPrefix() { return LVIThunkNamePrefix; }
  bool mayUseThunk(const MachineFunction &MF) {
    return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity();
  }
  void insertThunks(MachineModuleInfo &MMI) {
    createThunkFunction(MMI, R11LVIThunkName);
  }
  void populateThunk(MachineFunction &MF) {
    // Grab the entry MBB and erase any other blocks. O0 codegen appears to
    // generate two bbs for the entry block.
    MachineBasicBlock *Entry = &MF.front();
    Entry->clear();
    while (MF.size() > 1)
      MF.erase(std::next(MF.begin()));

    // This code mitigates LVI by replacing each indirect call/jump with a
    // direct call/jump to a thunk that looks like:
    // ```
    // lfence
    // jmpq *%r11
    // ```
    // This ensures that if the value in register %r11 was loaded from memory,
    // then the value in %r11 is (architecturally) correct prior to the jump.
    const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
    BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE));
    BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11);
    MF.front().addLiveIn(X86::R11);
    return;
  }
};

class X86IndirectThunks : public MachineFunctionPass {
public:
  static char ID;

  X86IndirectThunks() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 Indirect Thunks"; }

  bool doInitialization(Module &M) override;
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<MachineModuleInfoWrapperPass>();
    AU.addPreserved<MachineModuleInfoWrapperPass>();
  }

private:
  std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs;

  // FIXME: When LLVM moves to C++17, these can become folds.
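  // Under C++17, a sketch of the fold-expression form of initTIs would be:
  //   (std::get<ThunkInserterT>(ThunkInserters).init(M), ...);
  // and runTIs could similarly fold over `Modified |= ...`.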
  template <typename... ThunkInserterT>
  static void initTIs(Module &M,
                      std::tuple<ThunkInserterT...> &ThunkInserters) {
    (void)std::initializer_list<int>{
        (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
  }
  template <typename... ThunkInserterT>
  static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
                     std::tuple<ThunkInserterT...> &ThunkInserters) {
    bool Modified = false;
    (void)std::initializer_list<int>{
        Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
    return Modified;
  }
};

} // end anonymous namespace

void RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI) {
  if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64)
    createThunkFunction(MMI, R11RetpolineName);
  else
    for (StringRef Name : {EAXRetpolineName, ECXRetpolineName,
                           EDXRetpolineName, EDIRetpolineName})
      createThunkFunction(MMI, Name);
}

void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
  bool Is64Bit = MF.getTarget().getTargetTriple().getArch() == Triple::x86_64;
  Register ThunkReg;
  if (Is64Bit) {
    assert(MF.getName() == "__llvm_retpoline_r11" &&
           "Should only have an r11 thunk on 64-bit targets");

    // __llvm_retpoline_r11:
    //       callq .Lr11_call_target
    // .Lr11_capture_spec:
    //       pause
    //       lfence
    //       jmp .Lr11_capture_spec
    // .align 16
    // .Lr11_call_target:
    //       movq %r11, (%rsp)
    //       retq
    ThunkReg = X86::R11;
  } else {
    // For 32-bit targets we need to emit a collection of thunks for various
    // possible scratch registers as well as a fallback that uses EDI, which is
    // normally callee saved.
    // __llvm_retpoline_eax:
    //       calll .Leax_call_target
    // .Leax_capture_spec:
    //       pause
    //       lfence
    //       jmp .Leax_capture_spec
    // .align 16
    // .Leax_call_target:
    //       movl %eax, (%esp)  # Clobber return addr
    //       retl
    //
    // __llvm_retpoline_ecx:
    //       ...  # Same setup
    //       movl %ecx, (%esp)
    //       retl
    //
    // __llvm_retpoline_edx:
    //       ...  # Same setup
    //       movl %edx, (%esp)
    //       retl
    //
    // __llvm_retpoline_edi:
    //       ...  # Same setup
    //       movl %edi, (%esp)
    //       retl
    if (MF.getName() == EAXRetpolineName)
      ThunkReg = X86::EAX;
    else if (MF.getName() == ECXRetpolineName)
      ThunkReg = X86::ECX;
    else if (MF.getName() == EDXRetpolineName)
      ThunkReg = X86::EDX;
    else if (MF.getName() == EDIRetpolineName)
      ThunkReg = X86::EDI;
    else
      llvm_unreachable("Invalid thunk name on x86-32!");
  }

  const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
  // Grab the entry MBB and erase any other blocks. O0 codegen appears to
  // generate two bbs for the entry block.
  MachineBasicBlock *Entry = &MF.front();
  Entry->clear();
  while (MF.size() > 1)
    MF.erase(std::next(MF.begin()));

  MachineBasicBlock *CaptureSpec =
      MF.CreateMachineBasicBlock(Entry->getBasicBlock());
  MachineBasicBlock *CallTarget =
      MF.CreateMachineBasicBlock(Entry->getBasicBlock());
  MCSymbol *TargetSym = MF.getContext().createTempSymbol();
  MF.push_back(CaptureSpec);
  MF.push_back(CallTarget);

  const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
  const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;

  Entry->addLiveIn(ThunkReg);
  BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);

  // The MIR verifier thinks that the CALL in the entry block will fall through
  // to CaptureSpec, so mark it as the successor. Technically, CallTarget is
  // the successor, but the MIR verifier doesn't know how to cope with that.
  Entry->addSuccessor(CaptureSpec);

  // In the capture loop for speculation, we want to stop the processor from
  // speculating as fast as possible. On Intel processors, the PAUSE
  // instruction will block speculation without consuming any execution
  // resources. On AMD processors, the PAUSE instruction is (essentially) a
  // nop, so we also use an LFENCE instruction, which they have advised will
  // stop speculation as well with minimal resource utilization. We still end
  // the capture with a jump to form an infinite loop, guaranteeing that no
  // matter what implementation of the x86 ISA is running, speculation down
  // this code path can never escape.
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
  CaptureSpec->setHasAddressTaken();
  CaptureSpec->addSuccessor(CaptureSpec);

  CallTarget->addLiveIn(ThunkReg);
  CallTarget->setHasAddressTaken();
  CallTarget->setAlignment(Align(16));

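  // The CALL in the entry block pushed a return address that points at the
  // capture loop; storing ThunkReg over that stack slot means the RET below
  // architecturally transfers to the original indirect branch target, while
  // the return predictor still aims at the harmless capture loop.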
  // Insert return address clobber
  const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
  const Register SPReg = Is64Bit ? X86::RSP : X86::ESP;
  addRegOffset(BuildMI(CallTarget, DebugLoc(), TII->get(MovOpc)), SPReg, false,
               0)
      .addReg(ThunkReg);

  CallTarget->back().setPreInstrSymbol(MF, TargetSym);
  BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}

template <typename Derived>
void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI,
                                                 StringRef Name) {
  assert(Name.startswith(getDerived().getThunkPrefix()) &&
         "Created a thunk with an unexpected prefix!");

  Module &M = const_cast<Module &>(*MMI.getModule());
  LLVMContext &Ctx = M.getContext();
  auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
  Function *F =
      Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
  F->setVisibility(GlobalValue::HiddenVisibility);
  F->setComdat(M.getOrInsertComdat(Name));

  // Add Attributes so that we don't create a frame, unwind information, or
  // inline.
  AttrBuilder B;
  B.addAttribute(llvm::Attribute::NoUnwind);
  B.addAttribute(llvm::Attribute::Naked);
  F->addAttributes(llvm::AttributeList::FunctionIndex, B);

  // Populate our function a bit so that we can verify.
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> Builder(Entry);

  Builder.CreateRetVoid();

  // MachineFunctions/MachineBasicBlocks aren't created automatically for the
  // IR-level constructs we already made. Create them and insert them into the
  // module.
  MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
  MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);

  // Insert EntryMBB into MF. It's not in the module until we do this.
  MF.insert(MF.end(), EntryMBB);
  // Set MF properties. We never use vregs...
  MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
}

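// Note the two-phase flow here: createThunkFunction above only builds an IR
// shell (the lone `ret void` exists so the verifier is satisfied); the actual
// thunk body is emitted by populateThunk when this pass later visits the new
// MachineFunction and takes the thunk-name branch in run() below.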
template <typename Derived>
bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) {
  // If MF is not a thunk, check to see if we need to insert a thunk.
  if (!MF.getName().startswith(getDerived().getThunkPrefix())) {
    // If we've already inserted a thunk, nothing else to do.
    if (InsertedThunks)
      return false;

    // Only add a thunk if one of the functions has the corresponding feature
    // enabled in its subtarget, and doesn't enable external thunks.
    // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
    // nothing will end up calling it.
    // FIXME: It's a little silly to look at every function just to enumerate
    // the subtargets, but eventually we'll want to look at them for indirect
    // calls, so maybe this is OK.
    if (!getDerived().mayUseThunk(MF))
      return false;

    getDerived().insertThunks(MMI);
    InsertedThunks = true;
    return true;
  }

  // If this *is* a thunk function, we need to populate it with the correct MI.
  getDerived().populateThunk(MF);
  return true;
}

FunctionPass *llvm::createX86IndirectThunksPass() {
  return new X86IndirectThunks();
}

char X86IndirectThunks::ID = 0;

bool X86IndirectThunks::doInitialization(Module &M) {
  initTIs(M, TIs);
  return false;
}

bool X86IndirectThunks::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << getPassName() << '\n');
  auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
  return runTIs(MMI, MF, TIs);
}