1 //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains code to lower X86 MachineInstrs to their corresponding 10 // MCInst records. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MCTargetDesc/X86ATTInstPrinter.h" 15 #include "MCTargetDesc/X86BaseInfo.h" 16 #include "MCTargetDesc/X86EncodingOptimization.h" 17 #include "MCTargetDesc/X86InstComments.h" 18 #include "MCTargetDesc/X86ShuffleDecode.h" 19 #include "MCTargetDesc/X86TargetStreamer.h" 20 #include "X86AsmPrinter.h" 21 #include "X86MachineFunctionInfo.h" 22 #include "X86RegisterInfo.h" 23 #include "X86ShuffleDecodeConstantPool.h" 24 #include "X86Subtarget.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringExtras.h" 27 #include "llvm/CodeGen/MachineConstantPool.h" 28 #include "llvm/CodeGen/MachineFunction.h" 29 #include "llvm/CodeGen/MachineModuleInfoImpls.h" 30 #include "llvm/CodeGen/MachineOperand.h" 31 #include "llvm/CodeGen/StackMaps.h" 32 #include "llvm/IR/DataLayout.h" 33 #include "llvm/IR/GlobalValue.h" 34 #include "llvm/IR/Mangler.h" 35 #include "llvm/MC/MCAsmInfo.h" 36 #include "llvm/MC/MCCodeEmitter.h" 37 #include "llvm/MC/MCContext.h" 38 #include "llvm/MC/MCExpr.h" 39 #include "llvm/MC/MCFixup.h" 40 #include "llvm/MC/MCInst.h" 41 #include "llvm/MC/MCInstBuilder.h" 42 #include "llvm/MC/MCSection.h" 43 #include "llvm/MC/MCSectionELF.h" 44 #include "llvm/MC/MCStreamer.h" 45 #include "llvm/MC/MCSymbol.h" 46 #include "llvm/MC/MCSymbolELF.h" 47 #include "llvm/MC/TargetRegistry.h" 48 #include "llvm/Target/TargetLoweringObjectFile.h" 49 #include "llvm/Target/TargetMachine.h" 50 #include "llvm/Transforms/Instrumentation/AddressSanitizer.h" 51 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" 52 #include <string> 53 54 using namespace llvm; 55 56 namespace { 57 58 /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. 59 class X86MCInstLower { 60 MCContext &Ctx; 61 const MachineFunction &MF; 62 const TargetMachine &TM; 63 const MCAsmInfo &MAI; 64 X86AsmPrinter &AsmPrinter; 65 66 public: 67 X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter); 68 69 std::optional<MCOperand> LowerMachineOperand(const MachineInstr *MI, 70 const MachineOperand &MO) const; 71 void Lower(const MachineInstr *MI, MCInst &OutMI) const; 72 73 MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; 74 MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; 75 76 private: 77 MachineModuleInfoMachO &getMachOMMI() const; 78 }; 79 80 } // end anonymous namespace 81 82 /// A RAII helper which defines a region of instructions which can't have 83 /// padding added between them for correctness. 
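/// The sequences lowered later in this file (XRay sleds, statepoints,
/// patchpoints, faulting ops, and the TLS call sequences) rely on a fixed
/// byte layout that a runtime or the linker later patches or relaxes, so the
/// streamer must not insert alignment padding inside them.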
84 struct NoAutoPaddingScope { 85 MCStreamer &OS; 86 const bool OldAllowAutoPadding; 87 NoAutoPaddingScope(MCStreamer &OS) 88 : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) { 89 changeAndComment(false); 90 } 91 ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); } 92 void changeAndComment(bool b) { 93 if (b == OS.getAllowAutoPadding()) 94 return; 95 OS.setAllowAutoPadding(b); 96 if (b) 97 OS.emitRawComment("autopadding"); 98 else 99 OS.emitRawComment("noautopadding"); 100 } 101 }; 102 103 // Emit a minimal sequence of nops spanning NumBytes bytes. 104 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, 105 const X86Subtarget *Subtarget); 106 107 void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst, 108 const MCSubtargetInfo &STI, 109 MCCodeEmitter *CodeEmitter) { 110 if (InShadow) { 111 SmallString<256> Code; 112 SmallVector<MCFixup, 4> Fixups; 113 CodeEmitter->encodeInstruction(Inst, Code, Fixups, STI); 114 CurrentShadowSize += Code.size(); 115 if (CurrentShadowSize >= RequiredShadowSize) 116 InShadow = false; // The shadow is big enough. Stop counting. 117 } 118 } 119 120 void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding( 121 MCStreamer &OutStreamer, const MCSubtargetInfo &STI) { 122 if (InShadow && CurrentShadowSize < RequiredShadowSize) { 123 InShadow = false; 124 emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize, 125 &MF->getSubtarget<X86Subtarget>()); 126 } 127 } 128 129 void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) { 130 OutStreamer->emitInstruction(Inst, getSubtargetInfo()); 131 SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get()); 132 } 133 134 X86MCInstLower::X86MCInstLower(const MachineFunction &mf, 135 X86AsmPrinter &asmprinter) 136 : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()), 137 AsmPrinter(asmprinter) {} 138 139 MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { 140 return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>(); 141 } 142 143 /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol 144 /// operand to an MCSymbol. 145 MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const { 146 const Triple &TT = TM.getTargetTriple(); 147 if (MO.isGlobal() && TT.isOSBinFormatELF()) 148 return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal()); 149 150 const DataLayout &DL = MF.getDataLayout(); 151 assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && 152 "Isn't a symbol reference"); 153 154 MCSymbol *Sym = nullptr; 155 SmallString<128> Name; 156 StringRef Suffix; 157 158 switch (MO.getTargetFlags()) { 159 case X86II::MO_DLLIMPORT: 160 // Handle dllimport linkage. 161 Name += "__imp_"; 162 break; 163 case X86II::MO_COFFSTUB: 164 Name += ".refptr."; 165 break; 166 case X86II::MO_DARWIN_NONLAZY: 167 case X86II::MO_DARWIN_NONLAZY_PIC_BASE: 168 Suffix = "$non_lazy_ptr"; 169 break; 170 } 171 172 if (!Suffix.empty()) 173 Name += DL.getPrivateGlobalPrefix(); 174 175 if (MO.isGlobal()) { 176 const GlobalValue *GV = MO.getGlobal(); 177 AsmPrinter.getNameWithPrefix(Name, GV); 178 } else if (MO.isSymbol()) { 179 Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL); 180 } else if (MO.isMBB()) { 181 assert(Suffix.empty()); 182 Sym = MO.getMBB()->getSymbol(); 183 } 184 185 Name += Suffix; 186 if (!Sym) 187 Sym = Ctx.getOrCreateSymbol(Name); 188 189 // If the target flags on the operand changes the name of the symbol, do that 190 // before we return the symbol. 
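// For example, an external reference "foo" with MO_COFFSTUB was renamed to
// ".refptr.foo" above; the cases below additionally record the stub in the
// machine module info so the stub pointer itself is emitted when the module
// is finalized. The Darwin $non_lazy_ptr stubs are handled the same way.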
191 switch (MO.getTargetFlags()) { 192 default: 193 break; 194 case X86II::MO_COFFSTUB: { 195 MachineModuleInfoCOFF &MMICOFF = 196 MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>(); 197 MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym); 198 if (!StubSym.getPointer()) { 199 assert(MO.isGlobal() && "Extern symbol not handled yet"); 200 StubSym = MachineModuleInfoImpl::StubValueTy( 201 AsmPrinter.getSymbol(MO.getGlobal()), true); 202 } 203 break; 204 } 205 case X86II::MO_DARWIN_NONLAZY: 206 case X86II::MO_DARWIN_NONLAZY_PIC_BASE: { 207 MachineModuleInfoImpl::StubValueTy &StubSym = 208 getMachOMMI().getGVStubEntry(Sym); 209 if (!StubSym.getPointer()) { 210 assert(MO.isGlobal() && "Extern symbol not handled yet"); 211 StubSym = MachineModuleInfoImpl::StubValueTy( 212 AsmPrinter.getSymbol(MO.getGlobal()), 213 !MO.getGlobal()->hasInternalLinkage()); 214 } 215 break; 216 } 217 } 218 219 return Sym; 220 } 221 222 MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, 223 MCSymbol *Sym) const { 224 // FIXME: We would like an efficient form for this, so we don't have to do a 225 // lot of extra uniquing. 226 const MCExpr *Expr = nullptr; 227 MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; 228 229 switch (MO.getTargetFlags()) { 230 default: 231 llvm_unreachable("Unknown target flag on GV operand"); 232 case X86II::MO_NO_FLAG: // No flag. 233 // These affect the name of the symbol, not any suffix. 234 case X86II::MO_DARWIN_NONLAZY: 235 case X86II::MO_DLLIMPORT: 236 case X86II::MO_COFFSTUB: 237 break; 238 239 case X86II::MO_TLVP: 240 RefKind = MCSymbolRefExpr::VK_TLVP; 241 break; 242 case X86II::MO_TLVP_PIC_BASE: 243 Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); 244 // Subtract the pic base. 245 Expr = MCBinaryExpr::createSub( 246 Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx); 247 break; 248 case X86II::MO_SECREL: 249 RefKind = MCSymbolRefExpr::VK_SECREL; 250 break; 251 case X86II::MO_TLSGD: 252 RefKind = MCSymbolRefExpr::VK_TLSGD; 253 break; 254 case X86II::MO_TLSLD: 255 RefKind = MCSymbolRefExpr::VK_TLSLD; 256 break; 257 case X86II::MO_TLSLDM: 258 RefKind = MCSymbolRefExpr::VK_TLSLDM; 259 break; 260 case X86II::MO_GOTTPOFF: 261 RefKind = MCSymbolRefExpr::VK_GOTTPOFF; 262 break; 263 case X86II::MO_INDNTPOFF: 264 RefKind = MCSymbolRefExpr::VK_INDNTPOFF; 265 break; 266 case X86II::MO_TPOFF: 267 RefKind = MCSymbolRefExpr::VK_TPOFF; 268 break; 269 case X86II::MO_DTPOFF: 270 RefKind = MCSymbolRefExpr::VK_DTPOFF; 271 break; 272 case X86II::MO_NTPOFF: 273 RefKind = MCSymbolRefExpr::VK_NTPOFF; 274 break; 275 case X86II::MO_GOTNTPOFF: 276 RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; 277 break; 278 case X86II::MO_GOTPCREL: 279 RefKind = MCSymbolRefExpr::VK_GOTPCREL; 280 break; 281 case X86II::MO_GOTPCREL_NORELAX: 282 RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX; 283 break; 284 case X86II::MO_GOT: 285 RefKind = MCSymbolRefExpr::VK_GOT; 286 break; 287 case X86II::MO_GOTOFF: 288 RefKind = MCSymbolRefExpr::VK_GOTOFF; 289 break; 290 case X86II::MO_PLT: 291 RefKind = MCSymbolRefExpr::VK_PLT; 292 break; 293 case X86II::MO_ABS8: 294 RefKind = MCSymbolRefExpr::VK_X86_ABS8; 295 break; 296 case X86II::MO_PIC_BASE_OFFSET: 297 case X86II::MO_DARWIN_NONLAZY_PIC_BASE: 298 Expr = MCSymbolRefExpr::create(Sym, Ctx); 299 // Subtract the pic base. 
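// The resulting expression is (Sym - <picbase label>), a constant the
// assembler/linker can resolve; at run time it is added back to the PIC base
// register to form the address.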
300 Expr = MCBinaryExpr::createSub( 301 Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx); 302 if (MO.isJTI()) { 303 assert(MAI.doesSetDirectiveSuppressReloc()); 304 // If .set directive is supported, use it to reduce the number of 305 // relocations the assembler will generate for differences between 306 // local labels. This is only safe when the symbols are in the same 307 // section so we are restricting it to jumptable references. 308 MCSymbol *Label = Ctx.createTempSymbol(); 309 AsmPrinter.OutStreamer->emitAssignment(Label, Expr); 310 Expr = MCSymbolRefExpr::create(Label, Ctx); 311 } 312 break; 313 } 314 315 if (!Expr) 316 Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx); 317 318 if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) 319 Expr = MCBinaryExpr::createAdd( 320 Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); 321 return MCOperand::createExpr(Expr); 322 } 323 324 static unsigned getRetOpcode(const X86Subtarget &Subtarget) { 325 return Subtarget.is64Bit() ? X86::RET64 : X86::RET32; 326 } 327 328 std::optional<MCOperand> 329 X86MCInstLower::LowerMachineOperand(const MachineInstr *MI, 330 const MachineOperand &MO) const { 331 switch (MO.getType()) { 332 default: 333 MI->print(errs()); 334 llvm_unreachable("unknown operand type"); 335 case MachineOperand::MO_Register: 336 // Ignore all implicit register operands. 337 if (MO.isImplicit()) 338 return std::nullopt; 339 return MCOperand::createReg(MO.getReg()); 340 case MachineOperand::MO_Immediate: 341 return MCOperand::createImm(MO.getImm()); 342 case MachineOperand::MO_MachineBasicBlock: 343 case MachineOperand::MO_GlobalAddress: 344 case MachineOperand::MO_ExternalSymbol: 345 return LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); 346 case MachineOperand::MO_MCSymbol: 347 return LowerSymbolOperand(MO, MO.getMCSymbol()); 348 case MachineOperand::MO_JumpTableIndex: 349 return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex())); 350 case MachineOperand::MO_ConstantPoolIndex: 351 return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex())); 352 case MachineOperand::MO_BlockAddress: 353 return LowerSymbolOperand( 354 MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress())); 355 case MachineOperand::MO_RegisterMask: 356 // Ignore call clobbers. 357 return std::nullopt; 358 } 359 } 360 361 // Replace TAILJMP opcodes with their equivalent opcodes that have encoding 362 // information. 
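// e.g. TAILJMPd64 is a pseudo that carries tail-call semantics through the
// backend; by this point those semantics are no longer needed, so it can be
// emitted as an ordinary JMP_1 (and TAILJMPr64 as JMP64r, etc.).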
363 static unsigned convertTailJumpOpcode(unsigned Opcode) { 364 switch (Opcode) { 365 case X86::TAILJMPr: 366 Opcode = X86::JMP32r; 367 break; 368 case X86::TAILJMPm: 369 Opcode = X86::JMP32m; 370 break; 371 case X86::TAILJMPr64: 372 Opcode = X86::JMP64r; 373 break; 374 case X86::TAILJMPm64: 375 Opcode = X86::JMP64m; 376 break; 377 case X86::TAILJMPr64_REX: 378 Opcode = X86::JMP64r_REX; 379 break; 380 case X86::TAILJMPm64_REX: 381 Opcode = X86::JMP64m_REX; 382 break; 383 case X86::TAILJMPd: 384 case X86::TAILJMPd64: 385 Opcode = X86::JMP_1; 386 break; 387 case X86::TAILJMPd_CC: 388 case X86::TAILJMPd64_CC: 389 Opcode = X86::JCC_1; 390 break; 391 } 392 393 return Opcode; 394 } 395 396 void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { 397 OutMI.setOpcode(MI->getOpcode()); 398 399 for (const MachineOperand &MO : MI->operands()) 400 if (auto MaybeMCOp = LowerMachineOperand(MI, MO)) 401 OutMI.addOperand(*MaybeMCOp); 402 403 bool In64BitMode = AsmPrinter.getSubtarget().is64Bit(); 404 if (X86::optimizeInstFromVEX3ToVEX2(OutMI, MI->getDesc()) || 405 X86::optimizeShiftRotateWithImmediateOne(OutMI) || 406 X86::optimizeVPCMPWithImmediateOneOrSix(OutMI) || 407 X86::optimizeMOVSX(OutMI) || X86::optimizeINCDEC(OutMI, In64BitMode) || 408 X86::optimizeMOV(OutMI, In64BitMode) || 409 X86::optimizeToFixedRegisterOrShortImmediateForm(OutMI)) 410 return; 411 412 // Handle a few special cases to eliminate operand modifiers. 413 switch (OutMI.getOpcode()) { 414 case X86::LEA64_32r: 415 case X86::LEA64r: 416 case X86::LEA16r: 417 case X86::LEA32r: 418 // LEA should have a segment register, but it must be empty. 419 assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands && 420 "Unexpected # of LEA operands"); 421 assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 && 422 "LEA has segment specified!"); 423 break; 424 case X86::MULX32Hrr: 425 case X86::MULX32Hrm: 426 case X86::MULX64Hrr: 427 case X86::MULX64Hrm: { 428 // Turn into regular MULX by duplicating the destination. 429 unsigned NewOpc; 430 switch (OutMI.getOpcode()) { 431 default: llvm_unreachable("Invalid opcode"); 432 case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break; 433 case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break; 434 case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break; 435 case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break; 436 } 437 OutMI.setOpcode(NewOpc); 438 // Duplicate the destination. 439 unsigned DestReg = OutMI.getOperand(0).getReg(); 440 OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg)); 441 break; 442 } 443 // CALL64r, CALL64pcrel32 - These instructions used to have 444 // register inputs modeled as normal uses instead of implicit uses. As such, 445 // they we used to truncate off all but the first operand (the callee). This 446 // issue seems to have been fixed at some point. This assert verifies that. 447 case X86::CALL64r: 448 case X86::CALL64pcrel32: 449 assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!"); 450 break; 451 case X86::EH_RETURN: 452 case X86::EH_RETURN64: { 453 OutMI = MCInst(); 454 OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget())); 455 break; 456 } 457 case X86::CLEANUPRET: { 458 // Replace CLEANUPRET with the appropriate RET. 459 OutMI = MCInst(); 460 OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget())); 461 break; 462 } 463 case X86::CATCHRET: { 464 // Replace CATCHRET with the appropriate RET. 465 const X86Subtarget &Subtarget = AsmPrinter.getSubtarget(); 466 unsigned ReturnReg = In64BitMode ? 
X86::RAX : X86::EAX; 467 OutMI = MCInst(); 468 OutMI.setOpcode(getRetOpcode(Subtarget)); 469 OutMI.addOperand(MCOperand::createReg(ReturnReg)); 470 break; 471 } 472 // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump 473 // instruction. 474 case X86::TAILJMPr: 475 case X86::TAILJMPr64: 476 case X86::TAILJMPr64_REX: 477 case X86::TAILJMPd: 478 case X86::TAILJMPd64: 479 assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!"); 480 OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); 481 break; 482 case X86::TAILJMPd_CC: 483 case X86::TAILJMPd64_CC: 484 assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!"); 485 OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); 486 break; 487 case X86::TAILJMPm: 488 case X86::TAILJMPm64: 489 case X86::TAILJMPm64_REX: 490 assert(OutMI.getNumOperands() == X86::AddrNumOperands && 491 "Unexpected number of operands!"); 492 OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); 493 break; 494 case X86::MASKMOVDQU: 495 case X86::VMASKMOVDQU: 496 if (In64BitMode) 497 OutMI.setFlags(X86::IP_HAS_AD_SIZE); 498 break; 499 case X86::BSF16rm: 500 case X86::BSF16rr: 501 case X86::BSF32rm: 502 case X86::BSF32rr: 503 case X86::BSF64rm: 504 case X86::BSF64rr: { 505 // Add an REP prefix to BSF instructions so that new processors can 506 // recognize as TZCNT, which has better performance than BSF. 507 // BSF and TZCNT have different interpretations on ZF bit. So make sure 508 // it won't be used later. 509 const MachineOperand *FlagDef = MI->findRegisterDefOperand(X86::EFLAGS); 510 if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead()) 511 OutMI.setFlags(X86::IP_HAS_REPEAT); 512 break; 513 } 514 default: 515 break; 516 } 517 } 518 519 void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, 520 const MachineInstr &MI) { 521 NoAutoPaddingScope NoPadScope(*OutStreamer); 522 bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 && 523 MI.getOpcode() != X86::TLS_base_addr32; 524 bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 || 525 MI.getOpcode() == X86::TLS_base_addr64; 526 MCContext &Ctx = OutStreamer->getContext(); 527 528 MCSymbolRefExpr::VariantKind SRVK; 529 switch (MI.getOpcode()) { 530 case X86::TLS_addr32: 531 case X86::TLS_addr64: 532 case X86::TLS_addrX32: 533 SRVK = MCSymbolRefExpr::VK_TLSGD; 534 break; 535 case X86::TLS_base_addr32: 536 SRVK = MCSymbolRefExpr::VK_TLSLDM; 537 break; 538 case X86::TLS_base_addr64: 539 case X86::TLS_base_addrX32: 540 SRVK = MCSymbolRefExpr::VK_TLSLD; 541 break; 542 default: 543 llvm_unreachable("unexpected opcode"); 544 } 545 546 const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create( 547 MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx); 548 549 // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD 550 // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is 551 // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by 552 // only using GOT when GOTPCRELX is enabled. 553 // TODO Delete the workaround when GOTPCRELX becomes commonplace. 
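// For the 64-bit LP64 general-dynamic case the sequence emitted below is
// roughly:
//   data16 leaq sym@tlsgd(%rip), %rdi
//   data16 data16 rex64 callq __tls_get_addr@PLT
// The otherwise-meaningless prefixes pad the sequence to the fixed length the
// linker expects when relaxing general-dynamic TLS to initial-exec or
// local-exec.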
554 bool UseGot = MMI->getModule()->getRtLibUseGOT() && 555 Ctx.getAsmInfo()->canRelaxRelocations(); 556 557 if (Is64Bits) { 558 bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD; 559 if (NeedsPadding && Is64BitsLP64) 560 EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); 561 EmitAndCountInstruction(MCInstBuilder(X86::LEA64r) 562 .addReg(X86::RDI) 563 .addReg(X86::RIP) 564 .addImm(1) 565 .addReg(0) 566 .addExpr(Sym) 567 .addReg(0)); 568 const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr"); 569 if (NeedsPadding) { 570 if (!UseGot) 571 EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); 572 EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); 573 EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX)); 574 } 575 if (UseGot) { 576 const MCExpr *Expr = MCSymbolRefExpr::create( 577 TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx); 578 EmitAndCountInstruction(MCInstBuilder(X86::CALL64m) 579 .addReg(X86::RIP) 580 .addImm(1) 581 .addReg(0) 582 .addExpr(Expr) 583 .addReg(0)); 584 } else { 585 EmitAndCountInstruction( 586 MCInstBuilder(X86::CALL64pcrel32) 587 .addExpr(MCSymbolRefExpr::create(TlsGetAddr, 588 MCSymbolRefExpr::VK_PLT, Ctx))); 589 } 590 } else { 591 if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) { 592 EmitAndCountInstruction(MCInstBuilder(X86::LEA32r) 593 .addReg(X86::EAX) 594 .addReg(0) 595 .addImm(1) 596 .addReg(X86::EBX) 597 .addExpr(Sym) 598 .addReg(0)); 599 } else { 600 EmitAndCountInstruction(MCInstBuilder(X86::LEA32r) 601 .addReg(X86::EAX) 602 .addReg(X86::EBX) 603 .addImm(1) 604 .addReg(0) 605 .addExpr(Sym) 606 .addReg(0)); 607 } 608 609 const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr"); 610 if (UseGot) { 611 const MCExpr *Expr = 612 MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx); 613 EmitAndCountInstruction(MCInstBuilder(X86::CALL32m) 614 .addReg(X86::EBX) 615 .addImm(1) 616 .addReg(0) 617 .addExpr(Expr) 618 .addReg(0)); 619 } else { 620 EmitAndCountInstruction( 621 MCInstBuilder(X86::CALLpcrel32) 622 .addExpr(MCSymbolRefExpr::create(TlsGetAddr, 623 MCSymbolRefExpr::VK_PLT, Ctx))); 624 } 625 } 626 } 627 628 /// Emit the largest nop instruction smaller than or equal to \p NumBytes 629 /// bytes. Return the size of nop emitted. 630 static unsigned emitNop(MCStreamer &OS, unsigned NumBytes, 631 const X86Subtarget *Subtarget) { 632 // Determine the longest nop which can be efficiently decoded for the given 633 // target cpu. 15-bytes is the longest single NOP instruction, but some 634 // platforms can't decode the longest forms efficiently. 635 unsigned MaxNopLength = 1; 636 if (Subtarget->is64Bit()) { 637 // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the 638 // IndexReg/BaseReg below need to be updated. 
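// The caps below reflect how quickly each CPU family decodes long NOP forms;
// the actual instructions are built later from the multi-byte NOP (0F 1F /0)
// plus SIB, displacement, segment-override, and 0x66 prefix bytes to reach
// the requested length.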
639 if (Subtarget->hasFeature(X86::TuningFast7ByteNOP)) 640 MaxNopLength = 7; 641 else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP)) 642 MaxNopLength = 15; 643 else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP)) 644 MaxNopLength = 11; 645 else 646 MaxNopLength = 10; 647 } if (Subtarget->is32Bit()) 648 MaxNopLength = 2; 649 650 // Cap a single nop emission at the profitable value for the target 651 NumBytes = std::min(NumBytes, MaxNopLength); 652 653 unsigned NopSize; 654 unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg; 655 IndexReg = Displacement = SegmentReg = 0; 656 BaseReg = X86::RAX; 657 ScaleVal = 1; 658 switch (NumBytes) { 659 case 0: 660 llvm_unreachable("Zero nops?"); 661 break; 662 case 1: 663 NopSize = 1; 664 Opc = X86::NOOP; 665 break; 666 case 2: 667 NopSize = 2; 668 Opc = X86::XCHG16ar; 669 break; 670 case 3: 671 NopSize = 3; 672 Opc = X86::NOOPL; 673 break; 674 case 4: 675 NopSize = 4; 676 Opc = X86::NOOPL; 677 Displacement = 8; 678 break; 679 case 5: 680 NopSize = 5; 681 Opc = X86::NOOPL; 682 Displacement = 8; 683 IndexReg = X86::RAX; 684 break; 685 case 6: 686 NopSize = 6; 687 Opc = X86::NOOPW; 688 Displacement = 8; 689 IndexReg = X86::RAX; 690 break; 691 case 7: 692 NopSize = 7; 693 Opc = X86::NOOPL; 694 Displacement = 512; 695 break; 696 case 8: 697 NopSize = 8; 698 Opc = X86::NOOPL; 699 Displacement = 512; 700 IndexReg = X86::RAX; 701 break; 702 case 9: 703 NopSize = 9; 704 Opc = X86::NOOPW; 705 Displacement = 512; 706 IndexReg = X86::RAX; 707 break; 708 default: 709 NopSize = 10; 710 Opc = X86::NOOPW; 711 Displacement = 512; 712 IndexReg = X86::RAX; 713 SegmentReg = X86::CS; 714 break; 715 } 716 717 unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U); 718 NopSize += NumPrefixes; 719 for (unsigned i = 0; i != NumPrefixes; ++i) 720 OS.emitBytes("\x66"); 721 722 switch (Opc) { 723 default: llvm_unreachable("Unexpected opcode"); 724 case X86::NOOP: 725 OS.emitInstruction(MCInstBuilder(Opc), *Subtarget); 726 break; 727 case X86::XCHG16ar: 728 OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), 729 *Subtarget); 730 break; 731 case X86::NOOPL: 732 case X86::NOOPW: 733 OS.emitInstruction(MCInstBuilder(Opc) 734 .addReg(BaseReg) 735 .addImm(ScaleVal) 736 .addReg(IndexReg) 737 .addImm(Displacement) 738 .addReg(SegmentReg), 739 *Subtarget); 740 break; 741 } 742 assert(NopSize <= NumBytes && "We overemitted?"); 743 return NopSize; 744 } 745 746 /// Emit the optimal amount of multi-byte nops on X86. 
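/// The nops are emitted greedily, largest first; e.g. a request for 17 bytes
/// on a target capped at 10-byte nops produces a 10-byte nop followed by a
/// 7-byte nop.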
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget) {
  unsigned NopsToEmit = NumBytes;
  (void)NopsToEmit;
  while (NumBytes) {
    NumBytes -= emitNop(OS, NumBytes, Subtarget);
    assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
  }
}

void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  StatepointOpers SOpers(&MI);
  if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
    emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
  } else {
    // Lower call target and choose correct opcode
    const MachineOperand &CallTarget = SOpers.getCallTarget();
    MCOperand CallTargetMCOp;
    unsigned CallOpcode;
    switch (CallTarget.getType()) {
    case MachineOperand::MO_GlobalAddress:
    case MachineOperand::MO_ExternalSymbol:
      CallTargetMCOp = MCIL.LowerSymbolOperand(
          CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // address. You'll fail asserts during load & relocation if this
      // symbol is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Immediate:
      CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // immediate. You'll fail asserts during load & relocation if this
      // address is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Register:
      // FIXME: Add retpoline support and remove this.
      if (Subtarget->useIndirectThunkCalls())
        report_fatal_error("Lowering register statepoints with thunks not "
                           "yet implemented.");
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
      CallOpcode = X86::CALL64r;
      break;
    default:
      llvm_unreachable("Unsupported operand type in statepoint call target");
      break;
    }

    // Emit call
    MCInst CallInst;
    CallInst.setOpcode(CallOpcode);
    CallInst.addOperand(CallTargetMCOp);
    OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
  }

  // Record our statepoint node in the same section used by STACKMAP
  // and PATCHPOINT
  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);
  SM.recordStatepoint(*MILabel, MI);
}

void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
                                     X86MCInstLower &MCIL) {
  // FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>,
  //                  <opcode>, <operands>

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  Register DefRegister = FaultingMI.getOperand(0).getReg();
  FaultMaps::FaultKind FK =
      static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
  MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
  unsigned Opcode = FaultingMI.getOperand(3).getImm();
  unsigned OperandsBeginIdx = 4;

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *FaultingLabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(FaultingLabel);

  assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
  FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);

  MCInst MI;
  MI.setOpcode(Opcode);

  if (DefRegister != X86::NoRegister)
    MI.addOperand(MCOperand::createReg(DefRegister));

  for (const MachineOperand &MO :
       llvm::drop_begin(FaultingMI.operands(), OperandsBeginIdx))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, MO))
      MI.addOperand(*MaybeOperand);

  OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
  OutStreamer->emitInstruction(MI, getSubtargetInfo());
}

void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
                                     X86MCInstLower &MCIL) {
  bool Is64Bits = Subtarget->is64Bit();
  MCContext &Ctx = OutStreamer->getContext();
  MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
  const MCSymbolRefExpr *Op =
      MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);

  EmitAndCountInstruction(
      MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
          .addExpr(Op));
}

void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
  assert(std::next(MI.getIterator())->isCall() &&
         "KCFI_CHECK not followed by a call instruction");

  // Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop()
  // returns a 1-byte X86::NOOP, which means the offset is the same in
  // bytes. This assumes that patchable-function-prefix is the same for all
  // functions.
  const MachineFunction &MF = *MI.getMF();
  int64_t PrefixNops = 0;
  (void)MF.getFunction()
      .getFnAttribute("patchable-function-prefix")
      .getValueAsString()
      .getAsInteger(10, PrefixNops);

  // KCFI allows indirect calls to any location that's preceded by a valid
  // type identifier.
To avoid encoding the full constant into an instruction, 884 // and thus emitting potential call target gadgets at each indirect call 885 // site, load a negated constant to a register and compare that to the 886 // expected value at the call target. 887 const Register AddrReg = MI.getOperand(0).getReg(); 888 const uint32_t Type = MI.getOperand(1).getImm(); 889 // The check is immediately before the call. If the call target is in R10, 890 // we can clobber R11 for the check instead. 891 unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D; 892 EmitAndCountInstruction( 893 MCInstBuilder(X86::MOV32ri).addReg(TempReg).addImm(-MaskKCFIType(Type))); 894 EmitAndCountInstruction(MCInstBuilder(X86::ADD32rm) 895 .addReg(X86::NoRegister) 896 .addReg(TempReg) 897 .addReg(AddrReg) 898 .addImm(1) 899 .addReg(X86::NoRegister) 900 .addImm(-(PrefixNops + 4)) 901 .addReg(X86::NoRegister)); 902 903 MCSymbol *Pass = OutContext.createTempSymbol(); 904 EmitAndCountInstruction( 905 MCInstBuilder(X86::JCC_1) 906 .addExpr(MCSymbolRefExpr::create(Pass, OutContext)) 907 .addImm(X86::COND_E)); 908 909 MCSymbol *Trap = OutContext.createTempSymbol(); 910 OutStreamer->emitLabel(Trap); 911 EmitAndCountInstruction(MCInstBuilder(X86::TRAP)); 912 emitKCFITrapEntry(MF, Trap); 913 OutStreamer->emitLabel(Pass); 914 } 915 916 void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) { 917 // FIXME: Make this work on non-ELF. 918 if (!TM.getTargetTriple().isOSBinFormatELF()) { 919 report_fatal_error("llvm.asan.check.memaccess only supported on ELF"); 920 return; 921 } 922 923 const auto &Reg = MI.getOperand(0).getReg(); 924 ASanAccessInfo AccessInfo(MI.getOperand(1).getImm()); 925 926 uint64_t ShadowBase; 927 int MappingScale; 928 bool OrShadowOffset; 929 getAddressSanitizerParams(Triple(TM.getTargetTriple()), 64, 930 AccessInfo.CompileKernel, &ShadowBase, 931 &MappingScale, &OrShadowOffset); 932 933 StringRef Name = AccessInfo.IsWrite ? "store" : "load"; 934 StringRef Op = OrShadowOffset ? "or" : "add"; 935 std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" + 936 Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" + 937 TM.getMCRegisterInfo()->getName(Reg.asMCReg())) 938 .str(); 939 if (OrShadowOffset) 940 report_fatal_error( 941 "OrShadowOffset is not supported with optimized callbacks"); 942 943 EmitAndCountInstruction( 944 MCInstBuilder(X86::CALL64pcrel32) 945 .addExpr(MCSymbolRefExpr::create( 946 OutContext.getOrCreateSymbol(SymName), OutContext))); 947 } 948 949 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI, 950 X86MCInstLower &MCIL) { 951 // PATCHABLE_OP minsize 952 953 NoAutoPaddingScope NoPadScope(*OutStreamer); 954 955 auto NextMI = std::find_if(std::next(MI.getIterator()), 956 MI.getParent()->end().getInstrIterator(), 957 [](auto &II) { return !II.isMetaInstruction(); }); 958 959 SmallString<256> Code; 960 unsigned MinSize = MI.getOperand(0).getImm(); 961 962 if (NextMI != MI.getParent()->end()) { 963 // Lower the next MachineInstr to find its byte size. 964 MCInst MCI; 965 MCIL.Lower(&*NextMI, MCI); 966 967 SmallVector<MCFixup, 4> Fixups; 968 CodeEmitter->encodeInstruction(MCI, Code, Fixups, getSubtargetInfo()); 969 } 970 971 if (Code.size() < MinSize) { 972 if (MinSize == 2 && Subtarget->is32Bit() && 973 Subtarget->isTargetWindowsMSVC() && 974 (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) { 975 // For compatibility reasons, when targetting MSVC, it is important to 976 // generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. 
Some tools 977 // rely specifically on this pattern to be able to patch a function. 978 // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE. 979 OutStreamer->emitInstruction( 980 MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI), 981 *Subtarget); 982 } else { 983 unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget); 984 assert(NopSize == MinSize && "Could not implement MinSize!"); 985 (void)NopSize; 986 } 987 } 988 } 989 990 // Lower a stackmap of the form: 991 // <id>, <shadowBytes>, ... 992 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { 993 SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo()); 994 995 auto &Ctx = OutStreamer->getContext(); 996 MCSymbol *MILabel = Ctx.createTempSymbol(); 997 OutStreamer->emitLabel(MILabel); 998 999 SM.recordStackMap(*MILabel, MI); 1000 unsigned NumShadowBytes = MI.getOperand(1).getImm(); 1001 SMShadowTracker.reset(NumShadowBytes); 1002 } 1003 1004 // Lower a patchpoint of the form: 1005 // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ... 1006 void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, 1007 X86MCInstLower &MCIL) { 1008 assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64"); 1009 1010 SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo()); 1011 1012 NoAutoPaddingScope NoPadScope(*OutStreamer); 1013 1014 auto &Ctx = OutStreamer->getContext(); 1015 MCSymbol *MILabel = Ctx.createTempSymbol(); 1016 OutStreamer->emitLabel(MILabel); 1017 SM.recordPatchPoint(*MILabel, MI); 1018 1019 PatchPointOpers opers(&MI); 1020 unsigned ScratchIdx = opers.getNextScratchIdx(); 1021 unsigned EncodedBytes = 0; 1022 const MachineOperand &CalleeMO = opers.getCallTarget(); 1023 1024 // Check for null target. If target is non-null (i.e. is non-zero or is 1025 // symbolic) then emit a call. 1026 if (!(CalleeMO.isImm() && !CalleeMO.getImm())) { 1027 MCOperand CalleeMCOp; 1028 switch (CalleeMO.getType()) { 1029 default: 1030 /// FIXME: Add a verifier check for bad callee types. 1031 llvm_unreachable("Unrecognized callee operand type."); 1032 case MachineOperand::MO_Immediate: 1033 if (CalleeMO.getImm()) 1034 CalleeMCOp = MCOperand::createImm(CalleeMO.getImm()); 1035 break; 1036 case MachineOperand::MO_ExternalSymbol: 1037 case MachineOperand::MO_GlobalAddress: 1038 CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO, 1039 MCIL.GetSymbolFromOperand(CalleeMO)); 1040 break; 1041 } 1042 1043 // Emit MOV to materialize the target address and the CALL to target. 1044 // This is encoded with 12-13 bytes, depending on which register is used. 1045 Register ScratchReg = MI.getOperand(ScratchIdx).getReg(); 1046 if (X86II::isX86_64ExtendedReg(ScratchReg)) 1047 EncodedBytes = 13; 1048 else 1049 EncodedBytes = 12; 1050 1051 EmitAndCountInstruction( 1052 MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp)); 1053 // FIXME: Add retpoline support and remove this. 1054 if (Subtarget->useIndirectThunkCalls()) 1055 report_fatal_error( 1056 "Lowering patchpoint with thunks not yet implemented."); 1057 EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg)); 1058 } 1059 1060 // Emit padding. 
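  // The patchpoint reserved a fixed number of bytes; the materialize-and-call
  // sequence above (if any) already consumed EncodedBytes of them, so only
  // the remainder is filled with nops.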
  unsigned NumBytes = opers.getNumPatchBytes();
  assert(NumBytes >= EncodedBytes &&
         "Patchpoint can't request size less than the length of a call.");

  emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
}

void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
                                              X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay custom events only support X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  //   .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +N                        // jump across the instrumentation sled
  //   ...                           // set up arguments in register
  //   callq __xray_CustomEvent@plt  // force dependency to symbol
  //   ...
  //   <jump here>
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   ...
  //   callq __xray_CustomEvent  // already lowered
  //   ...
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
  OutStreamer->AddComment("# XRay Custom Event Log");
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset
  // as an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBinaryData("\xeb\x0f");

  // The default C calling convention will place two arguments into %rcx and
  // %rdx -- so we only work with those.
  const Register DestRegs[] = {X86::RDI, X86::RSI};
  bool UsedMask[] = {false, false};
  // Filled out in loop.
  Register SrcRegs[] = {0, 0};

  // Then we put the operands in the %rdi and %rsi registers. We spill the
  // values in the registers before we clobber them, and mark them as used in
  // UsedMask. In case the arguments are already in the correct register, we
  // emit nops appropriately sized to keep the sled the same size in every
  // situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
      assert(Op->isReg() && "Only support arguments in registers");
      SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
      assert(SrcRegs[I].isValid() && "Invalid operand");
      if (SrcRegs[I] != DestRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
      } else {
        emitX86Nops(*OutStreamer, 4, Subtarget);
      }
    }

  // Now that the register values are stashed, mov arguments into place.
  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
  // earlier DestReg. We will have already overwritten the register before
  // we can copy from it.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (SrcRegs[I] != DestRegs[I])
      EmitAndCountInstruction(
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));

  // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
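  // When compiling position-independent code the call below goes through the
  // PLT (note the MO_PLT flag), so the 32-bit relative displacement remains
  // valid even if the runtime provides the trampoline from another DSO.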
1141 auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent"); 1142 MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym); 1143 if (isPositionIndependent()) 1144 TOp.setTargetFlags(X86II::MO_PLT); 1145 1146 // Emit the call instruction. 1147 EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32) 1148 .addOperand(MCIL.LowerSymbolOperand(TOp, TSym))); 1149 1150 // Restore caller-saved and used registers. 1151 for (unsigned I = sizeof UsedMask; I-- > 0;) 1152 if (UsedMask[I]) 1153 EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I])); 1154 else 1155 emitX86Nops(*OutStreamer, 1, Subtarget); 1156 1157 OutStreamer->AddComment("xray custom event end."); 1158 1159 // Record the sled version. Version 0 of this sled was spelled differently, so 1160 // we let the runtime handle the different offsets we're using. Version 2 1161 // changed the absolute address to a PC-relative address. 1162 recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2); 1163 } 1164 1165 void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI, 1166 X86MCInstLower &MCIL) { 1167 assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64"); 1168 1169 NoAutoPaddingScope NoPadScope(*OutStreamer); 1170 1171 // We want to emit the following pattern, which follows the x86 calling 1172 // convention to prepare for the trampoline call to be patched in. 1173 // 1174 // .p2align 1, ... 1175 // .Lxray_event_sled_N: 1176 // jmp +N // jump across the instrumentation sled 1177 // ... // set up arguments in register 1178 // callq __xray_TypedEvent@plt // force dependency to symbol 1179 // ... 1180 // <jump here> 1181 // 1182 // After patching, it would look something like: 1183 // 1184 // nopw (2-byte nop) 1185 // ... 1186 // callq __xrayTypedEvent // already lowered 1187 // ... 1188 // 1189 // --- 1190 // First we emit the label and the jump. 1191 auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true); 1192 OutStreamer->AddComment("# XRay Typed Event Log"); 1193 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1194 OutStreamer->emitLabel(CurSled); 1195 1196 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as 1197 // an operand (computed as an offset from the jmp instruction). 1198 // FIXME: Find another less hacky way do force the relative jump. 1199 OutStreamer->emitBinaryData("\xeb\x14"); 1200 1201 // An x86-64 convention may place three arguments into %rcx, %rdx, and R8, 1202 // so we'll work with those. Or we may be called via SystemV, in which case 1203 // we don't have to do any translation. 1204 const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX}; 1205 bool UsedMask[] = {false, false, false}; 1206 1207 // Will fill out src regs in the loop. 1208 Register SrcRegs[] = {0, 0, 0}; 1209 1210 // Then we put the operands in the SystemV registers. We spill the values in 1211 // the registers before we clobber them, and mark them as used in UsedMask. 1212 // In case the arguments are already in the correct register, we emit nops 1213 // appropriately sized to keep the sled the same size in every situation. 1214 for (unsigned I = 0; I < MI.getNumOperands(); ++I) 1215 if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) { 1216 // TODO: Is register only support adequate? 
1217 assert(Op->isReg() && "Only supports arguments in registers"); 1218 SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64); 1219 assert(SrcRegs[I].isValid() && "Invalid operand"); 1220 if (SrcRegs[I] != DestRegs[I]) { 1221 UsedMask[I] = true; 1222 EmitAndCountInstruction( 1223 MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I])); 1224 } else { 1225 emitX86Nops(*OutStreamer, 4, Subtarget); 1226 } 1227 } 1228 1229 // In the above loop we only stash all of the destination registers or emit 1230 // nops if the arguments are already in the right place. Doing the actually 1231 // moving is postponed until after all the registers are stashed so nothing 1232 // is clobbers. We've already added nops to account for the size of mov and 1233 // push if the register is in the right place, so we only have to worry about 1234 // emitting movs. 1235 // FIXME: This doesn't work if one of the later SrcRegs is equal to an 1236 // earlier DestReg. We will have already overwritten over the register before 1237 // we can copy from it. 1238 for (unsigned I = 0; I < MI.getNumOperands(); ++I) 1239 if (UsedMask[I]) 1240 EmitAndCountInstruction( 1241 MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I])); 1242 1243 // We emit a hard dependency on the __xray_TypedEvent symbol, which is the 1244 // name of the trampoline to be implemented by the XRay runtime. 1245 auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent"); 1246 MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym); 1247 if (isPositionIndependent()) 1248 TOp.setTargetFlags(X86II::MO_PLT); 1249 1250 // Emit the call instruction. 1251 EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32) 1252 .addOperand(MCIL.LowerSymbolOperand(TOp, TSym))); 1253 1254 // Restore caller-saved and used registers. 1255 for (unsigned I = sizeof UsedMask; I-- > 0;) 1256 if (UsedMask[I]) 1257 EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I])); 1258 else 1259 emitX86Nops(*OutStreamer, 1, Subtarget); 1260 1261 OutStreamer->AddComment("xray typed event end."); 1262 1263 // Record the sled version. 1264 recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2); 1265 } 1266 1267 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, 1268 X86MCInstLower &MCIL) { 1269 1270 NoAutoPaddingScope NoPadScope(*OutStreamer); 1271 1272 const Function &F = MF->getFunction(); 1273 if (F.hasFnAttribute("patchable-function-entry")) { 1274 unsigned Num; 1275 if (F.getFnAttribute("patchable-function-entry") 1276 .getValueAsString() 1277 .getAsInteger(10, Num)) 1278 return; 1279 emitX86Nops(*OutStreamer, Num, Subtarget); 1280 return; 1281 } 1282 // We want to emit the following pattern: 1283 // 1284 // .p2align 1, ... 1285 // .Lxray_sled_N: 1286 // jmp .tmpN 1287 // # 9 bytes worth of noops 1288 // 1289 // We need the 9 bytes because at runtime, we'd be patching over the full 11 1290 // bytes with the following pattern: 1291 // 1292 // mov %r10, <function id, 32-bit> // 6 bytes 1293 // call <relative offset, 32-bits> // 5 bytes 1294 // 1295 auto CurSled = OutContext.createTempSymbol("xray_sled_", true); 1296 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1297 OutStreamer->emitLabel(CurSled); 1298 1299 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as 1300 // an operand (computed as an offset from the jmp instruction). 1301 // FIXME: Find another less hacky way do force the relative jump. 
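  // 0xEB is a short JMP with an 8-bit displacement; 0x09 skips the 9 nop
  // bytes emitted next, so an unpatched sled costs only this two-byte jump.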
1302 OutStreamer->emitBytes("\xeb\x09"); 1303 emitX86Nops(*OutStreamer, 9, Subtarget); 1304 recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2); 1305 } 1306 1307 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, 1308 X86MCInstLower &MCIL) { 1309 NoAutoPaddingScope NoPadScope(*OutStreamer); 1310 1311 // Since PATCHABLE_RET takes the opcode of the return statement as an 1312 // argument, we use that to emit the correct form of the RET that we want. 1313 // i.e. when we see this: 1314 // 1315 // PATCHABLE_RET X86::RET ... 1316 // 1317 // We should emit the RET followed by sleds. 1318 // 1319 // .p2align 1, ... 1320 // .Lxray_sled_N: 1321 // ret # or equivalent instruction 1322 // # 10 bytes worth of noops 1323 // 1324 // This just makes sure that the alignment for the next instruction is 2. 1325 auto CurSled = OutContext.createTempSymbol("xray_sled_", true); 1326 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1327 OutStreamer->emitLabel(CurSled); 1328 unsigned OpCode = MI.getOperand(0).getImm(); 1329 MCInst Ret; 1330 Ret.setOpcode(OpCode); 1331 for (auto &MO : drop_begin(MI.operands())) 1332 if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO)) 1333 Ret.addOperand(*MaybeOperand); 1334 OutStreamer->emitInstruction(Ret, getSubtargetInfo()); 1335 emitX86Nops(*OutStreamer, 10, Subtarget); 1336 recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2); 1337 } 1338 1339 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, 1340 X86MCInstLower &MCIL) { 1341 NoAutoPaddingScope NoPadScope(*OutStreamer); 1342 1343 // Like PATCHABLE_RET, we have the actual instruction in the operands to this 1344 // instruction so we lower that particular instruction and its operands. 1345 // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how 1346 // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to 1347 // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual 1348 // tail call much like how we have it in PATCHABLE_RET. 1349 auto CurSled = OutContext.createTempSymbol("xray_sled_", true); 1350 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1351 OutStreamer->emitLabel(CurSled); 1352 auto Target = OutContext.createTempSymbol(); 1353 1354 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as 1355 // an operand (computed as an offset from the jmp instruction). 1356 // FIXME: Find another less hacky way do force the relative jump. 1357 OutStreamer->emitBytes("\xeb\x09"); 1358 emitX86Nops(*OutStreamer, 9, Subtarget); 1359 OutStreamer->emitLabel(Target); 1360 recordSled(CurSled, MI, SledKind::TAIL_CALL, 2); 1361 1362 unsigned OpCode = MI.getOperand(0).getImm(); 1363 OpCode = convertTailJumpOpcode(OpCode); 1364 MCInst TC; 1365 TC.setOpcode(OpCode); 1366 1367 // Before emitting the instruction, add a comment to indicate that this is 1368 // indeed a tail call. 1369 OutStreamer->AddComment("TAILCALL"); 1370 for (auto &MO : drop_begin(MI.operands())) 1371 if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO)) 1372 TC.addOperand(*MaybeOperand); 1373 OutStreamer->emitInstruction(TC, getSubtargetInfo()); 1374 } 1375 1376 // Returns instruction preceding MBBI in MachineFunction. 1377 // If MBBI is the first instruction of the first basic block, returns null. 
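// "Null" here means a default-constructed const_iterator, which is what the
// function returns when there is no preceding instruction.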
1378 static MachineBasicBlock::const_iterator 1379 PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) { 1380 const MachineBasicBlock *MBB = MBBI->getParent(); 1381 while (MBBI == MBB->begin()) { 1382 if (MBB == &MBB->getParent()->front()) 1383 return MachineBasicBlock::const_iterator(); 1384 MBB = MBB->getPrevNode(); 1385 MBBI = MBB->end(); 1386 } 1387 --MBBI; 1388 return MBBI; 1389 } 1390 1391 static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx, 1392 unsigned SrcOp2Idx, ArrayRef<int> Mask) { 1393 std::string Comment; 1394 1395 // Compute the name for a register. This is really goofy because we have 1396 // multiple instruction printers that could (in theory) use different 1397 // names. Fortunately most people use the ATT style (outside of Windows) 1398 // and they actually agree on register naming here. Ultimately, this is 1399 // a comment, and so its OK if it isn't perfect. 1400 auto GetRegisterName = [](MCRegister Reg) -> StringRef { 1401 return X86ATTInstPrinter::getRegisterName(Reg); 1402 }; 1403 1404 const MachineOperand &DstOp = MI->getOperand(0); 1405 const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx); 1406 const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx); 1407 1408 StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem"; 1409 StringRef Src1Name = 1410 SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem"; 1411 StringRef Src2Name = 1412 SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem"; 1413 1414 // One source operand, fix the mask to print all elements in one span. 1415 SmallVector<int, 8> ShuffleMask(Mask); 1416 if (Src1Name == Src2Name) 1417 for (int i = 0, e = ShuffleMask.size(); i != e; ++i) 1418 if (ShuffleMask[i] >= e) 1419 ShuffleMask[i] -= e; 1420 1421 raw_string_ostream CS(Comment); 1422 CS << DstName; 1423 1424 // Handle AVX512 MASK/MASXZ write mask comments. 1425 // MASK: zmmX {%kY} 1426 // MASKZ: zmmX {%kY} {z} 1427 if (SrcOp1Idx > 1) { 1428 assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask"); 1429 1430 const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1); 1431 if (WriteMaskOp.isReg()) { 1432 CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}"; 1433 1434 if (SrcOp1Idx == 2) { 1435 CS << " {z}"; 1436 } 1437 } 1438 } 1439 1440 CS << " = "; 1441 1442 for (int i = 0, e = ShuffleMask.size(); i != e; ++i) { 1443 if (i != 0) 1444 CS << ","; 1445 if (ShuffleMask[i] == SM_SentinelZero) { 1446 CS << "zero"; 1447 continue; 1448 } 1449 1450 // Otherwise, it must come from src1 or src2. Print the span of elements 1451 // that comes from this src. 1452 bool isSrc1 = ShuffleMask[i] < (int)e; 1453 CS << (isSrc1 ? Src1Name : Src2Name) << '['; 1454 1455 bool IsFirst = true; 1456 while (i != e && ShuffleMask[i] != SM_SentinelZero && 1457 (ShuffleMask[i] < (int)e) == isSrc1) { 1458 if (!IsFirst) 1459 CS << ','; 1460 else 1461 IsFirst = false; 1462 if (ShuffleMask[i] == SM_SentinelUndef) 1463 CS << "u"; 1464 else 1465 CS << ShuffleMask[i] % (int)e; 1466 ++i; 1467 } 1468 CS << ']'; 1469 --i; // For loop increments element #. 1470 } 1471 CS.flush(); 1472 1473 return Comment; 1474 } 1475 1476 static void printConstant(const APInt &Val, raw_ostream &CS, 1477 bool PrintZero = false) { 1478 if (Val.getBitWidth() <= 64) { 1479 CS << (PrintZero ? 0ULL : Val.getZExtValue()); 1480 } else { 1481 // print multi-word constant as (w0,w1) 1482 CS << "("; 1483 for (int i = 0, N = Val.getNumWords(); i < N; ++i) { 1484 if (i > 0) 1485 CS << ","; 1486 CS << (PrintZero ? 
0ULL : Val.getRawData()[i]); 1487 } 1488 CS << ")"; 1489 } 1490 } 1491 1492 static void printConstant(const APFloat &Flt, raw_ostream &CS, 1493 bool PrintZero = false) { 1494 SmallString<32> Str; 1495 // Force scientific notation to distinguish from integers. 1496 if (PrintZero) 1497 APFloat::getZero(Flt.getSemantics()).toString(Str, 0, 0); 1498 else 1499 Flt.toString(Str, 0, 0); 1500 CS << Str; 1501 } 1502 1503 static void printConstant(const Constant *COp, unsigned BitWidth, 1504 raw_ostream &CS, bool PrintZero = false) { 1505 if (isa<UndefValue>(COp)) { 1506 CS << "u"; 1507 } else if (auto *CI = dyn_cast<ConstantInt>(COp)) { 1508 printConstant(CI->getValue(), CS, PrintZero); 1509 } else if (auto *CF = dyn_cast<ConstantFP>(COp)) { 1510 printConstant(CF->getValueAPF(), CS, PrintZero); 1511 } else if (auto *CDS = dyn_cast<ConstantDataSequential>(COp)) { 1512 Type *EltTy = CDS->getElementType(); 1513 bool IsInteger = EltTy->isIntegerTy(); 1514 bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy(); 1515 unsigned EltBits = EltTy->getPrimitiveSizeInBits(); 1516 unsigned E = std::min(BitWidth / EltBits, CDS->getNumElements()); 1517 assert((BitWidth % EltBits) == 0 && "Element size mismatch"); 1518 for (unsigned I = 0; I != E; ++I) { 1519 if (I != 0) 1520 CS << ","; 1521 if (IsInteger) 1522 printConstant(CDS->getElementAsAPInt(I), CS, PrintZero); 1523 else if (IsFP) 1524 printConstant(CDS->getElementAsAPFloat(I), CS, PrintZero); 1525 else 1526 CS << "?"; 1527 } 1528 } else if (auto *CV = dyn_cast<ConstantVector>(COp)) { 1529 unsigned EltBits = CV->getType()->getScalarSizeInBits(); 1530 unsigned E = std::min(BitWidth / EltBits, CV->getNumOperands()); 1531 assert((BitWidth % EltBits) == 0 && "Element size mismatch"); 1532 for (unsigned I = 0; I != E; ++I) { 1533 if (I != 0) 1534 CS << ","; 1535 printConstant(CV->getOperand(I), EltBits, CS, PrintZero); 1536 } 1537 } else { 1538 CS << "?"; 1539 } 1540 } 1541 1542 static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer, 1543 int SclWidth, int VecWidth, 1544 const char *ShuffleComment) { 1545 std::string Comment; 1546 raw_string_ostream CS(Comment); 1547 const MachineOperand &DstOp = MI->getOperand(0); 1548 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; 1549 1550 if (auto *C = X86::getConstantFromPool(*MI, 1)) { 1551 CS << "["; 1552 printConstant(C, SclWidth, CS); 1553 for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) { 1554 CS << ","; 1555 printConstant(C, SclWidth, CS, true); 1556 } 1557 CS << "]"; 1558 OutStreamer.AddComment(CS.str()); 1559 return; // early-out 1560 } 1561 1562 // We didn't find a constant load, fallback to a shuffle mask decode. 
1563 CS << ShuffleComment; 1564 OutStreamer.AddComment(CS.str()); 1565 } 1566 1567 static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer, 1568 int Repeats, int BitWidth) { 1569 if (auto *C = X86::getConstantFromPool(*MI, 1)) { 1570 std::string Comment; 1571 raw_string_ostream CS(Comment); 1572 const MachineOperand &DstOp = MI->getOperand(0); 1573 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; 1574 CS << "["; 1575 for (int l = 0; l != Repeats; ++l) { 1576 if (l != 0) 1577 CS << ","; 1578 printConstant(C, BitWidth, CS); 1579 } 1580 CS << "]"; 1581 OutStreamer.AddComment(CS.str()); 1582 } 1583 } 1584 1585 void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { 1586 assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); 1587 assert((getSubtarget().isOSWindows() || TM.getTargetTriple().isUEFI()) && 1588 "SEH_ instruction Windows and UEFI only"); 1589 1590 // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86. 1591 if (EmitFPOData) { 1592 X86TargetStreamer *XTS = 1593 static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()); 1594 switch (MI->getOpcode()) { 1595 case X86::SEH_PushReg: 1596 XTS->emitFPOPushReg(MI->getOperand(0).getImm()); 1597 break; 1598 case X86::SEH_StackAlloc: 1599 XTS->emitFPOStackAlloc(MI->getOperand(0).getImm()); 1600 break; 1601 case X86::SEH_StackAlign: 1602 XTS->emitFPOStackAlign(MI->getOperand(0).getImm()); 1603 break; 1604 case X86::SEH_SetFrame: 1605 assert(MI->getOperand(1).getImm() == 0 && 1606 ".cv_fpo_setframe takes no offset"); 1607 XTS->emitFPOSetFrame(MI->getOperand(0).getImm()); 1608 break; 1609 case X86::SEH_EndPrologue: 1610 XTS->emitFPOEndPrologue(); 1611 break; 1612 case X86::SEH_SaveReg: 1613 case X86::SEH_SaveXMM: 1614 case X86::SEH_PushFrame: 1615 llvm_unreachable("SEH_ directive incompatible with FPO"); 1616 break; 1617 default: 1618 llvm_unreachable("expected SEH_ instruction"); 1619 } 1620 return; 1621 } 1622 1623 // Otherwise, use the .seh_ directives for all other Windows platforms. 
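  // For example, SEH_PushReg becomes a .seh_pushreg directive and
  // SEH_StackAlloc becomes .seh_stackalloc in the emitted assembly.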
  switch (MI->getOpcode()) {
  case X86::SEH_PushReg:
    OutStreamer->emitWinCFIPushReg(MI->getOperand(0).getImm());
    break;

  case X86::SEH_SaveReg:
    OutStreamer->emitWinCFISaveReg(MI->getOperand(0).getImm(),
                                   MI->getOperand(1).getImm());
    break;

  case X86::SEH_SaveXMM:
    OutStreamer->emitWinCFISaveXMM(MI->getOperand(0).getImm(),
                                   MI->getOperand(1).getImm());
    break;

  case X86::SEH_StackAlloc:
    OutStreamer->emitWinCFIAllocStack(MI->getOperand(0).getImm());
    break;

  case X86::SEH_SetFrame:
    OutStreamer->emitWinCFISetFrame(MI->getOperand(0).getImm(),
                                    MI->getOperand(1).getImm());
    break;

  case X86::SEH_PushFrame:
    OutStreamer->emitWinCFIPushFrame(MI->getOperand(0).getImm());
    break;

  case X86::SEH_EndPrologue:
    OutStreamer->emitWinCFIEndProlog();
    break;

  default:
    llvm_unreachable("expected SEH_ instruction");
  }
}

static unsigned getRegisterWidth(const MCOperandInfo &Info) {
  if (Info.RegClass == X86::VR128RegClassID ||
      Info.RegClass == X86::VR128XRegClassID)
    return 128;
  if (Info.RegClass == X86::VR256RegClassID ||
      Info.RegClass == X86::VR256XRegClassID)
    return 256;
  if (Info.RegClass == X86::VR512RegClassID)
    return 512;
  llvm_unreachable("Unknown register class!");
}

static void addConstantComments(const MachineInstr *MI,
                                MCStreamer &OutStreamer) {
  switch (MI->getOpcode()) {
  // Lower PSHUFB and VPERMILP normally but add a comment if we can find
  // a constant shuffle mask. We won't be able to do this at the MC layer
  // because the mask isn't an immediate.
  case X86::PSHUFBrm:
  case X86::VPSHUFBrm:
  case X86::VPSHUFBYrm:
  case X86::VPSHUFBZ128rm:
  case X86::VPSHUFBZ128rmk:
  case X86::VPSHUFBZ128rmkz:
  case X86::VPSHUFBZ256rm:
  case X86::VPSHUFBZ256rmk:
  case X86::VPSHUFBZ256rmkz:
  case X86::VPSHUFBZrm:
  case X86::VPSHUFBZrmk:
  case X86::VPSHUFBZrmkz: {
    unsigned SrcIdx = 1;
    if (X86II::isKMasked(MI->getDesc().TSFlags)) {
      // Skip mask operand.
      ++SrcIdx;
      if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
        // Skip passthru operand.
        ++SrcIdx;
      }
    }

    if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
      unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 64> Mask;
      DecodePSHUFBMask(C, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
    }
    break;
  }

  case X86::VPERMILPSrm:
  case X86::VPERMILPSYrm:
  case X86::VPERMILPSZ128rm:
  case X86::VPERMILPSZ128rmk:
  case X86::VPERMILPSZ128rmkz:
  case X86::VPERMILPSZ256rm:
  case X86::VPERMILPSZ256rmk:
  case X86::VPERMILPSZ256rmkz:
  case X86::VPERMILPSZrm:
  case X86::VPERMILPSZrmk:
  case X86::VPERMILPSZrmkz:
  case X86::VPERMILPDrm:
  case X86::VPERMILPDYrm:
  case X86::VPERMILPDZ128rm:
  case X86::VPERMILPDZ128rmk:
  case X86::VPERMILPDZ128rmkz:
  case X86::VPERMILPDZ256rm:
  case X86::VPERMILPDZ256rmk:
  case X86::VPERMILPDZ256rmkz:
  case X86::VPERMILPDZrm:
  case X86::VPERMILPDZrmk:
  case X86::VPERMILPDZrmkz: {
    unsigned ElSize;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VPERMILPSrm:
    case X86::VPERMILPSYrm:
    case X86::VPERMILPSZ128rm:
    case X86::VPERMILPSZ256rm:
    case X86::VPERMILPSZrm:
    case X86::VPERMILPSZ128rmkz:
    case X86::VPERMILPSZ256rmkz:
    case X86::VPERMILPSZrmkz:
    case X86::VPERMILPSZ128rmk:
    case X86::VPERMILPSZ256rmk:
    case X86::VPERMILPSZrmk:
      ElSize = 32;
      break;
    case X86::VPERMILPDrm:
    case X86::VPERMILPDYrm:
    case X86::VPERMILPDZ128rm:
    case X86::VPERMILPDZ256rm:
    case X86::VPERMILPDZrm:
    case X86::VPERMILPDZ128rmkz:
    case X86::VPERMILPDZ256rmkz:
    case X86::VPERMILPDZrmkz:
    case X86::VPERMILPDZ128rmk:
    case X86::VPERMILPDZ256rmk:
    case X86::VPERMILPDZrmk:
      ElSize = 64;
      break;
    }

    unsigned SrcIdx = 1;
    if (X86II::isKMasked(MI->getDesc().TSFlags)) {
      // Skip mask operand.
      ++SrcIdx;
      if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
        // Skip passthru operand.
        ++SrcIdx;
      }
    }

    if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
      unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 16> Mask;
      DecodeVPERMILPMask(C, ElSize, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
    }
    break;
  }

  case X86::VPERMIL2PDrm:
  case X86::VPERMIL2PSrm:
  case X86::VPERMIL2PDYrm:
  case X86::VPERMIL2PSYrm: {
    assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
           "Unexpected number of operands!");

    const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
    if (!CtrlOp.isImm())
      break;

    unsigned ElSize;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
    case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
    }

    if (auto *C = X86::getConstantFromPool(*MI, 3)) {
      unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 16> Mask;
      DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
    }
    break;
  }

  case X86::VPPERMrrm: {
    if (auto *C = X86::getConstantFromPool(*MI, 3)) {
      unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 16> Mask;
      DecodeVPPERMMask(C, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
    }
    break;
  }

  case X86::MMX_MOVQ64rm: {
    if (auto *C = X86::getConstantFromPool(*MI, 1)) {
      std::string Comment;
      raw_string_ostream CS(Comment);
      const MachineOperand &DstOp = MI->getOperand(0);
      CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
      if (auto *CF = dyn_cast<ConstantFP>(C)) {
        CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false);
        OutStreamer.AddComment(CS.str());
      }
    }
    break;
  }

  case X86::MOVSDrm:
  case X86::VMOVSDrm:
  case X86::VMOVSDZrm:
  case X86::MOVSDrm_alt:
  case X86::VMOVSDrm_alt:
  case X86::VMOVSDZrm_alt:
  case X86::MOVQI2PQIrm:
  case X86::VMOVQI2PQIrm:
  case X86::VMOVQI2PQIZrm:
    printZeroUpperMove(MI, OutStreamer, 64, 128, "mem[0],zero");
    break;

  case X86::MOVSSrm:
  case X86::VMOVSSrm:
  case X86::VMOVSSZrm:
  case X86::MOVSSrm_alt:
  case X86::VMOVSSrm_alt:
  case X86::VMOVSSZrm_alt:
  case X86::MOVDI2PDIrm:
  case X86::VMOVDI2PDIrm:
  case X86::VMOVDI2PDIZrm:
    printZeroUpperMove(MI, OutStreamer, 32, 128, "mem[0],zero,zero,zero");
    break;

#define MOV_CASE(Prefix, Suffix)                \
  case X86::Prefix##MOVAPD##Suffix##rm:         \
  case X86::Prefix##MOVAPS##Suffix##rm:         \
  case X86::Prefix##MOVUPD##Suffix##rm:         \
  case X86::Prefix##MOVUPS##Suffix##rm:         \
  case X86::Prefix##MOVDQA##Suffix##rm:         \
  case X86::Prefix##MOVDQU##Suffix##rm:

#define MOV_AVX512_CASE(Suffix)                 \
  case X86::VMOVDQA64##Suffix##rm:              \
  case X86::VMOVDQA32##Suffix##rm:              \
  case X86::VMOVDQU64##Suffix##rm:              \
  case X86::VMOVDQU32##Suffix##rm:              \
  case X86::VMOVDQU16##Suffix##rm:              \
  case X86::VMOVDQU8##Suffix##rm:               \
  case X86::VMOVAPS##Suffix##rm:                \
  case X86::VMOVAPD##Suffix##rm:                \
  case X86::VMOVUPS##Suffix##rm:                \
  case X86::VMOVUPD##Suffix##rm:

#define CASE_128_MOV_RM()                       \
  MOV_CASE(, )   /* SSE */                      \
  MOV_CASE(V, )  /* AVX-128 */                  \
  MOV_AVX512_CASE(Z128)

#define CASE_256_MOV_RM()                       \
  MOV_CASE(V, Y) /* AVX-256 */                  \
  MOV_AVX512_CASE(Z256)

#define CASE_512_MOV_RM()                       \
  MOV_AVX512_CASE(Z)

  // For loads from a constant pool to a vector register, print the constant
  // loaded.
  CASE_128_MOV_RM()
    printBroadcast(MI, OutStreamer, 1, 128);
    break;
  CASE_256_MOV_RM()
    printBroadcast(MI, OutStreamer, 1, 256);
    break;
  CASE_512_MOV_RM()
    printBroadcast(MI, OutStreamer, 1, 512);
    break;
  case X86::VBROADCASTF128rm:
  case X86::VBROADCASTI128rm:
  case X86::VBROADCASTF32X4Z256rm:
  case X86::VBROADCASTF64X2Z128rm:
  case X86::VBROADCASTI32X4Z256rm:
  case X86::VBROADCASTI64X2Z128rm:
    printBroadcast(MI, OutStreamer, 2, 128);
    break;
  case X86::VBROADCASTF32X4rm:
  case X86::VBROADCASTF64X2rm:
  case X86::VBROADCASTI32X4rm:
  case X86::VBROADCASTI64X2rm:
    printBroadcast(MI, OutStreamer, 4, 128);
    break;
  case X86::VBROADCASTF32X8rm:
  case X86::VBROADCASTF64X4rm:
  case X86::VBROADCASTI32X8rm:
  case X86::VBROADCASTI64X4rm:
    printBroadcast(MI, OutStreamer, 2, 256);
    break;

  // For broadcast loads from a constant pool to a vector register, repeatedly
  // print the constant loaded.
  case X86::MOVDDUPrm:
  case X86::VMOVDDUPrm:
  case X86::VMOVDDUPZ128rm:
  case X86::VPBROADCASTQrm:
  case X86::VPBROADCASTQZ128rm:
    printBroadcast(MI, OutStreamer, 2, 64);
    break;
  case X86::VBROADCASTSDYrm:
  case X86::VBROADCASTSDZ256rm:
  case X86::VPBROADCASTQYrm:
  case X86::VPBROADCASTQZ256rm:
    printBroadcast(MI, OutStreamer, 4, 64);
    break;
  case X86::VBROADCASTSDZrm:
  case X86::VPBROADCASTQZrm:
    printBroadcast(MI, OutStreamer, 8, 64);
    break;
  case X86::VBROADCASTSSrm:
  case X86::VBROADCASTSSZ128rm:
  case X86::VPBROADCASTDrm:
  case X86::VPBROADCASTDZ128rm:
    printBroadcast(MI, OutStreamer, 4, 32);
    break;
  case X86::VBROADCASTSSYrm:
  case X86::VBROADCASTSSZ256rm:
  case X86::VPBROADCASTDYrm:
  case X86::VPBROADCASTDZ256rm:
    printBroadcast(MI, OutStreamer, 8, 32);
    break;
  case X86::VBROADCASTSSZrm:
  case X86::VPBROADCASTDZrm:
    printBroadcast(MI, OutStreamer, 16, 32);
    break;
  case X86::VPBROADCASTWrm:
  case X86::VPBROADCASTWZ128rm:
    printBroadcast(MI, OutStreamer, 8, 16);
    break;
  case X86::VPBROADCASTWYrm:
  case X86::VPBROADCASTWZ256rm:
    printBroadcast(MI, OutStreamer, 16, 16);
    break;
  case X86::VPBROADCASTWZrm:
    printBroadcast(MI, OutStreamer, 32, 16);
    break;
  case X86::VPBROADCASTBrm:
  case X86::VPBROADCASTBZ128rm:
    printBroadcast(MI, OutStreamer, 16, 8);
    break;
  case X86::VPBROADCASTBYrm:
  case X86::VPBROADCASTBZ256rm:
    printBroadcast(MI, OutStreamer, 32, 8);
    break;
  case X86::VPBROADCASTBZrm:
    printBroadcast(MI, OutStreamer, 64, 8);
    break;
  }
}

void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
  // FIXME: Enable feature predicate checks once all the tests pass.
  // X86_MC::verifyInstructionPredicates(MI->getOpcode(),
  //                                     Subtarget->getFeatureBits());

  X86MCInstLower MCInstLowering(*MF, *this);
  const X86RegisterInfo *RI =
      MF->getSubtarget<X86Subtarget>().getRegisterInfo();

  if (MI->getOpcode() == X86::OR64rm) {
    for (auto &Opd : MI->operands()) {
      if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
                                "swift_async_extendedFramePointerFlags") {
        ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
      }
    }
  }

  // Add comments for values loaded from constant pool.
  if (OutStreamer->isVerboseAsm())
    addConstantComments(MI, *OutStreamer);

  // Add a comment about EVEX compression.
  if (TM.Options.MCOptions.ShowMCEncoding) {
    if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY)
      OutStreamer->AddComment("EVEX TO LEGACY Compression ", false);
    else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
      OutStreamer->AddComment("EVEX TO VEX Compression ", false);
    else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_EVEX)
      OutStreamer->AddComment("EVEX TO EVEX Compression ", false);
  }

  switch (MI->getOpcode()) {
  case TargetOpcode::DBG_VALUE:
    llvm_unreachable("Should be handled target independently");

  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    // Lower these as normal, but add some comments.
    Register Reg = MI->getOperand(0).getReg();
    OutStreamer->AddComment(StringRef("eh_return, addr: %") +
                            X86ATTInstPrinter::getRegisterName(Reg));
    break;
  }
  case X86::CLEANUPRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CLEANUPRET");
    break;
  }

  case X86::CATCHRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CATCHRET");
    break;
  }

  case X86::ENDBR32:
  case X86::ENDBR64: {
    // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
    // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
    // non-empty. If MI is the initial ENDBR, place the
    // __patchable_function_entries label after ENDBR.
    if (CurrentPatchableFunctionEntrySym &&
        CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
        MI == &MF->front().front()) {
      MCInst Inst;
      MCInstLowering.Lower(MI, Inst);
      EmitAndCountInstruction(Inst);
      CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
      OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
      return;
    }
    break;
  }

  case X86::TAILJMPd64:
    if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
      EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
    [[fallthrough]];
  case X86::TAILJMPr:
  case X86::TAILJMPm:
  case X86::TAILJMPd:
  case X86::TAILJMPd_CC:
  case X86::TAILJMPr64:
  case X86::TAILJMPm64:
  case X86::TAILJMPd64_CC:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPm64_REX:
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("TAILCALL");
    break;

  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
  case X86::TLS_base_addr32:
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    return LowerTlsAddr(MCInstLowering, *MI);

  case X86::MOVPC32r: {
    // This is a pseudo op for a two instruction sequence with a label, which
    // looks like:
    //     call "L1$pb"
    // "L1$pb":
    //     popl %esi

    // Emit the call.
    MCSymbol *PICBase = MF->getPICBaseSymbol();
    // FIXME: We would like an efficient form for this, so we don't have to do
    // a lot of extra uniquing.
    EmitAndCountInstruction(
        MCInstBuilder(X86::CALLpcrel32)
            .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));

    const X86FrameLowering *FrameLowering =
        MF->getSubtarget<X86Subtarget>().getFrameLowering();
    bool hasFP = FrameLowering->hasFP(*MF);

    // TODO: This is needed only if we require precise CFA.
    bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
                               !OutStreamer->getDwarfFrameInfos().back().End;

    int stackGrowth = -RI->getSlotSize();

    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
      MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
    }

    // Emit the label.
    OutStreamer->emitLabel(PICBase);

    // popl $reg
    EmitAndCountInstruction(
        MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));

    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
    }
    return;
  }

  case X86::ADD32ri: {
    // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
    if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
      break;

    // Okay, we have something like:
    //   EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)

    // For this, we want to print something like:
    //   MYGLOBAL + (. - PICBASE)
    // However, we can't generate a ".", so just emit a new label here and
    // refer to it.
    MCSymbol *DotSym = OutContext.createTempSymbol();
    OutStreamer->emitLabel(DotSym);

    // Now that we have emitted the label, lower the complex operand
    // expression.
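    // i.e. build OpSym + (DotSym - PICBase), matching the
    // "MYGLOBAL + (. - PICBASE)" form described above.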
    MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));

    const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
    const MCExpr *PICBase =
        MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
    DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);

    DotExpr = MCBinaryExpr::createAdd(
        MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);

    EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
                                .addReg(MI->getOperand(0).getReg())
                                .addReg(MI->getOperand(1).getReg())
                                .addExpr(DotExpr));
    return;
  }
  case TargetOpcode::STATEPOINT:
    return LowerSTATEPOINT(*MI, MCInstLowering);

  case TargetOpcode::FAULTING_OP:
    return LowerFAULTING_OP(*MI, MCInstLowering);

  case TargetOpcode::FENTRY_CALL:
    return LowerFENTRY_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_OP:
    return LowerPATCHABLE_OP(*MI, MCInstLowering);

  case TargetOpcode::STACKMAP:
    return LowerSTACKMAP(*MI);

  case TargetOpcode::PATCHPOINT:
    return LowerPATCHPOINT(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
    return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_RET:
    return LowerPATCHABLE_RET(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TAIL_CALL:
    return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_EVENT_CALL:
    return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
    return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);

  case X86::MORESTACK_RET:
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    return;

  case X86::KCFI_CHECK:
    return LowerKCFI_CHECK(*MI);

  case X86::ASAN_CHECK_MEMACCESS:
    return LowerASAN_CHECK_MEMACCESS(*MI);

  case X86::MORESTACK_RET_RESTORE_R10:
    // Return, then restore R10.
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
    return;

  case X86::SEH_PushReg:
  case X86::SEH_SaveReg:
  case X86::SEH_SaveXMM:
  case X86::SEH_StackAlloc:
  case X86::SEH_StackAlign:
  case X86::SEH_SetFrame:
  case X86::SEH_PushFrame:
  case X86::SEH_EndPrologue:
    EmitSEHInstruction(MI);
    return;

  case X86::SEH_Epilogue: {
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
    MachineBasicBlock::const_iterator MBBI(MI);
    // Check if preceded by a call and emit nop if so.
    for (MBBI = PrevCrossBBInst(MBBI);
         MBBI != MachineBasicBlock::const_iterator();
         MBBI = PrevCrossBBInst(MBBI)) {
      // Pseudo instructions that aren't a call are assumed to not emit any
      // code. If they do, the worst case is that we generate unnecessary
      // noops after a call.
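      // The scan stops at the first call or non-pseudo instruction, and a NOP
      // is emitted only if that instruction is a call.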
      if (MBBI->isCall() || !MBBI->isPseudo()) {
        if (MBBI->isCall())
          EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
        break;
      }
    }
    return;
  }
  case X86::UBSAN_UD1:
    EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
                                .addReg(X86::EAX)
                                .addReg(X86::EAX)
                                .addImm(1)
                                .addReg(X86::NoRegister)
                                .addImm(MI->getOperand(0).getImm())
                                .addReg(X86::NoRegister));
    return;
  case X86::CALL64pcrel32:
    if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
      EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
    break;
  }

  MCInst TmpInst;
  MCInstLowering.Lower(MI, TmpInst);

  // Stackmap shadows cannot include branch targets, so we can count the bytes
  // in a call towards the shadow, but must ensure that no thread returns into
  // the stackmap shadow. The only way to achieve this is if the call is at the
  // end of the shadow.
  if (MI->isCall()) {
    // Count the size of the call towards the shadow.
    SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
    // Then flush the shadow so that we fill with nops before the call, not
    // after it.
    SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
    // Then emit the call.
    OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
    return;
  }

  EmitAndCountInstruction(TmpInst);
}