1 //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains code to lower X86 MachineInstrs to their corresponding 10 // MCInst records. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MCTargetDesc/X86ATTInstPrinter.h" 15 #include "MCTargetDesc/X86BaseInfo.h" 16 #include "MCTargetDesc/X86EncodingOptimization.h" 17 #include "MCTargetDesc/X86InstComments.h" 18 #include "MCTargetDesc/X86ShuffleDecode.h" 19 #include "MCTargetDesc/X86TargetStreamer.h" 20 #include "X86AsmPrinter.h" 21 #include "X86MachineFunctionInfo.h" 22 #include "X86RegisterInfo.h" 23 #include "X86ShuffleDecodeConstantPool.h" 24 #include "X86Subtarget.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringExtras.h" 27 #include "llvm/CodeGen/MachineConstantPool.h" 28 #include "llvm/CodeGen/MachineFunction.h" 29 #include "llvm/CodeGen/MachineModuleInfoImpls.h" 30 #include "llvm/CodeGen/MachineOperand.h" 31 #include "llvm/CodeGen/StackMaps.h" 32 #include "llvm/IR/DataLayout.h" 33 #include "llvm/IR/GlobalValue.h" 34 #include "llvm/IR/Mangler.h" 35 #include "llvm/MC/MCAsmInfo.h" 36 #include "llvm/MC/MCCodeEmitter.h" 37 #include "llvm/MC/MCContext.h" 38 #include "llvm/MC/MCExpr.h" 39 #include "llvm/MC/MCFixup.h" 40 #include "llvm/MC/MCInst.h" 41 #include "llvm/MC/MCInstBuilder.h" 42 #include "llvm/MC/MCSection.h" 43 #include "llvm/MC/MCSectionELF.h" 44 #include "llvm/MC/MCStreamer.h" 45 #include "llvm/MC/MCSymbol.h" 46 #include "llvm/MC/MCSymbolELF.h" 47 #include "llvm/MC/TargetRegistry.h" 48 #include "llvm/Target/TargetLoweringObjectFile.h" 49 #include "llvm/Target/TargetMachine.h" 50 #include "llvm/Transforms/Instrumentation/AddressSanitizer.h" 51 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" 52 #include <string> 53 54 using namespace llvm; 55 56 namespace { 57 58 /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. 59 class X86MCInstLower { 60 MCContext &Ctx; 61 const MachineFunction &MF; 62 const TargetMachine &TM; 63 const MCAsmInfo &MAI; 64 X86AsmPrinter &AsmPrinter; 65 66 public: 67 X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter); 68 69 std::optional<MCOperand> LowerMachineOperand(const MachineInstr *MI, 70 const MachineOperand &MO) const; 71 void Lower(const MachineInstr *MI, MCInst &OutMI) const; 72 73 MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; 74 MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; 75 76 private: 77 MachineModuleInfoMachO &getMachOMMI() const; 78 }; 79 80 } // end anonymous namespace 81 82 /// A RAII helper which defines a region of instructions which can't have 83 /// padding added between them for correctness. 
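/// The previous auto-padding setting is restored when the scope object is destroyed.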
84 struct NoAutoPaddingScope { 85 MCStreamer &OS; 86 const bool OldAllowAutoPadding; 87 NoAutoPaddingScope(MCStreamer &OS) 88 : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) { 89 changeAndComment(false); 90 } 91 ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); } 92 void changeAndComment(bool b) { 93 if (b == OS.getAllowAutoPadding()) 94 return; 95 OS.setAllowAutoPadding(b); 96 if (b) 97 OS.emitRawComment("autopadding"); 98 else 99 OS.emitRawComment("noautopadding"); 100 } 101 }; 102 103 // Emit a minimal sequence of nops spanning NumBytes bytes. 104 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, 105 const X86Subtarget *Subtarget); 106 107 void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst, 108 const MCSubtargetInfo &STI, 109 MCCodeEmitter *CodeEmitter) { 110 if (InShadow) { 111 SmallString<256> Code; 112 SmallVector<MCFixup, 4> Fixups; 113 CodeEmitter->encodeInstruction(Inst, Code, Fixups, STI); 114 CurrentShadowSize += Code.size(); 115 if (CurrentShadowSize >= RequiredShadowSize) 116 InShadow = false; // The shadow is big enough. Stop counting. 117 } 118 } 119 120 void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding( 121 MCStreamer &OutStreamer, const MCSubtargetInfo &STI) { 122 if (InShadow && CurrentShadowSize < RequiredShadowSize) { 123 InShadow = false; 124 emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize, 125 &MF->getSubtarget<X86Subtarget>()); 126 } 127 } 128 129 void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) { 130 OutStreamer->emitInstruction(Inst, getSubtargetInfo()); 131 SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get()); 132 } 133 134 X86MCInstLower::X86MCInstLower(const MachineFunction &mf, 135 X86AsmPrinter &asmprinter) 136 : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()), 137 AsmPrinter(asmprinter) {} 138 139 MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { 140 return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>(); 141 } 142 143 /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol 144 /// operand to an MCSymbol. 145 MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const { 146 const Triple &TT = TM.getTargetTriple(); 147 if (MO.isGlobal() && TT.isOSBinFormatELF()) 148 return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal()); 149 150 const DataLayout &DL = MF.getDataLayout(); 151 assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && 152 "Isn't a symbol reference"); 153 154 MCSymbol *Sym = nullptr; 155 SmallString<128> Name; 156 StringRef Suffix; 157 158 switch (MO.getTargetFlags()) { 159 case X86II::MO_DLLIMPORT: 160 // Handle dllimport linkage. 161 Name += "__imp_"; 162 break; 163 case X86II::MO_COFFSTUB: 164 Name += ".refptr."; 165 break; 166 case X86II::MO_DARWIN_NONLAZY: 167 case X86II::MO_DARWIN_NONLAZY_PIC_BASE: 168 Suffix = "$non_lazy_ptr"; 169 break; 170 } 171 172 if (!Suffix.empty()) 173 Name += DL.getPrivateGlobalPrefix(); 174 175 if (MO.isGlobal()) { 176 const GlobalValue *GV = MO.getGlobal(); 177 AsmPrinter.getNameWithPrefix(Name, GV); 178 } else if (MO.isSymbol()) { 179 Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL); 180 } else if (MO.isMBB()) { 181 assert(Suffix.empty()); 182 Sym = MO.getMBB()->getSymbol(); 183 } 184 185 Name += Suffix; 186 if (!Sym) 187 Sym = Ctx.getOrCreateSymbol(Name); 188 189 // If the target flags on the operand changes the name of the symbol, do that 190 // before we return the symbol. 
191 switch (MO.getTargetFlags()) { 192 default: 193 break; 194 case X86II::MO_COFFSTUB: { 195 MachineModuleInfoCOFF &MMICOFF = 196 MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>(); 197 MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym); 198 if (!StubSym.getPointer()) { 199 assert(MO.isGlobal() && "Extern symbol not handled yet"); 200 StubSym = MachineModuleInfoImpl::StubValueTy( 201 AsmPrinter.getSymbol(MO.getGlobal()), true); 202 } 203 break; 204 } 205 case X86II::MO_DARWIN_NONLAZY: 206 case X86II::MO_DARWIN_NONLAZY_PIC_BASE: { 207 MachineModuleInfoImpl::StubValueTy &StubSym = 208 getMachOMMI().getGVStubEntry(Sym); 209 if (!StubSym.getPointer()) { 210 assert(MO.isGlobal() && "Extern symbol not handled yet"); 211 StubSym = MachineModuleInfoImpl::StubValueTy( 212 AsmPrinter.getSymbol(MO.getGlobal()), 213 !MO.getGlobal()->hasInternalLinkage()); 214 } 215 break; 216 } 217 } 218 219 return Sym; 220 } 221 222 MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, 223 MCSymbol *Sym) const { 224 // FIXME: We would like an efficient form for this, so we don't have to do a 225 // lot of extra uniquing. 226 const MCExpr *Expr = nullptr; 227 MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; 228 229 switch (MO.getTargetFlags()) { 230 default: 231 llvm_unreachable("Unknown target flag on GV operand"); 232 case X86II::MO_NO_FLAG: // No flag. 233 // These affect the name of the symbol, not any suffix. 234 case X86II::MO_DARWIN_NONLAZY: 235 case X86II::MO_DLLIMPORT: 236 case X86II::MO_COFFSTUB: 237 break; 238 239 case X86II::MO_TLVP: 240 RefKind = MCSymbolRefExpr::VK_TLVP; 241 break; 242 case X86II::MO_TLVP_PIC_BASE: 243 Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); 244 // Subtract the pic base. 245 Expr = MCBinaryExpr::createSub( 246 Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx); 247 break; 248 case X86II::MO_SECREL: 249 RefKind = MCSymbolRefExpr::VK_SECREL; 250 break; 251 case X86II::MO_TLSGD: 252 RefKind = MCSymbolRefExpr::VK_TLSGD; 253 break; 254 case X86II::MO_TLSLD: 255 RefKind = MCSymbolRefExpr::VK_TLSLD; 256 break; 257 case X86II::MO_TLSLDM: 258 RefKind = MCSymbolRefExpr::VK_TLSLDM; 259 break; 260 case X86II::MO_GOTTPOFF: 261 RefKind = MCSymbolRefExpr::VK_GOTTPOFF; 262 break; 263 case X86II::MO_INDNTPOFF: 264 RefKind = MCSymbolRefExpr::VK_INDNTPOFF; 265 break; 266 case X86II::MO_TPOFF: 267 RefKind = MCSymbolRefExpr::VK_TPOFF; 268 break; 269 case X86II::MO_DTPOFF: 270 RefKind = MCSymbolRefExpr::VK_DTPOFF; 271 break; 272 case X86II::MO_NTPOFF: 273 RefKind = MCSymbolRefExpr::VK_NTPOFF; 274 break; 275 case X86II::MO_GOTNTPOFF: 276 RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; 277 break; 278 case X86II::MO_GOTPCREL: 279 RefKind = MCSymbolRefExpr::VK_GOTPCREL; 280 break; 281 case X86II::MO_GOTPCREL_NORELAX: 282 RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX; 283 break; 284 case X86II::MO_GOT: 285 RefKind = MCSymbolRefExpr::VK_GOT; 286 break; 287 case X86II::MO_GOTOFF: 288 RefKind = MCSymbolRefExpr::VK_GOTOFF; 289 break; 290 case X86II::MO_PLT: 291 RefKind = MCSymbolRefExpr::VK_PLT; 292 break; 293 case X86II::MO_ABS8: 294 RefKind = MCSymbolRefExpr::VK_X86_ABS8; 295 break; 296 case X86II::MO_PIC_BASE_OFFSET: 297 case X86II::MO_DARWIN_NONLAZY_PIC_BASE: 298 Expr = MCSymbolRefExpr::create(Sym, Ctx); 299 // Subtract the pic base. 
300 Expr = MCBinaryExpr::createSub( 301 Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx); 302 if (MO.isJTI()) { 303 assert(MAI.doesSetDirectiveSuppressReloc()); 304 // If .set directive is supported, use it to reduce the number of 305 // relocations the assembler will generate for differences between 306 // local labels. This is only safe when the symbols are in the same 307 // section so we are restricting it to jumptable references. 308 MCSymbol *Label = Ctx.createTempSymbol(); 309 AsmPrinter.OutStreamer->emitAssignment(Label, Expr); 310 Expr = MCSymbolRefExpr::create(Label, Ctx); 311 } 312 break; 313 } 314 315 if (!Expr) 316 Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx); 317 318 if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) 319 Expr = MCBinaryExpr::createAdd( 320 Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); 321 return MCOperand::createExpr(Expr); 322 } 323 324 static unsigned getRetOpcode(const X86Subtarget &Subtarget) { 325 return Subtarget.is64Bit() ? X86::RET64 : X86::RET32; 326 } 327 328 std::optional<MCOperand> 329 X86MCInstLower::LowerMachineOperand(const MachineInstr *MI, 330 const MachineOperand &MO) const { 331 switch (MO.getType()) { 332 default: 333 MI->print(errs()); 334 llvm_unreachable("unknown operand type"); 335 case MachineOperand::MO_Register: 336 // Ignore all implicit register operands. 337 if (MO.isImplicit()) 338 return std::nullopt; 339 return MCOperand::createReg(MO.getReg()); 340 case MachineOperand::MO_Immediate: 341 return MCOperand::createImm(MO.getImm()); 342 case MachineOperand::MO_MachineBasicBlock: 343 case MachineOperand::MO_GlobalAddress: 344 case MachineOperand::MO_ExternalSymbol: 345 return LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); 346 case MachineOperand::MO_MCSymbol: 347 return LowerSymbolOperand(MO, MO.getMCSymbol()); 348 case MachineOperand::MO_JumpTableIndex: 349 return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex())); 350 case MachineOperand::MO_ConstantPoolIndex: 351 return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex())); 352 case MachineOperand::MO_BlockAddress: 353 return LowerSymbolOperand( 354 MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress())); 355 case MachineOperand::MO_RegisterMask: 356 // Ignore call clobbers. 357 return std::nullopt; 358 } 359 } 360 361 // Replace TAILJMP opcodes with their equivalent opcodes that have encoding 362 // information. 
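// Opcodes that are not tail jumps are returned unchanged.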
363 static unsigned convertTailJumpOpcode(unsigned Opcode) { 364 switch (Opcode) { 365 case X86::TAILJMPr: 366 Opcode = X86::JMP32r; 367 break; 368 case X86::TAILJMPm: 369 Opcode = X86::JMP32m; 370 break; 371 case X86::TAILJMPr64: 372 Opcode = X86::JMP64r; 373 break; 374 case X86::TAILJMPm64: 375 Opcode = X86::JMP64m; 376 break; 377 case X86::TAILJMPr64_REX: 378 Opcode = X86::JMP64r_REX; 379 break; 380 case X86::TAILJMPm64_REX: 381 Opcode = X86::JMP64m_REX; 382 break; 383 case X86::TAILJMPd: 384 case X86::TAILJMPd64: 385 Opcode = X86::JMP_1; 386 break; 387 case X86::TAILJMPd_CC: 388 case X86::TAILJMPd64_CC: 389 Opcode = X86::JCC_1; 390 break; 391 } 392 393 return Opcode; 394 } 395 396 void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { 397 OutMI.setOpcode(MI->getOpcode()); 398 399 for (const MachineOperand &MO : MI->operands()) 400 if (auto MaybeMCOp = LowerMachineOperand(MI, MO)) 401 OutMI.addOperand(*MaybeMCOp); 402 403 bool In64BitMode = AsmPrinter.getSubtarget().is64Bit(); 404 if (X86::optimizeInstFromVEX3ToVEX2(OutMI, MI->getDesc()) || 405 X86::optimizeShiftRotateWithImmediateOne(OutMI) || 406 X86::optimizeVPCMPWithImmediateOneOrSix(OutMI) || 407 X86::optimizeMOVSX(OutMI) || X86::optimizeINCDEC(OutMI, In64BitMode) || 408 X86::optimizeMOV(OutMI, In64BitMode) || 409 X86::optimizeToFixedRegisterOrShortImmediateForm(OutMI)) 410 return; 411 412 // Handle a few special cases to eliminate operand modifiers. 413 switch (OutMI.getOpcode()) { 414 case X86::LEA64_32r: 415 case X86::LEA64r: 416 case X86::LEA16r: 417 case X86::LEA32r: 418 // LEA should have a segment register, but it must be empty. 419 assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands && 420 "Unexpected # of LEA operands"); 421 assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 && 422 "LEA has segment specified!"); 423 break; 424 case X86::MULX32Hrr: 425 case X86::MULX32Hrm: 426 case X86::MULX64Hrr: 427 case X86::MULX64Hrm: { 428 // Turn into regular MULX by duplicating the destination. 429 unsigned NewOpc; 430 switch (OutMI.getOpcode()) { 431 default: llvm_unreachable("Invalid opcode"); 432 case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break; 433 case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break; 434 case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break; 435 case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break; 436 } 437 OutMI.setOpcode(NewOpc); 438 // Duplicate the destination. 439 unsigned DestReg = OutMI.getOperand(0).getReg(); 440 OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg)); 441 break; 442 } 443 // CALL64r, CALL64pcrel32 - These instructions used to have 444 // register inputs modeled as normal uses instead of implicit uses. As such, 445 // we used to truncate off all but the first operand (the callee). This 446 // issue seems to have been fixed at some point. This assert verifies that. 447 case X86::CALL64r: 448 case X86::CALL64pcrel32: 449 assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!"); 450 break; 451 case X86::EH_RETURN: 452 case X86::EH_RETURN64: { 453 OutMI = MCInst(); 454 OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget())); 455 break; 456 } 457 case X86::CLEANUPRET: { 458 // Replace CLEANUPRET with the appropriate RET. 459 OutMI = MCInst(); 460 OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget())); 461 break; 462 } 463 case X86::CATCHRET: { 464 // Replace CATCHRET with the appropriate RET. 465 const X86Subtarget &Subtarget = AsmPrinter.getSubtarget(); 466 unsigned ReturnReg = In64BitMode ?
X86::RAX : X86::EAX; 467 OutMI = MCInst(); 468 OutMI.setOpcode(getRetOpcode(Subtarget)); 469 OutMI.addOperand(MCOperand::createReg(ReturnReg)); 470 break; 471 } 472 // TAILJMPd, TAILJMPd64, TAILJMPd_CC - Lower to the correct jump 473 // instruction. 474 case X86::TAILJMPr: 475 case X86::TAILJMPr64: 476 case X86::TAILJMPr64_REX: 477 case X86::TAILJMPd: 478 case X86::TAILJMPd64: 479 assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!"); 480 OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); 481 break; 482 case X86::TAILJMPd_CC: 483 case X86::TAILJMPd64_CC: 484 assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!"); 485 OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); 486 break; 487 case X86::TAILJMPm: 488 case X86::TAILJMPm64: 489 case X86::TAILJMPm64_REX: 490 assert(OutMI.getNumOperands() == X86::AddrNumOperands && 491 "Unexpected number of operands!"); 492 OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); 493 break; 494 case X86::MASKMOVDQU: 495 case X86::VMASKMOVDQU: 496 if (In64BitMode) 497 OutMI.setFlags(X86::IP_HAS_AD_SIZE); 498 break; 499 case X86::BSF16rm: 500 case X86::BSF16rr: 501 case X86::BSF32rm: 502 case X86::BSF32rr: 503 case X86::BSF64rm: 504 case X86::BSF64rr: { 505 // Add a REP prefix to BSF instructions so that new processors can 506 // recognize it as TZCNT, which has better performance than BSF. 507 // BSF and TZCNT have different interpretations of the ZF bit. So make sure 508 // it won't be used later. 509 const MachineOperand *FlagDef = MI->findRegisterDefOperand(X86::EFLAGS); 510 if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead()) 511 OutMI.setFlags(X86::IP_HAS_REPEAT); 512 break; 513 } 514 default: 515 break; 516 } 517 } 518 519 void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, 520 const MachineInstr &MI) { 521 NoAutoPaddingScope NoPadScope(*OutStreamer); 522 bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 && 523 MI.getOpcode() != X86::TLS_base_addr32; 524 bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 || 525 MI.getOpcode() == X86::TLS_base_addr64; 526 MCContext &Ctx = OutStreamer->getContext(); 527 528 MCSymbolRefExpr::VariantKind SRVK; 529 switch (MI.getOpcode()) { 530 case X86::TLS_addr32: 531 case X86::TLS_addr64: 532 case X86::TLS_addrX32: 533 SRVK = MCSymbolRefExpr::VK_TLSGD; 534 break; 535 case X86::TLS_base_addr32: 536 SRVK = MCSymbolRefExpr::VK_TLSLDM; 537 break; 538 case X86::TLS_base_addr64: 539 case X86::TLS_base_addrX32: 540 SRVK = MCSymbolRefExpr::VK_TLSLD; 541 break; 542 default: 543 llvm_unreachable("unexpected opcode"); 544 } 545 546 const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create( 547 MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx); 548 549 // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD 550 // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is 551 // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by 552 // only using GOT when GOTPCRELX is enabled. 553 // TODO Delete the workaround when GOTPCRELX becomes commonplace.
554 bool UseGot = MMI->getModule()->getRtLibUseGOT() && 555 Ctx.getAsmInfo()->canRelaxRelocations(); 556 557 if (Is64Bits) { 558 bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD; 559 if (NeedsPadding && Is64BitsLP64) 560 EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); 561 EmitAndCountInstruction(MCInstBuilder(X86::LEA64r) 562 .addReg(X86::RDI) 563 .addReg(X86::RIP) 564 .addImm(1) 565 .addReg(0) 566 .addExpr(Sym) 567 .addReg(0)); 568 const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr"); 569 if (NeedsPadding) { 570 if (!UseGot) 571 EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); 572 EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); 573 EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX)); 574 } 575 if (UseGot) { 576 const MCExpr *Expr = MCSymbolRefExpr::create( 577 TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx); 578 EmitAndCountInstruction(MCInstBuilder(X86::CALL64m) 579 .addReg(X86::RIP) 580 .addImm(1) 581 .addReg(0) 582 .addExpr(Expr) 583 .addReg(0)); 584 } else { 585 EmitAndCountInstruction( 586 MCInstBuilder(X86::CALL64pcrel32) 587 .addExpr(MCSymbolRefExpr::create(TlsGetAddr, 588 MCSymbolRefExpr::VK_PLT, Ctx))); 589 } 590 } else { 591 if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) { 592 EmitAndCountInstruction(MCInstBuilder(X86::LEA32r) 593 .addReg(X86::EAX) 594 .addReg(0) 595 .addImm(1) 596 .addReg(X86::EBX) 597 .addExpr(Sym) 598 .addReg(0)); 599 } else { 600 EmitAndCountInstruction(MCInstBuilder(X86::LEA32r) 601 .addReg(X86::EAX) 602 .addReg(X86::EBX) 603 .addImm(1) 604 .addReg(0) 605 .addExpr(Sym) 606 .addReg(0)); 607 } 608 609 const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr"); 610 if (UseGot) { 611 const MCExpr *Expr = 612 MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx); 613 EmitAndCountInstruction(MCInstBuilder(X86::CALL32m) 614 .addReg(X86::EBX) 615 .addImm(1) 616 .addReg(0) 617 .addExpr(Expr) 618 .addReg(0)); 619 } else { 620 EmitAndCountInstruction( 621 MCInstBuilder(X86::CALLpcrel32) 622 .addExpr(MCSymbolRefExpr::create(TlsGetAddr, 623 MCSymbolRefExpr::VK_PLT, Ctx))); 624 } 625 } 626 } 627 628 /// Emit the largest nop instruction smaller than or equal to \p NumBytes 629 /// bytes. Return the size of nop emitted. 630 static unsigned emitNop(MCStreamer &OS, unsigned NumBytes, 631 const X86Subtarget *Subtarget) { 632 // Determine the longest nop which can be efficiently decoded for the given 633 // target cpu. 15-bytes is the longest single NOP instruction, but some 634 // platforms can't decode the longest forms efficiently. 635 unsigned MaxNopLength = 1; 636 if (Subtarget->is64Bit()) { 637 // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the 638 // IndexReg/BaseReg below need to be updated. 
639 if (Subtarget->hasFeature(X86::TuningFast7ByteNOP)) 640 MaxNopLength = 7; 641 else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP)) 642 MaxNopLength = 15; 643 else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP)) 644 MaxNopLength = 11; 645 else 646 MaxNopLength = 10; 647 } if (Subtarget->is32Bit()) 648 MaxNopLength = 2; 649 650 // Cap a single nop emission at the profitable value for the target 651 NumBytes = std::min(NumBytes, MaxNopLength); 652 653 unsigned NopSize; 654 unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg; 655 IndexReg = Displacement = SegmentReg = 0; 656 BaseReg = X86::RAX; 657 ScaleVal = 1; 658 switch (NumBytes) { 659 case 0: 660 llvm_unreachable("Zero nops?"); 661 break; 662 case 1: 663 NopSize = 1; 664 Opc = X86::NOOP; 665 break; 666 case 2: 667 NopSize = 2; 668 Opc = X86::XCHG16ar; 669 break; 670 case 3: 671 NopSize = 3; 672 Opc = X86::NOOPL; 673 break; 674 case 4: 675 NopSize = 4; 676 Opc = X86::NOOPL; 677 Displacement = 8; 678 break; 679 case 5: 680 NopSize = 5; 681 Opc = X86::NOOPL; 682 Displacement = 8; 683 IndexReg = X86::RAX; 684 break; 685 case 6: 686 NopSize = 6; 687 Opc = X86::NOOPW; 688 Displacement = 8; 689 IndexReg = X86::RAX; 690 break; 691 case 7: 692 NopSize = 7; 693 Opc = X86::NOOPL; 694 Displacement = 512; 695 break; 696 case 8: 697 NopSize = 8; 698 Opc = X86::NOOPL; 699 Displacement = 512; 700 IndexReg = X86::RAX; 701 break; 702 case 9: 703 NopSize = 9; 704 Opc = X86::NOOPW; 705 Displacement = 512; 706 IndexReg = X86::RAX; 707 break; 708 default: 709 NopSize = 10; 710 Opc = X86::NOOPW; 711 Displacement = 512; 712 IndexReg = X86::RAX; 713 SegmentReg = X86::CS; 714 break; 715 } 716 717 unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U); 718 NopSize += NumPrefixes; 719 for (unsigned i = 0; i != NumPrefixes; ++i) 720 OS.emitBytes("\x66"); 721 722 switch (Opc) { 723 default: llvm_unreachable("Unexpected opcode"); 724 case X86::NOOP: 725 OS.emitInstruction(MCInstBuilder(Opc), *Subtarget); 726 break; 727 case X86::XCHG16ar: 728 OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), 729 *Subtarget); 730 break; 731 case X86::NOOPL: 732 case X86::NOOPW: 733 OS.emitInstruction(MCInstBuilder(Opc) 734 .addReg(BaseReg) 735 .addImm(ScaleVal) 736 .addReg(IndexReg) 737 .addImm(Displacement) 738 .addReg(SegmentReg), 739 *Subtarget); 740 break; 741 } 742 assert(NopSize <= NumBytes && "We overemitted?"); 743 return NopSize; 744 } 745 746 /// Emit the optimal amount of multi-byte nops on X86. 
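/// Repeatedly calls emitNop() until NumBytes bytes of nops have been emitted.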
747 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, 748 const X86Subtarget *Subtarget) { 749 unsigned NopsToEmit = NumBytes; 750 (void)NopsToEmit; 751 while (NumBytes) { 752 NumBytes -= emitNop(OS, NumBytes, Subtarget); 753 assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!"); 754 } 755 } 756 757 void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI, 758 X86MCInstLower &MCIL) { 759 assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64"); 760 761 NoAutoPaddingScope NoPadScope(*OutStreamer); 762 763 StatepointOpers SOpers(&MI); 764 if (unsigned PatchBytes = SOpers.getNumPatchBytes()) { 765 emitX86Nops(*OutStreamer, PatchBytes, Subtarget); 766 } else { 767 // Lower call target and choose correct opcode 768 const MachineOperand &CallTarget = SOpers.getCallTarget(); 769 MCOperand CallTargetMCOp; 770 unsigned CallOpcode; 771 switch (CallTarget.getType()) { 772 case MachineOperand::MO_GlobalAddress: 773 case MachineOperand::MO_ExternalSymbol: 774 CallTargetMCOp = MCIL.LowerSymbolOperand( 775 CallTarget, MCIL.GetSymbolFromOperand(CallTarget)); 776 CallOpcode = X86::CALL64pcrel32; 777 // Currently, we only support relative addressing with statepoints. 778 // Otherwise, we'll need a scratch register to hold the target 779 // address. You'll fail asserts during load & relocation if this 780 // symbol is too far away. (TODO: support non-relative addressing) 781 break; 782 case MachineOperand::MO_Immediate: 783 CallTargetMCOp = MCOperand::createImm(CallTarget.getImm()); 784 CallOpcode = X86::CALL64pcrel32; 785 // Currently, we only support relative addressing with statepoints. 786 // Otherwise, we'll need a scratch register to hold the target 787 // immediate. You'll fail asserts during load & relocation if this 788 // address is too far away. (TODO: support non-relative addressing) 789 break; 790 case MachineOperand::MO_Register: 791 // FIXME: Add retpoline support and remove this.
792 if (Subtarget->useIndirectThunkCalls()) 793 report_fatal_error("Lowering register statepoints with thunks not " 794 "yet implemented."); 795 CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); 796 CallOpcode = X86::CALL64r; 797 break; 798 default: 799 llvm_unreachable("Unsupported operand type in statepoint call target"); 800 break; 801 } 802 803 // Emit call 804 MCInst CallInst; 805 CallInst.setOpcode(CallOpcode); 806 CallInst.addOperand(CallTargetMCOp); 807 OutStreamer->emitInstruction(CallInst, getSubtargetInfo()); 808 } 809 810 // Record our statepoint node in the same section used by STACKMAP 811 // and PATCHPOINT 812 auto &Ctx = OutStreamer->getContext(); 813 MCSymbol *MILabel = Ctx.createTempSymbol(); 814 OutStreamer->emitLabel(MILabel); 815 SM.recordStatepoint(*MILabel, MI); 816 } 817 818 void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI, 819 X86MCInstLower &MCIL) { 820 // FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>, 821 // <opcode>, <operands> 822 823 NoAutoPaddingScope NoPadScope(*OutStreamer); 824 825 Register DefRegister = FaultingMI.getOperand(0).getReg(); 826 FaultMaps::FaultKind FK = 827 static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm()); 828 MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol(); 829 unsigned Opcode = FaultingMI.getOperand(3).getImm(); 830 unsigned OperandsBeginIdx = 4; 831 832 auto &Ctx = OutStreamer->getContext(); 833 MCSymbol *FaultingLabel = Ctx.createTempSymbol(); 834 OutStreamer->emitLabel(FaultingLabel); 835 836 assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!"); 837 FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel); 838 839 MCInst MI; 840 MI.setOpcode(Opcode); 841 842 if (DefRegister != X86::NoRegister) 843 MI.addOperand(MCOperand::createReg(DefRegister)); 844 845 for (const MachineOperand &MO : 846 llvm::drop_begin(FaultingMI.operands(), OperandsBeginIdx)) 847 if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, MO)) 848 MI.addOperand(*MaybeOperand); 849 850 OutStreamer->AddComment("on-fault: " + HandlerLabel->getName()); 851 OutStreamer->emitInstruction(MI, getSubtargetInfo()); 852 } 853 854 void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI, 855 X86MCInstLower &MCIL) { 856 bool Is64Bits = Subtarget->is64Bit(); 857 MCContext &Ctx = OutStreamer->getContext(); 858 MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__"); 859 const MCSymbolRefExpr *Op = 860 MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx); 861 862 EmitAndCountInstruction( 863 MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32) 864 .addExpr(Op)); 865 } 866 867 void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) { 868 assert(std::next(MI.getIterator())->isCall() && 869 "KCFI_CHECK not followed by a call instruction"); 870 871 // Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop() 872 // returns a 1-byte X86::NOOP, which means the offset is the same in 873 // bytes. This assumes that patchable-function-prefix is the same for all 874 // functions. 875 const MachineFunction &MF = *MI.getMF(); 876 int64_t PrefixNops = 0; 877 (void)MF.getFunction() 878 .getFnAttribute("patchable-function-prefix") 879 .getValueAsString() 880 .getAsInteger(10, PrefixNops); 881 882 // KCFI allows indirect calls to any location that's preceded by a valid 883 // type identifier.
To avoid encoding the full constant into an instruction, 884 // and thus emitting potential call target gadgets at each indirect call 885 // site, load a negated constant to a register and compare that to the 886 // expected value at the call target. 887 const Register AddrReg = MI.getOperand(0).getReg(); 888 const uint32_t Type = MI.getOperand(1).getImm(); 889 // The check is immediately before the call. If the call target is in R10, 890 // we can clobber R11 for the check instead. 891 unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D; 892 EmitAndCountInstruction( 893 MCInstBuilder(X86::MOV32ri).addReg(TempReg).addImm(-MaskKCFIType(Type))); 894 EmitAndCountInstruction(MCInstBuilder(X86::ADD32rm) 895 .addReg(X86::NoRegister) 896 .addReg(TempReg) 897 .addReg(AddrReg) 898 .addImm(1) 899 .addReg(X86::NoRegister) 900 .addImm(-(PrefixNops + 4)) 901 .addReg(X86::NoRegister)); 902 903 MCSymbol *Pass = OutContext.createTempSymbol(); 904 EmitAndCountInstruction( 905 MCInstBuilder(X86::JCC_1) 906 .addExpr(MCSymbolRefExpr::create(Pass, OutContext)) 907 .addImm(X86::COND_E)); 908 909 MCSymbol *Trap = OutContext.createTempSymbol(); 910 OutStreamer->emitLabel(Trap); 911 EmitAndCountInstruction(MCInstBuilder(X86::TRAP)); 912 emitKCFITrapEntry(MF, Trap); 913 OutStreamer->emitLabel(Pass); 914 } 915 916 void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) { 917 // FIXME: Make this work on non-ELF. 918 if (!TM.getTargetTriple().isOSBinFormatELF()) { 919 report_fatal_error("llvm.asan.check.memaccess only supported on ELF"); 920 return; 921 } 922 923 const auto &Reg = MI.getOperand(0).getReg(); 924 ASanAccessInfo AccessInfo(MI.getOperand(1).getImm()); 925 926 uint64_t ShadowBase; 927 int MappingScale; 928 bool OrShadowOffset; 929 getAddressSanitizerParams(Triple(TM.getTargetTriple()), 64, 930 AccessInfo.CompileKernel, &ShadowBase, 931 &MappingScale, &OrShadowOffset); 932 933 StringRef Name = AccessInfo.IsWrite ? "store" : "load"; 934 StringRef Op = OrShadowOffset ? "or" : "add"; 935 std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" + 936 Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" + 937 TM.getMCRegisterInfo()->getName(Reg.asMCReg())) 938 .str(); 939 if (OrShadowOffset) 940 report_fatal_error( 941 "OrShadowOffset is not supported with optimized callbacks"); 942 943 EmitAndCountInstruction( 944 MCInstBuilder(X86::CALL64pcrel32) 945 .addExpr(MCSymbolRefExpr::create( 946 OutContext.getOrCreateSymbol(SymName), OutContext))); 947 } 948 949 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI, 950 X86MCInstLower &MCIL) { 951 // PATCHABLE_OP minsize 952 953 NoAutoPaddingScope NoPadScope(*OutStreamer); 954 955 auto NextMI = std::find_if(std::next(MI.getIterator()), 956 MI.getParent()->end().getInstrIterator(), 957 [](auto &II) { return !II.isMetaInstruction(); }); 958 959 SmallString<256> Code; 960 unsigned MinSize = MI.getOperand(0).getImm(); 961 962 if (NextMI != MI.getParent()->end() && !NextMI->isInlineAsm()) { 963 // Lower the next MachineInstr to find its byte size. 964 // If the next instruction is inline assembly, we skip lowering it for now, 965 // and assume we should always generate NOPs. 
966 MCInst MCI; 967 MCIL.Lower(&*NextMI, MCI); 968 969 SmallVector<MCFixup, 4> Fixups; 970 CodeEmitter->encodeInstruction(MCI, Code, Fixups, getSubtargetInfo()); 971 } 972 973 if (Code.size() < MinSize) { 974 if (MinSize == 2 && Subtarget->is32Bit() && 975 Subtarget->isTargetWindowsMSVC() && 976 (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) { 977 // For compatibility reasons, when targeting MSVC, it is important to 978 // generate a 'legacy' NOP in the form of an 8B FF MOV EDI, EDI. Some tools 979 // rely specifically on this pattern to be able to patch a function. 980 // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE. 981 OutStreamer->emitInstruction( 982 MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI), 983 *Subtarget); 984 } else { 985 unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget); 986 assert(NopSize == MinSize && "Could not implement MinSize!"); 987 (void)NopSize; 988 } 989 } 990 } 991 992 // Lower a stackmap of the form: 993 // <id>, <shadowBytes>, ... 994 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { 995 SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo()); 996 997 auto &Ctx = OutStreamer->getContext(); 998 MCSymbol *MILabel = Ctx.createTempSymbol(); 999 OutStreamer->emitLabel(MILabel); 1000 1001 SM.recordStackMap(*MILabel, MI); 1002 unsigned NumShadowBytes = MI.getOperand(1).getImm(); 1003 SMShadowTracker.reset(NumShadowBytes); 1004 } 1005 1006 // Lower a patchpoint of the form: 1007 // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ... 1008 void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, 1009 X86MCInstLower &MCIL) { 1010 assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64"); 1011 1012 SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo()); 1013 1014 NoAutoPaddingScope NoPadScope(*OutStreamer); 1015 1016 auto &Ctx = OutStreamer->getContext(); 1017 MCSymbol *MILabel = Ctx.createTempSymbol(); 1018 OutStreamer->emitLabel(MILabel); 1019 SM.recordPatchPoint(*MILabel, MI); 1020 1021 PatchPointOpers opers(&MI); 1022 unsigned ScratchIdx = opers.getNextScratchIdx(); 1023 unsigned EncodedBytes = 0; 1024 const MachineOperand &CalleeMO = opers.getCallTarget(); 1025 1026 // Check for null target. If target is non-null (i.e. is non-zero or is 1027 // symbolic) then emit a call. 1028 if (!(CalleeMO.isImm() && !CalleeMO.getImm())) { 1029 MCOperand CalleeMCOp; 1030 switch (CalleeMO.getType()) { 1031 default: 1032 /// FIXME: Add a verifier check for bad callee types. 1033 llvm_unreachable("Unrecognized callee operand type."); 1034 case MachineOperand::MO_Immediate: 1035 if (CalleeMO.getImm()) 1036 CalleeMCOp = MCOperand::createImm(CalleeMO.getImm()); 1037 break; 1038 case MachineOperand::MO_ExternalSymbol: 1039 case MachineOperand::MO_GlobalAddress: 1040 CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO, 1041 MCIL.GetSymbolFromOperand(CalleeMO)); 1042 break; 1043 } 1044 1045 // Emit MOV to materialize the target address and the CALL to target. 1046 // This is encoded with 12-13 bytes, depending on which register is used. 1047 Register ScratchReg = MI.getOperand(ScratchIdx).getReg(); 1048 if (X86II::isX86_64ExtendedReg(ScratchReg)) 1049 EncodedBytes = 13; 1050 else 1051 EncodedBytes = 12; 1052 1053 EmitAndCountInstruction( 1054 MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp)); 1055 // FIXME: Add retpoline support and remove this.
1056 if (Subtarget->useIndirectThunkCalls()) 1057 report_fatal_error( 1058 "Lowering patchpoint with thunks not yet implemented."); 1059 EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg)); 1060 } 1061 1062 // Emit padding. 1063 unsigned NumBytes = opers.getNumPatchBytes(); 1064 assert(NumBytes >= EncodedBytes && 1065 "Patchpoint can't request size less than the length of a call."); 1066 1067 emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget); 1068 } 1069 1070 void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, 1071 X86MCInstLower &MCIL) { 1072 assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64"); 1073 1074 NoAutoPaddingScope NoPadScope(*OutStreamer); 1075 1076 // We want to emit the following pattern, which follows the x86 calling 1077 // convention to prepare for the trampoline call to be patched in. 1078 // 1079 // .p2align 1, ... 1080 // .Lxray_event_sled_N: 1081 // jmp +N // jump across the instrumentation sled 1082 // ... // set up arguments in register 1083 // callq __xray_CustomEvent@plt // force dependency to symbol 1084 // ... 1085 // <jump here> 1086 // 1087 // After patching, it would look something like: 1088 // 1089 // nopw (2-byte nop) 1090 // ... 1091 // callq __xrayCustomEvent // already lowered 1092 // ... 1093 // 1094 // --- 1095 // First we emit the label and the jump. 1096 auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true); 1097 OutStreamer->AddComment("# XRay Custom Event Log"); 1098 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1099 OutStreamer->emitLabel(CurSled); 1100 1101 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as 1102 // an operand (computed as an offset from the jmp instruction). 1103 // FIXME: Find another less hacky way to force the relative jump. 1104 OutStreamer->emitBinaryData("\xeb\x0f"); 1105 1106 // The default C calling convention will place two arguments into %rdi and 1107 // %rsi -- so we only work with those. 1108 const Register DestRegs[] = {X86::RDI, X86::RSI}; 1109 bool UsedMask[] = {false, false}; 1110 // Filled out in loop. 1111 Register SrcRegs[] = {0, 0}; 1112 1113 // Then we put the operands in the %rdi and %rsi registers. We spill the 1114 // values in the registers before we clobber them, and mark them as used in 1115 // UsedMask. In case the arguments are already in the correct register, we 1116 // emit nops appropriately sized to keep the sled the same size in every 1117 // situation. 1118 for (unsigned I = 0; I < MI.getNumOperands(); ++I) 1119 if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) { 1120 assert(Op->isReg() && "Only support arguments in registers"); 1121 SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64); 1122 assert(SrcRegs[I].isValid() && "Invalid operand"); 1123 if (SrcRegs[I] != DestRegs[I]) { 1124 UsedMask[I] = true; 1125 EmitAndCountInstruction( 1126 MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I])); 1127 } else { 1128 emitX86Nops(*OutStreamer, 4, Subtarget); 1129 } 1130 } 1131 1132 // Now that the register values are stashed, mov arguments into place. 1133 // FIXME: This doesn't work if one of the later SrcRegs is equal to an 1134 // earlier DestReg. We will have already overwritten the register before 1135 // we can copy from it.
1136 for (unsigned I = 0; I < MI.getNumOperands(); ++I) 1137 if (SrcRegs[I] != DestRegs[I]) 1138 EmitAndCountInstruction( 1139 MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I])); 1140 1141 // We emit a hard dependency on the __xray_CustomEvent symbol, which is the 1142 // name of the trampoline to be implemented by the XRay runtime. 1143 auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent"); 1144 MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym); 1145 if (isPositionIndependent()) 1146 TOp.setTargetFlags(X86II::MO_PLT); 1147 1148 // Emit the call instruction. 1149 EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32) 1150 .addOperand(MCIL.LowerSymbolOperand(TOp, TSym))); 1151 1152 // Restore caller-saved and used registers. 1153 for (unsigned I = sizeof UsedMask; I-- > 0;) 1154 if (UsedMask[I]) 1155 EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I])); 1156 else 1157 emitX86Nops(*OutStreamer, 1, Subtarget); 1158 1159 OutStreamer->AddComment("xray custom event end."); 1160 1161 // Record the sled version. Version 0 of this sled was spelled differently, so 1162 // we let the runtime handle the different offsets we're using. Version 2 1163 // changed the absolute address to a PC-relative address. 1164 recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2); 1165 } 1166 1167 void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI, 1168 X86MCInstLower &MCIL) { 1169 assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64"); 1170 1171 NoAutoPaddingScope NoPadScope(*OutStreamer); 1172 1173 // We want to emit the following pattern, which follows the x86 calling 1174 // convention to prepare for the trampoline call to be patched in. 1175 // 1176 // .p2align 1, ... 1177 // .Lxray_event_sled_N: 1178 // jmp +N // jump across the instrumentation sled 1179 // ... // set up arguments in register 1180 // callq __xray_TypedEvent@plt // force dependency to symbol 1181 // ... 1182 // <jump here> 1183 // 1184 // After patching, it would look something like: 1185 // 1186 // nopw (2-byte nop) 1187 // ... 1188 // callq __xrayTypedEvent // already lowered 1189 // ... 1190 // 1191 // --- 1192 // First we emit the label and the jump. 1193 auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true); 1194 OutStreamer->AddComment("# XRay Typed Event Log"); 1195 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1196 OutStreamer->emitLabel(CurSled); 1197 1198 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as 1199 // an operand (computed as an offset from the jmp instruction). 1200 // FIXME: Find another less hacky way to force the relative jump. 1201 OutStreamer->emitBinaryData("\xeb\x14"); 1202 1203 // An x86-64 convention may place three arguments into %rcx, %rdx, and R8, 1204 // so we'll work with those. Or we may be called via SystemV, in which case 1205 // we don't have to do any translation. 1206 const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX}; 1207 bool UsedMask[] = {false, false, false}; 1208 1209 // Will fill out src regs in the loop. 1210 Register SrcRegs[] = {0, 0, 0}; 1211 1212 // Then we put the operands in the SystemV registers. We spill the values in 1213 // the registers before we clobber them, and mark them as used in UsedMask. 1214 // In case the arguments are already in the correct register, we emit nops 1215 // appropriately sized to keep the sled the same size in every situation.
1216 for (unsigned I = 0; I < MI.getNumOperands(); ++I) 1217 if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) { 1218 // TODO: Is register only support adequate? 1219 assert(Op->isReg() && "Only supports arguments in registers"); 1220 SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64); 1221 assert(SrcRegs[I].isValid() && "Invalid operand"); 1222 if (SrcRegs[I] != DestRegs[I]) { 1223 UsedMask[I] = true; 1224 EmitAndCountInstruction( 1225 MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I])); 1226 } else { 1227 emitX86Nops(*OutStreamer, 4, Subtarget); 1228 } 1229 } 1230 1231 // In the above loop we only stash all of the destination registers or emit 1232 // nops if the arguments are already in the right place. Doing the actual 1233 // moving is postponed until after all the registers are stashed so nothing 1234 // is clobbered. We've already added nops to account for the size of mov and 1235 // push if the register is in the right place, so we only have to worry about 1236 // emitting movs. 1237 // FIXME: This doesn't work if one of the later SrcRegs is equal to an 1238 // earlier DestReg. We will have already overwritten the register before 1239 // we can copy from it. 1240 for (unsigned I = 0; I < MI.getNumOperands(); ++I) 1241 if (UsedMask[I]) 1242 EmitAndCountInstruction( 1243 MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I])); 1244 1245 // We emit a hard dependency on the __xray_TypedEvent symbol, which is the 1246 // name of the trampoline to be implemented by the XRay runtime. 1247 auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent"); 1248 MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym); 1249 if (isPositionIndependent()) 1250 TOp.setTargetFlags(X86II::MO_PLT); 1251 1252 // Emit the call instruction. 1253 EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32) 1254 .addOperand(MCIL.LowerSymbolOperand(TOp, TSym))); 1255 1256 // Restore caller-saved and used registers. 1257 for (unsigned I = sizeof UsedMask; I-- > 0;) 1258 if (UsedMask[I]) 1259 EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I])); 1260 else 1261 emitX86Nops(*OutStreamer, 1, Subtarget); 1262 1263 OutStreamer->AddComment("xray typed event end."); 1264 1265 // Record the sled version. 1266 recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2); 1267 } 1268 1269 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, 1270 X86MCInstLower &MCIL) { 1271 1272 NoAutoPaddingScope NoPadScope(*OutStreamer); 1273 1274 const Function &F = MF->getFunction(); 1275 if (F.hasFnAttribute("patchable-function-entry")) { 1276 unsigned Num; 1277 if (F.getFnAttribute("patchable-function-entry") 1278 .getValueAsString() 1279 .getAsInteger(10, Num)) 1280 return; 1281 emitX86Nops(*OutStreamer, Num, Subtarget); 1282 return; 1283 } 1284 // We want to emit the following pattern: 1285 // 1286 // .p2align 1, ... 1287 // .Lxray_sled_N: 1288 // jmp .tmpN 1289 // # 9 bytes worth of noops 1290 // 1291 // We need the 9 bytes because at runtime, we'd be patching over the full 11 1292 // bytes with the following pattern: 1293 // 1294 // mov %r10, <function id, 32-bit> // 6 bytes 1295 // call <relative offset, 32-bits> // 5 bytes 1296 // 1297 auto CurSled = OutContext.createTempSymbol("xray_sled_", true); 1298 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1299 OutStreamer->emitLabel(CurSled); 1300 1301 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as 1302 // an operand (computed as an offset from the jmp instruction).
1303 // FIXME: Find another less hacky way to force the relative jump. 1304 OutStreamer->emitBytes("\xeb\x09"); 1305 emitX86Nops(*OutStreamer, 9, Subtarget); 1306 recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2); 1307 } 1308 1309 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, 1310 X86MCInstLower &MCIL) { 1311 NoAutoPaddingScope NoPadScope(*OutStreamer); 1312 1313 // Since PATCHABLE_RET takes the opcode of the return statement as an 1314 // argument, we use that to emit the correct form of the RET that we want. 1315 // i.e. when we see this: 1316 // 1317 // PATCHABLE_RET X86::RET ... 1318 // 1319 // We should emit the RET followed by sleds. 1320 // 1321 // .p2align 1, ... 1322 // .Lxray_sled_N: 1323 // ret # or equivalent instruction 1324 // # 10 bytes worth of noops 1325 // 1326 // This just makes sure that the alignment for the next instruction is 2. 1327 auto CurSled = OutContext.createTempSymbol("xray_sled_", true); 1328 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1329 OutStreamer->emitLabel(CurSled); 1330 unsigned OpCode = MI.getOperand(0).getImm(); 1331 MCInst Ret; 1332 Ret.setOpcode(OpCode); 1333 for (auto &MO : drop_begin(MI.operands())) 1334 if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO)) 1335 Ret.addOperand(*MaybeOperand); 1336 OutStreamer->emitInstruction(Ret, getSubtargetInfo()); 1337 emitX86Nops(*OutStreamer, 10, Subtarget); 1338 recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2); 1339 } 1340 1341 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, 1342 X86MCInstLower &MCIL) { 1343 NoAutoPaddingScope NoPadScope(*OutStreamer); 1344 1345 // Like PATCHABLE_RET, we have the actual instruction in the operands to this 1346 // instruction so we lower that particular instruction and its operands. 1347 // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how 1348 // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to 1349 // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual 1350 // tail call much like how we have it in PATCHABLE_RET. 1351 auto CurSled = OutContext.createTempSymbol("xray_sled_", true); 1352 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1353 OutStreamer->emitLabel(CurSled); 1354 auto Target = OutContext.createTempSymbol(); 1355 1356 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as 1357 // an operand (computed as an offset from the jmp instruction). 1358 // FIXME: Find another less hacky way to force the relative jump. 1359 OutStreamer->emitBytes("\xeb\x09"); 1360 emitX86Nops(*OutStreamer, 9, Subtarget); 1361 OutStreamer->emitLabel(Target); 1362 recordSled(CurSled, MI, SledKind::TAIL_CALL, 2); 1363 1364 unsigned OpCode = MI.getOperand(0).getImm(); 1365 OpCode = convertTailJumpOpcode(OpCode); 1366 MCInst TC; 1367 TC.setOpcode(OpCode); 1368 1369 // Before emitting the instruction, add a comment to indicate that this is 1370 // indeed a tail call. 1371 OutStreamer->AddComment("TAILCALL"); 1372 for (auto &MO : drop_begin(MI.operands())) 1373 if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO)) 1374 TC.addOperand(*MaybeOperand); 1375 OutStreamer->emitInstruction(TC, getSubtargetInfo()); 1376 } 1377 1378 // Returns the instruction preceding MBBI in MachineFunction. 1379 // If MBBI is the first instruction of the first basic block, returns null.
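// Empty basic blocks are skipped when walking backwards across block boundaries.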
1380 static MachineBasicBlock::const_iterator 1381 PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) { 1382 const MachineBasicBlock *MBB = MBBI->getParent(); 1383 while (MBBI == MBB->begin()) { 1384 if (MBB == &MBB->getParent()->front()) 1385 return MachineBasicBlock::const_iterator(); 1386 MBB = MBB->getPrevNode(); 1387 MBBI = MBB->end(); 1388 } 1389 --MBBI; 1390 return MBBI; 1391 } 1392 1393 static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx, 1394 unsigned SrcOp2Idx, ArrayRef<int> Mask) { 1395 std::string Comment; 1396 1397 // Compute the name for a register. This is really goofy because we have 1398 // multiple instruction printers that could (in theory) use different 1399 // names. Fortunately most people use the ATT style (outside of Windows) 1400 // and they actually agree on register naming here. Ultimately, this is 1401 // a comment, and so it's OK if it isn't perfect. 1402 auto GetRegisterName = [](MCRegister Reg) -> StringRef { 1403 return X86ATTInstPrinter::getRegisterName(Reg); 1404 }; 1405 1406 const MachineOperand &DstOp = MI->getOperand(0); 1407 const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx); 1408 const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx); 1409 1410 StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem"; 1411 StringRef Src1Name = 1412 SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem"; 1413 StringRef Src2Name = 1414 SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem"; 1415 1416 // One source operand, fix the mask to print all elements in one span. 1417 SmallVector<int, 8> ShuffleMask(Mask); 1418 if (Src1Name == Src2Name) 1419 for (int i = 0, e = ShuffleMask.size(); i != e; ++i) 1420 if (ShuffleMask[i] >= e) 1421 ShuffleMask[i] -= e; 1422 1423 raw_string_ostream CS(Comment); 1424 CS << DstName; 1425 1426 // Handle AVX512 MASK/MASKZ write mask comments. 1427 // MASK: zmmX {%kY} 1428 // MASKZ: zmmX {%kY} {z} 1429 if (SrcOp1Idx > 1) { 1430 assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask"); 1431 1432 const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1); 1433 if (WriteMaskOp.isReg()) { 1434 CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}"; 1435 1436 if (SrcOp1Idx == 2) { 1437 CS << " {z}"; 1438 } 1439 } 1440 } 1441 1442 CS << " = "; 1443 1444 for (int i = 0, e = ShuffleMask.size(); i != e; ++i) { 1445 if (i != 0) 1446 CS << ","; 1447 if (ShuffleMask[i] == SM_SentinelZero) { 1448 CS << "zero"; 1449 continue; 1450 } 1451 1452 // Otherwise, it must come from src1 or src2. Print the span of elements 1453 // that comes from this src. 1454 bool isSrc1 = ShuffleMask[i] < (int)e; 1455 CS << (isSrc1 ? Src1Name : Src2Name) << '['; 1456 1457 bool IsFirst = true; 1458 while (i != e && ShuffleMask[i] != SM_SentinelZero && 1459 (ShuffleMask[i] < (int)e) == isSrc1) { 1460 if (!IsFirst) 1461 CS << ','; 1462 else 1463 IsFirst = false; 1464 if (ShuffleMask[i] == SM_SentinelUndef) 1465 CS << "u"; 1466 else 1467 CS << ShuffleMask[i] % (int)e; 1468 ++i; 1469 } 1470 CS << ']'; 1471 --i; // For loop increments element #. 1472 } 1473 CS.flush(); 1474 1475 return Comment; 1476 } 1477 1478 static void printConstant(const APInt &Val, raw_ostream &CS, 1479 bool PrintZero = false) { 1480 if (Val.getBitWidth() <= 64) { 1481 CS << (PrintZero ? 0ULL : Val.getZExtValue()); 1482 } else { 1483 // print multi-word constant as (w0,w1) 1484 CS << "("; 1485 for (int i = 0, N = Val.getNumWords(); i < N; ++i) { 1486 if (i > 0) 1487 CS << ","; 1488 CS << (PrintZero ?
0ULL : Val.getRawData()[i]); 1489 } 1490 CS << ")"; 1491 } 1492 } 1493 1494 static void printConstant(const APFloat &Flt, raw_ostream &CS, 1495 bool PrintZero = false) { 1496 SmallString<32> Str; 1497 // Force scientific notation to distinguish from integers. 1498 if (PrintZero) 1499 APFloat::getZero(Flt.getSemantics()).toString(Str, 0, 0); 1500 else 1501 Flt.toString(Str, 0, 0); 1502 CS << Str; 1503 } 1504 1505 static void printConstant(const Constant *COp, unsigned BitWidth, 1506 raw_ostream &CS, bool PrintZero = false) { 1507 if (isa<UndefValue>(COp)) { 1508 CS << "u"; 1509 } else if (auto *CI = dyn_cast<ConstantInt>(COp)) { 1510 printConstant(CI->getValue(), CS, PrintZero); 1511 } else if (auto *CF = dyn_cast<ConstantFP>(COp)) { 1512 printConstant(CF->getValueAPF(), CS, PrintZero); 1513 } else if (auto *CDS = dyn_cast<ConstantDataSequential>(COp)) { 1514 Type *EltTy = CDS->getElementType(); 1515 bool IsInteger = EltTy->isIntegerTy(); 1516 bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy(); 1517 unsigned EltBits = EltTy->getPrimitiveSizeInBits(); 1518 unsigned E = std::min(BitWidth / EltBits, CDS->getNumElements()); 1519 assert((BitWidth % EltBits) == 0 && "Element size mismatch"); 1520 for (unsigned I = 0; I != E; ++I) { 1521 if (I != 0) 1522 CS << ","; 1523 if (IsInteger) 1524 printConstant(CDS->getElementAsAPInt(I), CS, PrintZero); 1525 else if (IsFP) 1526 printConstant(CDS->getElementAsAPFloat(I), CS, PrintZero); 1527 else 1528 CS << "?"; 1529 } 1530 } else if (auto *CV = dyn_cast<ConstantVector>(COp)) { 1531 unsigned EltBits = CV->getType()->getScalarSizeInBits(); 1532 unsigned E = std::min(BitWidth / EltBits, CV->getNumOperands()); 1533 assert((BitWidth % EltBits) == 0 && "Element size mismatch"); 1534 for (unsigned I = 0; I != E; ++I) { 1535 if (I != 0) 1536 CS << ","; 1537 printConstant(CV->getOperand(I), EltBits, CS, PrintZero); 1538 } 1539 } else { 1540 CS << "?"; 1541 } 1542 } 1543 1544 static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer, 1545 int SclWidth, int VecWidth, 1546 const char *ShuffleComment) { 1547 std::string Comment; 1548 raw_string_ostream CS(Comment); 1549 const MachineOperand &DstOp = MI->getOperand(0); 1550 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; 1551 1552 if (auto *C = X86::getConstantFromPool(*MI, 1)) { 1553 CS << "["; 1554 printConstant(C, SclWidth, CS); 1555 for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) { 1556 CS << ","; 1557 printConstant(C, SclWidth, CS, true); 1558 } 1559 CS << "]"; 1560 OutStreamer.AddComment(CS.str()); 1561 return; // early-out 1562 } 1563 1564 // We didn't find a constant load, fall back to a shuffle mask decode.
1565 CS << ShuffleComment; 1566 OutStreamer.AddComment(CS.str()); 1567 } 1568 1569 static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer, 1570 int Repeats, int BitWidth) { 1571 if (auto *C = X86::getConstantFromPool(*MI, 1)) { 1572 std::string Comment; 1573 raw_string_ostream CS(Comment); 1574 const MachineOperand &DstOp = MI->getOperand(0); 1575 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; 1576 CS << "["; 1577 for (int l = 0; l != Repeats; ++l) { 1578 if (l != 0) 1579 CS << ","; 1580 printConstant(C, BitWidth, CS); 1581 } 1582 CS << "]"; 1583 OutStreamer.AddComment(CS.str()); 1584 } 1585 } 1586 1587 void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { 1588 assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); 1589 assert((getSubtarget().isOSWindows() || TM.getTargetTriple().isUEFI()) && 1590 "SEH_ instruction Windows and UEFI only"); 1591 1592 // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86. 1593 if (EmitFPOData) { 1594 X86TargetStreamer *XTS = 1595 static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()); 1596 switch (MI->getOpcode()) { 1597 case X86::SEH_PushReg: 1598 XTS->emitFPOPushReg(MI->getOperand(0).getImm()); 1599 break; 1600 case X86::SEH_StackAlloc: 1601 XTS->emitFPOStackAlloc(MI->getOperand(0).getImm()); 1602 break; 1603 case X86::SEH_StackAlign: 1604 XTS->emitFPOStackAlign(MI->getOperand(0).getImm()); 1605 break; 1606 case X86::SEH_SetFrame: 1607 assert(MI->getOperand(1).getImm() == 0 && 1608 ".cv_fpo_setframe takes no offset"); 1609 XTS->emitFPOSetFrame(MI->getOperand(0).getImm()); 1610 break; 1611 case X86::SEH_EndPrologue: 1612 XTS->emitFPOEndPrologue(); 1613 break; 1614 case X86::SEH_SaveReg: 1615 case X86::SEH_SaveXMM: 1616 case X86::SEH_PushFrame: 1617 llvm_unreachable("SEH_ directive incompatible with FPO"); 1618 break; 1619 default: 1620 llvm_unreachable("expected SEH_ instruction"); 1621 } 1622 return; 1623 } 1624 1625 // Otherwise, use the .seh_ directives for all other Windows platforms. 
  switch (MI->getOpcode()) {
  case X86::SEH_PushReg:
    OutStreamer->emitWinCFIPushReg(MI->getOperand(0).getImm());
    break;

  case X86::SEH_SaveReg:
    OutStreamer->emitWinCFISaveReg(MI->getOperand(0).getImm(),
                                   MI->getOperand(1).getImm());
    break;

  case X86::SEH_SaveXMM:
    OutStreamer->emitWinCFISaveXMM(MI->getOperand(0).getImm(),
                                   MI->getOperand(1).getImm());
    break;

  case X86::SEH_StackAlloc:
    OutStreamer->emitWinCFIAllocStack(MI->getOperand(0).getImm());
    break;

  case X86::SEH_SetFrame:
    OutStreamer->emitWinCFISetFrame(MI->getOperand(0).getImm(),
                                    MI->getOperand(1).getImm());
    break;

  case X86::SEH_PushFrame:
    OutStreamer->emitWinCFIPushFrame(MI->getOperand(0).getImm());
    break;

  case X86::SEH_EndPrologue:
    OutStreamer->emitWinCFIEndProlog();
    break;

  default:
    llvm_unreachable("expected SEH_ instruction");
  }
}

static unsigned getRegisterWidth(const MCOperandInfo &Info) {
  if (Info.RegClass == X86::VR128RegClassID ||
      Info.RegClass == X86::VR128XRegClassID)
    return 128;
  if (Info.RegClass == X86::VR256RegClassID ||
      Info.RegClass == X86::VR256XRegClassID)
    return 256;
  if (Info.RegClass == X86::VR512RegClassID)
    return 512;
  llvm_unreachable("Unknown register class!");
}

static void addConstantComments(const MachineInstr *MI,
                                MCStreamer &OutStreamer) {
  switch (MI->getOpcode()) {
  // Lower PSHUFB and VPERMILP normally but add a comment if we can find
  // a constant shuffle mask. We won't be able to do this at the MC layer
  // because the mask isn't an immediate.
  case X86::PSHUFBrm:
  case X86::VPSHUFBrm:
  case X86::VPSHUFBYrm:
  case X86::VPSHUFBZ128rm:
  case X86::VPSHUFBZ128rmk:
  case X86::VPSHUFBZ128rmkz:
  case X86::VPSHUFBZ256rm:
  case X86::VPSHUFBZ256rmk:
  case X86::VPSHUFBZ256rmkz:
  case X86::VPSHUFBZrm:
  case X86::VPSHUFBZrmk:
  case X86::VPSHUFBZrmkz: {
    unsigned SrcIdx = 1;
    if (X86II::isKMasked(MI->getDesc().TSFlags)) {
      // Skip mask operand.
      ++SrcIdx;
      if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
        // Skip passthru operand.
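        // (In the merge-masked forms the passthru register precedes the
        // source operands, hence this extra skip.)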
        ++SrcIdx;
      }
    }

    if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
      unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 64> Mask;
      DecodePSHUFBMask(C, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
    }
    break;
  }

  case X86::VPERMILPSrm:
  case X86::VPERMILPSYrm:
  case X86::VPERMILPSZ128rm:
  case X86::VPERMILPSZ128rmk:
  case X86::VPERMILPSZ128rmkz:
  case X86::VPERMILPSZ256rm:
  case X86::VPERMILPSZ256rmk:
  case X86::VPERMILPSZ256rmkz:
  case X86::VPERMILPSZrm:
  case X86::VPERMILPSZrmk:
  case X86::VPERMILPSZrmkz:
  case X86::VPERMILPDrm:
  case X86::VPERMILPDYrm:
  case X86::VPERMILPDZ128rm:
  case X86::VPERMILPDZ128rmk:
  case X86::VPERMILPDZ128rmkz:
  case X86::VPERMILPDZ256rm:
  case X86::VPERMILPDZ256rmk:
  case X86::VPERMILPDZ256rmkz:
  case X86::VPERMILPDZrm:
  case X86::VPERMILPDZrmk:
  case X86::VPERMILPDZrmkz: {
    unsigned ElSize;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VPERMILPSrm:
    case X86::VPERMILPSYrm:
    case X86::VPERMILPSZ128rm:
    case X86::VPERMILPSZ256rm:
    case X86::VPERMILPSZrm:
    case X86::VPERMILPSZ128rmkz:
    case X86::VPERMILPSZ256rmkz:
    case X86::VPERMILPSZrmkz:
    case X86::VPERMILPSZ128rmk:
    case X86::VPERMILPSZ256rmk:
    case X86::VPERMILPSZrmk:
      ElSize = 32;
      break;
    case X86::VPERMILPDrm:
    case X86::VPERMILPDYrm:
    case X86::VPERMILPDZ128rm:
    case X86::VPERMILPDZ256rm:
    case X86::VPERMILPDZrm:
    case X86::VPERMILPDZ128rmkz:
    case X86::VPERMILPDZ256rmkz:
    case X86::VPERMILPDZrmkz:
    case X86::VPERMILPDZ128rmk:
    case X86::VPERMILPDZ256rmk:
    case X86::VPERMILPDZrmk:
      ElSize = 64;
      break;
    }

    unsigned SrcIdx = 1;
    if (X86II::isKMasked(MI->getDesc().TSFlags)) {
      // Skip mask operand.
      ++SrcIdx;
      if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
        // Skip passthru operand.
        ++SrcIdx;
      }
    }

    if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
      unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 16> Mask;
      DecodeVPERMILPMask(C, ElSize, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
    }
    break;
  }

  case X86::VPERMIL2PDrm:
  case X86::VPERMIL2PSrm:
  case X86::VPERMIL2PDYrm:
  case X86::VPERMIL2PSYrm: {
    assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
           "Unexpected number of operands!");

    const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
    if (!CtrlOp.isImm())
      break;

    unsigned ElSize;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
    case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
    }

    if (auto *C = X86::getConstantFromPool(*MI, 3)) {
      unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 16> Mask;
      DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
    }
    break;
  }

  case X86::VPPERMrrm: {
    if (auto *C = X86::getConstantFromPool(*MI, 3)) {
      unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 16> Mask;
      DecodeVPPERMMask(C, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
    }
    break;
  }

  case X86::MMX_MOVQ64rm: {
    if (auto *C = X86::getConstantFromPool(*MI, 1)) {
      std::string Comment;
      raw_string_ostream CS(Comment);
      const MachineOperand &DstOp = MI->getOperand(0);
      CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
      if (auto *CF = dyn_cast<ConstantFP>(C)) {
        CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false);
        OutStreamer.AddComment(CS.str());
      }
    }
    break;
  }

  case X86::MOVSDrm:
  case X86::VMOVSDrm:
  case X86::VMOVSDZrm:
  case X86::MOVSDrm_alt:
  case X86::VMOVSDrm_alt:
  case X86::VMOVSDZrm_alt:
  case X86::MOVQI2PQIrm:
  case X86::VMOVQI2PQIrm:
  case X86::VMOVQI2PQIZrm:
    printZeroUpperMove(MI, OutStreamer, 64, 128, "mem[0],zero");
    break;

  case X86::MOVSSrm:
  case X86::VMOVSSrm:
  case X86::VMOVSSZrm:
  case X86::MOVSSrm_alt:
  case X86::VMOVSSrm_alt:
  case X86::VMOVSSZrm_alt:
  case X86::MOVDI2PDIrm:
  case X86::VMOVDI2PDIrm:
  case X86::VMOVDI2PDIZrm:
    printZeroUpperMove(MI, OutStreamer, 32, 128, "mem[0],zero,zero,zero");
    break;

#define MOV_CASE(Prefix, Suffix)                                               \
  case X86::Prefix##MOVAPD##Suffix##rm:                                        \
  case X86::Prefix##MOVAPS##Suffix##rm:                                        \
  case X86::Prefix##MOVUPD##Suffix##rm:                                        \
  case X86::Prefix##MOVUPS##Suffix##rm:                                        \
  case X86::Prefix##MOVDQA##Suffix##rm:                                        \
  case X86::Prefix##MOVDQU##Suffix##rm:

#define MOV_AVX512_CASE(Suffix)                                                \
  case X86::VMOVDQA64##Suffix##rm:                                             \
  case X86::VMOVDQA32##Suffix##rm:                                             \
  case X86::VMOVDQU64##Suffix##rm:                                             \
  case X86::VMOVDQU32##Suffix##rm:                                             \
  case X86::VMOVDQU16##Suffix##rm:                                             \
  case X86::VMOVDQU8##Suffix##rm:                                              \
  case X86::VMOVAPS##Suffix##rm:                                               \
  case X86::VMOVAPD##Suffix##rm:                                               \
  case X86::VMOVUPS##Suffix##rm:                                               \
  case X86::VMOVUPD##Suffix##rm:

#define CASE_128_MOV_RM()                                                      \
  MOV_CASE(, )   /* SSE */                                                     \
  MOV_CASE(V, )  /* AVX-128 */                                                 \
  MOV_AVX512_CASE(Z128)

#define CASE_256_MOV_RM()                                                      \
  MOV_CASE(V, Y) /* AVX-256 */                                                 \
  MOV_AVX512_CASE(Z256)

#define CASE_512_MOV_RM()                                                      \
  MOV_AVX512_CASE(Z)

  // For loads from a constant pool to a vector register, print the constant
  // loaded.
  CASE_128_MOV_RM()
    printBroadcast(MI, OutStreamer, 1, 128);
    break;
  CASE_256_MOV_RM()
    printBroadcast(MI, OutStreamer, 1, 256);
    break;
  CASE_512_MOV_RM()
    printBroadcast(MI, OutStreamer, 1, 512);
    break;
  case X86::VBROADCASTF128rm:
  case X86::VBROADCASTI128rm:
  case X86::VBROADCASTF32X4Z256rm:
  case X86::VBROADCASTF64X2Z128rm:
  case X86::VBROADCASTI32X4Z256rm:
  case X86::VBROADCASTI64X2Z128rm:
    printBroadcast(MI, OutStreamer, 2, 128);
    break;
  case X86::VBROADCASTF32X4rm:
  case X86::VBROADCASTF64X2rm:
  case X86::VBROADCASTI32X4rm:
  case X86::VBROADCASTI64X2rm:
    printBroadcast(MI, OutStreamer, 4, 128);
    break;
  case X86::VBROADCASTF32X8rm:
  case X86::VBROADCASTF64X4rm:
  case X86::VBROADCASTI32X8rm:
  case X86::VBROADCASTI64X4rm:
    printBroadcast(MI, OutStreamer, 2, 256);
    break;

  // For broadcast loads from a constant pool to a vector register, repeatedly
  // print the constant loaded.
  case X86::MOVDDUPrm:
  case X86::VMOVDDUPrm:
  case X86::VMOVDDUPZ128rm:
  case X86::VPBROADCASTQrm:
  case X86::VPBROADCASTQZ128rm:
    printBroadcast(MI, OutStreamer, 2, 64);
    break;
  case X86::VBROADCASTSDYrm:
  case X86::VBROADCASTSDZ256rm:
  case X86::VPBROADCASTQYrm:
  case X86::VPBROADCASTQZ256rm:
    printBroadcast(MI, OutStreamer, 4, 64);
    break;
  case X86::VBROADCASTSDZrm:
  case X86::VPBROADCASTQZrm:
    printBroadcast(MI, OutStreamer, 8, 64);
    break;
  case X86::VBROADCASTSSrm:
  case X86::VBROADCASTSSZ128rm:
  case X86::VPBROADCASTDrm:
  case X86::VPBROADCASTDZ128rm:
    printBroadcast(MI, OutStreamer, 4, 32);
    break;
  case X86::VBROADCASTSSYrm:
  case X86::VBROADCASTSSZ256rm:
  case X86::VPBROADCASTDYrm:
  case X86::VPBROADCASTDZ256rm:
    printBroadcast(MI, OutStreamer, 8, 32);
    break;
  case X86::VBROADCASTSSZrm:
  case X86::VPBROADCASTDZrm:
    printBroadcast(MI, OutStreamer, 16, 32);
    break;
  case X86::VPBROADCASTWrm:
  case X86::VPBROADCASTWZ128rm:
    printBroadcast(MI, OutStreamer, 8, 16);
    break;
  case X86::VPBROADCASTWYrm:
  case X86::VPBROADCASTWZ256rm:
    printBroadcast(MI, OutStreamer, 16, 16);
    break;
  case X86::VPBROADCASTWZrm:
    printBroadcast(MI, OutStreamer, 32, 16);
    break;
  case X86::VPBROADCASTBrm:
  case X86::VPBROADCASTBZ128rm:
    printBroadcast(MI, OutStreamer, 16, 8);
    break;
  case X86::VPBROADCASTBYrm:
  case X86::VPBROADCASTBZ256rm:
    printBroadcast(MI, OutStreamer, 32, 8);
    break;
  case X86::VPBROADCASTBZrm:
    printBroadcast(MI, OutStreamer, 64, 8);
    break;
  }
}

void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
  // FIXME: Enable feature predicate checks once all the tests pass.
  // X86_MC::verifyInstructionPredicates(MI->getOpcode(),
  //                                     Subtarget->getFeatureBits());

  X86MCInstLower MCInstLowering(*MF, *this);
  const X86RegisterInfo *RI =
      MF->getSubtarget<X86Subtarget>().getRegisterInfo();

  if (MI->getOpcode() == X86::OR64rm) {
    for (auto &Opd : MI->operands()) {
      if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
                                "swift_async_extendedFramePointerFlags") {
        ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
      }
    }
  }

  // Add comments for values loaded from constant pool.
  if (OutStreamer->isVerboseAsm())
    addConstantComments(MI, *OutStreamer);

  // Add a comment about EVEX compression.
  if (TM.Options.MCOptions.ShowMCEncoding) {
    if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY)
      OutStreamer->AddComment("EVEX TO LEGACY Compression ", false);
    else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
      OutStreamer->AddComment("EVEX TO VEX Compression ", false);
    else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_EVEX)
      OutStreamer->AddComment("EVEX TO EVEX Compression ", false);
  }

  switch (MI->getOpcode()) {
  case TargetOpcode::DBG_VALUE:
    llvm_unreachable("Should be handled target independently");

  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    // Lower these as normal, but add some comments.
    Register Reg = MI->getOperand(0).getReg();
    OutStreamer->AddComment(StringRef("eh_return, addr: %") +
                            X86ATTInstPrinter::getRegisterName(Reg));
    break;
  }
  case X86::CLEANUPRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CLEANUPRET");
    break;
  }

  case X86::CATCHRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CATCHRET");
    break;
  }

  case X86::ENDBR32:
  case X86::ENDBR64: {
    // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
    // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
    // non-empty. If MI is the initial ENDBR, place the
    // __patchable_function_entries label after ENDBR.
    if (CurrentPatchableFunctionEntrySym &&
        CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
        MI == &MF->front().front()) {
      MCInst Inst;
      MCInstLowering.Lower(MI, Inst);
      EmitAndCountInstruction(Inst);
      CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
      OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
      return;
    }
    break;
  }

  case X86::TAILJMPd64:
    if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
      EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
    [[fallthrough]];
  case X86::TAILJMPr:
  case X86::TAILJMPm:
  case X86::TAILJMPd:
  case X86::TAILJMPd_CC:
  case X86::TAILJMPr64:
  case X86::TAILJMPm64:
  case X86::TAILJMPd64_CC:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPm64_REX:
    // Lower these as normal, but add some comments.
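    // In verbose-asm output this shows up as a "# TAILCALL" comment next to
    // the lowered jmp.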
    OutStreamer->AddComment("TAILCALL");
    break;

  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
  case X86::TLS_base_addr32:
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    return LowerTlsAddr(MCInstLowering, *MI);

  case X86::MOVPC32r: {
    // This is a pseudo op for a two instruction sequence with a label, which
    // looks like:
    //     call "L1$pb"
    // "L1$pb":
    //     popl %esi

    // Emit the call.
    MCSymbol *PICBase = MF->getPICBaseSymbol();
    // FIXME: We would like an efficient form for this, so we don't have to do
    // a lot of extra uniquing.
    EmitAndCountInstruction(
        MCInstBuilder(X86::CALLpcrel32)
            .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));

    const X86FrameLowering *FrameLowering =
        MF->getSubtarget<X86Subtarget>().getFrameLowering();
    bool hasFP = FrameLowering->hasFP(*MF);

    // TODO: This is needed only if we require precise CFA.
    bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
                               !OutStreamer->getDwarfFrameInfos().back().End;

    int stackGrowth = -RI->getSlotSize();

    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
      MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
    }

    // Emit the label.
    OutStreamer->emitLabel(PICBase);

    // popl $reg
    EmitAndCountInstruction(
        MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));

    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
    }
    return;
  }

  case X86::ADD32ri: {
    // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
    if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
      break;

    // Okay, we have something like:
    //   EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)

    // For this, we want to print something like:
    //   MYGLOBAL + (. - PICBASE)
    // However, we can't generate a ".", so just emit a new label here and
    // refer to it.
    MCSymbol *DotSym = OutContext.createTempSymbol();
    OutStreamer->emitLabel(DotSym);

    // Now that we have emitted the label, lower the complex operand
    // expression.
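    // Build the expression OpSym + (DotSym - PICBase), where DotSym stands in
    // for the "." we just emitted.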
    MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));

    const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
    const MCExpr *PICBase =
        MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
    DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);

    DotExpr = MCBinaryExpr::createAdd(
        MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);

    EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
                                .addReg(MI->getOperand(0).getReg())
                                .addReg(MI->getOperand(1).getReg())
                                .addExpr(DotExpr));
    return;
  }
  case TargetOpcode::STATEPOINT:
    return LowerSTATEPOINT(*MI, MCInstLowering);

  case TargetOpcode::FAULTING_OP:
    return LowerFAULTING_OP(*MI, MCInstLowering);

  case TargetOpcode::FENTRY_CALL:
    return LowerFENTRY_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_OP:
    return LowerPATCHABLE_OP(*MI, MCInstLowering);

  case TargetOpcode::STACKMAP:
    return LowerSTACKMAP(*MI);

  case TargetOpcode::PATCHPOINT:
    return LowerPATCHPOINT(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
    return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_RET:
    return LowerPATCHABLE_RET(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TAIL_CALL:
    return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_EVENT_CALL:
    return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
    return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);

  case X86::MORESTACK_RET:
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    return;

  case X86::KCFI_CHECK:
    return LowerKCFI_CHECK(*MI);

  case X86::ASAN_CHECK_MEMACCESS:
    return LowerASAN_CHECK_MEMACCESS(*MI);

  case X86::MORESTACK_RET_RESTORE_R10:
    // Return, then restore R10.
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
    return;

  case X86::SEH_PushReg:
  case X86::SEH_SaveReg:
  case X86::SEH_SaveXMM:
  case X86::SEH_StackAlloc:
  case X86::SEH_StackAlign:
  case X86::SEH_SetFrame:
  case X86::SEH_PushFrame:
  case X86::SEH_EndPrologue:
    EmitSEHInstruction(MI);
    return;

  case X86::SEH_Epilogue: {
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
    MachineBasicBlock::const_iterator MBBI(MI);
    // Check if preceded by a call and emit nop if so.
    for (MBBI = PrevCrossBBInst(MBBI);
         MBBI != MachineBasicBlock::const_iterator();
         MBBI = PrevCrossBBInst(MBBI)) {
      // Pseudo instructions that aren't a call are assumed to not emit any
      // code. If they do, in the worst case we generate unnecessary noops
      // after a call.
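      // (The nop keeps the return address of a trailing call from pointing at
      // the start of the epilogue, which could confuse the Windows unwinder.)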
      if (MBBI->isCall() || !MBBI->isPseudo()) {
        if (MBBI->isCall())
          EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
        break;
      }
    }
    return;
  }
  case X86::UBSAN_UD1:
    EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
                                .addReg(X86::EAX)
                                .addReg(X86::EAX)
                                .addImm(1)
                                .addReg(X86::NoRegister)
                                .addImm(MI->getOperand(0).getImm())
                                .addReg(X86::NoRegister));
    return;
  case X86::CALL64pcrel32:
    if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
      EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
    break;
  }

  MCInst TmpInst;
  MCInstLowering.Lower(MI, TmpInst);

  // Stackmap shadows cannot include branch targets, so we can count the bytes
  // in a call towards the shadow, but must ensure that no thread returns
  // into the stackmap shadow. The only way to achieve this is if the call
  // is at the end of the shadow.
  if (MI->isCall()) {
    // Count the size of the call towards the shadow.
    SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
    // Then flush the shadow so that we fill with nops before the call, not
    // after it.
    SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
    // Then emit the call.
    OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
    return;
  }

  EmitAndCountInstruction(TmpInst);
}