1 //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains code to lower X86 MachineInstrs to their corresponding 10 // MCInst records. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MCTargetDesc/X86ATTInstPrinter.h" 15 #include "MCTargetDesc/X86BaseInfo.h" 16 #include "MCTargetDesc/X86EncodingOptimization.h" 17 #include "MCTargetDesc/X86InstComments.h" 18 #include "MCTargetDesc/X86ShuffleDecode.h" 19 #include "MCTargetDesc/X86TargetStreamer.h" 20 #include "X86AsmPrinter.h" 21 #include "X86MachineFunctionInfo.h" 22 #include "X86RegisterInfo.h" 23 #include "X86ShuffleDecodeConstantPool.h" 24 #include "X86Subtarget.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringExtras.h" 27 #include "llvm/CodeGen/MachineConstantPool.h" 28 #include "llvm/CodeGen/MachineFunction.h" 29 #include "llvm/CodeGen/MachineModuleInfoImpls.h" 30 #include "llvm/CodeGen/MachineOperand.h" 31 #include "llvm/CodeGen/StackMaps.h" 32 #include "llvm/IR/DataLayout.h" 33 #include "llvm/IR/GlobalValue.h" 34 #include "llvm/IR/Mangler.h" 35 #include "llvm/MC/MCAsmInfo.h" 36 #include "llvm/MC/MCCodeEmitter.h" 37 #include "llvm/MC/MCContext.h" 38 #include "llvm/MC/MCExpr.h" 39 #include "llvm/MC/MCFixup.h" 40 #include "llvm/MC/MCInst.h" 41 #include "llvm/MC/MCInstBuilder.h" 42 #include "llvm/MC/MCSection.h" 43 #include "llvm/MC/MCSectionELF.h" 44 #include "llvm/MC/MCStreamer.h" 45 #include "llvm/MC/MCSymbol.h" 46 #include "llvm/MC/MCSymbolELF.h" 47 #include "llvm/MC/TargetRegistry.h" 48 #include "llvm/Target/TargetLoweringObjectFile.h" 49 #include "llvm/Target/TargetMachine.h" 50 #include "llvm/Transforms/Instrumentation/AddressSanitizer.h" 51 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" 52 #include <string> 53 54 using namespace llvm; 55 56 namespace { 57 58 /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. 59 class X86MCInstLower { 60 MCContext &Ctx; 61 const MachineFunction &MF; 62 const TargetMachine &TM; 63 const MCAsmInfo &MAI; 64 X86AsmPrinter &AsmPrinter; 65 66 public: 67 X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter); 68 69 std::optional<MCOperand> LowerMachineOperand(const MachineInstr *MI, 70 const MachineOperand &MO) const; 71 void Lower(const MachineInstr *MI, MCInst &OutMI) const; 72 73 MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; 74 MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; 75 76 private: 77 MachineModuleInfoMachO &getMachOMMI() const; 78 }; 79 80 } // end anonymous namespace 81 82 /// A RAII helper which defines a region of instructions which can't have 83 /// padding added between them for correctness. 
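///
/// Typical use in this file (an illustrative sketch, not a prescribed API):
/// \code
///   {
///     NoAutoPaddingScope NoPadScope(*OutStreamer);
///     // ... emit instructions that must stay contiguous, e.g. a patchable
///     // sled or a TLS call sequence ...
///   } // The previous auto-padding setting is restored here.
/// \endcode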
84 struct NoAutoPaddingScope { 85 MCStreamer &OS; 86 const bool OldAllowAutoPadding; 87 NoAutoPaddingScope(MCStreamer &OS) 88 : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) { 89 changeAndComment(false); 90 } 91 ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); } 92 void changeAndComment(bool b) { 93 if (b == OS.getAllowAutoPadding()) 94 return; 95 OS.setAllowAutoPadding(b); 96 if (b) 97 OS.emitRawComment("autopadding"); 98 else 99 OS.emitRawComment("noautopadding"); 100 } 101 }; 102 103 // Emit a minimal sequence of nops spanning NumBytes bytes. 104 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, 105 const X86Subtarget *Subtarget); 106 107 void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst, 108 const MCSubtargetInfo &STI, 109 MCCodeEmitter *CodeEmitter) { 110 if (InShadow) { 111 SmallString<256> Code; 112 SmallVector<MCFixup, 4> Fixups; 113 CodeEmitter->encodeInstruction(Inst, Code, Fixups, STI); 114 CurrentShadowSize += Code.size(); 115 if (CurrentShadowSize >= RequiredShadowSize) 116 InShadow = false; // The shadow is big enough. Stop counting. 117 } 118 } 119 120 void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding( 121 MCStreamer &OutStreamer, const MCSubtargetInfo &STI) { 122 if (InShadow && CurrentShadowSize < RequiredShadowSize) { 123 InShadow = false; 124 emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize, 125 &MF->getSubtarget<X86Subtarget>()); 126 } 127 } 128 129 void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) { 130 OutStreamer->emitInstruction(Inst, getSubtargetInfo()); 131 SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get()); 132 } 133 134 X86MCInstLower::X86MCInstLower(const MachineFunction &mf, 135 X86AsmPrinter &asmprinter) 136 : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()), 137 AsmPrinter(asmprinter) {} 138 139 MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { 140 return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>(); 141 } 142 143 /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol 144 /// operand to an MCSymbol. 145 MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const { 146 const Triple &TT = TM.getTargetTriple(); 147 if (MO.isGlobal() && TT.isOSBinFormatELF()) 148 return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal()); 149 150 const DataLayout &DL = MF.getDataLayout(); 151 assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && 152 "Isn't a symbol reference"); 153 154 MCSymbol *Sym = nullptr; 155 SmallString<128> Name; 156 StringRef Suffix; 157 158 switch (MO.getTargetFlags()) { 159 case X86II::MO_DLLIMPORT: 160 // Handle dllimport linkage. 161 Name += "__imp_"; 162 break; 163 case X86II::MO_COFFSTUB: 164 Name += ".refptr."; 165 break; 166 case X86II::MO_DARWIN_NONLAZY: 167 case X86II::MO_DARWIN_NONLAZY_PIC_BASE: 168 Suffix = "$non_lazy_ptr"; 169 break; 170 } 171 172 if (!Suffix.empty()) 173 Name += DL.getPrivateGlobalPrefix(); 174 175 if (MO.isGlobal()) { 176 const GlobalValue *GV = MO.getGlobal(); 177 AsmPrinter.getNameWithPrefix(Name, GV); 178 } else if (MO.isSymbol()) { 179 Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL); 180 } else if (MO.isMBB()) { 181 assert(Suffix.empty()); 182 Sym = MO.getMBB()->getSymbol(); 183 } 184 185 Name += Suffix; 186 if (!Sym) 187 Sym = Ctx.getOrCreateSymbol(Name); 188 189 // If the target flags on the operand changes the name of the symbol, do that 190 // before we return the symbol. 
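  // (Illustrative examples of the names built above, assuming a global named
  //  "foo": MO_DLLIMPORT yields "__imp_foo", MO_COFFSTUB yields ".refptr.foo",
  //  and MO_DARWIN_NONLAZY yields something like "L_foo$non_lazy_ptr", using
  //  the private-global prefix. The exact spelling depends on the target's
  //  mangling.)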
191 switch (MO.getTargetFlags()) { 192 default: 193 break; 194 case X86II::MO_COFFSTUB: { 195 MachineModuleInfoCOFF &MMICOFF = 196 MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>(); 197 MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym); 198 if (!StubSym.getPointer()) { 199 assert(MO.isGlobal() && "Extern symbol not handled yet"); 200 StubSym = MachineModuleInfoImpl::StubValueTy( 201 AsmPrinter.getSymbol(MO.getGlobal()), true); 202 } 203 break; 204 } 205 case X86II::MO_DARWIN_NONLAZY: 206 case X86II::MO_DARWIN_NONLAZY_PIC_BASE: { 207 MachineModuleInfoImpl::StubValueTy &StubSym = 208 getMachOMMI().getGVStubEntry(Sym); 209 if (!StubSym.getPointer()) { 210 assert(MO.isGlobal() && "Extern symbol not handled yet"); 211 StubSym = MachineModuleInfoImpl::StubValueTy( 212 AsmPrinter.getSymbol(MO.getGlobal()), 213 !MO.getGlobal()->hasInternalLinkage()); 214 } 215 break; 216 } 217 } 218 219 return Sym; 220 } 221 222 MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, 223 MCSymbol *Sym) const { 224 // FIXME: We would like an efficient form for this, so we don't have to do a 225 // lot of extra uniquing. 226 const MCExpr *Expr = nullptr; 227 MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; 228 229 switch (MO.getTargetFlags()) { 230 default: 231 llvm_unreachable("Unknown target flag on GV operand"); 232 case X86II::MO_NO_FLAG: // No flag. 233 // These affect the name of the symbol, not any suffix. 234 case X86II::MO_DARWIN_NONLAZY: 235 case X86II::MO_DLLIMPORT: 236 case X86II::MO_COFFSTUB: 237 break; 238 239 case X86II::MO_TLVP: 240 RefKind = MCSymbolRefExpr::VK_TLVP; 241 break; 242 case X86II::MO_TLVP_PIC_BASE: 243 Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); 244 // Subtract the pic base. 245 Expr = MCBinaryExpr::createSub( 246 Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx); 247 break; 248 case X86II::MO_SECREL: 249 RefKind = MCSymbolRefExpr::VK_SECREL; 250 break; 251 case X86II::MO_TLSGD: 252 RefKind = MCSymbolRefExpr::VK_TLSGD; 253 break; 254 case X86II::MO_TLSLD: 255 RefKind = MCSymbolRefExpr::VK_TLSLD; 256 break; 257 case X86II::MO_TLSLDM: 258 RefKind = MCSymbolRefExpr::VK_TLSLDM; 259 break; 260 case X86II::MO_GOTTPOFF: 261 RefKind = MCSymbolRefExpr::VK_GOTTPOFF; 262 break; 263 case X86II::MO_INDNTPOFF: 264 RefKind = MCSymbolRefExpr::VK_INDNTPOFF; 265 break; 266 case X86II::MO_TPOFF: 267 RefKind = MCSymbolRefExpr::VK_TPOFF; 268 break; 269 case X86II::MO_DTPOFF: 270 RefKind = MCSymbolRefExpr::VK_DTPOFF; 271 break; 272 case X86II::MO_NTPOFF: 273 RefKind = MCSymbolRefExpr::VK_NTPOFF; 274 break; 275 case X86II::MO_GOTNTPOFF: 276 RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; 277 break; 278 case X86II::MO_GOTPCREL: 279 RefKind = MCSymbolRefExpr::VK_GOTPCREL; 280 break; 281 case X86II::MO_GOTPCREL_NORELAX: 282 RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX; 283 break; 284 case X86II::MO_GOT: 285 RefKind = MCSymbolRefExpr::VK_GOT; 286 break; 287 case X86II::MO_GOTOFF: 288 RefKind = MCSymbolRefExpr::VK_GOTOFF; 289 break; 290 case X86II::MO_PLT: 291 RefKind = MCSymbolRefExpr::VK_PLT; 292 break; 293 case X86II::MO_ABS8: 294 RefKind = MCSymbolRefExpr::VK_X86_ABS8; 295 break; 296 case X86II::MO_PIC_BASE_OFFSET: 297 case X86II::MO_DARWIN_NONLAZY_PIC_BASE: 298 Expr = MCSymbolRefExpr::create(Sym, Ctx); 299 // Subtract the pic base. 
300 Expr = MCBinaryExpr::createSub( 301 Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx); 302 if (MO.isJTI()) { 303 assert(MAI.doesSetDirectiveSuppressReloc()); 304 // If .set directive is supported, use it to reduce the number of 305 // relocations the assembler will generate for differences between 306 // local labels. This is only safe when the symbols are in the same 307 // section so we are restricting it to jumptable references. 308 MCSymbol *Label = Ctx.createTempSymbol(); 309 AsmPrinter.OutStreamer->emitAssignment(Label, Expr); 310 Expr = MCSymbolRefExpr::create(Label, Ctx); 311 } 312 break; 313 } 314 315 if (!Expr) 316 Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx); 317 318 if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) 319 Expr = MCBinaryExpr::createAdd( 320 Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); 321 return MCOperand::createExpr(Expr); 322 } 323 324 static unsigned getRetOpcode(const X86Subtarget &Subtarget) { 325 return Subtarget.is64Bit() ? X86::RET64 : X86::RET32; 326 } 327 328 std::optional<MCOperand> 329 X86MCInstLower::LowerMachineOperand(const MachineInstr *MI, 330 const MachineOperand &MO) const { 331 switch (MO.getType()) { 332 default: 333 MI->print(errs()); 334 llvm_unreachable("unknown operand type"); 335 case MachineOperand::MO_Register: 336 // Ignore all implicit register operands. 337 if (MO.isImplicit()) 338 return std::nullopt; 339 return MCOperand::createReg(MO.getReg()); 340 case MachineOperand::MO_Immediate: 341 return MCOperand::createImm(MO.getImm()); 342 case MachineOperand::MO_MachineBasicBlock: 343 case MachineOperand::MO_GlobalAddress: 344 case MachineOperand::MO_ExternalSymbol: 345 return LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); 346 case MachineOperand::MO_MCSymbol: 347 return LowerSymbolOperand(MO, MO.getMCSymbol()); 348 case MachineOperand::MO_JumpTableIndex: 349 return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex())); 350 case MachineOperand::MO_ConstantPoolIndex: 351 return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex())); 352 case MachineOperand::MO_BlockAddress: 353 return LowerSymbolOperand( 354 MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress())); 355 case MachineOperand::MO_RegisterMask: 356 // Ignore call clobbers. 357 return std::nullopt; 358 } 359 } 360 361 // Replace TAILJMP opcodes with their equivalent opcodes that have encoding 362 // information. 
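// For example, TAILJMPr lowers to JMP32r, TAILJMPd64 to JMP_1, and
// TAILJMPd64_CC to JCC_1; see the switch below.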
363 static unsigned convertTailJumpOpcode(unsigned Opcode) { 364 switch (Opcode) { 365 case X86::TAILJMPr: 366 Opcode = X86::JMP32r; 367 break; 368 case X86::TAILJMPm: 369 Opcode = X86::JMP32m; 370 break; 371 case X86::TAILJMPr64: 372 Opcode = X86::JMP64r; 373 break; 374 case X86::TAILJMPm64: 375 Opcode = X86::JMP64m; 376 break; 377 case X86::TAILJMPr64_REX: 378 Opcode = X86::JMP64r_REX; 379 break; 380 case X86::TAILJMPm64_REX: 381 Opcode = X86::JMP64m_REX; 382 break; 383 case X86::TAILJMPd: 384 case X86::TAILJMPd64: 385 Opcode = X86::JMP_1; 386 break; 387 case X86::TAILJMPd_CC: 388 case X86::TAILJMPd64_CC: 389 Opcode = X86::JCC_1; 390 break; 391 } 392 393 return Opcode; 394 } 395 396 void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { 397 OutMI.setOpcode(MI->getOpcode()); 398 399 for (const MachineOperand &MO : MI->operands()) 400 if (auto MaybeMCOp = LowerMachineOperand(MI, MO)) 401 OutMI.addOperand(*MaybeMCOp); 402 403 bool In64BitMode = AsmPrinter.getSubtarget().is64Bit(); 404 if (X86::optimizeInstFromVEX3ToVEX2(OutMI, MI->getDesc()) || 405 X86::optimizeShiftRotateWithImmediateOne(OutMI) || 406 X86::optimizeVPCMPWithImmediateOneOrSix(OutMI) || 407 X86::optimizeMOVSX(OutMI) || X86::optimizeINCDEC(OutMI, In64BitMode) || 408 X86::optimizeMOV(OutMI, In64BitMode) || 409 X86::optimizeToFixedRegisterOrShortImmediateForm(OutMI)) 410 return; 411 412 // Handle a few special cases to eliminate operand modifiers. 413 switch (OutMI.getOpcode()) { 414 case X86::LEA64_32r: 415 case X86::LEA64r: 416 case X86::LEA16r: 417 case X86::LEA32r: 418 // LEA should have a segment register, but it must be empty. 419 assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands && 420 "Unexpected # of LEA operands"); 421 assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 && 422 "LEA has segment specified!"); 423 break; 424 case X86::MULX32Hrr: 425 case X86::MULX32Hrm: 426 case X86::MULX64Hrr: 427 case X86::MULX64Hrm: { 428 // Turn into regular MULX by duplicating the destination. 429 unsigned NewOpc; 430 switch (OutMI.getOpcode()) { 431 default: llvm_unreachable("Invalid opcode"); 432 case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break; 433 case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break; 434 case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break; 435 case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break; 436 } 437 OutMI.setOpcode(NewOpc); 438 // Duplicate the destination. 439 unsigned DestReg = OutMI.getOperand(0).getReg(); 440 OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg)); 441 break; 442 } 443 // CALL64r, CALL64pcrel32 - These instructions used to have 444 // register inputs modeled as normal uses instead of implicit uses. As such, 445 // they we used to truncate off all but the first operand (the callee). This 446 // issue seems to have been fixed at some point. This assert verifies that. 447 case X86::CALL64r: 448 case X86::CALL64pcrel32: 449 assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!"); 450 break; 451 case X86::EH_RETURN: 452 case X86::EH_RETURN64: { 453 OutMI = MCInst(); 454 OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget())); 455 break; 456 } 457 case X86::CLEANUPRET: { 458 // Replace CLEANUPRET with the appropriate RET. 459 OutMI = MCInst(); 460 OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget())); 461 break; 462 } 463 case X86::CATCHRET: { 464 // Replace CATCHRET with the appropriate RET. 465 const X86Subtarget &Subtarget = AsmPrinter.getSubtarget(); 466 unsigned ReturnReg = In64BitMode ? 
X86::RAX : X86::EAX; 467 OutMI = MCInst(); 468 OutMI.setOpcode(getRetOpcode(Subtarget)); 469 OutMI.addOperand(MCOperand::createReg(ReturnReg)); 470 break; 471 } 472 // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump 473 // instruction. 474 case X86::TAILJMPr: 475 case X86::TAILJMPr64: 476 case X86::TAILJMPr64_REX: 477 case X86::TAILJMPd: 478 case X86::TAILJMPd64: 479 assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!"); 480 OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); 481 break; 482 case X86::TAILJMPd_CC: 483 case X86::TAILJMPd64_CC: 484 assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!"); 485 OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); 486 break; 487 case X86::TAILJMPm: 488 case X86::TAILJMPm64: 489 case X86::TAILJMPm64_REX: 490 assert(OutMI.getNumOperands() == X86::AddrNumOperands && 491 "Unexpected number of operands!"); 492 OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); 493 break; 494 case X86::MASKMOVDQU: 495 case X86::VMASKMOVDQU: 496 if (In64BitMode) 497 OutMI.setFlags(X86::IP_HAS_AD_SIZE); 498 break; 499 case X86::BSF16rm: 500 case X86::BSF16rr: 501 case X86::BSF32rm: 502 case X86::BSF32rr: 503 case X86::BSF64rm: 504 case X86::BSF64rr: { 505 // Add an REP prefix to BSF instructions so that new processors can 506 // recognize as TZCNT, which has better performance than BSF. 507 // BSF and TZCNT have different interpretations on ZF bit. So make sure 508 // it won't be used later. 509 const MachineOperand *FlagDef = MI->findRegisterDefOperand(X86::EFLAGS); 510 if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead()) 511 OutMI.setFlags(X86::IP_HAS_REPEAT); 512 break; 513 } 514 default: 515 break; 516 } 517 } 518 519 void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, 520 const MachineInstr &MI) { 521 NoAutoPaddingScope NoPadScope(*OutStreamer); 522 bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 && 523 MI.getOpcode() != X86::TLS_base_addr32; 524 bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 || 525 MI.getOpcode() == X86::TLS_base_addr64; 526 MCContext &Ctx = OutStreamer->getContext(); 527 528 MCSymbolRefExpr::VariantKind SRVK; 529 switch (MI.getOpcode()) { 530 case X86::TLS_addr32: 531 case X86::TLS_addr64: 532 case X86::TLS_addrX32: 533 SRVK = MCSymbolRefExpr::VK_TLSGD; 534 break; 535 case X86::TLS_base_addr32: 536 SRVK = MCSymbolRefExpr::VK_TLSLDM; 537 break; 538 case X86::TLS_base_addr64: 539 case X86::TLS_base_addrX32: 540 SRVK = MCSymbolRefExpr::VK_TLSLD; 541 break; 542 default: 543 llvm_unreachable("unexpected opcode"); 544 } 545 546 const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create( 547 MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx); 548 549 // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD 550 // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is 551 // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by 552 // only using GOT when GOTPCRELX is enabled. 553 // TODO Delete the workaround when GOTPCRELX becomes commonplace. 
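  // For the 64-bit LP64 general-dynamic case, the code below builds the
  // classic padded call sequence, roughly (illustrative; the GOT-indirect
  // variant calls through __tls_get_addr@GOTPCREL(%rip) instead):
  //   data16 leaq x@tlsgd(%rip), %rdi
  //   data16 data16 rex64 callq __tls_get_addr@PLT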
554 bool UseGot = MMI->getModule()->getRtLibUseGOT() && 555 Ctx.getAsmInfo()->canRelaxRelocations(); 556 557 if (Is64Bits) { 558 bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD; 559 if (NeedsPadding && Is64BitsLP64) 560 EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); 561 EmitAndCountInstruction(MCInstBuilder(X86::LEA64r) 562 .addReg(X86::RDI) 563 .addReg(X86::RIP) 564 .addImm(1) 565 .addReg(0) 566 .addExpr(Sym) 567 .addReg(0)); 568 const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr"); 569 if (NeedsPadding) { 570 if (!UseGot) 571 EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); 572 EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); 573 EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX)); 574 } 575 if (UseGot) { 576 const MCExpr *Expr = MCSymbolRefExpr::create( 577 TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx); 578 EmitAndCountInstruction(MCInstBuilder(X86::CALL64m) 579 .addReg(X86::RIP) 580 .addImm(1) 581 .addReg(0) 582 .addExpr(Expr) 583 .addReg(0)); 584 } else { 585 EmitAndCountInstruction( 586 MCInstBuilder(X86::CALL64pcrel32) 587 .addExpr(MCSymbolRefExpr::create(TlsGetAddr, 588 MCSymbolRefExpr::VK_PLT, Ctx))); 589 } 590 } else { 591 if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) { 592 EmitAndCountInstruction(MCInstBuilder(X86::LEA32r) 593 .addReg(X86::EAX) 594 .addReg(0) 595 .addImm(1) 596 .addReg(X86::EBX) 597 .addExpr(Sym) 598 .addReg(0)); 599 } else { 600 EmitAndCountInstruction(MCInstBuilder(X86::LEA32r) 601 .addReg(X86::EAX) 602 .addReg(X86::EBX) 603 .addImm(1) 604 .addReg(0) 605 .addExpr(Sym) 606 .addReg(0)); 607 } 608 609 const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr"); 610 if (UseGot) { 611 const MCExpr *Expr = 612 MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx); 613 EmitAndCountInstruction(MCInstBuilder(X86::CALL32m) 614 .addReg(X86::EBX) 615 .addImm(1) 616 .addReg(0) 617 .addExpr(Expr) 618 .addReg(0)); 619 } else { 620 EmitAndCountInstruction( 621 MCInstBuilder(X86::CALLpcrel32) 622 .addExpr(MCSymbolRefExpr::create(TlsGetAddr, 623 MCSymbolRefExpr::VK_PLT, Ctx))); 624 } 625 } 626 } 627 628 /// Emit the largest nop instruction smaller than or equal to \p NumBytes 629 /// bytes. Return the size of nop emitted. 630 static unsigned emitNop(MCStreamer &OS, unsigned NumBytes, 631 const X86Subtarget *Subtarget) { 632 // Determine the longest nop which can be efficiently decoded for the given 633 // target cpu. 15-bytes is the longest single NOP instruction, but some 634 // platforms can't decode the longest forms efficiently. 635 unsigned MaxNopLength = 1; 636 if (Subtarget->is64Bit()) { 637 // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the 638 // IndexReg/BaseReg below need to be updated. 
639 if (Subtarget->hasFeature(X86::TuningFast7ByteNOP)) 640 MaxNopLength = 7; 641 else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP)) 642 MaxNopLength = 15; 643 else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP)) 644 MaxNopLength = 11; 645 else 646 MaxNopLength = 10; 647 } if (Subtarget->is32Bit()) 648 MaxNopLength = 2; 649 650 // Cap a single nop emission at the profitable value for the target 651 NumBytes = std::min(NumBytes, MaxNopLength); 652 653 unsigned NopSize; 654 unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg; 655 IndexReg = Displacement = SegmentReg = 0; 656 BaseReg = X86::RAX; 657 ScaleVal = 1; 658 switch (NumBytes) { 659 case 0: 660 llvm_unreachable("Zero nops?"); 661 break; 662 case 1: 663 NopSize = 1; 664 Opc = X86::NOOP; 665 break; 666 case 2: 667 NopSize = 2; 668 Opc = X86::XCHG16ar; 669 break; 670 case 3: 671 NopSize = 3; 672 Opc = X86::NOOPL; 673 break; 674 case 4: 675 NopSize = 4; 676 Opc = X86::NOOPL; 677 Displacement = 8; 678 break; 679 case 5: 680 NopSize = 5; 681 Opc = X86::NOOPL; 682 Displacement = 8; 683 IndexReg = X86::RAX; 684 break; 685 case 6: 686 NopSize = 6; 687 Opc = X86::NOOPW; 688 Displacement = 8; 689 IndexReg = X86::RAX; 690 break; 691 case 7: 692 NopSize = 7; 693 Opc = X86::NOOPL; 694 Displacement = 512; 695 break; 696 case 8: 697 NopSize = 8; 698 Opc = X86::NOOPL; 699 Displacement = 512; 700 IndexReg = X86::RAX; 701 break; 702 case 9: 703 NopSize = 9; 704 Opc = X86::NOOPW; 705 Displacement = 512; 706 IndexReg = X86::RAX; 707 break; 708 default: 709 NopSize = 10; 710 Opc = X86::NOOPW; 711 Displacement = 512; 712 IndexReg = X86::RAX; 713 SegmentReg = X86::CS; 714 break; 715 } 716 717 unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U); 718 NopSize += NumPrefixes; 719 for (unsigned i = 0; i != NumPrefixes; ++i) 720 OS.emitBytes("\x66"); 721 722 switch (Opc) { 723 default: llvm_unreachable("Unexpected opcode"); 724 case X86::NOOP: 725 OS.emitInstruction(MCInstBuilder(Opc), *Subtarget); 726 break; 727 case X86::XCHG16ar: 728 OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), 729 *Subtarget); 730 break; 731 case X86::NOOPL: 732 case X86::NOOPW: 733 OS.emitInstruction(MCInstBuilder(Opc) 734 .addReg(BaseReg) 735 .addImm(ScaleVal) 736 .addReg(IndexReg) 737 .addImm(Displacement) 738 .addReg(SegmentReg), 739 *Subtarget); 740 break; 741 } 742 assert(NopSize <= NumBytes && "We overemitted?"); 743 return NopSize; 744 } 745 746 /// Emit the optimal amount of multi-byte nops on X86. 
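/// For example (illustrative), a 12-byte request on a 64-bit target whose
/// single-nop cap is 10 bytes is emitted as one 10-byte NOPW form followed by
/// a 2-byte xchg %ax,%ax.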
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget) {
  unsigned NopsToEmit = NumBytes;
  (void)NopsToEmit;
  while (NumBytes) {
    NumBytes -= emitNop(OS, NumBytes, Subtarget);
    assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
  }
}

void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  StatepointOpers SOpers(&MI);
  if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
    emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
  } else {
    // Lower the call target and choose the correct opcode.
    const MachineOperand &CallTarget = SOpers.getCallTarget();
    MCOperand CallTargetMCOp;
    unsigned CallOpcode;
    switch (CallTarget.getType()) {
    case MachineOperand::MO_GlobalAddress:
    case MachineOperand::MO_ExternalSymbol:
      CallTargetMCOp = MCIL.LowerSymbolOperand(
          CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // address. You'll fail asserts during load & relocation if this
      // symbol is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Immediate:
      CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // immediate. You'll fail asserts during load & relocation if this
      // address is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Register:
      // FIXME: Add retpoline support and remove this.
792 if (Subtarget->useIndirectThunkCalls()) 793 report_fatal_error("Lowering register statepoints with thunks not " 794 "yet implemented."); 795 CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); 796 CallOpcode = X86::CALL64r; 797 break; 798 default: 799 llvm_unreachable("Unsupported operand type in statepoint call target"); 800 break; 801 } 802 803 // Emit call 804 MCInst CallInst; 805 CallInst.setOpcode(CallOpcode); 806 CallInst.addOperand(CallTargetMCOp); 807 OutStreamer->emitInstruction(CallInst, getSubtargetInfo()); 808 } 809 810 // Record our statepoint node in the same section used by STACKMAP 811 // and PATCHPOINT 812 auto &Ctx = OutStreamer->getContext(); 813 MCSymbol *MILabel = Ctx.createTempSymbol(); 814 OutStreamer->emitLabel(MILabel); 815 SM.recordStatepoint(*MILabel, MI); 816 } 817 818 void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI, 819 X86MCInstLower &MCIL) { 820 // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>, 821 // <opcode>, <operands> 822 823 NoAutoPaddingScope NoPadScope(*OutStreamer); 824 825 Register DefRegister = FaultingMI.getOperand(0).getReg(); 826 FaultMaps::FaultKind FK = 827 static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm()); 828 MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol(); 829 unsigned Opcode = FaultingMI.getOperand(3).getImm(); 830 unsigned OperandsBeginIdx = 4; 831 832 auto &Ctx = OutStreamer->getContext(); 833 MCSymbol *FaultingLabel = Ctx.createTempSymbol(); 834 OutStreamer->emitLabel(FaultingLabel); 835 836 assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!"); 837 FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel); 838 839 MCInst MI; 840 MI.setOpcode(Opcode); 841 842 if (DefRegister != X86::NoRegister) 843 MI.addOperand(MCOperand::createReg(DefRegister)); 844 845 for (const MachineOperand &MO : 846 llvm::drop_begin(FaultingMI.operands(), OperandsBeginIdx)) 847 if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, MO)) 848 MI.addOperand(*MaybeOperand); 849 850 OutStreamer->AddComment("on-fault: " + HandlerLabel->getName()); 851 OutStreamer->emitInstruction(MI, getSubtargetInfo()); 852 } 853 854 void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI, 855 X86MCInstLower &MCIL) { 856 bool Is64Bits = Subtarget->is64Bit(); 857 MCContext &Ctx = OutStreamer->getContext(); 858 MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__"); 859 const MCSymbolRefExpr *Op = 860 MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx); 861 862 EmitAndCountInstruction( 863 MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32) 864 .addExpr(Op)); 865 } 866 867 void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) { 868 assert(std::next(MI.getIterator())->isCall() && 869 "KCFI_CHECK not followed by a call instruction"); 870 871 // Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop() 872 // returns a 1-byte X86::NOOP, which means the offset is the same in 873 // bytes. This assumes that patchable-function-prefix is the same for all 874 // functions. 875 const MachineFunction &MF = *MI.getMF(); 876 int64_t PrefixNops = 0; 877 (void)MF.getFunction() 878 .getFnAttribute("patchable-function-prefix") 879 .getValueAsString() 880 .getAsInteger(10, PrefixNops); 881 882 // KCFI allows indirect calls to any location that's preceded by a valid 883 // type identifier. 
To avoid encoding the full constant into an instruction, 884 // and thus emitting potential call target gadgets at each indirect call 885 // site, load a negated constant to a register and compare that to the 886 // expected value at the call target. 887 const Register AddrReg = MI.getOperand(0).getReg(); 888 const uint32_t Type = MI.getOperand(1).getImm(); 889 // The check is immediately before the call. If the call target is in R10, 890 // we can clobber R11 for the check instead. 891 unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D; 892 EmitAndCountInstruction( 893 MCInstBuilder(X86::MOV32ri).addReg(TempReg).addImm(-MaskKCFIType(Type))); 894 EmitAndCountInstruction(MCInstBuilder(X86::ADD32rm) 895 .addReg(X86::NoRegister) 896 .addReg(TempReg) 897 .addReg(AddrReg) 898 .addImm(1) 899 .addReg(X86::NoRegister) 900 .addImm(-(PrefixNops + 4)) 901 .addReg(X86::NoRegister)); 902 903 MCSymbol *Pass = OutContext.createTempSymbol(); 904 EmitAndCountInstruction( 905 MCInstBuilder(X86::JCC_1) 906 .addExpr(MCSymbolRefExpr::create(Pass, OutContext)) 907 .addImm(X86::COND_E)); 908 909 MCSymbol *Trap = OutContext.createTempSymbol(); 910 OutStreamer->emitLabel(Trap); 911 EmitAndCountInstruction(MCInstBuilder(X86::TRAP)); 912 emitKCFITrapEntry(MF, Trap); 913 OutStreamer->emitLabel(Pass); 914 } 915 916 void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) { 917 // FIXME: Make this work on non-ELF. 918 if (!TM.getTargetTriple().isOSBinFormatELF()) { 919 report_fatal_error("llvm.asan.check.memaccess only supported on ELF"); 920 return; 921 } 922 923 const auto &Reg = MI.getOperand(0).getReg(); 924 ASanAccessInfo AccessInfo(MI.getOperand(1).getImm()); 925 926 uint64_t ShadowBase; 927 int MappingScale; 928 bool OrShadowOffset; 929 getAddressSanitizerParams(Triple(TM.getTargetTriple()), 64, 930 AccessInfo.CompileKernel, &ShadowBase, 931 &MappingScale, &OrShadowOffset); 932 933 StringRef Name = AccessInfo.IsWrite ? "store" : "load"; 934 StringRef Op = OrShadowOffset ? "or" : "add"; 935 std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" + 936 Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" + 937 TM.getMCRegisterInfo()->getName(Reg.asMCReg())) 938 .str(); 939 if (OrShadowOffset) 940 report_fatal_error( 941 "OrShadowOffset is not supported with optimized callbacks"); 942 943 EmitAndCountInstruction( 944 MCInstBuilder(X86::CALL64pcrel32) 945 .addExpr(MCSymbolRefExpr::create( 946 OutContext.getOrCreateSymbol(SymName), OutContext))); 947 } 948 949 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI, 950 X86MCInstLower &MCIL) { 951 // PATCHABLE_OP minsize, opcode, operands 952 953 NoAutoPaddingScope NoPadScope(*OutStreamer); 954 955 unsigned MinSize = MI.getOperand(0).getImm(); 956 unsigned Opcode = MI.getOperand(1).getImm(); 957 // Opcode PATCHABLE_OP is a special case: there is no instruction to wrap, 958 // simply emit a nop of size MinSize. 
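  // (Illustrative: "PATCHABLE_OP 2, PUSH64r %rbp" encodes to a single byte, so
  //  the push is widened to the two-byte PUSH64rmr form below, while a
  //  PATCHABLE_OP with nothing to wrap simply becomes a MinSize-byte nop.)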
  bool EmptyInst = (Opcode == TargetOpcode::PATCHABLE_OP);

  MCInst MCI;
  MCI.setOpcode(Opcode);
  for (auto &MO : drop_begin(MI.operands(), 2))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
      MCI.addOperand(*MaybeOperand);

  SmallString<256> Code;
  if (!EmptyInst) {
    SmallVector<MCFixup, 4> Fixups;
    CodeEmitter->encodeInstruction(MCI, Code, Fixups, getSubtargetInfo());
  }

  if (Code.size() < MinSize) {
    if (MinSize == 2 && Subtarget->is32Bit() &&
        Subtarget->isTargetWindowsMSVC() &&
        (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
      // For compatibility reasons, when targeting MSVC, it is important to
      // generate a 'legacy' NOP in the form of an 8B FF MOV EDI, EDI. Some
      // tools rely specifically on this pattern to be able to patch a
      // function. This is only for 32-bit targets, when using /arch:IA32 or
      // /arch:SSE.
      OutStreamer->emitInstruction(
          MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
          *Subtarget);
    } else if (MinSize == 2 && Opcode == X86::PUSH64r) {
      // This is an optimization that lets us get away without emitting a nop
      // in many cases.
      //
      // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes
      // two bytes too, so the check on MinSize is important.
      MCI.setOpcode(X86::PUSH64rmr);
    } else {
      unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
      assert(NopSize == MinSize && "Could not implement MinSize!");
      (void)NopSize;
    }
  }
  if (!EmptyInst)
    OutStreamer->emitInstruction(MCI, getSubtargetInfo());
}

// Lower a stackmap of the form:
// <id>, <shadowBytes>, ...
void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);

  SM.recordStackMap(*MILabel, MI);
  unsigned NumShadowBytes = MI.getOperand(1).getImm();
  SMShadowTracker.reset(NumShadowBytes);
}

// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");

  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);
  SM.recordPatchPoint(*MILabel, MI);

  PatchPointOpers opers(&MI);
  unsigned ScratchIdx = opers.getNextScratchIdx();
  unsigned EncodedBytes = 0;
  const MachineOperand &CalleeMO = opers.getCallTarget();

  // Check for a null target. If the target is non-null (i.e. is non-zero or is
  // symbolic) then emit a call.
  if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
    MCOperand CalleeMCOp;
    switch (CalleeMO.getType()) {
    default:
      /// FIXME: Add a verifier check for bad callee types.
1042 llvm_unreachable("Unrecognized callee operand type."); 1043 case MachineOperand::MO_Immediate: 1044 if (CalleeMO.getImm()) 1045 CalleeMCOp = MCOperand::createImm(CalleeMO.getImm()); 1046 break; 1047 case MachineOperand::MO_ExternalSymbol: 1048 case MachineOperand::MO_GlobalAddress: 1049 CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO, 1050 MCIL.GetSymbolFromOperand(CalleeMO)); 1051 break; 1052 } 1053 1054 // Emit MOV to materialize the target address and the CALL to target. 1055 // This is encoded with 12-13 bytes, depending on which register is used. 1056 Register ScratchReg = MI.getOperand(ScratchIdx).getReg(); 1057 if (X86II::isX86_64ExtendedReg(ScratchReg)) 1058 EncodedBytes = 13; 1059 else 1060 EncodedBytes = 12; 1061 1062 EmitAndCountInstruction( 1063 MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp)); 1064 // FIXME: Add retpoline support and remove this. 1065 if (Subtarget->useIndirectThunkCalls()) 1066 report_fatal_error( 1067 "Lowering patchpoint with thunks not yet implemented."); 1068 EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg)); 1069 } 1070 1071 // Emit padding. 1072 unsigned NumBytes = opers.getNumPatchBytes(); 1073 assert(NumBytes >= EncodedBytes && 1074 "Patchpoint can't request size less than the length of a call."); 1075 1076 emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget); 1077 } 1078 1079 void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, 1080 X86MCInstLower &MCIL) { 1081 assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64"); 1082 1083 NoAutoPaddingScope NoPadScope(*OutStreamer); 1084 1085 // We want to emit the following pattern, which follows the x86 calling 1086 // convention to prepare for the trampoline call to be patched in. 1087 // 1088 // .p2align 1, ... 1089 // .Lxray_event_sled_N: 1090 // jmp +N // jump across the instrumentation sled 1091 // ... // set up arguments in register 1092 // callq __xray_CustomEvent@plt // force dependency to symbol 1093 // ... 1094 // <jump here> 1095 // 1096 // After patching, it would look something like: 1097 // 1098 // nopw (2-byte nop) 1099 // ... 1100 // callq __xrayCustomEvent // already lowered 1101 // ... 1102 // 1103 // --- 1104 // First we emit the label and the jump. 1105 auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true); 1106 OutStreamer->AddComment("# XRay Custom Event Log"); 1107 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1108 OutStreamer->emitLabel(CurSled); 1109 1110 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as 1111 // an operand (computed as an offset from the jmp instruction). 1112 // FIXME: Find another less hacky way do force the relative jump. 1113 OutStreamer->emitBinaryData("\xeb\x0f"); 1114 1115 // The default C calling convention will place two arguments into %rcx and 1116 // %rdx -- so we only work with those. 1117 const Register DestRegs[] = {X86::RDI, X86::RSI}; 1118 bool UsedMask[] = {false, false}; 1119 // Filled out in loop. 1120 Register SrcRegs[] = {0, 0}; 1121 1122 // Then we put the operands in the %rdi and %rsi registers. We spill the 1123 // values in the register before we clobber them, and mark them as used in 1124 // UsedMask. In case the arguments are already in the correct register, we use 1125 // emit nops appropriately sized to keep the sled the same size in every 1126 // situation. 
1127 for (unsigned I = 0; I < MI.getNumOperands(); ++I) 1128 if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) { 1129 assert(Op->isReg() && "Only support arguments in registers"); 1130 SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64); 1131 assert(SrcRegs[I].isValid() && "Invalid operand"); 1132 if (SrcRegs[I] != DestRegs[I]) { 1133 UsedMask[I] = true; 1134 EmitAndCountInstruction( 1135 MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I])); 1136 } else { 1137 emitX86Nops(*OutStreamer, 4, Subtarget); 1138 } 1139 } 1140 1141 // Now that the register values are stashed, mov arguments into place. 1142 // FIXME: This doesn't work if one of the later SrcRegs is equal to an 1143 // earlier DestReg. We will have already overwritten over the register before 1144 // we can copy from it. 1145 for (unsigned I = 0; I < MI.getNumOperands(); ++I) 1146 if (SrcRegs[I] != DestRegs[I]) 1147 EmitAndCountInstruction( 1148 MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I])); 1149 1150 // We emit a hard dependency on the __xray_CustomEvent symbol, which is the 1151 // name of the trampoline to be implemented by the XRay runtime. 1152 auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent"); 1153 MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym); 1154 if (isPositionIndependent()) 1155 TOp.setTargetFlags(X86II::MO_PLT); 1156 1157 // Emit the call instruction. 1158 EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32) 1159 .addOperand(MCIL.LowerSymbolOperand(TOp, TSym))); 1160 1161 // Restore caller-saved and used registers. 1162 for (unsigned I = sizeof UsedMask; I-- > 0;) 1163 if (UsedMask[I]) 1164 EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I])); 1165 else 1166 emitX86Nops(*OutStreamer, 1, Subtarget); 1167 1168 OutStreamer->AddComment("xray custom event end."); 1169 1170 // Record the sled version. Version 0 of this sled was spelled differently, so 1171 // we let the runtime handle the different offsets we're using. Version 2 1172 // changed the absolute address to a PC-relative address. 1173 recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2); 1174 } 1175 1176 void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI, 1177 X86MCInstLower &MCIL) { 1178 assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64"); 1179 1180 NoAutoPaddingScope NoPadScope(*OutStreamer); 1181 1182 // We want to emit the following pattern, which follows the x86 calling 1183 // convention to prepare for the trampoline call to be patched in. 1184 // 1185 // .p2align 1, ... 1186 // .Lxray_event_sled_N: 1187 // jmp +N // jump across the instrumentation sled 1188 // ... // set up arguments in register 1189 // callq __xray_TypedEvent@plt // force dependency to symbol 1190 // ... 1191 // <jump here> 1192 // 1193 // After patching, it would look something like: 1194 // 1195 // nopw (2-byte nop) 1196 // ... 1197 // callq __xrayTypedEvent // already lowered 1198 // ... 1199 // 1200 // --- 1201 // First we emit the label and the jump. 1202 auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true); 1203 OutStreamer->AddComment("# XRay Typed Event Log"); 1204 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1205 OutStreamer->emitLabel(CurSled); 1206 1207 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as 1208 // an operand (computed as an offset from the jmp instruction). 1209 // FIXME: Find another less hacky way do force the relative jump. 
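  // (0xEB is the short JMP rel8 opcode; the 0x14 displacement skips the
  //  20-byte sled body emitted below: the pushes/nops, the call, and the
  //  restoring pops.)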
  OutStreamer->emitBinaryData("\xeb\x14");

  // An x86-64 convention may place three arguments into %rcx, %rdx, and %r8,
  // so we'll work with those. Or we may be called via SystemV, in which case
  // we don't have to do any translation.
  const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
  bool UsedMask[] = {false, false, false};

  // Will fill out src regs in the loop.
  Register SrcRegs[] = {0, 0, 0};

  // Then we put the operands in the SystemV registers. We spill the values in
  // the registers before we clobber them, and mark them as used in UsedMask.
  // In case the arguments are already in the correct register, we emit nops
  // appropriately sized to keep the sled the same size in every situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
      // TODO: Is register-only support adequate?
      assert(Op->isReg() && "Only supports arguments in registers");
      SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
      assert(SrcRegs[I].isValid() && "Invalid operand");
      if (SrcRegs[I] != DestRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
      } else {
        emitX86Nops(*OutStreamer, 4, Subtarget);
      }
    }

  // In the above loop we only stash all of the destination registers or emit
  // nops if the arguments are already in the right place. The actual moving is
  // postponed until after all the registers are stashed so nothing is
  // clobbered. We've already added nops to account for the size of mov and
  // push if the register is in the right place, so we only have to worry about
  // emitting movs.
  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
  // earlier DestReg. We will have already overwritten the register before we
  // can copy from it.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (UsedMask[I])
      EmitAndCountInstruction(
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));

  // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
  auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
  if (isPositionIndependent())
    TOp.setTargetFlags(X86II::MO_PLT);

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Restore caller-saved and used registers.
  for (unsigned I = sizeof UsedMask; I-- > 0;)
    if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
    else
      emitX86Nops(*OutStreamer, 1, Subtarget);

  OutStreamer->AddComment("xray typed event end.");

  // Record the sled version.
1275 recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2); 1276 } 1277 1278 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, 1279 X86MCInstLower &MCIL) { 1280 1281 NoAutoPaddingScope NoPadScope(*OutStreamer); 1282 1283 const Function &F = MF->getFunction(); 1284 if (F.hasFnAttribute("patchable-function-entry")) { 1285 unsigned Num; 1286 if (F.getFnAttribute("patchable-function-entry") 1287 .getValueAsString() 1288 .getAsInteger(10, Num)) 1289 return; 1290 emitX86Nops(*OutStreamer, Num, Subtarget); 1291 return; 1292 } 1293 // We want to emit the following pattern: 1294 // 1295 // .p2align 1, ... 1296 // .Lxray_sled_N: 1297 // jmp .tmpN 1298 // # 9 bytes worth of noops 1299 // 1300 // We need the 9 bytes because at runtime, we'd be patching over the full 11 1301 // bytes with the following pattern: 1302 // 1303 // mov %r10, <function id, 32-bit> // 6 bytes 1304 // call <relative offset, 32-bits> // 5 bytes 1305 // 1306 auto CurSled = OutContext.createTempSymbol("xray_sled_", true); 1307 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1308 OutStreamer->emitLabel(CurSled); 1309 1310 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as 1311 // an operand (computed as an offset from the jmp instruction). 1312 // FIXME: Find another less hacky way do force the relative jump. 1313 OutStreamer->emitBytes("\xeb\x09"); 1314 emitX86Nops(*OutStreamer, 9, Subtarget); 1315 recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2); 1316 } 1317 1318 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, 1319 X86MCInstLower &MCIL) { 1320 NoAutoPaddingScope NoPadScope(*OutStreamer); 1321 1322 // Since PATCHABLE_RET takes the opcode of the return statement as an 1323 // argument, we use that to emit the correct form of the RET that we want. 1324 // i.e. when we see this: 1325 // 1326 // PATCHABLE_RET X86::RET ... 1327 // 1328 // We should emit the RET followed by sleds. 1329 // 1330 // .p2align 1, ... 1331 // .Lxray_sled_N: 1332 // ret # or equivalent instruction 1333 // # 10 bytes worth of noops 1334 // 1335 // This just makes sure that the alignment for the next instruction is 2. 1336 auto CurSled = OutContext.createTempSymbol("xray_sled_", true); 1337 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1338 OutStreamer->emitLabel(CurSled); 1339 unsigned OpCode = MI.getOperand(0).getImm(); 1340 MCInst Ret; 1341 Ret.setOpcode(OpCode); 1342 for (auto &MO : drop_begin(MI.operands())) 1343 if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO)) 1344 Ret.addOperand(*MaybeOperand); 1345 OutStreamer->emitInstruction(Ret, getSubtargetInfo()); 1346 emitX86Nops(*OutStreamer, 10, Subtarget); 1347 recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2); 1348 } 1349 1350 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, 1351 X86MCInstLower &MCIL) { 1352 NoAutoPaddingScope NoPadScope(*OutStreamer); 1353 1354 // Like PATCHABLE_RET, we have the actual instruction in the operands to this 1355 // instruction so we lower that particular instruction and its operands. 1356 // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how 1357 // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to 1358 // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual 1359 // tail call much like how we have it in PATCHABLE_RET. 
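  // Illustrative shape of what is emitted below:
  //   .p2align 1
  //   .Lxray_sled_N:
  //     jmp .Ltmp            # 0xEB 0x09, skips the 9 bytes of nops
  //     <9 bytes of nops>
  //   .Ltmp:
  //     jmp <target>         # the actual tail call, commented "TAILCALL"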
1360 auto CurSled = OutContext.createTempSymbol("xray_sled_", true); 1361 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1362 OutStreamer->emitLabel(CurSled); 1363 auto Target = OutContext.createTempSymbol(); 1364 1365 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as 1366 // an operand (computed as an offset from the jmp instruction). 1367 // FIXME: Find another less hacky way do force the relative jump. 1368 OutStreamer->emitBytes("\xeb\x09"); 1369 emitX86Nops(*OutStreamer, 9, Subtarget); 1370 OutStreamer->emitLabel(Target); 1371 recordSled(CurSled, MI, SledKind::TAIL_CALL, 2); 1372 1373 unsigned OpCode = MI.getOperand(0).getImm(); 1374 OpCode = convertTailJumpOpcode(OpCode); 1375 MCInst TC; 1376 TC.setOpcode(OpCode); 1377 1378 // Before emitting the instruction, add a comment to indicate that this is 1379 // indeed a tail call. 1380 OutStreamer->AddComment("TAILCALL"); 1381 for (auto &MO : drop_begin(MI.operands())) 1382 if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO)) 1383 TC.addOperand(*MaybeOperand); 1384 OutStreamer->emitInstruction(TC, getSubtargetInfo()); 1385 } 1386 1387 // Returns instruction preceding MBBI in MachineFunction. 1388 // If MBBI is the first instruction of the first basic block, returns null. 1389 static MachineBasicBlock::const_iterator 1390 PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) { 1391 const MachineBasicBlock *MBB = MBBI->getParent(); 1392 while (MBBI == MBB->begin()) { 1393 if (MBB == &MBB->getParent()->front()) 1394 return MachineBasicBlock::const_iterator(); 1395 MBB = MBB->getPrevNode(); 1396 MBBI = MBB->end(); 1397 } 1398 --MBBI; 1399 return MBBI; 1400 } 1401 1402 static const Constant *getConstantFromPool(const MachineInstr &MI, 1403 const MachineOperand &Op) { 1404 if (!Op.isCPI() || Op.getOffset() != 0) 1405 return nullptr; 1406 1407 ArrayRef<MachineConstantPoolEntry> Constants = 1408 MI.getParent()->getParent()->getConstantPool()->getConstants(); 1409 const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()]; 1410 1411 // Bail if this is a machine constant pool entry, we won't be able to dig out 1412 // anything useful. 1413 if (ConstantEntry.isMachineConstantPoolEntry()) 1414 return nullptr; 1415 1416 return ConstantEntry.Val.ConstVal; 1417 } 1418 1419 static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx, 1420 unsigned SrcOp2Idx, ArrayRef<int> Mask) { 1421 std::string Comment; 1422 1423 // Compute the name for a register. This is really goofy because we have 1424 // multiple instruction printers that could (in theory) use different 1425 // names. Fortunately most people use the ATT style (outside of Windows) 1426 // and they actually agree on register naming here. Ultimately, this is 1427 // a comment, and so its OK if it isn't perfect. 1428 auto GetRegisterName = [](MCRegister Reg) -> StringRef { 1429 return X86ATTInstPrinter::getRegisterName(Reg); 1430 }; 1431 1432 const MachineOperand &DstOp = MI->getOperand(0); 1433 const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx); 1434 const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx); 1435 1436 StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem"; 1437 StringRef Src1Name = 1438 SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem"; 1439 StringRef Src2Name = 1440 SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem"; 1441 1442 // One source operand, fix the mask to print all elements in one span. 
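  // (Illustrative result: with both sources printing as the same register the
  //  comment reads like "xmm0 = xmm1[0,2,4,6],zero,zero,...", and with an
  //  AVX512 write mask it becomes e.g. "zmm0 {%k1} {z} = ...".)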
  SmallVector<int, 8> ShuffleMask(Mask);
  if (Src1Name == Src2Name)
    for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
      if (ShuffleMask[i] >= e)
        ShuffleMask[i] -= e;

  raw_string_ostream CS(Comment);
  CS << DstName;

  // Handle AVX512 MASK/MASKZ write mask comments.
  // MASK: zmmX {%kY}
  // MASKZ: zmmX {%kY} {z}
  if (SrcOp1Idx > 1) {
    assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");

    const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
    if (WriteMaskOp.isReg()) {
      CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";

      if (SrcOp1Idx == 2) {
        CS << " {z}";
      }
    }
  }

  CS << " = ";

  for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
    if (i != 0)
      CS << ",";
    if (ShuffleMask[i] == SM_SentinelZero) {
      CS << "zero";
      continue;
    }

    // Otherwise, it must come from src1 or src2. Print the span of elements
    // that comes from this src.
    bool isSrc1 = ShuffleMask[i] < (int)e;
    CS << (isSrc1 ? Src1Name : Src2Name) << '[';

    bool IsFirst = true;
    while (i != e && ShuffleMask[i] != SM_SentinelZero &&
           (ShuffleMask[i] < (int)e) == isSrc1) {
      if (!IsFirst)
        CS << ',';
      else
        IsFirst = false;
      if (ShuffleMask[i] == SM_SentinelUndef)
        CS << "u";
      else
        CS << ShuffleMask[i] % (int)e;
      ++i;
    }
    CS << ']';
    --i; // For loop increments element #.
  }
  CS.flush();

  return Comment;
}

static void printConstant(const APInt &Val, raw_ostream &CS) {
  if (Val.getBitWidth() <= 64) {
    CS << Val.getZExtValue();
  } else {
    // Print a multi-word constant as (w0,w1).
    CS << "(";
    for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
      if (i > 0)
        CS << ",";
      CS << Val.getRawData()[i];
    }
    CS << ")";
  }
}

static void printConstant(const APFloat &Flt, raw_ostream &CS) {
  SmallString<32> Str;
  // Force scientific notation to distinguish from integers.
1522 Flt.toString(Str, 0, 0); 1523 CS << Str; 1524 } 1525 1526 static void printConstant(const Constant *COp, unsigned BitWidth, 1527 raw_ostream &CS) { 1528 if (isa<UndefValue>(COp)) { 1529 CS << "u"; 1530 } else if (auto *CI = dyn_cast<ConstantInt>(COp)) { 1531 printConstant(CI->getValue(), CS); 1532 } else if (auto *CF = dyn_cast<ConstantFP>(COp)) { 1533 printConstant(CF->getValueAPF(), CS); 1534 } else if (auto *CDS = dyn_cast<ConstantDataSequential>(COp)) { 1535 Type *EltTy = CDS->getElementType(); 1536 bool IsInteger = EltTy->isIntegerTy(); 1537 bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy(); 1538 unsigned EltBits = EltTy->getPrimitiveSizeInBits(); 1539 unsigned E = std::min(BitWidth / EltBits, CDS->getNumElements()); 1540 assert((BitWidth % EltBits) == 0 && "Broadcast element size mismatch"); 1541 for (unsigned I = 0; I != E; ++I) { 1542 if (I != 0) 1543 CS << ","; 1544 if (IsInteger) 1545 printConstant(CDS->getElementAsAPInt(I), CS); 1546 else if (IsFP) 1547 printConstant(CDS->getElementAsAPFloat(I), CS); 1548 else 1549 CS << "?"; 1550 } 1551 } else { 1552 CS << "?"; 1553 } 1554 } 1555 1556 void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { 1557 assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); 1558 assert((getSubtarget().isOSWindows() || TM.getTargetTriple().isUEFI()) && 1559 "SEH_ instruction Windows and UEFI only"); 1560 1561 // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86. 1562 if (EmitFPOData) { 1563 X86TargetStreamer *XTS = 1564 static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()); 1565 switch (MI->getOpcode()) { 1566 case X86::SEH_PushReg: 1567 XTS->emitFPOPushReg(MI->getOperand(0).getImm()); 1568 break; 1569 case X86::SEH_StackAlloc: 1570 XTS->emitFPOStackAlloc(MI->getOperand(0).getImm()); 1571 break; 1572 case X86::SEH_StackAlign: 1573 XTS->emitFPOStackAlign(MI->getOperand(0).getImm()); 1574 break; 1575 case X86::SEH_SetFrame: 1576 assert(MI->getOperand(1).getImm() == 0 && 1577 ".cv_fpo_setframe takes no offset"); 1578 XTS->emitFPOSetFrame(MI->getOperand(0).getImm()); 1579 break; 1580 case X86::SEH_EndPrologue: 1581 XTS->emitFPOEndPrologue(); 1582 break; 1583 case X86::SEH_SaveReg: 1584 case X86::SEH_SaveXMM: 1585 case X86::SEH_PushFrame: 1586 llvm_unreachable("SEH_ directive incompatible with FPO"); 1587 break; 1588 default: 1589 llvm_unreachable("expected SEH_ instruction"); 1590 } 1591 return; 1592 } 1593 1594 // Otherwise, use the .seh_ directives for all other Windows platforms. 
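  // (Illustrative: X86::SEH_PushReg becomes a .seh_pushreg directive,
  //  X86::SEH_StackAlloc becomes .seh_stackalloc, and X86::SEH_EndPrologue
  //  becomes .seh_endprologue.)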
1595 switch (MI->getOpcode()) { 1596 case X86::SEH_PushReg: 1597 OutStreamer->emitWinCFIPushReg(MI->getOperand(0).getImm()); 1598 break; 1599 1600 case X86::SEH_SaveReg: 1601 OutStreamer->emitWinCFISaveReg(MI->getOperand(0).getImm(), 1602 MI->getOperand(1).getImm()); 1603 break; 1604 1605 case X86::SEH_SaveXMM: 1606 OutStreamer->emitWinCFISaveXMM(MI->getOperand(0).getImm(), 1607 MI->getOperand(1).getImm()); 1608 break; 1609 1610 case X86::SEH_StackAlloc: 1611 OutStreamer->emitWinCFIAllocStack(MI->getOperand(0).getImm()); 1612 break; 1613 1614 case X86::SEH_SetFrame: 1615 OutStreamer->emitWinCFISetFrame(MI->getOperand(0).getImm(), 1616 MI->getOperand(1).getImm()); 1617 break; 1618 1619 case X86::SEH_PushFrame: 1620 OutStreamer->emitWinCFIPushFrame(MI->getOperand(0).getImm()); 1621 break; 1622 1623 case X86::SEH_EndPrologue: 1624 OutStreamer->emitWinCFIEndProlog(); 1625 break; 1626 1627 default: 1628 llvm_unreachable("expected SEH_ instruction"); 1629 } 1630 } 1631 1632 static unsigned getRegisterWidth(const MCOperandInfo &Info) { 1633 if (Info.RegClass == X86::VR128RegClassID || 1634 Info.RegClass == X86::VR128XRegClassID) 1635 return 128; 1636 if (Info.RegClass == X86::VR256RegClassID || 1637 Info.RegClass == X86::VR256XRegClassID) 1638 return 256; 1639 if (Info.RegClass == X86::VR512RegClassID) 1640 return 512; 1641 llvm_unreachable("Unknown register class!"); 1642 } 1643 1644 static void addConstantComments(const MachineInstr *MI, 1645 MCStreamer &OutStreamer) { 1646 switch (MI->getOpcode()) { 1647 // Lower PSHUFB and VPERMILP normally but add a comment if we can find 1648 // a constant shuffle mask. We won't be able to do this at the MC layer 1649 // because the mask isn't an immediate. 1650 case X86::PSHUFBrm: 1651 case X86::VPSHUFBrm: 1652 case X86::VPSHUFBYrm: 1653 case X86::VPSHUFBZ128rm: 1654 case X86::VPSHUFBZ128rmk: 1655 case X86::VPSHUFBZ128rmkz: 1656 case X86::VPSHUFBZ256rm: 1657 case X86::VPSHUFBZ256rmk: 1658 case X86::VPSHUFBZ256rmkz: 1659 case X86::VPSHUFBZrm: 1660 case X86::VPSHUFBZrmk: 1661 case X86::VPSHUFBZrmkz: { 1662 unsigned SrcIdx = 1; 1663 if (X86II::isKMasked(MI->getDesc().TSFlags)) { 1664 // Skip mask operand. 1665 ++SrcIdx; 1666 if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) { 1667 // Skip passthru operand. 
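        // (In other words: masked forms carry a k-register write mask, and
        //  merge-masked forms additionally carry a tied pass-through value, so
        //  SrcIdx must step past both to reach the actual shuffle source.)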
1668 ++SrcIdx; 1669 } 1670 } 1671 unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp; 1672 1673 assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) && 1674 "Unexpected number of operands!"); 1675 1676 const MachineOperand &MaskOp = MI->getOperand(MaskIdx); 1677 if (auto *C = getConstantFromPool(*MI, MaskOp)) { 1678 unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]); 1679 SmallVector<int, 64> Mask; 1680 DecodePSHUFBMask(C, Width, Mask); 1681 if (!Mask.empty()) 1682 OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); 1683 } 1684 break; 1685 } 1686 1687 case X86::VPERMILPSrm: 1688 case X86::VPERMILPSYrm: 1689 case X86::VPERMILPSZ128rm: 1690 case X86::VPERMILPSZ128rmk: 1691 case X86::VPERMILPSZ128rmkz: 1692 case X86::VPERMILPSZ256rm: 1693 case X86::VPERMILPSZ256rmk: 1694 case X86::VPERMILPSZ256rmkz: 1695 case X86::VPERMILPSZrm: 1696 case X86::VPERMILPSZrmk: 1697 case X86::VPERMILPSZrmkz: 1698 case X86::VPERMILPDrm: 1699 case X86::VPERMILPDYrm: 1700 case X86::VPERMILPDZ128rm: 1701 case X86::VPERMILPDZ128rmk: 1702 case X86::VPERMILPDZ128rmkz: 1703 case X86::VPERMILPDZ256rm: 1704 case X86::VPERMILPDZ256rmk: 1705 case X86::VPERMILPDZ256rmkz: 1706 case X86::VPERMILPDZrm: 1707 case X86::VPERMILPDZrmk: 1708 case X86::VPERMILPDZrmkz: { 1709 unsigned ElSize; 1710 switch (MI->getOpcode()) { 1711 default: llvm_unreachable("Invalid opcode"); 1712 case X86::VPERMILPSrm: 1713 case X86::VPERMILPSYrm: 1714 case X86::VPERMILPSZ128rm: 1715 case X86::VPERMILPSZ256rm: 1716 case X86::VPERMILPSZrm: 1717 case X86::VPERMILPSZ128rmkz: 1718 case X86::VPERMILPSZ256rmkz: 1719 case X86::VPERMILPSZrmkz: 1720 case X86::VPERMILPSZ128rmk: 1721 case X86::VPERMILPSZ256rmk: 1722 case X86::VPERMILPSZrmk: 1723 ElSize = 32; 1724 break; 1725 case X86::VPERMILPDrm: 1726 case X86::VPERMILPDYrm: 1727 case X86::VPERMILPDZ128rm: 1728 case X86::VPERMILPDZ256rm: 1729 case X86::VPERMILPDZrm: 1730 case X86::VPERMILPDZ128rmkz: 1731 case X86::VPERMILPDZ256rmkz: 1732 case X86::VPERMILPDZrmkz: 1733 case X86::VPERMILPDZ128rmk: 1734 case X86::VPERMILPDZ256rmk: 1735 case X86::VPERMILPDZrmk: 1736 ElSize = 64; 1737 break; 1738 } 1739 1740 unsigned SrcIdx = 1; 1741 if (X86II::isKMasked(MI->getDesc().TSFlags)) { 1742 // Skip mask operand. 1743 ++SrcIdx; 1744 if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) { 1745 // Skip passthru operand. 
1746 ++SrcIdx; 1747 } 1748 } 1749 unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp; 1750 1751 assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) && 1752 "Unexpected number of operands!"); 1753 1754 const MachineOperand &MaskOp = MI->getOperand(MaskIdx); 1755 if (auto *C = getConstantFromPool(*MI, MaskOp)) { 1756 unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]); 1757 SmallVector<int, 16> Mask; 1758 DecodeVPERMILPMask(C, ElSize, Width, Mask); 1759 if (!Mask.empty()) 1760 OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); 1761 } 1762 break; 1763 } 1764 1765 case X86::VPERMIL2PDrm: 1766 case X86::VPERMIL2PSrm: 1767 case X86::VPERMIL2PDYrm: 1768 case X86::VPERMIL2PSYrm: { 1769 assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) && 1770 "Unexpected number of operands!"); 1771 1772 const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1); 1773 if (!CtrlOp.isImm()) 1774 break; 1775 1776 unsigned ElSize; 1777 switch (MI->getOpcode()) { 1778 default: llvm_unreachable("Invalid opcode"); 1779 case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break; 1780 case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break; 1781 } 1782 1783 const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp); 1784 if (auto *C = getConstantFromPool(*MI, MaskOp)) { 1785 unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]); 1786 SmallVector<int, 16> Mask; 1787 DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask); 1788 if (!Mask.empty()) 1789 OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask)); 1790 } 1791 break; 1792 } 1793 1794 case X86::VPPERMrrm: { 1795 assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands) && 1796 "Unexpected number of operands!"); 1797 1798 const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp); 1799 if (auto *C = getConstantFromPool(*MI, MaskOp)) { 1800 unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]); 1801 SmallVector<int, 16> Mask; 1802 DecodeVPPERMMask(C, Width, Mask); 1803 if (!Mask.empty()) 1804 OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask)); 1805 } 1806 break; 1807 } 1808 1809 case X86::MMX_MOVQ64rm: { 1810 assert(MI->getNumOperands() == (1 + X86::AddrNumOperands) && 1811 "Unexpected number of operands!"); 1812 if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) { 1813 std::string Comment; 1814 raw_string_ostream CS(Comment); 1815 const MachineOperand &DstOp = MI->getOperand(0); 1816 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; 1817 if (auto *CF = dyn_cast<ConstantFP>(C)) { 1818 CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false); 1819 OutStreamer.AddComment(CS.str()); 1820 } 1821 } 1822 break; 1823 } 1824 1825 #define MOV_CASE(Prefix, Suffix) \ 1826 case X86::Prefix##MOVAPD##Suffix##rm: \ 1827 case X86::Prefix##MOVAPS##Suffix##rm: \ 1828 case X86::Prefix##MOVUPD##Suffix##rm: \ 1829 case X86::Prefix##MOVUPS##Suffix##rm: \ 1830 case X86::Prefix##MOVDQA##Suffix##rm: \ 1831 case X86::Prefix##MOVDQU##Suffix##rm: 1832 1833 #define MOV_AVX512_CASE(Suffix) \ 1834 case X86::VMOVDQA64##Suffix##rm: \ 1835 case X86::VMOVDQA32##Suffix##rm: \ 1836 case X86::VMOVDQU64##Suffix##rm: \ 1837 case X86::VMOVDQU32##Suffix##rm: \ 1838 case X86::VMOVDQU16##Suffix##rm: \ 1839 case X86::VMOVDQU8##Suffix##rm: \ 1840 case X86::VMOVAPS##Suffix##rm: \ 1841 case X86::VMOVAPD##Suffix##rm: \ 1842 case X86::VMOVUPS##Suffix##rm: \ 1843 case X86::VMOVUPD##Suffix##rm: 1844 1845 #define CASE_128_MOV_RM() \ 
1846 MOV_CASE(, ) /* SSE */ \ 1847 MOV_CASE(V, ) /* AVX-128 */ \ 1848 MOV_AVX512_CASE(Z128) 1849 1850 #define CASE_256_MOV_RM() \ 1851 MOV_CASE(V, Y) /* AVX-256 */ \ 1852 MOV_AVX512_CASE(Z256) 1853 1854 #define CASE_512_MOV_RM() \ 1855 MOV_AVX512_CASE(Z) 1856 1857 #define CASE_ALL_MOV_RM() \ 1858 MOV_CASE(, ) /* SSE */ \ 1859 MOV_CASE(V, ) /* AVX-128 */ \ 1860 MOV_CASE(V, Y) /* AVX-256 */ \ 1861 MOV_AVX512_CASE(Z) \ 1862 MOV_AVX512_CASE(Z256) \ 1863 MOV_AVX512_CASE(Z128) 1864 1865 // For loads from a constant pool to a vector register, print the constant 1866 // loaded. 1867 CASE_ALL_MOV_RM() 1868 case X86::VBROADCASTF128rm: 1869 case X86::VBROADCASTI128rm: 1870 case X86::VBROADCASTF32X4Z256rm: 1871 case X86::VBROADCASTF32X4rm: 1872 case X86::VBROADCASTF32X8rm: 1873 case X86::VBROADCASTF64X2Z128rm: 1874 case X86::VBROADCASTF64X2rm: 1875 case X86::VBROADCASTF64X4rm: 1876 case X86::VBROADCASTI32X4Z256rm: 1877 case X86::VBROADCASTI32X4rm: 1878 case X86::VBROADCASTI32X8rm: 1879 case X86::VBROADCASTI64X2Z128rm: 1880 case X86::VBROADCASTI64X2rm: 1881 case X86::VBROADCASTI64X4rm: 1882 assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) && 1883 "Unexpected number of operands!"); 1884 if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) { 1885 int NumLanes = 1; 1886 int BitWidth = 128; 1887 int CstEltSize = C->getType()->getScalarSizeInBits(); 1888 1889 // Get destination BitWidth + override NumLanes for the broadcasts. 1890 switch (MI->getOpcode()) { 1891 CASE_128_MOV_RM() NumLanes = 1; BitWidth = 128; break; 1892 CASE_256_MOV_RM() NumLanes = 1; BitWidth = 256; break; 1893 CASE_512_MOV_RM() NumLanes = 1; BitWidth = 512; break; 1894 case X86::VBROADCASTF128rm: NumLanes = 2; BitWidth = 128; break; 1895 case X86::VBROADCASTI128rm: NumLanes = 2; BitWidth = 128; break; 1896 case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; BitWidth = 128; break; 1897 case X86::VBROADCASTF32X4rm: NumLanes = 4; BitWidth = 128; break; 1898 case X86::VBROADCASTF32X8rm: NumLanes = 2; BitWidth = 256; break; 1899 case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; BitWidth = 128; break; 1900 case X86::VBROADCASTF64X2rm: NumLanes = 4; BitWidth = 128; break; 1901 case X86::VBROADCASTF64X4rm: NumLanes = 2; BitWidth = 256; break; 1902 case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; BitWidth = 128; break; 1903 case X86::VBROADCASTI32X4rm: NumLanes = 4; BitWidth = 128; break; 1904 case X86::VBROADCASTI32X8rm: NumLanes = 2; BitWidth = 256; break; 1905 case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; BitWidth = 128; break; 1906 case X86::VBROADCASTI64X2rm: NumLanes = 4; BitWidth = 128; break; 1907 case X86::VBROADCASTI64X4rm: NumLanes = 2; BitWidth = 256; break; 1908 } 1909 1910 std::string Comment; 1911 raw_string_ostream CS(Comment); 1912 const MachineOperand &DstOp = MI->getOperand(0); 1913 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; 1914 if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) { 1915 int NumElements = CDS->getNumElements(); 1916 if ((BitWidth % CstEltSize) == 0) 1917 NumElements = std::min<int>(NumElements, BitWidth / CstEltSize); 1918 CS << "["; 1919 for (int l = 0; l != NumLanes; ++l) { 1920 for (int i = 0; i < NumElements; ++i) { 1921 if (i != 0 || l != 0) 1922 CS << ","; 1923 if (CDS->getElementType()->isIntegerTy()) 1924 printConstant(CDS->getElementAsAPInt(i), CS); 1925 else if (CDS->getElementType()->isHalfTy() || 1926 CDS->getElementType()->isFloatTy() || 1927 CDS->getElementType()->isDoubleTy()) 1928 printConstant(CDS->getElementAsAPFloat(i), CS); 1929 else 1930 CS << 
"?"; 1931 } 1932 } 1933 CS << "]"; 1934 OutStreamer.AddComment(CS.str()); 1935 } else if (auto *CV = dyn_cast<ConstantVector>(C)) { 1936 int NumOperands = CV->getNumOperands(); 1937 if ((BitWidth % CstEltSize) == 0) 1938 NumOperands = std::min<int>(NumOperands, BitWidth / CstEltSize); 1939 CS << "<"; 1940 for (int l = 0; l != NumLanes; ++l) { 1941 for (int i = 0; i < NumOperands; ++i) { 1942 if (i != 0 || l != 0) 1943 CS << ","; 1944 printConstant(CV->getOperand(i), 1945 CV->getType()->getPrimitiveSizeInBits(), CS); 1946 } 1947 } 1948 CS << ">"; 1949 OutStreamer.AddComment(CS.str()); 1950 } 1951 } 1952 break; 1953 1954 case X86::MOVDDUPrm: 1955 case X86::VMOVDDUPrm: 1956 case X86::VMOVDDUPZ128rm: 1957 case X86::VBROADCASTSSrm: 1958 case X86::VBROADCASTSSYrm: 1959 case X86::VBROADCASTSSZ128rm: 1960 case X86::VBROADCASTSSZ256rm: 1961 case X86::VBROADCASTSSZrm: 1962 case X86::VBROADCASTSDYrm: 1963 case X86::VBROADCASTSDZ256rm: 1964 case X86::VBROADCASTSDZrm: 1965 case X86::VPBROADCASTBrm: 1966 case X86::VPBROADCASTBYrm: 1967 case X86::VPBROADCASTBZ128rm: 1968 case X86::VPBROADCASTBZ256rm: 1969 case X86::VPBROADCASTBZrm: 1970 case X86::VPBROADCASTDrm: 1971 case X86::VPBROADCASTDYrm: 1972 case X86::VPBROADCASTDZ128rm: 1973 case X86::VPBROADCASTDZ256rm: 1974 case X86::VPBROADCASTDZrm: 1975 case X86::VPBROADCASTQrm: 1976 case X86::VPBROADCASTQYrm: 1977 case X86::VPBROADCASTQZ128rm: 1978 case X86::VPBROADCASTQZ256rm: 1979 case X86::VPBROADCASTQZrm: 1980 case X86::VPBROADCASTWrm: 1981 case X86::VPBROADCASTWYrm: 1982 case X86::VPBROADCASTWZ128rm: 1983 case X86::VPBROADCASTWZ256rm: 1984 case X86::VPBROADCASTWZrm: 1985 assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) && 1986 "Unexpected number of operands!"); 1987 if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) { 1988 int NumElts, EltBits; 1989 switch (MI->getOpcode()) { 1990 default: llvm_unreachable("Invalid opcode"); 1991 case X86::MOVDDUPrm: NumElts = 2; EltBits = 64; break; 1992 case X86::VMOVDDUPrm: NumElts = 2; EltBits = 64; break; 1993 case X86::VMOVDDUPZ128rm: NumElts = 2; EltBits = 64; break; 1994 case X86::VBROADCASTSSrm: NumElts = 4; EltBits = 32; break; 1995 case X86::VBROADCASTSSYrm: NumElts = 8; EltBits = 32; break; 1996 case X86::VBROADCASTSSZ128rm: NumElts = 4; EltBits = 32; break; 1997 case X86::VBROADCASTSSZ256rm: NumElts = 8; EltBits = 32; break; 1998 case X86::VBROADCASTSSZrm: NumElts = 16; EltBits = 32; break; 1999 case X86::VBROADCASTSDYrm: NumElts = 4; EltBits = 64; break; 2000 case X86::VBROADCASTSDZ256rm: NumElts = 4; EltBits = 64; break; 2001 case X86::VBROADCASTSDZrm: NumElts = 8; EltBits = 64; break; 2002 case X86::VPBROADCASTBrm: NumElts = 16; EltBits = 8; break; 2003 case X86::VPBROADCASTBYrm: NumElts = 32; EltBits = 8; break; 2004 case X86::VPBROADCASTBZ128rm: NumElts = 16; EltBits = 8; break; 2005 case X86::VPBROADCASTBZ256rm: NumElts = 32; EltBits = 8; break; 2006 case X86::VPBROADCASTBZrm: NumElts = 64; EltBits = 8; break; 2007 case X86::VPBROADCASTDrm: NumElts = 4; EltBits = 32; break; 2008 case X86::VPBROADCASTDYrm: NumElts = 8; EltBits = 32; break; 2009 case X86::VPBROADCASTDZ128rm: NumElts = 4; EltBits = 32; break; 2010 case X86::VPBROADCASTDZ256rm: NumElts = 8; EltBits = 32; break; 2011 case X86::VPBROADCASTDZrm: NumElts = 16; EltBits = 32; break; 2012 case X86::VPBROADCASTQrm: NumElts = 2; EltBits = 64; break; 2013 case X86::VPBROADCASTQYrm: NumElts = 4; EltBits = 64; break; 2014 case X86::VPBROADCASTQZ128rm: NumElts = 2; EltBits = 64; break; 2015 case 
X86::VPBROADCASTQZ256rm: NumElts = 4;  EltBits = 64; break;
2016       case X86::VPBROADCASTQZrm:    NumElts = 8;  EltBits = 64; break;
2017       case X86::VPBROADCASTWrm:     NumElts = 8;  EltBits = 16; break;
2018       case X86::VPBROADCASTWYrm:    NumElts = 16; EltBits = 16; break;
2019       case X86::VPBROADCASTWZ128rm: NumElts = 8;  EltBits = 16; break;
2020       case X86::VPBROADCASTWZ256rm: NumElts = 16; EltBits = 16; break;
2021       case X86::VPBROADCASTWZrm:    NumElts = 32; EltBits = 16; break;
2022       }
2023
2024       std::string Comment;
2025       raw_string_ostream CS(Comment);
2026       const MachineOperand &DstOp = MI->getOperand(0);
2027       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2028       CS << "[";
2029       for (int i = 0; i != NumElts; ++i) {
2030         if (i != 0)
2031           CS << ",";
2032         printConstant(C, EltBits, CS);
2033       }
2034       CS << "]";
2035       OutStreamer.AddComment(CS.str());
2036     }
2037   }
2038 }
2039
2040 void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2041   // FIXME: Enable feature predicate checks once all the tests pass.
2042   // X86_MC::verifyInstructionPredicates(MI->getOpcode(),
2043   //                                     Subtarget->getFeatureBits());
2044
2045   X86MCInstLower MCInstLowering(*MF, *this);
2046   const X86RegisterInfo *RI =
2047       MF->getSubtarget<X86Subtarget>().getRegisterInfo();
2048
2049   if (MI->getOpcode() == X86::OR64rm) {
2050     for (auto &Opd : MI->operands()) {
2051       if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
2052                                 "swift_async_extendedFramePointerFlags") {
2053         ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
2054       }
2055     }
2056   }
2057
2058   // Add a comment about EVEX compression
2059   if (TM.Options.MCOptions.ShowMCEncoding) {
2060     if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY)
2061       OutStreamer->AddComment("EVEX TO LEGACY Compression ", false);
2062     else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
2063       OutStreamer->AddComment("EVEX TO VEX Compression ", false);
2064   }
2065
2066   // Add comments for values loaded from constant pool.
2067   if (OutStreamer->isVerboseAsm())
2068     addConstantComments(MI, *OutStreamer);
2069
2070   switch (MI->getOpcode()) {
2071   case TargetOpcode::DBG_VALUE:
2072     llvm_unreachable("Should be handled target independently");
2073
2074   case X86::EH_RETURN:
2075   case X86::EH_RETURN64: {
2076     // Lower these as normal, but add some comments.
2077     Register Reg = MI->getOperand(0).getReg();
2078     OutStreamer->AddComment(StringRef("eh_return, addr: %") +
2079                             X86ATTInstPrinter::getRegisterName(Reg));
2080     break;
2081   }
2082   case X86::CLEANUPRET: {
2083     // Lower these as normal, but add some comments.
2084     OutStreamer->AddComment("CLEANUPRET");
2085     break;
2086   }
2087
2088   case X86::CATCHRET: {
2089     // Lower these as normal, but add some comments.
2090     OutStreamer->AddComment("CATCHRET");
2091     break;
2092   }
2093
2094   case X86::ENDBR32:
2095   case X86::ENDBR64: {
2096     // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
2097     // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
2098     // non-empty. If MI is the initial ENDBR, place the
2099     // __patchable_function_entries label after ENDBR.
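    // (Presumably this keeps the ENDBR itself outside the patchable byte range
    //  recorded in __patchable_function_entries, so later patching of those
    //  bytes cannot clobber the IBT landing pad.)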
2100 if (CurrentPatchableFunctionEntrySym && 2101 CurrentPatchableFunctionEntrySym == CurrentFnBegin && 2102 MI == &MF->front().front()) { 2103 MCInst Inst; 2104 MCInstLowering.Lower(MI, Inst); 2105 EmitAndCountInstruction(Inst); 2106 CurrentPatchableFunctionEntrySym = createTempSymbol("patch"); 2107 OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym); 2108 return; 2109 } 2110 break; 2111 } 2112 2113 case X86::TAILJMPd64: 2114 if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11)) 2115 EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX)); 2116 [[fallthrough]]; 2117 case X86::TAILJMPr: 2118 case X86::TAILJMPm: 2119 case X86::TAILJMPd: 2120 case X86::TAILJMPd_CC: 2121 case X86::TAILJMPr64: 2122 case X86::TAILJMPm64: 2123 case X86::TAILJMPd64_CC: 2124 case X86::TAILJMPr64_REX: 2125 case X86::TAILJMPm64_REX: 2126 // Lower these as normal, but add some comments. 2127 OutStreamer->AddComment("TAILCALL"); 2128 break; 2129 2130 case X86::TLS_addr32: 2131 case X86::TLS_addr64: 2132 case X86::TLS_addrX32: 2133 case X86::TLS_base_addr32: 2134 case X86::TLS_base_addr64: 2135 case X86::TLS_base_addrX32: 2136 return LowerTlsAddr(MCInstLowering, *MI); 2137 2138 case X86::MOVPC32r: { 2139 // This is a pseudo op for a two instruction sequence with a label, which 2140 // looks like: 2141 // call "L1$pb" 2142 // "L1$pb": 2143 // popl %esi 2144 2145 // Emit the call. 2146 MCSymbol *PICBase = MF->getPICBaseSymbol(); 2147 // FIXME: We would like an efficient form for this, so we don't have to do a 2148 // lot of extra uniquing. 2149 EmitAndCountInstruction( 2150 MCInstBuilder(X86::CALLpcrel32) 2151 .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); 2152 2153 const X86FrameLowering *FrameLowering = 2154 MF->getSubtarget<X86Subtarget>().getFrameLowering(); 2155 bool hasFP = FrameLowering->hasFP(*MF); 2156 2157 // TODO: This is needed only if we require precise CFA. 2158 bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() && 2159 !OutStreamer->getDwarfFrameInfos().back().End; 2160 2161 int stackGrowth = -RI->getSlotSize(); 2162 2163 if (HasActiveDwarfFrame && !hasFP) { 2164 OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth); 2165 MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true); 2166 } 2167 2168 // Emit the label. 2169 OutStreamer->emitLabel(PICBase); 2170 2171 // popl $reg 2172 EmitAndCountInstruction( 2173 MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg())); 2174 2175 if (HasActiveDwarfFrame && !hasFP) { 2176 OutStreamer->emitCFIAdjustCfaOffset(stackGrowth); 2177 } 2178 return; 2179 } 2180 2181 case X86::ADD32ri: { 2182 // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. 2183 if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) 2184 break; 2185 2186 // Okay, we have something like: 2187 // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) 2188 2189 // For this, we want to print something like: 2190 // MYGLOBAL + (. - PICBASE) 2191 // However, we can't generate a ".", so just emit a new label here and refer 2192 // to it. 2193 MCSymbol *DotSym = OutContext.createTempSymbol(); 2194 OutStreamer->emitLabel(DotSym); 2195 2196 // Now that we have emitted the label, lower the complex operand expression. 
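    // Concretely, the expression assembled below is OpSym + (DotSym - PICBase),
    // i.e. the "MYGLOBAL + (. - PICBASE)" form described above, with DotSym
    // standing in for ".".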
2197 MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2)); 2198 2199 const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext); 2200 const MCExpr *PICBase = 2201 MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext); 2202 DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext); 2203 2204 DotExpr = MCBinaryExpr::createAdd( 2205 MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext); 2206 2207 EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri) 2208 .addReg(MI->getOperand(0).getReg()) 2209 .addReg(MI->getOperand(1).getReg()) 2210 .addExpr(DotExpr)); 2211 return; 2212 } 2213 case TargetOpcode::STATEPOINT: 2214 return LowerSTATEPOINT(*MI, MCInstLowering); 2215 2216 case TargetOpcode::FAULTING_OP: 2217 return LowerFAULTING_OP(*MI, MCInstLowering); 2218 2219 case TargetOpcode::FENTRY_CALL: 2220 return LowerFENTRY_CALL(*MI, MCInstLowering); 2221 2222 case TargetOpcode::PATCHABLE_OP: 2223 return LowerPATCHABLE_OP(*MI, MCInstLowering); 2224 2225 case TargetOpcode::STACKMAP: 2226 return LowerSTACKMAP(*MI); 2227 2228 case TargetOpcode::PATCHPOINT: 2229 return LowerPATCHPOINT(*MI, MCInstLowering); 2230 2231 case TargetOpcode::PATCHABLE_FUNCTION_ENTER: 2232 return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering); 2233 2234 case TargetOpcode::PATCHABLE_RET: 2235 return LowerPATCHABLE_RET(*MI, MCInstLowering); 2236 2237 case TargetOpcode::PATCHABLE_TAIL_CALL: 2238 return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering); 2239 2240 case TargetOpcode::PATCHABLE_EVENT_CALL: 2241 return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering); 2242 2243 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL: 2244 return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering); 2245 2246 case X86::MORESTACK_RET: 2247 EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); 2248 return; 2249 2250 case X86::KCFI_CHECK: 2251 return LowerKCFI_CHECK(*MI); 2252 2253 case X86::ASAN_CHECK_MEMACCESS: 2254 return LowerASAN_CHECK_MEMACCESS(*MI); 2255 2256 case X86::MORESTACK_RET_RESTORE_R10: 2257 // Return, then restore R10. 2258 EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); 2259 EmitAndCountInstruction( 2260 MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX)); 2261 return; 2262 2263 case X86::SEH_PushReg: 2264 case X86::SEH_SaveReg: 2265 case X86::SEH_SaveXMM: 2266 case X86::SEH_StackAlloc: 2267 case X86::SEH_StackAlign: 2268 case X86::SEH_SetFrame: 2269 case X86::SEH_PushFrame: 2270 case X86::SEH_EndPrologue: 2271 EmitSEHInstruction(MI); 2272 return; 2273 2274 case X86::SEH_Epilogue: { 2275 assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); 2276 MachineBasicBlock::const_iterator MBBI(MI); 2277 // Check if preceded by a call and emit nop if so. 2278 for (MBBI = PrevCrossBBInst(MBBI); 2279 MBBI != MachineBasicBlock::const_iterator(); 2280 MBBI = PrevCrossBBInst(MBBI)) { 2281 // Pseudo instructions that aren't a call are assumed to not emit any 2282 // code. If they do, we worst case generate unnecessary noops after a 2283 // call. 
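      // (The usual rationale: the return address of a call must not land on
      //  the first byte of the epilogue, or the Win64 unwinder can misclassify
      //  the frame, hence the extra NOP when a call immediately precedes it.)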
2284       if (MBBI->isCall() || !MBBI->isPseudo()) {
2285         if (MBBI->isCall())
2286           EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2287         break;
2288       }
2289     }
2290     return;
2291   }
2292   case X86::UBSAN_UD1:
2293     EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
2294                                 .addReg(X86::EAX)
2295                                 .addReg(X86::EAX)
2296                                 .addImm(1)
2297                                 .addReg(X86::NoRegister)
2298                                 .addImm(MI->getOperand(0).getImm())
2299                                 .addReg(X86::NoRegister));
2300     return;
2301   case X86::CALL64pcrel32:
2302     if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
2303       EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
2304     break;
2305   }
2306
2307   MCInst TmpInst;
2308   MCInstLowering.Lower(MI, TmpInst);
2309
2310   // Stackmap shadows cannot include branch targets, so we can count the bytes
2311   // in a call towards the shadow, but must ensure that no thread returns into
2312   // the stackmap shadow. The only way to achieve this is if the call is at
2313   // the end of the shadow.
2314   if (MI->isCall()) {
2315     // Count the size of the call towards the shadow.
2316     SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
2317     // Then flush the shadow so that we fill with nops before the call, not
2318     // after it.
2319     SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
2320     // Then emit the call.
2321     OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
2322     return;
2323   }
2324
2325   EmitAndCountInstruction(TmpInst);
2326 }
2327