//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains code to lower X86 MachineInstrs to their corresponding
// MCInst records.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86InstComments.h"
#include "MCTargetDesc/X86ShuffleDecode.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "X86AsmPrinter.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "X86Subtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include <string>

using namespace llvm;

static cl::opt<bool> EnableBranchHint("enable-branch-hint",
                                      cl::desc("Enable branch hint."),
                                      cl::init(false), cl::Hidden);
static cl::opt<unsigned> BranchHintProbabilityThreshold(
    "branch-hint-probability-threshold",
    cl::desc("The probability threshold of enabling branch hint."),
    cl::init(50), cl::Hidden);

namespace {

/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
class X86MCInstLower {
  MCContext &Ctx;
  const MachineFunction &MF;
  const TargetMachine &TM;
  const MCAsmInfo &MAI;
  X86AsmPrinter &AsmPrinter;

public:
  X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);

  MCOperand LowerMachineOperand(const MachineInstr *MI,
                                const MachineOperand &MO) const;
  void Lower(const MachineInstr *MI, MCInst &OutMI) const;

  MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;

private:
  MachineModuleInfoMachO &getMachOMMI() const;
};

} // end anonymous namespace

/// A RAII helper which defines a region of instructions which can't have
/// padding added between them for correctness.
struct NoAutoPaddingScope {
  MCStreamer &OS;
  const bool OldAllowAutoPadding;
  NoAutoPaddingScope(MCStreamer &OS)
      : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
    changeAndComment(false);
  }
  ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
  void changeAndComment(bool b) {
    if (b == OS.getAllowAutoPadding())
      return;
    OS.setAllowAutoPadding(b);
    if (b)
      OS.emitRawComment("autopadding");
    else
      OS.emitRawComment("noautopadding");
  }
};

// Emit a minimal sequence of nops spanning NumBytes bytes.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget);

void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
                                                 const MCSubtargetInfo &STI,
                                                 MCCodeEmitter *CodeEmitter) {
  if (InShadow) {
    SmallString<256> Code;
    SmallVector<MCFixup, 4> Fixups;
    CodeEmitter->encodeInstruction(Inst, Code, Fixups, STI);
    CurrentShadowSize += Code.size();
    if (CurrentShadowSize >= RequiredShadowSize)
      InShadow = false; // The shadow is big enough. Stop counting.
  }
}

void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
    MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
  if (InShadow && CurrentShadowSize < RequiredShadowSize) {
    InShadow = false;
    emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
                &MF->getSubtarget<X86Subtarget>());
  }
}

void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
  OutStreamer->emitInstruction(Inst, getSubtargetInfo());
  SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
}

X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
                               X86AsmPrinter &asmprinter)
    : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
      AsmPrinter(asmprinter) {}

MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
  return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>();
}

/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
  const Triple &TT = TM.getTargetTriple();
  if (MO.isGlobal() && TT.isOSBinFormatELF())
    return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());

  const DataLayout &DL = MF.getDataLayout();
  assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
         "Isn't a symbol reference");

  MCSymbol *Sym = nullptr;
  SmallString<128> Name;
  StringRef Suffix;

  switch (MO.getTargetFlags()) {
  case X86II::MO_DLLIMPORT:
    // Handle dllimport linkage.
    Name += "__imp_";
    break;
  case X86II::MO_COFFSTUB:
    Name += ".refptr.";
    break;
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Suffix = "$non_lazy_ptr";
    break;
  }

  if (!Suffix.empty())
    Name += DL.getPrivateGlobalPrefix();

  if (MO.isGlobal()) {
    const GlobalValue *GV = MO.getGlobal();
    AsmPrinter.getNameWithPrefix(Name, GV);
  } else if (MO.isSymbol()) {
    Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
  } else if (MO.isMBB()) {
    assert(Suffix.empty());
    Sym = MO.getMBB()->getSymbol();
  }

  Name += Suffix;
  if (!Sym)
    Sym = Ctx.getOrCreateSymbol(Name);

  // If the target flags on the operand change the name of the symbol, do that
  // before we return the symbol.
  switch (MO.getTargetFlags()) {
  default:
    break;
  case X86II::MO_COFFSTUB: {
    MachineModuleInfoCOFF &MMICOFF =
        AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoCOFF>();
    MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()), true);
    }
    break;
  }
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
    MachineModuleInfoImpl::StubValueTy &StubSym =
        getMachOMMI().getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()),
          !MO.getGlobal()->hasInternalLinkage());
    }
    break;
  }
  }

  return Sym;
}

MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
                                             MCSymbol *Sym) const {
  // FIXME: We would like an efficient form for this, so we don't have to do a
  // lot of extra uniquing.
  const MCExpr *Expr = nullptr;
  MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;

  switch (MO.getTargetFlags()) {
  default:
    llvm_unreachable("Unknown target flag on GV operand");
  case X86II::MO_NO_FLAG: // No flag.
  // These affect the name of the symbol, not any suffix.
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DLLIMPORT:
  case X86II::MO_COFFSTUB:
    break;

  case X86II::MO_TLVP:
    RefKind = MCSymbolRefExpr::VK_TLVP;
    break;
  case X86II::MO_TLVP_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    break;
  case X86II::MO_SECREL:
    RefKind = MCSymbolRefExpr::VK_SECREL;
    break;
  case X86II::MO_TLSGD:
    RefKind = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86II::MO_TLSLD:
    RefKind = MCSymbolRefExpr::VK_TLSLD;
    break;
  case X86II::MO_TLSLDM:
    RefKind = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86II::MO_GOTTPOFF:
    RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
    break;
  case X86II::MO_INDNTPOFF:
    RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
    break;
  case X86II::MO_TPOFF:
    RefKind = MCSymbolRefExpr::VK_TPOFF;
    break;
  case X86II::MO_DTPOFF:
    RefKind = MCSymbolRefExpr::VK_DTPOFF;
    break;
  case X86II::MO_NTPOFF:
    RefKind = MCSymbolRefExpr::VK_NTPOFF;
    break;
  case X86II::MO_GOTNTPOFF:
    RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
    break;
  case X86II::MO_GOTPCREL:
    RefKind = MCSymbolRefExpr::VK_GOTPCREL;
    break;
  case X86II::MO_GOTPCREL_NORELAX:
    RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX;
    break;
  case X86II::MO_GOT:
    RefKind = MCSymbolRefExpr::VK_GOT;
    break;
  case X86II::MO_GOTOFF:
    RefKind = MCSymbolRefExpr::VK_GOTOFF;
    break;
  case X86II::MO_PLT:
    RefKind = MCSymbolRefExpr::VK_PLT;
    break;
  case X86II::MO_ABS8:
    RefKind = MCSymbolRefExpr::VK_X86_ABS8;
    break;
  case X86II::MO_PIC_BASE_OFFSET:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    if (MO.isJTI()) {
      assert(MAI.doesSetDirectiveSuppressReloc());
      // If .set directive is supported, use it to reduce the number of
      // relocations the assembler will generate for differences between
      // local labels. This is only safe when the symbols are in the same
      // section so we are restricting it to jumptable references.
      MCSymbol *Label = Ctx.createTempSymbol();
      AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
      Expr = MCSymbolRefExpr::create(Label, Ctx);
    }
    break;
  }

  if (!Expr)
    Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);

  if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
    Expr = MCBinaryExpr::createAdd(
        Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
  return MCOperand::createExpr(Expr);
}

static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
  return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
}

MCOperand X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
                                              const MachineOperand &MO) const {
  switch (MO.getType()) {
  default:
    MI->print(errs());
    llvm_unreachable("unknown operand type");
  case MachineOperand::MO_Register:
    // Ignore all implicit register operands.
    if (MO.isImplicit())
      return MCOperand();
    return MCOperand::createReg(MO.getReg());
  case MachineOperand::MO_Immediate:
    return MCOperand::createImm(MO.getImm());
  case MachineOperand::MO_MachineBasicBlock:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_ExternalSymbol:
    return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
  case MachineOperand::MO_MCSymbol:
    return LowerSymbolOperand(MO, MO.getMCSymbol());
  case MachineOperand::MO_JumpTableIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
  case MachineOperand::MO_ConstantPoolIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
  case MachineOperand::MO_BlockAddress:
    return LowerSymbolOperand(
        MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
  case MachineOperand::MO_RegisterMask:
    // Ignore call clobbers.
    return MCOperand();
  }
}

// Replace TAILJMP opcodes with their equivalent opcodes that have encoding
// information.
static unsigned convertTailJumpOpcode(unsigned Opcode) {
  switch (Opcode) {
  case X86::TAILJMPr:
    Opcode = X86::JMP32r;
    break;
  case X86::TAILJMPm:
    Opcode = X86::JMP32m;
    break;
  case X86::TAILJMPr64:
    Opcode = X86::JMP64r;
    break;
  case X86::TAILJMPm64:
    Opcode = X86::JMP64m;
    break;
  case X86::TAILJMPr64_REX:
    Opcode = X86::JMP64r_REX;
    break;
  case X86::TAILJMPm64_REX:
    Opcode = X86::JMP64m_REX;
    break;
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    Opcode = X86::JMP_1;
    break;
  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    Opcode = X86::JCC_1;
    break;
  }

  return Opcode;
}

void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
  OutMI.setOpcode(MI->getOpcode());

  for (const MachineOperand &MO : MI->operands())
    if (auto Op = LowerMachineOperand(MI, MO); Op.isValid())
      OutMI.addOperand(Op);

  bool In64BitMode = AsmPrinter.getSubtarget().is64Bit();
  if (X86::optimizeInstFromVEX3ToVEX2(OutMI, MI->getDesc()) ||
      X86::optimizeShiftRotateWithImmediateOne(OutMI) ||
      X86::optimizeVPCMPWithImmediateOneOrSix(OutMI) ||
      X86::optimizeMOVSX(OutMI) || X86::optimizeINCDEC(OutMI, In64BitMode) ||
      X86::optimizeMOV(OutMI, In64BitMode) ||
      X86::optimizeToFixedRegisterOrShortImmediateForm(OutMI))
    return;

  // Handle a few special cases to eliminate operand modifiers.
  switch (OutMI.getOpcode()) {
  case X86::LEA64_32r:
  case X86::LEA64r:
  case X86::LEA16r:
  case X86::LEA32r:
    // LEA should have a segment register, but it must be empty.
    assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
           "Unexpected # of LEA operands");
    assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
           "LEA has segment specified!");
    break;
  case X86::MULX32Hrr:
  case X86::MULX32Hrm:
  case X86::MULX64Hrr:
  case X86::MULX64Hrm: {
    // Turn into regular MULX by duplicating the destination.
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
    case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
    case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
    case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
    }
    OutMI.setOpcode(NewOpc);
    // Duplicate the destination.
    unsigned DestReg = OutMI.getOperand(0).getReg();
    OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
    break;
  }
  // CALL64r, CALL64pcrel32 - These instructions used to have
  // register inputs modeled as normal uses instead of implicit uses. As such,
  // we used to truncate off all but the first operand (the callee). This
  // issue seems to have been fixed at some point. This assert verifies that.
  case X86::CALL64r:
  case X86::CALL64pcrel32:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    break;
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }
  case X86::CLEANUPRET: {
    // Replace CLEANUPRET with the appropriate RET.
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }
  case X86::CATCHRET: {
    // Replace CATCHRET with the appropriate RET.
    const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
    unsigned ReturnReg = In64BitMode ? X86::RAX : X86::EAX;
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(Subtarget));
    OutMI.addOperand(MCOperand::createReg(ReturnReg));
    break;
  }
  // TAILJMPd, TAILJMPd64, TAILJMPd_CC - Lower to the correct jump
  // instruction.
  case X86::TAILJMPr:
  case X86::TAILJMPr64:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::TAILJMPm:
  case X86::TAILJMPm64:
  case X86::TAILJMPm64_REX:
    assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
           "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::MASKMOVDQU:
  case X86::VMASKMOVDQU:
    if (In64BitMode)
      OutMI.setFlags(X86::IP_HAS_AD_SIZE);
    break;
  case X86::BSF16rm:
  case X86::BSF16rr:
  case X86::BSF32rm:
  case X86::BSF32rr:
  case X86::BSF64rm:
  case X86::BSF64rr: {
    // Add a REP prefix to BSF instructions so that new processors can
    // recognize them as TZCNT, which has better performance than BSF.
    // BSF and TZCNT have different interpretations of the ZF bit. So make sure
    // it won't be used later.
    const MachineOperand *FlagDef =
        MI->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
    if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead())
      OutMI.setFlags(X86::IP_HAS_REPEAT);
    break;
  }
  default:
    break;
  }
}

void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                 const MachineInstr &MI) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);
  bool Is64Bits = getSubtarget().is64Bit();
  bool Is64BitsLP64 = getSubtarget().isTarget64BitLP64();
  MCContext &Ctx = OutStreamer->getContext();

  MCSymbolRefExpr::VariantKind SRVK;
  switch (MI.getOpcode()) {
  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
    SRVK = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86::TLS_base_addr32:
    SRVK = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    SRVK = MCSymbolRefExpr::VK_TLSLD;
    break;
  case X86::TLS_desc32:
  case X86::TLS_desc64:
    SRVK = MCSymbolRefExpr::VK_TLSDESC;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
      MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);

  // Before binutils 2.41, ld has a bogus TLS relaxation error when the GD/LD
  // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
  // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
  // only using GOT when GOTPCRELX is enabled.
  // TODO Delete the workaround when rustc no longer relies on the hack
  bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
                Ctx.getTargetOptions()->X86RelaxRelocations;

  if (SRVK == MCSymbolRefExpr::VK_TLSDESC) {
    const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(
        MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)),
        MCSymbolRefExpr::VK_TLSCALL, Ctx);
    EmitAndCountInstruction(
        MCInstBuilder(Is64BitsLP64 ? X86::LEA64r : X86::LEA32r)
            .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
            .addReg(Is64Bits ? X86::RIP : X86::EBX)
            .addImm(1)
            .addReg(0)
            .addExpr(Sym)
            .addReg(0));
    EmitAndCountInstruction(
        MCInstBuilder(Is64Bits ? X86::CALL64m : X86::CALL32m)
            .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
            .addImm(1)
            .addReg(0)
            .addExpr(Expr)
            .addReg(0));
  } else if (Is64Bits) {
    bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
    if (NeedsPadding && Is64BitsLP64)
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
    EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
                                .addReg(X86::RDI)
                                .addReg(X86::RIP)
                                .addImm(1)
                                .addReg(0)
                                .addExpr(Sym)
                                .addReg(0));
    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
    if (NeedsPadding) {
      if (!UseGot)
        EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
    }
    if (UseGot) {
      const MCExpr *Expr = MCSymbolRefExpr::create(
          TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
                                  .addReg(X86::RIP)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALL64pcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  } else {
    if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(0)
                                  .addImm(1)
                                  .addReg(X86::EBX)
                                  .addExpr(Sym)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Sym)
                                  .addReg(0));
    }

    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
    if (UseGot) {
      const MCExpr *Expr =
          MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALLpcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  }
}

/// Emit the largest nop instruction smaller than or equal to \p NumBytes
/// bytes. Return the size of nop emitted.
static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget) {
  // Determine the longest nop which can be efficiently decoded for the given
  // target cpu. 15-bytes is the longest single NOP instruction, but some
  // platforms can't decode the longest forms efficiently.
  unsigned MaxNopLength = 1;
  if (Subtarget->is64Bit()) {
    // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
    // IndexReg/BaseReg below need to be updated.
    if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
      MaxNopLength = 7;
    else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
      MaxNopLength = 15;
    else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
      MaxNopLength = 11;
    else
      MaxNopLength = 10;
  } if (Subtarget->is32Bit())
    MaxNopLength = 2;

  // Cap a single nop emission at the profitable value for the target
  NumBytes = std::min(NumBytes, MaxNopLength);

  unsigned NopSize;
  unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
  IndexReg = Displacement = SegmentReg = 0;
  BaseReg = X86::RAX;
  ScaleVal = 1;
  switch (NumBytes) {
  case 0:
    llvm_unreachable("Zero nops?");
    break;
  case 1:
    NopSize = 1;
    Opc = X86::NOOP;
    break;
  case 2:
    NopSize = 2;
    Opc = X86::XCHG16ar;
    break;
  case 3:
    NopSize = 3;
    Opc = X86::NOOPL;
    break;
  case 4:
    NopSize = 4;
    Opc = X86::NOOPL;
    Displacement = 8;
    break;
  case 5:
    NopSize = 5;
    Opc = X86::NOOPL;
    Displacement = 8;
    IndexReg = X86::RAX;
    break;
  case 6:
    NopSize = 6;
    Opc = X86::NOOPW;
    Displacement = 8;
    IndexReg = X86::RAX;
    break;
  case 7:
    NopSize = 7;
    Opc = X86::NOOPL;
    Displacement = 512;
    break;
  case 8:
    NopSize = 8;
    Opc = X86::NOOPL;
    Displacement = 512;
    IndexReg = X86::RAX;
    break;
  case 9:
    NopSize = 9;
    Opc = X86::NOOPW;
    Displacement = 512;
    IndexReg = X86::RAX;
    break;
  default:
    NopSize = 10;
    Opc = X86::NOOPW;
    Displacement = 512;
    IndexReg = X86::RAX;
    SegmentReg = X86::CS;
    break;
  }

  unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
  NopSize += NumPrefixes;
  for (unsigned i = 0; i != NumPrefixes; ++i)
    OS.emitBytes("\x66");

  switch (Opc) {
  default: llvm_unreachable("Unexpected opcode");
  case X86::NOOP:
    OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
    break;
  case X86::XCHG16ar:
    OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
                       *Subtarget);
    break;
  case X86::NOOPL:
  case X86::NOOPW:
    OS.emitInstruction(MCInstBuilder(Opc)
                           .addReg(BaseReg)
                           .addImm(ScaleVal)
                           .addReg(IndexReg)
                           .addImm(Displacement)
                           .addReg(SegmentReg),
                       *Subtarget);
    break;
  }
  assert(NopSize <= NumBytes && "We overemitted?");
  return NopSize;
}

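// Note: emitNop emits at most MaxNopLength bytes per call, so longer regions
// rely on the loop in emitX86Nops below. As a rough worked example (assuming a
// 64-bit target where none of the fast-NOP tuning features apply, so
// MaxNopLength ends up as 10): a 23-byte request would be covered by three
// nops of 10, 10 and 3 bytes.
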
/// Emit the optimal amount of multi-byte nops on X86.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget) {
  unsigned NopsToEmit = NumBytes;
  (void)NopsToEmit;
  while (NumBytes) {
    NumBytes -= emitNop(OS, NumBytes, Subtarget);
    assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
  }
}

void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  StatepointOpers SOpers(&MI);
  if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
    emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
  } else {
    // Lower call target and choose correct opcode
    const MachineOperand &CallTarget = SOpers.getCallTarget();
    MCOperand CallTargetMCOp;
    unsigned CallOpcode;
    switch (CallTarget.getType()) {
    case MachineOperand::MO_GlobalAddress:
    case MachineOperand::MO_ExternalSymbol:
      CallTargetMCOp = MCIL.LowerSymbolOperand(
          CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // address. You'll fail asserts during load & relocation if this
      // symbol is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Immediate:
      CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // immediate. You'll fail asserts during load & relocation if this
      // address is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Register:
      // FIXME: Add retpoline support and remove this.
      if (Subtarget->useIndirectThunkCalls())
        report_fatal_error("Lowering register statepoints with thunks not "
                           "yet implemented.");
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
      CallOpcode = X86::CALL64r;
      break;
    default:
      llvm_unreachable("Unsupported operand type in statepoint call target");
      break;
    }

    // Emit call
    MCInst CallInst;
    CallInst.setOpcode(CallOpcode);
    CallInst.addOperand(CallTargetMCOp);
    OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
  }

  // Record our statepoint node in the same section used by STACKMAP
  // and PATCHPOINT
  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);
  SM.recordStatepoint(*MILabel, MI);
}

void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
                                     X86MCInstLower &MCIL) {
  // FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>,
  // <opcode>, <operands>

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  Register DefRegister = FaultingMI.getOperand(0).getReg();
  FaultMaps::FaultKind FK =
      static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
  MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
  unsigned Opcode = FaultingMI.getOperand(3).getImm();
  unsigned OperandsBeginIdx = 4;

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *FaultingLabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(FaultingLabel);

  assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
  FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);

  MCInst MI;
  MI.setOpcode(Opcode);

  if (DefRegister != X86::NoRegister)
    MI.addOperand(MCOperand::createReg(DefRegister));

  for (const MachineOperand &MO :
       llvm::drop_begin(FaultingMI.operands(), OperandsBeginIdx))
    if (auto Op = MCIL.LowerMachineOperand(&FaultingMI, MO); Op.isValid())
      MI.addOperand(Op);

  OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
  OutStreamer->emitInstruction(MI, getSubtargetInfo());
}

void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
                                     X86MCInstLower &MCIL) {
  bool Is64Bits = Subtarget->is64Bit();
  MCContext &Ctx = OutStreamer->getContext();
  MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
  const MCSymbolRefExpr *Op =
      MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);

  EmitAndCountInstruction(
      MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
          .addExpr(Op));
}

void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
  assert(std::next(MI.getIterator())->isCall() &&
         "KCFI_CHECK not followed by a call instruction");

  // Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop()
  // returns a 1-byte X86::NOOP, which means the offset is the same in
  // bytes. This assumes that patchable-function-prefix is the same for all
  // functions.
  const MachineFunction &MF = *MI.getMF();
  int64_t PrefixNops = 0;
  (void)MF.getFunction()
      .getFnAttribute("patchable-function-prefix")
      .getValueAsString()
      .getAsInteger(10, PrefixNops);

  // KCFI allows indirect calls to any location that's preceded by a valid
  // type identifier. To avoid encoding the full constant into an instruction,
  // and thus emitting potential call target gadgets at each indirect call
  // site, load a negated constant to a register and compare that to the
  // expected value at the call target.
  const Register AddrReg = MI.getOperand(0).getReg();
  const uint32_t Type = MI.getOperand(1).getImm();
  // The check is immediately before the call. If the call target is in R10,
  // we can clobber R11 for the check instead.
  unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D;
  EmitAndCountInstruction(
      MCInstBuilder(X86::MOV32ri).addReg(TempReg).addImm(-MaskKCFIType(Type)));
  EmitAndCountInstruction(MCInstBuilder(X86::ADD32rm)
                              .addReg(X86::NoRegister)
                              .addReg(TempReg)
                              .addReg(AddrReg)
                              .addImm(1)
                              .addReg(X86::NoRegister)
                              .addImm(-(PrefixNops + 4))
                              .addReg(X86::NoRegister));

  MCSymbol *Pass = OutContext.createTempSymbol();
  EmitAndCountInstruction(
      MCInstBuilder(X86::JCC_1)
          .addExpr(MCSymbolRefExpr::create(Pass, OutContext))
          .addImm(X86::COND_E));

  MCSymbol *Trap = OutContext.createTempSymbol();
  OutStreamer->emitLabel(Trap);
  EmitAndCountInstruction(MCInstBuilder(X86::TRAP));
  emitKCFITrapEntry(MF, Trap);
  OutStreamer->emitLabel(Pass);
}

void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
  // FIXME: Make this work on non-ELF.
  if (!TM.getTargetTriple().isOSBinFormatELF()) {
    report_fatal_error("llvm.asan.check.memaccess only supported on ELF");
    return;
  }

  const auto &Reg = MI.getOperand(0).getReg();
  ASanAccessInfo AccessInfo(MI.getOperand(1).getImm());

  uint64_t ShadowBase;
  int MappingScale;
  bool OrShadowOffset;
  getAddressSanitizerParams(Triple(TM.getTargetTriple()), 64,
                            AccessInfo.CompileKernel, &ShadowBase,
                            &MappingScale, &OrShadowOffset);

  StringRef Name = AccessInfo.IsWrite ? "store" : "load";
  StringRef Op = OrShadowOffset ? "or" : "add";
  std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" +
                         Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" +
                         TM.getMCRegisterInfo()->getName(Reg.asMCReg()))
                            .str();
  if (OrShadowOffset)
    report_fatal_error(
        "OrShadowOffset is not supported with optimized callbacks");

  EmitAndCountInstruction(
      MCInstBuilder(X86::CALL64pcrel32)
          .addExpr(MCSymbolRefExpr::create(
              OutContext.getOrCreateSymbol(SymName), OutContext)));
}

void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
                                      X86MCInstLower &MCIL) {
  // PATCHABLE_OP minsize

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  auto NextMI = std::find_if(std::next(MI.getIterator()),
                             MI.getParent()->end().getInstrIterator(),
                             [](auto &II) { return !II.isMetaInstruction(); });

  SmallString<256> Code;
  unsigned MinSize = MI.getOperand(0).getImm();

  if (NextMI != MI.getParent()->end() && !NextMI->isInlineAsm()) {
    // Lower the next MachineInstr to find its byte size.
    // If the next instruction is inline assembly, we skip lowering it for now,
    // and assume we should always generate NOPs.
    MCInst MCI;
    MCIL.Lower(&*NextMI, MCI);

    SmallVector<MCFixup, 4> Fixups;
    CodeEmitter->encodeInstruction(MCI, Code, Fixups, getSubtargetInfo());
  }

  if (Code.size() < MinSize) {
    if (MinSize == 2 && Subtarget->is32Bit() &&
        Subtarget->isTargetWindowsMSVC() &&
        (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
      // For compatibility reasons, when targeting MSVC, it is important to
      // generate a 'legacy' NOP in the form of an 8B FF MOV EDI, EDI. Some
      // tools rely specifically on this pattern to be able to patch a
      // function. This is only for 32-bit targets, when using /arch:IA32 or
      // /arch:SSE.
      OutStreamer->emitInstruction(
          MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
          *Subtarget);
    } else {
      unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
      assert(NopSize == MinSize && "Could not implement MinSize!");
      (void)NopSize;
    }
  }
}

// Lower a stackmap of the form:
// <id>, <shadowBytes>, ...
void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);

  SM.recordStackMap(*MILabel, MI);
  unsigned NumShadowBytes = MI.getOperand(1).getImm();
  SMShadowTracker.reset(NumShadowBytes);
}

// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");

  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);
  SM.recordPatchPoint(*MILabel, MI);

  PatchPointOpers opers(&MI);
  unsigned ScratchIdx = opers.getNextScratchIdx();
  unsigned EncodedBytes = 0;
  const MachineOperand &CalleeMO = opers.getCallTarget();

  // Check for null target. If target is non-null (i.e. is non-zero or is
  // symbolic) then emit a call.
  if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
    MCOperand CalleeMCOp;
    switch (CalleeMO.getType()) {
    default:
      /// FIXME: Add a verifier check for bad callee types.
      llvm_unreachable("Unrecognized callee operand type.");
    case MachineOperand::MO_Immediate:
      if (CalleeMO.getImm())
        CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
      break;
    case MachineOperand::MO_ExternalSymbol:
    case MachineOperand::MO_GlobalAddress:
      CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
                                           MCIL.GetSymbolFromOperand(CalleeMO));
      break;
    }

    // Emit MOV to materialize the target address and the CALL to target.
    // This is encoded with 12-13 bytes, depending on which register is used.
    Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
    if (X86II::isX86_64ExtendedReg(ScratchReg))
      EncodedBytes = 13;
    else
      EncodedBytes = 12;

    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
    // FIXME: Add retpoline support and remove this.
    if (Subtarget->useIndirectThunkCalls())
      report_fatal_error(
          "Lowering patchpoint with thunks not yet implemented.");
    EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
  }

  // Emit padding.
  unsigned NumBytes = opers.getNumPatchBytes();
  assert(NumBytes >= EncodedBytes &&
         "Patchpoint can't request size less than the length of a call.");

  emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
}

void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
                                              X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  //   .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +N                        // jump across the instrumentation sled
  //   ...                           // set up arguments in register
  //   callq __xray_CustomEvent@plt  // force dependency to symbol
  //   ...
  //   <jump here>
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   ...
  //   callq __xrayCustomEvent  // already lowered
  //   ...
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
  OutStreamer->AddComment("# XRay Custom Event Log");
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBinaryData("\xeb\x0f");

  // The default C calling convention will place two arguments into %rcx and
  // %rdx -- so we only work with those.
  const Register DestRegs[] = {X86::RDI, X86::RSI};
  bool UsedMask[] = {false, false};
  // Filled out in loop.
  Register SrcRegs[] = {0, 0};

  // Then we put the operands in the %rdi and %rsi registers. We spill the
  // values in the register before we clobber them, and mark them as used in
  // UsedMask. In case the arguments are already in the correct register, we
  // emit nops appropriately sized to keep the sled the same size in every
  // situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I));
        Op.isValid()) {
      assert(Op.isReg() && "Only support arguments in registers");
      SrcRegs[I] = getX86SubSuperRegister(Op.getReg(), 64);
      assert(SrcRegs[I].isValid() && "Invalid operand");
      if (SrcRegs[I] != DestRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
      } else {
        emitX86Nops(*OutStreamer, 4, Subtarget);
      }
    }

  // Now that the register values are stashed, mov arguments into place.
  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
  // earlier DestReg. We will have already overwritten the register before
  // we can copy from it.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (SrcRegs[I] != DestRegs[I])
      EmitAndCountInstruction(
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));

  // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
  auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
  if (isPositionIndependent())
    TOp.setTargetFlags(X86II::MO_PLT);

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Restore caller-saved and used registers.
  for (unsigned I = sizeof UsedMask; I-- > 0;)
    if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
    else
      emitX86Nops(*OutStreamer, 1, Subtarget);

  OutStreamer->AddComment("xray custom event end.");

  // Record the sled version. Version 0 of this sled was spelled differently, so
  // we let the runtime handle the different offsets we're using. Version 2
  // changed the absolute address to a PC-relative address.
  recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
}

void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
                                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  //   .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +N                       // jump across the instrumentation sled
  //   ...                          // set up arguments in register
  //   callq __xray_TypedEvent@plt  // force dependency to symbol
  //   ...
  //   <jump here>
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   ...
  //   callq __xrayTypedEvent  // already lowered
  //   ...
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
  OutStreamer->AddComment("# XRay Typed Event Log");
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBinaryData("\xeb\x14");

  // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
  // so we'll work with those. Or we may be called via SystemV, in which case
  // we don't have to do any translation.
  const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
  bool UsedMask[] = {false, false, false};

  // Will fill out src regs in the loop.
  Register SrcRegs[] = {0, 0, 0};

  // Then we put the operands in the SystemV registers. We spill the values in
  // the registers before we clobber them, and mark them as used in UsedMask.
  // In case the arguments are already in the correct register, we emit nops
  // appropriately sized to keep the sled the same size in every situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I));
        Op.isValid()) {
      // TODO: Is register only support adequate?
      assert(Op.isReg() && "Only supports arguments in registers");
      SrcRegs[I] = getX86SubSuperRegister(Op.getReg(), 64);
      assert(SrcRegs[I].isValid() && "Invalid operand");
      if (SrcRegs[I] != DestRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
      } else {
        emitX86Nops(*OutStreamer, 4, Subtarget);
      }
    }

  // In the above loop we only stash all of the destination registers or emit
  // nops if the arguments are already in the right place. Doing the actual
  // moving is postponed until after all the registers are stashed so nothing
  // is clobbered. We've already added nops to account for the size of mov and
  // push if the register is in the right place, so we only have to worry about
  // emitting movs.
  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
  // earlier DestReg. We will have already overwritten the register before
  // we can copy from it.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (UsedMask[I])
      EmitAndCountInstruction(
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));

  // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
  auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
  if (isPositionIndependent())
    TOp.setTargetFlags(X86II::MO_PLT);

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Restore caller-saved and used registers.
  for (unsigned I = sizeof UsedMask; I-- > 0;)
    if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
    else
      emitX86Nops(*OutStreamer, 1, Subtarget);

  OutStreamer->AddComment("xray typed event end.");

  // Record the sled version.
  recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
}

void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                  X86MCInstLower &MCIL) {

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  const Function &F = MF->getFunction();
  if (F.hasFnAttribute("patchable-function-entry")) {
    unsigned Num;
    if (F.getFnAttribute("patchable-function-entry")
            .getValueAsString()
            .getAsInteger(10, Num))
      return;
    emitX86Nops(*OutStreamer, Num, Subtarget);
    return;
  }
  // We want to emit the following pattern:
  //
  //   .p2align 1, ...
  // .Lxray_sled_N:
  //   jmp .tmpN
  //   # 9 bytes worth of noops
  //
  // We need the 9 bytes because at runtime, we'd be patching over the full 11
  // bytes with the following pattern:
  //
  //   mov %r10, <function id, 32-bit>   // 6 bytes
  //   call <relative offset, 32-bits>   // 5 bytes
  //
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);
  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBytes("\xeb\x09");
  emitX86Nops(*OutStreamer, 9, Subtarget);
  recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
}

void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
                                       X86MCInstLower &MCIL) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // Since PATCHABLE_RET takes the opcode of the return statement as an
  // argument, we use that to emit the correct form of the RET that we want.
  // i.e. when we see this:
  //
  //   PATCHABLE_RET X86::RET ...
  //
  // We should emit the RET followed by sleds.
  //
  //   .p2align 1, ...
  // .Lxray_sled_N:
  //   ret  # or equivalent instruction
  //   # 10 bytes worth of noops
  //
  // This just makes sure that the alignment for the next instruction is 2.
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);
  unsigned OpCode = MI.getOperand(0).getImm();
  MCInst Ret;
  Ret.setOpcode(OpCode);
  for (auto &MO : drop_begin(MI.operands()))
    if (auto Op = MCIL.LowerMachineOperand(&MI, MO); Op.isValid())
      Ret.addOperand(Op);
  OutStreamer->emitInstruction(Ret, getSubtargetInfo());
  emitX86Nops(*OutStreamer, 10, Subtarget);
  recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
}

void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
                                             X86MCInstLower &MCIL) {
  MCInst TC;
  TC.setOpcode(convertTailJumpOpcode(MI.getOperand(0).getImm()));
  // Drop the tail jump opcode.
  auto TCOperands = drop_begin(MI.operands());
  bool IsConditional = TC.getOpcode() == X86::JCC_1;
  MCSymbol *FallthroughLabel;
  if (IsConditional) {
    // Rewrite:
    //   je target
    //
    // To:
    //   jne .fallthrough
    //   .p2align 1, ...
    // .Lxray_sled_N:
    //   SLED_CODE
    //   jmp target
    // .fallthrough:
    FallthroughLabel = OutContext.createTempSymbol();
    EmitToStreamer(
        *OutStreamer,
        MCInstBuilder(X86::JCC_1)
            .addExpr(MCSymbolRefExpr::create(FallthroughLabel, OutContext))
            .addImm(X86::GetOppositeBranchCondition(
                static_cast<X86::CondCode>(MI.getOperand(2).getImm()))));
    TC.setOpcode(X86::JMP_1);
    // Drop the condition code.
    TCOperands = drop_end(TCOperands);
  }

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // Like PATCHABLE_RET, we have the actual instruction in the operands to this
  // instruction so we lower that particular instruction and its operands.
  // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
  // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
  // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
  // tail call much like how we have it in PATCHABLE_RET.
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);
  auto Target = OutContext.createTempSymbol();

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBytes("\xeb\x09");
  emitX86Nops(*OutStreamer, 9, Subtarget);
  OutStreamer->emitLabel(Target);
  recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);

  // Before emitting the instruction, add a comment to indicate that this is
  // indeed a tail call.
  OutStreamer->AddComment("TAILCALL");
  for (auto &MO : TCOperands)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MO); Op.isValid())
      TC.addOperand(Op);
  OutStreamer->emitInstruction(TC, getSubtargetInfo());

  if (IsConditional)
    OutStreamer->emitLabel(FallthroughLabel);
}

// Returns instruction preceding MBBI in MachineFunction.
// If MBBI is the first instruction of the first basic block, returns null.
static MachineBasicBlock::const_iterator
PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
  const MachineBasicBlock *MBB = MBBI->getParent();
  while (MBBI == MBB->begin()) {
    if (MBB == &MBB->getParent()->front())
      return MachineBasicBlock::const_iterator();
    MBB = MBB->getPrevNode();
    MBBI = MBB->end();
  }
  --MBBI;
  return MBBI;
}

static unsigned getSrcIdx(const MachineInstr *MI, unsigned SrcIdx) {
  if (X86II::isKMasked(MI->getDesc().TSFlags)) {
    // Skip mask operand.
    ++SrcIdx;
    if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
      // Skip passthru operand.
      ++SrcIdx;
    }
  }
  return SrcIdx;
}

static void printDstRegisterName(raw_ostream &CS, const MachineInstr *MI,
                                 unsigned SrcOpIdx) {
  const MachineOperand &DstOp = MI->getOperand(0);
  CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg());

  // Handle AVX512 MASK/MASKZ write mask comments.
  // MASK: zmmX {%kY}
  // MASKZ: zmmX {%kY} {z}
  if (X86II::isKMasked(MI->getDesc().TSFlags)) {
    const MachineOperand &WriteMaskOp = MI->getOperand(SrcOpIdx - 1);
    StringRef Mask = X86ATTInstPrinter::getRegisterName(WriteMaskOp.getReg());
    CS << " {%" << Mask << "}";
    if (!X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
      CS << " {z}";
    }
  }
}

static void printShuffleMask(raw_ostream &CS, StringRef Src1Name,
                             StringRef Src2Name, ArrayRef<int> Mask) {
  // One source operand, fix the mask to print all elements in one span.
  SmallVector<int, 8> ShuffleMask(Mask);
  if (Src1Name == Src2Name)
    for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
      if (ShuffleMask[i] >= e)
        ShuffleMask[i] -= e;

  for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
    if (i != 0)
      CS << ",";
    if (ShuffleMask[i] == SM_SentinelZero) {
      CS << "zero";
      continue;
    }

    // Otherwise, it must come from src1 or src2. Print the span of elements
    // that comes from this src.
    bool isSrc1 = ShuffleMask[i] < (int)e;
    CS << (isSrc1 ? Src1Name : Src2Name) << '[';

    bool IsFirst = true;
    while (i != e && ShuffleMask[i] != SM_SentinelZero &&
           (ShuffleMask[i] < (int)e) == isSrc1) {
      if (!IsFirst)
        CS << ',';
      else
        IsFirst = false;
      if (ShuffleMask[i] == SM_SentinelUndef)
        CS << "u";
      else
        CS << ShuffleMask[i] % (int)e;
      ++i;
    }
    CS << ']';
    --i; // For loop increments element #.
  }
}

static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
                                     unsigned SrcOp2Idx, ArrayRef<int> Mask) {
  std::string Comment;

  const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
  const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
  StringRef Src1Name = SrcOp1.isReg()
                           ? X86ATTInstPrinter::getRegisterName(SrcOp1.getReg())
                           : "mem";
  StringRef Src2Name = SrcOp2.isReg()
                           ? X86ATTInstPrinter::getRegisterName(SrcOp2.getReg())
                           : "mem";

  raw_string_ostream CS(Comment);
  printDstRegisterName(CS, MI, SrcOp1Idx);
  CS << " = ";
  printShuffleMask(CS, Src1Name, Src2Name, Mask);
  CS.flush();

  return Comment;
}

static void printConstant(const APInt &Val, raw_ostream &CS,
                          bool PrintZero = false) {
  if (Val.getBitWidth() <= 64) {
    CS << (PrintZero ? 0ULL : Val.getZExtValue());
  } else {
    // print multi-word constant as (w0,w1)
    CS << "(";
    for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
      if (i > 0)
        CS << ",";
      CS << (PrintZero ? 0ULL : Val.getRawData()[i]);
    }
    CS << ")";
  }
}

static void printConstant(const APFloat &Flt, raw_ostream &CS,
                          bool PrintZero = false) {
  SmallString<32> Str;
  // Force scientific notation to distinguish from integers.
  if (PrintZero)
    APFloat::getZero(Flt.getSemantics()).toString(Str, 0, 0);
  else
    Flt.toString(Str, 0, 0);
  CS << Str;
}

static void printConstant(const Constant *COp, unsigned BitWidth,
                          raw_ostream &CS, bool PrintZero = false) {
  if (isa<UndefValue>(COp)) {
    CS << "u";
  } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
    printConstant(CI->getValue(), CS, PrintZero);
  } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
    printConstant(CF->getValueAPF(), CS, PrintZero);
  } else if (auto *CDS = dyn_cast<ConstantDataSequential>(COp)) {
    Type *EltTy = CDS->getElementType();
    bool IsInteger = EltTy->isIntegerTy();
    bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy();
    unsigned EltBits = EltTy->getPrimitiveSizeInBits();
    unsigned E = std::min(BitWidth / EltBits, CDS->getNumElements());
    assert((BitWidth % EltBits) == 0 && "Element size mismatch");
    for (unsigned I = 0; I != E; ++I) {
      if (I != 0)
        CS << ",";
      if (IsInteger)
        printConstant(CDS->getElementAsAPInt(I), CS, PrintZero);
      else if (IsFP)
        printConstant(CDS->getElementAsAPFloat(I), CS, PrintZero);
      else
        CS << "?";
    }
  } else if (auto *CV = dyn_cast<ConstantVector>(COp)) {
    unsigned EltBits = CV->getType()->getScalarSizeInBits();
    unsigned E = std::min(BitWidth / EltBits, CV->getNumOperands());
    assert((BitWidth % EltBits) == 0 && "Element size mismatch");
    for (unsigned I = 0; I != E; ++I) {
      if (I != 0)
        CS << ",";
      printConstant(CV->getOperand(I), EltBits, CS, PrintZero);
    }
  } else {
    CS << "?";
  }
}

static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer,
                               int SclWidth, int VecWidth,
                               const char *ShuffleComment) {
  unsigned SrcIdx = getSrcIdx(MI, 1);

  std::string Comment;
  raw_string_ostream CS(Comment);
  printDstRegisterName(CS, MI, SrcIdx);
  CS << " = ";

  if (auto *C = X86::getConstantFromPool(*MI, SrcIdx)) {
    CS << "[";
    printConstant(C, SclWidth, CS);
    for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) {
static void printConstant(const APInt &Val, raw_ostream &CS,
                          bool PrintZero = false) {
  if (Val.getBitWidth() <= 64) {
    CS << (PrintZero ? 0ULL : Val.getZExtValue());
  } else {
    // print multi-word constant as (w0,w1)
    CS << "(";
    for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
      if (i > 0)
        CS << ",";
      CS << (PrintZero ? 0ULL : Val.getRawData()[i]);
    }
    CS << ")";
  }
}

static void printConstant(const APFloat &Flt, raw_ostream &CS,
                          bool PrintZero = false) {
  SmallString<32> Str;
  // Force scientific notation to distinguish from integers.
  if (PrintZero)
    APFloat::getZero(Flt.getSemantics()).toString(Str, 0, 0);
  else
    Flt.toString(Str, 0, 0);
  CS << Str;
}

static void printConstant(const Constant *COp, unsigned BitWidth,
                          raw_ostream &CS, bool PrintZero = false) {
  if (isa<UndefValue>(COp)) {
    CS << "u";
  } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
    printConstant(CI->getValue(), CS, PrintZero);
  } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
    printConstant(CF->getValueAPF(), CS, PrintZero);
  } else if (auto *CDS = dyn_cast<ConstantDataSequential>(COp)) {
    Type *EltTy = CDS->getElementType();
    bool IsInteger = EltTy->isIntegerTy();
    bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy();
    unsigned EltBits = EltTy->getPrimitiveSizeInBits();
    unsigned E = std::min(BitWidth / EltBits, CDS->getNumElements());
    assert((BitWidth % EltBits) == 0 && "Element size mismatch");
    for (unsigned I = 0; I != E; ++I) {
      if (I != 0)
        CS << ",";
      if (IsInteger)
        printConstant(CDS->getElementAsAPInt(I), CS, PrintZero);
      else if (IsFP)
        printConstant(CDS->getElementAsAPFloat(I), CS, PrintZero);
      else
        CS << "?";
    }
  } else if (auto *CV = dyn_cast<ConstantVector>(COp)) {
    unsigned EltBits = CV->getType()->getScalarSizeInBits();
    unsigned E = std::min(BitWidth / EltBits, CV->getNumOperands());
    assert((BitWidth % EltBits) == 0 && "Element size mismatch");
    for (unsigned I = 0; I != E; ++I) {
      if (I != 0)
        CS << ",";
      printConstant(CV->getOperand(I), EltBits, CS, PrintZero);
    }
  } else {
    CS << "?";
  }
}

static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer,
                               int SclWidth, int VecWidth,
                               const char *ShuffleComment) {
  unsigned SrcIdx = getSrcIdx(MI, 1);

  std::string Comment;
  raw_string_ostream CS(Comment);
  printDstRegisterName(CS, MI, SrcIdx);
  CS << " = ";

  if (auto *C = X86::getConstantFromPool(*MI, SrcIdx)) {
    CS << "[";
    printConstant(C, SclWidth, CS);
    for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) {
      CS << ",";
      printConstant(C, SclWidth, CS, true);
    }
    CS << "]";
    OutStreamer.AddComment(CS.str());
    return; // early-out
  }

  // We didn't find a constant load, fall back to a shuffle mask decode.
  CS << ShuffleComment;
  OutStreamer.AddComment(CS.str());
}

static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer,
                           int Repeats, int BitWidth) {
  unsigned SrcIdx = getSrcIdx(MI, 1);
  if (auto *C = X86::getConstantFromPool(*MI, SrcIdx)) {
    std::string Comment;
    raw_string_ostream CS(Comment);
    printDstRegisterName(CS, MI, SrcIdx);
    CS << " = [";
    for (int l = 0; l != Repeats; ++l) {
      if (l != 0)
        CS << ",";
      printConstant(C, BitWidth, CS);
    }
    CS << "]";
    OutStreamer.AddComment(CS.str());
  }
}

static bool printExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
                        int SrcEltBits, int DstEltBits, bool IsSext) {
  unsigned SrcIdx = getSrcIdx(MI, 1);
  auto *C = X86::getConstantFromPool(*MI, SrcIdx);
  if (C && C->getType()->getScalarSizeInBits() == unsigned(SrcEltBits)) {
    if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
      int NumElts = CDS->getNumElements();
      std::string Comment;
      raw_string_ostream CS(Comment);
      printDstRegisterName(CS, MI, SrcIdx);
      CS << " = [";
      for (int i = 0; i != NumElts; ++i) {
        if (i != 0)
          CS << ",";
        if (CDS->getElementType()->isIntegerTy()) {
          APInt Elt = CDS->getElementAsAPInt(i);
          Elt = IsSext ? Elt.sext(DstEltBits) : Elt.zext(DstEltBits);
          printConstant(Elt, CS);
        } else
          CS << "?";
      }
      CS << "]";
      OutStreamer.AddComment(CS.str());
      return true;
    }
  }

  return false;
}
static void printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
                            int SrcEltBits, int DstEltBits) {
  printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, true);
}
static void printZeroExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
                            int SrcEltBits, int DstEltBits) {
  if (printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, false))
    return;

  // We didn't find a constant load, fall back to a shuffle mask decode.
  std::string Comment;
  raw_string_ostream CS(Comment);
  printDstRegisterName(CS, MI, getSrcIdx(MI, 1));
  CS << " = ";

  SmallVector<int> Mask;
  unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
  assert((Width % DstEltBits) == 0 && (DstEltBits % SrcEltBits) == 0 &&
         "Illegal extension ratio");
  DecodeZeroExtendMask(SrcEltBits, DstEltBits, Width / DstEltBits, false, Mask);
  printShuffleMask(CS, "mem", "", Mask);

  OutStreamer.AddComment(CS.str());
}
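// For reference (summarizing the helpers above, not new behavior): elements
// are printed comma-separated, undef elements as "u", unrecognized constants
// as "?", integers wider than 64 bits as "(w0,w1,...)", and floats in
// exponent form. A hypothetical v4i32 splat of 7 would print as "7,7,7,7".
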
void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
  assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
  assert((getSubtarget().isOSWindows() || TM.getTargetTriple().isUEFI()) &&
         "SEH_ instruction Windows and UEFI only");

  // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
  if (EmitFPOData) {
    X86TargetStreamer *XTS =
        static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
    switch (MI->getOpcode()) {
    case X86::SEH_PushReg:
      XTS->emitFPOPushReg(MI->getOperand(0).getImm());
      break;
    case X86::SEH_StackAlloc:
      XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
      break;
    case X86::SEH_StackAlign:
      XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
      break;
    case X86::SEH_SetFrame:
      assert(MI->getOperand(1).getImm() == 0 &&
             ".cv_fpo_setframe takes no offset");
      XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
      break;
    case X86::SEH_EndPrologue:
      XTS->emitFPOEndPrologue();
      break;
    case X86::SEH_SaveReg:
    case X86::SEH_SaveXMM:
    case X86::SEH_PushFrame:
      llvm_unreachable("SEH_ directive incompatible with FPO");
      break;
    default:
      llvm_unreachable("expected SEH_ instruction");
    }
    return;
  }

  // Otherwise, use the .seh_ directives for all other Windows platforms.
  switch (MI->getOpcode()) {
  case X86::SEH_PushReg:
    OutStreamer->emitWinCFIPushReg(MI->getOperand(0).getImm());
    break;

  case X86::SEH_SaveReg:
    OutStreamer->emitWinCFISaveReg(MI->getOperand(0).getImm(),
                                   MI->getOperand(1).getImm());
    break;

  case X86::SEH_SaveXMM:
    OutStreamer->emitWinCFISaveXMM(MI->getOperand(0).getImm(),
                                   MI->getOperand(1).getImm());
    break;

  case X86::SEH_StackAlloc:
    OutStreamer->emitWinCFIAllocStack(MI->getOperand(0).getImm());
    break;

  case X86::SEH_SetFrame:
    OutStreamer->emitWinCFISetFrame(MI->getOperand(0).getImm(),
                                    MI->getOperand(1).getImm());
    break;

  case X86::SEH_PushFrame:
    OutStreamer->emitWinCFIPushFrame(MI->getOperand(0).getImm());
    break;

  case X86::SEH_EndPrologue:
    OutStreamer->emitWinCFIEndProlog();
    break;

  default:
    llvm_unreachable("expected SEH_ instruction");
  }
}
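// Illustrative mapping, assuming the usual MC directive spellings: with FPO
// data the pseudos above become ".cv_fpo_pushreg", ".cv_fpo_stackalloc",
// ".cv_fpo_setframe" and ".cv_fpo_endprologue"; on other Windows targets they
// become ".seh_pushreg", ".seh_stackalloc", ".seh_setframe" and so on.
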
static void addConstantComments(const MachineInstr *MI,
                                MCStreamer &OutStreamer) {
  switch (MI->getOpcode()) {
  // Lower PSHUFB and VPERMILP normally but add a comment if we can find
  // a constant shuffle mask. We won't be able to do this at the MC layer
  // because the mask isn't an immediate.
  case X86::PSHUFBrm:
  case X86::VPSHUFBrm:
  case X86::VPSHUFBYrm:
  case X86::VPSHUFBZ128rm:
  case X86::VPSHUFBZ128rmk:
  case X86::VPSHUFBZ128rmkz:
  case X86::VPSHUFBZ256rm:
  case X86::VPSHUFBZ256rmk:
  case X86::VPSHUFBZ256rmkz:
  case X86::VPSHUFBZrm:
  case X86::VPSHUFBZrmk:
  case X86::VPSHUFBZrmkz: {
    unsigned SrcIdx = getSrcIdx(MI, 1);
    if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
      unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 64> Mask;
      DecodePSHUFBMask(C, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
    }
    break;
  }

  case X86::VPERMILPSrm:
  case X86::VPERMILPSYrm:
  case X86::VPERMILPSZ128rm:
  case X86::VPERMILPSZ128rmk:
  case X86::VPERMILPSZ128rmkz:
  case X86::VPERMILPSZ256rm:
  case X86::VPERMILPSZ256rmk:
  case X86::VPERMILPSZ256rmkz:
  case X86::VPERMILPSZrm:
  case X86::VPERMILPSZrmk:
  case X86::VPERMILPSZrmkz: {
    unsigned SrcIdx = getSrcIdx(MI, 1);
    if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
      unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 16> Mask;
      DecodeVPERMILPMask(C, 32, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
    }
    break;
  }
  case X86::VPERMILPDrm:
  case X86::VPERMILPDYrm:
  case X86::VPERMILPDZ128rm:
  case X86::VPERMILPDZ128rmk:
  case X86::VPERMILPDZ128rmkz:
  case X86::VPERMILPDZ256rm:
  case X86::VPERMILPDZ256rmk:
  case X86::VPERMILPDZ256rmkz:
  case X86::VPERMILPDZrm:
  case X86::VPERMILPDZrmk:
  case X86::VPERMILPDZrmkz: {
    unsigned SrcIdx = getSrcIdx(MI, 1);
    if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
      unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 16> Mask;
      DecodeVPERMILPMask(C, 64, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
    }
    break;
  }

  case X86::VPERMIL2PDrm:
  case X86::VPERMIL2PSrm:
  case X86::VPERMIL2PDYrm:
  case X86::VPERMIL2PSYrm: {
    assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
           "Unexpected number of operands!");

    const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
    if (!CtrlOp.isImm())
      break;

    unsigned ElSize;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
    case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
    }

    if (auto *C = X86::getConstantFromPool(*MI, 3)) {
      unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 16> Mask;
      DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
    }
    break;
  }

  case X86::VPPERMrrm: {
    if (auto *C = X86::getConstantFromPool(*MI, 3)) {
      unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 16> Mask;
      DecodeVPPERMMask(C, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
    }
    break;
  }

  case X86::MMX_MOVQ64rm: {
    if (auto *C = X86::getConstantFromPool(*MI, 1)) {
      std::string Comment;
      raw_string_ostream CS(Comment);
      const MachineOperand &DstOp = MI->getOperand(0);
      CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
      if (auto *CF = dyn_cast<ConstantFP>(C)) {
        CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false);
        OutStreamer.AddComment(CS.str());
      }
    }
    break;
  }

#define INSTR_CASE(Prefix, Instr, Suffix, Postfix)                            \
  case X86::Prefix##Instr##Suffix##rm##Postfix:

#define CASE_ARITH_RM(Instr)                                                  \
  INSTR_CASE(, Instr, , )   /* SSE */                                         \
  INSTR_CASE(V, Instr, , )  /* AVX-128 */                                     \
  INSTR_CASE(V, Instr, Y, ) /* AVX-256 */                                     \
  INSTR_CASE(V, Instr, Z128, )                                                \
  INSTR_CASE(V, Instr, Z128, k)                                               \
  INSTR_CASE(V, Instr, Z128, kz)                                              \
  INSTR_CASE(V, Instr, Z256, )                                                \
  INSTR_CASE(V, Instr, Z256, k)                                               \
  INSTR_CASE(V, Instr, Z256, kz)                                              \
  INSTR_CASE(V, Instr, Z, )                                                   \
  INSTR_CASE(V, Instr, Z, k)                                                  \
  INSTR_CASE(V, Instr, Z, kz)

  // TODO: Add additional instructions when useful.
  CASE_ARITH_RM(PMADDUBSW) {
    unsigned SrcIdx = getSrcIdx(MI, 1);
    if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
      if (C->getType()->getScalarSizeInBits() == 8) {
        std::string Comment;
        raw_string_ostream CS(Comment);
        unsigned VectorWidth =
            X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
        CS << "[";
        printConstant(C, VectorWidth, CS);
        CS << "]";
        OutStreamer.AddComment(CS.str());
      }
    }
    break;
  }

  CASE_ARITH_RM(PMADDWD)
  CASE_ARITH_RM(PMULLW)
  CASE_ARITH_RM(PMULHW)
  CASE_ARITH_RM(PMULHUW)
  CASE_ARITH_RM(PMULHRSW) {
    unsigned SrcIdx = getSrcIdx(MI, 1);
    if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
      if (C->getType()->getScalarSizeInBits() == 16) {
        std::string Comment;
        raw_string_ostream CS(Comment);
        unsigned VectorWidth =
            X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
        CS << "[";
        printConstant(C, VectorWidth, CS);
        CS << "]";
        OutStreamer.AddComment(CS.str());
      }
    }
    break;
  }

#define MASK_AVX512_CASE(Instr)                                               \
  case Instr:                                                                 \
  case Instr##k:                                                              \
  case Instr##kz:

  case X86::MOVSDrm:
  case X86::VMOVSDrm:
    MASK_AVX512_CASE(X86::VMOVSDZrm)
  case X86::MOVSDrm_alt:
  case X86::VMOVSDrm_alt:
  case X86::VMOVSDZrm_alt:
  case X86::MOVQI2PQIrm:
  case X86::VMOVQI2PQIrm:
  case X86::VMOVQI2PQIZrm:
    printZeroUpperMove(MI, OutStreamer, 64, 128, "mem[0],zero");
    break;

    MASK_AVX512_CASE(X86::VMOVSHZrm)
  case X86::VMOVSHZrm_alt:
    printZeroUpperMove(MI, OutStreamer, 16, 128,
                       "mem[0],zero,zero,zero,zero,zero,zero,zero");
    break;

  case X86::MOVSSrm:
  case X86::VMOVSSrm:
    MASK_AVX512_CASE(X86::VMOVSSZrm)
  case X86::MOVSSrm_alt:
  case X86::VMOVSSrm_alt:
  case X86::VMOVSSZrm_alt:
  case X86::MOVDI2PDIrm:
  case X86::VMOVDI2PDIrm:
  case X86::VMOVDI2PDIZrm:
    printZeroUpperMove(MI, OutStreamer, 32, 128, "mem[0],zero,zero,zero");
    break;

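  // Illustrative only: the scalar-load cases above annotate, e.g., a MOVSS
  // from the constant pool as "xmm0 = [<scalar>,0,0,0]" with the upper lanes
  // printed as zeros; when no pool constant is found, the literal fallback
  // strings passed above ("mem[0],zero,zero,zero" etc.) are used instead.
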
#define MOV_CASE(Prefix, Suffix)                                              \
  case X86::Prefix##MOVAPD##Suffix##rm:                                       \
  case X86::Prefix##MOVAPS##Suffix##rm:                                       \
  case X86::Prefix##MOVUPD##Suffix##rm:                                       \
  case X86::Prefix##MOVUPS##Suffix##rm:                                       \
  case X86::Prefix##MOVDQA##Suffix##rm:                                       \
  case X86::Prefix##MOVDQU##Suffix##rm:

#define MOV_AVX512_CASE(Suffix, Postfix)                                      \
  case X86::VMOVDQA64##Suffix##rm##Postfix:                                   \
  case X86::VMOVDQA32##Suffix##rm##Postfix:                                   \
  case X86::VMOVDQU64##Suffix##rm##Postfix:                                   \
  case X86::VMOVDQU32##Suffix##rm##Postfix:                                   \
  case X86::VMOVDQU16##Suffix##rm##Postfix:                                   \
  case X86::VMOVDQU8##Suffix##rm##Postfix:                                    \
  case X86::VMOVAPS##Suffix##rm##Postfix:                                     \
  case X86::VMOVAPD##Suffix##rm##Postfix:                                     \
  case X86::VMOVUPS##Suffix##rm##Postfix:                                     \
  case X86::VMOVUPD##Suffix##rm##Postfix:

#define CASE_128_MOV_RM()                                                     \
  MOV_CASE(, )  /* SSE */                                                     \
  MOV_CASE(V, ) /* AVX-128 */                                                 \
  MOV_AVX512_CASE(Z128, )                                                     \
  MOV_AVX512_CASE(Z128, k)                                                    \
  MOV_AVX512_CASE(Z128, kz)

#define CASE_256_MOV_RM()                                                     \
  MOV_CASE(V, Y) /* AVX-256 */                                                \
  MOV_AVX512_CASE(Z256, )                                                     \
  MOV_AVX512_CASE(Z256, k)                                                    \
  MOV_AVX512_CASE(Z256, kz)

#define CASE_512_MOV_RM()                                                     \
  MOV_AVX512_CASE(Z, )                                                        \
  MOV_AVX512_CASE(Z, k)                                                       \
  MOV_AVX512_CASE(Z, kz)

  // For loads from a constant pool to a vector register, print the constant
  // loaded.
  CASE_128_MOV_RM()
    printBroadcast(MI, OutStreamer, 1, 128);
    break;
  CASE_256_MOV_RM()
    printBroadcast(MI, OutStreamer, 1, 256);
    break;
  CASE_512_MOV_RM()
    printBroadcast(MI, OutStreamer, 1, 512);
    break;
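  // Illustrative only: the full-width load cases above print the whole pool
  // constant once, e.g. a hypothetical vmovaps of <8 x i32> <1,...,8> into
  // %ymm0 is annotated as "ymm0 = [1,2,3,4,5,6,7,8]"; the broadcast cases
  // below instead repeat a narrower constant the stated number of times.
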
  case X86::VBROADCASTF128rm:
  case X86::VBROADCASTI128rm:
    MASK_AVX512_CASE(X86::VBROADCASTF32X4Z256rm)
    MASK_AVX512_CASE(X86::VBROADCASTF64X2Z128rm)
    MASK_AVX512_CASE(X86::VBROADCASTI32X4Z256rm)
    MASK_AVX512_CASE(X86::VBROADCASTI64X2Z128rm)
    printBroadcast(MI, OutStreamer, 2, 128);
    break;
    MASK_AVX512_CASE(X86::VBROADCASTF32X4rm)
    MASK_AVX512_CASE(X86::VBROADCASTF64X2rm)
    MASK_AVX512_CASE(X86::VBROADCASTI32X4rm)
    MASK_AVX512_CASE(X86::VBROADCASTI64X2rm)
    printBroadcast(MI, OutStreamer, 4, 128);
    break;
    MASK_AVX512_CASE(X86::VBROADCASTF32X8rm)
    MASK_AVX512_CASE(X86::VBROADCASTF64X4rm)
    MASK_AVX512_CASE(X86::VBROADCASTI32X8rm)
    MASK_AVX512_CASE(X86::VBROADCASTI64X4rm)
    printBroadcast(MI, OutStreamer, 2, 256);
    break;

  // For broadcast loads from a constant pool to a vector register, repeatedly
  // print the constant loaded.
  case X86::MOVDDUPrm:
  case X86::VMOVDDUPrm:
    MASK_AVX512_CASE(X86::VMOVDDUPZ128rm)
  case X86::VPBROADCASTQrm:
    MASK_AVX512_CASE(X86::VPBROADCASTQZ128rm)
    printBroadcast(MI, OutStreamer, 2, 64);
    break;
  case X86::VBROADCASTSDYrm:
    MASK_AVX512_CASE(X86::VBROADCASTSDZ256rm)
  case X86::VPBROADCASTQYrm:
    MASK_AVX512_CASE(X86::VPBROADCASTQZ256rm)
    printBroadcast(MI, OutStreamer, 4, 64);
    break;
    MASK_AVX512_CASE(X86::VBROADCASTSDZrm)
    MASK_AVX512_CASE(X86::VPBROADCASTQZrm)
    printBroadcast(MI, OutStreamer, 8, 64);
    break;
  case X86::VBROADCASTSSrm:
    MASK_AVX512_CASE(X86::VBROADCASTSSZ128rm)
  case X86::VPBROADCASTDrm:
    MASK_AVX512_CASE(X86::VPBROADCASTDZ128rm)
    printBroadcast(MI, OutStreamer, 4, 32);
    break;
  case X86::VBROADCASTSSYrm:
    MASK_AVX512_CASE(X86::VBROADCASTSSZ256rm)
  case X86::VPBROADCASTDYrm:
    MASK_AVX512_CASE(X86::VPBROADCASTDZ256rm)
    printBroadcast(MI, OutStreamer, 8, 32);
    break;
    MASK_AVX512_CASE(X86::VBROADCASTSSZrm)
    MASK_AVX512_CASE(X86::VPBROADCASTDZrm)
    printBroadcast(MI, OutStreamer, 16, 32);
    break;
  case X86::VPBROADCASTWrm:
    MASK_AVX512_CASE(X86::VPBROADCASTWZ128rm)
    printBroadcast(MI, OutStreamer, 8, 16);
    break;
  case X86::VPBROADCASTWYrm:
    MASK_AVX512_CASE(X86::VPBROADCASTWZ256rm)
    printBroadcast(MI, OutStreamer, 16, 16);
    break;
    MASK_AVX512_CASE(X86::VPBROADCASTWZrm)
    printBroadcast(MI, OutStreamer, 32, 16);
    break;
  case X86::VPBROADCASTBrm:
    MASK_AVX512_CASE(X86::VPBROADCASTBZ128rm)
    printBroadcast(MI, OutStreamer, 16, 8);
    break;
  case X86::VPBROADCASTBYrm:
    MASK_AVX512_CASE(X86::VPBROADCASTBZ256rm)
    printBroadcast(MI, OutStreamer, 32, 8);
    break;
    MASK_AVX512_CASE(X86::VPBROADCASTBZrm)
    printBroadcast(MI, OutStreamer, 64, 8);
    break;

#define MOVX_CASE(Prefix, Ext, Type, Suffix, Postfix)                         \
  case X86::Prefix##PMOV##Ext##Type##Suffix##rm##Postfix:

#define CASE_MOVX_RM(Ext, Type)                                               \
  MOVX_CASE(, Ext, Type, , )                                                  \
  MOVX_CASE(V, Ext, Type, , )                                                 \
  MOVX_CASE(V, Ext, Type, Y, )                                                \
  MOVX_CASE(V, Ext, Type, Z128, )                                             \
  MOVX_CASE(V, Ext, Type, Z128, k)                                            \
  MOVX_CASE(V, Ext, Type, Z128, kz)                                           \
  MOVX_CASE(V, Ext, Type, Z256, )                                             \
  MOVX_CASE(V, Ext, Type, Z256, k)                                            \
  MOVX_CASE(V, Ext, Type, Z256, kz)                                           \
  MOVX_CASE(V, Ext, Type, Z, )                                                \
  MOVX_CASE(V, Ext, Type, Z, k)                                               \
  MOVX_CASE(V, Ext, Type, Z, kz)

  CASE_MOVX_RM(SX, BD)
    printSignExtend(MI, OutStreamer, 8, 32);
    break;
  CASE_MOVX_RM(SX, BQ)
    printSignExtend(MI, OutStreamer, 8, 64);
    break;
  CASE_MOVX_RM(SX, BW)
    printSignExtend(MI, OutStreamer, 8, 16);
    break;
  CASE_MOVX_RM(SX, DQ)
    printSignExtend(MI, OutStreamer, 32, 64);
    break;
  CASE_MOVX_RM(SX, WD)
    printSignExtend(MI, OutStreamer, 16, 32);
    break;
  CASE_MOVX_RM(SX, WQ)
    printSignExtend(MI, OutStreamer, 16, 64);
    break;

  CASE_MOVX_RM(ZX, BD)
    printZeroExtend(MI, OutStreamer, 8, 32);
    break;
  CASE_MOVX_RM(ZX, BQ)
    printZeroExtend(MI, OutStreamer, 8, 64);
    break;
  CASE_MOVX_RM(ZX, BW)
    printZeroExtend(MI, OutStreamer, 8, 16);
    break;
  CASE_MOVX_RM(ZX, DQ)
    printZeroExtend(MI, OutStreamer, 32, 64);
    break;
  CASE_MOVX_RM(ZX, WD)
    printZeroExtend(MI, OutStreamer, 16, 32);
    break;
  CASE_MOVX_RM(ZX, WQ)
    printZeroExtend(MI, OutStreamer, 16, 64);
    break;
  }
}

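// Illustrative only, for the extend cases handled above: a hypothetical
// VPMOVSXBD load of the i8 vector <1,-2,3,-4> produces the comment
// "xmm0 = [1,4294967294,3,4294967292]" (sign-extended values printed as
// unsigned), while printZeroExtend falls back to a shuffle-style comment such
// as "mem[0],zero,zero,zero,mem[1],..." when no constant pool entry is found.
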
void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
  // FIXME: Enable feature predicate checks once all the tests pass.
  // X86_MC::verifyInstructionPredicates(MI->getOpcode(),
  //                                     Subtarget->getFeatureBits());

  X86MCInstLower MCInstLowering(*MF, *this);
  const X86RegisterInfo *RI =
      MF->getSubtarget<X86Subtarget>().getRegisterInfo();

  if (MI->getOpcode() == X86::OR64rm) {
    for (auto &Opd : MI->operands()) {
      if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
                                "swift_async_extendedFramePointerFlags") {
        ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
      }
    }
  }

  // Add comments for values loaded from constant pool.
  if (OutStreamer->isVerboseAsm())
    addConstantComments(MI, *OutStreamer);

  // Add a comment about EVEX compression.
  if (TM.Options.MCOptions.ShowMCEncoding) {
    if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY)
      OutStreamer->AddComment("EVEX TO LEGACY Compression ", false);
    else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
      OutStreamer->AddComment("EVEX TO VEX Compression ", false);
    else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_EVEX)
      OutStreamer->AddComment("EVEX TO EVEX Compression ", false);
  }

  switch (MI->getOpcode()) {
  case TargetOpcode::DBG_VALUE:
    llvm_unreachable("Should be handled target independently");

  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    // Lower these as normal, but add some comments.
    Register Reg = MI->getOperand(0).getReg();
    OutStreamer->AddComment(StringRef("eh_return, addr: %") +
                            X86ATTInstPrinter::getRegisterName(Reg));
    break;
  }
  case X86::CLEANUPRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CLEANUPRET");
    break;
  }

  case X86::CATCHRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CATCHRET");
    break;
  }

  case X86::ENDBR32:
  case X86::ENDBR64: {
    // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
    // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
    // non-empty. If MI is the initial ENDBR, place the
    // __patchable_function_entries label after ENDBR.
    if (CurrentPatchableFunctionEntrySym &&
        CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
        MI == &MF->front().front()) {
      MCInst Inst;
      MCInstLowering.Lower(MI, Inst);
      EmitAndCountInstruction(Inst);
      CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
      OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
      return;
    }
    break;
  }

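  // Illustrative layout (hypothetical labels): for -fpatchable-function-entry
  // =2,0 on an IBT/CET build, the block above places the patch label after the
  // initial ENDBR, roughly
  //   foo:
  //     endbr64
  //   .Ltmp0:          # symbol recorded in __patchable_function_entries
  //     nop
  //     nop
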
  case X86::TAILJMPd64:
    if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
      EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
    [[fallthrough]];
  case X86::TAILJMPr:
  case X86::TAILJMPm:
  case X86::TAILJMPd:
  case X86::TAILJMPd_CC:
  case X86::TAILJMPr64:
  case X86::TAILJMPm64:
  case X86::TAILJMPd64_CC:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPm64_REX:
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("TAILCALL");
    break;

  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
  case X86::TLS_base_addr32:
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
  case X86::TLS_desc32:
  case X86::TLS_desc64:
    return LowerTlsAddr(MCInstLowering, *MI);

  case X86::MOVPC32r: {
    // This is a pseudo op for a two instruction sequence with a label, which
    // looks like:
    //     call "L1$pb"
    // "L1$pb":
    //     popl %esi

    // Emit the call.
    MCSymbol *PICBase = MF->getPICBaseSymbol();
    // FIXME: We would like an efficient form for this, so we don't have to do
    // a lot of extra uniquing.
    EmitAndCountInstruction(
        MCInstBuilder(X86::CALLpcrel32)
            .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));

    const X86FrameLowering *FrameLowering =
        MF->getSubtarget<X86Subtarget>().getFrameLowering();
    bool hasFP = FrameLowering->hasFP(*MF);

    // TODO: This is needed only if we require precise CFA.
    bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
                               !OutStreamer->getDwarfFrameInfos().back().End;

    int stackGrowth = -RI->getSlotSize();

    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
      MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
    }

    // Emit the label.
    OutStreamer->emitLabel(PICBase);

    // popl $reg
    EmitAndCountInstruction(
        MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));

    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
    }
    return;
  }

  case X86::ADD32ri: {
    // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
    if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
      break;

    // Okay, we have something like:
    //   EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)

    // For this, we want to print something like:
    //   MYGLOBAL + (. - PICBASE)
    // However, we can't generate a ".", so just emit a new label here and
    // refer to it.
    MCSymbol *DotSym = OutContext.createTempSymbol();
    OutStreamer->emitLabel(DotSym);

    // Now that we have emitted the label, lower the complex operand
    // expression.
    MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));

    const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
    const MCExpr *PICBase =
        MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
    DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);

    DotExpr = MCBinaryExpr::createAdd(
        MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);

    EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
                                .addReg(MI->getOperand(0).getReg())
                                .addReg(MI->getOperand(1).getReg())
                                .addExpr(DotExpr));
    return;
  }
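  // Illustrative expansion (hypothetical labels): for
  //   EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
  // the case above emits roughly
  //   .Ltmp0:
  //     addl $MYGLOBAL+(.Ltmp0-<picbase>), %eax
  // with .Ltmp0 standing in for the "." of MYGLOBAL + (. - PICBASE).
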
  case TargetOpcode::STATEPOINT:
    return LowerSTATEPOINT(*MI, MCInstLowering);

  case TargetOpcode::FAULTING_OP:
    return LowerFAULTING_OP(*MI, MCInstLowering);

  case TargetOpcode::FENTRY_CALL:
    return LowerFENTRY_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_OP:
    return LowerPATCHABLE_OP(*MI, MCInstLowering);

  case TargetOpcode::STACKMAP:
    return LowerSTACKMAP(*MI);

  case TargetOpcode::PATCHPOINT:
    return LowerPATCHPOINT(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
    return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_RET:
    return LowerPATCHABLE_RET(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TAIL_CALL:
    return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_EVENT_CALL:
    return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
    return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);

  case X86::MORESTACK_RET:
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    return;

  case X86::KCFI_CHECK:
    return LowerKCFI_CHECK(*MI);

  case X86::ASAN_CHECK_MEMACCESS:
    return LowerASAN_CHECK_MEMACCESS(*MI);

  case X86::MORESTACK_RET_RESTORE_R10:
    // Return, then restore R10.
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
    return;

  case X86::SEH_PushReg:
  case X86::SEH_SaveReg:
  case X86::SEH_SaveXMM:
  case X86::SEH_StackAlloc:
  case X86::SEH_StackAlign:
  case X86::SEH_SetFrame:
  case X86::SEH_PushFrame:
  case X86::SEH_EndPrologue:
    EmitSEHInstruction(MI);
    return;

  case X86::SEH_Epilogue: {
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
    MachineBasicBlock::const_iterator MBBI(MI);
    // Check if preceded by a call and emit nop if so.
    for (MBBI = PrevCrossBBInst(MBBI);
         MBBI != MachineBasicBlock::const_iterator();
         MBBI = PrevCrossBBInst(MBBI)) {
      // Pseudo instructions that aren't a call are assumed to not emit any
      // code. If they do, we worst case generate unnecessary noops after a
      // call.
      if (MBBI->isCall() || !MBBI->isPseudo()) {
        if (MBBI->isCall())
          EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
        break;
      }
    }
    return;
  }
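  // Illustrative (hypothetical callee): when the epilogue is immediately
  // preceded by a call, the loop above inserts a single NOP between them, e.g.
  //   callq bar
  //   nop
  //   addq $40, %rsp
  //   retq
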
  case X86::UBSAN_UD1:
    EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
                                .addReg(X86::EAX)
                                .addReg(X86::EAX)
                                .addImm(1)
                                .addReg(X86::NoRegister)
                                .addImm(MI->getOperand(0).getImm())
                                .addReg(X86::NoRegister));
    return;
  case X86::CALL64pcrel32:
    if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
      EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
    break;
  case X86::JCC_1:
    // Two instruction prefixes (0x2E for branch not-taken and 0x3E for branch
    // taken) are used as branch hints. Here we add the branch-taken prefix to
    // a jump instruction whose probability is higher than the threshold.
    if (getSubtarget().hasBranchHint() && EnableBranchHint) {
      const MachineBranchProbabilityInfo *MBPI =
          &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
      MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
      BranchProbability EdgeProb =
          MBPI->getEdgeProbability(MI->getParent(), DestBB);
      BranchProbability Threshold(BranchHintProbabilityThreshold, 100);
      if (EdgeProb > Threshold)
        EmitAndCountInstruction(MCInstBuilder(X86::DS_PREFIX));
    }
    break;
  }

  MCInst TmpInst;
  MCInstLowering.Lower(MI, TmpInst);

  // Stackmap shadows cannot include branch targets, so we can count the bytes
  // in a call towards the shadow, but must ensure that no thread returns
  // into the stackmap shadow. The only way to achieve this is if the call
  // is at the end of the shadow.
  if (MI->isCall()) {
    // Count the size of the call towards the shadow.
    SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
    // Then flush the shadow so that we fill with nops before the call, not
    // after it.
    SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
    // Then emit the call.
    OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
    return;
  }

  EmitAndCountInstruction(TmpInst);
}