//===------- X86ExpandPseudo.cpp - Expand pseudo instructions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling, if-conversion, other late
// optimizations, or simply the encoding of the instructions.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

#define DEBUG_TYPE "x86-pseudo"
#define X86_EXPAND_PSEUDO_NAME "X86 pseudo instruction expansion pass"

namespace {
class X86ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  X86ExpandPseudo() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addPreservedID(MachineDominatorsID);
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  const X86Subtarget *STI = nullptr;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
  const X86MachineFunctionInfo *X86FI = nullptr;
  const X86FrameLowering *X86FL = nullptr;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override {
    return "X86 pseudo instruction expansion pass";
  }

private:
  void ExpandICallBranchFunnel(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator MBBI);
  void expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool ExpandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool ExpandMBB(MachineBasicBlock &MBB);

  /// Expand pseudos which affect control flow. This is done in a separate
  /// pass to simplify block navigation in the main pass (which calls
  /// ExpandMBB).
  bool ExpandPseudosWhichAffectControlFlow(MachineFunction &MF);

  /// Expand X86::VASTART_SAVE_XMM_REGS into a set of XMM copying
  /// instructions, placed in a separate block guarded by a check of the %al
  /// register (for the SystemV ABI).
  void ExpandVastartSaveXmmRegs(
      MachineBasicBlock *MBB,
      MachineBasicBlock::iterator VAStartPseudoInstr) const;
};
char X86ExpandPseudo::ID = 0;

} // End anonymous namespace.
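
// Note (an inference from the required properties above, not a claim about
// the exact pipeline hook): this pass is created via
// createX86ExpandPseudoPass() at the end of this file and runs late in
// codegen, after register allocation, since getRequiredProperties() demands
// NoVRegs.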

INITIALIZE_PASS(X86ExpandPseudo, DEBUG_TYPE, X86_EXPAND_PSEUDO_NAME, false,
                false)

void X86ExpandPseudo::ExpandICallBranchFunnel(
    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) {
  MachineBasicBlock *JTMBB = MBB;
  MachineInstr *JTInst = &*MBBI;
  MachineFunction *MF = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();
  auto InsPt = MachineFunction::iterator(MBB);
  ++InsPt;

  std::vector<std::pair<MachineBasicBlock *, unsigned>> TargetMBBs;
  const DebugLoc &DL = JTInst->getDebugLoc();
  MachineOperand Selector = JTInst->getOperand(0);
  const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal();

  auto CmpTarget = [&](unsigned Target) {
    if (Selector.isReg())
      MBB->addLiveIn(Selector.getReg());
    BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11)
        .addReg(X86::RIP)
        .addImm(1)
        .addReg(0)
        .addGlobalAddress(CombinedGlobal,
                          JTInst->getOperand(2 + 2 * Target).getImm())
        .addReg(0);
    BuildMI(*MBB, MBBI, DL, TII->get(X86::CMP64rr))
        .add(Selector)
        .addReg(X86::R11);
  };

  auto CreateMBB = [&]() {
    auto *NewMBB = MF->CreateMachineBasicBlock(BB);
    MBB->addSuccessor(NewMBB);
    if (!MBB->isLiveIn(X86::EFLAGS))
      MBB->addLiveIn(X86::EFLAGS);
    return NewMBB;
  };

  auto EmitCondJump = [&](unsigned CC, MachineBasicBlock *ThenMBB) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::JCC_1)).addMBB(ThenMBB).addImm(CC);

    auto *ElseMBB = CreateMBB();
    MF->insert(InsPt, ElseMBB);
    MBB = ElseMBB;
    MBBI = MBB->end();
  };

  auto EmitCondJumpTarget = [&](unsigned CC, unsigned Target) {
    auto *ThenMBB = CreateMBB();
    TargetMBBs.push_back({ThenMBB, Target});
    EmitCondJump(CC, ThenMBB);
  };

  auto EmitTailCall = [&](unsigned Target) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * Target));
  };

  std::function<void(unsigned, unsigned)> EmitBranchFunnel =
      [&](unsigned FirstTarget, unsigned NumTargets) {
        if (NumTargets == 1) {
          EmitTailCall(FirstTarget);
          return;
        }

        if (NumTargets == 2) {
          CmpTarget(FirstTarget + 1);
          EmitCondJumpTarget(X86::COND_B, FirstTarget);
          EmitTailCall(FirstTarget + 1);
          return;
        }

        if (NumTargets < 6) {
          CmpTarget(FirstTarget + 1);
          EmitCondJumpTarget(X86::COND_B, FirstTarget);
          EmitCondJumpTarget(X86::COND_E, FirstTarget + 1);
          EmitBranchFunnel(FirstTarget + 2, NumTargets - 2);
          return;
        }

        auto *ThenMBB = CreateMBB();
        CmpTarget(FirstTarget + (NumTargets / 2));
        EmitCondJump(X86::COND_B, ThenMBB);
        EmitCondJumpTarget(X86::COND_E, FirstTarget + (NumTargets / 2));
        EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1,
                         NumTargets - (NumTargets / 2) - 1);

        MF->insert(InsPt, ThenMBB);
        MBB = ThenMBB;
        MBBI = MBB->end();
        EmitBranchFunnel(FirstTarget, NumTargets / 2);
      };

  EmitBranchFunnel(0, (JTInst->getNumOperands() - 2) / 2);
  for (auto P : TargetMBBs) {
    MF->insert(InsPt, P.first);
    BuildMI(P.first, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * P.second));
  }
  JTMBB->erase(JTInst);
}

void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI) {
  // Expand the CALL_RVMARKER pseudo to a call instruction, followed by the
  // special "movq %rax, %rdi" marker.
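  //
  // For illustration (a sketch; "foo" is a made-up callee and
  // objc_retainAutoreleasedReturnValue a typical runtime function), a
  // CALL64pcrel32_RVMARKER expands roughly to:
  //   callq foo
  //   movq %rax, %rdi                            ## the marker
  //   callq objc_retainAutoreleasedReturnValue   ## call to ObjC runtime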
  MachineInstr &MI = *MBBI;

  MachineInstr *OriginalCall;
  assert((MI.getOperand(1).isGlobal() || MI.getOperand(1).isReg()) &&
         "invalid operand for regular call");
  unsigned Opc = -1;
  if (MI.getOpcode() == X86::CALL64m_RVMARKER)
    Opc = X86::CALL64m;
  else if (MI.getOpcode() == X86::CALL64r_RVMARKER)
    Opc = X86::CALL64r;
  else if (MI.getOpcode() == X86::CALL64pcrel32_RVMARKER)
    Opc = X86::CALL64pcrel32;
  else
    llvm_unreachable("unexpected opcode");

  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  bool RAXImplicitDead = false;
  for (MachineOperand &Op : llvm::drop_begin(MI.operands())) {
    // RAX may be 'implicit dead', if there are no other users of the return
    // value. We introduce a new use, so change it to 'implicit def'.
    if (Op.isReg() && Op.isImplicit() && Op.isDead() &&
        TRI->regsOverlap(Op.getReg(), X86::RAX)) {
      Op.setIsDead(false);
      Op.setIsDef(true);
      RAXImplicitDead = true;
    }
    OriginalCall->addOperand(Op);
  }

  // Emit the marker "movq %rax, %rdi". %rdi is not callee-saved, so it cannot
  // be live across the earlier call. The call to the ObjC runtime function
  // returns its first argument, so the value of %rax is unchanged after the
  // ObjC runtime call.
  auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::MOV64rr))
                     .addReg(X86::RDI, RegState::Define)
                     .addReg(X86::RAX)
                     .getInstr();
  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Marker);

  // Emit the call to the ObjC runtime.
  const uint32_t *RegMask =
      TRI->getCallPreservedMask(*MBB.getParent(), CallingConv::C);
  MachineInstr *RtCall =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64pcrel32))
          .addGlobalAddress(MI.getOperand(0).getGlobal(), 0, 0)
          .addRegMask(RegMask)
          .addReg(X86::RAX,
                  RegState::Implicit |
                      (RAXImplicitDead ? (RegState::Dead | RegState::Define)
                                       : RegState::Define))
          .getInstr();
  MI.eraseFromParent();

  auto &TM = MBB.getParent()->getTarget();
  // On Darwin platforms, wrap the expanded sequence in a bundle to prevent
  // later optimizations from breaking up the sequence.
  if (TM.getTargetTriple().isOSDarwin())
    finalizeBundle(MBB, OriginalCall->getIterator(),
                   std::next(RtCall->getIterator()));
}

/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  const DebugLoc &DL = MBBI->getDebugLoc();
  switch (Opcode) {
  default:
    return false;
  case X86::TCRETURNdi:
  case X86::TCRETURNdicc:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNdi64cc:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64: {
    bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust =
        MBBI->getOperand(isMem ? X86::AddrNumOperands : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
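    // Worked example (hypothetical values): a tail call that needs a 12-byte
    // adjustment (StackAdj == 12) in a function with a return-address delta
    // of -4 (MaxTCDelta == -4) releases 12 - (-4) == 16 bytes below.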
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
      assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
    }

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
      X86FL->emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue=*/true);
    }

    // Jump to label or value in register.
    bool IsWin64 = STI->isTargetWin64();
    if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
        Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
      unsigned Op;
      switch (Opcode) {
      case X86::TCRETURNdi:
        Op = X86::TAILJMPd;
        break;
      case X86::TCRETURNdicc:
        Op = X86::TAILJMPd_CC;
        break;
      case X86::TCRETURNdi64cc:
        assert(!MBB.getParent()->hasWinCFI() &&
               "Conditional tail calls confuse "
               "the Win64 unwinder.");
        Op = X86::TAILJMPd64_CC;
        break;
      default:
        // Note: Win64 uses REX prefixes on indirect jumps out of functions,
        // but not on direct ones.
        Op = X86::TAILJMPd64;
        break;
      }
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
      if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
        MIB.addImm(MBBI->getOperand(2).getImm());
      }

    } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
      unsigned Op = (Opcode == X86::TCRETURNmi)
                        ? X86::TAILJMPm
                        : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      for (unsigned i = 0; i != X86::AddrNumOperands; ++i)
        MIB.add(MBBI->getOperand(i));
    } else if (Opcode == X86::TCRETURNri64) {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL,
              TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .add(JumpTarget);
    } else {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr)).add(JumpTarget);
    }

    MachineInstr &NewMI = *std::prev(MBBI);
    NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);

    // Update the call site info.
    if (MBBI->isCandidateForCallSiteEntry())
      MBB.getParent()->moveCallSiteInfo(&*MBBI, &NewMI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);

    return true;
  }
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    const bool Uses64BitFramePtr =
        STI->isTarget64BitLP64() || STI->isTargetNaCl64();
    Register StackPtr = TRI->getStackRegister();
    BuildMI(MBB, MBBI, DL,
            TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), StackPtr)
        .addReg(DestAddr.getReg());
    // The EH_RETURN pseudo is really removed during the MC Lowering.
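    // For illustration (a sketch): "EH_RETURN64 $rcx" expands here to just
    //   movq %rcx, %rsp
    // and the remaining pseudo is rewritten to a plain return at MC lowering.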
    return true;
  }
  case X86::IRET: {
    // Adjust the stack to erase the error code.
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, true);
    // Replace the pseudo with a machine iret.
    unsigned RetOp = STI->is64Bit() ? X86::IRET64 : X86::IRET32;
    // Use UIRET if UINTR is present (except when building the kernel).
    if (STI->is64Bit() && STI->hasUINTR() &&
        MBB.getParent()->getTarget().getCodeModel() != CodeModel::Kernel)
      RetOp = X86::UIRET;
    BuildMI(MBB, MBBI, DL, TII->get(RetOp));
    MBB.erase(MBBI);
    return true;
  }
  case X86::RET: {
    // Adjust the stack to erase the error code.
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    MachineInstrBuilder MIB;
    if (StackAdj == 0) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RET64 : X86::RET32));
    } else if (isUInt<16>(StackAdj)) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETI64 : X86::RETI32))
                .addImm(StackAdj);
    } else {
      assert(!STI->is64Bit() &&
             "shouldn't need to do this for x86_64 targets!");
      // A ret can only handle immediates as big as 2**16-1. If we need to pop
      // off bytes before the return address, we must do it manually.
      BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r))
          .addReg(X86::ECX, RegState::Define);
      X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, /*InEpilogue=*/true);
      BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX);
      MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RET32));
    }
    for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
      MIB.add(MBBI->getOperand(I));
    MBB.erase(MBBI);
    return true;
  }
  case X86::LCMPXCHG16B_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx
    // =>
    // RBX = InArg
    // actualcmpxchg Addr
    // RBX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(6);
    Register SaveRbx = MBBI->getOperand(7).getReg();

    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    // NOTE: We don't copy the kill flag since the input might be the same reg
    // as one of the other operands of LCMPXCHG16B.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, InArg.getReg(), false);
    // Create the actual instruction.
    MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(X86::LCMPXCHG16B));
    // Copy the operands related to the address.
    for (unsigned Idx = 1; Idx < 6; ++Idx)
      NewInstr->addOperand(MBBI->getOperand(Idx));
    // Finally, restore the value of RBX.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx,
                     /*SrcIsKill*/ true);

    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  // Loading/storing mask pairs requires two kmov operations. The second one
  // of these needs a 2-byte displacement relative to the specified address
  // (with a 32-bit spill size). Pairs of 1-bit to 16-bit masks all use the
  // same spill size, so they are all stored using MASKPAIR16STORE and loaded
  // using MASKPAIR16LOAD.
  //
  // The displacement value might wrap around in theory, thus the asserts in
  // both cases.
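  //
  // For illustration (a sketch; the address and pair register are made up),
  // a MASKPAIR16LOAD of %k0_k1 from 8(%rsp) becomes roughly:
  //   kmovw 8(%rsp), %k0
  //   kmovw 10(%rsp), %k1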
  case X86::MASKPAIR16LOAD: {
    int64_t Disp = MBBI->getOperand(1 + X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(0).getReg();
    bool DstIsDead = MBBI->getOperand(0).isDead();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
            .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
    auto MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
            .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(1 + i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(1 + i));
    }

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MASKPAIR16STORE: {
    int64_t Disp = MBBI->getOperand(X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(X86::AddrNumOperands).getReg();
    bool SrcIsKill = MBBI->getOperand(X86::AddrNumOperands).isKill();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
    auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(i));
    }
    MIBLo.addReg(Reg0, getKillRegState(SrcIsKill));
    MIBHi.addReg(Reg1, getKillRegState(SrcIsKill));

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MWAITX_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudomwaitx InArg, SaveRbx
    // =>
    // [E|R]BX = InArg
    // actualmwaitx
    // [E|R]BX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(1);
    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    TII->copyPhysReg(MBB, MBBI, DL, X86::EBX, InArg.getReg(), InArg.isKill());
    // Create the actual instruction.
    BuildMI(MBB, MBBI, DL, TII->get(X86::MWAITXrrr));
    // Finally, restore the value of RBX.
    Register SaveRbx = MBBI->getOperand(2).getReg();
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx, /*SrcIsKill*/ true);
    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  case TargetOpcode::ICALL_BRANCH_FUNNEL:
    ExpandICallBranchFunnel(&MBB, MBBI);
    return true;
  case X86::PLDTILECFGV: {
    MI.setDesc(TII->get(X86::LDTILECFG));
    return true;
  }
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V: {
    for (unsigned i = 2; i > 0; --i)
      MI.RemoveOperand(i);
    unsigned Opc =
        Opcode == X86::PTILELOADDV ? X86::TILELOADD : X86::TILELOADDT1;
    MI.setDesc(TII->get(Opc));
    return true;
  }
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTDPBF16PSV: {
    MI.untieRegOperand(4);
    for (unsigned i = 3; i > 0; --i)
      MI.RemoveOperand(i);
    unsigned Opc;
    switch (Opcode) {
    case X86::PTDPBSSDV:   Opc = X86::TDPBSSD;   break;
    case X86::PTDPBSUDV:   Opc = X86::TDPBSUD;   break;
    case X86::PTDPBUSDV:   Opc = X86::TDPBUSD;   break;
    case X86::PTDPBUUDV:   Opc = X86::TDPBUUD;   break;
    case X86::PTDPBF16PSV: Opc = X86::TDPBF16PS; break;
    default: llvm_unreachable("Impossible Opcode!");
    }
    MI.setDesc(TII->get(Opc));
    MI.tieOperands(0, 1);
    return true;
  }
  case X86::PTILESTOREDV: {
    for (int i = 1; i >= 0; --i)
      MI.RemoveOperand(i);
    MI.setDesc(TII->get(X86::TILESTORED));
    return true;
  }
  case X86::PTILEZEROV: {
    for (int i = 2; i > 0; --i) // Remove row, col.
      MI.RemoveOperand(i);
    MI.setDesc(TII->get(X86::TILEZERO));
    return true;
  }
  case X86::CALL64pcrel32_RVMARKER:
  case X86::CALL64r_RVMARKER:
  case X86::CALL64m_RVMARKER:
    expandCALL_RVMARKER(MBB, MBBI);
    return true;
  }
  llvm_unreachable("Previous switch has a fallthrough?");
}

// This function creates an additional block for storing varargs guarded
// registers. It adds a check of %al to the entry block to skip
// GuardedRegsBlk if the XMM registers need not be stored.
//
//   EntryBlk[VAStartPseudoInstr]     EntryBlk
//        |                              |     .
//        |                              |        .
//        |                              |  GuardedRegsBlk
//        |                      =>      |        .
//        |                              |     .
//        |                           TailBlk
//        |                              |
//        |                              |
//
void X86ExpandPseudo::ExpandVastartSaveXmmRegs(
    MachineBasicBlock *EntryBlk,
    MachineBasicBlock::iterator VAStartPseudoInstr) const {
  assert(VAStartPseudoInstr->getOpcode() == X86::VASTART_SAVE_XMM_REGS);

  MachineFunction *Func = EntryBlk->getParent();
  const TargetInstrInfo *TII = STI->getInstrInfo();
  const DebugLoc &DL = VAStartPseudoInstr->getDebugLoc();
  Register CountReg = VAStartPseudoInstr->getOperand(0).getReg();

  // Calculate live-ins for the newly created blocks.
  LivePhysRegs LiveRegs(*STI->getRegisterInfo());
  SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;

  LiveRegs.addLiveIns(*EntryBlk);
  for (MachineInstr &MI : EntryBlk->instrs()) {
    if (MI.getOpcode() == VAStartPseudoInstr->getOpcode())
      break;

    LiveRegs.stepForward(MI, Clobbers);
  }

  // Create the new basic blocks. One block contains all the XMM stores,
  // and another block is the final destination regardless of whether any
  // stores were performed.
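  //
  // For illustration (a sketch; registers and offsets are made up, and the
  // guard is emitted only for non-Win64 calling conventions, see below):
  //   EntryBlk:
  //     testb %al, %al
  //     je    TailBlk
  //   GuardedRegsBlk:
  //     movaps %xmm0, 48(%rsp)
  //     movaps %xmm1, 64(%rsp)
  //     ...
  //   TailBlk:
  //     ...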
  const BasicBlock *LLVMBlk = EntryBlk->getBasicBlock();
  MachineFunction::iterator EntryBlkIter = ++EntryBlk->getIterator();
  MachineBasicBlock *GuardedRegsBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  MachineBasicBlock *TailBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  Func->insert(EntryBlkIter, GuardedRegsBlk);
  Func->insert(EntryBlkIter, TailBlk);

  // Transfer the remainder of EntryBlk and its successor edges to TailBlk.
  TailBlk->splice(TailBlk->begin(), EntryBlk,
                  std::next(MachineBasicBlock::iterator(VAStartPseudoInstr)),
                  EntryBlk->end());
  TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk);

  uint64_t FrameOffset = VAStartPseudoInstr->getOperand(4).getImm();
  uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(6).getImm();

  // TODO: add support for YMM and ZMM here.
  unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;

  // In the XMM save block, save all the XMM argument registers.
  for (int64_t OpndIdx = 7, RegIdx = 0;
       OpndIdx < VAStartPseudoInstr->getNumOperands() - 1;
       OpndIdx++, RegIdx++) {
    auto NewMI = BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc));
    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      if (i == X86::AddrDisp)
        NewMI.addImm(FrameOffset + VarArgsRegsOffset + RegIdx * 16);
      else
        NewMI.add(VAStartPseudoInstr->getOperand(i + 1));
    }
    NewMI.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg());
    assert(Register::isPhysicalRegister(
        VAStartPseudoInstr->getOperand(OpndIdx).getReg()));
  }

  // The original block will now fall through to the GuardedRegsBlk.
  EntryBlk->addSuccessor(GuardedRegsBlk);
  // The GuardedRegsBlk will fall through to the TailBlk.
  GuardedRegsBlk->addSuccessor(TailBlk);

  if (!STI->isCallingConvWin64(Func->getFunction().getCallingConv())) {
    // If %al is 0, branch around the XMM save block.
    BuildMI(EntryBlk, DL, TII->get(X86::TEST8rr))
        .addReg(CountReg)
        .addReg(CountReg);
    BuildMI(EntryBlk, DL, TII->get(X86::JCC_1))
        .addMBB(TailBlk)
        .addImm(X86::COND_E);
    EntryBlk->addSuccessor(TailBlk);
  }

  // Add live-ins to the created blocks.
  addLiveIns(*GuardedRegsBlk, LiveRegs);
  addLiveIns(*TailBlk, LiveRegs);

  // Delete the pseudo.
  VAStartPseudoInstr->eraseFromParent();
}

/// Expand all pseudo instructions contained in \p MBB.
/// \returns true if any expansion occurred for \p MBB.
bool X86ExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // MBBI may be invalidated by the expansion.
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= ExpandMI(MBB, MBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool X86ExpandPseudo::ExpandPseudosWhichAffectControlFlow(MachineFunction &MF) {
  // Currently the only pseudo which affects control flow is
  // X86::VASTART_SAVE_XMM_REGS, and it only appears in the entry block,
  // so we do not need to scan other blocks.
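  //
  // Operand layout of VASTART_SAVE_XMM_REGS, as consumed by
  // ExpandVastartSaveXmmRegs above: operand 0 is the %al count register,
  // operands 1-5 form the x86 memory reference, operand 6 is the offset of
  // the register save area, and the remaining operands (except the last)
  // are the live XMM argument registers.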
  for (MachineInstr &Instr : MF.front().instrs()) {
    if (Instr.getOpcode() == X86::VASTART_SAVE_XMM_REGS) {
      ExpandVastartSaveXmmRegs(&(MF.front()), Instr);
      return true;
    }
  }

  return false;
}

bool X86ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &static_cast<const X86Subtarget &>(MF.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  X86FI = MF.getInfo<X86MachineFunctionInfo>();
  X86FL = STI->getFrameLowering();

  bool Modified = ExpandPseudosWhichAffectControlFlow(MF);

  for (MachineBasicBlock &MBB : MF)
    Modified |= ExpandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createX86ExpandPseudoPass() {
  return new X86ExpandPseudo();
}
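
// Usage note (an assumption about the surrounding tooling, not part of this
// file): the pass can be exercised in isolation on MIR input via, e.g.,
//   llc -mtriple=x86_64-- -run-pass=x86-pseudo -o - input.mir
// where "x86-pseudo" is the DEBUG_TYPE name registered by INITIALIZE_PASS.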