//====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
/// flag bits.
///
/// We have to do this by carefully analyzing and rewriting the usage of the
/// copied EFLAGS register because there is no general way to rematerialize the
/// entire EFLAGS register safely and efficiently. Using `popf` both forces
/// dynamic stack adjustment and can create correctness issues due to IF, TF,
/// and other non-status flags being overwritten. Sequences involving SAHF
/// don't work on all x86 processors and are often quite slow compared to
/// directly testing a single status flag preserved in its own GPR.
///
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <iterator>
#include <numeric>
#include <tuple>
#include <utility>

using namespace llvm;

#define PASS_KEY "x86-flags-copy-lowering"
#define DEBUG_TYPE PASS_KEY

STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
STATISTIC(NumTestsInserted, "Number of test instructions inserted");
STATISTIC(NumAddsInserted, "Number of adds instructions inserted");

namespace {

// Convenient array type for storing registers associated with each condition. 76 using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>; 77 78 class X86FlagsCopyLoweringPass : public MachineFunctionPass { 79 public: 80 X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) { } 81 82 StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; } 83 bool runOnMachineFunction(MachineFunction &MF) override; 84 void getAnalysisUsage(AnalysisUsage &AU) const override; 85 86 /// Pass identification, replacement for typeid. 87 static char ID; 88 89 private: 90 MachineRegisterInfo *MRI; 91 const X86Subtarget *Subtarget; 92 const X86InstrInfo *TII; 93 const TargetRegisterInfo *TRI; 94 const TargetRegisterClass *PromoteRC; 95 MachineDominatorTree *MDT; 96 97 CondRegArray collectCondsInRegs(MachineBasicBlock &MBB, 98 MachineBasicBlock::iterator CopyDefI); 99 100 unsigned promoteCondToReg(MachineBasicBlock &MBB, 101 MachineBasicBlock::iterator TestPos, 102 DebugLoc TestLoc, X86::CondCode Cond); 103 std::pair<unsigned, bool> 104 getCondOrInverseInReg(MachineBasicBlock &TestMBB, 105 MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, 106 X86::CondCode Cond, CondRegArray &CondRegs); 107 void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, 108 DebugLoc Loc, unsigned Reg); 109 110 void rewriteArithmetic(MachineBasicBlock &TestMBB, 111 MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, 112 MachineInstr &MI, MachineOperand &FlagUse, 113 CondRegArray &CondRegs); 114 void rewriteCMov(MachineBasicBlock &TestMBB, 115 MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, 116 MachineInstr &CMovI, MachineOperand &FlagUse, 117 CondRegArray &CondRegs); 118 void rewriteFCMov(MachineBasicBlock &TestMBB, 119 MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, 120 MachineInstr &CMovI, MachineOperand &FlagUse, 121 CondRegArray &CondRegs); 122 void rewriteCondJmp(MachineBasicBlock &TestMBB, 123 MachineBasicBlock::iterator TestPos, DebugLoc 
TestLoc, 124 MachineInstr &JmpI, CondRegArray &CondRegs); 125 void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse, 126 MachineInstr &CopyDefI); 127 void rewriteSetCarryExtended(MachineBasicBlock &TestMBB, 128 MachineBasicBlock::iterator TestPos, 129 DebugLoc TestLoc, MachineInstr &SetBI, 130 MachineOperand &FlagUse, CondRegArray &CondRegs); 131 void rewriteSetCC(MachineBasicBlock &TestMBB, 132 MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, 133 MachineInstr &SetCCI, MachineOperand &FlagUse, 134 CondRegArray &CondRegs); 135 }; 136 137 } // end anonymous namespace 138 139 INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE, 140 "X86 EFLAGS copy lowering", false, false) 141 INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE, 142 "X86 EFLAGS copy lowering", false, false) 143 144 FunctionPass *llvm::createX86FlagsCopyLoweringPass() { 145 return new X86FlagsCopyLoweringPass(); 146 } 147 148 char X86FlagsCopyLoweringPass::ID = 0; 149 150 void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const { 151 AU.addRequired<MachineDominatorTree>(); 152 MachineFunctionPass::getAnalysisUsage(AU); 153 } 154 155 namespace { 156 /// An enumeration of the arithmetic instruction mnemonics which have 157 /// interesting flag semantics. 158 /// 159 /// We can map instruction opcodes into these mnemonics to make it easy to 160 /// dispatch with specific functionality. 
161 enum class FlagArithMnemonic { 162 ADC, 163 ADCX, 164 ADOX, 165 RCL, 166 RCR, 167 SBB, 168 }; 169 } // namespace 170 171 static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) { 172 switch (Opcode) { 173 default: 174 report_fatal_error("No support for lowering a copy into EFLAGS when used " 175 "by this instruction!"); 176 177 #define LLVM_EXPAND_INSTR_SIZES(MNEMONIC, SUFFIX) \ 178 case X86::MNEMONIC##8##SUFFIX: \ 179 case X86::MNEMONIC##16##SUFFIX: \ 180 case X86::MNEMONIC##32##SUFFIX: \ 181 case X86::MNEMONIC##64##SUFFIX: 182 183 #define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC) \ 184 LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr) \ 185 LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV) \ 186 LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm) \ 187 LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr) \ 188 case X86::MNEMONIC##8ri: \ 189 case X86::MNEMONIC##16ri8: \ 190 case X86::MNEMONIC##32ri8: \ 191 case X86::MNEMONIC##64ri8: \ 192 case X86::MNEMONIC##16ri: \ 193 case X86::MNEMONIC##32ri: \ 194 case X86::MNEMONIC##64ri32: \ 195 case X86::MNEMONIC##8mi: \ 196 case X86::MNEMONIC##16mi8: \ 197 case X86::MNEMONIC##32mi8: \ 198 case X86::MNEMONIC##64mi8: \ 199 case X86::MNEMONIC##16mi: \ 200 case X86::MNEMONIC##32mi: \ 201 case X86::MNEMONIC##64mi32: \ 202 case X86::MNEMONIC##8i8: \ 203 case X86::MNEMONIC##16i16: \ 204 case X86::MNEMONIC##32i32: \ 205 case X86::MNEMONIC##64i32: 206 207 LLVM_EXPAND_ADC_SBB_INSTR(ADC) 208 return FlagArithMnemonic::ADC; 209 210 LLVM_EXPAND_ADC_SBB_INSTR(SBB) 211 return FlagArithMnemonic::SBB; 212 213 #undef LLVM_EXPAND_ADC_SBB_INSTR 214 215 LLVM_EXPAND_INSTR_SIZES(RCL, rCL) 216 LLVM_EXPAND_INSTR_SIZES(RCL, r1) 217 LLVM_EXPAND_INSTR_SIZES(RCL, ri) 218 return FlagArithMnemonic::RCL; 219 220 LLVM_EXPAND_INSTR_SIZES(RCR, rCL) 221 LLVM_EXPAND_INSTR_SIZES(RCR, r1) 222 LLVM_EXPAND_INSTR_SIZES(RCR, ri) 223 return FlagArithMnemonic::RCR; 224 225 #undef LLVM_EXPAND_INSTR_SIZES 226 227 case X86::ADCX32rr: 228 case X86::ADCX64rr: 229 case X86::ADCX32rm: 230 case X86::ADCX64rm: 231 
return FlagArithMnemonic::ADCX; 232 233 case X86::ADOX32rr: 234 case X86::ADOX64rr: 235 case X86::ADOX32rm: 236 case X86::ADOX64rm: 237 return FlagArithMnemonic::ADOX; 238 } 239 } 240 241 static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB, 242 MachineInstr &SplitI, 243 const X86InstrInfo &TII) { 244 MachineFunction &MF = *MBB.getParent(); 245 246 assert(SplitI.getParent() == &MBB && 247 "Split instruction must be in the split block!"); 248 assert(SplitI.isBranch() && 249 "Only designed to split a tail of branch instructions!"); 250 assert(X86::getCondFromBranch(SplitI) != X86::COND_INVALID && 251 "Must split on an actual jCC instruction!"); 252 253 // Dig out the previous instruction to the split point. 254 MachineInstr &PrevI = *std::prev(SplitI.getIterator()); 255 assert(PrevI.isBranch() && "Must split after a branch!"); 256 assert(X86::getCondFromBranch(PrevI) != X86::COND_INVALID && 257 "Must split after an actual jCC instruction!"); 258 assert(!std::prev(PrevI.getIterator())->isTerminator() && 259 "Must only have this one terminator prior to the split!"); 260 261 // Grab the one successor edge that will stay in `MBB`. 262 MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB(); 263 264 // Analyze the original block to see if we are actually splitting an edge 265 // into two edges. This can happen when we have multiple conditional jumps to 266 // the same successor. 267 bool IsEdgeSplit = 268 std::any_of(SplitI.getIterator(), MBB.instr_end(), 269 [&](MachineInstr &MI) { 270 assert(MI.isTerminator() && 271 "Should only have spliced terminators!"); 272 return llvm::any_of( 273 MI.operands(), [&](MachineOperand &MOp) { 274 return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc; 275 }); 276 }) || 277 MBB.getFallThrough() == &UnsplitSucc; 278 279 MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock(); 280 281 // Insert the new block immediately after the current one. Any existing 282 // fallthrough will be sunk into this new block anyways. 
283 MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB); 284 285 // Splice the tail of instructions into the new block. 286 NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end()); 287 288 // Copy the necessary succesors (and their probability info) into the new 289 // block. 290 for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) 291 if (IsEdgeSplit || *SI != &UnsplitSucc) 292 NewMBB.copySuccessor(&MBB, SI); 293 // Normalize the probabilities if we didn't end up splitting the edge. 294 if (!IsEdgeSplit) 295 NewMBB.normalizeSuccProbs(); 296 297 // Now replace all of the moved successors in the original block with the new 298 // block. This will merge their probabilities. 299 for (MachineBasicBlock *Succ : NewMBB.successors()) 300 if (Succ != &UnsplitSucc) 301 MBB.replaceSuccessor(Succ, &NewMBB); 302 303 // We should always end up replacing at least one successor. 304 assert(MBB.isSuccessor(&NewMBB) && 305 "Failed to make the new block a successor!"); 306 307 // Now update all the PHIs. 308 for (MachineBasicBlock *Succ : NewMBB.successors()) { 309 for (MachineInstr &MI : *Succ) { 310 if (!MI.isPHI()) 311 break; 312 313 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps; 314 OpIdx += 2) { 315 MachineOperand &OpV = MI.getOperand(OpIdx); 316 MachineOperand &OpMBB = MI.getOperand(OpIdx + 1); 317 assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!"); 318 if (OpMBB.getMBB() != &MBB) 319 continue; 320 321 // Replace the operand for unsplit successors 322 if (!IsEdgeSplit || Succ != &UnsplitSucc) { 323 OpMBB.setMBB(&NewMBB); 324 325 // We have to continue scanning as there may be multiple entries in 326 // the PHI. 327 continue; 328 } 329 330 // When we have split the edge append a new successor. 
331 MI.addOperand(MF, OpV); 332 MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB)); 333 break; 334 } 335 } 336 } 337 338 return NewMBB; 339 } 340 341 static X86::CondCode getCondFromFCMOV(unsigned Opcode) { 342 switch (Opcode) { 343 default: return X86::COND_INVALID; 344 case X86::CMOVBE_Fp32: case X86::CMOVBE_Fp64: case X86::CMOVBE_Fp80: 345 return X86::COND_BE; 346 case X86::CMOVB_Fp32: case X86::CMOVB_Fp64: case X86::CMOVB_Fp80: 347 return X86::COND_B; 348 case X86::CMOVE_Fp32: case X86::CMOVE_Fp64: case X86::CMOVE_Fp80: 349 return X86::COND_E; 350 case X86::CMOVNBE_Fp32: case X86::CMOVNBE_Fp64: case X86::CMOVNBE_Fp80: 351 return X86::COND_A; 352 case X86::CMOVNB_Fp32: case X86::CMOVNB_Fp64: case X86::CMOVNB_Fp80: 353 return X86::COND_AE; 354 case X86::CMOVNE_Fp32: case X86::CMOVNE_Fp64: case X86::CMOVNE_Fp80: 355 return X86::COND_NE; 356 case X86::CMOVNP_Fp32: case X86::CMOVNP_Fp64: case X86::CMOVNP_Fp80: 357 return X86::COND_NP; 358 case X86::CMOVP_Fp32: case X86::CMOVP_Fp64: case X86::CMOVP_Fp80: 359 return X86::COND_P; 360 } 361 } 362 363 bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { 364 LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName() 365 << " **********\n"); 366 367 Subtarget = &MF.getSubtarget<X86Subtarget>(); 368 MRI = &MF.getRegInfo(); 369 TII = Subtarget->getInstrInfo(); 370 TRI = Subtarget->getRegisterInfo(); 371 MDT = &getAnalysis<MachineDominatorTree>(); 372 PromoteRC = &X86::GR8RegClass; 373 374 if (MF.begin() == MF.end()) 375 // Nothing to do for a degenerate empty function... 376 return false; 377 378 // Collect the copies in RPO so that when there are chains where a copy is in 379 // turn copied again we visit the first one first. This ensures we can find 380 // viable locations for testing the original EFLAGS that dominate all the 381 // uses across complex CFGs. 
382 SmallVector<MachineInstr *, 4> Copies; 383 ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); 384 for (MachineBasicBlock *MBB : RPOT) 385 for (MachineInstr &MI : *MBB) 386 if (MI.getOpcode() == TargetOpcode::COPY && 387 MI.getOperand(0).getReg() == X86::EFLAGS) 388 Copies.push_back(&MI); 389 390 for (MachineInstr *CopyI : Copies) { 391 MachineBasicBlock &MBB = *CopyI->getParent(); 392 393 MachineOperand &VOp = CopyI->getOperand(1); 394 assert(VOp.isReg() && 395 "The input to the copy for EFLAGS should always be a register!"); 396 MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg()); 397 if (CopyDefI.getOpcode() != TargetOpcode::COPY) { 398 // FIXME: The big likely candidate here are PHI nodes. We could in theory 399 // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard 400 // enough that it is probably better to change every other part of LLVM 401 // to avoid creating them. The issue is that once we have PHIs we won't 402 // know which original EFLAGS value we need to capture with our setCCs 403 // below. The end result will be computing a complete set of setCCs that 404 // we *might* want, computing them in every place where we copy *out* of 405 // EFLAGS and then doing SSA formation on all of them to insert necessary 406 // PHI nodes and consume those here. Then hoping that somehow we DCE the 407 // unnecessary ones. This DCE seems very unlikely to be successful and so 408 // we will almost certainly end up with a glut of dead setCC 409 // instructions. Until we have a motivating test case and fail to avoid 410 // it by changing other parts of LLVM's lowering, we refuse to handle 411 // this complex case here. 
412 LLVM_DEBUG( 413 dbgs() << "ERROR: Encountered unexpected def of an eflags copy: "; 414 CopyDefI.dump()); 415 report_fatal_error( 416 "Cannot lower EFLAGS copy unless it is defined in turn by a copy!"); 417 } 418 419 auto Cleanup = make_scope_exit([&] { 420 // All uses of the EFLAGS copy are now rewritten, kill the copy into 421 // eflags and if dead the copy from. 422 CopyI->eraseFromParent(); 423 if (MRI->use_empty(CopyDefI.getOperand(0).getReg())) 424 CopyDefI.eraseFromParent(); 425 ++NumCopiesEliminated; 426 }); 427 428 MachineOperand &DOp = CopyI->getOperand(0); 429 assert(DOp.isDef() && "Expected register def!"); 430 assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!"); 431 if (DOp.isDead()) 432 continue; 433 434 MachineBasicBlock *TestMBB = CopyDefI.getParent(); 435 auto TestPos = CopyDefI.getIterator(); 436 DebugLoc TestLoc = CopyDefI.getDebugLoc(); 437 438 LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump()); 439 440 // Walk up across live-in EFLAGS to find where they were actually def'ed. 441 // 442 // This copy's def may just be part of a region of blocks covered by 443 // a single def of EFLAGS and we want to find the top of that region where 444 // possible. 445 // 446 // This is essentially a search for a *candidate* reaching definition 447 // location. We don't need to ever find the actual reaching definition here, 448 // but we want to walk up the dominator tree to find the highest point which 449 // would be viable for such a definition. 450 auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin, 451 MachineBasicBlock::iterator End) { 452 // Scan backwards as we expect these to be relatively short and often find 453 // a clobber near the end. 454 return llvm::any_of( 455 llvm::reverse(llvm::make_range(Begin, End)), [&](MachineInstr &MI) { 456 // Flag any instruction (other than the copy we are 457 // currently rewriting) that defs EFLAGS. 
458 return &MI != CopyI && MI.findRegisterDefOperand(X86::EFLAGS); 459 }); 460 }; 461 auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB, 462 MachineBasicBlock *EndMBB) { 463 assert(MDT->dominates(BeginMBB, EndMBB) && 464 "Only support paths down the dominator tree!"); 465 SmallPtrSet<MachineBasicBlock *, 4> Visited; 466 SmallVector<MachineBasicBlock *, 4> Worklist; 467 // We terminate at the beginning. No need to scan it. 468 Visited.insert(BeginMBB); 469 Worklist.push_back(EndMBB); 470 do { 471 auto *MBB = Worklist.pop_back_val(); 472 for (auto *PredMBB : MBB->predecessors()) { 473 if (!Visited.insert(PredMBB).second) 474 continue; 475 if (HasEFLAGSClobber(PredMBB->begin(), PredMBB->end())) 476 return true; 477 // Enqueue this block to walk its predecessors. 478 Worklist.push_back(PredMBB); 479 } 480 } while (!Worklist.empty()); 481 // No clobber found along a path from the begin to end. 482 return false; 483 }; 484 while (TestMBB->isLiveIn(X86::EFLAGS) && !TestMBB->pred_empty() && 485 !HasEFLAGSClobber(TestMBB->begin(), TestPos)) { 486 // Find the nearest common dominator of the predecessors, as 487 // that will be the best candidate to hoist into. 488 MachineBasicBlock *HoistMBB = 489 std::accumulate(std::next(TestMBB->pred_begin()), TestMBB->pred_end(), 490 *TestMBB->pred_begin(), 491 [&](MachineBasicBlock *LHS, MachineBasicBlock *RHS) { 492 return MDT->findNearestCommonDominator(LHS, RHS); 493 }); 494 495 // Now we need to scan all predecessors that may be reached along paths to 496 // the hoist block. A clobber anywhere in any of these blocks the hoist. 497 // Note that this even handles loops because we require *no* clobbers. 498 if (HasEFLAGSClobberPath(HoistMBB, TestMBB)) 499 break; 500 501 // We also need the terminators to not sneakily clobber flags. 502 if (HasEFLAGSClobber(HoistMBB->getFirstTerminator()->getIterator(), 503 HoistMBB->instr_end())) 504 break; 505 506 // We found a viable location, hoist our test position to it. 
507 TestMBB = HoistMBB; 508 TestPos = TestMBB->getFirstTerminator()->getIterator(); 509 // Clear the debug location as it would just be confusing after hoisting. 510 TestLoc = DebugLoc(); 511 } 512 LLVM_DEBUG({ 513 auto DefIt = llvm::find_if( 514 llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)), 515 [&](MachineInstr &MI) { 516 return MI.findRegisterDefOperand(X86::EFLAGS); 517 }); 518 if (DefIt.base() != TestMBB->instr_begin()) { 519 dbgs() << " Using EFLAGS defined by: "; 520 DefIt->dump(); 521 } else { 522 dbgs() << " Using live-in flags for BB:\n"; 523 TestMBB->dump(); 524 } 525 }); 526 527 // While rewriting uses, we buffer jumps and rewrite them in a second pass 528 // because doing so will perturb the CFG that we are walking to find the 529 // uses in the first place. 530 SmallVector<MachineInstr *, 4> JmpIs; 531 532 // Gather the condition flags that have already been preserved in 533 // registers. We do this from scratch each time as we expect there to be 534 // very few of them and we expect to not revisit the same copy definition 535 // many times. If either of those change sufficiently we could build a map 536 // of these up front instead. 537 CondRegArray CondRegs = collectCondsInRegs(*TestMBB, TestPos); 538 539 // Collect the basic blocks we need to scan. Typically this will just be 540 // a single basic block but we may have to scan multiple blocks if the 541 // EFLAGS copy lives into successors. 542 SmallVector<MachineBasicBlock *, 2> Blocks; 543 SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks; 544 Blocks.push_back(&MBB); 545 546 do { 547 MachineBasicBlock &UseMBB = *Blocks.pop_back_val(); 548 549 // Track when if/when we find a kill of the flags in this block. 550 bool FlagsKilled = false; 551 552 // In most cases, we walk from the beginning to the end of the block. But 553 // when the block is the same block as the copy is from, we will visit it 554 // twice. The first time we start from the copy and go to the end. 
The 555 // second time we start from the beginning and go to the copy. This lets 556 // us handle copies inside of cycles. 557 // FIXME: This loop is *super* confusing. This is at least in part 558 // a symptom of all of this routine needing to be refactored into 559 // documentable components. Once done, there may be a better way to write 560 // this loop. 561 for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(&UseMBB)) 562 ? std::next(CopyI->getIterator()) 563 : UseMBB.instr_begin(), 564 MIE = UseMBB.instr_end(); 565 MII != MIE;) { 566 MachineInstr &MI = *MII++; 567 // If we are in the original copy block and encounter either the copy 568 // def or the copy itself, break so that we don't re-process any part of 569 // the block or process the instructions in the range that was copied 570 // over. 571 if (&MI == CopyI || &MI == &CopyDefI) { 572 assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) && 573 "Should only encounter these on the second pass over the " 574 "original block."); 575 break; 576 } 577 578 MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS); 579 if (!FlagUse) { 580 if (MI.findRegisterDefOperand(X86::EFLAGS)) { 581 // If EFLAGS are defined, it's as-if they were killed. We can stop 582 // scanning here. 583 // 584 // NB!!! Many instructions only modify some flags. LLVM currently 585 // models this as clobbering all flags, but if that ever changes 586 // this will need to be carefully updated to handle that more 587 // complex logic. 588 FlagsKilled = true; 589 break; 590 } 591 continue; 592 } 593 594 LLVM_DEBUG(dbgs() << " Rewriting use: "; MI.dump()); 595 596 // Check the kill flag before we rewrite as that may change it. 597 if (FlagUse->isKill()) 598 FlagsKilled = true; 599 600 // Once we encounter a branch, the rest of the instructions must also be 601 // branches. We can't rewrite in place here, so we handle them below. 
602 // 603 // Note that we don't have to handle tail calls here, even conditional 604 // tail calls, as those are not introduced into the X86 MI until post-RA 605 // branch folding or black placement. As a consequence, we get to deal 606 // with the simpler formulation of conditional branches followed by tail 607 // calls. 608 if (X86::getCondFromBranch(MI) != X86::COND_INVALID) { 609 auto JmpIt = MI.getIterator(); 610 do { 611 JmpIs.push_back(&*JmpIt); 612 ++JmpIt; 613 } while (JmpIt != UseMBB.instr_end() && 614 X86::getCondFromBranch(*JmpIt) != 615 X86::COND_INVALID); 616 break; 617 } 618 619 // Otherwise we can just rewrite in-place. 620 if (X86::getCondFromCMov(MI) != X86::COND_INVALID) { 621 rewriteCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); 622 } else if (getCondFromFCMOV(MI.getOpcode()) != X86::COND_INVALID) { 623 rewriteFCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); 624 } else if (X86::getCondFromSETCC(MI) != X86::COND_INVALID) { 625 rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); 626 } else if (MI.getOpcode() == TargetOpcode::COPY) { 627 rewriteCopy(MI, *FlagUse, CopyDefI); 628 } else { 629 // We assume all other instructions that use flags also def them. 630 assert(MI.findRegisterDefOperand(X86::EFLAGS) && 631 "Expected a def of EFLAGS for this instruction!"); 632 633 // NB!!! Several arithmetic instructions only *partially* update 634 // flags. Theoretically, we could generate MI code sequences that 635 // would rely on this fact and observe different flags independently. 636 // But currently LLVM models all of these instructions as clobbering 637 // all the flags in an undef way. We rely on that to simplify the 638 // logic. 639 FlagsKilled = true; 640 641 switch (MI.getOpcode()) { 642 case X86::SETB_C8r: 643 case X86::SETB_C16r: 644 case X86::SETB_C32r: 645 case X86::SETB_C64r: 646 // Use custom lowering for arithmetic that is merely extending the 647 // carry flag. 
We model this as the SETB_C* pseudo instructions. 648 rewriteSetCarryExtended(*TestMBB, TestPos, TestLoc, MI, *FlagUse, 649 CondRegs); 650 break; 651 652 default: 653 // Generically handle remaining uses as arithmetic instructions. 654 rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, *FlagUse, 655 CondRegs); 656 break; 657 } 658 break; 659 } 660 661 // If this was the last use of the flags, we're done. 662 if (FlagsKilled) 663 break; 664 } 665 666 // If the flags were killed, we're done with this block. 667 if (FlagsKilled) 668 continue; 669 670 // Otherwise we need to scan successors for ones where the flags live-in 671 // and queue those up for processing. 672 for (MachineBasicBlock *SuccMBB : UseMBB.successors()) 673 if (SuccMBB->isLiveIn(X86::EFLAGS) && 674 VisitedBlocks.insert(SuccMBB).second) { 675 // We currently don't do any PHI insertion and so we require that the 676 // test basic block dominates all of the use basic blocks. Further, we 677 // can't have a cycle from the test block back to itself as that would 678 // create a cycle requiring a PHI to break it. 679 // 680 // We could in theory do PHI insertion here if it becomes useful by 681 // just taking undef values in along every edge that we don't trace 682 // this EFLAGS copy along. This isn't as bad as fully general PHI 683 // insertion, but still seems like a great deal of complexity. 684 // 685 // Because it is theoretically possible that some earlier MI pass or 686 // other lowering transformation could induce this to happen, we do 687 // a hard check even in non-debug builds here. 688 if (SuccMBB == TestMBB || !MDT->dominates(TestMBB, SuccMBB)) { 689 LLVM_DEBUG({ 690 dbgs() 691 << "ERROR: Encountered use that is not dominated by our test " 692 "basic block! 
Rewriting this would require inserting PHI " 693 "nodes to track the flag state across the CFG.\n\nTest " 694 "block:\n"; 695 TestMBB->dump(); 696 dbgs() << "Use block:\n"; 697 SuccMBB->dump(); 698 }); 699 report_fatal_error( 700 "Cannot lower EFLAGS copy when original copy def " 701 "does not dominate all uses."); 702 } 703 704 Blocks.push_back(SuccMBB); 705 } 706 } while (!Blocks.empty()); 707 708 // Now rewrite the jumps that use the flags. These we handle specially 709 // because if there are multiple jumps in a single basic block we'll have 710 // to do surgery on the CFG. 711 MachineBasicBlock *LastJmpMBB = nullptr; 712 for (MachineInstr *JmpI : JmpIs) { 713 // Past the first jump within a basic block we need to split the blocks 714 // apart. 715 if (JmpI->getParent() == LastJmpMBB) 716 splitBlock(*JmpI->getParent(), *JmpI, *TII); 717 else 718 LastJmpMBB = JmpI->getParent(); 719 720 rewriteCondJmp(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs); 721 } 722 723 // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if 724 // the copy's def operand is itself a kill. 725 } 726 727 #ifndef NDEBUG 728 for (MachineBasicBlock &MBB : MF) 729 for (MachineInstr &MI : MBB) 730 if (MI.getOpcode() == TargetOpcode::COPY && 731 (MI.getOperand(0).getReg() == X86::EFLAGS || 732 MI.getOperand(1).getReg() == X86::EFLAGS)) { 733 LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: "; 734 MI.dump()); 735 llvm_unreachable("Unlowered EFLAGS copy!"); 736 } 737 #endif 738 739 return true; 740 } 741 742 /// Collect any conditions that have already been set in registers so that we 743 /// can re-use them rather than adding duplicates. 744 CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs( 745 MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos) { 746 CondRegArray CondRegs = {}; 747 748 // Scan backwards across the range of instructions with live EFLAGS. 
749 for (MachineInstr &MI : 750 llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) { 751 X86::CondCode Cond = X86::getCondFromSETCC(MI); 752 if (Cond != X86::COND_INVALID && !MI.mayStore() && 753 MI.getOperand(0).isReg() && 754 Register::isVirtualRegister(MI.getOperand(0).getReg())) { 755 assert(MI.getOperand(0).isDef() && 756 "A non-storing SETcc should always define a register!"); 757 CondRegs[Cond] = MI.getOperand(0).getReg(); 758 } 759 760 // Stop scanning when we see the first definition of the EFLAGS as prior to 761 // this we would potentially capture the wrong flag state. 762 if (MI.findRegisterDefOperand(X86::EFLAGS)) 763 break; 764 } 765 return CondRegs; 766 } 767 768 unsigned X86FlagsCopyLoweringPass::promoteCondToReg( 769 MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, 770 DebugLoc TestLoc, X86::CondCode Cond) { 771 Register Reg = MRI->createVirtualRegister(PromoteRC); 772 auto SetI = BuildMI(TestMBB, TestPos, TestLoc, 773 TII->get(X86::SETCCr), Reg).addImm(Cond); 774 (void)SetI; 775 LLVM_DEBUG(dbgs() << " save cond: "; SetI->dump()); 776 ++NumSetCCsInserted; 777 return Reg; 778 } 779 780 std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg( 781 MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, 782 DebugLoc TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) { 783 unsigned &CondReg = CondRegs[Cond]; 784 unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)]; 785 if (!CondReg && !InvCondReg) 786 CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond); 787 788 if (CondReg) 789 return {CondReg, false}; 790 else 791 return {InvCondReg, true}; 792 } 793 794 void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB, 795 MachineBasicBlock::iterator Pos, 796 DebugLoc Loc, unsigned Reg) { 797 auto TestI = 798 BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg); 799 (void)TestI; 800 LLVM_DEBUG(dbgs() << " test cond: "; TestI->dump()); 801 ++NumTestsInserted; 
802 } 803 804 void X86FlagsCopyLoweringPass::rewriteArithmetic( 805 MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, 806 DebugLoc TestLoc, MachineInstr &MI, MachineOperand &FlagUse, 807 CondRegArray &CondRegs) { 808 // Arithmetic is either reading CF or OF. Figure out which condition we need 809 // to preserve in a register. 810 X86::CondCode Cond; 811 812 // The addend to use to reset CF or OF when added to the flag value. 813 int Addend; 814 815 switch (getMnemonicFromOpcode(MI.getOpcode())) { 816 case FlagArithMnemonic::ADC: 817 case FlagArithMnemonic::ADCX: 818 case FlagArithMnemonic::RCL: 819 case FlagArithMnemonic::RCR: 820 case FlagArithMnemonic::SBB: 821 Cond = X86::COND_B; // CF == 1 822 // Set up an addend that when one is added will need a carry due to not 823 // having a higher bit available. 824 Addend = 255; 825 break; 826 827 case FlagArithMnemonic::ADOX: 828 Cond = X86::COND_O; // OF == 1 829 // Set up an addend that when one is added will turn from positive to 830 // negative and thus overflow in the signed domain. 831 Addend = 127; 832 break; 833 } 834 835 // Now get a register that contains the value of the flag input to the 836 // arithmetic. We require exactly this flag to simplify the arithmetic 837 // required to materialize it back into the flag. 838 unsigned &CondReg = CondRegs[Cond]; 839 if (!CondReg) 840 CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond); 841 842 MachineBasicBlock &MBB = *MI.getParent(); 843 844 // Insert an instruction that will set the flag back to the desired value. 
845 Register TmpReg = MRI->createVirtualRegister(PromoteRC); 846 auto AddI = 847 BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri)) 848 .addDef(TmpReg, RegState::Dead) 849 .addReg(CondReg) 850 .addImm(Addend); 851 (void)AddI; 852 LLVM_DEBUG(dbgs() << " add cond: "; AddI->dump()); 853 ++NumAddsInserted; 854 FlagUse.setIsKill(true); 855 } 856 857 void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB, 858 MachineBasicBlock::iterator TestPos, 859 DebugLoc TestLoc, 860 MachineInstr &CMovI, 861 MachineOperand &FlagUse, 862 CondRegArray &CondRegs) { 863 // First get the register containing this specific condition. 864 X86::CondCode Cond = X86::getCondFromCMov(CMovI); 865 unsigned CondReg; 866 bool Inverted; 867 std::tie(CondReg, Inverted) = 868 getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs); 869 870 MachineBasicBlock &MBB = *CMovI.getParent(); 871 872 // Insert a direct test of the saved register. 873 insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg); 874 875 // Rewrite the CMov to use the !ZF flag from the test, and then kill its use 876 // of the flags afterward. 877 CMovI.getOperand(CMovI.getDesc().getNumOperands() - 1) 878 .setImm(Inverted ? X86::COND_E : X86::COND_NE); 879 FlagUse.setIsKill(true); 880 LLVM_DEBUG(dbgs() << " fixed cmov: "; CMovI.dump()); 881 } 882 883 void X86FlagsCopyLoweringPass::rewriteFCMov(MachineBasicBlock &TestMBB, 884 MachineBasicBlock::iterator TestPos, 885 DebugLoc TestLoc, 886 MachineInstr &CMovI, 887 MachineOperand &FlagUse, 888 CondRegArray &CondRegs) { 889 // First get the register containing this specific condition. 890 X86::CondCode Cond = getCondFromFCMOV(CMovI.getOpcode()); 891 unsigned CondReg; 892 bool Inverted; 893 std::tie(CondReg, Inverted) = 894 getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs); 895 896 MachineBasicBlock &MBB = *CMovI.getParent(); 897 898 // Insert a direct test of the saved register. 
899 insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg); 900 901 auto getFCMOVOpcode = [](unsigned Opcode, bool Inverted) { 902 switch (Opcode) { 903 default: llvm_unreachable("Unexpected opcode!"); 904 case X86::CMOVBE_Fp32: case X86::CMOVNBE_Fp32: 905 case X86::CMOVB_Fp32: case X86::CMOVNB_Fp32: 906 case X86::CMOVE_Fp32: case X86::CMOVNE_Fp32: 907 case X86::CMOVP_Fp32: case X86::CMOVNP_Fp32: 908 return Inverted ? X86::CMOVE_Fp32 : X86::CMOVNE_Fp32; 909 case X86::CMOVBE_Fp64: case X86::CMOVNBE_Fp64: 910 case X86::CMOVB_Fp64: case X86::CMOVNB_Fp64: 911 case X86::CMOVE_Fp64: case X86::CMOVNE_Fp64: 912 case X86::CMOVP_Fp64: case X86::CMOVNP_Fp64: 913 return Inverted ? X86::CMOVE_Fp64 : X86::CMOVNE_Fp64; 914 case X86::CMOVBE_Fp80: case X86::CMOVNBE_Fp80: 915 case X86::CMOVB_Fp80: case X86::CMOVNB_Fp80: 916 case X86::CMOVE_Fp80: case X86::CMOVNE_Fp80: 917 case X86::CMOVP_Fp80: case X86::CMOVNP_Fp80: 918 return Inverted ? X86::CMOVE_Fp80 : X86::CMOVNE_Fp80; 919 } 920 }; 921 922 // Rewrite the CMov to use the !ZF flag from the test. 923 CMovI.setDesc(TII->get(getFCMOVOpcode(CMovI.getOpcode(), Inverted))); 924 FlagUse.setIsKill(true); 925 LLVM_DEBUG(dbgs() << " fixed fcmov: "; CMovI.dump()); 926 } 927 928 void X86FlagsCopyLoweringPass::rewriteCondJmp( 929 MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, 930 DebugLoc TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) { 931 // First get the register containing this specific condition. 932 X86::CondCode Cond = X86::getCondFromBranch(JmpI); 933 unsigned CondReg; 934 bool Inverted; 935 std::tie(CondReg, Inverted) = 936 getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs); 937 938 MachineBasicBlock &JmpMBB = *JmpI.getParent(); 939 940 // Insert a direct test of the saved register. 941 insertTest(JmpMBB, JmpI.getIterator(), JmpI.getDebugLoc(), CondReg); 942 943 // Rewrite the jump to use the !ZF flag from the test, and kill its use of 944 // flags afterward. 
945 JmpI.getOperand(1).setImm(Inverted ? X86::COND_E : X86::COND_NE); 946 JmpI.findRegisterUseOperand(X86::EFLAGS)->setIsKill(true); 947 LLVM_DEBUG(dbgs() << " fixed jCC: "; JmpI.dump()); 948 } 949 950 void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI, 951 MachineOperand &FlagUse, 952 MachineInstr &CopyDefI) { 953 // Just replace this copy with the original copy def. 954 MRI->replaceRegWith(MI.getOperand(0).getReg(), 955 CopyDefI.getOperand(0).getReg()); 956 MI.eraseFromParent(); 957 } 958 959 void X86FlagsCopyLoweringPass::rewriteSetCarryExtended( 960 MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, 961 DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse, 962 CondRegArray &CondRegs) { 963 // This routine is only used to handle pseudos for setting a register to zero 964 // or all ones based on CF. This is essentially the sign extended from 1-bit 965 // form of SETB and modeled with the SETB_C* pseudos. They require special 966 // handling as they aren't normal SETcc instructions and are lowered to an 967 // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that 968 // they are only provided in reg-defining forms. A complicating factor is that 969 // they can define many different register widths. 970 assert(SetBI.getOperand(0).isReg() && 971 "Cannot have a non-register defined operand to this variant of SETB!"); 972 973 // Little helper to do the common final step of replacing the register def'ed 974 // by this SETB instruction with a new register and removing the SETB 975 // instruction. 976 auto RewriteToReg = [&](unsigned Reg) { 977 MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg); 978 SetBI.eraseFromParent(); 979 }; 980 981 // Grab the register class used for this particular instruction. 
982 auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg()); 983 984 MachineBasicBlock &MBB = *SetBI.getParent(); 985 auto SetPos = SetBI.getIterator(); 986 auto SetLoc = SetBI.getDebugLoc(); 987 988 auto AdjustReg = [&](unsigned Reg) { 989 auto &OrigRC = *MRI->getRegClass(Reg); 990 if (&OrigRC == &SetBRC) 991 return Reg; 992 993 unsigned NewReg; 994 995 int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8; 996 int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8; 997 assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!"); 998 assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!"); 999 int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit, 1000 X86::NoSubRegister, X86::sub_32bit}; 1001 1002 // If the original size is smaller than the target *and* is smaller than 4 1003 // bytes, we need to explicitly zero extend it. We always extend to 4-bytes 1004 // to maximize the chance of being able to CSE that operation and to avoid 1005 // partial dependency stalls extending to 2-bytes. 1006 if (OrigRegSize < TargetRegSize && OrigRegSize < 4) { 1007 NewReg = MRI->createVirtualRegister(&X86::GR32RegClass); 1008 BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg) 1009 .addReg(Reg); 1010 if (&SetBRC == &X86::GR32RegClass) 1011 return NewReg; 1012 Reg = NewReg; 1013 OrigRegSize = 4; 1014 } 1015 1016 NewReg = MRI->createVirtualRegister(&SetBRC); 1017 if (OrigRegSize < TargetRegSize) { 1018 BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG), 1019 NewReg) 1020 .addImm(0) 1021 .addReg(Reg) 1022 .addImm(SubRegIdx[OrigRegSize]); 1023 } else if (OrigRegSize > TargetRegSize) { 1024 if (TargetRegSize == 1 && !Subtarget->is64Bit()) { 1025 // Need to constrain the register class. 
1026 MRI->constrainRegClass(Reg, &X86::GR32_ABCDRegClass); 1027 } 1028 1029 BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), 1030 NewReg) 1031 .addReg(Reg, 0, SubRegIdx[TargetRegSize]); 1032 } else { 1033 BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg) 1034 .addReg(Reg); 1035 } 1036 return NewReg; 1037 }; 1038 1039 unsigned &CondReg = CondRegs[X86::COND_B]; 1040 if (!CondReg) 1041 CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B); 1042 1043 // Adjust the condition to have the desired register width by zero-extending 1044 // as needed. 1045 // FIXME: We should use a better API to avoid the local reference and using a 1046 // different variable here. 1047 unsigned ExtCondReg = AdjustReg(CondReg); 1048 1049 // Now we need to turn this into a bitmask. We do this by subtracting it from 1050 // zero. 1051 Register ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass); 1052 BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg); 1053 ZeroReg = AdjustReg(ZeroReg); 1054 1055 unsigned Sub; 1056 switch (SetBI.getOpcode()) { 1057 case X86::SETB_C8r: 1058 Sub = X86::SUB8rr; 1059 break; 1060 1061 case X86::SETB_C16r: 1062 Sub = X86::SUB16rr; 1063 break; 1064 1065 case X86::SETB_C32r: 1066 Sub = X86::SUB32rr; 1067 break; 1068 1069 case X86::SETB_C64r: 1070 Sub = X86::SUB64rr; 1071 break; 1072 1073 default: 1074 llvm_unreachable("Invalid SETB_C* opcode!"); 1075 } 1076 Register ResultReg = MRI->createVirtualRegister(&SetBRC); 1077 BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg) 1078 .addReg(ZeroReg) 1079 .addReg(ExtCondReg); 1080 return RewriteToReg(ResultReg); 1081 } 1082 1083 void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB, 1084 MachineBasicBlock::iterator TestPos, 1085 DebugLoc TestLoc, 1086 MachineInstr &SetCCI, 1087 MachineOperand &FlagUse, 1088 CondRegArray &CondRegs) { 1089 X86::CondCode Cond = X86::getCondFromSETCC(SetCCI); 1090 // Note that we can't usefully rewrite this to the inverse 
without complex 1091 // analysis of the users of the setCC. Largely we rely on duplicates which 1092 // could have been avoided already being avoided here. 1093 unsigned &CondReg = CondRegs[Cond]; 1094 if (!CondReg) 1095 CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond); 1096 1097 // Rewriting a register def is trivial: we just replace the register and 1098 // remove the setcc. 1099 if (!SetCCI.mayStore()) { 1100 assert(SetCCI.getOperand(0).isReg() && 1101 "Cannot have a non-register defined operand to SETcc!"); 1102 MRI->replaceRegWith(SetCCI.getOperand(0).getReg(), CondReg); 1103 SetCCI.eraseFromParent(); 1104 return; 1105 } 1106 1107 // Otherwise, we need to emit a store. 1108 auto MIB = BuildMI(*SetCCI.getParent(), SetCCI.getIterator(), 1109 SetCCI.getDebugLoc(), TII->get(X86::MOV8mr)); 1110 // Copy the address operands. 1111 for (int i = 0; i < X86::AddrNumOperands; ++i) 1112 MIB.add(SetCCI.getOperand(i)); 1113 1114 MIB.addReg(CondReg); 1115 1116 MIB.setMemRefs(SetCCI.memoperands()); 1117 1118 SetCCI.eraseFromParent(); 1119 return; 1120 } 1121