//====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
/// flag bits.
///
/// We have to do this by carefully analyzing and rewriting the usage of the
/// copied EFLAGS register because there is no general way to rematerialize the
/// entire EFLAGS register safely and efficiently. Using `popf` both forces
/// dynamic stack adjustment and can create correctness issues due to IF, TF,
/// and other non-status flags being overwritten. Sequences involving SAHF
/// don't work on all x86 processors and are often quite slow compared to
/// directly testing a single status flag preserved in its own GPR.
///
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define PASS_KEY "x86-flags-copy-lowering"
#define DEBUG_TYPE PASS_KEY

STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
STATISTIC(NumTestsInserted, "Number of test instructions inserted");
STATISTIC(NumAddsInserted, "Number of add instructions inserted");
STATISTIC(NumNFsConvertedTo, "Number of NF instructions converted to");

namespace {

// Convenient array type for storing registers associated with each condition.
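// Indexed by X86::CondCode; a zero entry means that condition has not yet been
// materialized into a register.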
using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;

class X86FlagsCopyLoweringPass : public MachineFunctionPass {
public:
  X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  MachineRegisterInfo *MRI = nullptr;
  const X86Subtarget *Subtarget = nullptr;
  const X86InstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  const TargetRegisterClass *PromoteRC = nullptr;
  MachineDominatorTree *MDT = nullptr;

  CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator CopyDefI);

  Register promoteCondToReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator TestPos,
                            const DebugLoc &TestLoc, X86::CondCode Cond);
  std::pair<unsigned, bool> getCondOrInverseInReg(
      MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
      const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs);
  void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
                  const DebugLoc &Loc, unsigned Reg);

  void rewriteSetCC(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
                    const DebugLoc &Loc, MachineInstr &MI,
                    CondRegArray &CondRegs);
  void rewriteArithmetic(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator Pos, const DebugLoc &Loc,
                         MachineInstr &MI, CondRegArray &CondRegs);
  void rewriteMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
                 const DebugLoc &Loc, MachineInstr &MI, CondRegArray &CondRegs);
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
                      "X86 EFLAGS copy lowering", false, false)
INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
                    "X86 EFLAGS copy lowering", false, false)

FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
  return new X86FlagsCopyLoweringPass();
}

char X86FlagsCopyLoweringPass::ID = 0;

void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addUsedIfAvailable<MachineDominatorTreeWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

static bool isArithmeticOp(unsigned Opc) {
  return X86::isADC(Opc) || X86::isSBB(Opc) || X86::isRCL(Opc) ||
         X86::isRCR(Opc) || (Opc == X86::SETB_C32r || Opc == X86::SETB_C64r);
}

static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
                                     MachineInstr &SplitI,
                                     const X86InstrInfo &TII) {
  MachineFunction &MF = *MBB.getParent();

  assert(SplitI.getParent() == &MBB &&
         "Split instruction must be in the split block!");
  assert(SplitI.isBranch() &&
         "Only designed to split a tail of branch instructions!");
  assert(X86::getCondFromBranch(SplitI) != X86::COND_INVALID &&
         "Must split on an actual jCC instruction!");

  // Dig out the previous instruction to the split point.
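  // Everything from SplitI onward will be spliced into the new block, while the
  // conditional branch before it (and its successor edge) stays in `MBB`.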
  MachineInstr &PrevI = *std::prev(SplitI.getIterator());
  assert(PrevI.isBranch() && "Must split after a branch!");
  assert(X86::getCondFromBranch(PrevI) != X86::COND_INVALID &&
         "Must split after an actual jCC instruction!");
  assert(!std::prev(PrevI.getIterator())->isTerminator() &&
         "Must only have this one terminator prior to the split!");

  // Grab the one successor edge that will stay in `MBB`.
  MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();

  // Analyze the original block to see if we are actually splitting an edge
  // into two edges. This can happen when we have multiple conditional jumps to
  // the same successor.
  bool IsEdgeSplit =
      std::any_of(SplitI.getIterator(), MBB.instr_end(),
                  [&](MachineInstr &MI) {
                    assert(MI.isTerminator() &&
                           "Should only have spliced terminators!");
                    return llvm::any_of(
                        MI.operands(), [&](MachineOperand &MOp) {
                          return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
                        });
                  }) ||
      MBB.getFallThrough() == &UnsplitSucc;

  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();

  // Insert the new block immediately after the current one. Any existing
  // fallthrough will be sunk into this new block anyway.
  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);

  // Splice the tail of instructions into the new block.
  NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());

  // Copy the necessary successors (and their probability info) into the new
  // block.
  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
    if (IsEdgeSplit || *SI != &UnsplitSucc)
      NewMBB.copySuccessor(&MBB, SI);
  // Normalize the probabilities if we didn't end up splitting the edge.
  if (!IsEdgeSplit)
    NewMBB.normalizeSuccProbs();

  // Now replace all of the moved successors in the original block with the new
  // block. This will merge their probabilities.
  for (MachineBasicBlock *Succ : NewMBB.successors())
    if (Succ != &UnsplitSucc)
      MBB.replaceSuccessor(Succ, &NewMBB);

  // We should always end up replacing at least one successor.
  assert(MBB.isSuccessor(&NewMBB) &&
         "Failed to make the new block a successor!");

  // Now update all the PHIs.
  for (MachineBasicBlock *Succ : NewMBB.successors()) {
    for (MachineInstr &MI : *Succ) {
      if (!MI.isPHI())
        break;

      for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
           OpIdx += 2) {
        MachineOperand &OpV = MI.getOperand(OpIdx);
        MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
        assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
        if (OpMBB.getMBB() != &MBB)
          continue;

        // Replace the operand for unsplit successors.
        if (!IsEdgeSplit || Succ != &UnsplitSucc) {
          OpMBB.setMBB(&NewMBB);

          // We have to continue scanning as there may be multiple entries in
          // the PHI.
          continue;
        }

        // When we have split the edge, append a new successor.
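        // Reuse the incoming value so the PHI gets a matching entry for the
        // new edge from NewMBB.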
        MI.addOperand(MF, OpV);
        MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
        break;
      }
    }
  }

  return NewMBB;
}

enum EFLAGSClobber { NoClobber, EvitableClobber, InevitableClobber };

static EFLAGSClobber getClobberType(const MachineInstr &MI) {
  const MachineOperand *FlagDef =
      MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
  if (!FlagDef)
    return NoClobber;
  if (FlagDef->isDead() && X86::getNFVariant(MI.getOpcode()))
    return EvitableClobber;

  return InevitableClobber;
}

bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << " **********\n");

  Subtarget = &MF.getSubtarget<X86Subtarget>();
  MRI = &MF.getRegInfo();
  TII = Subtarget->getInstrInfo();
  TRI = Subtarget->getRegisterInfo();
  PromoteRC = &X86::GR8RegClass;

  if (MF.empty())
    // Nothing to do for a degenerate empty function...
    return false;

  if (none_of(MRI->def_instructions(X86::EFLAGS), [](const MachineInstr &MI) {
        return MI.getOpcode() == TargetOpcode::COPY;
      }))
    return false;

  // We change the code, so we don't preserve the dominator tree anyway. If we
  // got a valid MDT from the pass manager, use that, otherwise construct one
  // now. This is an optimization that avoids unnecessary MDT construction for
  // functions that have no flag copies.

  auto MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
  std::unique_ptr<MachineDominatorTree> OwnedMDT;
  if (MDTWrapper) {
    MDT = &MDTWrapper->getDomTree();
  } else {
    OwnedMDT = std::make_unique<MachineDominatorTree>();
    OwnedMDT->getBase().recalculate(MF);
    MDT = OwnedMDT.get();
  }

  // Collect the copies in RPO so that when there are chains where a copy is in
  // turn copied again we visit the first one first. This ensures we can find
  // viable locations for testing the original EFLAGS that dominate all the
  // uses across complex CFGs.
  SmallSetVector<MachineInstr *, 4> Copies;
  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
  for (MachineBasicBlock *MBB : RPOT)
    for (MachineInstr &MI : *MBB)
      if (MI.getOpcode() == TargetOpcode::COPY &&
          MI.getOperand(0).getReg() == X86::EFLAGS)
        Copies.insert(&MI);

  // Try to eliminate the copies by transforming the instructions between the
  // copy and the copy def into NF (no flags update) variants, e.g.
  //
  //   %1:gr64 = COPY $eflags
  //   OP1 implicit-def dead $eflags
  //   $eflags = COPY %1
  //   OP2 cc, implicit $eflags
  //
  //   ->
  //
  //   OP1_NF
  //   OP2 implicit $eflags
  if (Subtarget->hasNF()) {
    SmallSetVector<MachineInstr *, 4> RemovedCopies;
    // CopyIIt may be invalidated by removing copies.
    auto CopyIIt = Copies.begin(), CopyIEnd = Copies.end();
    while (CopyIIt != CopyIEnd) {
      auto NCopyIIt = std::next(CopyIIt);
      SmallSetVector<MachineInstr *, 4> EvitableClobbers;
      MachineInstr *CopyI = *CopyIIt;
      MachineOperand &VOp = CopyI->getOperand(1);
      MachineInstr *CopyDefI = MRI->getVRegDef(VOp.getReg());
      MachineBasicBlock *CopyIMBB = CopyI->getParent();
      MachineBasicBlock *CopyDefIMBB = CopyDefI->getParent();
      // Walk all basic blocks reachable in depth-first iteration on the
      // inverse CFG from CopyIMBB to CopyDefIMBB.
      // These blocks are all the blocks that may be executed between the
      // execution of CopyDefIMBB and CopyIMBB. On all execution paths, every
      // instruction between CopyDefI and CopyI (exclusive) that clobbers the
      // flags has to be NF-convertible.
      for (auto BI = idf_begin(CopyIMBB), BE = idf_end(CopyDefIMBB); BI != BE;
           ++BI) {
        MachineBasicBlock *MBB = *BI;
        for (auto I = (MBB != CopyDefIMBB)
                          ? MBB->begin()
                          : std::next(MachineBasicBlock::iterator(CopyDefI)),
                  E = (MBB != CopyIMBB) ? MBB->end()
                                        : MachineBasicBlock::iterator(CopyI);
             I != E; ++I) {
          MachineInstr &MI = *I;
          EFLAGSClobber ClobberType = getClobberType(MI);
          if (ClobberType == NoClobber)
            continue;

          if (ClobberType == InevitableClobber)
            goto ProcessNextCopyI;

          assert(ClobberType == EvitableClobber && "unexpected workflow");
          EvitableClobbers.insert(&MI);
        }
      }
      // Convert evitable clobbers into NF variants and remove the copies.
      RemovedCopies.insert(CopyI);
      CopyI->eraseFromParent();
      if (MRI->use_nodbg_empty(CopyDefI->getOperand(0).getReg())) {
        RemovedCopies.insert(CopyDefI);
        CopyDefI->eraseFromParent();
      }
      ++NumCopiesEliminated;
      for (auto *Clobber : EvitableClobbers) {
        unsigned NewOpc = X86::getNFVariant(Clobber->getOpcode());
        assert(NewOpc && "evitable clobber must have a NF variant");
        Clobber->setDesc(TII->get(NewOpc));
        Clobber->removeOperand(
            Clobber->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)
                ->getOperandNo());
        ++NumNFsConvertedTo;
      }
      // Update live-ins for basic blocks on the path.
      for (auto BI = idf_begin(CopyIMBB), BE = idf_end(CopyDefIMBB); BI != BE;
           ++BI)
        if (*BI != CopyDefIMBB)
          BI->addLiveIn(X86::EFLAGS);
    ProcessNextCopyI:
      CopyIIt = NCopyIIt;
    }
    Copies.set_subtract(RemovedCopies);
  }

  // For the remaining copies that cannot be eliminated by the NF transform, we
  // use a setcc to preserve the flags in a GPR before OP1, and recheck its
  // value before using the flags, e.g.
  //
  //   %1:gr64 = COPY $eflags
  //   OP1 implicit-def dead $eflags
  //   $eflags = COPY %1
  //   OP2 cc, implicit $eflags
  //
  //   ->
  //
  //   %1:gr8 = SETCCr cc, implicit $eflags
  //   OP1 implicit-def dead $eflags
  //   TEST8rr %1, %1, implicit-def $eflags
  //   OP2 ne, implicit $eflags
  for (MachineInstr *CopyI : Copies) {
    MachineBasicBlock &MBB = *CopyI->getParent();

    MachineOperand &VOp = CopyI->getOperand(1);
    assert(VOp.isReg() &&
           "The input to the copy for EFLAGS should always be a register!");
    MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
    if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
      // FIXME: The big likely candidates here are PHI nodes. We could in theory
      // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard
      // enough that it is probably better to change every other part of LLVM
      // to avoid creating them. The issue is that once we have PHIs we won't
      // know which original EFLAGS value we need to capture with our setCCs
      // below. The end result will be computing a complete set of setCCs that
      // we *might* want, computing them in every place where we copy *out* of
      // EFLAGS and then doing SSA formation on all of them to insert necessary
      // PHI nodes and consume those here. Then hoping that somehow we DCE the
      // unnecessary ones.
      // This DCE seems very unlikely to be successful and so we will almost
      // certainly end up with a glut of dead setCC instructions. Until we have
      // a motivating test case and fail to avoid it by changing other parts of
      // LLVM's lowering, we refuse to handle this complex case here.
      LLVM_DEBUG(
          dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
          CopyDefI.dump());
      report_fatal_error(
          "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
    }

    auto Cleanup = make_scope_exit([&] {
      // All uses of the EFLAGS copy are now rewritten; kill the copy into
      // EFLAGS and, if it is now dead, the copy out of EFLAGS as well.
      CopyI->eraseFromParent();
      if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
        CopyDefI.eraseFromParent();
      ++NumCopiesEliminated;
    });

    MachineOperand &DOp = CopyI->getOperand(0);
    assert(DOp.isDef() && "Expected register def!");
    assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
    if (DOp.isDead())
      continue;

    MachineBasicBlock *TestMBB = CopyDefI.getParent();
    auto TestPos = CopyDefI.getIterator();
    DebugLoc TestLoc = CopyDefI.getDebugLoc();

    LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());

    // Walk up across live-in EFLAGS to find where they were actually def'ed.
    //
    // This copy's def may just be part of a region of blocks covered by
    // a single def of EFLAGS and we want to find the top of that region where
    // possible.
    //
    // This is essentially a search for a *candidate* reaching definition
    // location. We don't need to ever find the actual reaching definition
    // here, but we want to walk up the dominator tree to find the highest
    // point which would be viable for such a definition.
    auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin,
                                MachineBasicBlock::iterator End) {
      // Scan backwards as we expect these to be relatively short and often
      // find a clobber near the end.
      return llvm::any_of(
          llvm::reverse(llvm::make_range(Begin, End)), [&](MachineInstr &MI) {
            // Flag any instruction (other than the copy we are
            // currently rewriting) that defs EFLAGS.
            return &MI != CopyI &&
                   MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
          });
    };
    auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB,
                                    MachineBasicBlock *EndMBB) {
      assert(MDT->dominates(BeginMBB, EndMBB) &&
             "Only support paths down the dominator tree!");
      SmallPtrSet<MachineBasicBlock *, 4> Visited;
      SmallVector<MachineBasicBlock *, 4> Worklist;
      // We terminate at the beginning. No need to scan it.
      Visited.insert(BeginMBB);
      Worklist.push_back(EndMBB);
      do {
        auto *MBB = Worklist.pop_back_val();
        for (auto *PredMBB : MBB->predecessors()) {
          if (!Visited.insert(PredMBB).second)
            continue;
          if (HasEFLAGSClobber(PredMBB->begin(), PredMBB->end()))
            return true;
          // Enqueue this block to walk its predecessors.
          Worklist.push_back(PredMBB);
        }
      } while (!Worklist.empty());
      // No clobber found along any path from the beginning to the end.
      return false;
    };
    while (TestMBB->isLiveIn(X86::EFLAGS) && !TestMBB->pred_empty() &&
           !HasEFLAGSClobber(TestMBB->begin(), TestPos)) {
      // Find the nearest common dominator of the predecessors, as
      // that will be the best candidate to hoist into.
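      // This folds MDT->findNearestCommonDominator over the predecessor list.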
      MachineBasicBlock *HoistMBB =
          std::accumulate(std::next(TestMBB->pred_begin()), TestMBB->pred_end(),
                          *TestMBB->pred_begin(),
                          [&](MachineBasicBlock *LHS, MachineBasicBlock *RHS) {
                            return MDT->findNearestCommonDominator(LHS, RHS);
                          });

      // Now we need to scan all predecessors that may be reached along paths
      // to the hoist block. A clobber anywhere in any of these blocks prevents
      // the hoist. Note that this even handles loops because we require *no*
      // clobbers.
      if (HasEFLAGSClobberPath(HoistMBB, TestMBB))
        break;

      // We also need the terminators to not sneakily clobber flags.
      if (HasEFLAGSClobber(HoistMBB->getFirstTerminator()->getIterator(),
                           HoistMBB->instr_end()))
        break;

      // We found a viable location, hoist our test position to it.
      TestMBB = HoistMBB;
      TestPos = TestMBB->getFirstTerminator()->getIterator();
      // Clear the debug location as it would just be confusing after hoisting.
      TestLoc = DebugLoc();
    }
    LLVM_DEBUG({
      auto DefIt = llvm::find_if(
          llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)),
          [&](MachineInstr &MI) {
            return MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
          });
      if (DefIt.base() != TestMBB->instr_begin()) {
        dbgs() << " Using EFLAGS defined by: ";
        DefIt->dump();
      } else {
        dbgs() << " Using live-in flags for BB:\n";
        TestMBB->dump();
      }
    });

    // While rewriting uses, we buffer jumps and rewrite them in a second pass
    // because doing so will perturb the CFG that we are walking to find the
    // uses in the first place.
    SmallVector<MachineInstr *, 4> JmpIs;

    // Gather the condition flags that have already been preserved in
    // registers. We do this from scratch each time as we expect there to be
    // very few of them and we expect to not revisit the same copy definition
    // many times. If either of those change sufficiently we could build a map
    // of these up front instead.
    CondRegArray CondRegs = collectCondsInRegs(*TestMBB, TestPos);

    // Collect the basic blocks we need to scan. Typically this will just be
    // a single basic block but we may have to scan multiple blocks if the
    // EFLAGS copy lives into successors.
    SmallVector<MachineBasicBlock *, 2> Blocks;
    SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
    Blocks.push_back(&MBB);

    do {
      MachineBasicBlock &UseMBB = *Blocks.pop_back_val();

      // Track if/when we find a kill of the flags in this block.
      bool FlagsKilled = false;

      // In most cases, we walk from the beginning to the end of the block. But
      // when the block is the same block as the copy is from, we will visit it
      // twice. The first time we start from the copy and go to the end. The
      // second time we start from the beginning and go to the copy. This lets
      // us handle copies inside of cycles.
      // FIXME: This loop is *super* confusing. This is at least in part
      // a symptom of all of this routine needing to be refactored into
      // documentable components. Once done, there may be a better way to write
      // this loop.
      for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(&UseMBB))
                          ? std::next(CopyI->getIterator())
                          : UseMBB.instr_begin(),
                MIE = UseMBB.instr_end();
           MII != MIE;) {
        MachineInstr &MI = *MII++;
        // If we are in the original copy block and encounter either the copy
        // def or the copy itself, break so that we don't re-process any part
        // of the block or process the instructions in the range that was
        // copied over.
        if (&MI == CopyI || &MI == &CopyDefI) {
          assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) &&
                 "Should only encounter these on the second pass over the "
                 "original block.");
          break;
        }

        MachineOperand *FlagUse =
            MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr);
        FlagsKilled = MI.modifiesRegister(X86::EFLAGS, TRI);

        if (!FlagUse && FlagsKilled)
          break;
        else if (!FlagUse)
          continue;

        LLVM_DEBUG(dbgs() << " Rewriting use: "; MI.dump());

        // Check the kill flag before we rewrite as that may change it.
        if (FlagUse->isKill())
          FlagsKilled = true;

        // Once we encounter a branch, the rest of the instructions must also
        // be branches. We can't rewrite in place here, so we handle them below.
        //
        // Note that we don't have to handle tail calls here, even conditional
        // tail calls, as those are not introduced into the X86 MI until post-RA
        // branch folding or block placement. As a consequence, we get to deal
        // with the simpler formulation of conditional branches followed by tail
        // calls.
        if (X86::getCondFromBranch(MI) != X86::COND_INVALID) {
          auto JmpIt = MI.getIterator();
          do {
            JmpIs.push_back(&*JmpIt);
            ++JmpIt;
          } while (JmpIt != UseMBB.instr_end() &&
                   X86::getCondFromBranch(*JmpIt) != X86::COND_INVALID);
          break;
        }

        // Otherwise we can just rewrite in-place.
        unsigned Opc = MI.getOpcode();
        if (Opc == TargetOpcode::COPY) {
          // Just replace this copy with the original copy def.
          MRI->replaceRegWith(MI.getOperand(0).getReg(),
                              CopyDefI.getOperand(0).getReg());
          MI.eraseFromParent();
        } else if (X86::isSETCC(Opc)) {
          rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, CondRegs);
        } else if (isArithmeticOp(Opc)) {
          rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, CondRegs);
        } else {
          rewriteMI(*TestMBB, TestPos, TestLoc, MI, CondRegs);
        }

        // If this was the last use of the flags, we're done.
        if (FlagsKilled)
          break;
      }

      // If the flags were killed, we're done with this block.
      if (FlagsKilled)
        continue;

      // Otherwise we need to scan successors for ones where the flags are
      // live-in and queue those up for processing.
      for (MachineBasicBlock *SuccMBB : UseMBB.successors())
        if (SuccMBB->isLiveIn(X86::EFLAGS) &&
            VisitedBlocks.insert(SuccMBB).second) {
          // We currently don't do any PHI insertion and so we require that the
          // test basic block dominates all of the use basic blocks. Further,
          // we can't have a cycle from the test block back to itself as that
          // would create a cycle requiring a PHI to break it.
          //
          // We could in theory do PHI insertion here if it becomes useful by
          // just taking undef values in along every edge that we don't trace
          // this EFLAGS copy along. This isn't as bad as fully general PHI
          // insertion, but still seems like a great deal of complexity.
          //
          // Because it is theoretically possible that some earlier MI pass or
          // other lowering transformation could induce this to happen, we do
          // a hard check even in non-debug builds here.
          if (SuccMBB == TestMBB || !MDT->dominates(TestMBB, SuccMBB)) {
            LLVM_DEBUG({
              dbgs()
                  << "ERROR: Encountered use that is not dominated by our test "
                     "basic block! Rewriting this would require inserting PHI "
                     "nodes to track the flag state across the CFG.\n\nTest "
                     "block:\n";
              TestMBB->dump();
              dbgs() << "Use block:\n";
              SuccMBB->dump();
            });
            report_fatal_error(
                "Cannot lower EFLAGS copy when original copy def "
                "does not dominate all uses.");
          }

          Blocks.push_back(SuccMBB);

          // After this, EFLAGS will be recreated before each use.
          SuccMBB->removeLiveIn(X86::EFLAGS);
        }
    } while (!Blocks.empty());

    // Now rewrite the jumps that use the flags. These we handle specially
    // because if there are multiple jumps in a single basic block we'll have
    // to do surgery on the CFG.
    MachineBasicBlock *LastJmpMBB = nullptr;
    for (MachineInstr *JmpI : JmpIs) {
      // Past the first jump within a basic block we need to split the blocks
      // apart.
      if (JmpI->getParent() == LastJmpMBB)
        splitBlock(*JmpI->getParent(), *JmpI, *TII);
      else
        LastJmpMBB = JmpI->getParent();

      rewriteMI(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
    }

    // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
    // the copy's def operand is itself a kill.
  }

#ifndef NDEBUG
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      if (MI.getOpcode() == TargetOpcode::COPY &&
          (MI.getOperand(0).getReg() == X86::EFLAGS ||
           MI.getOperand(1).getReg() == X86::EFLAGS)) {
        LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: ";
                   MI.dump());
        llvm_unreachable("Unlowered EFLAGS copy!");
      }
#endif

  return true;
}

/// Collect any conditions that have already been set in registers so that we
/// can re-use them rather than adding duplicates.
CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos) {
  CondRegArray CondRegs = {};

  // Scan backwards across the range of instructions with live EFLAGS.
  for (MachineInstr &MI :
       llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
    X86::CondCode Cond = X86::getCondFromSETCC(MI);
    if (Cond != X86::COND_INVALID && !MI.mayStore() &&
        MI.getOperand(0).isReg() && MI.getOperand(0).getReg().isVirtual()) {
      assert(MI.getOperand(0).isDef() &&
             "A non-storing SETcc should always define a register!");
      CondRegs[Cond] = MI.getOperand(0).getReg();
    }

    // Stop scanning when we see the first definition of the EFLAGS as prior to
    // this we would potentially capture the wrong flag state.
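    // Any SETcc above that definition would have captured an older flag value
    // and cannot be reused.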
    if (MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr))
      break;
  }
  return CondRegs;
}

Register X86FlagsCopyLoweringPass::promoteCondToReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    const DebugLoc &TestLoc, X86::CondCode Cond) {
  Register Reg = MRI->createVirtualRegister(PromoteRC);
  auto SetI = BuildMI(TestMBB, TestPos, TestLoc, TII->get(X86::SETCCr), Reg)
                  .addImm(Cond);
  (void)SetI;
  LLVM_DEBUG(dbgs() << " save cond: "; SetI->dump());
  ++NumSetCCsInserted;
  return Reg;
}

std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
  unsigned &CondReg = CondRegs[Cond];
  unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)];
  if (!CondReg && !InvCondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  if (CondReg)
    return {CondReg, false};
  else
    return {InvCondReg, true};
}

void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator Pos,
                                          const DebugLoc &Loc, unsigned Reg) {
  auto TestI =
      BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
  (void)TestI;
  LLVM_DEBUG(dbgs() << " test cond: "; TestI->dump());
  ++NumTestsInserted;
}

void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator Pos,
                                            const DebugLoc &Loc,
                                            MachineInstr &MI,
                                            CondRegArray &CondRegs) {
  X86::CondCode Cond = X86::getCondFromSETCC(MI);
  // Note that we can't usefully rewrite this to the inverse without complex
  // analysis of the users of the setCC. Largely we rely on avoidable
  // duplicates having already been avoided by the time we get here.
  unsigned &CondReg = CondRegs[Cond];
  if (!CondReg)
    CondReg = promoteCondToReg(MBB, Pos, Loc, Cond);

  // Rewriting a register def is trivial: we just replace the register and
  // remove the setcc.
  if (!MI.mayStore()) {
    assert(MI.getOperand(0).isReg() &&
           "Cannot have a non-register defined operand to SETcc!");
    Register OldReg = MI.getOperand(0).getReg();
    // Drop kill flags on the old register before replacing. CondReg may have
    // a longer live range.
    MRI->clearKillFlags(OldReg);
    MRI->replaceRegWith(OldReg, CondReg);
    MI.eraseFromParent();
    return;
  }

  // Otherwise, we need to emit a store.
  auto MIB = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
                     TII->get(X86::MOV8mr));
  // Copy the address operands.
  for (int i = 0; i < X86::AddrNumOperands; ++i)
    MIB.add(MI.getOperand(i));

  MIB.addReg(CondReg);
  MIB.setMemRefs(MI.memoperands());
  MI.eraseFromParent();
}

void X86FlagsCopyLoweringPass::rewriteArithmetic(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
    const DebugLoc &Loc, MachineInstr &MI, CondRegArray &CondRegs) {
  // Arithmetic is either reading CF or OF.
  X86::CondCode Cond = X86::COND_B; // CF == 1
  // The addend to use to reset CF or OF when added to the flag value.
  // Choose an addend such that adding a flag value of one carries out of the
  // 8-bit register, since there is no higher bit available.
  int Addend = 255;

  // Now get a register that contains the value of the flag input to the
  // arithmetic.
  // We require exactly this flag to simplify the arithmetic required to
  // materialize it back into the flag.
  unsigned &CondReg = CondRegs[Cond];
  if (!CondReg)
    CondReg = promoteCondToReg(MBB, Pos, Loc, Cond);

  // Insert an instruction that will set the flag back to the desired value.
  Register TmpReg = MRI->createVirtualRegister(PromoteRC);
  auto AddI =
      BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
              TII->get(Subtarget->hasNDD() ? X86::ADD8ri_ND : X86::ADD8ri))
          .addDef(TmpReg, RegState::Dead)
          .addReg(CondReg)
          .addImm(Addend);
  (void)AddI;
  LLVM_DEBUG(dbgs() << " add cond: "; AddI->dump());
  ++NumAddsInserted;
  MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
}

static X86::CondCode getImplicitCondFromMI(unsigned Opc) {
#define FROM_TO(A, B)                                                          \
  case X86::CMOV##A##_Fp32:                                                    \
  case X86::CMOV##A##_Fp64:                                                    \
  case X86::CMOV##A##_Fp80:                                                    \
    return X86::COND_##B;

  switch (Opc) {
  default:
    return X86::COND_INVALID;
    FROM_TO(B, B)
    FROM_TO(E, E)
    FROM_TO(P, P)
    FROM_TO(BE, BE)
    FROM_TO(NB, AE)
    FROM_TO(NE, NE)
    FROM_TO(NP, NP)
    FROM_TO(NBE, A)
  }
#undef FROM_TO
}

static unsigned getOpcodeWithCC(unsigned Opc, X86::CondCode CC) {
  assert((CC == X86::COND_E || CC == X86::COND_NE) && "Unexpected CC");
#define CASE(A)                                                                \
  case X86::CMOVB_##A:                                                         \
  case X86::CMOVE_##A:                                                         \
  case X86::CMOVP_##A:                                                         \
  case X86::CMOVBE_##A:                                                        \
  case X86::CMOVNB_##A:                                                        \
  case X86::CMOVNE_##A:                                                        \
  case X86::CMOVNP_##A:                                                        \
  case X86::CMOVNBE_##A:                                                       \
    return (CC == X86::COND_E) ? X86::CMOVE_##A : X86::CMOVNE_##A;
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected opcode");
    CASE(Fp32)
    CASE(Fp64)
    CASE(Fp80)
  }
#undef CASE
}

void X86FlagsCopyLoweringPass::rewriteMI(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator Pos,
                                         const DebugLoc &Loc, MachineInstr &MI,
                                         CondRegArray &CondRegs) {
  // First get the register containing this specific condition.
  bool IsImplicitCC = false;
  X86::CondCode CC = X86::getCondFromMI(MI);
  if (CC == X86::COND_INVALID) {
    CC = getImplicitCondFromMI(MI.getOpcode());
    IsImplicitCC = true;
  }
  assert(CC != X86::COND_INVALID && "Unknown EFLAG user!");
  unsigned CondReg;
  bool Inverted;
  std::tie(CondReg, Inverted) =
      getCondOrInverseInReg(MBB, Pos, Loc, CC, CondRegs);

  // Insert a direct test of the saved register.
  insertTest(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), CondReg);

  // Rewrite the instruction to use the !ZF flag from the test, and then kill
  // its use of the flags afterward.
  X86::CondCode NewCC = Inverted ? X86::COND_E : X86::COND_NE;
  if (IsImplicitCC)
    MI.setDesc(TII->get(getOpcodeWithCC(MI.getOpcode(), NewCC)));
  else
    MI.getOperand(MI.getDesc().getNumOperands() - 1).setImm(NewCC);

  MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
  LLVM_DEBUG(dbgs() << " fixed instruction: "; MI.dump());
}