1*0b57cec5SDimitry Andric //====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric /// \file 9*0b57cec5SDimitry Andric /// 10*0b57cec5SDimitry Andric /// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual 11*0b57cec5SDimitry Andric /// flag bits. 12*0b57cec5SDimitry Andric /// 13*0b57cec5SDimitry Andric /// We have to do this by carefully analyzing and rewriting the usage of the 14*0b57cec5SDimitry Andric /// copied EFLAGS register because there is no general way to rematerialize the 15*0b57cec5SDimitry Andric /// entire EFLAGS register safely and efficiently. Using `popf` both forces 16*0b57cec5SDimitry Andric /// dynamic stack adjustment and can create correctness issues due to IF, TF, 17*0b57cec5SDimitry Andric /// and other non-status flags being overwritten. Using sequences involving 18*0b57cec5SDimitry Andric /// SAHF don't work on all x86 processors and are often quite slow compared to 19*0b57cec5SDimitry Andric /// directly testing a single status preserved in its own GPR. 
20*0b57cec5SDimitry Andric /// 21*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 22*0b57cec5SDimitry Andric 23*0b57cec5SDimitry Andric #include "X86.h" 24*0b57cec5SDimitry Andric #include "X86InstrBuilder.h" 25*0b57cec5SDimitry Andric #include "X86InstrInfo.h" 26*0b57cec5SDimitry Andric #include "X86Subtarget.h" 27*0b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h" 28*0b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 29*0b57cec5SDimitry Andric #include "llvm/ADT/PostOrderIterator.h" 30*0b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 31*0b57cec5SDimitry Andric #include "llvm/ADT/ScopeExit.h" 32*0b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 33*0b57cec5SDimitry Andric #include "llvm/ADT/SmallSet.h" 34*0b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 35*0b57cec5SDimitry Andric #include "llvm/ADT/SparseBitVector.h" 36*0b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 37*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 38*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineConstantPool.h" 39*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 40*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 41*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 42*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 43*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 44*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h" 45*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 46*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 47*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineSSAUpdater.h" 48*0b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 49*0b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 50*0b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSchedule.h" 51*0b57cec5SDimitry Andric 
#include "llvm/CodeGen/TargetSubtargetInfo.h" 52*0b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h" 53*0b57cec5SDimitry Andric #include "llvm/MC/MCSchedule.h" 54*0b57cec5SDimitry Andric #include "llvm/Pass.h" 55*0b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 56*0b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 57*0b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 58*0b57cec5SDimitry Andric #include <algorithm> 59*0b57cec5SDimitry Andric #include <cassert> 60*0b57cec5SDimitry Andric #include <iterator> 61*0b57cec5SDimitry Andric #include <utility> 62*0b57cec5SDimitry Andric 63*0b57cec5SDimitry Andric using namespace llvm; 64*0b57cec5SDimitry Andric 65*0b57cec5SDimitry Andric #define PASS_KEY "x86-flags-copy-lowering" 66*0b57cec5SDimitry Andric #define DEBUG_TYPE PASS_KEY 67*0b57cec5SDimitry Andric 68*0b57cec5SDimitry Andric STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated"); 69*0b57cec5SDimitry Andric STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted"); 70*0b57cec5SDimitry Andric STATISTIC(NumTestsInserted, "Number of test instructions inserted"); 71*0b57cec5SDimitry Andric STATISTIC(NumAddsInserted, "Number of adds instructions inserted"); 72*0b57cec5SDimitry Andric 73*0b57cec5SDimitry Andric namespace { 74*0b57cec5SDimitry Andric 75*0b57cec5SDimitry Andric // Convenient array type for storing registers associated with each condition. 
76*0b57cec5SDimitry Andric using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>; 77*0b57cec5SDimitry Andric 78*0b57cec5SDimitry Andric class X86FlagsCopyLoweringPass : public MachineFunctionPass { 79*0b57cec5SDimitry Andric public: 80*0b57cec5SDimitry Andric X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) { } 81*0b57cec5SDimitry Andric 82*0b57cec5SDimitry Andric StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; } 83*0b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 84*0b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 85*0b57cec5SDimitry Andric 86*0b57cec5SDimitry Andric /// Pass identification, replacement for typeid. 87*0b57cec5SDimitry Andric static char ID; 88*0b57cec5SDimitry Andric 89*0b57cec5SDimitry Andric private: 90*0b57cec5SDimitry Andric MachineRegisterInfo *MRI; 91*0b57cec5SDimitry Andric const X86Subtarget *Subtarget; 92*0b57cec5SDimitry Andric const X86InstrInfo *TII; 93*0b57cec5SDimitry Andric const TargetRegisterInfo *TRI; 94*0b57cec5SDimitry Andric const TargetRegisterClass *PromoteRC; 95*0b57cec5SDimitry Andric MachineDominatorTree *MDT; 96*0b57cec5SDimitry Andric 97*0b57cec5SDimitry Andric CondRegArray collectCondsInRegs(MachineBasicBlock &MBB, 98*0b57cec5SDimitry Andric MachineBasicBlock::iterator CopyDefI); 99*0b57cec5SDimitry Andric 100*0b57cec5SDimitry Andric unsigned promoteCondToReg(MachineBasicBlock &MBB, 101*0b57cec5SDimitry Andric MachineBasicBlock::iterator TestPos, 102*0b57cec5SDimitry Andric DebugLoc TestLoc, X86::CondCode Cond); 103*0b57cec5SDimitry Andric std::pair<unsigned, bool> 104*0b57cec5SDimitry Andric getCondOrInverseInReg(MachineBasicBlock &TestMBB, 105*0b57cec5SDimitry Andric MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, 106*0b57cec5SDimitry Andric X86::CondCode Cond, CondRegArray &CondRegs); 107*0b57cec5SDimitry Andric void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, 
108*0b57cec5SDimitry Andric DebugLoc Loc, unsigned Reg); 109*0b57cec5SDimitry Andric 110*0b57cec5SDimitry Andric void rewriteArithmetic(MachineBasicBlock &TestMBB, 111*0b57cec5SDimitry Andric MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, 112*0b57cec5SDimitry Andric MachineInstr &MI, MachineOperand &FlagUse, 113*0b57cec5SDimitry Andric CondRegArray &CondRegs); 114*0b57cec5SDimitry Andric void rewriteCMov(MachineBasicBlock &TestMBB, 115*0b57cec5SDimitry Andric MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, 116*0b57cec5SDimitry Andric MachineInstr &CMovI, MachineOperand &FlagUse, 117*0b57cec5SDimitry Andric CondRegArray &CondRegs); 118*0b57cec5SDimitry Andric void rewriteCondJmp(MachineBasicBlock &TestMBB, 119*0b57cec5SDimitry Andric MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, 120*0b57cec5SDimitry Andric MachineInstr &JmpI, CondRegArray &CondRegs); 121*0b57cec5SDimitry Andric void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse, 122*0b57cec5SDimitry Andric MachineInstr &CopyDefI); 123*0b57cec5SDimitry Andric void rewriteSetCarryExtended(MachineBasicBlock &TestMBB, 124*0b57cec5SDimitry Andric MachineBasicBlock::iterator TestPos, 125*0b57cec5SDimitry Andric DebugLoc TestLoc, MachineInstr &SetBI, 126*0b57cec5SDimitry Andric MachineOperand &FlagUse, CondRegArray &CondRegs); 127*0b57cec5SDimitry Andric void rewriteSetCC(MachineBasicBlock &TestMBB, 128*0b57cec5SDimitry Andric MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, 129*0b57cec5SDimitry Andric MachineInstr &SetCCI, MachineOperand &FlagUse, 130*0b57cec5SDimitry Andric CondRegArray &CondRegs); 131*0b57cec5SDimitry Andric }; 132*0b57cec5SDimitry Andric 133*0b57cec5SDimitry Andric } // end anonymous namespace 134*0b57cec5SDimitry Andric 135*0b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE, 136*0b57cec5SDimitry Andric "X86 EFLAGS copy lowering", false, false) 137*0b57cec5SDimitry Andric INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE, 
138*0b57cec5SDimitry Andric "X86 EFLAGS copy lowering", false, false) 139*0b57cec5SDimitry Andric 140*0b57cec5SDimitry Andric FunctionPass *llvm::createX86FlagsCopyLoweringPass() { 141*0b57cec5SDimitry Andric return new X86FlagsCopyLoweringPass(); 142*0b57cec5SDimitry Andric } 143*0b57cec5SDimitry Andric 144*0b57cec5SDimitry Andric char X86FlagsCopyLoweringPass::ID = 0; 145*0b57cec5SDimitry Andric 146*0b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const { 147*0b57cec5SDimitry Andric AU.addRequired<MachineDominatorTree>(); 148*0b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 149*0b57cec5SDimitry Andric } 150*0b57cec5SDimitry Andric 151*0b57cec5SDimitry Andric namespace { 152*0b57cec5SDimitry Andric /// An enumeration of the arithmetic instruction mnemonics which have 153*0b57cec5SDimitry Andric /// interesting flag semantics. 154*0b57cec5SDimitry Andric /// 155*0b57cec5SDimitry Andric /// We can map instruction opcodes into these mnemonics to make it easy to 156*0b57cec5SDimitry Andric /// dispatch with specific functionality. 
enum class FlagArithMnemonic {
  ADC,
  ADCX,
  ADOX,
  RCL,
  RCR,
  SBB,
};
} // namespace

/// Map an arithmetic opcode that both uses and defines flags onto its
/// mnemonic so callers can dispatch on semantics rather than on the many
/// size/operand-form opcode variants. Aborts on unsupported opcodes.
static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    report_fatal_error("No support for lowering a copy into EFLAGS when used "
                       "by this instruction!");

// Expand the 8/16/32/64-bit width variants of a single operand form.
#define LLVM_EXPAND_INSTR_SIZES(MNEMONIC, SUFFIX)                              \
  case X86::MNEMONIC##8##SUFFIX:                                               \
  case X86::MNEMONIC##16##SUFFIX:                                              \
  case X86::MNEMONIC##32##SUFFIX:                                              \
  case X86::MNEMONIC##64##SUFFIX:

// Expand every operand form (reg/reg, reg/mem, mem/reg, immediates, and the
// implicit-accumulator forms) of an ADC/SBB-style mnemonic.
#define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC)                                    \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr)                                        \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV)                                    \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm)                                        \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr)                                        \
  case X86::MNEMONIC##8ri:                                                     \
  case X86::MNEMONIC##16ri8:                                                   \
  case X86::MNEMONIC##32ri8:                                                   \
  case X86::MNEMONIC##64ri8:                                                   \
  case X86::MNEMONIC##16ri:                                                    \
  case X86::MNEMONIC##32ri:                                                    \
  case X86::MNEMONIC##64ri32:                                                  \
  case X86::MNEMONIC##8mi:                                                     \
  case X86::MNEMONIC##16mi8:                                                   \
  case X86::MNEMONIC##32mi8:                                                   \
  case X86::MNEMONIC##64mi8:                                                   \
  case X86::MNEMONIC##16mi:                                                    \
  case X86::MNEMONIC##32mi:                                                    \
  case X86::MNEMONIC##64mi32:                                                  \
  case X86::MNEMONIC##8i8:                                                     \
  case X86::MNEMONIC##16i16:                                                   \
  case X86::MNEMONIC##32i32:                                                   \
  case X86::MNEMONIC##64i32:

    LLVM_EXPAND_ADC_SBB_INSTR(ADC)
    return FlagArithMnemonic::ADC;

    LLVM_EXPAND_ADC_SBB_INSTR(SBB)
    return FlagArithMnemonic::SBB;

#undef LLVM_EXPAND_ADC_SBB_INSTR

    LLVM_EXPAND_INSTR_SIZES(RCL, rCL)
    LLVM_EXPAND_INSTR_SIZES(RCL, r1)
    LLVM_EXPAND_INSTR_SIZES(RCL, ri)
    return FlagArithMnemonic::RCL;

    LLVM_EXPAND_INSTR_SIZES(RCR, rCL)
    LLVM_EXPAND_INSTR_SIZES(RCR, r1)
    LLVM_EXPAND_INSTR_SIZES(RCR, ri)
    return FlagArithMnemonic::RCR;

#undef LLVM_EXPAND_INSTR_SIZES

  case X86::ADCX32rr:
  case X86::ADCX64rr:
  case X86::ADCX32rm:
  case X86::ADCX64rm:
    return FlagArithMnemonic::ADCX;

  case X86::ADOX32rr:
  case X86::ADOX64rr:
  case X86::ADOX32rm:
  case X86::ADOX64rm:
    return FlagArithMnemonic::ADOX;
  }
}

/// Split `MBB` before the conditional branch `SplitI`, moving `SplitI` and
/// everything after it into a fresh block, and fix up successor lists,
/// branch probabilities, and PHI nodes in the successors.
static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
                                     MachineInstr &SplitI,
                                     const X86InstrInfo &TII) {
  MachineFunction &MF = *MBB.getParent();

  assert(SplitI.getParent() == &MBB &&
         "Split instruction must be in the split block!");
  assert(SplitI.isBranch() &&
         "Only designed to split a tail of branch instructions!");
  assert(X86::getCondFromBranch(SplitI) != X86::COND_INVALID &&
         "Must split on an actual jCC instruction!");

  // Dig out the previous instruction to the split point.
  MachineInstr &PrevI = *std::prev(SplitI.getIterator());
  assert(PrevI.isBranch() && "Must split after a branch!");
  assert(X86::getCondFromBranch(PrevI) != X86::COND_INVALID &&
         "Must split after an actual jCC instruction!");
  assert(!std::prev(PrevI.getIterator())->isTerminator() &&
         "Must only have this one terminator prior to the split!");

  // Grab the one successor edge that will stay in `MBB`.
  MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();

  // Analyze the original block to see if we are actually splitting an edge
  // into two edges. This can happen when we have multiple conditional jumps to
  // the same successor.
  bool IsEdgeSplit =
      std::any_of(SplitI.getIterator(), MBB.instr_end(),
                  [&](MachineInstr &MI) {
                    assert(MI.isTerminator() &&
                           "Should only have spliced terminators!");
                    return llvm::any_of(
                        MI.operands(), [&](MachineOperand &MOp) {
                          return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
                        });
                  }) ||
      MBB.getFallThrough() == &UnsplitSucc;

  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();

  // Insert the new block immediately after the current one. Any existing
  // fallthrough will be sunk into this new block anyways.
  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);

  // Splice the tail of instructions into the new block.
  NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());

  // Copy the necessary successors (and their probability info) into the new
  // block.
  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
    if (IsEdgeSplit || *SI != &UnsplitSucc)
      NewMBB.copySuccessor(&MBB, SI);
  // Normalize the probabilities if we didn't end up splitting the edge.
  if (!IsEdgeSplit)
    NewMBB.normalizeSuccProbs();

  // Now replace all of the moved successors in the original block with the new
  // block. This will merge their probabilities.
  for (MachineBasicBlock *Succ : NewMBB.successors())
    if (Succ != &UnsplitSucc)
      MBB.replaceSuccessor(Succ, &NewMBB);

  // We should always end up replacing at least one successor.
  assert(MBB.isSuccessor(&NewMBB) &&
         "Failed to make the new block a successor!");

  // Now update all the PHIs.
  for (MachineBasicBlock *Succ : NewMBB.successors()) {
    for (MachineInstr &MI : *Succ) {
      if (!MI.isPHI())
        break;

      // PHI operands come in (value, block) pairs after the def at index 0.
      for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
           OpIdx += 2) {
        MachineOperand &OpV = MI.getOperand(OpIdx);
        MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
        assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
        if (OpMBB.getMBB() != &MBB)
          continue;

        // Replace the operand for unsplit successors
        if (!IsEdgeSplit || Succ != &UnsplitSucc) {
          OpMBB.setMBB(&NewMBB);

          // We have to continue scanning as there may be multiple entries in
          // the PHI.
          continue;
        }

        // When we have split the edge append a new successor.
        MI.addOperand(MF, OpV);
        MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
        break;
      }
    }
  }

  return NewMBB;
}

bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << " **********\n");

  // Cache the per-function target objects and analyses used throughout.
  Subtarget = &MF.getSubtarget<X86Subtarget>();
  MRI = &MF.getRegInfo();
  TII = Subtarget->getInstrInfo();
  TRI = Subtarget->getRegisterInfo();
  MDT = &getAnalysis<MachineDominatorTree>();
  PromoteRC = &X86::GR8RegClass;

  if (MF.begin() == MF.end())
    // Nothing to do for a degenerate empty function...
    return false;

  // Collect the copies in RPO so that when there are chains where a copy is in
  // turn copied again we visit the first one first. This ensures we can find
  // viable locations for testing the original EFLAGS that dominate all the
  // uses across complex CFGs.
356*0b57cec5SDimitry Andric SmallVector<MachineInstr *, 4> Copies; 357*0b57cec5SDimitry Andric ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); 358*0b57cec5SDimitry Andric for (MachineBasicBlock *MBB : RPOT) 359*0b57cec5SDimitry Andric for (MachineInstr &MI : *MBB) 360*0b57cec5SDimitry Andric if (MI.getOpcode() == TargetOpcode::COPY && 361*0b57cec5SDimitry Andric MI.getOperand(0).getReg() == X86::EFLAGS) 362*0b57cec5SDimitry Andric Copies.push_back(&MI); 363*0b57cec5SDimitry Andric 364*0b57cec5SDimitry Andric for (MachineInstr *CopyI : Copies) { 365*0b57cec5SDimitry Andric MachineBasicBlock &MBB = *CopyI->getParent(); 366*0b57cec5SDimitry Andric 367*0b57cec5SDimitry Andric MachineOperand &VOp = CopyI->getOperand(1); 368*0b57cec5SDimitry Andric assert(VOp.isReg() && 369*0b57cec5SDimitry Andric "The input to the copy for EFLAGS should always be a register!"); 370*0b57cec5SDimitry Andric MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg()); 371*0b57cec5SDimitry Andric if (CopyDefI.getOpcode() != TargetOpcode::COPY) { 372*0b57cec5SDimitry Andric // FIXME: The big likely candidate here are PHI nodes. We could in theory 373*0b57cec5SDimitry Andric // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard 374*0b57cec5SDimitry Andric // enough that it is probably better to change every other part of LLVM 375*0b57cec5SDimitry Andric // to avoid creating them. The issue is that once we have PHIs we won't 376*0b57cec5SDimitry Andric // know which original EFLAGS value we need to capture with our setCCs 377*0b57cec5SDimitry Andric // below. The end result will be computing a complete set of setCCs that 378*0b57cec5SDimitry Andric // we *might* want, computing them in every place where we copy *out* of 379*0b57cec5SDimitry Andric // EFLAGS and then doing SSA formation on all of them to insert necessary 380*0b57cec5SDimitry Andric // PHI nodes and consume those here. Then hoping that somehow we DCE the 381*0b57cec5SDimitry Andric // unnecessary ones. 
This DCE seems very unlikely to be successful and so 382*0b57cec5SDimitry Andric // we will almost certainly end up with a glut of dead setCC 383*0b57cec5SDimitry Andric // instructions. Until we have a motivating test case and fail to avoid 384*0b57cec5SDimitry Andric // it by changing other parts of LLVM's lowering, we refuse to handle 385*0b57cec5SDimitry Andric // this complex case here. 386*0b57cec5SDimitry Andric LLVM_DEBUG( 387*0b57cec5SDimitry Andric dbgs() << "ERROR: Encountered unexpected def of an eflags copy: "; 388*0b57cec5SDimitry Andric CopyDefI.dump()); 389*0b57cec5SDimitry Andric report_fatal_error( 390*0b57cec5SDimitry Andric "Cannot lower EFLAGS copy unless it is defined in turn by a copy!"); 391*0b57cec5SDimitry Andric } 392*0b57cec5SDimitry Andric 393*0b57cec5SDimitry Andric auto Cleanup = make_scope_exit([&] { 394*0b57cec5SDimitry Andric // All uses of the EFLAGS copy are now rewritten, kill the copy into 395*0b57cec5SDimitry Andric // eflags and if dead the copy from. 
396*0b57cec5SDimitry Andric CopyI->eraseFromParent(); 397*0b57cec5SDimitry Andric if (MRI->use_empty(CopyDefI.getOperand(0).getReg())) 398*0b57cec5SDimitry Andric CopyDefI.eraseFromParent(); 399*0b57cec5SDimitry Andric ++NumCopiesEliminated; 400*0b57cec5SDimitry Andric }); 401*0b57cec5SDimitry Andric 402*0b57cec5SDimitry Andric MachineOperand &DOp = CopyI->getOperand(0); 403*0b57cec5SDimitry Andric assert(DOp.isDef() && "Expected register def!"); 404*0b57cec5SDimitry Andric assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!"); 405*0b57cec5SDimitry Andric if (DOp.isDead()) 406*0b57cec5SDimitry Andric continue; 407*0b57cec5SDimitry Andric 408*0b57cec5SDimitry Andric MachineBasicBlock *TestMBB = CopyDefI.getParent(); 409*0b57cec5SDimitry Andric auto TestPos = CopyDefI.getIterator(); 410*0b57cec5SDimitry Andric DebugLoc TestLoc = CopyDefI.getDebugLoc(); 411*0b57cec5SDimitry Andric 412*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump()); 413*0b57cec5SDimitry Andric 414*0b57cec5SDimitry Andric // Walk up across live-in EFLAGS to find where they were actually def'ed. 415*0b57cec5SDimitry Andric // 416*0b57cec5SDimitry Andric // This copy's def may just be part of a region of blocks covered by 417*0b57cec5SDimitry Andric // a single def of EFLAGS and we want to find the top of that region where 418*0b57cec5SDimitry Andric // possible. 419*0b57cec5SDimitry Andric // 420*0b57cec5SDimitry Andric // This is essentially a search for a *candidate* reaching definition 421*0b57cec5SDimitry Andric // location. We don't need to ever find the actual reaching definition here, 422*0b57cec5SDimitry Andric // but we want to walk up the dominator tree to find the highest point which 423*0b57cec5SDimitry Andric // would be viable for such a definition. 
424*0b57cec5SDimitry Andric auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin, 425*0b57cec5SDimitry Andric MachineBasicBlock::iterator End) { 426*0b57cec5SDimitry Andric // Scan backwards as we expect these to be relatively short and often find 427*0b57cec5SDimitry Andric // a clobber near the end. 428*0b57cec5SDimitry Andric return llvm::any_of( 429*0b57cec5SDimitry Andric llvm::reverse(llvm::make_range(Begin, End)), [&](MachineInstr &MI) { 430*0b57cec5SDimitry Andric // Flag any instruction (other than the copy we are 431*0b57cec5SDimitry Andric // currently rewriting) that defs EFLAGS. 432*0b57cec5SDimitry Andric return &MI != CopyI && MI.findRegisterDefOperand(X86::EFLAGS); 433*0b57cec5SDimitry Andric }); 434*0b57cec5SDimitry Andric }; 435*0b57cec5SDimitry Andric auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB, 436*0b57cec5SDimitry Andric MachineBasicBlock *EndMBB) { 437*0b57cec5SDimitry Andric assert(MDT->dominates(BeginMBB, EndMBB) && 438*0b57cec5SDimitry Andric "Only support paths down the dominator tree!"); 439*0b57cec5SDimitry Andric SmallPtrSet<MachineBasicBlock *, 4> Visited; 440*0b57cec5SDimitry Andric SmallVector<MachineBasicBlock *, 4> Worklist; 441*0b57cec5SDimitry Andric // We terminate at the beginning. No need to scan it. 442*0b57cec5SDimitry Andric Visited.insert(BeginMBB); 443*0b57cec5SDimitry Andric Worklist.push_back(EndMBB); 444*0b57cec5SDimitry Andric do { 445*0b57cec5SDimitry Andric auto *MBB = Worklist.pop_back_val(); 446*0b57cec5SDimitry Andric for (auto *PredMBB : MBB->predecessors()) { 447*0b57cec5SDimitry Andric if (!Visited.insert(PredMBB).second) 448*0b57cec5SDimitry Andric continue; 449*0b57cec5SDimitry Andric if (HasEFLAGSClobber(PredMBB->begin(), PredMBB->end())) 450*0b57cec5SDimitry Andric return true; 451*0b57cec5SDimitry Andric // Enqueue this block to walk its predecessors. 
452*0b57cec5SDimitry Andric Worklist.push_back(PredMBB); 453*0b57cec5SDimitry Andric } 454*0b57cec5SDimitry Andric } while (!Worklist.empty()); 455*0b57cec5SDimitry Andric // No clobber found along a path from the begin to end. 456*0b57cec5SDimitry Andric return false; 457*0b57cec5SDimitry Andric }; 458*0b57cec5SDimitry Andric while (TestMBB->isLiveIn(X86::EFLAGS) && !TestMBB->pred_empty() && 459*0b57cec5SDimitry Andric !HasEFLAGSClobber(TestMBB->begin(), TestPos)) { 460*0b57cec5SDimitry Andric // Find the nearest common dominator of the predecessors, as 461*0b57cec5SDimitry Andric // that will be the best candidate to hoist into. 462*0b57cec5SDimitry Andric MachineBasicBlock *HoistMBB = 463*0b57cec5SDimitry Andric std::accumulate(std::next(TestMBB->pred_begin()), TestMBB->pred_end(), 464*0b57cec5SDimitry Andric *TestMBB->pred_begin(), 465*0b57cec5SDimitry Andric [&](MachineBasicBlock *LHS, MachineBasicBlock *RHS) { 466*0b57cec5SDimitry Andric return MDT->findNearestCommonDominator(LHS, RHS); 467*0b57cec5SDimitry Andric }); 468*0b57cec5SDimitry Andric 469*0b57cec5SDimitry Andric // Now we need to scan all predecessors that may be reached along paths to 470*0b57cec5SDimitry Andric // the hoist block. A clobber anywhere in any of these blocks the hoist. 471*0b57cec5SDimitry Andric // Note that this even handles loops because we require *no* clobbers. 472*0b57cec5SDimitry Andric if (HasEFLAGSClobberPath(HoistMBB, TestMBB)) 473*0b57cec5SDimitry Andric break; 474*0b57cec5SDimitry Andric 475*0b57cec5SDimitry Andric // We also need the terminators to not sneakily clobber flags. 476*0b57cec5SDimitry Andric if (HasEFLAGSClobber(HoistMBB->getFirstTerminator()->getIterator(), 477*0b57cec5SDimitry Andric HoistMBB->instr_end())) 478*0b57cec5SDimitry Andric break; 479*0b57cec5SDimitry Andric 480*0b57cec5SDimitry Andric // We found a viable location, hoist our test position to it. 
481*0b57cec5SDimitry Andric TestMBB = HoistMBB; 482*0b57cec5SDimitry Andric TestPos = TestMBB->getFirstTerminator()->getIterator(); 483*0b57cec5SDimitry Andric // Clear the debug location as it would just be confusing after hoisting. 484*0b57cec5SDimitry Andric TestLoc = DebugLoc(); 485*0b57cec5SDimitry Andric } 486*0b57cec5SDimitry Andric LLVM_DEBUG({ 487*0b57cec5SDimitry Andric auto DefIt = llvm::find_if( 488*0b57cec5SDimitry Andric llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)), 489*0b57cec5SDimitry Andric [&](MachineInstr &MI) { 490*0b57cec5SDimitry Andric return MI.findRegisterDefOperand(X86::EFLAGS); 491*0b57cec5SDimitry Andric }); 492*0b57cec5SDimitry Andric if (DefIt.base() != TestMBB->instr_begin()) { 493*0b57cec5SDimitry Andric dbgs() << " Using EFLAGS defined by: "; 494*0b57cec5SDimitry Andric DefIt->dump(); 495*0b57cec5SDimitry Andric } else { 496*0b57cec5SDimitry Andric dbgs() << " Using live-in flags for BB:\n"; 497*0b57cec5SDimitry Andric TestMBB->dump(); 498*0b57cec5SDimitry Andric } 499*0b57cec5SDimitry Andric }); 500*0b57cec5SDimitry Andric 501*0b57cec5SDimitry Andric // While rewriting uses, we buffer jumps and rewrite them in a second pass 502*0b57cec5SDimitry Andric // because doing so will perturb the CFG that we are walking to find the 503*0b57cec5SDimitry Andric // uses in the first place. 504*0b57cec5SDimitry Andric SmallVector<MachineInstr *, 4> JmpIs; 505*0b57cec5SDimitry Andric 506*0b57cec5SDimitry Andric // Gather the condition flags that have already been preserved in 507*0b57cec5SDimitry Andric // registers. We do this from scratch each time as we expect there to be 508*0b57cec5SDimitry Andric // very few of them and we expect to not revisit the same copy definition 509*0b57cec5SDimitry Andric // many times. If either of those change sufficiently we could build a map 510*0b57cec5SDimitry Andric // of these up front instead. 
511*0b57cec5SDimitry Andric CondRegArray CondRegs = collectCondsInRegs(*TestMBB, TestPos); 512*0b57cec5SDimitry Andric 513*0b57cec5SDimitry Andric // Collect the basic blocks we need to scan. Typically this will just be 514*0b57cec5SDimitry Andric // a single basic block but we may have to scan multiple blocks if the 515*0b57cec5SDimitry Andric // EFLAGS copy lives into successors. 516*0b57cec5SDimitry Andric SmallVector<MachineBasicBlock *, 2> Blocks; 517*0b57cec5SDimitry Andric SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks; 518*0b57cec5SDimitry Andric Blocks.push_back(&MBB); 519*0b57cec5SDimitry Andric 520*0b57cec5SDimitry Andric do { 521*0b57cec5SDimitry Andric MachineBasicBlock &UseMBB = *Blocks.pop_back_val(); 522*0b57cec5SDimitry Andric 523*0b57cec5SDimitry Andric // Track when if/when we find a kill of the flags in this block. 524*0b57cec5SDimitry Andric bool FlagsKilled = false; 525*0b57cec5SDimitry Andric 526*0b57cec5SDimitry Andric // In most cases, we walk from the beginning to the end of the block. But 527*0b57cec5SDimitry Andric // when the block is the same block as the copy is from, we will visit it 528*0b57cec5SDimitry Andric // twice. The first time we start from the copy and go to the end. The 529*0b57cec5SDimitry Andric // second time we start from the beginning and go to the copy. This lets 530*0b57cec5SDimitry Andric // us handle copies inside of cycles. 531*0b57cec5SDimitry Andric // FIXME: This loop is *super* confusing. This is at least in part 532*0b57cec5SDimitry Andric // a symptom of all of this routine needing to be refactored into 533*0b57cec5SDimitry Andric // documentable components. Once done, there may be a better way to write 534*0b57cec5SDimitry Andric // this loop. 535*0b57cec5SDimitry Andric for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(&UseMBB)) 536*0b57cec5SDimitry Andric ? 
std::next(CopyI->getIterator()) 537*0b57cec5SDimitry Andric : UseMBB.instr_begin(), 538*0b57cec5SDimitry Andric MIE = UseMBB.instr_end(); 539*0b57cec5SDimitry Andric MII != MIE;) { 540*0b57cec5SDimitry Andric MachineInstr &MI = *MII++; 541*0b57cec5SDimitry Andric // If we are in the original copy block and encounter either the copy 542*0b57cec5SDimitry Andric // def or the copy itself, break so that we don't re-process any part of 543*0b57cec5SDimitry Andric // the block or process the instructions in the range that was copied 544*0b57cec5SDimitry Andric // over. 545*0b57cec5SDimitry Andric if (&MI == CopyI || &MI == &CopyDefI) { 546*0b57cec5SDimitry Andric assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) && 547*0b57cec5SDimitry Andric "Should only encounter these on the second pass over the " 548*0b57cec5SDimitry Andric "original block."); 549*0b57cec5SDimitry Andric break; 550*0b57cec5SDimitry Andric } 551*0b57cec5SDimitry Andric 552*0b57cec5SDimitry Andric MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS); 553*0b57cec5SDimitry Andric if (!FlagUse) { 554*0b57cec5SDimitry Andric if (MI.findRegisterDefOperand(X86::EFLAGS)) { 555*0b57cec5SDimitry Andric // If EFLAGS are defined, it's as-if they were killed. We can stop 556*0b57cec5SDimitry Andric // scanning here. 557*0b57cec5SDimitry Andric // 558*0b57cec5SDimitry Andric // NB!!! Many instructions only modify some flags. LLVM currently 559*0b57cec5SDimitry Andric // models this as clobbering all flags, but if that ever changes 560*0b57cec5SDimitry Andric // this will need to be carefully updated to handle that more 561*0b57cec5SDimitry Andric // complex logic. 
562*0b57cec5SDimitry Andric FlagsKilled = true; 563*0b57cec5SDimitry Andric break; 564*0b57cec5SDimitry Andric } 565*0b57cec5SDimitry Andric continue; 566*0b57cec5SDimitry Andric } 567*0b57cec5SDimitry Andric 568*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Rewriting use: "; MI.dump()); 569*0b57cec5SDimitry Andric 570*0b57cec5SDimitry Andric // Check the kill flag before we rewrite as that may change it. 571*0b57cec5SDimitry Andric if (FlagUse->isKill()) 572*0b57cec5SDimitry Andric FlagsKilled = true; 573*0b57cec5SDimitry Andric 574*0b57cec5SDimitry Andric // Once we encounter a branch, the rest of the instructions must also be 575*0b57cec5SDimitry Andric // branches. We can't rewrite in place here, so we handle them below. 576*0b57cec5SDimitry Andric // 577*0b57cec5SDimitry Andric // Note that we don't have to handle tail calls here, even conditional 578*0b57cec5SDimitry Andric // tail calls, as those are not introduced into the X86 MI until post-RA 579*0b57cec5SDimitry Andric // branch folding or black placement. As a consequence, we get to deal 580*0b57cec5SDimitry Andric // with the simpler formulation of conditional branches followed by tail 581*0b57cec5SDimitry Andric // calls. 582*0b57cec5SDimitry Andric if (X86::getCondFromBranch(MI) != X86::COND_INVALID) { 583*0b57cec5SDimitry Andric auto JmpIt = MI.getIterator(); 584*0b57cec5SDimitry Andric do { 585*0b57cec5SDimitry Andric JmpIs.push_back(&*JmpIt); 586*0b57cec5SDimitry Andric ++JmpIt; 587*0b57cec5SDimitry Andric } while (JmpIt != UseMBB.instr_end() && 588*0b57cec5SDimitry Andric X86::getCondFromBranch(*JmpIt) != 589*0b57cec5SDimitry Andric X86::COND_INVALID); 590*0b57cec5SDimitry Andric break; 591*0b57cec5SDimitry Andric } 592*0b57cec5SDimitry Andric 593*0b57cec5SDimitry Andric // Otherwise we can just rewrite in-place. 
594*0b57cec5SDimitry Andric if (X86::getCondFromCMov(MI) != X86::COND_INVALID) { 595*0b57cec5SDimitry Andric rewriteCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); 596*0b57cec5SDimitry Andric } else if (X86::getCondFromSETCC(MI) != X86::COND_INVALID) { 597*0b57cec5SDimitry Andric rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); 598*0b57cec5SDimitry Andric } else if (MI.getOpcode() == TargetOpcode::COPY) { 599*0b57cec5SDimitry Andric rewriteCopy(MI, *FlagUse, CopyDefI); 600*0b57cec5SDimitry Andric } else { 601*0b57cec5SDimitry Andric // We assume all other instructions that use flags also def them. 602*0b57cec5SDimitry Andric assert(MI.findRegisterDefOperand(X86::EFLAGS) && 603*0b57cec5SDimitry Andric "Expected a def of EFLAGS for this instruction!"); 604*0b57cec5SDimitry Andric 605*0b57cec5SDimitry Andric // NB!!! Several arithmetic instructions only *partially* update 606*0b57cec5SDimitry Andric // flags. Theoretically, we could generate MI code sequences that 607*0b57cec5SDimitry Andric // would rely on this fact and observe different flags independently. 608*0b57cec5SDimitry Andric // But currently LLVM models all of these instructions as clobbering 609*0b57cec5SDimitry Andric // all the flags in an undef way. We rely on that to simplify the 610*0b57cec5SDimitry Andric // logic. 611*0b57cec5SDimitry Andric FlagsKilled = true; 612*0b57cec5SDimitry Andric 613*0b57cec5SDimitry Andric switch (MI.getOpcode()) { 614*0b57cec5SDimitry Andric case X86::SETB_C8r: 615*0b57cec5SDimitry Andric case X86::SETB_C16r: 616*0b57cec5SDimitry Andric case X86::SETB_C32r: 617*0b57cec5SDimitry Andric case X86::SETB_C64r: 618*0b57cec5SDimitry Andric // Use custom lowering for arithmetic that is merely extending the 619*0b57cec5SDimitry Andric // carry flag. We model this as the SETB_C* pseudo instructions. 
620*0b57cec5SDimitry Andric rewriteSetCarryExtended(*TestMBB, TestPos, TestLoc, MI, *FlagUse, 621*0b57cec5SDimitry Andric CondRegs); 622*0b57cec5SDimitry Andric break; 623*0b57cec5SDimitry Andric 624*0b57cec5SDimitry Andric default: 625*0b57cec5SDimitry Andric // Generically handle remaining uses as arithmetic instructions. 626*0b57cec5SDimitry Andric rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, *FlagUse, 627*0b57cec5SDimitry Andric CondRegs); 628*0b57cec5SDimitry Andric break; 629*0b57cec5SDimitry Andric } 630*0b57cec5SDimitry Andric break; 631*0b57cec5SDimitry Andric } 632*0b57cec5SDimitry Andric 633*0b57cec5SDimitry Andric // If this was the last use of the flags, we're done. 634*0b57cec5SDimitry Andric if (FlagsKilled) 635*0b57cec5SDimitry Andric break; 636*0b57cec5SDimitry Andric } 637*0b57cec5SDimitry Andric 638*0b57cec5SDimitry Andric // If the flags were killed, we're done with this block. 639*0b57cec5SDimitry Andric if (FlagsKilled) 640*0b57cec5SDimitry Andric continue; 641*0b57cec5SDimitry Andric 642*0b57cec5SDimitry Andric // Otherwise we need to scan successors for ones where the flags live-in 643*0b57cec5SDimitry Andric // and queue those up for processing. 644*0b57cec5SDimitry Andric for (MachineBasicBlock *SuccMBB : UseMBB.successors()) 645*0b57cec5SDimitry Andric if (SuccMBB->isLiveIn(X86::EFLAGS) && 646*0b57cec5SDimitry Andric VisitedBlocks.insert(SuccMBB).second) { 647*0b57cec5SDimitry Andric // We currently don't do any PHI insertion and so we require that the 648*0b57cec5SDimitry Andric // test basic block dominates all of the use basic blocks. Further, we 649*0b57cec5SDimitry Andric // can't have a cycle from the test block back to itself as that would 650*0b57cec5SDimitry Andric // create a cycle requiring a PHI to break it. 
651*0b57cec5SDimitry Andric // 652*0b57cec5SDimitry Andric // We could in theory do PHI insertion here if it becomes useful by 653*0b57cec5SDimitry Andric // just taking undef values in along every edge that we don't trace 654*0b57cec5SDimitry Andric // this EFLAGS copy along. This isn't as bad as fully general PHI 655*0b57cec5SDimitry Andric // insertion, but still seems like a great deal of complexity. 656*0b57cec5SDimitry Andric // 657*0b57cec5SDimitry Andric // Because it is theoretically possible that some earlier MI pass or 658*0b57cec5SDimitry Andric // other lowering transformation could induce this to happen, we do 659*0b57cec5SDimitry Andric // a hard check even in non-debug builds here. 660*0b57cec5SDimitry Andric if (SuccMBB == TestMBB || !MDT->dominates(TestMBB, SuccMBB)) { 661*0b57cec5SDimitry Andric LLVM_DEBUG({ 662*0b57cec5SDimitry Andric dbgs() 663*0b57cec5SDimitry Andric << "ERROR: Encountered use that is not dominated by our test " 664*0b57cec5SDimitry Andric "basic block! Rewriting this would require inserting PHI " 665*0b57cec5SDimitry Andric "nodes to track the flag state across the CFG.\n\nTest " 666*0b57cec5SDimitry Andric "block:\n"; 667*0b57cec5SDimitry Andric TestMBB->dump(); 668*0b57cec5SDimitry Andric dbgs() << "Use block:\n"; 669*0b57cec5SDimitry Andric SuccMBB->dump(); 670*0b57cec5SDimitry Andric }); 671*0b57cec5SDimitry Andric report_fatal_error( 672*0b57cec5SDimitry Andric "Cannot lower EFLAGS copy when original copy def " 673*0b57cec5SDimitry Andric "does not dominate all uses."); 674*0b57cec5SDimitry Andric } 675*0b57cec5SDimitry Andric 676*0b57cec5SDimitry Andric Blocks.push_back(SuccMBB); 677*0b57cec5SDimitry Andric } 678*0b57cec5SDimitry Andric } while (!Blocks.empty()); 679*0b57cec5SDimitry Andric 680*0b57cec5SDimitry Andric // Now rewrite the jumps that use the flags. 
These we handle specially 681*0b57cec5SDimitry Andric // because if there are multiple jumps in a single basic block we'll have 682*0b57cec5SDimitry Andric // to do surgery on the CFG. 683*0b57cec5SDimitry Andric MachineBasicBlock *LastJmpMBB = nullptr; 684*0b57cec5SDimitry Andric for (MachineInstr *JmpI : JmpIs) { 685*0b57cec5SDimitry Andric // Past the first jump within a basic block we need to split the blocks 686*0b57cec5SDimitry Andric // apart. 687*0b57cec5SDimitry Andric if (JmpI->getParent() == LastJmpMBB) 688*0b57cec5SDimitry Andric splitBlock(*JmpI->getParent(), *JmpI, *TII); 689*0b57cec5SDimitry Andric else 690*0b57cec5SDimitry Andric LastJmpMBB = JmpI->getParent(); 691*0b57cec5SDimitry Andric 692*0b57cec5SDimitry Andric rewriteCondJmp(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs); 693*0b57cec5SDimitry Andric } 694*0b57cec5SDimitry Andric 695*0b57cec5SDimitry Andric // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if 696*0b57cec5SDimitry Andric // the copy's def operand is itself a kill. 
697*0b57cec5SDimitry Andric } 698*0b57cec5SDimitry Andric 699*0b57cec5SDimitry Andric #ifndef NDEBUG 700*0b57cec5SDimitry Andric for (MachineBasicBlock &MBB : MF) 701*0b57cec5SDimitry Andric for (MachineInstr &MI : MBB) 702*0b57cec5SDimitry Andric if (MI.getOpcode() == TargetOpcode::COPY && 703*0b57cec5SDimitry Andric (MI.getOperand(0).getReg() == X86::EFLAGS || 704*0b57cec5SDimitry Andric MI.getOperand(1).getReg() == X86::EFLAGS)) { 705*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: "; 706*0b57cec5SDimitry Andric MI.dump()); 707*0b57cec5SDimitry Andric llvm_unreachable("Unlowered EFLAGS copy!"); 708*0b57cec5SDimitry Andric } 709*0b57cec5SDimitry Andric #endif 710*0b57cec5SDimitry Andric 711*0b57cec5SDimitry Andric return true; 712*0b57cec5SDimitry Andric } 713*0b57cec5SDimitry Andric 714*0b57cec5SDimitry Andric /// Collect any conditions that have already been set in registers so that we 715*0b57cec5SDimitry Andric /// can re-use them rather than adding duplicates. 716*0b57cec5SDimitry Andric CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs( 717*0b57cec5SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos) { 718*0b57cec5SDimitry Andric CondRegArray CondRegs = {}; 719*0b57cec5SDimitry Andric 720*0b57cec5SDimitry Andric // Scan backwards across the range of instructions with live EFLAGS. 
721*0b57cec5SDimitry Andric for (MachineInstr &MI : 722*0b57cec5SDimitry Andric llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) { 723*0b57cec5SDimitry Andric X86::CondCode Cond = X86::getCondFromSETCC(MI); 724*0b57cec5SDimitry Andric if (Cond != X86::COND_INVALID && !MI.mayStore() && MI.getOperand(0).isReg() && 725*0b57cec5SDimitry Andric TRI->isVirtualRegister(MI.getOperand(0).getReg())) { 726*0b57cec5SDimitry Andric assert(MI.getOperand(0).isDef() && 727*0b57cec5SDimitry Andric "A non-storing SETcc should always define a register!"); 728*0b57cec5SDimitry Andric CondRegs[Cond] = MI.getOperand(0).getReg(); 729*0b57cec5SDimitry Andric } 730*0b57cec5SDimitry Andric 731*0b57cec5SDimitry Andric // Stop scanning when we see the first definition of the EFLAGS as prior to 732*0b57cec5SDimitry Andric // this we would potentially capture the wrong flag state. 733*0b57cec5SDimitry Andric if (MI.findRegisterDefOperand(X86::EFLAGS)) 734*0b57cec5SDimitry Andric break; 735*0b57cec5SDimitry Andric } 736*0b57cec5SDimitry Andric return CondRegs; 737*0b57cec5SDimitry Andric } 738*0b57cec5SDimitry Andric 739*0b57cec5SDimitry Andric unsigned X86FlagsCopyLoweringPass::promoteCondToReg( 740*0b57cec5SDimitry Andric MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, 741*0b57cec5SDimitry Andric DebugLoc TestLoc, X86::CondCode Cond) { 742*0b57cec5SDimitry Andric unsigned Reg = MRI->createVirtualRegister(PromoteRC); 743*0b57cec5SDimitry Andric auto SetI = BuildMI(TestMBB, TestPos, TestLoc, 744*0b57cec5SDimitry Andric TII->get(X86::SETCCr), Reg).addImm(Cond); 745*0b57cec5SDimitry Andric (void)SetI; 746*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " save cond: "; SetI->dump()); 747*0b57cec5SDimitry Andric ++NumSetCCsInserted; 748*0b57cec5SDimitry Andric return Reg; 749*0b57cec5SDimitry Andric } 750*0b57cec5SDimitry Andric 751*0b57cec5SDimitry Andric std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg( 752*0b57cec5SDimitry Andric MachineBasicBlock 
&TestMBB, MachineBasicBlock::iterator TestPos, 753*0b57cec5SDimitry Andric DebugLoc TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) { 754*0b57cec5SDimitry Andric unsigned &CondReg = CondRegs[Cond]; 755*0b57cec5SDimitry Andric unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)]; 756*0b57cec5SDimitry Andric if (!CondReg && !InvCondReg) 757*0b57cec5SDimitry Andric CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond); 758*0b57cec5SDimitry Andric 759*0b57cec5SDimitry Andric if (CondReg) 760*0b57cec5SDimitry Andric return {CondReg, false}; 761*0b57cec5SDimitry Andric else 762*0b57cec5SDimitry Andric return {InvCondReg, true}; 763*0b57cec5SDimitry Andric } 764*0b57cec5SDimitry Andric 765*0b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB, 766*0b57cec5SDimitry Andric MachineBasicBlock::iterator Pos, 767*0b57cec5SDimitry Andric DebugLoc Loc, unsigned Reg) { 768*0b57cec5SDimitry Andric auto TestI = 769*0b57cec5SDimitry Andric BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg); 770*0b57cec5SDimitry Andric (void)TestI; 771*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " test cond: "; TestI->dump()); 772*0b57cec5SDimitry Andric ++NumTestsInserted; 773*0b57cec5SDimitry Andric } 774*0b57cec5SDimitry Andric 775*0b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteArithmetic( 776*0b57cec5SDimitry Andric MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, 777*0b57cec5SDimitry Andric DebugLoc TestLoc, MachineInstr &MI, MachineOperand &FlagUse, 778*0b57cec5SDimitry Andric CondRegArray &CondRegs) { 779*0b57cec5SDimitry Andric // Arithmetic is either reading CF or OF. Figure out which condition we need 780*0b57cec5SDimitry Andric // to preserve in a register. 781*0b57cec5SDimitry Andric X86::CondCode Cond; 782*0b57cec5SDimitry Andric 783*0b57cec5SDimitry Andric // The addend to use to reset CF or OF when added to the flag value. 
784*0b57cec5SDimitry Andric int Addend; 785*0b57cec5SDimitry Andric 786*0b57cec5SDimitry Andric switch (getMnemonicFromOpcode(MI.getOpcode())) { 787*0b57cec5SDimitry Andric case FlagArithMnemonic::ADC: 788*0b57cec5SDimitry Andric case FlagArithMnemonic::ADCX: 789*0b57cec5SDimitry Andric case FlagArithMnemonic::RCL: 790*0b57cec5SDimitry Andric case FlagArithMnemonic::RCR: 791*0b57cec5SDimitry Andric case FlagArithMnemonic::SBB: 792*0b57cec5SDimitry Andric Cond = X86::COND_B; // CF == 1 793*0b57cec5SDimitry Andric // Set up an addend that when one is added will need a carry due to not 794*0b57cec5SDimitry Andric // having a higher bit available. 795*0b57cec5SDimitry Andric Addend = 255; 796*0b57cec5SDimitry Andric break; 797*0b57cec5SDimitry Andric 798*0b57cec5SDimitry Andric case FlagArithMnemonic::ADOX: 799*0b57cec5SDimitry Andric Cond = X86::COND_O; // OF == 1 800*0b57cec5SDimitry Andric // Set up an addend that when one is added will turn from positive to 801*0b57cec5SDimitry Andric // negative and thus overflow in the signed domain. 802*0b57cec5SDimitry Andric Addend = 127; 803*0b57cec5SDimitry Andric break; 804*0b57cec5SDimitry Andric } 805*0b57cec5SDimitry Andric 806*0b57cec5SDimitry Andric // Now get a register that contains the value of the flag input to the 807*0b57cec5SDimitry Andric // arithmetic. We require exactly this flag to simplify the arithmetic 808*0b57cec5SDimitry Andric // required to materialize it back into the flag. 809*0b57cec5SDimitry Andric unsigned &CondReg = CondRegs[Cond]; 810*0b57cec5SDimitry Andric if (!CondReg) 811*0b57cec5SDimitry Andric CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond); 812*0b57cec5SDimitry Andric 813*0b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 814*0b57cec5SDimitry Andric 815*0b57cec5SDimitry Andric // Insert an instruction that will set the flag back to the desired value. 
816*0b57cec5SDimitry Andric unsigned TmpReg = MRI->createVirtualRegister(PromoteRC); 817*0b57cec5SDimitry Andric auto AddI = 818*0b57cec5SDimitry Andric BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri)) 819*0b57cec5SDimitry Andric .addDef(TmpReg, RegState::Dead) 820*0b57cec5SDimitry Andric .addReg(CondReg) 821*0b57cec5SDimitry Andric .addImm(Addend); 822*0b57cec5SDimitry Andric (void)AddI; 823*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " add cond: "; AddI->dump()); 824*0b57cec5SDimitry Andric ++NumAddsInserted; 825*0b57cec5SDimitry Andric FlagUse.setIsKill(true); 826*0b57cec5SDimitry Andric } 827*0b57cec5SDimitry Andric 828*0b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB, 829*0b57cec5SDimitry Andric MachineBasicBlock::iterator TestPos, 830*0b57cec5SDimitry Andric DebugLoc TestLoc, 831*0b57cec5SDimitry Andric MachineInstr &CMovI, 832*0b57cec5SDimitry Andric MachineOperand &FlagUse, 833*0b57cec5SDimitry Andric CondRegArray &CondRegs) { 834*0b57cec5SDimitry Andric // First get the register containing this specific condition. 835*0b57cec5SDimitry Andric X86::CondCode Cond = X86::getCondFromCMov(CMovI); 836*0b57cec5SDimitry Andric unsigned CondReg; 837*0b57cec5SDimitry Andric bool Inverted; 838*0b57cec5SDimitry Andric std::tie(CondReg, Inverted) = 839*0b57cec5SDimitry Andric getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs); 840*0b57cec5SDimitry Andric 841*0b57cec5SDimitry Andric MachineBasicBlock &MBB = *CMovI.getParent(); 842*0b57cec5SDimitry Andric 843*0b57cec5SDimitry Andric // Insert a direct test of the saved register. 844*0b57cec5SDimitry Andric insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg); 845*0b57cec5SDimitry Andric 846*0b57cec5SDimitry Andric // Rewrite the CMov to use the !ZF flag from the test, and then kill its use 847*0b57cec5SDimitry Andric // of the flags afterward. 
848*0b57cec5SDimitry Andric CMovI.getOperand(CMovI.getDesc().getNumOperands() - 1) 849*0b57cec5SDimitry Andric .setImm(Inverted ? X86::COND_E : X86::COND_NE); 850*0b57cec5SDimitry Andric FlagUse.setIsKill(true); 851*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " fixed cmov: "; CMovI.dump()); 852*0b57cec5SDimitry Andric } 853*0b57cec5SDimitry Andric 854*0b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteCondJmp( 855*0b57cec5SDimitry Andric MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, 856*0b57cec5SDimitry Andric DebugLoc TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) { 857*0b57cec5SDimitry Andric // First get the register containing this specific condition. 858*0b57cec5SDimitry Andric X86::CondCode Cond = X86::getCondFromBranch(JmpI); 859*0b57cec5SDimitry Andric unsigned CondReg; 860*0b57cec5SDimitry Andric bool Inverted; 861*0b57cec5SDimitry Andric std::tie(CondReg, Inverted) = 862*0b57cec5SDimitry Andric getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs); 863*0b57cec5SDimitry Andric 864*0b57cec5SDimitry Andric MachineBasicBlock &JmpMBB = *JmpI.getParent(); 865*0b57cec5SDimitry Andric 866*0b57cec5SDimitry Andric // Insert a direct test of the saved register. 867*0b57cec5SDimitry Andric insertTest(JmpMBB, JmpI.getIterator(), JmpI.getDebugLoc(), CondReg); 868*0b57cec5SDimitry Andric 869*0b57cec5SDimitry Andric // Rewrite the jump to use the !ZF flag from the test, and kill its use of 870*0b57cec5SDimitry Andric // flags afterward. 871*0b57cec5SDimitry Andric JmpI.getOperand(1).setImm(Inverted ? 
X86::COND_E : X86::COND_NE); 872*0b57cec5SDimitry Andric JmpI.findRegisterUseOperand(X86::EFLAGS)->setIsKill(true); 873*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " fixed jCC: "; JmpI.dump()); 874*0b57cec5SDimitry Andric } 875*0b57cec5SDimitry Andric 876*0b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI, 877*0b57cec5SDimitry Andric MachineOperand &FlagUse, 878*0b57cec5SDimitry Andric MachineInstr &CopyDefI) { 879*0b57cec5SDimitry Andric // Just replace this copy with the original copy def. 880*0b57cec5SDimitry Andric MRI->replaceRegWith(MI.getOperand(0).getReg(), 881*0b57cec5SDimitry Andric CopyDefI.getOperand(0).getReg()); 882*0b57cec5SDimitry Andric MI.eraseFromParent(); 883*0b57cec5SDimitry Andric } 884*0b57cec5SDimitry Andric 885*0b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteSetCarryExtended( 886*0b57cec5SDimitry Andric MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, 887*0b57cec5SDimitry Andric DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse, 888*0b57cec5SDimitry Andric CondRegArray &CondRegs) { 889*0b57cec5SDimitry Andric // This routine is only used to handle pseudos for setting a register to zero 890*0b57cec5SDimitry Andric // or all ones based on CF. This is essentially the sign extended from 1-bit 891*0b57cec5SDimitry Andric // form of SETB and modeled with the SETB_C* pseudos. They require special 892*0b57cec5SDimitry Andric // handling as they aren't normal SETcc instructions and are lowered to an 893*0b57cec5SDimitry Andric // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that 894*0b57cec5SDimitry Andric // they are only provided in reg-defining forms. A complicating factor is that 895*0b57cec5SDimitry Andric // they can define many different register widths. 
896*0b57cec5SDimitry Andric assert(SetBI.getOperand(0).isReg() && 897*0b57cec5SDimitry Andric "Cannot have a non-register defined operand to this variant of SETB!"); 898*0b57cec5SDimitry Andric 899*0b57cec5SDimitry Andric // Little helper to do the common final step of replacing the register def'ed 900*0b57cec5SDimitry Andric // by this SETB instruction with a new register and removing the SETB 901*0b57cec5SDimitry Andric // instruction. 902*0b57cec5SDimitry Andric auto RewriteToReg = [&](unsigned Reg) { 903*0b57cec5SDimitry Andric MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg); 904*0b57cec5SDimitry Andric SetBI.eraseFromParent(); 905*0b57cec5SDimitry Andric }; 906*0b57cec5SDimitry Andric 907*0b57cec5SDimitry Andric // Grab the register class used for this particular instruction. 908*0b57cec5SDimitry Andric auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg()); 909*0b57cec5SDimitry Andric 910*0b57cec5SDimitry Andric MachineBasicBlock &MBB = *SetBI.getParent(); 911*0b57cec5SDimitry Andric auto SetPos = SetBI.getIterator(); 912*0b57cec5SDimitry Andric auto SetLoc = SetBI.getDebugLoc(); 913*0b57cec5SDimitry Andric 914*0b57cec5SDimitry Andric auto AdjustReg = [&](unsigned Reg) { 915*0b57cec5SDimitry Andric auto &OrigRC = *MRI->getRegClass(Reg); 916*0b57cec5SDimitry Andric if (&OrigRC == &SetBRC) 917*0b57cec5SDimitry Andric return Reg; 918*0b57cec5SDimitry Andric 919*0b57cec5SDimitry Andric unsigned NewReg; 920*0b57cec5SDimitry Andric 921*0b57cec5SDimitry Andric int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8; 922*0b57cec5SDimitry Andric int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8; 923*0b57cec5SDimitry Andric assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!"); 924*0b57cec5SDimitry Andric assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!"); 925*0b57cec5SDimitry Andric int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit, 926*0b57cec5SDimitry Andric X86::NoSubRegister, X86::sub_32bit}; 927*0b57cec5SDimitry 
Andric 928*0b57cec5SDimitry Andric // If the original size is smaller than the target *and* is smaller than 4 929*0b57cec5SDimitry Andric // bytes, we need to explicitly zero extend it. We always extend to 4-bytes 930*0b57cec5SDimitry Andric // to maximize the chance of being able to CSE that operation and to avoid 931*0b57cec5SDimitry Andric // partial dependency stalls extending to 2-bytes. 932*0b57cec5SDimitry Andric if (OrigRegSize < TargetRegSize && OrigRegSize < 4) { 933*0b57cec5SDimitry Andric NewReg = MRI->createVirtualRegister(&X86::GR32RegClass); 934*0b57cec5SDimitry Andric BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg) 935*0b57cec5SDimitry Andric .addReg(Reg); 936*0b57cec5SDimitry Andric if (&SetBRC == &X86::GR32RegClass) 937*0b57cec5SDimitry Andric return NewReg; 938*0b57cec5SDimitry Andric Reg = NewReg; 939*0b57cec5SDimitry Andric OrigRegSize = 4; 940*0b57cec5SDimitry Andric } 941*0b57cec5SDimitry Andric 942*0b57cec5SDimitry Andric NewReg = MRI->createVirtualRegister(&SetBRC); 943*0b57cec5SDimitry Andric if (OrigRegSize < TargetRegSize) { 944*0b57cec5SDimitry Andric BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG), 945*0b57cec5SDimitry Andric NewReg) 946*0b57cec5SDimitry Andric .addImm(0) 947*0b57cec5SDimitry Andric .addReg(Reg) 948*0b57cec5SDimitry Andric .addImm(SubRegIdx[OrigRegSize]); 949*0b57cec5SDimitry Andric } else if (OrigRegSize > TargetRegSize) { 950*0b57cec5SDimitry Andric if (TargetRegSize == 1 && !Subtarget->is64Bit()) { 951*0b57cec5SDimitry Andric // Need to constrain the register class. 
952*0b57cec5SDimitry Andric MRI->constrainRegClass(Reg, &X86::GR32_ABCDRegClass); 953*0b57cec5SDimitry Andric } 954*0b57cec5SDimitry Andric 955*0b57cec5SDimitry Andric BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), 956*0b57cec5SDimitry Andric NewReg) 957*0b57cec5SDimitry Andric .addReg(Reg, 0, SubRegIdx[TargetRegSize]); 958*0b57cec5SDimitry Andric } else { 959*0b57cec5SDimitry Andric BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg) 960*0b57cec5SDimitry Andric .addReg(Reg); 961*0b57cec5SDimitry Andric } 962*0b57cec5SDimitry Andric return NewReg; 963*0b57cec5SDimitry Andric }; 964*0b57cec5SDimitry Andric 965*0b57cec5SDimitry Andric unsigned &CondReg = CondRegs[X86::COND_B]; 966*0b57cec5SDimitry Andric if (!CondReg) 967*0b57cec5SDimitry Andric CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B); 968*0b57cec5SDimitry Andric 969*0b57cec5SDimitry Andric // Adjust the condition to have the desired register width by zero-extending 970*0b57cec5SDimitry Andric // as needed. 971*0b57cec5SDimitry Andric // FIXME: We should use a better API to avoid the local reference and using a 972*0b57cec5SDimitry Andric // different variable here. 973*0b57cec5SDimitry Andric unsigned ExtCondReg = AdjustReg(CondReg); 974*0b57cec5SDimitry Andric 975*0b57cec5SDimitry Andric // Now we need to turn this into a bitmask. We do this by subtracting it from 976*0b57cec5SDimitry Andric // zero. 
977*0b57cec5SDimitry Andric unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass); 978*0b57cec5SDimitry Andric BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg); 979*0b57cec5SDimitry Andric ZeroReg = AdjustReg(ZeroReg); 980*0b57cec5SDimitry Andric 981*0b57cec5SDimitry Andric unsigned Sub; 982*0b57cec5SDimitry Andric switch (SetBI.getOpcode()) { 983*0b57cec5SDimitry Andric case X86::SETB_C8r: 984*0b57cec5SDimitry Andric Sub = X86::SUB8rr; 985*0b57cec5SDimitry Andric break; 986*0b57cec5SDimitry Andric 987*0b57cec5SDimitry Andric case X86::SETB_C16r: 988*0b57cec5SDimitry Andric Sub = X86::SUB16rr; 989*0b57cec5SDimitry Andric break; 990*0b57cec5SDimitry Andric 991*0b57cec5SDimitry Andric case X86::SETB_C32r: 992*0b57cec5SDimitry Andric Sub = X86::SUB32rr; 993*0b57cec5SDimitry Andric break; 994*0b57cec5SDimitry Andric 995*0b57cec5SDimitry Andric case X86::SETB_C64r: 996*0b57cec5SDimitry Andric Sub = X86::SUB64rr; 997*0b57cec5SDimitry Andric break; 998*0b57cec5SDimitry Andric 999*0b57cec5SDimitry Andric default: 1000*0b57cec5SDimitry Andric llvm_unreachable("Invalid SETB_C* opcode!"); 1001*0b57cec5SDimitry Andric } 1002*0b57cec5SDimitry Andric unsigned ResultReg = MRI->createVirtualRegister(&SetBRC); 1003*0b57cec5SDimitry Andric BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg) 1004*0b57cec5SDimitry Andric .addReg(ZeroReg) 1005*0b57cec5SDimitry Andric .addReg(ExtCondReg); 1006*0b57cec5SDimitry Andric return RewriteToReg(ResultReg); 1007*0b57cec5SDimitry Andric } 1008*0b57cec5SDimitry Andric 1009*0b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB, 1010*0b57cec5SDimitry Andric MachineBasicBlock::iterator TestPos, 1011*0b57cec5SDimitry Andric DebugLoc TestLoc, 1012*0b57cec5SDimitry Andric MachineInstr &SetCCI, 1013*0b57cec5SDimitry Andric MachineOperand &FlagUse, 1014*0b57cec5SDimitry Andric CondRegArray &CondRegs) { 1015*0b57cec5SDimitry Andric X86::CondCode Cond = X86::getCondFromSETCC(SetCCI); 
1016*0b57cec5SDimitry Andric // Note that we can't usefully rewrite this to the inverse without complex 1017*0b57cec5SDimitry Andric // analysis of the users of the setCC. Largely we rely on duplicates which 1018*0b57cec5SDimitry Andric // could have been avoided already being avoided here. 1019*0b57cec5SDimitry Andric unsigned &CondReg = CondRegs[Cond]; 1020*0b57cec5SDimitry Andric if (!CondReg) 1021*0b57cec5SDimitry Andric CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond); 1022*0b57cec5SDimitry Andric 1023*0b57cec5SDimitry Andric // Rewriting a register def is trivial: we just replace the register and 1024*0b57cec5SDimitry Andric // remove the setcc. 1025*0b57cec5SDimitry Andric if (!SetCCI.mayStore()) { 1026*0b57cec5SDimitry Andric assert(SetCCI.getOperand(0).isReg() && 1027*0b57cec5SDimitry Andric "Cannot have a non-register defined operand to SETcc!"); 1028*0b57cec5SDimitry Andric MRI->replaceRegWith(SetCCI.getOperand(0).getReg(), CondReg); 1029*0b57cec5SDimitry Andric SetCCI.eraseFromParent(); 1030*0b57cec5SDimitry Andric return; 1031*0b57cec5SDimitry Andric } 1032*0b57cec5SDimitry Andric 1033*0b57cec5SDimitry Andric // Otherwise, we need to emit a store. 1034*0b57cec5SDimitry Andric auto MIB = BuildMI(*SetCCI.getParent(), SetCCI.getIterator(), 1035*0b57cec5SDimitry Andric SetCCI.getDebugLoc(), TII->get(X86::MOV8mr)); 1036*0b57cec5SDimitry Andric // Copy the address operands. 1037*0b57cec5SDimitry Andric for (int i = 0; i < X86::AddrNumOperands; ++i) 1038*0b57cec5SDimitry Andric MIB.add(SetCCI.getOperand(i)); 1039*0b57cec5SDimitry Andric 1040*0b57cec5SDimitry Andric MIB.addReg(CondReg); 1041*0b57cec5SDimitry Andric 1042*0b57cec5SDimitry Andric MIB.setMemRefs(SetCCI.memoperands()); 1043*0b57cec5SDimitry Andric 1044*0b57cec5SDimitry Andric SetCCI.eraseFromParent(); 1045*0b57cec5SDimitry Andric return; 1046*0b57cec5SDimitry Andric } 1047