xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp (revision 480093f4440d54b30b3025afeac24b48f2ba7a2e)
10b57cec5SDimitry Andric //====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric /// \file
90b57cec5SDimitry Andric ///
100b57cec5SDimitry Andric /// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
110b57cec5SDimitry Andric /// flag bits.
120b57cec5SDimitry Andric ///
130b57cec5SDimitry Andric /// We have to do this by carefully analyzing and rewriting the usage of the
140b57cec5SDimitry Andric /// copied EFLAGS register because there is no general way to rematerialize the
150b57cec5SDimitry Andric /// entire EFLAGS register safely and efficiently. Using `popf` both forces
160b57cec5SDimitry Andric /// dynamic stack adjustment and can create correctness issues due to IF, TF,
170b57cec5SDimitry Andric /// and other non-status flags being overwritten. Using sequences involving
180b57cec5SDimitry Andric /// SAHF don't work on all x86 processors and are often quite slow compared to
190b57cec5SDimitry Andric /// directly testing a single status preserved in its own GPR.
200b57cec5SDimitry Andric ///
210b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
220b57cec5SDimitry Andric 
230b57cec5SDimitry Andric #include "X86.h"
240b57cec5SDimitry Andric #include "X86InstrBuilder.h"
250b57cec5SDimitry Andric #include "X86InstrInfo.h"
260b57cec5SDimitry Andric #include "X86Subtarget.h"
270b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h"
280b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
290b57cec5SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
300b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
310b57cec5SDimitry Andric #include "llvm/ADT/ScopeExit.h"
320b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
330b57cec5SDimitry Andric #include "llvm/ADT/SmallSet.h"
340b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
350b57cec5SDimitry Andric #include "llvm/ADT/SparseBitVector.h"
360b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
370b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
380b57cec5SDimitry Andric #include "llvm/CodeGen/MachineConstantPool.h"
390b57cec5SDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
400b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
410b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
420b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
430b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
440b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
450b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
460b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
470b57cec5SDimitry Andric #include "llvm/CodeGen/MachineSSAUpdater.h"
480b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
490b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
500b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSchedule.h"
510b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
520b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h"
530b57cec5SDimitry Andric #include "llvm/MC/MCSchedule.h"
540b57cec5SDimitry Andric #include "llvm/Pass.h"
550b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
560b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
570b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
580b57cec5SDimitry Andric #include <algorithm>
590b57cec5SDimitry Andric #include <cassert>
600b57cec5SDimitry Andric #include <iterator>
610b57cec5SDimitry Andric #include <utility>
620b57cec5SDimitry Andric 
630b57cec5SDimitry Andric using namespace llvm;
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric #define PASS_KEY "x86-flags-copy-lowering"
660b57cec5SDimitry Andric #define DEBUG_TYPE PASS_KEY
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
690b57cec5SDimitry Andric STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
700b57cec5SDimitry Andric STATISTIC(NumTestsInserted, "Number of test instructions inserted");
710b57cec5SDimitry Andric STATISTIC(NumAddsInserted, "Number of adds instructions inserted");
720b57cec5SDimitry Andric 
730b57cec5SDimitry Andric namespace {
740b57cec5SDimitry Andric 
750b57cec5SDimitry Andric // Convenient array type for storing registers associated with each condition.
760b57cec5SDimitry Andric using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric class X86FlagsCopyLoweringPass : public MachineFunctionPass {
790b57cec5SDimitry Andric public:
800b57cec5SDimitry Andric   X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) { }
810b57cec5SDimitry Andric 
820b57cec5SDimitry Andric   StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
830b57cec5SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
840b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
850b57cec5SDimitry Andric 
860b57cec5SDimitry Andric   /// Pass identification, replacement for typeid.
870b57cec5SDimitry Andric   static char ID;
880b57cec5SDimitry Andric 
890b57cec5SDimitry Andric private:
90*480093f4SDimitry Andric   MachineRegisterInfo *MRI = nullptr;
91*480093f4SDimitry Andric   const X86Subtarget *Subtarget = nullptr;
92*480093f4SDimitry Andric   const X86InstrInfo *TII = nullptr;
93*480093f4SDimitry Andric   const TargetRegisterInfo *TRI = nullptr;
94*480093f4SDimitry Andric   const TargetRegisterClass *PromoteRC = nullptr;
95*480093f4SDimitry Andric   MachineDominatorTree *MDT = nullptr;
960b57cec5SDimitry Andric 
970b57cec5SDimitry Andric   CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
980b57cec5SDimitry Andric                                   MachineBasicBlock::iterator CopyDefI);
990b57cec5SDimitry Andric 
1000b57cec5SDimitry Andric   unsigned promoteCondToReg(MachineBasicBlock &MBB,
1010b57cec5SDimitry Andric                             MachineBasicBlock::iterator TestPos,
1020b57cec5SDimitry Andric                             DebugLoc TestLoc, X86::CondCode Cond);
1030b57cec5SDimitry Andric   std::pair<unsigned, bool>
1040b57cec5SDimitry Andric   getCondOrInverseInReg(MachineBasicBlock &TestMBB,
1050b57cec5SDimitry Andric                         MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
1060b57cec5SDimitry Andric                         X86::CondCode Cond, CondRegArray &CondRegs);
1070b57cec5SDimitry Andric   void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
1080b57cec5SDimitry Andric                   DebugLoc Loc, unsigned Reg);
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric   void rewriteArithmetic(MachineBasicBlock &TestMBB,
1110b57cec5SDimitry Andric                          MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
1120b57cec5SDimitry Andric                          MachineInstr &MI, MachineOperand &FlagUse,
1130b57cec5SDimitry Andric                          CondRegArray &CondRegs);
1140b57cec5SDimitry Andric   void rewriteCMov(MachineBasicBlock &TestMBB,
1150b57cec5SDimitry Andric                    MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
1160b57cec5SDimitry Andric                    MachineInstr &CMovI, MachineOperand &FlagUse,
1170b57cec5SDimitry Andric                    CondRegArray &CondRegs);
1182a168f03SDimitry Andric   void rewriteFCMov(MachineBasicBlock &TestMBB,
1192a168f03SDimitry Andric                     MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
1202a168f03SDimitry Andric                     MachineInstr &CMovI, MachineOperand &FlagUse,
1212a168f03SDimitry Andric                     CondRegArray &CondRegs);
1220b57cec5SDimitry Andric   void rewriteCondJmp(MachineBasicBlock &TestMBB,
1230b57cec5SDimitry Andric                       MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
1240b57cec5SDimitry Andric                       MachineInstr &JmpI, CondRegArray &CondRegs);
1250b57cec5SDimitry Andric   void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
1260b57cec5SDimitry Andric                    MachineInstr &CopyDefI);
1270b57cec5SDimitry Andric   void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
1280b57cec5SDimitry Andric                                MachineBasicBlock::iterator TestPos,
1290b57cec5SDimitry Andric                                DebugLoc TestLoc, MachineInstr &SetBI,
1300b57cec5SDimitry Andric                                MachineOperand &FlagUse, CondRegArray &CondRegs);
1310b57cec5SDimitry Andric   void rewriteSetCC(MachineBasicBlock &TestMBB,
1320b57cec5SDimitry Andric                     MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
1330b57cec5SDimitry Andric                     MachineInstr &SetCCI, MachineOperand &FlagUse,
1340b57cec5SDimitry Andric                     CondRegArray &CondRegs);
1350b57cec5SDimitry Andric };
1360b57cec5SDimitry Andric 
1370b57cec5SDimitry Andric } // end anonymous namespace
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
1400b57cec5SDimitry Andric                       "X86 EFLAGS copy lowering", false, false)
1410b57cec5SDimitry Andric INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
1420b57cec5SDimitry Andric                     "X86 EFLAGS copy lowering", false, false)
1430b57cec5SDimitry Andric 
1440b57cec5SDimitry Andric FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
1450b57cec5SDimitry Andric   return new X86FlagsCopyLoweringPass();
1460b57cec5SDimitry Andric }
1470b57cec5SDimitry Andric 
1480b57cec5SDimitry Andric char X86FlagsCopyLoweringPass::ID = 0;
1490b57cec5SDimitry Andric 
1500b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
1510b57cec5SDimitry Andric   AU.addRequired<MachineDominatorTree>();
1520b57cec5SDimitry Andric   MachineFunctionPass::getAnalysisUsage(AU);
1530b57cec5SDimitry Andric }
1540b57cec5SDimitry Andric 
1550b57cec5SDimitry Andric namespace {
1560b57cec5SDimitry Andric /// An enumeration of the arithmetic instruction mnemonics which have
1570b57cec5SDimitry Andric /// interesting flag semantics.
1580b57cec5SDimitry Andric ///
1590b57cec5SDimitry Andric /// We can map instruction opcodes into these mnemonics to make it easy to
1600b57cec5SDimitry Andric /// dispatch with specific functionality.
1610b57cec5SDimitry Andric enum class FlagArithMnemonic {
1620b57cec5SDimitry Andric   ADC,
1630b57cec5SDimitry Andric   ADCX,
1640b57cec5SDimitry Andric   ADOX,
1650b57cec5SDimitry Andric   RCL,
1660b57cec5SDimitry Andric   RCR,
1670b57cec5SDimitry Andric   SBB,
1680b57cec5SDimitry Andric };
1690b57cec5SDimitry Andric } // namespace
1700b57cec5SDimitry Andric 
1710b57cec5SDimitry Andric static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
1720b57cec5SDimitry Andric   switch (Opcode) {
1730b57cec5SDimitry Andric   default:
1740b57cec5SDimitry Andric     report_fatal_error("No support for lowering a copy into EFLAGS when used "
1750b57cec5SDimitry Andric                        "by this instruction!");
1760b57cec5SDimitry Andric 
1770b57cec5SDimitry Andric #define LLVM_EXPAND_INSTR_SIZES(MNEMONIC, SUFFIX)                              \
1780b57cec5SDimitry Andric   case X86::MNEMONIC##8##SUFFIX:                                               \
1790b57cec5SDimitry Andric   case X86::MNEMONIC##16##SUFFIX:                                              \
1800b57cec5SDimitry Andric   case X86::MNEMONIC##32##SUFFIX:                                              \
1810b57cec5SDimitry Andric   case X86::MNEMONIC##64##SUFFIX:
1820b57cec5SDimitry Andric 
1830b57cec5SDimitry Andric #define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC)                                    \
1840b57cec5SDimitry Andric   LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr)                                        \
1850b57cec5SDimitry Andric   LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV)                                    \
1860b57cec5SDimitry Andric   LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm)                                        \
1870b57cec5SDimitry Andric   LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr)                                        \
1880b57cec5SDimitry Andric   case X86::MNEMONIC##8ri:                                                     \
1890b57cec5SDimitry Andric   case X86::MNEMONIC##16ri8:                                                   \
1900b57cec5SDimitry Andric   case X86::MNEMONIC##32ri8:                                                   \
1910b57cec5SDimitry Andric   case X86::MNEMONIC##64ri8:                                                   \
1920b57cec5SDimitry Andric   case X86::MNEMONIC##16ri:                                                    \
1930b57cec5SDimitry Andric   case X86::MNEMONIC##32ri:                                                    \
1940b57cec5SDimitry Andric   case X86::MNEMONIC##64ri32:                                                  \
1950b57cec5SDimitry Andric   case X86::MNEMONIC##8mi:                                                     \
1960b57cec5SDimitry Andric   case X86::MNEMONIC##16mi8:                                                   \
1970b57cec5SDimitry Andric   case X86::MNEMONIC##32mi8:                                                   \
1980b57cec5SDimitry Andric   case X86::MNEMONIC##64mi8:                                                   \
1990b57cec5SDimitry Andric   case X86::MNEMONIC##16mi:                                                    \
2000b57cec5SDimitry Andric   case X86::MNEMONIC##32mi:                                                    \
2010b57cec5SDimitry Andric   case X86::MNEMONIC##64mi32:                                                  \
2020b57cec5SDimitry Andric   case X86::MNEMONIC##8i8:                                                     \
2030b57cec5SDimitry Andric   case X86::MNEMONIC##16i16:                                                   \
2040b57cec5SDimitry Andric   case X86::MNEMONIC##32i32:                                                   \
2050b57cec5SDimitry Andric   case X86::MNEMONIC##64i32:
2060b57cec5SDimitry Andric 
2070b57cec5SDimitry Andric     LLVM_EXPAND_ADC_SBB_INSTR(ADC)
2080b57cec5SDimitry Andric     return FlagArithMnemonic::ADC;
2090b57cec5SDimitry Andric 
2100b57cec5SDimitry Andric     LLVM_EXPAND_ADC_SBB_INSTR(SBB)
2110b57cec5SDimitry Andric     return FlagArithMnemonic::SBB;
2120b57cec5SDimitry Andric 
2130b57cec5SDimitry Andric #undef LLVM_EXPAND_ADC_SBB_INSTR
2140b57cec5SDimitry Andric 
2150b57cec5SDimitry Andric     LLVM_EXPAND_INSTR_SIZES(RCL, rCL)
2160b57cec5SDimitry Andric     LLVM_EXPAND_INSTR_SIZES(RCL, r1)
2170b57cec5SDimitry Andric     LLVM_EXPAND_INSTR_SIZES(RCL, ri)
2180b57cec5SDimitry Andric     return FlagArithMnemonic::RCL;
2190b57cec5SDimitry Andric 
2200b57cec5SDimitry Andric     LLVM_EXPAND_INSTR_SIZES(RCR, rCL)
2210b57cec5SDimitry Andric     LLVM_EXPAND_INSTR_SIZES(RCR, r1)
2220b57cec5SDimitry Andric     LLVM_EXPAND_INSTR_SIZES(RCR, ri)
2230b57cec5SDimitry Andric     return FlagArithMnemonic::RCR;
2240b57cec5SDimitry Andric 
2250b57cec5SDimitry Andric #undef LLVM_EXPAND_INSTR_SIZES
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric   case X86::ADCX32rr:
2280b57cec5SDimitry Andric   case X86::ADCX64rr:
2290b57cec5SDimitry Andric   case X86::ADCX32rm:
2300b57cec5SDimitry Andric   case X86::ADCX64rm:
2310b57cec5SDimitry Andric     return FlagArithMnemonic::ADCX;
2320b57cec5SDimitry Andric 
2330b57cec5SDimitry Andric   case X86::ADOX32rr:
2340b57cec5SDimitry Andric   case X86::ADOX64rr:
2350b57cec5SDimitry Andric   case X86::ADOX32rm:
2360b57cec5SDimitry Andric   case X86::ADOX64rm:
2370b57cec5SDimitry Andric     return FlagArithMnemonic::ADOX;
2380b57cec5SDimitry Andric   }
2390b57cec5SDimitry Andric }
2400b57cec5SDimitry Andric 
2410b57cec5SDimitry Andric static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
2420b57cec5SDimitry Andric                                      MachineInstr &SplitI,
2430b57cec5SDimitry Andric                                      const X86InstrInfo &TII) {
2440b57cec5SDimitry Andric   MachineFunction &MF = *MBB.getParent();
2450b57cec5SDimitry Andric 
2460b57cec5SDimitry Andric   assert(SplitI.getParent() == &MBB &&
2470b57cec5SDimitry Andric          "Split instruction must be in the split block!");
2480b57cec5SDimitry Andric   assert(SplitI.isBranch() &&
2490b57cec5SDimitry Andric          "Only designed to split a tail of branch instructions!");
2500b57cec5SDimitry Andric   assert(X86::getCondFromBranch(SplitI) != X86::COND_INVALID &&
2510b57cec5SDimitry Andric          "Must split on an actual jCC instruction!");
2520b57cec5SDimitry Andric 
2530b57cec5SDimitry Andric   // Dig out the previous instruction to the split point.
2540b57cec5SDimitry Andric   MachineInstr &PrevI = *std::prev(SplitI.getIterator());
2550b57cec5SDimitry Andric   assert(PrevI.isBranch() && "Must split after a branch!");
2560b57cec5SDimitry Andric   assert(X86::getCondFromBranch(PrevI) != X86::COND_INVALID &&
2570b57cec5SDimitry Andric          "Must split after an actual jCC instruction!");
2580b57cec5SDimitry Andric   assert(!std::prev(PrevI.getIterator())->isTerminator() &&
2590b57cec5SDimitry Andric          "Must only have this one terminator prior to the split!");
2600b57cec5SDimitry Andric 
2610b57cec5SDimitry Andric   // Grab the one successor edge that will stay in `MBB`.
2620b57cec5SDimitry Andric   MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();
2630b57cec5SDimitry Andric 
2640b57cec5SDimitry Andric   // Analyze the original block to see if we are actually splitting an edge
2650b57cec5SDimitry Andric   // into two edges. This can happen when we have multiple conditional jumps to
2660b57cec5SDimitry Andric   // the same successor.
2670b57cec5SDimitry Andric   bool IsEdgeSplit =
2680b57cec5SDimitry Andric       std::any_of(SplitI.getIterator(), MBB.instr_end(),
2690b57cec5SDimitry Andric                   [&](MachineInstr &MI) {
2700b57cec5SDimitry Andric                     assert(MI.isTerminator() &&
2710b57cec5SDimitry Andric                            "Should only have spliced terminators!");
2720b57cec5SDimitry Andric                     return llvm::any_of(
2730b57cec5SDimitry Andric                         MI.operands(), [&](MachineOperand &MOp) {
2740b57cec5SDimitry Andric                           return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
2750b57cec5SDimitry Andric                         });
2760b57cec5SDimitry Andric                   }) ||
2770b57cec5SDimitry Andric       MBB.getFallThrough() == &UnsplitSucc;
2780b57cec5SDimitry Andric 
2790b57cec5SDimitry Andric   MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
2800b57cec5SDimitry Andric 
2810b57cec5SDimitry Andric   // Insert the new block immediately after the current one. Any existing
2820b57cec5SDimitry Andric   // fallthrough will be sunk into this new block anyways.
2830b57cec5SDimitry Andric   MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
2840b57cec5SDimitry Andric 
2850b57cec5SDimitry Andric   // Splice the tail of instructions into the new block.
2860b57cec5SDimitry Andric   NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());
2870b57cec5SDimitry Andric 
2880b57cec5SDimitry Andric   // Copy the necessary succesors (and their probability info) into the new
2890b57cec5SDimitry Andric   // block.
2900b57cec5SDimitry Andric   for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
2910b57cec5SDimitry Andric     if (IsEdgeSplit || *SI != &UnsplitSucc)
2920b57cec5SDimitry Andric       NewMBB.copySuccessor(&MBB, SI);
2930b57cec5SDimitry Andric   // Normalize the probabilities if we didn't end up splitting the edge.
2940b57cec5SDimitry Andric   if (!IsEdgeSplit)
2950b57cec5SDimitry Andric     NewMBB.normalizeSuccProbs();
2960b57cec5SDimitry Andric 
2970b57cec5SDimitry Andric   // Now replace all of the moved successors in the original block with the new
2980b57cec5SDimitry Andric   // block. This will merge their probabilities.
2990b57cec5SDimitry Andric   for (MachineBasicBlock *Succ : NewMBB.successors())
3000b57cec5SDimitry Andric     if (Succ != &UnsplitSucc)
3010b57cec5SDimitry Andric       MBB.replaceSuccessor(Succ, &NewMBB);
3020b57cec5SDimitry Andric 
3030b57cec5SDimitry Andric   // We should always end up replacing at least one successor.
3040b57cec5SDimitry Andric   assert(MBB.isSuccessor(&NewMBB) &&
3050b57cec5SDimitry Andric          "Failed to make the new block a successor!");
3060b57cec5SDimitry Andric 
3070b57cec5SDimitry Andric   // Now update all the PHIs.
3080b57cec5SDimitry Andric   for (MachineBasicBlock *Succ : NewMBB.successors()) {
3090b57cec5SDimitry Andric     for (MachineInstr &MI : *Succ) {
3100b57cec5SDimitry Andric       if (!MI.isPHI())
3110b57cec5SDimitry Andric         break;
3120b57cec5SDimitry Andric 
3130b57cec5SDimitry Andric       for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
3140b57cec5SDimitry Andric            OpIdx += 2) {
3150b57cec5SDimitry Andric         MachineOperand &OpV = MI.getOperand(OpIdx);
3160b57cec5SDimitry Andric         MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
3170b57cec5SDimitry Andric         assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
3180b57cec5SDimitry Andric         if (OpMBB.getMBB() != &MBB)
3190b57cec5SDimitry Andric           continue;
3200b57cec5SDimitry Andric 
3210b57cec5SDimitry Andric         // Replace the operand for unsplit successors
3220b57cec5SDimitry Andric         if (!IsEdgeSplit || Succ != &UnsplitSucc) {
3230b57cec5SDimitry Andric           OpMBB.setMBB(&NewMBB);
3240b57cec5SDimitry Andric 
3250b57cec5SDimitry Andric           // We have to continue scanning as there may be multiple entries in
3260b57cec5SDimitry Andric           // the PHI.
3270b57cec5SDimitry Andric           continue;
3280b57cec5SDimitry Andric         }
3290b57cec5SDimitry Andric 
3300b57cec5SDimitry Andric         // When we have split the edge append a new successor.
3310b57cec5SDimitry Andric         MI.addOperand(MF, OpV);
3320b57cec5SDimitry Andric         MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
3330b57cec5SDimitry Andric         break;
3340b57cec5SDimitry Andric       }
3350b57cec5SDimitry Andric     }
3360b57cec5SDimitry Andric   }
3370b57cec5SDimitry Andric 
3380b57cec5SDimitry Andric   return NewMBB;
3390b57cec5SDimitry Andric }
3400b57cec5SDimitry Andric 
3412a168f03SDimitry Andric static X86::CondCode getCondFromFCMOV(unsigned Opcode) {
3422a168f03SDimitry Andric   switch (Opcode) {
3432a168f03SDimitry Andric   default: return X86::COND_INVALID;
3442a168f03SDimitry Andric   case X86::CMOVBE_Fp32:  case X86::CMOVBE_Fp64:  case X86::CMOVBE_Fp80:
3452a168f03SDimitry Andric     return X86::COND_BE;
3462a168f03SDimitry Andric   case X86::CMOVB_Fp32:   case X86::CMOVB_Fp64:   case X86::CMOVB_Fp80:
3472a168f03SDimitry Andric     return X86::COND_B;
3482a168f03SDimitry Andric   case X86::CMOVE_Fp32:   case X86::CMOVE_Fp64:   case X86::CMOVE_Fp80:
3492a168f03SDimitry Andric     return X86::COND_E;
3502a168f03SDimitry Andric   case X86::CMOVNBE_Fp32: case X86::CMOVNBE_Fp64: case X86::CMOVNBE_Fp80:
3512a168f03SDimitry Andric     return X86::COND_A;
3522a168f03SDimitry Andric   case X86::CMOVNB_Fp32:  case X86::CMOVNB_Fp64:  case X86::CMOVNB_Fp80:
3532a168f03SDimitry Andric     return X86::COND_AE;
3542a168f03SDimitry Andric   case X86::CMOVNE_Fp32:  case X86::CMOVNE_Fp64:  case X86::CMOVNE_Fp80:
3552a168f03SDimitry Andric     return X86::COND_NE;
3562a168f03SDimitry Andric   case X86::CMOVNP_Fp32:  case X86::CMOVNP_Fp64:  case X86::CMOVNP_Fp80:
3572a168f03SDimitry Andric     return X86::COND_NP;
3582a168f03SDimitry Andric   case X86::CMOVP_Fp32:   case X86::CMOVP_Fp64:   case X86::CMOVP_Fp80:
3592a168f03SDimitry Andric     return X86::COND_P;
3602a168f03SDimitry Andric   }
3612a168f03SDimitry Andric }
3622a168f03SDimitry Andric 
3630b57cec5SDimitry Andric bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
3640b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
3650b57cec5SDimitry Andric                     << " **********\n");
3660b57cec5SDimitry Andric 
3670b57cec5SDimitry Andric   Subtarget = &MF.getSubtarget<X86Subtarget>();
3680b57cec5SDimitry Andric   MRI = &MF.getRegInfo();
3690b57cec5SDimitry Andric   TII = Subtarget->getInstrInfo();
3700b57cec5SDimitry Andric   TRI = Subtarget->getRegisterInfo();
3710b57cec5SDimitry Andric   MDT = &getAnalysis<MachineDominatorTree>();
3720b57cec5SDimitry Andric   PromoteRC = &X86::GR8RegClass;
3730b57cec5SDimitry Andric 
3740b57cec5SDimitry Andric   if (MF.begin() == MF.end())
3750b57cec5SDimitry Andric     // Nothing to do for a degenerate empty function...
3760b57cec5SDimitry Andric     return false;
3770b57cec5SDimitry Andric 
3780b57cec5SDimitry Andric   // Collect the copies in RPO so that when there are chains where a copy is in
3790b57cec5SDimitry Andric   // turn copied again we visit the first one first. This ensures we can find
3800b57cec5SDimitry Andric   // viable locations for testing the original EFLAGS that dominate all the
3810b57cec5SDimitry Andric   // uses across complex CFGs.
3820b57cec5SDimitry Andric   SmallVector<MachineInstr *, 4> Copies;
3830b57cec5SDimitry Andric   ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
3840b57cec5SDimitry Andric   for (MachineBasicBlock *MBB : RPOT)
3850b57cec5SDimitry Andric     for (MachineInstr &MI : *MBB)
3860b57cec5SDimitry Andric       if (MI.getOpcode() == TargetOpcode::COPY &&
3870b57cec5SDimitry Andric           MI.getOperand(0).getReg() == X86::EFLAGS)
3880b57cec5SDimitry Andric         Copies.push_back(&MI);
3890b57cec5SDimitry Andric 
3900b57cec5SDimitry Andric   for (MachineInstr *CopyI : Copies) {
3910b57cec5SDimitry Andric     MachineBasicBlock &MBB = *CopyI->getParent();
3920b57cec5SDimitry Andric 
3930b57cec5SDimitry Andric     MachineOperand &VOp = CopyI->getOperand(1);
3940b57cec5SDimitry Andric     assert(VOp.isReg() &&
3950b57cec5SDimitry Andric            "The input to the copy for EFLAGS should always be a register!");
3960b57cec5SDimitry Andric     MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
3970b57cec5SDimitry Andric     if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
3980b57cec5SDimitry Andric       // FIXME: The big likely candidate here are PHI nodes. We could in theory
3990b57cec5SDimitry Andric       // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard
4000b57cec5SDimitry Andric       // enough that it is probably better to change every other part of LLVM
4010b57cec5SDimitry Andric       // to avoid creating them. The issue is that once we have PHIs we won't
4020b57cec5SDimitry Andric       // know which original EFLAGS value we need to capture with our setCCs
4030b57cec5SDimitry Andric       // below. The end result will be computing a complete set of setCCs that
4040b57cec5SDimitry Andric       // we *might* want, computing them in every place where we copy *out* of
4050b57cec5SDimitry Andric       // EFLAGS and then doing SSA formation on all of them to insert necessary
4060b57cec5SDimitry Andric       // PHI nodes and consume those here. Then hoping that somehow we DCE the
4070b57cec5SDimitry Andric       // unnecessary ones. This DCE seems very unlikely to be successful and so
4080b57cec5SDimitry Andric       // we will almost certainly end up with a glut of dead setCC
4090b57cec5SDimitry Andric       // instructions. Until we have a motivating test case and fail to avoid
4100b57cec5SDimitry Andric       // it by changing other parts of LLVM's lowering, we refuse to handle
4110b57cec5SDimitry Andric       // this complex case here.
4120b57cec5SDimitry Andric       LLVM_DEBUG(
4130b57cec5SDimitry Andric           dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
4140b57cec5SDimitry Andric           CopyDefI.dump());
4150b57cec5SDimitry Andric       report_fatal_error(
4160b57cec5SDimitry Andric           "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
4170b57cec5SDimitry Andric     }
4180b57cec5SDimitry Andric 
4190b57cec5SDimitry Andric     auto Cleanup = make_scope_exit([&] {
4200b57cec5SDimitry Andric       // All uses of the EFLAGS copy are now rewritten, kill the copy into
4210b57cec5SDimitry Andric       // eflags and if dead the copy from.
4220b57cec5SDimitry Andric       CopyI->eraseFromParent();
4230b57cec5SDimitry Andric       if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
4240b57cec5SDimitry Andric         CopyDefI.eraseFromParent();
4250b57cec5SDimitry Andric       ++NumCopiesEliminated;
4260b57cec5SDimitry Andric     });
4270b57cec5SDimitry Andric 
4280b57cec5SDimitry Andric     MachineOperand &DOp = CopyI->getOperand(0);
4290b57cec5SDimitry Andric     assert(DOp.isDef() && "Expected register def!");
4300b57cec5SDimitry Andric     assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
4310b57cec5SDimitry Andric     if (DOp.isDead())
4320b57cec5SDimitry Andric       continue;
4330b57cec5SDimitry Andric 
4340b57cec5SDimitry Andric     MachineBasicBlock *TestMBB = CopyDefI.getParent();
4350b57cec5SDimitry Andric     auto TestPos = CopyDefI.getIterator();
4360b57cec5SDimitry Andric     DebugLoc TestLoc = CopyDefI.getDebugLoc();
4370b57cec5SDimitry Andric 
4380b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());
4390b57cec5SDimitry Andric 
4400b57cec5SDimitry Andric     // Walk up across live-in EFLAGS to find where they were actually def'ed.
4410b57cec5SDimitry Andric     //
4420b57cec5SDimitry Andric     // This copy's def may just be part of a region of blocks covered by
4430b57cec5SDimitry Andric     // a single def of EFLAGS and we want to find the top of that region where
4440b57cec5SDimitry Andric     // possible.
4450b57cec5SDimitry Andric     //
4460b57cec5SDimitry Andric     // This is essentially a search for a *candidate* reaching definition
4470b57cec5SDimitry Andric     // location. We don't need to ever find the actual reaching definition here,
4480b57cec5SDimitry Andric     // but we want to walk up the dominator tree to find the highest point which
4490b57cec5SDimitry Andric     // would be viable for such a definition.
4500b57cec5SDimitry Andric     auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin,
4510b57cec5SDimitry Andric                                 MachineBasicBlock::iterator End) {
4520b57cec5SDimitry Andric       // Scan backwards as we expect these to be relatively short and often find
4530b57cec5SDimitry Andric       // a clobber near the end.
4540b57cec5SDimitry Andric       return llvm::any_of(
4550b57cec5SDimitry Andric           llvm::reverse(llvm::make_range(Begin, End)), [&](MachineInstr &MI) {
4560b57cec5SDimitry Andric             // Flag any instruction (other than the copy we are
4570b57cec5SDimitry Andric             // currently rewriting) that defs EFLAGS.
4580b57cec5SDimitry Andric             return &MI != CopyI && MI.findRegisterDefOperand(X86::EFLAGS);
4590b57cec5SDimitry Andric           });
4600b57cec5SDimitry Andric     };
4610b57cec5SDimitry Andric     auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB,
4620b57cec5SDimitry Andric                                     MachineBasicBlock *EndMBB) {
4630b57cec5SDimitry Andric       assert(MDT->dominates(BeginMBB, EndMBB) &&
4640b57cec5SDimitry Andric              "Only support paths down the dominator tree!");
4650b57cec5SDimitry Andric       SmallPtrSet<MachineBasicBlock *, 4> Visited;
4660b57cec5SDimitry Andric       SmallVector<MachineBasicBlock *, 4> Worklist;
4670b57cec5SDimitry Andric       // We terminate at the beginning. No need to scan it.
4680b57cec5SDimitry Andric       Visited.insert(BeginMBB);
4690b57cec5SDimitry Andric       Worklist.push_back(EndMBB);
4700b57cec5SDimitry Andric       do {
4710b57cec5SDimitry Andric         auto *MBB = Worklist.pop_back_val();
4720b57cec5SDimitry Andric         for (auto *PredMBB : MBB->predecessors()) {
4730b57cec5SDimitry Andric           if (!Visited.insert(PredMBB).second)
4740b57cec5SDimitry Andric             continue;
4750b57cec5SDimitry Andric           if (HasEFLAGSClobber(PredMBB->begin(), PredMBB->end()))
4760b57cec5SDimitry Andric             return true;
4770b57cec5SDimitry Andric           // Enqueue this block to walk its predecessors.
4780b57cec5SDimitry Andric           Worklist.push_back(PredMBB);
4790b57cec5SDimitry Andric         }
4800b57cec5SDimitry Andric       } while (!Worklist.empty());
4810b57cec5SDimitry Andric       // No clobber found along a path from the begin to end.
4820b57cec5SDimitry Andric       return false;
4830b57cec5SDimitry Andric     };
4840b57cec5SDimitry Andric     while (TestMBB->isLiveIn(X86::EFLAGS) && !TestMBB->pred_empty() &&
4850b57cec5SDimitry Andric            !HasEFLAGSClobber(TestMBB->begin(), TestPos)) {
4860b57cec5SDimitry Andric       // Find the nearest common dominator of the predecessors, as
4870b57cec5SDimitry Andric       // that will be the best candidate to hoist into.
4880b57cec5SDimitry Andric       MachineBasicBlock *HoistMBB =
4890b57cec5SDimitry Andric           std::accumulate(std::next(TestMBB->pred_begin()), TestMBB->pred_end(),
4900b57cec5SDimitry Andric                           *TestMBB->pred_begin(),
4910b57cec5SDimitry Andric                           [&](MachineBasicBlock *LHS, MachineBasicBlock *RHS) {
4920b57cec5SDimitry Andric                             return MDT->findNearestCommonDominator(LHS, RHS);
4930b57cec5SDimitry Andric                           });
4940b57cec5SDimitry Andric 
4950b57cec5SDimitry Andric       // Now we need to scan all predecessors that may be reached along paths to
4960b57cec5SDimitry Andric       // the hoist block. A clobber anywhere in any of these blocks the hoist.
4970b57cec5SDimitry Andric       // Note that this even handles loops because we require *no* clobbers.
4980b57cec5SDimitry Andric       if (HasEFLAGSClobberPath(HoistMBB, TestMBB))
4990b57cec5SDimitry Andric         break;
5000b57cec5SDimitry Andric 
5010b57cec5SDimitry Andric       // We also need the terminators to not sneakily clobber flags.
5020b57cec5SDimitry Andric       if (HasEFLAGSClobber(HoistMBB->getFirstTerminator()->getIterator(),
5030b57cec5SDimitry Andric                            HoistMBB->instr_end()))
5040b57cec5SDimitry Andric         break;
5050b57cec5SDimitry Andric 
5060b57cec5SDimitry Andric       // We found a viable location, hoist our test position to it.
5070b57cec5SDimitry Andric       TestMBB = HoistMBB;
5080b57cec5SDimitry Andric       TestPos = TestMBB->getFirstTerminator()->getIterator();
5090b57cec5SDimitry Andric       // Clear the debug location as it would just be confusing after hoisting.
5100b57cec5SDimitry Andric       TestLoc = DebugLoc();
5110b57cec5SDimitry Andric     }
5120b57cec5SDimitry Andric     LLVM_DEBUG({
5130b57cec5SDimitry Andric       auto DefIt = llvm::find_if(
5140b57cec5SDimitry Andric           llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)),
5150b57cec5SDimitry Andric           [&](MachineInstr &MI) {
5160b57cec5SDimitry Andric             return MI.findRegisterDefOperand(X86::EFLAGS);
5170b57cec5SDimitry Andric           });
5180b57cec5SDimitry Andric       if (DefIt.base() != TestMBB->instr_begin()) {
5190b57cec5SDimitry Andric         dbgs() << "  Using EFLAGS defined by: ";
5200b57cec5SDimitry Andric         DefIt->dump();
5210b57cec5SDimitry Andric       } else {
5220b57cec5SDimitry Andric         dbgs() << "  Using live-in flags for BB:\n";
5230b57cec5SDimitry Andric         TestMBB->dump();
5240b57cec5SDimitry Andric       }
5250b57cec5SDimitry Andric     });
5260b57cec5SDimitry Andric 
5270b57cec5SDimitry Andric     // While rewriting uses, we buffer jumps and rewrite them in a second pass
5280b57cec5SDimitry Andric     // because doing so will perturb the CFG that we are walking to find the
5290b57cec5SDimitry Andric     // uses in the first place.
5300b57cec5SDimitry Andric     SmallVector<MachineInstr *, 4> JmpIs;
5310b57cec5SDimitry Andric 
5320b57cec5SDimitry Andric     // Gather the condition flags that have already been preserved in
5330b57cec5SDimitry Andric     // registers. We do this from scratch each time as we expect there to be
5340b57cec5SDimitry Andric     // very few of them and we expect to not revisit the same copy definition
5350b57cec5SDimitry Andric     // many times. If either of those change sufficiently we could build a map
5360b57cec5SDimitry Andric     // of these up front instead.
5370b57cec5SDimitry Andric     CondRegArray CondRegs = collectCondsInRegs(*TestMBB, TestPos);
5380b57cec5SDimitry Andric 
5390b57cec5SDimitry Andric     // Collect the basic blocks we need to scan. Typically this will just be
5400b57cec5SDimitry Andric     // a single basic block but we may have to scan multiple blocks if the
5410b57cec5SDimitry Andric     // EFLAGS copy lives into successors.
5420b57cec5SDimitry Andric     SmallVector<MachineBasicBlock *, 2> Blocks;
5430b57cec5SDimitry Andric     SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
5440b57cec5SDimitry Andric     Blocks.push_back(&MBB);
5450b57cec5SDimitry Andric 
5460b57cec5SDimitry Andric     do {
5470b57cec5SDimitry Andric       MachineBasicBlock &UseMBB = *Blocks.pop_back_val();
5480b57cec5SDimitry Andric 
5490b57cec5SDimitry Andric       // Track when if/when we find a kill of the flags in this block.
5500b57cec5SDimitry Andric       bool FlagsKilled = false;
5510b57cec5SDimitry Andric 
5520b57cec5SDimitry Andric       // In most cases, we walk from the beginning to the end of the block. But
5530b57cec5SDimitry Andric       // when the block is the same block as the copy is from, we will visit it
5540b57cec5SDimitry Andric       // twice. The first time we start from the copy and go to the end. The
5550b57cec5SDimitry Andric       // second time we start from the beginning and go to the copy. This lets
5560b57cec5SDimitry Andric       // us handle copies inside of cycles.
5570b57cec5SDimitry Andric       // FIXME: This loop is *super* confusing. This is at least in part
5580b57cec5SDimitry Andric       // a symptom of all of this routine needing to be refactored into
5590b57cec5SDimitry Andric       // documentable components. Once done, there may be a better way to write
5600b57cec5SDimitry Andric       // this loop.
5610b57cec5SDimitry Andric       for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(&UseMBB))
5620b57cec5SDimitry Andric                           ? std::next(CopyI->getIterator())
5630b57cec5SDimitry Andric                           : UseMBB.instr_begin(),
5640b57cec5SDimitry Andric                 MIE = UseMBB.instr_end();
5650b57cec5SDimitry Andric            MII != MIE;) {
5660b57cec5SDimitry Andric         MachineInstr &MI = *MII++;
5670b57cec5SDimitry Andric         // If we are in the original copy block and encounter either the copy
5680b57cec5SDimitry Andric         // def or the copy itself, break so that we don't re-process any part of
5690b57cec5SDimitry Andric         // the block or process the instructions in the range that was copied
5700b57cec5SDimitry Andric         // over.
5710b57cec5SDimitry Andric         if (&MI == CopyI || &MI == &CopyDefI) {
5720b57cec5SDimitry Andric           assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) &&
5730b57cec5SDimitry Andric                  "Should only encounter these on the second pass over the "
5740b57cec5SDimitry Andric                  "original block.");
5750b57cec5SDimitry Andric           break;
5760b57cec5SDimitry Andric         }
5770b57cec5SDimitry Andric 
5780b57cec5SDimitry Andric         MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
5790b57cec5SDimitry Andric         if (!FlagUse) {
5800b57cec5SDimitry Andric           if (MI.findRegisterDefOperand(X86::EFLAGS)) {
5810b57cec5SDimitry Andric             // If EFLAGS are defined, it's as-if they were killed. We can stop
5820b57cec5SDimitry Andric             // scanning here.
5830b57cec5SDimitry Andric             //
5840b57cec5SDimitry Andric             // NB!!! Many instructions only modify some flags. LLVM currently
5850b57cec5SDimitry Andric             // models this as clobbering all flags, but if that ever changes
5860b57cec5SDimitry Andric             // this will need to be carefully updated to handle that more
5870b57cec5SDimitry Andric             // complex logic.
5880b57cec5SDimitry Andric             FlagsKilled = true;
5890b57cec5SDimitry Andric             break;
5900b57cec5SDimitry Andric           }
5910b57cec5SDimitry Andric           continue;
5920b57cec5SDimitry Andric         }
5930b57cec5SDimitry Andric 
5940b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "  Rewriting use: "; MI.dump());
5950b57cec5SDimitry Andric 
5960b57cec5SDimitry Andric         // Check the kill flag before we rewrite as that may change it.
5970b57cec5SDimitry Andric         if (FlagUse->isKill())
5980b57cec5SDimitry Andric           FlagsKilled = true;
5990b57cec5SDimitry Andric 
6000b57cec5SDimitry Andric         // Once we encounter a branch, the rest of the instructions must also be
6010b57cec5SDimitry Andric         // branches. We can't rewrite in place here, so we handle them below.
6020b57cec5SDimitry Andric         //
6030b57cec5SDimitry Andric         // Note that we don't have to handle tail calls here, even conditional
6040b57cec5SDimitry Andric         // tail calls, as those are not introduced into the X86 MI until post-RA
6050b57cec5SDimitry Andric         // branch folding or black placement. As a consequence, we get to deal
6060b57cec5SDimitry Andric         // with the simpler formulation of conditional branches followed by tail
6070b57cec5SDimitry Andric         // calls.
6080b57cec5SDimitry Andric         if (X86::getCondFromBranch(MI) != X86::COND_INVALID) {
6090b57cec5SDimitry Andric           auto JmpIt = MI.getIterator();
6100b57cec5SDimitry Andric           do {
6110b57cec5SDimitry Andric             JmpIs.push_back(&*JmpIt);
6120b57cec5SDimitry Andric             ++JmpIt;
6130b57cec5SDimitry Andric           } while (JmpIt != UseMBB.instr_end() &&
6140b57cec5SDimitry Andric                    X86::getCondFromBranch(*JmpIt) !=
6150b57cec5SDimitry Andric                        X86::COND_INVALID);
6160b57cec5SDimitry Andric           break;
6170b57cec5SDimitry Andric         }
6180b57cec5SDimitry Andric 
6190b57cec5SDimitry Andric         // Otherwise we can just rewrite in-place.
6200b57cec5SDimitry Andric         if (X86::getCondFromCMov(MI) != X86::COND_INVALID) {
6210b57cec5SDimitry Andric           rewriteCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
6222a168f03SDimitry Andric         } else if (getCondFromFCMOV(MI.getOpcode()) != X86::COND_INVALID) {
6232a168f03SDimitry Andric           rewriteFCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
6240b57cec5SDimitry Andric         } else if (X86::getCondFromSETCC(MI) != X86::COND_INVALID) {
6250b57cec5SDimitry Andric           rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
6260b57cec5SDimitry Andric         } else if (MI.getOpcode() == TargetOpcode::COPY) {
6270b57cec5SDimitry Andric           rewriteCopy(MI, *FlagUse, CopyDefI);
6280b57cec5SDimitry Andric         } else {
6290b57cec5SDimitry Andric           // We assume all other instructions that use flags also def them.
6300b57cec5SDimitry Andric           assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
6310b57cec5SDimitry Andric                  "Expected a def of EFLAGS for this instruction!");
6320b57cec5SDimitry Andric 
6330b57cec5SDimitry Andric           // NB!!! Several arithmetic instructions only *partially* update
6340b57cec5SDimitry Andric           // flags. Theoretically, we could generate MI code sequences that
6350b57cec5SDimitry Andric           // would rely on this fact and observe different flags independently.
6360b57cec5SDimitry Andric           // But currently LLVM models all of these instructions as clobbering
6370b57cec5SDimitry Andric           // all the flags in an undef way. We rely on that to simplify the
6380b57cec5SDimitry Andric           // logic.
6390b57cec5SDimitry Andric           FlagsKilled = true;
6400b57cec5SDimitry Andric 
6410b57cec5SDimitry Andric           switch (MI.getOpcode()) {
6420b57cec5SDimitry Andric           case X86::SETB_C8r:
6430b57cec5SDimitry Andric           case X86::SETB_C16r:
6440b57cec5SDimitry Andric           case X86::SETB_C32r:
6450b57cec5SDimitry Andric           case X86::SETB_C64r:
6460b57cec5SDimitry Andric             // Use custom lowering for arithmetic that is merely extending the
6470b57cec5SDimitry Andric             // carry flag. We model this as the SETB_C* pseudo instructions.
6480b57cec5SDimitry Andric             rewriteSetCarryExtended(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
6490b57cec5SDimitry Andric                                     CondRegs);
6500b57cec5SDimitry Andric             break;
6510b57cec5SDimitry Andric 
6520b57cec5SDimitry Andric           default:
6530b57cec5SDimitry Andric             // Generically handle remaining uses as arithmetic instructions.
6540b57cec5SDimitry Andric             rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
6550b57cec5SDimitry Andric                               CondRegs);
6560b57cec5SDimitry Andric             break;
6570b57cec5SDimitry Andric           }
6580b57cec5SDimitry Andric           break;
6590b57cec5SDimitry Andric         }
6600b57cec5SDimitry Andric 
6610b57cec5SDimitry Andric         // If this was the last use of the flags, we're done.
6620b57cec5SDimitry Andric         if (FlagsKilled)
6630b57cec5SDimitry Andric           break;
6640b57cec5SDimitry Andric       }
6650b57cec5SDimitry Andric 
6660b57cec5SDimitry Andric       // If the flags were killed, we're done with this block.
6670b57cec5SDimitry Andric       if (FlagsKilled)
6680b57cec5SDimitry Andric         continue;
6690b57cec5SDimitry Andric 
6700b57cec5SDimitry Andric       // Otherwise we need to scan successors for ones where the flags live-in
6710b57cec5SDimitry Andric       // and queue those up for processing.
6720b57cec5SDimitry Andric       for (MachineBasicBlock *SuccMBB : UseMBB.successors())
6730b57cec5SDimitry Andric         if (SuccMBB->isLiveIn(X86::EFLAGS) &&
6740b57cec5SDimitry Andric             VisitedBlocks.insert(SuccMBB).second) {
6750b57cec5SDimitry Andric           // We currently don't do any PHI insertion and so we require that the
6760b57cec5SDimitry Andric           // test basic block dominates all of the use basic blocks. Further, we
6770b57cec5SDimitry Andric           // can't have a cycle from the test block back to itself as that would
6780b57cec5SDimitry Andric           // create a cycle requiring a PHI to break it.
6790b57cec5SDimitry Andric           //
6800b57cec5SDimitry Andric           // We could in theory do PHI insertion here if it becomes useful by
6810b57cec5SDimitry Andric           // just taking undef values in along every edge that we don't trace
6820b57cec5SDimitry Andric           // this EFLAGS copy along. This isn't as bad as fully general PHI
6830b57cec5SDimitry Andric           // insertion, but still seems like a great deal of complexity.
6840b57cec5SDimitry Andric           //
6850b57cec5SDimitry Andric           // Because it is theoretically possible that some earlier MI pass or
6860b57cec5SDimitry Andric           // other lowering transformation could induce this to happen, we do
6870b57cec5SDimitry Andric           // a hard check even in non-debug builds here.
6880b57cec5SDimitry Andric           if (SuccMBB == TestMBB || !MDT->dominates(TestMBB, SuccMBB)) {
6890b57cec5SDimitry Andric             LLVM_DEBUG({
6900b57cec5SDimitry Andric               dbgs()
6910b57cec5SDimitry Andric                   << "ERROR: Encountered use that is not dominated by our test "
6920b57cec5SDimitry Andric                      "basic block! Rewriting this would require inserting PHI "
6930b57cec5SDimitry Andric                      "nodes to track the flag state across the CFG.\n\nTest "
6940b57cec5SDimitry Andric                      "block:\n";
6950b57cec5SDimitry Andric               TestMBB->dump();
6960b57cec5SDimitry Andric               dbgs() << "Use block:\n";
6970b57cec5SDimitry Andric               SuccMBB->dump();
6980b57cec5SDimitry Andric             });
6990b57cec5SDimitry Andric             report_fatal_error(
7000b57cec5SDimitry Andric                 "Cannot lower EFLAGS copy when original copy def "
7010b57cec5SDimitry Andric                 "does not dominate all uses.");
7020b57cec5SDimitry Andric           }
7030b57cec5SDimitry Andric 
7040b57cec5SDimitry Andric           Blocks.push_back(SuccMBB);
705*480093f4SDimitry Andric 
706*480093f4SDimitry Andric           // After this, EFLAGS will be recreated before each use.
707*480093f4SDimitry Andric           SuccMBB->removeLiveIn(X86::EFLAGS);
7080b57cec5SDimitry Andric         }
7090b57cec5SDimitry Andric     } while (!Blocks.empty());
7100b57cec5SDimitry Andric 
7110b57cec5SDimitry Andric     // Now rewrite the jumps that use the flags. These we handle specially
7120b57cec5SDimitry Andric     // because if there are multiple jumps in a single basic block we'll have
7130b57cec5SDimitry Andric     // to do surgery on the CFG.
7140b57cec5SDimitry Andric     MachineBasicBlock *LastJmpMBB = nullptr;
7150b57cec5SDimitry Andric     for (MachineInstr *JmpI : JmpIs) {
7160b57cec5SDimitry Andric       // Past the first jump within a basic block we need to split the blocks
7170b57cec5SDimitry Andric       // apart.
7180b57cec5SDimitry Andric       if (JmpI->getParent() == LastJmpMBB)
7190b57cec5SDimitry Andric         splitBlock(*JmpI->getParent(), *JmpI, *TII);
7200b57cec5SDimitry Andric       else
7210b57cec5SDimitry Andric         LastJmpMBB = JmpI->getParent();
7220b57cec5SDimitry Andric 
7230b57cec5SDimitry Andric       rewriteCondJmp(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
7240b57cec5SDimitry Andric     }
7250b57cec5SDimitry Andric 
7260b57cec5SDimitry Andric     // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
7270b57cec5SDimitry Andric     // the copy's def operand is itself a kill.
7280b57cec5SDimitry Andric   }
7290b57cec5SDimitry Andric 
7300b57cec5SDimitry Andric #ifndef NDEBUG
7310b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : MF)
7320b57cec5SDimitry Andric     for (MachineInstr &MI : MBB)
7330b57cec5SDimitry Andric       if (MI.getOpcode() == TargetOpcode::COPY &&
7340b57cec5SDimitry Andric           (MI.getOperand(0).getReg() == X86::EFLAGS ||
7350b57cec5SDimitry Andric            MI.getOperand(1).getReg() == X86::EFLAGS)) {
7360b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: ";
7370b57cec5SDimitry Andric                    MI.dump());
7380b57cec5SDimitry Andric         llvm_unreachable("Unlowered EFLAGS copy!");
7390b57cec5SDimitry Andric       }
7400b57cec5SDimitry Andric #endif
7410b57cec5SDimitry Andric 
7420b57cec5SDimitry Andric   return true;
7430b57cec5SDimitry Andric }
7440b57cec5SDimitry Andric 
7450b57cec5SDimitry Andric /// Collect any conditions that have already been set in registers so that we
7460b57cec5SDimitry Andric /// can re-use them rather than adding duplicates.
7470b57cec5SDimitry Andric CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
7480b57cec5SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos) {
7490b57cec5SDimitry Andric   CondRegArray CondRegs = {};
7500b57cec5SDimitry Andric 
7510b57cec5SDimitry Andric   // Scan backwards across the range of instructions with live EFLAGS.
7520b57cec5SDimitry Andric   for (MachineInstr &MI :
7530b57cec5SDimitry Andric        llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
7540b57cec5SDimitry Andric     X86::CondCode Cond = X86::getCondFromSETCC(MI);
7558bcb0991SDimitry Andric     if (Cond != X86::COND_INVALID && !MI.mayStore() &&
7568bcb0991SDimitry Andric         MI.getOperand(0).isReg() &&
7578bcb0991SDimitry Andric         Register::isVirtualRegister(MI.getOperand(0).getReg())) {
7580b57cec5SDimitry Andric       assert(MI.getOperand(0).isDef() &&
7590b57cec5SDimitry Andric              "A non-storing SETcc should always define a register!");
7600b57cec5SDimitry Andric       CondRegs[Cond] = MI.getOperand(0).getReg();
7610b57cec5SDimitry Andric     }
7620b57cec5SDimitry Andric 
7630b57cec5SDimitry Andric     // Stop scanning when we see the first definition of the EFLAGS as prior to
7640b57cec5SDimitry Andric     // this we would potentially capture the wrong flag state.
7650b57cec5SDimitry Andric     if (MI.findRegisterDefOperand(X86::EFLAGS))
7660b57cec5SDimitry Andric       break;
7670b57cec5SDimitry Andric   }
7680b57cec5SDimitry Andric   return CondRegs;
7690b57cec5SDimitry Andric }
7700b57cec5SDimitry Andric 
7710b57cec5SDimitry Andric unsigned X86FlagsCopyLoweringPass::promoteCondToReg(
7720b57cec5SDimitry Andric     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
7730b57cec5SDimitry Andric     DebugLoc TestLoc, X86::CondCode Cond) {
7748bcb0991SDimitry Andric   Register Reg = MRI->createVirtualRegister(PromoteRC);
7750b57cec5SDimitry Andric   auto SetI = BuildMI(TestMBB, TestPos, TestLoc,
7760b57cec5SDimitry Andric                       TII->get(X86::SETCCr), Reg).addImm(Cond);
7770b57cec5SDimitry Andric   (void)SetI;
7780b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "    save cond: "; SetI->dump());
7790b57cec5SDimitry Andric   ++NumSetCCsInserted;
7800b57cec5SDimitry Andric   return Reg;
7810b57cec5SDimitry Andric }
7820b57cec5SDimitry Andric 
7830b57cec5SDimitry Andric std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
7840b57cec5SDimitry Andric     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
7850b57cec5SDimitry Andric     DebugLoc TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
7860b57cec5SDimitry Andric   unsigned &CondReg = CondRegs[Cond];
7870b57cec5SDimitry Andric   unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)];
7880b57cec5SDimitry Andric   if (!CondReg && !InvCondReg)
7890b57cec5SDimitry Andric     CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
7900b57cec5SDimitry Andric 
7910b57cec5SDimitry Andric   if (CondReg)
7920b57cec5SDimitry Andric     return {CondReg, false};
7930b57cec5SDimitry Andric   else
7940b57cec5SDimitry Andric     return {InvCondReg, true};
7950b57cec5SDimitry Andric }
7960b57cec5SDimitry Andric 
7970b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
7980b57cec5SDimitry Andric                                           MachineBasicBlock::iterator Pos,
7990b57cec5SDimitry Andric                                           DebugLoc Loc, unsigned Reg) {
8000b57cec5SDimitry Andric   auto TestI =
8010b57cec5SDimitry Andric       BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
8020b57cec5SDimitry Andric   (void)TestI;
8030b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "    test cond: "; TestI->dump());
8040b57cec5SDimitry Andric   ++NumTestsInserted;
8050b57cec5SDimitry Andric }
8060b57cec5SDimitry Andric 
8070b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteArithmetic(
8080b57cec5SDimitry Andric     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
8090b57cec5SDimitry Andric     DebugLoc TestLoc, MachineInstr &MI, MachineOperand &FlagUse,
8100b57cec5SDimitry Andric     CondRegArray &CondRegs) {
8110b57cec5SDimitry Andric   // Arithmetic is either reading CF or OF. Figure out which condition we need
8120b57cec5SDimitry Andric   // to preserve in a register.
813*480093f4SDimitry Andric   X86::CondCode Cond = X86::COND_INVALID;
8140b57cec5SDimitry Andric 
8150b57cec5SDimitry Andric   // The addend to use to reset CF or OF when added to the flag value.
816*480093f4SDimitry Andric   int Addend = 0;
8170b57cec5SDimitry Andric 
8180b57cec5SDimitry Andric   switch (getMnemonicFromOpcode(MI.getOpcode())) {
8190b57cec5SDimitry Andric   case FlagArithMnemonic::ADC:
8200b57cec5SDimitry Andric   case FlagArithMnemonic::ADCX:
8210b57cec5SDimitry Andric   case FlagArithMnemonic::RCL:
8220b57cec5SDimitry Andric   case FlagArithMnemonic::RCR:
8230b57cec5SDimitry Andric   case FlagArithMnemonic::SBB:
8240b57cec5SDimitry Andric     Cond = X86::COND_B; // CF == 1
8250b57cec5SDimitry Andric     // Set up an addend that when one is added will need a carry due to not
8260b57cec5SDimitry Andric     // having a higher bit available.
8270b57cec5SDimitry Andric     Addend = 255;
8280b57cec5SDimitry Andric     break;
8290b57cec5SDimitry Andric 
8300b57cec5SDimitry Andric   case FlagArithMnemonic::ADOX:
8310b57cec5SDimitry Andric     Cond = X86::COND_O; // OF == 1
8320b57cec5SDimitry Andric     // Set up an addend that when one is added will turn from positive to
8330b57cec5SDimitry Andric     // negative and thus overflow in the signed domain.
8340b57cec5SDimitry Andric     Addend = 127;
8350b57cec5SDimitry Andric     break;
8360b57cec5SDimitry Andric   }
8370b57cec5SDimitry Andric 
8380b57cec5SDimitry Andric   // Now get a register that contains the value of the flag input to the
8390b57cec5SDimitry Andric   // arithmetic. We require exactly this flag to simplify the arithmetic
8400b57cec5SDimitry Andric   // required to materialize it back into the flag.
8410b57cec5SDimitry Andric   unsigned &CondReg = CondRegs[Cond];
8420b57cec5SDimitry Andric   if (!CondReg)
8430b57cec5SDimitry Andric     CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
8440b57cec5SDimitry Andric 
8450b57cec5SDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
8460b57cec5SDimitry Andric 
8470b57cec5SDimitry Andric   // Insert an instruction that will set the flag back to the desired value.
8488bcb0991SDimitry Andric   Register TmpReg = MRI->createVirtualRegister(PromoteRC);
8490b57cec5SDimitry Andric   auto AddI =
8500b57cec5SDimitry Andric       BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri))
8510b57cec5SDimitry Andric           .addDef(TmpReg, RegState::Dead)
8520b57cec5SDimitry Andric           .addReg(CondReg)
8530b57cec5SDimitry Andric           .addImm(Addend);
8540b57cec5SDimitry Andric   (void)AddI;
8550b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "    add cond: "; AddI->dump());
8560b57cec5SDimitry Andric   ++NumAddsInserted;
8570b57cec5SDimitry Andric   FlagUse.setIsKill(true);
8580b57cec5SDimitry Andric }
8590b57cec5SDimitry Andric 
8600b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB,
8610b57cec5SDimitry Andric                                            MachineBasicBlock::iterator TestPos,
8620b57cec5SDimitry Andric                                            DebugLoc TestLoc,
8630b57cec5SDimitry Andric                                            MachineInstr &CMovI,
8640b57cec5SDimitry Andric                                            MachineOperand &FlagUse,
8650b57cec5SDimitry Andric                                            CondRegArray &CondRegs) {
8660b57cec5SDimitry Andric   // First get the register containing this specific condition.
8670b57cec5SDimitry Andric   X86::CondCode Cond = X86::getCondFromCMov(CMovI);
8680b57cec5SDimitry Andric   unsigned CondReg;
8690b57cec5SDimitry Andric   bool Inverted;
8700b57cec5SDimitry Andric   std::tie(CondReg, Inverted) =
8710b57cec5SDimitry Andric       getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);
8720b57cec5SDimitry Andric 
8730b57cec5SDimitry Andric   MachineBasicBlock &MBB = *CMovI.getParent();
8740b57cec5SDimitry Andric 
8750b57cec5SDimitry Andric   // Insert a direct test of the saved register.
8760b57cec5SDimitry Andric   insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg);
8770b57cec5SDimitry Andric 
8780b57cec5SDimitry Andric   // Rewrite the CMov to use the !ZF flag from the test, and then kill its use
8790b57cec5SDimitry Andric   // of the flags afterward.
8800b57cec5SDimitry Andric   CMovI.getOperand(CMovI.getDesc().getNumOperands() - 1)
8810b57cec5SDimitry Andric       .setImm(Inverted ? X86::COND_E : X86::COND_NE);
8820b57cec5SDimitry Andric   FlagUse.setIsKill(true);
8830b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "    fixed cmov: "; CMovI.dump());
8840b57cec5SDimitry Andric }
8850b57cec5SDimitry Andric 
8862a168f03SDimitry Andric void X86FlagsCopyLoweringPass::rewriteFCMov(MachineBasicBlock &TestMBB,
8872a168f03SDimitry Andric                                             MachineBasicBlock::iterator TestPos,
8882a168f03SDimitry Andric                                             DebugLoc TestLoc,
8892a168f03SDimitry Andric                                             MachineInstr &CMovI,
8902a168f03SDimitry Andric                                             MachineOperand &FlagUse,
8912a168f03SDimitry Andric                                             CondRegArray &CondRegs) {
8922a168f03SDimitry Andric   // First get the register containing this specific condition.
8932a168f03SDimitry Andric   X86::CondCode Cond = getCondFromFCMOV(CMovI.getOpcode());
8942a168f03SDimitry Andric   unsigned CondReg;
8952a168f03SDimitry Andric   bool Inverted;
8962a168f03SDimitry Andric   std::tie(CondReg, Inverted) =
8972a168f03SDimitry Andric       getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);
8982a168f03SDimitry Andric 
8992a168f03SDimitry Andric   MachineBasicBlock &MBB = *CMovI.getParent();
9002a168f03SDimitry Andric 
9012a168f03SDimitry Andric   // Insert a direct test of the saved register.
9022a168f03SDimitry Andric   insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg);
9032a168f03SDimitry Andric 
9042a168f03SDimitry Andric   auto getFCMOVOpcode = [](unsigned Opcode, bool Inverted) {
9052a168f03SDimitry Andric     switch (Opcode) {
9062a168f03SDimitry Andric     default: llvm_unreachable("Unexpected opcode!");
9072a168f03SDimitry Andric     case X86::CMOVBE_Fp32: case X86::CMOVNBE_Fp32:
9082a168f03SDimitry Andric     case X86::CMOVB_Fp32:  case X86::CMOVNB_Fp32:
9092a168f03SDimitry Andric     case X86::CMOVE_Fp32:  case X86::CMOVNE_Fp32:
9102a168f03SDimitry Andric     case X86::CMOVP_Fp32:  case X86::CMOVNP_Fp32:
9112a168f03SDimitry Andric       return Inverted ? X86::CMOVE_Fp32 : X86::CMOVNE_Fp32;
9122a168f03SDimitry Andric     case X86::CMOVBE_Fp64: case X86::CMOVNBE_Fp64:
9132a168f03SDimitry Andric     case X86::CMOVB_Fp64:  case X86::CMOVNB_Fp64:
9142a168f03SDimitry Andric     case X86::CMOVE_Fp64:  case X86::CMOVNE_Fp64:
9152a168f03SDimitry Andric     case X86::CMOVP_Fp64:  case X86::CMOVNP_Fp64:
9162a168f03SDimitry Andric       return Inverted ? X86::CMOVE_Fp64 : X86::CMOVNE_Fp64;
9172a168f03SDimitry Andric     case X86::CMOVBE_Fp80: case X86::CMOVNBE_Fp80:
9182a168f03SDimitry Andric     case X86::CMOVB_Fp80:  case X86::CMOVNB_Fp80:
9192a168f03SDimitry Andric     case X86::CMOVE_Fp80:  case X86::CMOVNE_Fp80:
9202a168f03SDimitry Andric     case X86::CMOVP_Fp80:  case X86::CMOVNP_Fp80:
9212a168f03SDimitry Andric       return Inverted ? X86::CMOVE_Fp80 : X86::CMOVNE_Fp80;
9222a168f03SDimitry Andric     }
9232a168f03SDimitry Andric   };
9242a168f03SDimitry Andric 
9252a168f03SDimitry Andric   // Rewrite the CMov to use the !ZF flag from the test.
9262a168f03SDimitry Andric   CMovI.setDesc(TII->get(getFCMOVOpcode(CMovI.getOpcode(), Inverted)));
9272a168f03SDimitry Andric   FlagUse.setIsKill(true);
9282a168f03SDimitry Andric   LLVM_DEBUG(dbgs() << "    fixed fcmov: "; CMovI.dump());
9292a168f03SDimitry Andric }
9302a168f03SDimitry Andric 
9310b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteCondJmp(
9320b57cec5SDimitry Andric     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
9330b57cec5SDimitry Andric     DebugLoc TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) {
9340b57cec5SDimitry Andric   // First get the register containing this specific condition.
9350b57cec5SDimitry Andric   X86::CondCode Cond = X86::getCondFromBranch(JmpI);
9360b57cec5SDimitry Andric   unsigned CondReg;
9370b57cec5SDimitry Andric   bool Inverted;
9380b57cec5SDimitry Andric   std::tie(CondReg, Inverted) =
9390b57cec5SDimitry Andric       getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);
9400b57cec5SDimitry Andric 
9410b57cec5SDimitry Andric   MachineBasicBlock &JmpMBB = *JmpI.getParent();
9420b57cec5SDimitry Andric 
9430b57cec5SDimitry Andric   // Insert a direct test of the saved register.
9440b57cec5SDimitry Andric   insertTest(JmpMBB, JmpI.getIterator(), JmpI.getDebugLoc(), CondReg);
9450b57cec5SDimitry Andric 
9460b57cec5SDimitry Andric   // Rewrite the jump to use the !ZF flag from the test, and kill its use of
9470b57cec5SDimitry Andric   // flags afterward.
9480b57cec5SDimitry Andric   JmpI.getOperand(1).setImm(Inverted ? X86::COND_E : X86::COND_NE);
9490b57cec5SDimitry Andric   JmpI.findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
9500b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "    fixed jCC: "; JmpI.dump());
9510b57cec5SDimitry Andric }
9520b57cec5SDimitry Andric 
9530b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
9540b57cec5SDimitry Andric                                            MachineOperand &FlagUse,
9550b57cec5SDimitry Andric                                            MachineInstr &CopyDefI) {
9560b57cec5SDimitry Andric   // Just replace this copy with the original copy def.
9570b57cec5SDimitry Andric   MRI->replaceRegWith(MI.getOperand(0).getReg(),
9580b57cec5SDimitry Andric                       CopyDefI.getOperand(0).getReg());
9590b57cec5SDimitry Andric   MI.eraseFromParent();
9600b57cec5SDimitry Andric }
9610b57cec5SDimitry Andric 
9620b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
9630b57cec5SDimitry Andric     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
9640b57cec5SDimitry Andric     DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
9650b57cec5SDimitry Andric     CondRegArray &CondRegs) {
9660b57cec5SDimitry Andric   // This routine is only used to handle pseudos for setting a register to zero
9670b57cec5SDimitry Andric   // or all ones based on CF. This is essentially the sign extended from 1-bit
9680b57cec5SDimitry Andric   // form of SETB and modeled with the SETB_C* pseudos. They require special
9690b57cec5SDimitry Andric   // handling as they aren't normal SETcc instructions and are lowered to an
9700b57cec5SDimitry Andric   // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
9710b57cec5SDimitry Andric   // they are only provided in reg-defining forms. A complicating factor is that
9720b57cec5SDimitry Andric   // they can define many different register widths.
9730b57cec5SDimitry Andric   assert(SetBI.getOperand(0).isReg() &&
9740b57cec5SDimitry Andric          "Cannot have a non-register defined operand to this variant of SETB!");
9750b57cec5SDimitry Andric 
9760b57cec5SDimitry Andric   // Little helper to do the common final step of replacing the register def'ed
9770b57cec5SDimitry Andric   // by this SETB instruction with a new register and removing the SETB
9780b57cec5SDimitry Andric   // instruction.
9790b57cec5SDimitry Andric   auto RewriteToReg = [&](unsigned Reg) {
9800b57cec5SDimitry Andric     MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
9810b57cec5SDimitry Andric     SetBI.eraseFromParent();
9820b57cec5SDimitry Andric   };
9830b57cec5SDimitry Andric 
9840b57cec5SDimitry Andric   // Grab the register class used for this particular instruction.
9850b57cec5SDimitry Andric   auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());
9860b57cec5SDimitry Andric 
9870b57cec5SDimitry Andric   MachineBasicBlock &MBB = *SetBI.getParent();
9880b57cec5SDimitry Andric   auto SetPos = SetBI.getIterator();
9890b57cec5SDimitry Andric   auto SetLoc = SetBI.getDebugLoc();
9900b57cec5SDimitry Andric 
9910b57cec5SDimitry Andric   auto AdjustReg = [&](unsigned Reg) {
9920b57cec5SDimitry Andric     auto &OrigRC = *MRI->getRegClass(Reg);
9930b57cec5SDimitry Andric     if (&OrigRC == &SetBRC)
9940b57cec5SDimitry Andric       return Reg;
9950b57cec5SDimitry Andric 
9960b57cec5SDimitry Andric     unsigned NewReg;
9970b57cec5SDimitry Andric 
9980b57cec5SDimitry Andric     int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
9990b57cec5SDimitry Andric     int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
10000b57cec5SDimitry Andric     assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
10010b57cec5SDimitry Andric     assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
10020b57cec5SDimitry Andric     int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
10030b57cec5SDimitry Andric                        X86::NoSubRegister, X86::sub_32bit};
10040b57cec5SDimitry Andric 
10050b57cec5SDimitry Andric     // If the original size is smaller than the target *and* is smaller than 4
10060b57cec5SDimitry Andric     // bytes, we need to explicitly zero extend it. We always extend to 4-bytes
10070b57cec5SDimitry Andric     // to maximize the chance of being able to CSE that operation and to avoid
10080b57cec5SDimitry Andric     // partial dependency stalls extending to 2-bytes.
10090b57cec5SDimitry Andric     if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
10100b57cec5SDimitry Andric       NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
10110b57cec5SDimitry Andric       BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
10120b57cec5SDimitry Andric           .addReg(Reg);
10130b57cec5SDimitry Andric       if (&SetBRC == &X86::GR32RegClass)
10140b57cec5SDimitry Andric         return NewReg;
10150b57cec5SDimitry Andric       Reg = NewReg;
10160b57cec5SDimitry Andric       OrigRegSize = 4;
10170b57cec5SDimitry Andric     }
10180b57cec5SDimitry Andric 
10190b57cec5SDimitry Andric     NewReg = MRI->createVirtualRegister(&SetBRC);
10200b57cec5SDimitry Andric     if (OrigRegSize < TargetRegSize) {
10210b57cec5SDimitry Andric       BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
10220b57cec5SDimitry Andric               NewReg)
10230b57cec5SDimitry Andric           .addImm(0)
10240b57cec5SDimitry Andric           .addReg(Reg)
10250b57cec5SDimitry Andric           .addImm(SubRegIdx[OrigRegSize]);
10260b57cec5SDimitry Andric     } else if (OrigRegSize > TargetRegSize) {
10270b57cec5SDimitry Andric       if (TargetRegSize == 1 && !Subtarget->is64Bit()) {
10280b57cec5SDimitry Andric         // Need to constrain the register class.
10290b57cec5SDimitry Andric         MRI->constrainRegClass(Reg, &X86::GR32_ABCDRegClass);
10300b57cec5SDimitry Andric       }
10310b57cec5SDimitry Andric 
10320b57cec5SDimitry Andric       BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY),
10330b57cec5SDimitry Andric               NewReg)
10340b57cec5SDimitry Andric           .addReg(Reg, 0, SubRegIdx[TargetRegSize]);
10350b57cec5SDimitry Andric     } else {
10360b57cec5SDimitry Andric       BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
10370b57cec5SDimitry Andric           .addReg(Reg);
10380b57cec5SDimitry Andric     }
10390b57cec5SDimitry Andric     return NewReg;
10400b57cec5SDimitry Andric   };
10410b57cec5SDimitry Andric 
10420b57cec5SDimitry Andric   unsigned &CondReg = CondRegs[X86::COND_B];
10430b57cec5SDimitry Andric   if (!CondReg)
10440b57cec5SDimitry Andric     CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);
10450b57cec5SDimitry Andric 
10460b57cec5SDimitry Andric   // Adjust the condition to have the desired register width by zero-extending
10470b57cec5SDimitry Andric   // as needed.
10480b57cec5SDimitry Andric   // FIXME: We should use a better API to avoid the local reference and using a
10490b57cec5SDimitry Andric   // different variable here.
10500b57cec5SDimitry Andric   unsigned ExtCondReg = AdjustReg(CondReg);
10510b57cec5SDimitry Andric 
10520b57cec5SDimitry Andric   // Now we need to turn this into a bitmask. We do this by subtracting it from
10530b57cec5SDimitry Andric   // zero.
10548bcb0991SDimitry Andric   Register ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
10550b57cec5SDimitry Andric   BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
10560b57cec5SDimitry Andric   ZeroReg = AdjustReg(ZeroReg);
10570b57cec5SDimitry Andric 
10580b57cec5SDimitry Andric   unsigned Sub;
10590b57cec5SDimitry Andric   switch (SetBI.getOpcode()) {
10600b57cec5SDimitry Andric   case X86::SETB_C8r:
10610b57cec5SDimitry Andric     Sub = X86::SUB8rr;
10620b57cec5SDimitry Andric     break;
10630b57cec5SDimitry Andric 
10640b57cec5SDimitry Andric   case X86::SETB_C16r:
10650b57cec5SDimitry Andric     Sub = X86::SUB16rr;
10660b57cec5SDimitry Andric     break;
10670b57cec5SDimitry Andric 
10680b57cec5SDimitry Andric   case X86::SETB_C32r:
10690b57cec5SDimitry Andric     Sub = X86::SUB32rr;
10700b57cec5SDimitry Andric     break;
10710b57cec5SDimitry Andric 
10720b57cec5SDimitry Andric   case X86::SETB_C64r:
10730b57cec5SDimitry Andric     Sub = X86::SUB64rr;
10740b57cec5SDimitry Andric     break;
10750b57cec5SDimitry Andric 
10760b57cec5SDimitry Andric   default:
10770b57cec5SDimitry Andric     llvm_unreachable("Invalid SETB_C* opcode!");
10780b57cec5SDimitry Andric   }
10798bcb0991SDimitry Andric   Register ResultReg = MRI->createVirtualRegister(&SetBRC);
10800b57cec5SDimitry Andric   BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
10810b57cec5SDimitry Andric       .addReg(ZeroReg)
10820b57cec5SDimitry Andric       .addReg(ExtCondReg);
10830b57cec5SDimitry Andric   return RewriteToReg(ResultReg);
10840b57cec5SDimitry Andric }
10850b57cec5SDimitry Andric 
10860b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
10870b57cec5SDimitry Andric                                             MachineBasicBlock::iterator TestPos,
10880b57cec5SDimitry Andric                                             DebugLoc TestLoc,
10890b57cec5SDimitry Andric                                             MachineInstr &SetCCI,
10900b57cec5SDimitry Andric                                             MachineOperand &FlagUse,
10910b57cec5SDimitry Andric                                             CondRegArray &CondRegs) {
10920b57cec5SDimitry Andric   X86::CondCode Cond = X86::getCondFromSETCC(SetCCI);
10930b57cec5SDimitry Andric   // Note that we can't usefully rewrite this to the inverse without complex
10940b57cec5SDimitry Andric   // analysis of the users of the setCC. Largely we rely on duplicates which
10950b57cec5SDimitry Andric   // could have been avoided already being avoided here.
10960b57cec5SDimitry Andric   unsigned &CondReg = CondRegs[Cond];
10970b57cec5SDimitry Andric   if (!CondReg)
10980b57cec5SDimitry Andric     CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
10990b57cec5SDimitry Andric 
11000b57cec5SDimitry Andric   // Rewriting a register def is trivial: we just replace the register and
11010b57cec5SDimitry Andric   // remove the setcc.
11020b57cec5SDimitry Andric   if (!SetCCI.mayStore()) {
11030b57cec5SDimitry Andric     assert(SetCCI.getOperand(0).isReg() &&
11040b57cec5SDimitry Andric            "Cannot have a non-register defined operand to SETcc!");
11050b57cec5SDimitry Andric     MRI->replaceRegWith(SetCCI.getOperand(0).getReg(), CondReg);
11060b57cec5SDimitry Andric     SetCCI.eraseFromParent();
11070b57cec5SDimitry Andric     return;
11080b57cec5SDimitry Andric   }
11090b57cec5SDimitry Andric 
11100b57cec5SDimitry Andric   // Otherwise, we need to emit a store.
11110b57cec5SDimitry Andric   auto MIB = BuildMI(*SetCCI.getParent(), SetCCI.getIterator(),
11120b57cec5SDimitry Andric                      SetCCI.getDebugLoc(), TII->get(X86::MOV8mr));
11130b57cec5SDimitry Andric   // Copy the address operands.
11140b57cec5SDimitry Andric   for (int i = 0; i < X86::AddrNumOperands; ++i)
11150b57cec5SDimitry Andric     MIB.add(SetCCI.getOperand(i));
11160b57cec5SDimitry Andric 
11170b57cec5SDimitry Andric   MIB.addReg(CondReg);
11180b57cec5SDimitry Andric 
11190b57cec5SDimitry Andric   MIB.setMemRefs(SetCCI.memoperands());
11200b57cec5SDimitry Andric 
11210b57cec5SDimitry Andric   SetCCI.eraseFromParent();
11220b57cec5SDimitry Andric   return;
11230b57cec5SDimitry Andric }
1124