//===- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
/// flag bits.
///
/// We have to do this by carefully analyzing and rewriting the usage of the
/// copied EFLAGS register because there is no general way to rematerialize the
/// entire EFLAGS register safely and efficiently. Using `popf` both forces
/// dynamic stack adjustment and can create correctness issues due to IF, TF,
/// and other non-status flags being overwritten. Sequences involving SAHF
/// don't work on all x86 processors and are often quite slow compared to
/// directly testing a single status flag preserved in its own GPR.
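///
/// As an illustrative pseudo-MIR sketch (not drawn from an actual test case),
/// a copy whose only surviving use is a conditional jump on COND_E:
///
///   %flags:gr32 = COPY $eflags
///   ...                          ; intervening code clobbers EFLAGS
///   $eflags = COPY %flags
///   JCC_1 %bb.target, 4          ; jump if COND_E (ZF set)
///
/// is lowered by capturing just the needed bit while EFLAGS is still live and
/// re-testing it at the use:
///
///   %cond:gr8 = SETCCr 4         ; %cond = 1 iff COND_E held
///   ...                          ; intervening code clobbers EFLAGS
///   TEST8rr %cond, %cond         ; ZF = (%cond == 0)
///   JCC_1 %bb.target, 5          ; jump if COND_NE, i.e. %cond was 1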
///
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <iterator>
#include <numeric>
#include <utility>

using namespace llvm;

#define PASS_KEY "x86-flags-copy-lowering"
#define DEBUG_TYPE PASS_KEY

STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
STATISTIC(NumTestsInserted, "Number of test instructions inserted");
STATISTIC(NumAddsInserted, "Number of add instructions inserted");

namespace {

// Convenient array type for storing registers associated with each condition.
using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;
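// A zero entry means the corresponding condition has not (yet) been
// materialized into a register.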

class X86FlagsCopyLoweringPass : public MachineFunctionPass {
public:
  X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  MachineRegisterInfo *MRI;
  const X86Subtarget *Subtarget;
  const X86InstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const TargetRegisterClass *PromoteRC;
  MachineDominatorTree *MDT;

  CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator CopyDefI);

  unsigned promoteCondToReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator TestPos,
                            DebugLoc TestLoc, X86::CondCode Cond);
  std::pair<unsigned, bool>
  getCondOrInverseInReg(MachineBasicBlock &TestMBB,
                        MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                        X86::CondCode Cond, CondRegArray &CondRegs);
  void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
                  DebugLoc Loc, unsigned Reg);

  void rewriteArithmetic(MachineBasicBlock &TestMBB,
                         MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                         MachineInstr &MI, MachineOperand &FlagUse,
                         CondRegArray &CondRegs);
  void rewriteCMov(MachineBasicBlock &TestMBB,
                   MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                   MachineInstr &CMovI, MachineOperand &FlagUse,
                   CondRegArray &CondRegs);
  void rewriteCondJmp(MachineBasicBlock &TestMBB,
                      MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                      MachineInstr &JmpI, CondRegArray &CondRegs);
  void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
                   MachineInstr &CopyDefI);
  void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
                               MachineBasicBlock::iterator TestPos,
                               DebugLoc TestLoc, MachineInstr &SetBI,
                               MachineOperand &FlagUse, CondRegArray &CondRegs);
  void rewriteSetCC(MachineBasicBlock &TestMBB,
                    MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                    MachineInstr &SetCCI, MachineOperand &FlagUse,
                    CondRegArray &CondRegs);
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
                      "X86 EFLAGS copy lowering", false, false)
INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
                    "X86 EFLAGS copy lowering", false, false)

FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
  return new X86FlagsCopyLoweringPass();
}

char X86FlagsCopyLoweringPass::ID = 0;

void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<MachineDominatorTree>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

namespace {
/// An enumeration of the arithmetic instruction mnemonics which have
/// interesting flag semantics.
///
/// We can map instruction opcodes into these mnemonics to make it easy to
/// dispatch with specific functionality.
enum class FlagArithMnemonic {
  ADC,
  ADCX,
  ADOX,
  RCL,
  RCR,
  SBB,
};
} // namespace

static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    report_fatal_error("No support for lowering a copy into EFLAGS when used "
                       "by this instruction!");

#define LLVM_EXPAND_INSTR_SIZES(MNEMONIC, SUFFIX)                              \
  case X86::MNEMONIC##8##SUFFIX:                                               \
  case X86::MNEMONIC##16##SUFFIX:                                              \
  case X86::MNEMONIC##32##SUFFIX:                                              \
  case X86::MNEMONIC##64##SUFFIX:

#define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC)                                    \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr)                                        \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV)                                    \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm)                                        \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr)                                        \
  case X86::MNEMONIC##8ri:                                                     \
  case X86::MNEMONIC##16ri8:                                                   \
  case X86::MNEMONIC##32ri8:                                                   \
  case X86::MNEMONIC##64ri8:                                                   \
  case X86::MNEMONIC##16ri:                                                    \
  case X86::MNEMONIC##32ri:                                                    \
  case X86::MNEMONIC##64ri32:                                                  \
  case X86::MNEMONIC##8mi:                                                     \
  case X86::MNEMONIC##16mi8:                                                   \
  case X86::MNEMONIC##32mi8:                                                   \
  case X86::MNEMONIC##64mi8:                                                   \
  case X86::MNEMONIC##16mi:                                                    \
  case X86::MNEMONIC##32mi:                                                    \
  case X86::MNEMONIC##64mi32:                                                  \
  case X86::MNEMONIC##8i8:                                                     \
  case X86::MNEMONIC##16i16:                                                   \
  case X86::MNEMONIC##32i32:                                                   \
  case X86::MNEMONIC##64i32:

    LLVM_EXPAND_ADC_SBB_INSTR(ADC)
    return FlagArithMnemonic::ADC;

    LLVM_EXPAND_ADC_SBB_INSTR(SBB)
    return FlagArithMnemonic::SBB;

#undef LLVM_EXPAND_ADC_SBB_INSTR

    LLVM_EXPAND_INSTR_SIZES(RCL, rCL)
    LLVM_EXPAND_INSTR_SIZES(RCL, r1)
    LLVM_EXPAND_INSTR_SIZES(RCL, ri)
    return FlagArithMnemonic::RCL;

    LLVM_EXPAND_INSTR_SIZES(RCR, rCL)
    LLVM_EXPAND_INSTR_SIZES(RCR, r1)
    LLVM_EXPAND_INSTR_SIZES(RCR, ri)
    return FlagArithMnemonic::RCR;

#undef LLVM_EXPAND_INSTR_SIZES

  case X86::ADCX32rr:
  case X86::ADCX64rr:
  case X86::ADCX32rm:
  case X86::ADCX64rm:
    return FlagArithMnemonic::ADCX;

  case X86::ADOX32rr:
  case X86::ADOX64rr:
  case X86::ADOX32rm:
  case X86::ADOX64rm:
    return FlagArithMnemonic::ADOX;
  }
}

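/// Split the tail of \p MBB after the branch \p SplitI into a new basic
/// block, moving the trailing terminators and fixing up successor edges,
/// probabilities, and any PHI operands in the successors.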
static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
                                     MachineInstr &SplitI,
                                     const X86InstrInfo &TII) {
  MachineFunction &MF = *MBB.getParent();

  assert(SplitI.getParent() == &MBB &&
         "Split instruction must be in the split block!");
  assert(SplitI.isBranch() &&
         "Only designed to split a tail of branch instructions!");
  assert(X86::getCondFromBranch(SplitI) != X86::COND_INVALID &&
         "Must split on an actual jCC instruction!");

  // Dig out the previous instruction to the split point.
  MachineInstr &PrevI = *std::prev(SplitI.getIterator());
  assert(PrevI.isBranch() && "Must split after a branch!");
  assert(X86::getCondFromBranch(PrevI) != X86::COND_INVALID &&
         "Must split after an actual jCC instruction!");
  assert(!std::prev(PrevI.getIterator())->isTerminator() &&
         "Must only have this one terminator prior to the split!");

  // Grab the one successor edge that will stay in `MBB`.
  MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();

  // Analyze the original block to see if we are actually splitting an edge
  // into two edges. This can happen when we have multiple conditional jumps to
  // the same successor.
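  // For example, two conditional jumps `JCC %bb.x, ...; JCC %bb.x, ...` share
  // a single edge to %bb.x; once the second jump moves into the new block,
  // both the original block and the new block need an edge to %bb.x.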
  bool IsEdgeSplit =
      std::any_of(SplitI.getIterator(), MBB.instr_end(),
                  [&](MachineInstr &MI) {
                    assert(MI.isTerminator() &&
                           "Should only have spliced terminators!");
                    return llvm::any_of(
                        MI.operands(), [&](MachineOperand &MOp) {
                          return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
                        });
                  }) ||
      MBB.getFallThrough() == &UnsplitSucc;

  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();

  // Insert the new block immediately after the current one. Any existing
  // fallthrough will be sunk into this new block anyways.
  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);

  // Splice the tail of instructions into the new block.
  NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());

  // Copy the necessary successors (and their probability info) into the new
  // block.
  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
    if (IsEdgeSplit || *SI != &UnsplitSucc)
      NewMBB.copySuccessor(&MBB, SI);
  // Normalize the probabilities if we didn't end up splitting the edge.
  if (!IsEdgeSplit)
    NewMBB.normalizeSuccProbs();

  // Now replace all of the moved successors in the original block with the new
  // block. This will merge their probabilities.
  for (MachineBasicBlock *Succ : NewMBB.successors())
    if (Succ != &UnsplitSucc)
      MBB.replaceSuccessor(Succ, &NewMBB);

  // We should always end up replacing at least one successor.
  assert(MBB.isSuccessor(&NewMBB) &&
         "Failed to make the new block a successor!");

  // Now update all the PHIs.
  for (MachineBasicBlock *Succ : NewMBB.successors()) {
    for (MachineInstr &MI : *Succ) {
      if (!MI.isPHI())
        break;

      for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
           OpIdx += 2) {
        MachineOperand &OpV = MI.getOperand(OpIdx);
        MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
        assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
        if (OpMBB.getMBB() != &MBB)
          continue;

        // Replace the operand for unsplit successors
        if (!IsEdgeSplit || Succ != &UnsplitSucc) {
          OpMBB.setMBB(&NewMBB);

          // We have to continue scanning as there may be multiple entries in
          // the PHI.
          continue;
        }

        // When we have split the edge append a new successor.
        MI.addOperand(MF, OpV);
        MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
        break;
      }
    }
  }

  return NewMBB;
}

bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << " **********\n");

  Subtarget = &MF.getSubtarget<X86Subtarget>();
  MRI = &MF.getRegInfo();
  TII = Subtarget->getInstrInfo();
  TRI = Subtarget->getRegisterInfo();
  MDT = &getAnalysis<MachineDominatorTree>();
  PromoteRC = &X86::GR8RegClass;

  if (MF.begin() == MF.end())
    // Nothing to do for a degenerate empty function...
    return false;

  // Collect the copies in RPO so that when there are chains where a copy is in
  // turn copied again we visit the first one first. This ensures we can find
  // viable locations for testing the original EFLAGS that dominate all the
  // uses across complex CFGs.
  SmallVector<MachineInstr *, 4> Copies;
  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
  for (MachineBasicBlock *MBB : RPOT)
    for (MachineInstr &MI : *MBB)
      if (MI.getOpcode() == TargetOpcode::COPY &&
          MI.getOperand(0).getReg() == X86::EFLAGS)
        Copies.push_back(&MI);

  for (MachineInstr *CopyI : Copies) {
    MachineBasicBlock &MBB = *CopyI->getParent();

    MachineOperand &VOp = CopyI->getOperand(1);
    assert(VOp.isReg() &&
           "The input to the copy for EFLAGS should always be a register!");
    MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
    if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
      // FIXME: The most likely candidates here are PHI nodes. We could in
      // theory handle PHI nodes, but it gets really, really hard. Insanely
      // hard. Hard enough that it is probably better to change every other
      // part of LLVM to avoid creating them. The issue is that once we have
      // PHIs we won't know which original EFLAGS value we need to capture
      // with our setCCs below. The end result would be computing a complete
      // set of setCCs that we *might* want, computing them in every place
      // where we copy *out* of EFLAGS, doing SSA formation on all of them to
      // insert the necessary PHI nodes, consuming those here, and then hoping
      // that somehow we DCE the unnecessary ones. This DCE seems very unlikely
      // to be successful and so we will almost certainly end up with a glut
      // of dead setCC instructions. Until we have a motivating test case and
      // fail to avoid it by changing other parts of LLVM's lowering, we refuse
      // to handle this complex case here.
      LLVM_DEBUG(
          dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
          CopyDefI.dump());
      report_fatal_error(
          "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
    }

    auto Cleanup = make_scope_exit([&] {
      // All uses of the EFLAGS copy are now rewritten; kill the copy into
      // EFLAGS and, if it is now dead, the copy out of EFLAGS as well.
      CopyI->eraseFromParent();
      if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
        CopyDefI.eraseFromParent();
      ++NumCopiesEliminated;
    });

    MachineOperand &DOp = CopyI->getOperand(0);
    assert(DOp.isDef() && "Expected register def!");
    assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
    if (DOp.isDead())
      continue;

    MachineBasicBlock *TestMBB = CopyDefI.getParent();
    auto TestPos = CopyDefI.getIterator();
    DebugLoc TestLoc = CopyDefI.getDebugLoc();

    LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());

    // Walk up across live-in EFLAGS to find where they were actually def'ed.
    //
    // This copy's def may just be part of a region of blocks covered by
    // a single def of EFLAGS and we want to find the top of that region where
    // possible.
    //
    // This is essentially a search for a *candidate* reaching definition
    // location. We don't need to ever find the actual reaching definition here,
    // but we want to walk up the dominator tree to find the highest point which
    // would be viable for such a definition.
    auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin,
                                MachineBasicBlock::iterator End) {
      // Scan backwards as we expect these to be relatively short and often find
      // a clobber near the end.
      return llvm::any_of(
          llvm::reverse(llvm::make_range(Begin, End)), [&](MachineInstr &MI) {
            // Flag any instruction (other than the copy we are
            // currently rewriting) that defs EFLAGS.
            return &MI != CopyI && MI.findRegisterDefOperand(X86::EFLAGS);
          });
    };
    auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB,
                                    MachineBasicBlock *EndMBB) {
      assert(MDT->dominates(BeginMBB, EndMBB) &&
             "Only support paths down the dominator tree!");
      SmallPtrSet<MachineBasicBlock *, 4> Visited;
      SmallVector<MachineBasicBlock *, 4> Worklist;
      // We terminate at the beginning. No need to scan it.
      Visited.insert(BeginMBB);
      Worklist.push_back(EndMBB);
      do {
        auto *MBB = Worklist.pop_back_val();
        for (auto *PredMBB : MBB->predecessors()) {
          if (!Visited.insert(PredMBB).second)
            continue;
          if (HasEFLAGSClobber(PredMBB->begin(), PredMBB->end()))
            return true;
          // Enqueue this block to walk its predecessors.
          Worklist.push_back(PredMBB);
        }
      } while (!Worklist.empty());
      // No clobber found along a path from the begin to end.
      return false;
    };
    while (TestMBB->isLiveIn(X86::EFLAGS) && !TestMBB->pred_empty() &&
           !HasEFLAGSClobber(TestMBB->begin(), TestPos)) {
      // Find the nearest common dominator of the predecessors, as
      // that will be the best candidate to hoist into.
      MachineBasicBlock *HoistMBB =
          std::accumulate(std::next(TestMBB->pred_begin()), TestMBB->pred_end(),
                          *TestMBB->pred_begin(),
                          [&](MachineBasicBlock *LHS, MachineBasicBlock *RHS) {
                            return MDT->findNearestCommonDominator(LHS, RHS);
                          });

      // Now we need to scan all of the predecessors that may be reached along
      // paths to the hoist block. A clobber anywhere in any of these blocks
      // prevents the hoist. Note that this even handles loops because we
      // require *no* clobbers.
      if (HasEFLAGSClobberPath(HoistMBB, TestMBB))
        break;

      // We also need the terminators to not sneakily clobber flags.
      if (HasEFLAGSClobber(HoistMBB->getFirstTerminator()->getIterator(),
                           HoistMBB->instr_end()))
        break;

      // We found a viable location, hoist our test position to it.
      TestMBB = HoistMBB;
      TestPos = TestMBB->getFirstTerminator()->getIterator();
      // Clear the debug location as it would just be confusing after hoisting.
      TestLoc = DebugLoc();
    }
    LLVM_DEBUG({
      auto DefIt = llvm::find_if(
          llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)),
          [&](MachineInstr &MI) {
            return MI.findRegisterDefOperand(X86::EFLAGS);
          });
      if (DefIt.base() != TestMBB->instr_begin()) {
        dbgs() << "  Using EFLAGS defined by: ";
        DefIt->dump();
      } else {
        dbgs() << "  Using live-in flags for BB:\n";
        TestMBB->dump();
      }
    });

    // While rewriting uses, we buffer jumps and rewrite them in a second pass
    // because doing so will perturb the CFG that we are walking to find the
    // uses in the first place.
    SmallVector<MachineInstr *, 4> JmpIs;

    // Gather the condition flags that have already been preserved in
    // registers. We do this from scratch each time as we expect there to be
    // very few of them and we expect to not revisit the same copy definition
    // many times. If either of those change sufficiently we could build a map
    // of these up front instead.
    CondRegArray CondRegs = collectCondsInRegs(*TestMBB, TestPos);

    // Collect the basic blocks we need to scan. Typically this will just be
    // a single basic block but we may have to scan multiple blocks if the
    // EFLAGS copy lives into successors.
    SmallVector<MachineBasicBlock *, 2> Blocks;
    SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
    Blocks.push_back(&MBB);

    do {
      MachineBasicBlock &UseMBB = *Blocks.pop_back_val();

      // Track if/when we find a kill of the flags in this block.
      bool FlagsKilled = false;

      // In most cases, we walk from the beginning to the end of the block. But
      // when the block is the same block as the copy is from, we will visit it
      // twice. The first time we start from the copy and go to the end. The
      // second time we start from the beginning and go to the copy. This lets
      // us handle copies inside of cycles.
      // FIXME: This loop is *super* confusing. This is at least in part
      // a symptom of all of this routine needing to be refactored into
      // documentable components. Once done, there may be a better way to write
      // this loop.
      for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(&UseMBB))
                          ? std::next(CopyI->getIterator())
                          : UseMBB.instr_begin(),
                MIE = UseMBB.instr_end();
           MII != MIE;) {
        MachineInstr &MI = *MII++;
        // If we are in the original copy block and encounter either the copy
        // def or the copy itself, break so that we don't re-process any part of
        // the block or process the instructions in the range that was copied
        // over.
        if (&MI == CopyI || &MI == &CopyDefI) {
          assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) &&
                 "Should only encounter these on the second pass over the "
                 "original block.");
          break;
        }

        MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
        if (!FlagUse) {
          if (MI.findRegisterDefOperand(X86::EFLAGS)) {
            // If EFLAGS are defined, it's as-if they were killed. We can stop
            // scanning here.
            //
            // NB!!! Many instructions only modify some flags. LLVM currently
            // models this as clobbering all flags, but if that ever changes
            // this will need to be carefully updated to handle that more
            // complex logic.
            FlagsKilled = true;
            break;
          }
          continue;
        }

        LLVM_DEBUG(dbgs() << "  Rewriting use: "; MI.dump());

        // Check the kill flag before we rewrite as that may change it.
        if (FlagUse->isKill())
          FlagsKilled = true;

        // Once we encounter a branch, the rest of the instructions must also be
        // branches. We can't rewrite in place here, so we handle them below.
        //
        // Note that we don't have to handle tail calls here, even conditional
        // tail calls, as those are not introduced into the X86 MI until post-RA
        // branch folding or block placement. As a consequence, we get to deal
        // with the simpler formulation of conditional branches followed by tail
        // calls.
        if (X86::getCondFromBranch(MI) != X86::COND_INVALID) {
          auto JmpIt = MI.getIterator();
          do {
            JmpIs.push_back(&*JmpIt);
            ++JmpIt;
          } while (JmpIt != UseMBB.instr_end() &&
                   X86::getCondFromBranch(*JmpIt) != X86::COND_INVALID);
          break;
        }

        // Otherwise we can just rewrite in-place.
        if (X86::getCondFromCMov(MI) != X86::COND_INVALID) {
          rewriteCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
        } else if (X86::getCondFromSETCC(MI) != X86::COND_INVALID) {
          rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
        } else if (MI.getOpcode() == TargetOpcode::COPY) {
          rewriteCopy(MI, *FlagUse, CopyDefI);
        } else {
          // We assume all other instructions that use flags also def them.
          assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
                 "Expected a def of EFLAGS for this instruction!");

          // NB!!! Several arithmetic instructions only *partially* update
          // flags. Theoretically, we could generate MI code sequences that
          // would rely on this fact and observe different flags independently.
          // But currently LLVM models all of these instructions as clobbering
          // all the flags in an undef way. We rely on that to simplify the
          // logic.
          FlagsKilled = true;

          switch (MI.getOpcode()) {
          case X86::SETB_C8r:
          case X86::SETB_C16r:
          case X86::SETB_C32r:
          case X86::SETB_C64r:
            // Use custom lowering for arithmetic that is merely extending the
            // carry flag. We model this as the SETB_C* pseudo instructions.
            rewriteSetCarryExtended(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
                                    CondRegs);
            break;

          default:
            // Generically handle remaining uses as arithmetic instructions.
            rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
                              CondRegs);
            break;
          }
          break;
        }

        // If this was the last use of the flags, we're done.
        if (FlagsKilled)
          break;
      }

      // If the flags were killed, we're done with this block.
      if (FlagsKilled)
        continue;

      // Otherwise we need to scan the successors for ones where the flags are
      // live-in and queue those up for processing.
      for (MachineBasicBlock *SuccMBB : UseMBB.successors())
        if (SuccMBB->isLiveIn(X86::EFLAGS) &&
            VisitedBlocks.insert(SuccMBB).second) {
          // We currently don't do any PHI insertion and so we require that the
          // test basic block dominates all of the use basic blocks. Further, we
          // can't have a cycle from the test block back to itself as that would
          // create a cycle requiring a PHI to break it.
          //
          // We could in theory do PHI insertion here if it becomes useful by
          // just taking undef values in along every edge that we don't trace
          // this EFLAGS copy along. This isn't as bad as fully general PHI
          // insertion, but still seems like a great deal of complexity.
          //
          // Because it is theoretically possible that some earlier MI pass or
          // other lowering transformation could induce this to happen, we do
          // a hard check even in non-debug builds here.
          if (SuccMBB == TestMBB || !MDT->dominates(TestMBB, SuccMBB)) {
            LLVM_DEBUG({
              dbgs()
                  << "ERROR: Encountered use that is not dominated by our test "
                     "basic block! Rewriting this would require inserting PHI "
                     "nodes to track the flag state across the CFG.\n\nTest "
                     "block:\n";
              TestMBB->dump();
              dbgs() << "Use block:\n";
              SuccMBB->dump();
            });
            report_fatal_error(
                "Cannot lower EFLAGS copy when original copy def "
                "does not dominate all uses.");
          }

          Blocks.push_back(SuccMBB);
        }
    } while (!Blocks.empty());

    // Now rewrite the jumps that use the flags. These we handle specially
    // because if there are multiple jumps in a single basic block we'll have
    // to do surgery on the CFG.
    MachineBasicBlock *LastJmpMBB = nullptr;
    for (MachineInstr *JmpI : JmpIs) {
      // Past the first jump within a basic block we need to split the blocks
      // apart.
      if (JmpI->getParent() == LastJmpMBB)
        splitBlock(*JmpI->getParent(), *JmpI, *TII);
      else
        LastJmpMBB = JmpI->getParent();

      rewriteCondJmp(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
    }

    // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
    // the copy's def operand is itself a kill.
  }

#ifndef NDEBUG
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      if (MI.getOpcode() == TargetOpcode::COPY &&
          (MI.getOperand(0).getReg() == X86::EFLAGS ||
           MI.getOperand(1).getReg() == X86::EFLAGS)) {
        LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: ";
                   MI.dump());
        llvm_unreachable("Unlowered EFLAGS copy!");
      }
#endif

  return true;
}

/// Collect any conditions that have already been set in registers so that we
/// can re-use them rather than adding duplicates.
CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos) {
  CondRegArray CondRegs = {};

  // Scan backwards across the range of instructions with live EFLAGS.
  for (MachineInstr &MI :
       llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
    X86::CondCode Cond = X86::getCondFromSETCC(MI);
    if (Cond != X86::COND_INVALID && !MI.mayStore() &&
        MI.getOperand(0).isReg() &&
        TRI->isVirtualRegister(MI.getOperand(0).getReg())) {
      assert(MI.getOperand(0).isDef() &&
             "A non-storing SETcc should always define a register!");
      CondRegs[Cond] = MI.getOperand(0).getReg();
    }

    // Stop scanning when we see the first definition of the EFLAGS as prior to
    // this we would potentially capture the wrong flag state.
    if (MI.findRegisterDefOperand(X86::EFLAGS))
      break;
  }
  return CondRegs;
}

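/// Materialize the given condition into a fresh GR8 register by inserting a
/// SETcc at the test position, where the original EFLAGS value is known to be
/// live.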
unsigned X86FlagsCopyLoweringPass::promoteCondToReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, X86::CondCode Cond) {
  unsigned Reg = MRI->createVirtualRegister(PromoteRC);
  auto SetI = BuildMI(TestMBB, TestPos, TestLoc,
                      TII->get(X86::SETCCr), Reg).addImm(Cond);
  (void)SetI;
  LLVM_DEBUG(dbgs() << "    save cond: "; SetI->dump());
  ++NumSetCCsInserted;
  return Reg;
}

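/// Return a register holding either \p Cond or its inverse, together with a
/// bool that is true when the inverse was chosen. Reuses whichever form has
/// already been materialized so we avoid inserting a redundant SETcc.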
std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
  unsigned &CondReg = CondRegs[Cond];
  unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)];
  if (!CondReg && !InvCondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  if (CondReg)
    return {CondReg, false};
  else
    return {InvCondReg, true};
}

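/// Insert a TEST of the promoted condition byte against itself. TEST8rr sets
/// ZF exactly when the register is zero, so a consumer can select or branch
/// on COND_NE when the saved condition held and COND_E when it did not.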
void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator Pos,
                                          DebugLoc Loc, unsigned Reg) {
  auto TestI =
      BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
  (void)TestI;
  LLVM_DEBUG(dbgs() << "    test cond: "; TestI->dump());
  ++NumTestsInserted;
}

void X86FlagsCopyLoweringPass::rewriteArithmetic(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, MachineInstr &MI, MachineOperand &FlagUse,
    CondRegArray &CondRegs) {
  // Arithmetic is either reading CF or OF. Figure out which condition we need
  // to preserve in a register.
  X86::CondCode Cond;

  // The addend that, when added to the saved flag value, re-creates CF or OF.
  int Addend;

  switch (getMnemonicFromOpcode(MI.getOpcode())) {
  case FlagArithMnemonic::ADC:
  case FlagArithMnemonic::ADCX:
  case FlagArithMnemonic::RCL:
  case FlagArithMnemonic::RCR:
  case FlagArithMnemonic::SBB:
    Cond = X86::COND_B; // CF == 1
    // Set up an addend that, when one is added to it, needs a carry because
    // there is no higher bit available in the byte.
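    // Concretely, the saved condition byte is 0 or 1: 255 + 1 = 0x100 carries
    // (CF = 1), while 255 + 0 = 0xFF does not (CF = 0).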
    Addend = 255;
    break;

  case FlagArithMnemonic::ADOX:
    Cond = X86::COND_O; // OF == 1
    // Set up an addend that, when one is added to it, flips from positive to
    // negative and thus overflows in the signed domain.
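    // Concretely, the saved condition byte is 0 or 1: 127 + 1 = 128 = 0x80
    // flips the sign bit (OF = 1), while 127 + 0 = 127 does not (OF = 0).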
802*0b57cec5SDimitry Andric     Addend = 127;
803*0b57cec5SDimitry Andric     break;
804*0b57cec5SDimitry Andric   }
805*0b57cec5SDimitry Andric 
806*0b57cec5SDimitry Andric   // Now get a register that contains the value of the flag input to the
807*0b57cec5SDimitry Andric   // arithmetic. We require exactly this flag to simplify the arithmetic
808*0b57cec5SDimitry Andric   // required to materialize it back into the flag.
809*0b57cec5SDimitry Andric   unsigned &CondReg = CondRegs[Cond];
810*0b57cec5SDimitry Andric   if (!CondReg)
811*0b57cec5SDimitry Andric     CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
812*0b57cec5SDimitry Andric 
813*0b57cec5SDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
814*0b57cec5SDimitry Andric 
815*0b57cec5SDimitry Andric   // Insert an instruction that will set the flag back to the desired value.
816*0b57cec5SDimitry Andric   unsigned TmpReg = MRI->createVirtualRegister(PromoteRC);
817*0b57cec5SDimitry Andric   auto AddI =
818*0b57cec5SDimitry Andric       BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri))
819*0b57cec5SDimitry Andric           .addDef(TmpReg, RegState::Dead)
820*0b57cec5SDimitry Andric           .addReg(CondReg)
821*0b57cec5SDimitry Andric           .addImm(Addend);
822*0b57cec5SDimitry Andric   (void)AddI;
823*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "    add cond: "; AddI->dump());
824*0b57cec5SDimitry Andric   ++NumAddsInserted;
825*0b57cec5SDimitry Andric   FlagUse.setIsKill(true);
826*0b57cec5SDimitry Andric }
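// Roughly, in pseudo-MIR (an illustrative sketch, not from the source), an
// ADC that consumed the copied EFLAGS:
//   %cf:gr8 = <promoted COND_B at the test point>
//   ...
//   dead %tmp:gr8 = ADD8ri %cf, 255, implicit-def $eflags
//   %sum:gr32 = ADC32rr %a, %b, implicit-def $eflags, implicit $eflags
// The dead ADD8ri exists only to recreate CF immediately before its user.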
827*0b57cec5SDimitry Andric 
828*0b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB,
829*0b57cec5SDimitry Andric                                            MachineBasicBlock::iterator TestPos,
830*0b57cec5SDimitry Andric                                            DebugLoc TestLoc,
831*0b57cec5SDimitry Andric                                            MachineInstr &CMovI,
832*0b57cec5SDimitry Andric                                            MachineOperand &FlagUse,
833*0b57cec5SDimitry Andric                                            CondRegArray &CondRegs) {
834*0b57cec5SDimitry Andric   // First get the register containing this specific condition.
835*0b57cec5SDimitry Andric   X86::CondCode Cond = X86::getCondFromCMov(CMovI);
836*0b57cec5SDimitry Andric   unsigned CondReg;
837*0b57cec5SDimitry Andric   bool Inverted;
838*0b57cec5SDimitry Andric   std::tie(CondReg, Inverted) =
839*0b57cec5SDimitry Andric       getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);
840*0b57cec5SDimitry Andric 
841*0b57cec5SDimitry Andric   MachineBasicBlock &MBB = *CMovI.getParent();
842*0b57cec5SDimitry Andric 
843*0b57cec5SDimitry Andric   // Insert a direct test of the saved register.
844*0b57cec5SDimitry Andric   insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg);
845*0b57cec5SDimitry Andric 
846*0b57cec5SDimitry Andric   // Rewrite the CMov to use the !ZF flag from the test, and then kill its use
847*0b57cec5SDimitry Andric   // of the flags afterward.
848*0b57cec5SDimitry Andric   CMovI.getOperand(CMovI.getDesc().getNumOperands() - 1)
849*0b57cec5SDimitry Andric       .setImm(Inverted ? X86::COND_E : X86::COND_NE);
850*0b57cec5SDimitry Andric   FlagUse.setIsKill(true);
851*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "    fixed cmov: "; CMovI.dump());
852*0b57cec5SDimitry Andric }
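// Illustrative before/after (assumed MIR syntax, not from the source):
//   %r:gr32 = CMOV32rr %a, %b, 7 /* COND_A */, implicit $eflags
// becomes, with %cond holding the promoted COND_A (or its inverse):
//   TEST8rr %cond, %cond, implicit-def $eflags
//   %r:gr32 = CMOV32rr %a, %b, 5 /* COND_NE */, implicit killed $eflags
// with COND_E substituted when only the inverse condition was available.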
853*0b57cec5SDimitry Andric 
854*0b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteCondJmp(
855*0b57cec5SDimitry Andric     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
856*0b57cec5SDimitry Andric     DebugLoc TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) {
857*0b57cec5SDimitry Andric   // First get the register containing this specific condition.
858*0b57cec5SDimitry Andric   X86::CondCode Cond = X86::getCondFromBranch(JmpI);
859*0b57cec5SDimitry Andric   unsigned CondReg;
860*0b57cec5SDimitry Andric   bool Inverted;
861*0b57cec5SDimitry Andric   std::tie(CondReg, Inverted) =
862*0b57cec5SDimitry Andric       getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);
863*0b57cec5SDimitry Andric 
864*0b57cec5SDimitry Andric   MachineBasicBlock &JmpMBB = *JmpI.getParent();
865*0b57cec5SDimitry Andric 
866*0b57cec5SDimitry Andric   // Insert a direct test of the saved register.
867*0b57cec5SDimitry Andric   insertTest(JmpMBB, JmpI.getIterator(), JmpI.getDebugLoc(), CondReg);
868*0b57cec5SDimitry Andric 
869*0b57cec5SDimitry Andric   // Rewrite the jump to use the !ZF flag from the test, and kill its use of
870*0b57cec5SDimitry Andric   // flags afterward.
871*0b57cec5SDimitry Andric   JmpI.getOperand(1).setImm(Inverted ? X86::COND_E : X86::COND_NE);
872*0b57cec5SDimitry Andric   JmpI.findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
873*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "    fixed jCC: "; JmpI.dump());
874*0b57cec5SDimitry Andric }
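// Illustrative (assumed MIR syntax): a conditional branch such as
//   JCC_1 %bb.true, 2 /* COND_B */, implicit $eflags
// is rewritten to
//   TEST8rr %cond, %cond, implicit-def $eflags
//   JCC_1 %bb.true, 5 /* COND_NE */, implicit killed $eflags
// (COND_E instead when the cached register holds the inverse condition).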
875*0b57cec5SDimitry Andric 
876*0b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
877*0b57cec5SDimitry Andric                                            MachineOperand &FlagUse,
878*0b57cec5SDimitry Andric                                            MachineInstr &CopyDefI) {
879*0b57cec5SDimitry Andric   // Just replace this copy with the original copy def.
880*0b57cec5SDimitry Andric   MRI->replaceRegWith(MI.getOperand(0).getReg(),
881*0b57cec5SDimitry Andric                       CopyDefI.getOperand(0).getReg());
882*0b57cec5SDimitry Andric   MI.eraseFromParent();
883*0b57cec5SDimitry Andric }
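// E.g. (illustrative): a nested `%b = COPY %a`, where %a is the def of the
// EFLAGS copy being lowered, is erased and all uses of %b are redirected to
// %a, leaving a single copy def to rewrite.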
884*0b57cec5SDimitry Andric 
885*0b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
886*0b57cec5SDimitry Andric     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
887*0b57cec5SDimitry Andric     DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
888*0b57cec5SDimitry Andric     CondRegArray &CondRegs) {
889*0b57cec5SDimitry Andric   // This routine is only used to handle pseudos for setting a register to zero
890*0b57cec5SDimitry Andric   // or all ones based on CF. This is essentially the sign-extended-from-1-bit
891*0b57cec5SDimitry Andric   // form of SETB, modeled with the SETB_C* pseudos. They require special
892*0b57cec5SDimitry Andric   // handling as they aren't normal SETcc instructions and are lowered to an
893*0b57cec5SDimitry Andric   // EFLAGS-clobbering operation (typically SBB). One simplifying aspect is
894*0b57cec5SDimitry Andric   // that they are only provided in reg-defining forms. A complicating factor
895*0b57cec5SDimitry Andric   // is that they can define many different register widths.
896*0b57cec5SDimitry Andric   assert(SetBI.getOperand(0).isReg() &&
897*0b57cec5SDimitry Andric          "Cannot have a non-register defined operand to this variant of SETB!");
898*0b57cec5SDimitry Andric 
899*0b57cec5SDimitry Andric   // Little helper to do the common final step of replacing the register def'ed
900*0b57cec5SDimitry Andric   // by this SETB instruction with a new register and removing the SETB
901*0b57cec5SDimitry Andric   // instruction.
902*0b57cec5SDimitry Andric   auto RewriteToReg = [&](unsigned Reg) {
903*0b57cec5SDimitry Andric     MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
904*0b57cec5SDimitry Andric     SetBI.eraseFromParent();
905*0b57cec5SDimitry Andric   };
906*0b57cec5SDimitry Andric 
907*0b57cec5SDimitry Andric   // Grab the register class used for this particular instruction.
908*0b57cec5SDimitry Andric   auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());
909*0b57cec5SDimitry Andric 
910*0b57cec5SDimitry Andric   MachineBasicBlock &MBB = *SetBI.getParent();
911*0b57cec5SDimitry Andric   auto SetPos = SetBI.getIterator();
912*0b57cec5SDimitry Andric   auto SetLoc = SetBI.getDebugLoc();
913*0b57cec5SDimitry Andric 
914*0b57cec5SDimitry Andric   auto AdjustReg = [&](unsigned Reg) {
915*0b57cec5SDimitry Andric     auto &OrigRC = *MRI->getRegClass(Reg);
916*0b57cec5SDimitry Andric     if (&OrigRC == &SetBRC)
917*0b57cec5SDimitry Andric       return Reg;
918*0b57cec5SDimitry Andric 
919*0b57cec5SDimitry Andric     unsigned NewReg;
920*0b57cec5SDimitry Andric 
921*0b57cec5SDimitry Andric     int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
922*0b57cec5SDimitry Andric     int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
923*0b57cec5SDimitry Andric     assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
924*0b57cec5SDimitry Andric     assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
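    // Added note: the table below maps a size in bytes to the sub-register
    // index for viewing a wider register at that width: [1] -> sub_8bit,
    // [2] -> sub_16bit, [4] -> sub_32bit; entries 0 and 3 are unused padding.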
925*0b57cec5SDimitry Andric     int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
926*0b57cec5SDimitry Andric                        X86::NoSubRegister, X86::sub_32bit};
927*0b57cec5SDimitry Andric 
928*0b57cec5SDimitry Andric     // If the original size is smaller than the target *and* is smaller than 4
929*0b57cec5SDimitry Andric     // bytes, we need to explicitly zero extend it. We always extend to 4 bytes
930*0b57cec5SDimitry Andric     // to maximize the chance of being able to CSE that operation and to avoid
931*0b57cec5SDimitry Andric     // the partial-register dependency stalls a 2-byte extend would incur.
932*0b57cec5SDimitry Andric     if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
933*0b57cec5SDimitry Andric       NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
934*0b57cec5SDimitry Andric       BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
935*0b57cec5SDimitry Andric           .addReg(Reg);
936*0b57cec5SDimitry Andric       if (&SetBRC == &X86::GR32RegClass)
937*0b57cec5SDimitry Andric         return NewReg;
938*0b57cec5SDimitry Andric       Reg = NewReg;
939*0b57cec5SDimitry Andric       OrigRegSize = 4;
940*0b57cec5SDimitry Andric     }
941*0b57cec5SDimitry Andric 
942*0b57cec5SDimitry Andric     NewReg = MRI->createVirtualRegister(&SetBRC);
943*0b57cec5SDimitry Andric     if (OrigRegSize < TargetRegSize) {
944*0b57cec5SDimitry Andric       BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
945*0b57cec5SDimitry Andric               NewReg)
946*0b57cec5SDimitry Andric           .addImm(0)
947*0b57cec5SDimitry Andric           .addReg(Reg)
948*0b57cec5SDimitry Andric           .addImm(SubRegIdx[OrigRegSize]);
949*0b57cec5SDimitry Andric     } else if (OrigRegSize > TargetRegSize) {
950*0b57cec5SDimitry Andric       if (TargetRegSize == 1 && !Subtarget->is64Bit()) {
951*0b57cec5SDimitry Andric         // Need to constrain the register class.
952*0b57cec5SDimitry Andric         MRI->constrainRegClass(Reg, &X86::GR32_ABCDRegClass);
953*0b57cec5SDimitry Andric       }
954*0b57cec5SDimitry Andric 
955*0b57cec5SDimitry Andric       BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
956*0b57cec5SDimitry Andric           .addReg(Reg, 0, SubRegIdx[TargetRegSize]);
958*0b57cec5SDimitry Andric     } else {
959*0b57cec5SDimitry Andric       BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
960*0b57cec5SDimitry Andric           .addReg(Reg);
961*0b57cec5SDimitry Andric     }
962*0b57cec5SDimitry Andric     return NewReg;
963*0b57cec5SDimitry Andric   };
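  // Illustrative walk-through (added): widening a GR8 condition to GR64 first
  // zero-extends to GR32 with MOVZX32rr8, then wraps the 32-bit value via
  // SUBREG_TO_REG with sub_32bit, relying on x86-64's implicit zeroing of the
  // upper 32 bits when a 32-bit register is written.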
964*0b57cec5SDimitry Andric 
965*0b57cec5SDimitry Andric   unsigned &CondReg = CondRegs[X86::COND_B];
966*0b57cec5SDimitry Andric   if (!CondReg)
967*0b57cec5SDimitry Andric     CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);
968*0b57cec5SDimitry Andric 
969*0b57cec5SDimitry Andric   // Adjust the condition to have the desired register width by zero-extending
970*0b57cec5SDimitry Andric   // as needed.
971*0b57cec5SDimitry Andric   // FIXME: We should use a better API to avoid both the local reference and
972*0b57cec5SDimitry Andric   // the need for a separately named variable here.
973*0b57cec5SDimitry Andric   unsigned ExtCondReg = AdjustReg(CondReg);
974*0b57cec5SDimitry Andric 
975*0b57cec5SDimitry Andric   // Now we need to turn this into a bitmask. We do this by subtracting it from
976*0b57cec5SDimitry Andric   // zero.
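  // E.g. 0 - 1 == all ones and 0 - 0 == 0 in any width, so the SUB below
  // reproduces the zero/all-ones mask the SETB_C* pseudo would have produced.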
977*0b57cec5SDimitry Andric   unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
978*0b57cec5SDimitry Andric   BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
979*0b57cec5SDimitry Andric   ZeroReg = AdjustReg(ZeroReg);
980*0b57cec5SDimitry Andric 
981*0b57cec5SDimitry Andric   unsigned Sub;
982*0b57cec5SDimitry Andric   switch (SetBI.getOpcode()) {
983*0b57cec5SDimitry Andric   case X86::SETB_C8r:
984*0b57cec5SDimitry Andric     Sub = X86::SUB8rr;
985*0b57cec5SDimitry Andric     break;
986*0b57cec5SDimitry Andric 
987*0b57cec5SDimitry Andric   case X86::SETB_C16r:
988*0b57cec5SDimitry Andric     Sub = X86::SUB16rr;
989*0b57cec5SDimitry Andric     break;
990*0b57cec5SDimitry Andric 
991*0b57cec5SDimitry Andric   case X86::SETB_C32r:
992*0b57cec5SDimitry Andric     Sub = X86::SUB32rr;
993*0b57cec5SDimitry Andric     break;
994*0b57cec5SDimitry Andric 
995*0b57cec5SDimitry Andric   case X86::SETB_C64r:
996*0b57cec5SDimitry Andric     Sub = X86::SUB64rr;
997*0b57cec5SDimitry Andric     break;
998*0b57cec5SDimitry Andric 
999*0b57cec5SDimitry Andric   default:
1000*0b57cec5SDimitry Andric     llvm_unreachable("Invalid SETB_C* opcode!");
1001*0b57cec5SDimitry Andric   }
1002*0b57cec5SDimitry Andric   unsigned ResultReg = MRI->createVirtualRegister(&SetBRC);
1003*0b57cec5SDimitry Andric   BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
1004*0b57cec5SDimitry Andric       .addReg(ZeroReg)
1005*0b57cec5SDimitry Andric       .addReg(ExtCondReg);
1006*0b57cec5SDimitry Andric   return RewriteToReg(ResultReg);
1007*0b57cec5SDimitry Andric }
1008*0b57cec5SDimitry Andric 
1009*0b57cec5SDimitry Andric void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
1010*0b57cec5SDimitry Andric                                             MachineBasicBlock::iterator TestPos,
1011*0b57cec5SDimitry Andric                                             DebugLoc TestLoc,
1012*0b57cec5SDimitry Andric                                             MachineInstr &SetCCI,
1013*0b57cec5SDimitry Andric                                             MachineOperand &FlagUse,
1014*0b57cec5SDimitry Andric                                             CondRegArray &CondRegs) {
1015*0b57cec5SDimitry Andric   X86::CondCode Cond = X86::getCondFromSETCC(SetCCI);
1016*0b57cec5SDimitry Andric   // Note that we can't usefully rewrite this to the inverse without complex
1017*0b57cec5SDimitry Andric   // analysis of the users of the SETcc. Largely we rely on any avoidable
1018*0b57cec5SDimitry Andric   // duplicate SETcc instructions having already been avoided by this point.
1019*0b57cec5SDimitry Andric   unsigned &CondReg = CondRegs[Cond];
1020*0b57cec5SDimitry Andric   if (!CondReg)
1021*0b57cec5SDimitry Andric     CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
1022*0b57cec5SDimitry Andric 
1023*0b57cec5SDimitry Andric   // Rewriting a register def is trivial: we just replace the register and
1024*0b57cec5SDimitry Andric   // remove the setcc.
1025*0b57cec5SDimitry Andric   if (!SetCCI.mayStore()) {
1026*0b57cec5SDimitry Andric     assert(SetCCI.getOperand(0).isReg() &&
1027*0b57cec5SDimitry Andric            "Cannot have a non-register defined operand to SETcc!");
1028*0b57cec5SDimitry Andric     MRI->replaceRegWith(SetCCI.getOperand(0).getReg(), CondReg);
1029*0b57cec5SDimitry Andric     SetCCI.eraseFromParent();
1030*0b57cec5SDimitry Andric     return;
1031*0b57cec5SDimitry Andric   }
1032*0b57cec5SDimitry Andric 
1033*0b57cec5SDimitry Andric   // Otherwise, we need to emit a store.
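  // Illustrative (assumed MIR syntax): a memory-form `SETCCm <addr>, cc`
  // that consumed the copied flags becomes `MOV8mr <addr>, %cond`, storing
  // the previously materialized 0/1 byte instead of reading EFLAGS.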
1034*0b57cec5SDimitry Andric   auto MIB = BuildMI(*SetCCI.getParent(), SetCCI.getIterator(),
1035*0b57cec5SDimitry Andric                      SetCCI.getDebugLoc(), TII->get(X86::MOV8mr));
1036*0b57cec5SDimitry Andric   // Copy the address operands.
1037*0b57cec5SDimitry Andric   for (int i = 0; i < X86::AddrNumOperands; ++i)
1038*0b57cec5SDimitry Andric     MIB.add(SetCCI.getOperand(i));
1039*0b57cec5SDimitry Andric 
1040*0b57cec5SDimitry Andric   MIB.addReg(CondReg);
1041*0b57cec5SDimitry Andric 
1042*0b57cec5SDimitry Andric   MIB.setMemRefs(SetCCI.memoperands());
1043*0b57cec5SDimitry Andric 
1044*0b57cec5SDimitry Andric   SetCCI.eraseFromParent();
1046*0b57cec5SDimitry Andric }
1047