xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp (revision 7c20397b724a55001c2054fa133a768e9d06eb1c)
1 //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the pass that finds instructions that can be
10 // re-written as LEA instructions in order to reduce pipeline delays.
11 // It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86InstrInfo.h"
17 #include "X86Subtarget.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/ProfileSummaryInfo.h"
20 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineSizeOpts.h"
24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/CodeGen/TargetSchedule.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/raw_ostream.h"
28 using namespace llvm;
29 
30 #define FIXUPLEA_DESC "X86 LEA Fixup"
31 #define FIXUPLEA_NAME "x86-fixup-LEAs"
32 
33 #define DEBUG_TYPE FIXUPLEA_NAME
34 
35 STATISTIC(NumLEAs, "Number of LEA instructions created");
36 
37 namespace {
38 class FixupLEAPass : public MachineFunctionPass {
39   enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
40 
41   /// Given a machine register, look for the instruction
42   /// which writes it in the current basic block. If found,
43   /// try to replace it with an equivalent LEA instruction.
44   /// If replacement succeeds, then also process the newly created
45   /// instruction.
46   void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
47                     MachineBasicBlock &MBB);
48 
49   /// Given a memory access or LEA instruction
50   /// whose address mode uses a base and/or index register, look for
51   /// an opportunity to replace the instruction which sets the base or index
52   /// register with an equivalent LEA instruction.
53   void processInstruction(MachineBasicBlock::iterator &I,
54                           MachineBasicBlock &MBB);
55 
56   /// Given a LEA instruction which is unprofitable
57   /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
58   void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
59                                     MachineBasicBlock &MBB);
60 
61   /// Given a LEA instruction which is unprofitable
62   /// on SNB+ try to replace it with other instructions.
63   /// According to Intel's Optimization Reference Manual:
64   /// " For LEA instructions with three source operands and some specific
65   ///   situations, instruction latency has increased to 3 cycles, and must
66   ///   dispatch via port 1:
67   /// - LEA that has all three source operands: base, index, and offset
68   /// - LEA that uses base and index registers where the base is EBP, RBP,
69   ///   or R13
70   /// - LEA that uses RIP relative addressing mode
71   /// - LEA that uses 16-bit addressing mode "
72   /// This function currently handles the first 2 cases only.
73   void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
74                                  MachineBasicBlock &MBB, bool OptIncDec);
75 
76   /// Look for LEAs that are really two address LEAs that we might be able to
77   /// turn into regular ADD instructions.
78   bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
79                      MachineBasicBlock &MBB, bool OptIncDec,
80                      bool UseLEAForSP) const;
81 
82   /// Look for and transform the sequence
83   ///     lea (reg1, reg2), reg3
84   ///     sub reg3, reg4
85   /// to
86   ///     sub reg1, reg4
87   ///     sub reg2, reg4
88   /// It can also optimize the sequence lea/add similarly.
89   bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;
90 
91   /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
92   /// the dest register of LEA instruction I.
93   MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
94                                             MachineBasicBlock &MBB) const;
95 
96   /// Check instructions between LeaI and AluI (exclusively).
97   /// Set BaseIndexDef to true if base or index register from LeaI is defined.
98   /// Set AluDestRef to true if the dest register of AluI is used or defined.
99   /// *KilledBase is set to the killed base register usage.
100   /// *KilledIndex is set to the killed index register usage.
101   void checkRegUsage(MachineBasicBlock::iterator &LeaI,
102                      MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
103                      bool &AluDestRef, MachineOperand **KilledBase,
104                      MachineOperand **KilledIndex) const;
105 
106   /// Determine if an instruction references a machine register
107   /// and, if so, whether it reads or writes the register.
108   RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
109 
110   /// Step backwards through a basic block, looking
111   /// for an instruction which writes a register within
112   /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
113   MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
114                                               MachineBasicBlock::iterator &I,
115                                               MachineBasicBlock &MBB);
116 
117   /// if an instruction can be converted to an
118   /// equivalent LEA, insert the new instruction into the basic block
119   /// and return a pointer to it. Otherwise, return zero.
120   MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
121                                    MachineBasicBlock::iterator &MBBI) const;
122 
123 public:
124   static char ID;
125 
126   StringRef getPassName() const override { return FIXUPLEA_DESC; }
127 
128   FixupLEAPass() : MachineFunctionPass(ID) { }
129 
130   /// Loop over all of the basic blocks,
131   /// replacing instructions by equivalent LEA instructions
132   /// if needed and when possible.
133   bool runOnMachineFunction(MachineFunction &MF) override;
134 
135   // This pass runs after regalloc and doesn't support VReg operands.
136   MachineFunctionProperties getRequiredProperties() const override {
137     return MachineFunctionProperties().set(
138         MachineFunctionProperties::Property::NoVRegs);
139   }
140 
141   void getAnalysisUsage(AnalysisUsage &AU) const override {
142     AU.addRequired<ProfileSummaryInfoWrapperPass>();
143     AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
144     MachineFunctionPass::getAnalysisUsage(AU);
145   }
146 
147 private:
148   TargetSchedModel TSM;
149   const X86InstrInfo *TII = nullptr;
150   const X86RegisterInfo *TRI = nullptr;
151 };
152 }
153 
154 char FixupLEAPass::ID = 0;
155 
156 INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
157 
158 MachineInstr *
159 FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
160                                  MachineBasicBlock::iterator &MBBI) const {
161   MachineInstr &MI = *MBBI;
162   switch (MI.getOpcode()) {
163   case X86::MOV32rr:
164   case X86::MOV64rr: {
165     const MachineOperand &Src = MI.getOperand(1);
166     const MachineOperand &Dest = MI.getOperand(0);
167     MachineInstr *NewMI =
168         BuildMI(MBB, MBBI, MI.getDebugLoc(),
169                 TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
170                                                         : X86::LEA64r))
171             .add(Dest)
172             .add(Src)
173             .addImm(1)
174             .addReg(0)
175             .addImm(0)
176             .addReg(0);
177     return NewMI;
178   }
179   }
180 
181   if (!MI.isConvertibleTo3Addr())
182     return nullptr;
183 
184   switch (MI.getOpcode()) {
185   default:
186     // Only convert instructions that we've verified are safe.
187     return nullptr;
188   case X86::ADD64ri32:
189   case X86::ADD64ri8:
190   case X86::ADD64ri32_DB:
191   case X86::ADD64ri8_DB:
192   case X86::ADD32ri:
193   case X86::ADD32ri8:
194   case X86::ADD32ri_DB:
195   case X86::ADD32ri8_DB:
196     if (!MI.getOperand(2).isImm()) {
197       // convertToThreeAddress will call getImm()
198       // which requires isImm() to be true
199       return nullptr;
200     }
201     break;
202   case X86::SHL64ri:
203   case X86::SHL32ri:
204   case X86::INC64r:
205   case X86::INC32r:
206   case X86::DEC64r:
207   case X86::DEC32r:
208   case X86::ADD64rr:
209   case X86::ADD64rr_DB:
210   case X86::ADD32rr:
211   case X86::ADD32rr_DB:
212     // These instructions are all fine to convert.
213     break;
214   }
215   return TII->convertToThreeAddress(MI, nullptr, nullptr);
216 }
217 
218 FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
219 
220 static bool isLEA(unsigned Opcode) {
221   return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
222          Opcode == X86::LEA64_32r;
223 }
224 
225 bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
226   if (skipFunction(MF.getFunction()))
227     return false;
228 
229   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
230   bool IsSlowLEA = ST.slowLEA();
231   bool IsSlow3OpsLEA = ST.slow3OpsLEA();
232   bool LEAUsesAG = ST.LEAusesAG();
233 
234   bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
235   bool UseLEAForSP = ST.useLeaForSP();
236 
237   TSM.init(&ST);
238   TII = ST.getInstrInfo();
239   TRI = ST.getRegisterInfo();
240   auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
241   auto *MBFI = (PSI && PSI->hasProfileSummary())
242                    ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
243                    : nullptr;
244 
245   LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
246   for (MachineBasicBlock &MBB : MF) {
247     // First pass. Try to remove or optimize existing LEAs.
248     bool OptIncDecPerBB =
249         OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
250     for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
251       if (!isLEA(I->getOpcode()))
252         continue;
253 
254       if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
255         continue;
256 
257       if (IsSlowLEA)
258         processInstructionForSlowLEA(I, MBB);
259       else if (IsSlow3OpsLEA)
260         processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
261     }
262 
263     // Second pass for creating LEAs. This may reverse some of the
264     // transformations above.
265     if (LEAUsesAG) {
266       for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
267         processInstruction(I, MBB);
268     }
269   }
270 
271   LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
272 
273   return true;
274 }
275 
276 FixupLEAPass::RegUsageState
277 FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
278   RegUsageState RegUsage = RU_NotUsed;
279   MachineInstr &MI = *I;
280 
281   for (const MachineOperand &MO : MI.operands()) {
282     if (MO.isReg() && MO.getReg() == p.getReg()) {
283       if (MO.isDef())
284         return RU_Write;
285       RegUsage = RU_Read;
286     }
287   }
288   return RegUsage;
289 }
290 
291 /// getPreviousInstr - Given a reference to an instruction in a basic
292 /// block, return a reference to the previous instruction in the block,
293 /// wrapping around to the last instruction of the block if the block
294 /// branches to itself.
295 static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
296                                     MachineBasicBlock &MBB) {
297   if (I == MBB.begin()) {
298     if (MBB.isPredecessor(&MBB)) {
299       I = --MBB.end();
300       return true;
301     } else
302       return false;
303   }
304   --I;
305   return true;
306 }
307 
308 MachineBasicBlock::iterator
309 FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
310                               MachineBasicBlock &MBB) {
311   int InstrDistance = 1;
312   MachineBasicBlock::iterator CurInst;
313   static const int INSTR_DISTANCE_THRESHOLD = 5;
314 
315   CurInst = I;
316   bool Found;
317   Found = getPreviousInstr(CurInst, MBB);
318   while (Found && I != CurInst) {
319     if (CurInst->isCall() || CurInst->isInlineAsm())
320       break;
321     if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
322       break; // too far back to make a difference
323     if (usesRegister(p, CurInst) == RU_Write) {
324       return CurInst;
325     }
326     InstrDistance += TSM.computeInstrLatency(&*CurInst);
327     Found = getPreviousInstr(CurInst, MBB);
328   }
329   return MachineBasicBlock::iterator();
330 }
331 
332 static inline bool isInefficientLEAReg(unsigned Reg) {
333   return Reg == X86::EBP || Reg == X86::RBP ||
334          Reg == X86::R13D || Reg == X86::R13;
335 }
336 
337 /// Returns true if this LEA uses base an index registers, and the base register
338 /// is known to be inefficient for the subtarget.
339 // TODO: use a variant scheduling class to model the latency profile
340 // of LEA instructions, and implement this logic as a scheduling predicate.
341 static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
342                                             const MachineOperand &Index) {
343   return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
344          Index.getReg() != X86::NoRegister;
345 }
346 
347 static inline bool hasLEAOffset(const MachineOperand &Offset) {
348   return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
349 }
350 
351 static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
352   switch (LEAOpcode) {
353   default:
354     llvm_unreachable("Unexpected LEA instruction");
355   case X86::LEA32r:
356   case X86::LEA64_32r:
357     return X86::ADD32rr;
358   case X86::LEA64r:
359     return X86::ADD64rr;
360   }
361 }
362 
363 static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
364   switch (LEAOpcode) {
365   default:
366     llvm_unreachable("Unexpected LEA instruction");
367   case X86::LEA32r:
368   case X86::LEA64_32r:
369     return X86::SUB32rr;
370   case X86::LEA64r:
371     return X86::SUB64rr;
372   }
373 }
374 
375 static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
376                                        const MachineOperand &Offset) {
377   bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
378   switch (LEAOpcode) {
379   default:
380     llvm_unreachable("Unexpected LEA instruction");
381   case X86::LEA32r:
382   case X86::LEA64_32r:
383     return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
384   case X86::LEA64r:
385     return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
386   }
387 }
388 
389 static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
390   switch (LEAOpcode) {
391   default:
392     llvm_unreachable("Unexpected LEA instruction");
393   case X86::LEA32r:
394   case X86::LEA64_32r:
395     return IsINC ? X86::INC32r : X86::DEC32r;
396   case X86::LEA64r:
397     return IsINC ? X86::INC64r : X86::DEC64r;
398   }
399 }
400 
401 MachineBasicBlock::iterator
402 FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
403                             MachineBasicBlock &MBB) const {
404   const int InstrDistanceThreshold = 5;
405   int InstrDistance = 1;
406   MachineBasicBlock::iterator CurInst = std::next(I);
407 
408   unsigned LEAOpcode = I->getOpcode();
409   unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
410   unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
411   Register DestReg = I->getOperand(0).getReg();
412 
413   while (CurInst != MBB.end()) {
414     if (CurInst->isCall() || CurInst->isInlineAsm())
415       break;
416     if (InstrDistance > InstrDistanceThreshold)
417       break;
418 
419     // Check if the lea dest register is used in an add/sub instruction only.
420     for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
421       MachineOperand &Opnd = CurInst->getOperand(I);
422       if (Opnd.isReg()) {
423         if (Opnd.getReg() == DestReg) {
424           if (Opnd.isDef() || !Opnd.isKill())
425             return MachineBasicBlock::iterator();
426 
427           unsigned AluOpcode = CurInst->getOpcode();
428           if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
429             return MachineBasicBlock::iterator();
430 
431           MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
432           MachineOperand AluDest = CurInst->getOperand(0);
433           if (Opnd2.getReg() != AluDest.getReg())
434             return MachineBasicBlock::iterator();
435 
436           // X - (Y + Z) may generate different flags than (X - Y) - Z when
437           // there is overflow. So we can't change the alu instruction if the
438           // flags register is live.
439           if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
440             return MachineBasicBlock::iterator();
441 
442           return CurInst;
443         }
444         if (TRI->regsOverlap(DestReg, Opnd.getReg()))
445           return MachineBasicBlock::iterator();
446       }
447     }
448 
449     InstrDistance++;
450     ++CurInst;
451   }
452   return MachineBasicBlock::iterator();
453 }
454 
455 void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
456                                  MachineBasicBlock::iterator &AluI,
457                                  bool &BaseIndexDef, bool &AluDestRef,
458                                  MachineOperand **KilledBase,
459                                  MachineOperand **KilledIndex) const {
460   BaseIndexDef = AluDestRef = false;
461   *KilledBase = *KilledIndex = nullptr;
462   Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
463   Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
464   Register AluDestReg = AluI->getOperand(0).getReg();
465 
466   MachineBasicBlock::iterator CurInst = std::next(LeaI);
467   while (CurInst != AluI) {
468     for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
469       MachineOperand &Opnd = CurInst->getOperand(I);
470       if (!Opnd.isReg())
471         continue;
472       Register Reg = Opnd.getReg();
473       if (TRI->regsOverlap(Reg, AluDestReg))
474         AluDestRef = true;
475       if (TRI->regsOverlap(Reg, BaseReg)) {
476         if (Opnd.isDef())
477           BaseIndexDef = true;
478         else if (Opnd.isKill())
479           *KilledBase = &Opnd;
480       }
481       if (TRI->regsOverlap(Reg, IndexReg)) {
482         if (Opnd.isDef())
483           BaseIndexDef = true;
484         else if (Opnd.isKill())
485           *KilledIndex = &Opnd;
486       }
487     }
488     ++CurInst;
489   }
490 }
491 
492 bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
493                              MachineBasicBlock &MBB) const {
494   // Look for an add/sub instruction which uses the result of lea.
495   MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
496   if (AluI == MachineBasicBlock::iterator())
497     return false;
498 
499   // Check if there are any related register usage between lea and alu.
500   bool BaseIndexDef, AluDestRef;
501   MachineOperand *KilledBase, *KilledIndex;
502   checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);
503 
504   MachineBasicBlock::iterator InsertPos = AluI;
505   if (BaseIndexDef) {
506     if (AluDestRef)
507       return false;
508     InsertPos = I;
509     KilledBase = KilledIndex = nullptr;
510   }
511 
512   // Check if there are same registers.
513   Register AluDestReg = AluI->getOperand(0).getReg();
514   Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
515   Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
516   if (I->getOpcode() == X86::LEA64_32r) {
517     BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
518     IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
519   }
520   if (AluDestReg == IndexReg) {
521     if (BaseReg == IndexReg)
522       return false;
523     std::swap(BaseReg, IndexReg);
524     std::swap(KilledBase, KilledIndex);
525   }
526   if (BaseReg == IndexReg)
527     KilledBase = nullptr;
528 
529   // Now it's safe to change instructions.
530   MachineInstr *NewMI1, *NewMI2;
531   unsigned NewOpcode = AluI->getOpcode();
532   NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
533                    AluDestReg)
534                .addReg(AluDestReg, RegState::Kill)
535                .addReg(BaseReg, KilledBase ? RegState::Kill : 0);
536   NewMI1->addRegisterDead(X86::EFLAGS, TRI);
537   NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
538                    AluDestReg)
539                .addReg(AluDestReg, RegState::Kill)
540                .addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
541   NewMI2->addRegisterDead(X86::EFLAGS, TRI);
542 
543   // Clear the old Kill flags.
544   if (KilledBase)
545     KilledBase->setIsKill(false);
546   if (KilledIndex)
547     KilledIndex->setIsKill(false);
548 
549   MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI1, 1);
550   MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
551   MBB.erase(I);
552   MBB.erase(AluI);
553   I = NewMI1;
554   return true;
555 }
556 
557 bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
558                                  MachineBasicBlock &MBB, bool OptIncDec,
559                                  bool UseLEAForSP) const {
560   MachineInstr &MI = *I;
561 
562   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
563   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
564   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
565   const MachineOperand &Disp =    MI.getOperand(1 + X86::AddrDisp);
566   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
567 
568   if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
569       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
570           MachineBasicBlock::LQR_Dead)
571     return false;
572 
573   Register DestReg = MI.getOperand(0).getReg();
574   Register BaseReg = Base.getReg();
575   Register IndexReg = Index.getReg();
576 
577   // Don't change stack adjustment LEAs.
578   if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
579     return false;
580 
581   // LEA64_32 has 64-bit operands but 32-bit result.
582   if (MI.getOpcode() == X86::LEA64_32r) {
583     if (BaseReg != 0)
584       BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
585     if (IndexReg != 0)
586       IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
587   }
588 
589   MachineInstr *NewMI = nullptr;
590 
591   // Case 1.
592   // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
593   // which can be turned into add %reg2, %reg1
594   if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
595       (DestReg == BaseReg || DestReg == IndexReg)) {
596     unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
597     if (DestReg != BaseReg)
598       std::swap(BaseReg, IndexReg);
599 
600     if (MI.getOpcode() == X86::LEA64_32r) {
601       // TODO: Do we need the super register implicit use?
602       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
603         .addReg(BaseReg).addReg(IndexReg)
604         .addReg(Base.getReg(), RegState::Implicit)
605         .addReg(Index.getReg(), RegState::Implicit);
606     } else {
607       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
608         .addReg(BaseReg).addReg(IndexReg);
609     }
610   } else if (DestReg == BaseReg && IndexReg == 0) {
611     // Case 2.
612     // This is an LEA with only a base register and a displacement,
613     // We can use ADDri or INC/DEC.
614 
615     // Does this LEA have one these forms:
616     // lea  %reg, 1(%reg)
617     // lea  %reg, -1(%reg)
618     if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
619       bool IsINC = Disp.getImm() == 1;
620       unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);
621 
622       if (MI.getOpcode() == X86::LEA64_32r) {
623         // TODO: Do we need the super register implicit use?
624         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
625           .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
626       } else {
627         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
628           .addReg(BaseReg);
629       }
630     } else {
631       unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
632       if (MI.getOpcode() == X86::LEA64_32r) {
633         // TODO: Do we need the super register implicit use?
634         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
635           .addReg(BaseReg).addImm(Disp.getImm())
636           .addReg(Base.getReg(), RegState::Implicit);
637       } else {
638         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
639           .addReg(BaseReg).addImm(Disp.getImm());
640       }
641     }
642   } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
643     // Case 3.
644     // Look for and transform the sequence
645     //     lea (reg1, reg2), reg3
646     //     sub reg3, reg4
647     return optLEAALU(I, MBB);
648   } else
649     return false;
650 
651   MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
652   MBB.erase(I);
653   I = NewMI;
654   return true;
655 }
656 
657 void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
658                                       MachineBasicBlock &MBB) {
659   // Process a load, store, or LEA instruction.
660   MachineInstr &MI = *I;
661   const MCInstrDesc &Desc = MI.getDesc();
662   int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
663   if (AddrOffset >= 0) {
664     AddrOffset += X86II::getOperandBias(Desc);
665     MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
666     if (p.isReg() && p.getReg() != X86::ESP) {
667       seekLEAFixup(p, I, MBB);
668     }
669     MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
670     if (q.isReg() && q.getReg() != X86::ESP) {
671       seekLEAFixup(q, I, MBB);
672     }
673   }
674 }
675 
676 void FixupLEAPass::seekLEAFixup(MachineOperand &p,
677                                 MachineBasicBlock::iterator &I,
678                                 MachineBasicBlock &MBB) {
679   MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
680   if (MBI != MachineBasicBlock::iterator()) {
681     MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
682     if (NewMI) {
683       ++NumLEAs;
684       LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
685       // now to replace with an equivalent LEA...
686       LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
687       MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
688       MBB.erase(MBI);
689       MachineBasicBlock::iterator J =
690           static_cast<MachineBasicBlock::iterator>(NewMI);
691       processInstruction(J, MBB);
692     }
693   }
694 }
695 
696 void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
697                                                 MachineBasicBlock &MBB) {
698   MachineInstr &MI = *I;
699   const unsigned Opcode = MI.getOpcode();
700 
701   const MachineOperand &Dst =     MI.getOperand(0);
702   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
703   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
704   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
705   const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
706   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
707 
708   if (Segment.getReg() != 0 || !Offset.isImm() ||
709       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
710           MachineBasicBlock::LQR_Dead)
711     return;
712   const Register DstR = Dst.getReg();
713   const Register SrcR1 = Base.getReg();
714   const Register SrcR2 = Index.getReg();
715   if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
716     return;
717   if (Scale.getImm() > 1)
718     return;
719   LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
720   LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
721   MachineInstr *NewMI = nullptr;
722   // Make ADD instruction for two registers writing to LEA's destination
723   if (SrcR1 != 0 && SrcR2 != 0) {
724     const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
725     const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
726     NewMI =
727         BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
728     LLVM_DEBUG(NewMI->dump(););
729   }
730   // Make ADD instruction for immediate
731   if (Offset.getImm() != 0) {
732     const MCInstrDesc &ADDri =
733         TII->get(getADDriFromLEA(Opcode, Offset));
734     const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
735     NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
736                 .add(SrcR)
737                 .addImm(Offset.getImm());
738     LLVM_DEBUG(NewMI->dump(););
739   }
740   if (NewMI) {
741     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
742     MBB.erase(I);
743     I = NewMI;
744   }
745 }
746 
747 void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
748                                              MachineBasicBlock &MBB,
749                                              bool OptIncDec) {
750   MachineInstr &MI = *I;
751   const unsigned LEAOpcode = MI.getOpcode();
752 
753   const MachineOperand &Dest =    MI.getOperand(0);
754   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
755   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
756   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
757   const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
758   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
759 
760   if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
761       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
762           MachineBasicBlock::LQR_Dead ||
763       Segment.getReg() != X86::NoRegister)
764     return;
765 
766   Register DestReg = Dest.getReg();
767   Register BaseReg = Base.getReg();
768   Register IndexReg = Index.getReg();
769 
770   if (MI.getOpcode() == X86::LEA64_32r) {
771     if (BaseReg != 0)
772       BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
773     if (IndexReg != 0)
774       IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
775   }
776 
777   bool IsScale1 = Scale.getImm() == 1;
778   bool IsInefficientBase = isInefficientLEAReg(BaseReg);
779   bool IsInefficientIndex = isInefficientLEAReg(IndexReg);
780 
781   // Skip these cases since it takes more than 2 instructions
782   // to replace the LEA instruction.
783   if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
784     return;
785 
786   LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
787   LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
788 
789   MachineInstr *NewMI = nullptr;
790 
791   // First try to replace LEA with one or two (for the 3-op LEA case)
792   // add instructions:
793   // 1.lea (%base,%index,1), %base => add %index,%base
794   // 2.lea (%base,%index,1), %index => add %base,%index
795   if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
796     unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
797     if (DestReg != BaseReg)
798       std::swap(BaseReg, IndexReg);
799 
800     if (MI.getOpcode() == X86::LEA64_32r) {
801       // TODO: Do we need the super register implicit use?
802       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
803                   .addReg(BaseReg)
804                   .addReg(IndexReg)
805                   .addReg(Base.getReg(), RegState::Implicit)
806                   .addReg(Index.getReg(), RegState::Implicit);
807     } else {
808       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
809                   .addReg(BaseReg)
810                   .addReg(IndexReg);
811     }
812   } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
813     // If the base is inefficient try switching the index and base operands,
814     // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
815     // lea offset(%base,%index,scale),%dst =>
816     // lea (%base,%index,scale); add offset,%dst
817     NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
818                 .add(Dest)
819                 .add(IsInefficientBase ? Index : Base)
820                 .add(Scale)
821                 .add(IsInefficientBase ? Base : Index)
822                 .addImm(0)
823                 .add(Segment);
824     LLVM_DEBUG(NewMI->dump(););
825   }
826 
827   // If either replacement succeeded above, add the offset if needed, then
828   // replace the instruction.
829   if (NewMI) {
830     // Create ADD instruction for the Offset in case of 3-Ops LEA.
831     if (hasLEAOffset(Offset)) {
832       if (OptIncDec && Offset.isImm() &&
833           (Offset.getImm() == 1 || Offset.getImm() == -1)) {
834         unsigned NewOpc =
835             getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
836         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
837                     .addReg(DestReg);
838         LLVM_DEBUG(NewMI->dump(););
839       } else {
840         unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
841         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
842                     .addReg(DestReg)
843                     .add(Offset);
844         LLVM_DEBUG(NewMI->dump(););
845       }
846     }
847 
848     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
849     MBB.erase(I);
850     I = NewMI;
851     return;
852   }
853 
854   // Handle the rest of the cases with inefficient base register:
855   assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
856   assert(IsInefficientBase && "efficient base should be handled already!");
857 
858   // FIXME: Handle LEA64_32r.
859   if (LEAOpcode == X86::LEA64_32r)
860     return;
861 
862   // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
863   if (IsScale1 && !hasLEAOffset(Offset)) {
864     bool BIK = Base.isKill() && BaseReg != IndexReg;
865     TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
866     LLVM_DEBUG(MI.getPrevNode()->dump(););
867 
868     unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
869     NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
870                 .addReg(DestReg)
871                 .add(Index);
872     LLVM_DEBUG(NewMI->dump(););
873 
874     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
875     MBB.erase(I);
876     I = NewMI;
877     return;
878   }
879 
880   // lea offset(%base,%index,scale), %dst =>
881   // lea offset( ,%index,scale), %dst; add %base,%dst
882   NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
883               .add(Dest)
884               .addReg(0)
885               .add(Scale)
886               .add(Index)
887               .add(Offset)
888               .add(Segment);
889   LLVM_DEBUG(NewMI->dump(););
890 
891   unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
892   NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
893               .addReg(DestReg)
894               .add(Base);
895   LLVM_DEBUG(NewMI->dump(););
896 
897   MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
898   MBB.erase(I);
899   I = NewMI;
900 }
901