xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the pass that finds instructions that can be
10 // re-written as LEA instructions in order to reduce pipeline delays.
11 // It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86InstrInfo.h"
17 #include "X86Subtarget.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/ProfileSummaryInfo.h"
20 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineSizeOpts.h"
24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/CodeGen/TargetSchedule.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/raw_ostream.h"
28 using namespace llvm;
29 
30 #define FIXUPLEA_DESC "X86 LEA Fixup"
31 #define FIXUPLEA_NAME "x86-fixup-LEAs"
32 
33 #define DEBUG_TYPE FIXUPLEA_NAME
34 
35 STATISTIC(NumLEAs, "Number of LEA instructions created");
36 
37 namespace {
38 class FixupLEAPass : public MachineFunctionPass {
39   enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
40 
41   /// Given a machine register, look for the instruction
42   /// which writes it in the current basic block. If found,
43   /// try to replace it with an equivalent LEA instruction.
44   /// If replacement succeeds, then also process the newly created
45   /// instruction.
46   void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
47                     MachineBasicBlock &MBB);
48 
49   /// Given a memory access or LEA instruction
50   /// whose address mode uses a base and/or index register, look for
51   /// an opportunity to replace the instruction which sets the base or index
52   /// register with an equivalent LEA instruction.
53   void processInstruction(MachineBasicBlock::iterator &I,
54                           MachineBasicBlock &MBB);
55 
56   /// Given a LEA instruction which is unprofitable
57   /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
58   void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
59                                     MachineBasicBlock &MBB);
60 
61   /// Given a LEA instruction which is unprofitable
62   /// on SNB+ try to replace it with other instructions.
63   /// According to Intel's Optimization Reference Manual:
64   /// " For LEA instructions with three source operands and some specific
65   ///   situations, instruction latency has increased to 3 cycles, and must
66   ///   dispatch via port 1:
67   /// - LEA that has all three source operands: base, index, and offset
68   /// - LEA that uses base and index registers where the base is EBP, RBP,
69   ///   or R13
70   /// - LEA that uses RIP relative addressing mode
71   /// - LEA that uses 16-bit addressing mode "
72   /// This function currently handles the first 2 cases only.
73   void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
74                                  MachineBasicBlock &MBB, bool OptIncDec);
75 
76   /// Look for LEAs that are really two address LEAs that we might be able to
77   /// turn into regular ADD instructions.
78   bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
79                      MachineBasicBlock &MBB, bool OptIncDec,
80                      bool UseLEAForSP) const;
81 
82   /// Look for and transform the sequence
83   ///     lea (reg1, reg2), reg3
84   ///     sub reg3, reg4
85   /// to
86   ///     sub reg1, reg4
87   ///     sub reg2, reg4
88   /// It can also optimize the sequence lea/add similarly.
89   bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;
90 
91   /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
92   /// the dest register of LEA instruction I.
93   MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
94                                             MachineBasicBlock &MBB) const;
95 
96   /// Check instructions between LeaI and AluI (exclusively).
97   /// Set BaseIndexDef to true if base or index register from LeaI is defined.
98   /// Set AluDestRef to true if the dest register of AluI is used or defined.
99   /// *KilledBase is set to the killed base register usage.
100   /// *KilledIndex is set to the killed index register usage.
101   void checkRegUsage(MachineBasicBlock::iterator &LeaI,
102                      MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
103                      bool &AluDestRef, MachineOperand **KilledBase,
104                      MachineOperand **KilledIndex) const;
105 
106   /// Determine if an instruction references a machine register
107   /// and, if so, whether it reads or writes the register.
108   RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
109 
110   /// Step backwards through a basic block, looking
111   /// for an instruction which writes a register within
112   /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
113   MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
114                                               MachineBasicBlock::iterator &I,
115                                               MachineBasicBlock &MBB);
116 
117   /// if an instruction can be converted to an
118   /// equivalent LEA, insert the new instruction into the basic block
119   /// and return a pointer to it. Otherwise, return zero.
120   MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
121                                    MachineBasicBlock::iterator &MBBI) const;
122 
123 public:
124   static char ID;
125 
126   StringRef getPassName() const override { return FIXUPLEA_DESC; }
127 
128   FixupLEAPass() : MachineFunctionPass(ID) { }
129 
130   /// Loop over all of the basic blocks,
131   /// replacing instructions by equivalent LEA instructions
132   /// if needed and when possible.
133   bool runOnMachineFunction(MachineFunction &MF) override;
134 
135   // This pass runs after regalloc and doesn't support VReg operands.
136   MachineFunctionProperties getRequiredProperties() const override {
137     return MachineFunctionProperties().set(
138         MachineFunctionProperties::Property::NoVRegs);
139   }
140 
141   void getAnalysisUsage(AnalysisUsage &AU) const override {
142     AU.addRequired<ProfileSummaryInfoWrapperPass>();
143     AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
144     MachineFunctionPass::getAnalysisUsage(AU);
145   }
146 
147 private:
148   TargetSchedModel TSM;
149   const X86InstrInfo *TII = nullptr;
150   const X86RegisterInfo *TRI = nullptr;
151 };
152 }
153 
154 char FixupLEAPass::ID = 0;
155 
156 INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
157 
158 MachineInstr *
159 FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
160                                  MachineBasicBlock::iterator &MBBI) const {
161   MachineInstr &MI = *MBBI;
162   switch (MI.getOpcode()) {
163   case X86::MOV32rr:
164   case X86::MOV64rr: {
165     const MachineOperand &Src = MI.getOperand(1);
166     const MachineOperand &Dest = MI.getOperand(0);
167     MachineInstr *NewMI =
168         BuildMI(MBB, MBBI, MI.getDebugLoc(),
169                 TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
170                                                         : X86::LEA64r))
171             .add(Dest)
172             .add(Src)
173             .addImm(1)
174             .addReg(0)
175             .addImm(0)
176             .addReg(0);
177     return NewMI;
178   }
179   }
180 
181   if (!MI.isConvertibleTo3Addr())
182     return nullptr;
183 
184   switch (MI.getOpcode()) {
185   default:
186     // Only convert instructions that we've verified are safe.
187     return nullptr;
188   case X86::ADD64ri32:
189   case X86::ADD64ri8:
190   case X86::ADD64ri32_DB:
191   case X86::ADD64ri8_DB:
192   case X86::ADD32ri:
193   case X86::ADD32ri8:
194   case X86::ADD32ri_DB:
195   case X86::ADD32ri8_DB:
196     if (!MI.getOperand(2).isImm()) {
197       // convertToThreeAddress will call getImm()
198       // which requires isImm() to be true
199       return nullptr;
200     }
201     break;
202   case X86::SHL64ri:
203   case X86::SHL32ri:
204   case X86::INC64r:
205   case X86::INC32r:
206   case X86::DEC64r:
207   case X86::DEC32r:
208   case X86::ADD64rr:
209   case X86::ADD64rr_DB:
210   case X86::ADD32rr:
211   case X86::ADD32rr_DB:
212     // These instructions are all fine to convert.
213     break;
214   }
215   MachineFunction::iterator MFI = MBB.getIterator();
216   return TII->convertToThreeAddress(MFI, MI, nullptr);
217 }
218 
219 FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
220 
221 static bool isLEA(unsigned Opcode) {
222   return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
223          Opcode == X86::LEA64_32r;
224 }
225 
226 bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
227   if (skipFunction(MF.getFunction()))
228     return false;
229 
230   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
231   bool IsSlowLEA = ST.slowLEA();
232   bool IsSlow3OpsLEA = ST.slow3OpsLEA();
233   bool LEAUsesAG = ST.LEAusesAG();
234 
235   bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
236   bool UseLEAForSP = ST.useLeaForSP();
237 
238   TSM.init(&ST);
239   TII = ST.getInstrInfo();
240   TRI = ST.getRegisterInfo();
241   auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
242   auto *MBFI = (PSI && PSI->hasProfileSummary())
243                    ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
244                    : nullptr;
245 
246   LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
247   for (MachineBasicBlock &MBB : MF) {
248     // First pass. Try to remove or optimize existing LEAs.
249     bool OptIncDecPerBB =
250         OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
251     for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
252       if (!isLEA(I->getOpcode()))
253         continue;
254 
255       if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
256         continue;
257 
258       if (IsSlowLEA)
259         processInstructionForSlowLEA(I, MBB);
260       else if (IsSlow3OpsLEA)
261         processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
262     }
263 
264     // Second pass for creating LEAs. This may reverse some of the
265     // transformations above.
266     if (LEAUsesAG) {
267       for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
268         processInstruction(I, MBB);
269     }
270   }
271 
272   LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
273 
274   return true;
275 }
276 
277 FixupLEAPass::RegUsageState
278 FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
279   RegUsageState RegUsage = RU_NotUsed;
280   MachineInstr &MI = *I;
281 
282   for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
283     MachineOperand &opnd = MI.getOperand(i);
284     if (opnd.isReg() && opnd.getReg() == p.getReg()) {
285       if (opnd.isDef())
286         return RU_Write;
287       RegUsage = RU_Read;
288     }
289   }
290   return RegUsage;
291 }
292 
293 /// getPreviousInstr - Given a reference to an instruction in a basic
294 /// block, return a reference to the previous instruction in the block,
295 /// wrapping around to the last instruction of the block if the block
296 /// branches to itself.
297 static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
298                                     MachineBasicBlock &MBB) {
299   if (I == MBB.begin()) {
300     if (MBB.isPredecessor(&MBB)) {
301       I = --MBB.end();
302       return true;
303     } else
304       return false;
305   }
306   --I;
307   return true;
308 }
309 
310 MachineBasicBlock::iterator
311 FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
312                               MachineBasicBlock &MBB) {
313   int InstrDistance = 1;
314   MachineBasicBlock::iterator CurInst;
315   static const int INSTR_DISTANCE_THRESHOLD = 5;
316 
317   CurInst = I;
318   bool Found;
319   Found = getPreviousInstr(CurInst, MBB);
320   while (Found && I != CurInst) {
321     if (CurInst->isCall() || CurInst->isInlineAsm())
322       break;
323     if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
324       break; // too far back to make a difference
325     if (usesRegister(p, CurInst) == RU_Write) {
326       return CurInst;
327     }
328     InstrDistance += TSM.computeInstrLatency(&*CurInst);
329     Found = getPreviousInstr(CurInst, MBB);
330   }
331   return MachineBasicBlock::iterator();
332 }
333 
334 static inline bool isInefficientLEAReg(unsigned Reg) {
335   return Reg == X86::EBP || Reg == X86::RBP ||
336          Reg == X86::R13D || Reg == X86::R13;
337 }
338 
339 /// Returns true if this LEA uses base an index registers, and the base register
340 /// is known to be inefficient for the subtarget.
341 // TODO: use a variant scheduling class to model the latency profile
342 // of LEA instructions, and implement this logic as a scheduling predicate.
343 static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
344                                             const MachineOperand &Index) {
345   return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
346          Index.getReg() != X86::NoRegister;
347 }
348 
349 static inline bool hasLEAOffset(const MachineOperand &Offset) {
350   return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
351 }
352 
353 static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
354   switch (LEAOpcode) {
355   default:
356     llvm_unreachable("Unexpected LEA instruction");
357   case X86::LEA32r:
358   case X86::LEA64_32r:
359     return X86::ADD32rr;
360   case X86::LEA64r:
361     return X86::ADD64rr;
362   }
363 }
364 
365 static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
366   switch (LEAOpcode) {
367   default:
368     llvm_unreachable("Unexpected LEA instruction");
369   case X86::LEA32r:
370   case X86::LEA64_32r:
371     return X86::SUB32rr;
372   case X86::LEA64r:
373     return X86::SUB64rr;
374   }
375 }
376 
377 static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
378                                        const MachineOperand &Offset) {
379   bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
380   switch (LEAOpcode) {
381   default:
382     llvm_unreachable("Unexpected LEA instruction");
383   case X86::LEA32r:
384   case X86::LEA64_32r:
385     return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
386   case X86::LEA64r:
387     return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
388   }
389 }
390 
391 static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
392   switch (LEAOpcode) {
393   default:
394     llvm_unreachable("Unexpected LEA instruction");
395   case X86::LEA32r:
396   case X86::LEA64_32r:
397     return IsINC ? X86::INC32r : X86::DEC32r;
398   case X86::LEA64r:
399     return IsINC ? X86::INC64r : X86::DEC64r;
400   }
401 }
402 
403 MachineBasicBlock::iterator
404 FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
405                             MachineBasicBlock &MBB) const {
406   const int InstrDistanceThreshold = 5;
407   int InstrDistance = 1;
408   MachineBasicBlock::iterator CurInst = std::next(I);
409 
410   unsigned LEAOpcode = I->getOpcode();
411   unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
412   unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
413   Register DestReg = I->getOperand(0).getReg();
414 
415   while (CurInst != MBB.end()) {
416     if (CurInst->isCall() || CurInst->isInlineAsm())
417       break;
418     if (InstrDistance > InstrDistanceThreshold)
419       break;
420 
421     // Check if the lea dest register is used in an add/sub instruction only.
422     for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
423       MachineOperand &Opnd = CurInst->getOperand(I);
424       if (Opnd.isReg()) {
425         if (Opnd.getReg() == DestReg) {
426           if (Opnd.isDef() || !Opnd.isKill())
427             return MachineBasicBlock::iterator();
428 
429           unsigned AluOpcode = CurInst->getOpcode();
430           if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
431             return MachineBasicBlock::iterator();
432 
433           MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
434           MachineOperand AluDest = CurInst->getOperand(0);
435           if (Opnd2.getReg() != AluDest.getReg())
436             return MachineBasicBlock::iterator();
437 
438           // X - (Y + Z) may generate different flags than (X - Y) - Z when
439           // there is overflow. So we can't change the alu instruction if the
440           // flags register is live.
441           if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
442             return MachineBasicBlock::iterator();
443 
444           return CurInst;
445         }
446         if (TRI->regsOverlap(DestReg, Opnd.getReg()))
447           return MachineBasicBlock::iterator();
448       }
449     }
450 
451     InstrDistance++;
452     ++CurInst;
453   }
454   return MachineBasicBlock::iterator();
455 }
456 
457 void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
458                                  MachineBasicBlock::iterator &AluI,
459                                  bool &BaseIndexDef, bool &AluDestRef,
460                                  MachineOperand **KilledBase,
461                                  MachineOperand **KilledIndex) const {
462   BaseIndexDef = AluDestRef = false;
463   *KilledBase = *KilledIndex = nullptr;
464   Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
465   Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
466   Register AluDestReg = AluI->getOperand(0).getReg();
467 
468   MachineBasicBlock::iterator CurInst = std::next(LeaI);
469   while (CurInst != AluI) {
470     for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
471       MachineOperand &Opnd = CurInst->getOperand(I);
472       if (!Opnd.isReg())
473         continue;
474       Register Reg = Opnd.getReg();
475       if (TRI->regsOverlap(Reg, AluDestReg))
476         AluDestRef = true;
477       if (TRI->regsOverlap(Reg, BaseReg)) {
478         if (Opnd.isDef())
479           BaseIndexDef = true;
480         else if (Opnd.isKill())
481           *KilledBase = &Opnd;
482       }
483       if (TRI->regsOverlap(Reg, IndexReg)) {
484         if (Opnd.isDef())
485           BaseIndexDef = true;
486         else if (Opnd.isKill())
487           *KilledIndex = &Opnd;
488       }
489     }
490     ++CurInst;
491   }
492 }
493 
494 bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
495                              MachineBasicBlock &MBB) const {
496   // Look for an add/sub instruction which uses the result of lea.
497   MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
498   if (AluI == MachineBasicBlock::iterator())
499     return false;
500 
501   // Check if there are any related register usage between lea and alu.
502   bool BaseIndexDef, AluDestRef;
503   MachineOperand *KilledBase, *KilledIndex;
504   checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);
505 
506   MachineBasicBlock::iterator InsertPos = AluI;
507   if (BaseIndexDef) {
508     if (AluDestRef)
509       return false;
510     InsertPos = I;
511     KilledBase = KilledIndex = nullptr;
512   }
513 
514   // Check if there are same registers.
515   Register AluDestReg = AluI->getOperand(0).getReg();
516   Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
517   Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
518   if (I->getOpcode() == X86::LEA64_32r) {
519     BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
520     IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
521   }
522   if (AluDestReg == IndexReg) {
523     if (BaseReg == IndexReg)
524       return false;
525     std::swap(BaseReg, IndexReg);
526     std::swap(KilledBase, KilledIndex);
527   }
528   if (BaseReg == IndexReg)
529     KilledBase = nullptr;
530 
531   // Now it's safe to change instructions.
532   MachineInstr *NewMI1, *NewMI2;
533   unsigned NewOpcode = AluI->getOpcode();
534   NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
535                    AluDestReg)
536                .addReg(AluDestReg, RegState::Kill)
537                .addReg(BaseReg, KilledBase ? RegState::Kill : 0);
538   NewMI1->addRegisterDead(X86::EFLAGS, TRI);
539   NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
540                    AluDestReg)
541                .addReg(AluDestReg, RegState::Kill)
542                .addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
543   NewMI2->addRegisterDead(X86::EFLAGS, TRI);
544 
545   // Clear the old Kill flags.
546   if (KilledBase)
547     KilledBase->setIsKill(false);
548   if (KilledIndex)
549     KilledIndex->setIsKill(false);
550 
551   MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI1, 1);
552   MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
553   MBB.erase(I);
554   MBB.erase(AluI);
555   I = NewMI1;
556   return true;
557 }
558 
559 bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
560                                  MachineBasicBlock &MBB, bool OptIncDec,
561                                  bool UseLEAForSP) const {
562   MachineInstr &MI = *I;
563 
564   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
565   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
566   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
567   const MachineOperand &Disp =    MI.getOperand(1 + X86::AddrDisp);
568   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
569 
570   if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
571       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
572           MachineBasicBlock::LQR_Dead)
573     return false;
574 
575   Register DestReg = MI.getOperand(0).getReg();
576   Register BaseReg = Base.getReg();
577   Register IndexReg = Index.getReg();
578 
579   // Don't change stack adjustment LEAs.
580   if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
581     return false;
582 
583   // LEA64_32 has 64-bit operands but 32-bit result.
584   if (MI.getOpcode() == X86::LEA64_32r) {
585     if (BaseReg != 0)
586       BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
587     if (IndexReg != 0)
588       IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
589   }
590 
591   MachineInstr *NewMI = nullptr;
592 
593   // Case 1.
594   // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
595   // which can be turned into add %reg2, %reg1
596   if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
597       (DestReg == BaseReg || DestReg == IndexReg)) {
598     unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
599     if (DestReg != BaseReg)
600       std::swap(BaseReg, IndexReg);
601 
602     if (MI.getOpcode() == X86::LEA64_32r) {
603       // TODO: Do we need the super register implicit use?
604       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
605         .addReg(BaseReg).addReg(IndexReg)
606         .addReg(Base.getReg(), RegState::Implicit)
607         .addReg(Index.getReg(), RegState::Implicit);
608     } else {
609       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
610         .addReg(BaseReg).addReg(IndexReg);
611     }
612   } else if (DestReg == BaseReg && IndexReg == 0) {
613     // Case 2.
614     // This is an LEA with only a base register and a displacement,
615     // We can use ADDri or INC/DEC.
616 
617     // Does this LEA have one these forms:
618     // lea  %reg, 1(%reg)
619     // lea  %reg, -1(%reg)
620     if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
621       bool IsINC = Disp.getImm() == 1;
622       unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);
623 
624       if (MI.getOpcode() == X86::LEA64_32r) {
625         // TODO: Do we need the super register implicit use?
626         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
627           .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
628       } else {
629         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
630           .addReg(BaseReg);
631       }
632     } else {
633       unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
634       if (MI.getOpcode() == X86::LEA64_32r) {
635         // TODO: Do we need the super register implicit use?
636         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
637           .addReg(BaseReg).addImm(Disp.getImm())
638           .addReg(Base.getReg(), RegState::Implicit);
639       } else {
640         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
641           .addReg(BaseReg).addImm(Disp.getImm());
642       }
643     }
644   } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
645     // Case 3.
646     // Look for and transform the sequence
647     //     lea (reg1, reg2), reg3
648     //     sub reg3, reg4
649     return optLEAALU(I, MBB);
650   } else
651     return false;
652 
653   MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
654   MBB.erase(I);
655   I = NewMI;
656   return true;
657 }
658 
659 void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
660                                       MachineBasicBlock &MBB) {
661   // Process a load, store, or LEA instruction.
662   MachineInstr &MI = *I;
663   const MCInstrDesc &Desc = MI.getDesc();
664   int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
665   if (AddrOffset >= 0) {
666     AddrOffset += X86II::getOperandBias(Desc);
667     MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
668     if (p.isReg() && p.getReg() != X86::ESP) {
669       seekLEAFixup(p, I, MBB);
670     }
671     MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
672     if (q.isReg() && q.getReg() != X86::ESP) {
673       seekLEAFixup(q, I, MBB);
674     }
675   }
676 }
677 
678 void FixupLEAPass::seekLEAFixup(MachineOperand &p,
679                                 MachineBasicBlock::iterator &I,
680                                 MachineBasicBlock &MBB) {
681   MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
682   if (MBI != MachineBasicBlock::iterator()) {
683     MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
684     if (NewMI) {
685       ++NumLEAs;
686       LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
687       // now to replace with an equivalent LEA...
688       LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
689       MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
690       MBB.erase(MBI);
691       MachineBasicBlock::iterator J =
692           static_cast<MachineBasicBlock::iterator>(NewMI);
693       processInstruction(J, MBB);
694     }
695   }
696 }
697 
698 void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
699                                                 MachineBasicBlock &MBB) {
700   MachineInstr &MI = *I;
701   const unsigned Opcode = MI.getOpcode();
702 
703   const MachineOperand &Dst =     MI.getOperand(0);
704   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
705   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
706   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
707   const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
708   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
709 
710   if (Segment.getReg() != 0 || !Offset.isImm() ||
711       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
712           MachineBasicBlock::LQR_Dead)
713     return;
714   const Register DstR = Dst.getReg();
715   const Register SrcR1 = Base.getReg();
716   const Register SrcR2 = Index.getReg();
717   if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
718     return;
719   if (Scale.getImm() > 1)
720     return;
721   LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
722   LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
723   MachineInstr *NewMI = nullptr;
724   // Make ADD instruction for two registers writing to LEA's destination
725   if (SrcR1 != 0 && SrcR2 != 0) {
726     const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
727     const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
728     NewMI =
729         BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
730     LLVM_DEBUG(NewMI->dump(););
731   }
732   // Make ADD instruction for immediate
733   if (Offset.getImm() != 0) {
734     const MCInstrDesc &ADDri =
735         TII->get(getADDriFromLEA(Opcode, Offset));
736     const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
737     NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
738                 .add(SrcR)
739                 .addImm(Offset.getImm());
740     LLVM_DEBUG(NewMI->dump(););
741   }
742   if (NewMI) {
743     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
744     MBB.erase(I);
745     I = NewMI;
746   }
747 }
748 
749 void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
750                                              MachineBasicBlock &MBB,
751                                              bool OptIncDec) {
752   MachineInstr &MI = *I;
753   const unsigned LEAOpcode = MI.getOpcode();
754 
755   const MachineOperand &Dest =    MI.getOperand(0);
756   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
757   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
758   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
759   const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
760   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
761 
762   if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
763       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
764           MachineBasicBlock::LQR_Dead ||
765       Segment.getReg() != X86::NoRegister)
766     return;
767 
768   Register DestReg = Dest.getReg();
769   Register BaseReg = Base.getReg();
770   Register IndexReg = Index.getReg();
771 
772   if (MI.getOpcode() == X86::LEA64_32r) {
773     if (BaseReg != 0)
774       BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
775     if (IndexReg != 0)
776       IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
777   }
778 
779   bool IsScale1 = Scale.getImm() == 1;
780   bool IsInefficientBase = isInefficientLEAReg(BaseReg);
781   bool IsInefficientIndex = isInefficientLEAReg(IndexReg);
782 
783   // Skip these cases since it takes more than 2 instructions
784   // to replace the LEA instruction.
785   if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
786     return;
787 
788   LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
789   LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
790 
791   MachineInstr *NewMI = nullptr;
792 
793   // First try to replace LEA with one or two (for the 3-op LEA case)
794   // add instructions:
795   // 1.lea (%base,%index,1), %base => add %index,%base
796   // 2.lea (%base,%index,1), %index => add %base,%index
797   if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
798     unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
799     if (DestReg != BaseReg)
800       std::swap(BaseReg, IndexReg);
801 
802     if (MI.getOpcode() == X86::LEA64_32r) {
803       // TODO: Do we need the super register implicit use?
804       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
805                   .addReg(BaseReg)
806                   .addReg(IndexReg)
807                   .addReg(Base.getReg(), RegState::Implicit)
808                   .addReg(Index.getReg(), RegState::Implicit);
809     } else {
810       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
811                   .addReg(BaseReg)
812                   .addReg(IndexReg);
813     }
814   } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
815     // If the base is inefficient try switching the index and base operands,
816     // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
817     // lea offset(%base,%index,scale),%dst =>
818     // lea (%base,%index,scale); add offset,%dst
819     NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
820                 .add(Dest)
821                 .add(IsInefficientBase ? Index : Base)
822                 .add(Scale)
823                 .add(IsInefficientBase ? Base : Index)
824                 .addImm(0)
825                 .add(Segment);
826     LLVM_DEBUG(NewMI->dump(););
827   }
828 
829   // If either replacement succeeded above, add the offset if needed, then
830   // replace the instruction.
831   if (NewMI) {
832     // Create ADD instruction for the Offset in case of 3-Ops LEA.
833     if (hasLEAOffset(Offset)) {
834       if (OptIncDec && Offset.isImm() &&
835           (Offset.getImm() == 1 || Offset.getImm() == -1)) {
836         unsigned NewOpc =
837             getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
838         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
839                     .addReg(DestReg);
840         LLVM_DEBUG(NewMI->dump(););
841       } else {
842         unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
843         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
844                     .addReg(DestReg)
845                     .add(Offset);
846         LLVM_DEBUG(NewMI->dump(););
847       }
848     }
849 
850     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
851     MBB.erase(I);
852     I = NewMI;
853     return;
854   }
855 
856   // Handle the rest of the cases with inefficient base register:
857   assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
858   assert(IsInefficientBase && "efficient base should be handled already!");
859 
860   // FIXME: Handle LEA64_32r.
861   if (LEAOpcode == X86::LEA64_32r)
862     return;
863 
864   // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
865   if (IsScale1 && !hasLEAOffset(Offset)) {
866     bool BIK = Base.isKill() && BaseReg != IndexReg;
867     TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
868     LLVM_DEBUG(MI.getPrevNode()->dump(););
869 
870     unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
871     NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
872                 .addReg(DestReg)
873                 .add(Index);
874     LLVM_DEBUG(NewMI->dump(););
875 
876     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
877     MBB.erase(I);
878     I = NewMI;
879     return;
880   }
881 
882   // lea offset(%base,%index,scale), %dst =>
883   // lea offset( ,%index,scale), %dst; add %base,%dst
884   NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
885               .add(Dest)
886               .addReg(0)
887               .add(Scale)
888               .add(Index)
889               .add(Offset)
890               .add(Segment);
891   LLVM_DEBUG(NewMI->dump(););
892 
893   unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
894   NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
895               .addReg(DestReg)
896               .add(Base);
897   LLVM_DEBUG(NewMI->dump(););
898 
899   MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
900   MBB.erase(I);
901   I = NewMI;
902 }
903