//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass that finds instructions that can be
// re-written as LEA instructions in order to reduce pipeline delays.
// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define FIXUPLEA_DESC "X86 LEA Fixup"
#define FIXUPLEA_NAME "x86-fixup-LEAs"

#define DEBUG_TYPE FIXUPLEA_NAME

STATISTIC(NumLEAs, "Number of LEA instructions created");

namespace {
class FixupLEAPass : public MachineFunctionPass {
  // How an instruction uses a given register: not at all, as a def, or
  // as a use. RU_Write takes priority when an instruction both reads and
  // writes the register (see usesRegister()).
  enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };

  /// Given a machine register, look for the instruction
  /// which writes it in the current basic block. If found,
  /// try to replace it with an equivalent LEA instruction.
  /// If replacement succeeds, then also process the newly created
  /// instruction.
  void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
                    MachineBasicBlock &MBB);

  /// Given a memory access or LEA instruction
  /// whose address mode uses a base and/or index register, look for
  /// an opportunity to replace the instruction which sets the base or index
  /// register with an equivalent LEA instruction.
  void processInstruction(MachineBasicBlock::iterator &I,
                          MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
  void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SNB+ try to replace it with other instructions.
  /// According to Intel's Optimization Reference Manual:
  /// " For LEA instructions with three source operands and some specific
  ///   situations, instruction latency has increased to 3 cycles, and must
  ///   dispatch via port 1:
  /// - LEA that has all three source operands: base, index, and offset
  /// - LEA that uses base and index registers where the base is EBP, RBP,
  ///   or R13
  /// - LEA that uses RIP relative addressing mode
  /// - LEA that uses 16-bit addressing mode "
  /// This function currently handles the first 2 cases only.
  MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI,
                                          MachineBasicBlock &MBB);

  /// Look for LEAs that are really two address LEAs that we might be able to
  /// turn into regular ADD instructions.
  bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
                     MachineBasicBlock &MBB, bool OptIncDec,
                     bool UseLEAForSP) const;

  /// Determine if an instruction references a machine register
  /// and, if so, whether it reads or writes the register.
  RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);

  /// Step backwards through a basic block, looking
  /// for an instruction which writes a register within
  /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
  MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
                                              MachineBasicBlock::iterator &I,
                                              MachineBasicBlock &MBB);

  /// if an instruction can be converted to an
  /// equivalent LEA, insert the new instruction into the basic block
  /// and return a pointer to it. Otherwise, return zero.
  MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI) const;

public:
  static char ID;

  StringRef getPassName() const override { return FIXUPLEA_DESC; }

  FixupLEAPass() : MachineFunctionPass(ID) { }

  /// Loop over all of the basic blocks,
  /// replacing instructions by equivalent LEA instructions
  /// if needed and when possible.
  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

private:
  TargetSchedModel TSM;       // Latency model used by searchBackwards().
  const X86InstrInfo *TII;    // Cached from the subtarget each run.
  const X86RegisterInfo *TRI; // Cached from the subtarget each run.
};
}

char FixupLEAPass::ID = 0;

INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)

MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &MBBI) const {
  MachineInstr &MI = *MBBI;
  switch (MI.getOpcode()) {
  case X86::MOV32rr:
  case X86::MOV64rr: {
    // A register-to-register MOV becomes "lea 0(%src), %dst": same result,
    // scale 1, no index, displacement 0, no segment.
    const MachineOperand &Src = MI.getOperand(1);
    const MachineOperand &Dest = MI.getOperand(0);
    MachineInstr *NewMI =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
                                                        : X86::LEA64r))
            .add(Dest)
            .add(Src)
            .addImm(1)
            .addReg(0)
            .addImm(0)
            .addReg(0);
    return NewMI;
  }
  }

  if (!MI.isConvertibleTo3Addr())
    return nullptr;

  switch (MI.getOpcode()) {
  default:
    // Only convert instructions that we've verified are safe.
    return nullptr;
  case X86::ADD64ri32:
  case X86::ADD64ri8:
  case X86::ADD64ri32_DB:
  case X86::ADD64ri8_DB:
  case X86::ADD32ri:
  case X86::ADD32ri8:
  case X86::ADD32ri_DB:
  case X86::ADD32ri8_DB:
    if (!MI.getOperand(2).isImm()) {
      // convertToThreeAddress will call getImm()
      // which requires isImm() to be true
      return nullptr;
    }
    break;
  case X86::SHL64ri:
  case X86::SHL32ri:
  case X86::INC64r:
  case X86::INC32r:
  case X86::DEC64r:
  case X86::DEC32r:
  case X86::ADD64rr:
  case X86::ADD64rr_DB:
  case X86::ADD32rr:
  case X86::ADD32rr_DB:
    // These instructions are all fine to convert.
    break;
  }
  MachineFunction::iterator MFI = MBB.getIterator();
  return TII->convertToThreeAddress(MFI, MI, nullptr);
}

FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }

// True for the three LEA forms this pass rewrites (32-bit, 64-bit, and the
// 64-bit-address/32-bit-result LEA64_32r).
static bool isLEA(unsigned Opcode) {
  return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
         Opcode == X86::LEA64_32r;
}

bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  // Subtarget tuning flags decide which of the (partially conflicting)
  // rewrites below are applied.
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  bool IsSlowLEA = ST.slowLEA();
  bool IsSlow3OpsLEA = ST.slow3OpsLEA();
  bool LEAUsesAG = ST.LEAusesAG();

  // INC/DEC is preferred unless the subtarget says it's slow, except that
  // when optimizing for size INC/DEC wins anyway (shorter encoding).
  bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
  bool UseLEAForSP = ST.useLeaForSP();

  TSM.init(&ST);
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();

  LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
  for (MachineBasicBlock &MBB : MF) {
    // First pass. Try to remove or optimize existing LEAs.
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
      if (!isLEA(I->getOpcode()))
        continue;

      // optTwoAddrLEA/processInstrForSlow3OpLEA re-point I at the
      // replacement instruction, so ++I continues after it.
      if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP))
        continue;

      if (IsSlowLEA) {
        processInstructionForSlowLEA(I, MBB);
      } else if (IsSlow3OpsLEA) {
        if (auto *NewMI = processInstrForSlow3OpLEA(*I, MBB)) {
          MBB.erase(I);
          I = NewMI;
        }
      }
    }

    // Second pass for creating LEAs. This may reverse some of the
    // transformations above.
    if (LEAUsesAG) {
      for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
        processInstruction(I, MBB);
    }
  }

  LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);

  return true;
}

FixupLEAPass::RegUsageState
FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
  RegUsageState RegUsage = RU_NotUsed;
  MachineInstr &MI = *I;

  // A def anywhere in the instruction wins over a use; a use is only
  // reported if no operand defs the register.
  // NOTE(review): this compares register numbers exactly — sub/super
  // register aliases are not considered; confirm callers only pass
  // full-width address registers.
  for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
    MachineOperand &opnd = MI.getOperand(i);
    if (opnd.isReg() && opnd.getReg() == p.getReg()) {
      if (opnd.isDef())
        return RU_Write;
      RegUsage = RU_Read;
    }
  }
  return RegUsage;
}

/// getPreviousInstr - Given a reference to an instruction in a basic
/// block, return a reference to the previous instruction in the block,
/// wrapping around to the last instruction of the block if the block
/// branches to itself.
static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB) {
  if (I == MBB.begin()) {
    if (MBB.isPredecessor(&MBB)) {
      I = --MBB.end();
      return true;
    } else
      return false;
  }
  --I;
  return true;
}

MachineBasicBlock::iterator
FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
                              MachineBasicBlock &MBB) {
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst;
  static const int INSTR_DISTANCE_THRESHOLD = 5;

  CurInst = I;
  bool Found;
  Found = getPreviousInstr(CurInst, MBB);
  // Walk backwards (possibly wrapping once in a self-loop block, in which
  // case I != CurInst terminates the full cycle) accumulating scheduled
  // latency until the writer is found or the search becomes pointless.
  while (Found && I != CurInst) {
    // Calls and inline asm have unmodelled effects; stop searching.
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
      break; // too far back to make a difference
    if (usesRegister(p, CurInst) == RU_Write) {
      return CurInst;
    }
    InstrDistance += TSM.computeInstrLatency(&*CurInst);
    Found = getPreviousInstr(CurInst, MBB);
  }
  // Default-constructed iterator is the "not found" sentinel checked by
  // seekLEAFixup().
  return MachineBasicBlock::iterator();
}

// Registers whose use as an LEA base costs extra on SNB+ (EBP/RBP/R13D/R13
// force a displacement byte in the encoding path Intel documents as slow).
static inline bool isInefficientLEAReg(unsigned Reg) {
  return Reg == X86::EBP || Reg == X86::RBP ||
         Reg == X86::R13D || Reg == X86::R13;
}

static inline bool isRegOperand(const MachineOperand &Op) {
  return Op.isReg() && Op.getReg() != X86::NoRegister;
}

/// Returns true if this LEA uses base an index registers, and the base register
/// is known to be inefficient for the subtarget.
// TODO: use a variant scheduling class to model the latency profile
// of LEA instructions, and implement this logic as a scheduling predicate.
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
                                            const MachineOperand &Index) {
  return Base.isReg() && isInefficientLEAReg(Base.getReg()) &&
         isRegOperand(Index);
}

// A "real" offset: a non-zero immediate or a global-address displacement.
static inline bool hasLEAOffset(const MachineOperand &Offset) {
  return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
}

static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::ADD32rr;
  case X86::LEA64r:
    return X86::ADD64rr;
  }
}

static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
                                       const MachineOperand &Offset) {
  // Pick the short imm8 encoding when the displacement fits in 8 bits.
  bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
  case X86::LEA64r:
    return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
  }
}

static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsINC ? X86::INC32r : X86::DEC32r;
  case X86::LEA64r:
    return IsINC ? X86::INC64r : X86::DEC64r;
  }
}

bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec,
                                 bool UseLEAForSP) const {
  MachineInstr &MI = *I;

  const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Disp =    MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  // ADD/INC/DEC write EFLAGS while LEA does not, so the rewrite is only
  // legal when EFLAGS is dead here. Scaled indexes and symbolic
  // displacements have no ADD equivalent.
  if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
      !TII->isSafeToClobberEFLAGS(MBB, I))
    return false;

  unsigned DestReg = MI.getOperand(0).getReg();
  unsigned BaseReg = Base.getReg();
  unsigned IndexReg = Index.getReg();

  // Don't change stack adjustment LEAs.
  if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
    return false;

  // LEA64_32 has 64-bit operands but 32-bit result.
  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  MachineInstr *NewMI = nullptr;

  // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
  // which can be turned into add %reg2, %reg1
  if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
      (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
        .addReg(BaseReg).addReg(IndexReg)
        .addReg(Base.getReg(), RegState::Implicit)
        .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
        .addReg(BaseReg).addReg(IndexReg);
    }
  } else if (DestReg == BaseReg && IndexReg == 0) {
    // This is an LEA with only a base register and a displacement,
    // We can use ADDri or INC/DEC.

    // Does this LEA have one these forms:
    // lea  %reg, 1(%reg)
    // lea  %reg, -1(%reg)
    if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
      bool IsINC = Disp.getImm() == 1;
      unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);

      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
          .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
          .addReg(BaseReg);
      }
    } else {
      unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
          .addReg(BaseReg).addImm(Disp.getImm())
          .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
          .addReg(BaseReg).addImm(Disp.getImm());
      }
    }
  } else
    return false;

  MBB.erase(I);
  I = NewMI;
  return true;
}

void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
                                      MachineBasicBlock &MBB) {
  // Process a load, store, or LEA instruction.
  MachineInstr &MI = *I;
  const MCInstrDesc &Desc = MI.getDesc();
  // getMemoryOperandNo() returns -1 for instructions without a memory
  // operand; getOperandBias() accounts for leading tied/extra operands.
  int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (AddrOffset >= 0) {
    AddrOffset += X86II::getOperandBias(Desc);
    MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
    if (p.isReg() && p.getReg() != X86::ESP) {
      seekLEAFixup(p, I, MBB);
    }
    MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
    if (q.isReg() && q.getReg() != X86::ESP) {
      seekLEAFixup(q, I, MBB);
    }
  }
}

void FixupLEAPass::seekLEAFixup(MachineOperand &p,
                                MachineBasicBlock::iterator &I,
                                MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
  if (MBI != MachineBasicBlock::iterator()) {
    MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
    if (NewMI) {
      ++NumLEAs;
      LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
      // now to replace with an equivalent LEA...
      LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
      MBB.erase(MBI);
      // Recurse on the freshly created LEA: its own base/index registers may
      // in turn be produced by convertible instructions.
      MachineBasicBlock::iterator J =
          static_cast<MachineBasicBlock::iterator>(NewMI);
      processInstruction(J, MBB);
    }
  }
}

void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                                MachineBasicBlock &MBB) {
  MachineInstr &MI = *I;
  const unsigned Opcode = MI.getOpcode();

  const MachineOperand &Dst =     MI.getOperand(0);
  const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  // ADDs clobber EFLAGS, so bail out unless EFLAGS is dead here.
  if (Segment.getReg() != 0 || !Offset.isImm() ||
      !TII->isSafeToClobberEFLAGS(MBB, I))
    return;
  const unsigned DstR = Dst.getReg();
  const unsigned SrcR1 = Base.getReg();
  const unsigned SrcR2 = Index.getReg();
  // The rewrite accumulates into DstR, so the destination must equal the
  // base or the index register.
  if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
    return;
  if (Scale.getImm() > 1)
    return;
  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
  MachineInstr *NewMI = nullptr;
  // Make ADD instruction for two registers writing to LEA's destination
  if (SrcR1 != 0 && SrcR2 != 0) {
    const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
    const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
    NewMI =
        BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
    LLVM_DEBUG(NewMI->dump(););
  }
  // Make ADD instruction for immediate
  // (may be a second ADD after the register-register one above).
  if (Offset.getImm() != 0) {
    const MCInstrDesc &ADDri =
        TII->get(getADDriFromLEA(Opcode, Offset));
    const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
    NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
                .add(SrcR)
                .addImm(Offset.getImm());
    LLVM_DEBUG(NewMI->dump(););
  }
  if (NewMI) {
    MBB.erase(I);
    I = NewMI;
  }
}

MachineInstr *
FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
                                        MachineBasicBlock &MBB) {
  const unsigned LEAOpcode = MI.getOpcode();

  const MachineOperand &Dst =     MI.getOperand(0);
  const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  // Only worth rewriting a 3-operand LEA or one with a slow base register,
  // and only when EFLAGS can be clobbered by the replacement ADDs.
  if (!(TII->isThreeOperandsLEA(MI) ||
        hasInefficientLEABaseReg(Base, Index)) ||
      !TII->isSafeToClobberEFLAGS(MBB, MI) ||
      Segment.getReg() != X86::NoRegister)
    return nullptr;

  unsigned DstR = Dst.getReg();
  unsigned BaseR = Base.getReg();
  unsigned IndexR = Index.getReg();
  // For LEA64_32r the destination is 32-bit but base/index are 64-bit;
  // compare against the 64-bit super register of the destination.
  unsigned SSDstR =
      (LEAOpcode == X86::LEA64_32r) ? getX86SubSuperRegister(DstR, 64) : DstR;
  bool IsScale1 = Scale.getImm() == 1;
  bool IsInefficientBase = isInefficientLEAReg(BaseR);
  bool IsInefficientIndex = isInefficientLEAReg(IndexR);

  // Skip these cases since it takes more than 2 instructions
  // to replace the LEA instruction.
  if (IsInefficientBase && SSDstR == BaseR && !IsScale1)
    return nullptr;
  if (LEAOpcode == X86::LEA64_32r && IsInefficientBase &&
      (IsInefficientIndex || !IsScale1))
    return nullptr;

  const DebugLoc DL = MI.getDebugLoc();
  const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(LEAOpcode));
  const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(LEAOpcode, Offset));

  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);

  // First try to replace LEA with one or two (for the 3-op LEA case)
  // add instructions:
  // 1.lea (%base,%index,1), %base => add %index,%base
  // 2.lea (%base,%index,1), %index => add %base,%index
  if (IsScale1 && (DstR == BaseR || DstR == IndexR)) {
    const MachineOperand &Src = DstR == BaseR ? Index : Base;
    MachineInstr *NewMI =
        BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Src);
    LLVM_DEBUG(NewMI->dump(););
    // Create ADD instruction for the Offset in case of 3-Ops LEA.
    if (hasLEAOffset(Offset)) {
      NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
      LLVM_DEBUG(NewMI->dump(););
    }
    return NewMI;
  }
  // If the base is inefficient try switching the index and base operands,
  // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
  // lea offset(%base,%index,scale),%dst =>
  // lea (%base,%index,scale); add offset,%dst
  if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
    MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode))
                              .add(Dst)
                              .add(IsInefficientBase ? Index : Base)
                              .add(Scale)
                              .add(IsInefficientBase ? Base : Index)
                              .addImm(0)
                              .add(Segment);
    LLVM_DEBUG(NewMI->dump(););
    // Create ADD instruction for the Offset in case of 3-Ops LEA.
    if (hasLEAOffset(Offset)) {
      NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
      LLVM_DEBUG(NewMI->dump(););
    }
    return NewMI;
  }
  // Handle the rest of the cases with inefficient base register:
  assert(SSDstR != BaseR && "SSDstR == BaseR should be handled already!");
  assert(IsInefficientBase && "efficient base should be handled already!");

  // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
  if (IsScale1 && !hasLEAOffset(Offset)) {
    // Only transfer the kill flag when base and index differ; otherwise the
    // following ADD still reads the register.
    bool BIK = Base.isKill() && BaseR != IndexR;
    TII->copyPhysReg(MBB, MI, DL, DstR, BaseR, BIK);
    LLVM_DEBUG(MI.getPrevNode()->dump(););

    MachineInstr *NewMI =
        BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Index);
    LLVM_DEBUG(NewMI->dump(););
    return NewMI;
  }
  // lea offset(%base,%index,scale), %dst =>
  // lea offset( ,%index,scale), %dst; add %base,%dst
  MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode))
                            .add(Dst)
                            .addReg(0)
                            .add(Scale)
                            .add(Index)
                            .add(Offset)
                            .add(Segment);
  LLVM_DEBUG(NewMI->dump(););

  NewMI = BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Base);
  LLVM_DEBUG(NewMI->dump(););
  return NewMI;
}