//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass that finds instructions that can be
// re-written as LEA instructions in order to reduce pipeline delays.
// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define FIXUPLEA_DESC "X86 LEA Fixup"
#define FIXUPLEA_NAME "x86-fixup-LEAs"

#define DEBUG_TYPE FIXUPLEA_NAME

STATISTIC(NumLEAs, "Number of LEA instructions created");

namespace {
class FixupLEAPass : public MachineFunctionPass {
  enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };

  /// Given a machine register, look for the instruction
  /// which writes it in the current basic block. If found,
  /// try to replace it with an equivalent LEA instruction.
  /// If replacement succeeds, then also process the newly created
  /// instruction.
  void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
                    MachineBasicBlock &MBB);

  /// Given a memory access or LEA instruction
  /// whose address mode uses a base and/or index register, look for
  /// an opportunity to replace the instruction which sets the base or index
  /// register with an equivalent LEA instruction.
  void processInstruction(MachineBasicBlock::iterator &I,
                          MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
  void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SNB+ try to replace it with other instructions.
  /// According to Intel's Optimization Reference Manual:
  /// " For LEA instructions with three source operands and some specific
  ///   situations, instruction latency has increased to 3 cycles, and must
  ///   dispatch via port 1:
  ///   - LEA that has all three source operands: base, index, and offset
  ///   - LEA that uses base and index registers where the base is EBP, RBP,
  ///     or R13
  ///   - LEA that uses RIP relative addressing mode
  ///   - LEA that uses 16-bit addressing mode "
  /// This function currently handles the first 2 cases only.
  void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec);

  /// Look for LEAs that are really two address LEAs that we might be able to
  /// turn into regular ADD instructions.
  bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
                     MachineBasicBlock &MBB, bool OptIncDec,
                     bool UseLEAForSP) const;
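  // Illustrative patterns handled by optTwoAddrLEA (a sketch, not an
  // exhaustive list; see the implementation for the precise conditions):
  //   lea (%rdi,%rsi), %rdi   ->  add %rsi, %rdi
  //   lea 1(%rdi), %rdi       ->  inc %rdi
  //   lea -16(%rdi), %rdi     ->  add $-16, %rdi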
  /// Determine if an instruction references a machine register
  /// and, if so, whether it reads or writes the register.
  RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);

  /// Step backwards through a basic block, looking
  /// for an instruction which writes a register within
  /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
  MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
                                              MachineBasicBlock::iterator &I,
                                              MachineBasicBlock &MBB);

  /// If an instruction can be converted to an
  /// equivalent LEA, insert the new instruction into the basic block
  /// and return a pointer to it. Otherwise, return nullptr.
  MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI) const;

public:
  static char ID;

  StringRef getPassName() const override { return FIXUPLEA_DESC; }

  FixupLEAPass() : MachineFunctionPass(ID) {}

  /// Loop over all of the basic blocks,
  /// replacing instructions by equivalent LEA instructions
  /// if needed and when possible.
  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

private:
  TargetSchedModel TSM;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
};
} // end anonymous namespace

char FixupLEAPass::ID = 0;

INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)

MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &MBBI) const {
  MachineInstr &MI = *MBBI;
  switch (MI.getOpcode()) {
  case X86::MOV32rr:
  case X86::MOV64rr: {
    const MachineOperand &Src = MI.getOperand(1);
    const MachineOperand &Dest = MI.getOperand(0);
    MachineInstr *NewMI =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
                                                        : X86::LEA64r))
            .add(Dest)
            .add(Src)
            .addImm(1)
            .addReg(0)
            .addImm(0)
            .addReg(0);
    return NewMI;
  }
  }

  if (!MI.isConvertibleTo3Addr())
    return nullptr;

  switch (MI.getOpcode()) {
  default:
    // Only convert instructions that we've verified are safe.
    return nullptr;
  case X86::ADD64ri32:
  case X86::ADD64ri8:
  case X86::ADD64ri32_DB:
  case X86::ADD64ri8_DB:
  case X86::ADD32ri:
  case X86::ADD32ri8:
  case X86::ADD32ri_DB:
  case X86::ADD32ri8_DB:
    if (!MI.getOperand(2).isImm()) {
      // convertToThreeAddress will call getImm()
      // which requires isImm() to be true.
      return nullptr;
    }
    break;
  case X86::SHL64ri:
  case X86::SHL32ri:
  case X86::INC64r:
  case X86::INC32r:
  case X86::DEC64r:
  case X86::DEC32r:
  case X86::ADD64rr:
  case X86::ADD64rr_DB:
  case X86::ADD32rr:
  case X86::ADD32rr_DB:
    // These instructions are all fine to convert.
    break;
  }
  MachineFunction::iterator MFI = MBB.getIterator();
  return TII->convertToThreeAddress(MFI, MI, nullptr);
}
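// Note on the MOV cases above: an X86 memory reference is five operands in
// the order base reg, scale imm, index reg, displacement, segment reg, so
// the chain .add(Src).addImm(1).addReg(0).addImm(0).addReg(0) encodes the
// degenerate address "(%src)". For example (a sketch):
//   movq %rsi, %rdi   ->   leaq (%rsi), %rdi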
FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }

static bool isLEA(unsigned Opcode) {
  return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
         Opcode == X86::LEA64_32r;
}

bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  bool IsSlowLEA = ST.slowLEA();
  bool IsSlow3OpsLEA = ST.slow3OpsLEA();
  bool LEAUsesAG = ST.LEAusesAG();

  bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
  bool UseLEAForSP = ST.useLeaForSP();

  TSM.init(&ST);
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();

  LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
  for (MachineBasicBlock &MBB : MF) {
    // First pass. Try to remove or optimize existing LEAs.
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
      if (!isLEA(I->getOpcode()))
        continue;

      if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP))
        continue;

      if (IsSlowLEA)
        processInstructionForSlowLEA(I, MBB);
      else if (IsSlow3OpsLEA)
        processInstrForSlow3OpLEA(I, MBB, OptIncDec);
    }

    // Second pass for creating LEAs. This may reverse some of the
    // transformations above.
    if (LEAUsesAG) {
      for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
        processInstruction(I, MBB);
    }
  }

  LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);

  return true;
}

FixupLEAPass::RegUsageState
FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
  RegUsageState RegUsage = RU_NotUsed;
  MachineInstr &MI = *I;

  for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
    MachineOperand &opnd = MI.getOperand(i);
    if (opnd.isReg() && opnd.getReg() == p.getReg()) {
      if (opnd.isDef())
        return RU_Write;
      RegUsage = RU_Read;
    }
  }
  return RegUsage;
}
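// For example (a sketch), given "ADD32rr %edi, %edi, %esi": usesRegister
// returns RU_Write when p is %edi (the def operand is encountered first)
// and RU_Read when p is %esi.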
/// getPreviousInstr - Given a reference to an instruction in a basic
/// block, step the iterator back to the previous instruction in the block,
/// wrapping around to the last instruction of the block if the block
/// branches to itself. Returns false if there is no previous instruction.
static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB) {
  if (I == MBB.begin()) {
    if (MBB.isPredecessor(&MBB)) {
      I = --MBB.end();
      return true;
    } else
      return false;
  }
  --I;
  return true;
}

MachineBasicBlock::iterator
FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
                              MachineBasicBlock &MBB) {
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst;
  static const int INSTR_DISTANCE_THRESHOLD = 5;

  CurInst = I;
  bool Found = getPreviousInstr(CurInst, MBB);
  while (Found && I != CurInst) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
      break; // too far back to make a difference
    if (usesRegister(p, CurInst) == RU_Write) {
      return CurInst;
    }
    InstrDistance += TSM.computeInstrLatency(&*CurInst);
    Found = getPreviousInstr(CurInst, MBB);
  }
  return MachineBasicBlock::iterator();
}

static inline bool isInefficientLEAReg(unsigned Reg) {
  return Reg == X86::EBP || Reg == X86::RBP ||
         Reg == X86::R13D || Reg == X86::R13;
}

/// Returns true if this LEA uses base and index registers, and the base
/// register is known to be inefficient for the subtarget.
// TODO: use a variant scheduling class to model the latency profile
// of LEA instructions, and implement this logic as a scheduling predicate.
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
                                            const MachineOperand &Index) {
  return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
         Index.getReg() != X86::NoRegister;
}

static inline bool hasLEAOffset(const MachineOperand &Offset) {
  return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
}

static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::ADD32rr;
  case X86::LEA64r:
    return X86::ADD64rr;
  }
}

static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
                                       const MachineOperand &Offset) {
  bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
  case X86::LEA64r:
    return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
  }
}

static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsINC ? X86::INC32r : X86::DEC32r;
  case X86::LEA64r:
    return IsINC ? X86::INC64r : X86::DEC64r;
  }
}
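// For instance, getADDriFromLEA(X86::LEA64r, Offset) picks X86::ADD64ri8
// for an offset that fits in a signed 8-bit immediate and X86::ADD64ri32
// otherwise, so "lea 8(%rax), %rax" can become the shorter
// "add $8, %rax" encoding.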
bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec,
                                 bool UseLEAForSP) const {
  MachineInstr &MI = *I;

  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
      !TII->isSafeToClobberEFLAGS(MBB, I))
    return false;

  Register DestReg = MI.getOperand(0).getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  // Don't change stack adjustment LEAs.
  if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
    return false;

  // LEA64_32 has 64-bit operands but 32-bit result.
  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  MachineInstr *NewMI = nullptr;

  // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
  // which can be turned into add %reg2, %reg1.
  if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
      (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (DestReg == BaseReg && IndexReg == 0) {
    // This is an LEA with only a base register and a displacement.
    // We can use ADDri or INC/DEC.

    // Does this LEA have one of these forms:
    //   lea %reg, 1(%reg)
    //   lea %reg, -1(%reg)
    if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
      bool IsINC = Disp.getImm() == 1;
      unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);

      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg);
      }
    } else {
      unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addImm(Disp.getImm())
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addImm(Disp.getImm());
      }
    }
  } else
    return false;

  MBB.erase(I);
  I = NewMI;
  return true;
}
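// The routines below implement the second (LEAUsesAG) pass: on targets
// where LEA executes in the address-generation unit, an instruction that
// feeds an address can be profitably re-written as an LEA, e.g. (a sketch):
//   addl $4, %esi            ->   leal 4(%esi), %esi
//   movl (%esi), %eax             movl (%esi), %eax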
void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
                                      MachineBasicBlock &MBB) {
  // Process a load, store, or LEA instruction.
  MachineInstr &MI = *I;
  const MCInstrDesc &Desc = MI.getDesc();
  int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (AddrOffset >= 0) {
    AddrOffset += X86II::getOperandBias(Desc);
    MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
    if (p.isReg() && p.getReg() != X86::ESP) {
      seekLEAFixup(p, I, MBB);
    }
    MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
    if (q.isReg() && q.getReg() != X86::ESP) {
      seekLEAFixup(q, I, MBB);
    }
  }
}

void FixupLEAPass::seekLEAFixup(MachineOperand &p,
                                MachineBasicBlock::iterator &I,
                                MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
  if (MBI != MachineBasicBlock::iterator()) {
    MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
    if (NewMI) {
      ++NumLEAs;
      LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
      // now to replace with an equivalent LEA...
      LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
      MBB.erase(MBI);
      MachineBasicBlock::iterator J =
          static_cast<MachineBasicBlock::iterator>(NewMI);
      processInstruction(J, MBB);
    }
  }
}

void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                                MachineBasicBlock &MBB) {
  MachineInstr &MI = *I;
  const unsigned Opcode = MI.getOpcode();

  const MachineOperand &Dst = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (Segment.getReg() != 0 || !Offset.isImm() ||
      !TII->isSafeToClobberEFLAGS(MBB, I))
    return;
  const Register DstR = Dst.getReg();
  const Register SrcR1 = Base.getReg();
  const Register SrcR2 = Index.getReg();
  if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
    return;
  if (Scale.getImm() > 1)
    return;
  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
  MachineInstr *NewMI = nullptr;
  // Make ADD instruction for two registers writing to LEA's destination.
  if (SrcR1 != 0 && SrcR2 != 0) {
    const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
    const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
    NewMI =
        BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
    LLVM_DEBUG(NewMI->dump(););
  }
  // Make ADD instruction for immediate.
  if (Offset.getImm() != 0) {
    const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(Opcode, Offset));
    const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
    NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
                .add(SrcR)
                .addImm(Offset.getImm());
    LLVM_DEBUG(NewMI->dump(););
  }
  if (NewMI) {
    MBB.erase(I);
    I = NewMI;
  }
}
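// Example of the slow-LEA rewrite above (a sketch): with scale 1 and the
// destination matching the base register,
//   lea 16(%rdi,%rax), %rdi   ->   add %rax, %rdi ; add $16, %rdi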
void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                             MachineBasicBlock &MBB,
                                             bool OptIncDec) {
  MachineInstr &MI = *I;
  const unsigned LEAOpcode = MI.getOpcode();

  const MachineOperand &Dest = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (!(TII->isThreeOperandsLEA(MI) ||
        hasInefficientLEABaseReg(Base, Index)) ||
      !TII->isSafeToClobberEFLAGS(MBB, MI) ||
      Segment.getReg() != X86::NoRegister)
    return;

  Register DestReg = Dest.getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  bool IsScale1 = Scale.getImm() == 1;
  bool IsInefficientBase = isInefficientLEAReg(BaseReg);
  bool IsInefficientIndex = isInefficientLEAReg(IndexReg);

  // Skip these cases since it takes more than 2 instructions
  // to replace the LEA instruction.
  if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
    return;

  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);

  MachineInstr *NewMI = nullptr;

  // First try to replace LEA with one or two (for the 3-op LEA case)
  // add instructions:
  // 1. lea (%base,%index,1), %base => add %index,%base
  // 2. lea (%base,%index,1), %index => add %base,%index
  if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
    // If the base is inefficient try switching the index and base operands,
    // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
    // lea offset(%base,%index,scale),%dst =>
    // lea (%base,%index,scale); add offset,%dst
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
                .add(Dest)
                .add(IsInefficientBase ? Index : Base)
                .add(Scale)
                .add(IsInefficientBase ? Base : Index)
                .addImm(0)
                .add(Segment);
    LLVM_DEBUG(NewMI->dump(););
  }
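  // For example, the 2-op LEA + ADD split above turns (a sketch):
  //   lea 24(%rdi,%rsi,4), %rax
  // into:
  //   lea (%rdi,%rsi,4), %rax ; add $24, %rax
  // where the trailing add is emitted by the offset handling just below.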
  // If either replacement succeeded above, add the offset if needed, then
  // replace the instruction.
  if (NewMI) {
    // Create ADD instruction for the Offset in case of 3-Ops LEA.
    if (hasLEAOffset(Offset)) {
      if (OptIncDec && Offset.isImm() &&
          (Offset.getImm() == 1 || Offset.getImm() == -1)) {
        unsigned NewOpc =
            getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg);
        LLVM_DEBUG(NewMI->dump(););
      } else {
        unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg)
                    .add(Offset);
        LLVM_DEBUG(NewMI->dump(););
      }
    }

    MBB.erase(I);
    I = NewMI;
    return;
  }

  // Handle the rest of the cases with inefficient base register:
  assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
  assert(IsInefficientBase && "efficient base should be handled already!");

  // FIXME: Handle LEA64_32r.
  if (LEAOpcode == X86::LEA64_32r)
    return;

  // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
  if (IsScale1 && !hasLEAOffset(Offset)) {
    bool BIK = Base.isKill() && BaseReg != IndexReg;
    TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
    LLVM_DEBUG(MI.getPrevNode()->dump(););

    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                .addReg(DestReg)
                .add(Index);
    LLVM_DEBUG(NewMI->dump(););

    MBB.erase(I);
    I = NewMI;
    return;
  }

  // lea offset(%base,%index,scale), %dst =>
  // lea offset( ,%index,scale), %dst; add %base,%dst
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
              .add(Dest)
              .addReg(0)
              .add(Scale)
              .add(Index)
              .add(Offset)
              .add(Segment);
  LLVM_DEBUG(NewMI->dump(););

  unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
              .addReg(DestReg)
              .add(Base);
  LLVM_DEBUG(NewMI->dump(););

  MBB.erase(I);
  I = NewMI;
}