1 //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the pass that finds instructions that can be 10 // re-written as LEA instructions in order to reduce pipeline delays. 11 // It replaces LEAs with ADD/INC/DEC when that is better for size/speed. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "X86.h" 16 #include "X86InstrInfo.h" 17 #include "X86Subtarget.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/Analysis/ProfileSummaryInfo.h" 20 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineSizeOpts.h" 24 #include "llvm/CodeGen/Passes.h" 25 #include "llvm/CodeGen/TargetSchedule.h" 26 #include "llvm/Support/Debug.h" 27 #include "llvm/Support/raw_ostream.h" 28 using namespace llvm; 29 30 #define FIXUPLEA_DESC "X86 LEA Fixup" 31 #define FIXUPLEA_NAME "x86-fixup-LEAs" 32 33 #define DEBUG_TYPE FIXUPLEA_NAME 34 35 STATISTIC(NumLEAs, "Number of LEA instructions created"); 36 37 namespace { 38 class FixupLEAPass : public MachineFunctionPass { 39 enum RegUsageState { RU_NotUsed, RU_Write, RU_Read }; 40 41 /// Given a machine register, look for the instruction 42 /// which writes it in the current basic block. If found, 43 /// try to replace it with an equivalent LEA instruction. 44 /// If replacement succeeds, then also process the newly created 45 /// instruction. 46 void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I, 47 MachineBasicBlock &MBB); 48 49 /// Given a memory access or LEA instruction 50 /// whose address mode uses a base and/or index register, look for 51 /// an opportunity to replace the instruction which sets the base or index 52 /// register with an equivalent LEA instruction. 53 void processInstruction(MachineBasicBlock::iterator &I, 54 MachineBasicBlock &MBB); 55 56 /// Given a LEA instruction which is unprofitable 57 /// on SlowLEA targets try to replace it with an equivalent ADD instruction. 58 void processInstructionForSlowLEA(MachineBasicBlock::iterator &I, 59 MachineBasicBlock &MBB); 60 61 /// Given a LEA instruction which is unprofitable 62 /// on SNB+ try to replace it with other instructions. 63 /// According to Intel's Optimization Reference Manual: 64 /// " For LEA instructions with three source operands and some specific 65 /// situations, instruction latency has increased to 3 cycles, and must 66 /// dispatch via port 1: 67 /// - LEA that has all three source operands: base, index, and offset 68 /// - LEA that uses base and index registers where the base is EBP, RBP, 69 /// or R13 70 /// - LEA that uses RIP relative addressing mode 71 /// - LEA that uses 16-bit addressing mode " 72 /// This function currently handles the first 2 cases only. 73 void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I, 74 MachineBasicBlock &MBB, bool OptIncDec); 75 76 /// Look for LEAs that are really two address LEAs that we might be able to 77 /// turn into regular ADD instructions. 78 bool optTwoAddrLEA(MachineBasicBlock::iterator &I, 79 MachineBasicBlock &MBB, bool OptIncDec, 80 bool UseLEAForSP) const; 81 82 /// Determine if an instruction references a machine register 83 /// and, if so, whether it reads or writes the register. 84 RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I); 85 86 /// Step backwards through a basic block, looking 87 /// for an instruction which writes a register within 88 /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles. 89 MachineBasicBlock::iterator searchBackwards(MachineOperand &p, 90 MachineBasicBlock::iterator &I, 91 MachineBasicBlock &MBB); 92 93 /// if an instruction can be converted to an 94 /// equivalent LEA, insert the new instruction into the basic block 95 /// and return a pointer to it. Otherwise, return zero. 96 MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB, 97 MachineBasicBlock::iterator &MBBI) const; 98 99 public: 100 static char ID; 101 102 StringRef getPassName() const override { return FIXUPLEA_DESC; } 103 104 FixupLEAPass() : MachineFunctionPass(ID) { } 105 106 /// Loop over all of the basic blocks, 107 /// replacing instructions by equivalent LEA instructions 108 /// if needed and when possible. 109 bool runOnMachineFunction(MachineFunction &MF) override; 110 111 // This pass runs after regalloc and doesn't support VReg operands. 112 MachineFunctionProperties getRequiredProperties() const override { 113 return MachineFunctionProperties().set( 114 MachineFunctionProperties::Property::NoVRegs); 115 } 116 117 void getAnalysisUsage(AnalysisUsage &AU) const override { 118 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 119 AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); 120 MachineFunctionPass::getAnalysisUsage(AU); 121 } 122 123 private: 124 TargetSchedModel TSM; 125 const X86InstrInfo *TII = nullptr; 126 const X86RegisterInfo *TRI = nullptr; 127 }; 128 } 129 130 char FixupLEAPass::ID = 0; 131 132 INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false) 133 134 MachineInstr * 135 FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB, 136 MachineBasicBlock::iterator &MBBI) const { 137 MachineInstr &MI = *MBBI; 138 switch (MI.getOpcode()) { 139 case X86::MOV32rr: 140 case X86::MOV64rr: { 141 const MachineOperand &Src = MI.getOperand(1); 142 const MachineOperand &Dest = MI.getOperand(0); 143 MachineInstr *NewMI = 144 BuildMI(MBB, MBBI, MI.getDebugLoc(), 145 TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r 146 : X86::LEA64r)) 147 .add(Dest) 148 .add(Src) 149 .addImm(1) 150 .addReg(0) 151 .addImm(0) 152 .addReg(0); 153 return NewMI; 154 } 155 } 156 157 if (!MI.isConvertibleTo3Addr()) 158 return nullptr; 159 160 switch (MI.getOpcode()) { 161 default: 162 // Only convert instructions that we've verified are safe. 163 return nullptr; 164 case X86::ADD64ri32: 165 case X86::ADD64ri8: 166 case X86::ADD64ri32_DB: 167 case X86::ADD64ri8_DB: 168 case X86::ADD32ri: 169 case X86::ADD32ri8: 170 case X86::ADD32ri_DB: 171 case X86::ADD32ri8_DB: 172 if (!MI.getOperand(2).isImm()) { 173 // convertToThreeAddress will call getImm() 174 // which requires isImm() to be true 175 return nullptr; 176 } 177 break; 178 case X86::SHL64ri: 179 case X86::SHL32ri: 180 case X86::INC64r: 181 case X86::INC32r: 182 case X86::DEC64r: 183 case X86::DEC32r: 184 case X86::ADD64rr: 185 case X86::ADD64rr_DB: 186 case X86::ADD32rr: 187 case X86::ADD32rr_DB: 188 // These instructions are all fine to convert. 189 break; 190 } 191 MachineFunction::iterator MFI = MBB.getIterator(); 192 return TII->convertToThreeAddress(MFI, MI, nullptr); 193 } 194 195 FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); } 196 197 static bool isLEA(unsigned Opcode) { 198 return Opcode == X86::LEA32r || Opcode == X86::LEA64r || 199 Opcode == X86::LEA64_32r; 200 } 201 202 bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) { 203 if (skipFunction(MF.getFunction())) 204 return false; 205 206 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); 207 bool IsSlowLEA = ST.slowLEA(); 208 bool IsSlow3OpsLEA = ST.slow3OpsLEA(); 209 bool LEAUsesAG = ST.LEAusesAG(); 210 211 bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize(); 212 bool UseLEAForSP = ST.useLeaForSP(); 213 214 TSM.init(&ST); 215 TII = ST.getInstrInfo(); 216 TRI = ST.getRegisterInfo(); 217 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 218 auto *MBFI = (PSI && PSI->hasProfileSummary()) 219 ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() 220 : nullptr; 221 222 LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";); 223 for (MachineBasicBlock &MBB : MF) { 224 // First pass. Try to remove or optimize existing LEAs. 225 bool OptIncDecPerBB = 226 OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI); 227 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { 228 if (!isLEA(I->getOpcode())) 229 continue; 230 231 if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP)) 232 continue; 233 234 if (IsSlowLEA) 235 processInstructionForSlowLEA(I, MBB); 236 else if (IsSlow3OpsLEA) 237 processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB); 238 } 239 240 // Second pass for creating LEAs. This may reverse some of the 241 // transformations above. 242 if (LEAUsesAG) { 243 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) 244 processInstruction(I, MBB); 245 } 246 } 247 248 LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";); 249 250 return true; 251 } 252 253 FixupLEAPass::RegUsageState 254 FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) { 255 RegUsageState RegUsage = RU_NotUsed; 256 MachineInstr &MI = *I; 257 258 for (unsigned i = 0; i < MI.getNumOperands(); ++i) { 259 MachineOperand &opnd = MI.getOperand(i); 260 if (opnd.isReg() && opnd.getReg() == p.getReg()) { 261 if (opnd.isDef()) 262 return RU_Write; 263 RegUsage = RU_Read; 264 } 265 } 266 return RegUsage; 267 } 268 269 /// getPreviousInstr - Given a reference to an instruction in a basic 270 /// block, return a reference to the previous instruction in the block, 271 /// wrapping around to the last instruction of the block if the block 272 /// branches to itself. 273 static inline bool getPreviousInstr(MachineBasicBlock::iterator &I, 274 MachineBasicBlock &MBB) { 275 if (I == MBB.begin()) { 276 if (MBB.isPredecessor(&MBB)) { 277 I = --MBB.end(); 278 return true; 279 } else 280 return false; 281 } 282 --I; 283 return true; 284 } 285 286 MachineBasicBlock::iterator 287 FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I, 288 MachineBasicBlock &MBB) { 289 int InstrDistance = 1; 290 MachineBasicBlock::iterator CurInst; 291 static const int INSTR_DISTANCE_THRESHOLD = 5; 292 293 CurInst = I; 294 bool Found; 295 Found = getPreviousInstr(CurInst, MBB); 296 while (Found && I != CurInst) { 297 if (CurInst->isCall() || CurInst->isInlineAsm()) 298 break; 299 if (InstrDistance > INSTR_DISTANCE_THRESHOLD) 300 break; // too far back to make a difference 301 if (usesRegister(p, CurInst) == RU_Write) { 302 return CurInst; 303 } 304 InstrDistance += TSM.computeInstrLatency(&*CurInst); 305 Found = getPreviousInstr(CurInst, MBB); 306 } 307 return MachineBasicBlock::iterator(); 308 } 309 310 static inline bool isInefficientLEAReg(unsigned Reg) { 311 return Reg == X86::EBP || Reg == X86::RBP || 312 Reg == X86::R13D || Reg == X86::R13; 313 } 314 315 /// Returns true if this LEA uses base an index registers, and the base register 316 /// is known to be inefficient for the subtarget. 317 // TODO: use a variant scheduling class to model the latency profile 318 // of LEA instructions, and implement this logic as a scheduling predicate. 319 static inline bool hasInefficientLEABaseReg(const MachineOperand &Base, 320 const MachineOperand &Index) { 321 return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() && 322 Index.getReg() != X86::NoRegister; 323 } 324 325 static inline bool hasLEAOffset(const MachineOperand &Offset) { 326 return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal(); 327 } 328 329 static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) { 330 switch (LEAOpcode) { 331 default: 332 llvm_unreachable("Unexpected LEA instruction"); 333 case X86::LEA32r: 334 case X86::LEA64_32r: 335 return X86::ADD32rr; 336 case X86::LEA64r: 337 return X86::ADD64rr; 338 } 339 } 340 341 static inline unsigned getADDriFromLEA(unsigned LEAOpcode, 342 const MachineOperand &Offset) { 343 bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm()); 344 switch (LEAOpcode) { 345 default: 346 llvm_unreachable("Unexpected LEA instruction"); 347 case X86::LEA32r: 348 case X86::LEA64_32r: 349 return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri; 350 case X86::LEA64r: 351 return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32; 352 } 353 } 354 355 static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) { 356 switch (LEAOpcode) { 357 default: 358 llvm_unreachable("Unexpected LEA instruction"); 359 case X86::LEA32r: 360 case X86::LEA64_32r: 361 return IsINC ? X86::INC32r : X86::DEC32r; 362 case X86::LEA64r: 363 return IsINC ? X86::INC64r : X86::DEC64r; 364 } 365 } 366 367 bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I, 368 MachineBasicBlock &MBB, bool OptIncDec, 369 bool UseLEAForSP) const { 370 MachineInstr &MI = *I; 371 372 const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg); 373 const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt); 374 const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg); 375 const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp); 376 const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg); 377 378 if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 || 379 MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) != 380 MachineBasicBlock::LQR_Dead) 381 return false; 382 383 Register DestReg = MI.getOperand(0).getReg(); 384 Register BaseReg = Base.getReg(); 385 Register IndexReg = Index.getReg(); 386 387 // Don't change stack adjustment LEAs. 388 if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP)) 389 return false; 390 391 // LEA64_32 has 64-bit operands but 32-bit result. 392 if (MI.getOpcode() == X86::LEA64_32r) { 393 if (BaseReg != 0) 394 BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit); 395 if (IndexReg != 0) 396 IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit); 397 } 398 399 MachineInstr *NewMI = nullptr; 400 401 // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1 402 // which can be turned into add %reg2, %reg1 403 if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 && 404 (DestReg == BaseReg || DestReg == IndexReg)) { 405 unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode()); 406 if (DestReg != BaseReg) 407 std::swap(BaseReg, IndexReg); 408 409 if (MI.getOpcode() == X86::LEA64_32r) { 410 // TODO: Do we need the super register implicit use? 411 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) 412 .addReg(BaseReg).addReg(IndexReg) 413 .addReg(Base.getReg(), RegState::Implicit) 414 .addReg(Index.getReg(), RegState::Implicit); 415 } else { 416 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) 417 .addReg(BaseReg).addReg(IndexReg); 418 } 419 } else if (DestReg == BaseReg && IndexReg == 0) { 420 // This is an LEA with only a base register and a displacement, 421 // We can use ADDri or INC/DEC. 422 423 // Does this LEA have one these forms: 424 // lea %reg, 1(%reg) 425 // lea %reg, -1(%reg) 426 if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) { 427 bool IsINC = Disp.getImm() == 1; 428 unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC); 429 430 if (MI.getOpcode() == X86::LEA64_32r) { 431 // TODO: Do we need the super register implicit use? 432 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) 433 .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit); 434 } else { 435 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) 436 .addReg(BaseReg); 437 } 438 } else { 439 unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp); 440 if (MI.getOpcode() == X86::LEA64_32r) { 441 // TODO: Do we need the super register implicit use? 442 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) 443 .addReg(BaseReg).addImm(Disp.getImm()) 444 .addReg(Base.getReg(), RegState::Implicit); 445 } else { 446 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) 447 .addReg(BaseReg).addImm(Disp.getImm()); 448 } 449 } 450 } else 451 return false; 452 453 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1); 454 MBB.erase(I); 455 I = NewMI; 456 return true; 457 } 458 459 void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I, 460 MachineBasicBlock &MBB) { 461 // Process a load, store, or LEA instruction. 462 MachineInstr &MI = *I; 463 const MCInstrDesc &Desc = MI.getDesc(); 464 int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags); 465 if (AddrOffset >= 0) { 466 AddrOffset += X86II::getOperandBias(Desc); 467 MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg); 468 if (p.isReg() && p.getReg() != X86::ESP) { 469 seekLEAFixup(p, I, MBB); 470 } 471 MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg); 472 if (q.isReg() && q.getReg() != X86::ESP) { 473 seekLEAFixup(q, I, MBB); 474 } 475 } 476 } 477 478 void FixupLEAPass::seekLEAFixup(MachineOperand &p, 479 MachineBasicBlock::iterator &I, 480 MachineBasicBlock &MBB) { 481 MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB); 482 if (MBI != MachineBasicBlock::iterator()) { 483 MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI); 484 if (NewMI) { 485 ++NumLEAs; 486 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump();); 487 // now to replace with an equivalent LEA... 488 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump();); 489 MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1); 490 MBB.erase(MBI); 491 MachineBasicBlock::iterator J = 492 static_cast<MachineBasicBlock::iterator>(NewMI); 493 processInstruction(J, MBB); 494 } 495 } 496 } 497 498 void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I, 499 MachineBasicBlock &MBB) { 500 MachineInstr &MI = *I; 501 const unsigned Opcode = MI.getOpcode(); 502 503 const MachineOperand &Dst = MI.getOperand(0); 504 const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg); 505 const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt); 506 const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg); 507 const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp); 508 const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg); 509 510 if (Segment.getReg() != 0 || !Offset.isImm() || 511 MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) != 512 MachineBasicBlock::LQR_Dead) 513 return; 514 const Register DstR = Dst.getReg(); 515 const Register SrcR1 = Base.getReg(); 516 const Register SrcR2 = Index.getReg(); 517 if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR)) 518 return; 519 if (Scale.getImm() > 1) 520 return; 521 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump();); 522 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";); 523 MachineInstr *NewMI = nullptr; 524 // Make ADD instruction for two registers writing to LEA's destination 525 if (SrcR1 != 0 && SrcR2 != 0) { 526 const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode)); 527 const MachineOperand &Src = SrcR1 == DstR ? Index : Base; 528 NewMI = 529 BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src); 530 LLVM_DEBUG(NewMI->dump();); 531 } 532 // Make ADD instruction for immediate 533 if (Offset.getImm() != 0) { 534 const MCInstrDesc &ADDri = 535 TII->get(getADDriFromLEA(Opcode, Offset)); 536 const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index; 537 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR) 538 .add(SrcR) 539 .addImm(Offset.getImm()); 540 LLVM_DEBUG(NewMI->dump();); 541 } 542 if (NewMI) { 543 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1); 544 MBB.erase(I); 545 I = NewMI; 546 } 547 } 548 549 void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I, 550 MachineBasicBlock &MBB, 551 bool OptIncDec) { 552 MachineInstr &MI = *I; 553 const unsigned LEAOpcode = MI.getOpcode(); 554 555 const MachineOperand &Dest = MI.getOperand(0); 556 const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg); 557 const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt); 558 const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg); 559 const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp); 560 const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg); 561 562 if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) || 563 MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) != 564 MachineBasicBlock::LQR_Dead || 565 Segment.getReg() != X86::NoRegister) 566 return; 567 568 Register DestReg = Dest.getReg(); 569 Register BaseReg = Base.getReg(); 570 Register IndexReg = Index.getReg(); 571 572 if (MI.getOpcode() == X86::LEA64_32r) { 573 if (BaseReg != 0) 574 BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit); 575 if (IndexReg != 0) 576 IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit); 577 } 578 579 bool IsScale1 = Scale.getImm() == 1; 580 bool IsInefficientBase = isInefficientLEAReg(BaseReg); 581 bool IsInefficientIndex = isInefficientLEAReg(IndexReg); 582 583 // Skip these cases since it takes more than 2 instructions 584 // to replace the LEA instruction. 585 if (IsInefficientBase && DestReg == BaseReg && !IsScale1) 586 return; 587 588 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump();); 589 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";); 590 591 MachineInstr *NewMI = nullptr; 592 593 // First try to replace LEA with one or two (for the 3-op LEA case) 594 // add instructions: 595 // 1.lea (%base,%index,1), %base => add %index,%base 596 // 2.lea (%base,%index,1), %index => add %base,%index 597 if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) { 598 unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); 599 if (DestReg != BaseReg) 600 std::swap(BaseReg, IndexReg); 601 602 if (MI.getOpcode() == X86::LEA64_32r) { 603 // TODO: Do we need the super register implicit use? 604 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) 605 .addReg(BaseReg) 606 .addReg(IndexReg) 607 .addReg(Base.getReg(), RegState::Implicit) 608 .addReg(Index.getReg(), RegState::Implicit); 609 } else { 610 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) 611 .addReg(BaseReg) 612 .addReg(IndexReg); 613 } 614 } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) { 615 // If the base is inefficient try switching the index and base operands, 616 // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction: 617 // lea offset(%base,%index,scale),%dst => 618 // lea (%base,%index,scale); add offset,%dst 619 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode)) 620 .add(Dest) 621 .add(IsInefficientBase ? Index : Base) 622 .add(Scale) 623 .add(IsInefficientBase ? Base : Index) 624 .addImm(0) 625 .add(Segment); 626 LLVM_DEBUG(NewMI->dump();); 627 } 628 629 // If either replacement succeeded above, add the offset if needed, then 630 // replace the instruction. 631 if (NewMI) { 632 // Create ADD instruction for the Offset in case of 3-Ops LEA. 633 if (hasLEAOffset(Offset)) { 634 if (OptIncDec && Offset.isImm() && 635 (Offset.getImm() == 1 || Offset.getImm() == -1)) { 636 unsigned NewOpc = 637 getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1); 638 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) 639 .addReg(DestReg); 640 LLVM_DEBUG(NewMI->dump();); 641 } else { 642 unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset); 643 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) 644 .addReg(DestReg) 645 .add(Offset); 646 LLVM_DEBUG(NewMI->dump();); 647 } 648 } 649 650 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1); 651 MBB.erase(I); 652 I = NewMI; 653 return; 654 } 655 656 // Handle the rest of the cases with inefficient base register: 657 assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!"); 658 assert(IsInefficientBase && "efficient base should be handled already!"); 659 660 // FIXME: Handle LEA64_32r. 661 if (LEAOpcode == X86::LEA64_32r) 662 return; 663 664 // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst 665 if (IsScale1 && !hasLEAOffset(Offset)) { 666 bool BIK = Base.isKill() && BaseReg != IndexReg; 667 TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK); 668 LLVM_DEBUG(MI.getPrevNode()->dump();); 669 670 unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); 671 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg) 672 .addReg(DestReg) 673 .add(Index); 674 LLVM_DEBUG(NewMI->dump();); 675 676 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1); 677 MBB.erase(I); 678 I = NewMI; 679 return; 680 } 681 682 // lea offset(%base,%index,scale), %dst => 683 // lea offset( ,%index,scale), %dst; add %base,%dst 684 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode)) 685 .add(Dest) 686 .addReg(0) 687 .add(Scale) 688 .add(Index) 689 .add(Offset) 690 .add(Segment); 691 LLVM_DEBUG(NewMI->dump();); 692 693 unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); 694 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg) 695 .addReg(DestReg) 696 .add(Base); 697 LLVM_DEBUG(NewMI->dump();); 698 699 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1); 700 MBB.erase(I); 701 I = NewMI; 702 } 703