//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass that finds instructions that can be
// re-written as LEA instructions in order to reduce pipeline delays.
// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define FIXUPLEA_DESC "X86 LEA Fixup"
#define FIXUPLEA_NAME "x86-fixup-LEAs"

#define DEBUG_TYPE FIXUPLEA_NAME

STATISTIC(NumLEAs, "Number of LEA instructions created");

namespace {
class FixupLEAPass : public MachineFunctionPass {
  enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };

  /// Given a machine register, look for the instruction
  /// which writes it in the current basic block. If found,
  /// try to replace it with an equivalent LEA instruction.
  /// If replacement succeeds, then also process the newly created
  /// instruction.
  void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
                    MachineBasicBlock &MBB);

  /// Given a memory access or LEA instruction
  /// whose address mode uses a base and/or index register, look for
  /// an opportunity to replace the instruction which sets the base or index
  /// register with an equivalent LEA instruction.
  void processInstruction(MachineBasicBlock::iterator &I,
                          MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
  void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SNB+ try to replace it with other instructions.
  /// According to Intel's Optimization Reference Manual:
  /// " For LEA instructions with three source operands and some specific
  ///   situations, instruction latency has increased to 3 cycles, and must
  ///   dispatch via port 1:
  /// - LEA that has all three source operands: base, index, and offset
  /// - LEA that uses base and index registers where the base is EBP, RBP,
  ///   or R13
  /// - LEA that uses RIP relative addressing mode
  /// - LEA that uses 16-bit addressing mode "
  /// This function currently handles the first 2 cases only.
  void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec);

  /// Look for LEAs that are really two address LEAs that we might be able to
  /// turn into regular ADD instructions.
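  /// Illustrative examples (these match Case 1 and Case 2 below):
  ///   lea (%reg1, %reg2), %reg1  ->  add %reg2, %reg1
  ///   lea 1(%reg), %reg          ->  inc %reg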
  bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
                     MachineBasicBlock &MBB, bool OptIncDec,
                     bool UseLEAForSP) const;

  /// Look for and transform the sequence
  ///     lea (reg1, reg2), reg3
  ///     sub reg3, reg4
  /// to
  ///     sub reg1, reg4
  ///     sub reg2, reg4
  /// It can also optimize the sequence lea/add similarly.
  bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;

  /// Step forward in MBB, looking for an ADD/SUB instruction which uses
  /// the dest register of LEA instruction I.
  MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
                                            MachineBasicBlock &MBB) const;

  /// Check instructions between LeaI and AluI (exclusive).
  /// Set BaseIndexDef to true if base or index register from LeaI is defined.
  /// Set AluDestRef to true if the dest register of AluI is used or defined.
  /// *KilledBase is set to the killed base register usage.
  /// *KilledIndex is set to the killed index register usage.
  void checkRegUsage(MachineBasicBlock::iterator &LeaI,
                     MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
                     bool &AluDestRef, MachineOperand **KilledBase,
                     MachineOperand **KilledIndex) const;

  /// Determine if an instruction references a machine register
  /// and, if so, whether it reads or writes the register.
  RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);

  /// Step backwards through a basic block, looking
  /// for an instruction which writes a register within
  /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
  MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
                                              MachineBasicBlock::iterator &I,
                                              MachineBasicBlock &MBB);

  /// If an instruction can be converted to an
  /// equivalent LEA, insert the new instruction into the basic block
  /// and return a pointer to it. Otherwise, return nullptr.
  MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI) const;

public:
  static char ID;

  StringRef getPassName() const override { return FIXUPLEA_DESC; }

  FixupLEAPass() : MachineFunctionPass(ID) {}

  /// Loop over all of the basic blocks,
  /// replacing instructions by equivalent LEA instructions
  /// if needed and when possible.
  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
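  // (Enforced via the NoVRegs property returned below, so every register
  // operand seen by this pass is a physical register.)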
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  TargetSchedModel TSM;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
};
} // end anonymous namespace

char FixupLEAPass::ID = 0;

INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)

MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &MBBI) const {
  MachineInstr &MI = *MBBI;
  switch (MI.getOpcode()) {
  case X86::MOV32rr:
  case X86::MOV64rr: {
    const MachineOperand &Src = MI.getOperand(1);
    const MachineOperand &Dest = MI.getOperand(0);
    MachineInstr *NewMI =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
                                                        : X86::LEA64r))
            .add(Dest)
            .add(Src)
            .addImm(1)
            .addReg(0)
            .addImm(0)
            .addReg(0);
    return NewMI;
  }
  }

  if (!MI.isConvertibleTo3Addr())
    return nullptr;

  switch (MI.getOpcode()) {
  default:
    // Only convert instructions that we've verified are safe.
    return nullptr;
  case X86::ADD64ri32:
  case X86::ADD64ri32_DB:
  case X86::ADD32ri:
  case X86::ADD32ri_DB:
    if (!MI.getOperand(2).isImm()) {
      // convertToThreeAddress will call getImm()
      // which requires isImm() to be true.
      return nullptr;
    }
    break;
  case X86::SHL64ri:
  case X86::SHL32ri:
  case X86::INC64r:
  case X86::INC32r:
  case X86::DEC64r:
  case X86::DEC32r:
  case X86::ADD64rr:
  case X86::ADD64rr_DB:
  case X86::ADD32rr:
  case X86::ADD32rr_DB:
    // These instructions are all fine to convert.
    break;
  }
  return TII->convertToThreeAddress(MI, nullptr, nullptr);
}

FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }

static bool isLEA(unsigned Opcode) {
  return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
         Opcode == X86::LEA64_32r;
}

bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  bool IsSlowLEA = ST.slowLEA();
  bool IsSlow3OpsLEA = ST.slow3OpsLEA();
  bool LEAUsesAG = ST.leaUsesAG();

  bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
  bool UseLEAForSP = ST.useLeaForSP();

  TSM.init(&ST);
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();
  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto *MBFI = (PSI && PSI->hasProfileSummary())
                   ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
                   : nullptr;

  LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
  for (MachineBasicBlock &MBB : MF) {
    // First pass. Try to remove or optimize existing LEAs.
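    // Use INC/DEC here only if it is cheap on the subtarget, or if this
    // block is being optimized for size anyway.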
    bool OptIncDecPerBB =
        OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
      if (!isLEA(I->getOpcode()))
        continue;

      if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
        continue;

      if (IsSlowLEA)
        processInstructionForSlowLEA(I, MBB);
      else if (IsSlow3OpsLEA)
        processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
    }

    // Second pass for creating LEAs. This may reverse some of the
    // transformations above.
    if (LEAUsesAG) {
      for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
        processInstruction(I, MBB);
    }
  }

  LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);

  return true;
}

FixupLEAPass::RegUsageState
FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
  RegUsageState RegUsage = RU_NotUsed;
  MachineInstr &MI = *I;

  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isReg() && MO.getReg() == p.getReg()) {
      if (MO.isDef())
        return RU_Write;
      RegUsage = RU_Read;
    }
  }
  return RegUsage;
}

/// getPreviousInstr - Given a reference to an instruction in a basic
/// block, return a reference to the previous instruction in the block,
/// wrapping around to the last instruction of the block if the block
/// branches to itself.
static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB) {
  if (I == MBB.begin()) {
    if (MBB.isPredecessor(&MBB)) {
      I = --MBB.end();
      return true;
    }
    return false;
  }
  --I;
  return true;
}

MachineBasicBlock::iterator
FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
                              MachineBasicBlock &MBB) {
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst;
  static const int INSTR_DISTANCE_THRESHOLD = 5;

  CurInst = I;
  bool Found = getPreviousInstr(CurInst, MBB);
  while (Found && I != CurInst) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
      break; // too far back to make a difference
    if (usesRegister(p, CurInst) == RU_Write)
      return CurInst;
    InstrDistance += TSM.computeInstrLatency(&*CurInst);
    Found = getPreviousInstr(CurInst, MBB);
  }
  return MachineBasicBlock::iterator();
}

static inline bool isInefficientLEAReg(unsigned Reg) {
  return Reg == X86::EBP || Reg == X86::RBP ||
         Reg == X86::R13D || Reg == X86::R13;
}

/// Returns true if this LEA uses base and index registers, and the base
/// register is known to be inefficient for the subtarget.
// TODO: use a variant scheduling class to model the latency profile
// of LEA instructions, and implement this logic as a scheduling predicate.
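// For example, "lea (%rbp,%rax), %rcx" has an inefficient base: per the
// Intel manual excerpt quoted above, an LEA whose base is EBP/RBP/R13 and
// which also uses an index register falls into the slow 3-cycle class.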
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
                                            const MachineOperand &Index) {
  return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
         Index.getReg() != X86::NoRegister;
}

static inline bool hasLEAOffset(const MachineOperand &Offset) {
  return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
}

static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::ADD32rr;
  case X86::LEA64r:
    return X86::ADD64rr;
  }
}

static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::SUB32rr;
  case X86::LEA64r:
    return X86::SUB64rr;
  }
}

static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
                                       const MachineOperand &Offset) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::ADD32ri;
  case X86::LEA64r:
    return X86::ADD64ri32;
  }
}

static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsINC ? X86::INC32r : X86::DEC32r;
  case X86::LEA64r:
    return IsINC ? X86::INC64r : X86::DEC64r;
  }
}

MachineBasicBlock::iterator
FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
                            MachineBasicBlock &MBB) const {
  const int InstrDistanceThreshold = 5;
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst = std::next(I);

  unsigned LEAOpcode = I->getOpcode();
  unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
  unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
  Register DestReg = I->getOperand(0).getReg();

  while (CurInst != MBB.end()) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > InstrDistanceThreshold)
      break;

    // Check if the lea dest register is used in an add/sub instruction only.
    for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
      MachineOperand &Opnd = CurInst->getOperand(I);
      if (Opnd.isReg()) {
        if (Opnd.getReg() == DestReg) {
          if (Opnd.isDef() || !Opnd.isKill())
            return MachineBasicBlock::iterator();

          unsigned AluOpcode = CurInst->getOpcode();
          if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
            return MachineBasicBlock::iterator();

          MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
          MachineOperand AluDest = CurInst->getOperand(0);
          if (Opnd2.getReg() != AluDest.getReg())
            return MachineBasicBlock::iterator();

          // X - (Y + Z) may generate different flags than (X - Y) - Z when
          // there is overflow. So we can't change the alu instruction if the
          // flags register is live.
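          // (Illustrative 8-bit example: with X = 0 and Y = Z = 100,
          // X - (Y + Z) leaves CF=1/OF=0, but (X - Y) - Z leaves CF=0/OF=1.)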
          if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
            return MachineBasicBlock::iterator();

          return CurInst;
        }
        if (TRI->regsOverlap(DestReg, Opnd.getReg()))
          return MachineBasicBlock::iterator();
      }
    }

    InstrDistance++;
    ++CurInst;
  }
  return MachineBasicBlock::iterator();
}

void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
                                 MachineBasicBlock::iterator &AluI,
                                 bool &BaseIndexDef, bool &AluDestRef,
                                 MachineOperand **KilledBase,
                                 MachineOperand **KilledIndex) const {
  BaseIndexDef = AluDestRef = false;
  *KilledBase = *KilledIndex = nullptr;
  Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
  Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
  Register AluDestReg = AluI->getOperand(0).getReg();

  for (MachineInstr &CurInst : llvm::make_range(std::next(LeaI), AluI)) {
    for (MachineOperand &Opnd : CurInst.operands()) {
      if (!Opnd.isReg())
        continue;
      Register Reg = Opnd.getReg();
      if (TRI->regsOverlap(Reg, AluDestReg))
        AluDestRef = true;
      if (TRI->regsOverlap(Reg, BaseReg)) {
        if (Opnd.isDef())
          BaseIndexDef = true;
        else if (Opnd.isKill())
          *KilledBase = &Opnd;
      }
      if (TRI->regsOverlap(Reg, IndexReg)) {
        if (Opnd.isDef())
          BaseIndexDef = true;
        else if (Opnd.isKill())
          *KilledIndex = &Opnd;
      }
    }
  }
}

bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
                             MachineBasicBlock &MBB) const {
  // Look for an add/sub instruction which uses the result of lea.
  MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
  if (AluI == MachineBasicBlock::iterator())
    return false;

  // Check if there is any related register usage between lea and alu.
  bool BaseIndexDef, AluDestRef;
  MachineOperand *KilledBase, *KilledIndex;
  checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);

  MachineBasicBlock::iterator InsertPos = AluI;
  if (BaseIndexDef) {
    if (AluDestRef)
      return false;
    InsertPos = I;
    KilledBase = KilledIndex = nullptr;
  }

  // Check if there are same registers.
  Register AluDestReg = AluI->getOperand(0).getReg();
  Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
  Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
  if (I->getOpcode() == X86::LEA64_32r) {
    BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }
  if (AluDestReg == IndexReg) {
    if (BaseReg == IndexReg)
      return false;
    std::swap(BaseReg, IndexReg);
    std::swap(KilledBase, KilledIndex);
  }
  if (BaseReg == IndexReg)
    KilledBase = nullptr;

  // Now it's safe to change instructions.
  MachineInstr *NewMI1, *NewMI2;
  unsigned NewOpcode = AluI->getOpcode();
  NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
                   AluDestReg)
               .addReg(AluDestReg, RegState::Kill)
               .addReg(BaseReg, KilledBase ? RegState::Kill : 0);
  NewMI1->addRegisterDead(X86::EFLAGS, TRI);
  NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
                   AluDestReg)
               .addReg(AluDestReg, RegState::Kill)
               .addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
  NewMI2->addRegisterDead(X86::EFLAGS, TRI);

  // Clear the old Kill flags.
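  // (Any kill of the base/index register between the LEA and the ALU
  // instruction is now represented on the two instructions built above.)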
  if (KilledBase)
    KilledBase->setIsKill(false);
  if (KilledIndex)
    KilledIndex->setIsKill(false);

  MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
  MBB.erase(I);
  MBB.erase(AluI);
  I = NewMI1;
  return true;
}

bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec,
                                 bool UseLEAForSP) const {
  MachineInstr &MI = *I;

  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
          MachineBasicBlock::LQR_Dead)
    return false;

  Register DestReg = MI.getOperand(0).getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  // Don't change stack adjustment LEAs.
  if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
    return false;

  // LEA64_32 has 64-bit operands but a 32-bit result.
  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  MachineInstr *NewMI = nullptr;

  // Case 1.
  // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
  // which can be turned into add %reg2, %reg1
  if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
      (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (DestReg == BaseReg && IndexReg == 0) {
    // Case 2.
    // This is an LEA with only a base register and a displacement.
    // We can use ADDri or INC/DEC.

    // Does this LEA have one of these forms:
    //   lea %reg, 1(%reg)
    //   lea %reg, -1(%reg)
    if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
      bool IsINC = Disp.getImm() == 1;
      unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);

      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg);
      }
    } else {
      unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
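        // Illustrative: "leal 16(%rax), %eax" (LEA64_32r with dst == base)
        // becomes "addl $16, %eax", keeping %rax as an implicit use.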
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addImm(Disp.getImm())
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addImm(Disp.getImm());
      }
    }
  } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
    // Case 3.
    // Look for and transform the sequence
    //     lea (reg1, reg2), reg3
    //     sub reg3, reg4
    return optLEAALU(I, MBB);
  } else
    return false;

  MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
  MBB.erase(I);
  I = NewMI;
  return true;
}

void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
                                      MachineBasicBlock &MBB) {
  // Process a load, store, or LEA instruction.
  MachineInstr &MI = *I;
  const MCInstrDesc &Desc = MI.getDesc();
  int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (AddrOffset >= 0) {
    AddrOffset += X86II::getOperandBias(Desc);
    MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
    if (p.isReg() && p.getReg() != X86::ESP) {
      seekLEAFixup(p, I, MBB);
    }
    MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
    if (q.isReg() && q.getReg() != X86::ESP) {
      seekLEAFixup(q, I, MBB);
    }
  }
}

void FixupLEAPass::seekLEAFixup(MachineOperand &p,
                                MachineBasicBlock::iterator &I,
                                MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
  if (MBI != MachineBasicBlock::iterator()) {
    MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
    if (NewMI) {
      ++NumLEAs;
      LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
      // now to replace with an equivalent LEA...
      LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
      MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
      MBB.erase(MBI);
      MachineBasicBlock::iterator J =
          static_cast<MachineBasicBlock::iterator>(NewMI);
      processInstruction(J, MBB);
    }
  }
}

void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                                MachineBasicBlock &MBB) {
  MachineInstr &MI = *I;
  const unsigned Opcode = MI.getOpcode();

  const MachineOperand &Dst = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (Segment.getReg() != 0 || !Offset.isImm() ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
          MachineBasicBlock::LQR_Dead)
    return;
  const Register DstR = Dst.getReg();
  const Register SrcR1 = Base.getReg();
  const Register SrcR2 = Index.getReg();
  if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
    return;
  if (Scale.getImm() > 1)
    return;
  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
  MachineInstr *NewMI = nullptr;
  // Make an ADD instruction for the two registers, writing to the LEA's
  // destination.
  if (SrcR1 != 0 && SrcR2 != 0) {
    const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
    const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
    NewMI =
        BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
    LLVM_DEBUG(NewMI->dump(););
  }
  // Make an ADD instruction for the immediate.
  if (Offset.getImm() != 0) {
    const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(Opcode, Offset));
    const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
    NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
                .add(SrcR)
                .addImm(Offset.getImm());
    LLVM_DEBUG(NewMI->dump(););
  }
  if (NewMI) {
    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
  }
}

void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                             MachineBasicBlock &MBB,
                                             bool OptIncDec) {
  MachineInstr &MI = *I;
  const unsigned LEAOpcode = MI.getOpcode();

  const MachineOperand &Dest = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
          MachineBasicBlock::LQR_Dead ||
      Segment.getReg() != X86::NoRegister)
    return;

  Register DestReg = Dest.getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  bool IsScale1 = Scale.getImm() == 1;
  bool IsInefficientBase = isInefficientLEAReg(BaseReg);
  bool IsInefficientIndex = isInefficientLEAReg(IndexReg);

  // Skip these cases since it takes more than 2 instructions
  // to replace the LEA instruction.
  if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
    return;

  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);

  MachineInstr *NewMI = nullptr;
  bool BaseOrIndexIsDst = DestReg == BaseReg || DestReg == IndexReg;
  // First try to remove the base while sticking with LEA iff base == index
  // and scale == 1. We can handle:
  //   1. lea D(%base,%index,1)   -> lea D(,%index,2)
  //   2. lea D(%r13/%rbp,%index) -> lea D(,%index,2)
  // Only do this if the LEA would otherwise be split into 2 instructions
  // (either it has an Offset or neither base nor index is the dst).
  if (IsScale1 && BaseReg == IndexReg &&
      (hasLEAOffset(Offset) || (IsInefficientBase && !BaseOrIndexIsDst))) {
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
                .add(Dest)
                .addReg(0)
                .addImm(2)
                .add(Index)
                .add(Offset)
                .add(Segment);
    LLVM_DEBUG(NewMI->dump(););

    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
    return;
  } else if (IsScale1 && BaseOrIndexIsDst) {
    // Try to replace LEA with one or two (for the 3-op LEA case)
    // add instructions:
    //   1. lea (%base,%index,1), %base  => add %index,%base
    //   2. lea (%base,%index,1), %index => add %base,%index

    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
    // If the base is inefficient try switching the index and base operands,
    // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
    //   lea offset(%base,%index,scale),%dst =>
    //   lea (%base,%index,scale); add offset,%dst
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
                .add(Dest)
                .add(IsInefficientBase ? Index : Base)
                .add(Scale)
                .add(IsInefficientBase ? Base : Index)
                .addImm(0)
                .add(Segment);
    LLVM_DEBUG(NewMI->dump(););
  }

  // If either replacement succeeded above, add the offset if needed, then
  // replace the instruction.
  if (NewMI) {
    // Create an ADD instruction for the Offset in case of a 3-Ops LEA.
    if (hasLEAOffset(Offset)) {
      if (OptIncDec && Offset.isImm() &&
          (Offset.getImm() == 1 || Offset.getImm() == -1)) {
        unsigned NewOpc =
            getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg);
        LLVM_DEBUG(NewMI->dump(););
      } else {
        unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg)
                    .add(Offset);
        LLVM_DEBUG(NewMI->dump(););
      }
    }

    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
    return;
  }

  // Handle the rest of the cases with an inefficient base register:
  assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
  assert(IsInefficientBase && "efficient base should be handled already!");

  // FIXME: Handle LEA64_32r.
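  // (Presumably the mov/add expansion below would need 32-bit sub-register
  // operands plus implicit uses of the 64-bit sources, as in the cases above,
  // so LEA64_32r is bailed out here for now.)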
  if (LEAOpcode == X86::LEA64_32r)
    return;

  // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
  if (IsScale1 && !hasLEAOffset(Offset)) {
    bool BIK = Base.isKill() && BaseReg != IndexReg;
    TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
    LLVM_DEBUG(MI.getPrevNode()->dump(););

    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                .addReg(DestReg)
                .add(Index);
    LLVM_DEBUG(NewMI->dump(););

    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
    return;
  }

  // lea offset(%base,%index,scale), %dst =>
  // lea offset( ,%index,scale), %dst; add %base,%dst
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
              .add(Dest)
              .addReg(0)
              .add(Scale)
              .add(Index)
              .add(Offset)
              .add(Segment);
  LLVM_DEBUG(NewMI->dump(););

  unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
              .addReg(DestReg)
              .add(Base);
  LLVM_DEBUG(NewMI->dump(););

  MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
  MBB.erase(I);
  I = NewMI;
}