//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass that finds instructions that can be
// re-written as LEA instructions in order to reduce pipeline delays.
// It replaces LEAs with ADD/INC/DEC instructions when that is better for
// size or speed.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define FIXUPLEA_DESC "X86 LEA Fixup"
#define FIXUPLEA_NAME "x86-fixup-LEAs"

#define DEBUG_TYPE FIXUPLEA_NAME

STATISTIC(NumLEAs, "Number of LEA instructions created");

namespace {
class FixupLEAPass : public MachineFunctionPass {
  enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };

  /// Given a machine register, look for the instruction
  /// which writes it in the current basic block. If found,
  /// try to replace it with an equivalent LEA instruction.
  /// If replacement succeeds, then also process the newly created
  /// instruction.
  void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
                    MachineBasicBlock &MBB);

  /// Given a memory access or LEA instruction
  /// whose address mode uses a base and/or index register, look for
  /// an opportunity to replace the instruction which sets the base or index
  /// register with an equivalent LEA instruction.
  void processInstruction(MachineBasicBlock::iterator &I,
                          MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SlowLEA targets, try to replace it with an equivalent ADD instruction.
  void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SNB+, try to replace it with other instructions.
  /// According to Intel's Optimization Reference Manual:
  /// " For LEA instructions with three source operands and some specific
  ///   situations, instruction latency has increased to 3 cycles, and must
  ///   dispatch via port 1:
  /// - LEA that has all three source operands: base, index, and offset
  /// - LEA that uses base and index registers where the base is EBP, RBP,
  ///   or R13
  /// - LEA that uses RIP relative addressing mode
  /// - LEA that uses 16-bit addressing mode "
  /// This function currently handles the first 2 cases only.
  void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec);

  /// Look for LEAs that are really two address LEAs that we might be able to
  /// turn into regular ADD instructions.
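  /// For example (a sketch of the patterns handled by the cases below;
  /// INC/DEC are only used when OptIncDec is set):
  ///   lea (%reg1,%reg2), %reg1  ->  add %reg2, %reg1
  ///   lea 1(%reg1), %reg1       ->  inc %reg1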
  bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
                     MachineBasicBlock &MBB, bool OptIncDec,
                     bool UseLEAForSP) const;

  /// Look for and transform the sequence
  ///     lea (reg1, reg2), reg3
  ///     sub reg3, reg4
  /// to
  ///     sub reg1, reg4
  ///     sub reg2, reg4
  /// It can also optimize the sequence lea/add similarly.
  bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;

  /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
  /// the dest register of LEA instruction I.
  MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
                                            MachineBasicBlock &MBB) const;

  /// Check instructions between LeaI and AluI (exclusively).
  /// Set BaseIndexDef to true if base or index register from LeaI is defined.
  /// Set AluDestRef to true if the dest register of AluI is used or defined.
  /// *KilledBase is set to the killed base register usage.
  /// *KilledIndex is set to the killed index register usage.
  void checkRegUsage(MachineBasicBlock::iterator &LeaI,
                     MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
                     bool &AluDestRef, MachineOperand **KilledBase,
                     MachineOperand **KilledIndex) const;

  /// Determine if an instruction references a machine register
  /// and, if so, whether it reads or writes the register.
  RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);

  /// Step backwards through a basic block, looking
  /// for an instruction which writes a register within
  /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
  MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
                                              MachineBasicBlock::iterator &I,
                                              MachineBasicBlock &MBB);

  /// If an instruction can be converted to an
  /// equivalent LEA, insert the new instruction into the basic block
  /// and return a pointer to it. Otherwise, return zero.
  MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI) const;

public:
  static char ID;

  StringRef getPassName() const override { return FIXUPLEA_DESC; }

  FixupLEAPass() : MachineFunctionPass(ID) {}

  /// Loop over all of the basic blocks,
  /// replacing instructions by equivalent LEA instructions
  /// if needed and when possible.
  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  TargetSchedModel TSM;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
};
} // end anonymous namespace

char FixupLEAPass::ID = 0;

INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)

MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &MBBI) const {
  MachineInstr &MI = *MBBI;
  switch (MI.getOpcode()) {
  case X86::MOV32rr:
  case X86::MOV64rr: {
    const MachineOperand &Src = MI.getOperand(1);
    const MachineOperand &Dest = MI.getOperand(0);
    MachineInstr *NewMI =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
                                                        : X86::LEA64r))
            .add(Dest)
            .add(Src)
            .addImm(1)
            .addReg(0)
            .addImm(0)
            .addReg(0);
    return NewMI;
  }
  }

  if (!MI.isConvertibleTo3Addr())
    return nullptr;

  switch (MI.getOpcode()) {
  default:
    // Only convert instructions that we've verified are safe.
    return nullptr;
  case X86::ADD64ri32:
  case X86::ADD64ri8:
  case X86::ADD64ri32_DB:
  case X86::ADD64ri8_DB:
  case X86::ADD32ri:
  case X86::ADD32ri8:
  case X86::ADD32ri_DB:
  case X86::ADD32ri8_DB:
    if (!MI.getOperand(2).isImm()) {
      // convertToThreeAddress will call getImm()
      // which requires isImm() to be true
      return nullptr;
    }
    break;
  case X86::SHL64ri:
  case X86::SHL32ri:
  case X86::INC64r:
  case X86::INC32r:
  case X86::DEC64r:
  case X86::DEC32r:
  case X86::ADD64rr:
  case X86::ADD64rr_DB:
  case X86::ADD32rr:
  case X86::ADD32rr_DB:
    // These instructions are all fine to convert.
    break;
  }
  return TII->convertToThreeAddress(MI, nullptr, nullptr);
}

FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }

static bool isLEA(unsigned Opcode) {
  return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
         Opcode == X86::LEA64_32r;
}

bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  bool IsSlowLEA = ST.slowLEA();
  bool IsSlow3OpsLEA = ST.slow3OpsLEA();
  bool LEAUsesAG = ST.LEAusesAG();

  bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
  bool UseLEAForSP = ST.useLeaForSP();

  TSM.init(&ST);
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();
  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto *MBFI = (PSI && PSI->hasProfileSummary())
                   ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
                   : nullptr;

  LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
  for (MachineBasicBlock &MBB : MF) {
    // First pass. Try to remove or optimize existing LEAs.
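    // INC/DEC are shorter than the equivalent ADD-with-immediate, so also
    // allow them in blocks that profile data says should be optimized for
    // size, even when the subtarget reports slow INC/DEC.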
    bool OptIncDecPerBB =
        OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
      if (!isLEA(I->getOpcode()))
        continue;

      if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
        continue;

      if (IsSlowLEA)
        processInstructionForSlowLEA(I, MBB);
      else if (IsSlow3OpsLEA)
        processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
    }

    // Second pass for creating LEAs. This may reverse some of the
    // transformations above.
    if (LEAUsesAG) {
      for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
        processInstruction(I, MBB);
    }
  }

  LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);

  return true;
}

FixupLEAPass::RegUsageState
FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
  RegUsageState RegUsage = RU_NotUsed;
  MachineInstr &MI = *I;

  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isReg() && MO.getReg() == p.getReg()) {
      if (MO.isDef())
        return RU_Write;
      RegUsage = RU_Read;
    }
  }
  return RegUsage;
}

/// getPreviousInstr - Given a reference to an instruction in a basic
/// block, return a reference to the previous instruction in the block,
/// wrapping around to the last instruction of the block if the block
/// branches to itself.
static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB) {
  if (I == MBB.begin()) {
    if (MBB.isPredecessor(&MBB)) {
      I = --MBB.end();
      return true;
    } else
      return false;
  }
  --I;
  return true;
}

MachineBasicBlock::iterator
FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
                              MachineBasicBlock &MBB) {
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst;
  static const int INSTR_DISTANCE_THRESHOLD = 5;

  CurInst = I;
  bool Found;
  Found = getPreviousInstr(CurInst, MBB);
  while (Found && I != CurInst) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
      break; // too far back to make a difference
    if (usesRegister(p, CurInst) == RU_Write) {
      return CurInst;
    }
    InstrDistance += TSM.computeInstrLatency(&*CurInst);
    Found = getPreviousInstr(CurInst, MBB);
  }
  return MachineBasicBlock::iterator();
}

static inline bool isInefficientLEAReg(unsigned Reg) {
  return Reg == X86::EBP || Reg == X86::RBP ||
         Reg == X86::R13D || Reg == X86::R13;
}

/// Returns true if this LEA uses base and index registers, and the base
/// register is known to be inefficient for the subtarget.
// TODO: use a variant scheduling class to model the latency profile
// of LEA instructions, and implement this logic as a scheduling predicate.
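// (Encoding note, for context: with ModRM.mod == 00 the rBP/r13 base
// encodings are repurposed for the disp32/RIP-relative forms, so an LEA
// whose base is EBP/RBP/R13 always carries a displacement and therefore
// behaves like a three-operand LEA on the affected cores.)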
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
                                            const MachineOperand &Index) {
  return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
         Index.getReg() != X86::NoRegister;
}

static inline bool hasLEAOffset(const MachineOperand &Offset) {
  return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
}

static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::ADD32rr;
  case X86::LEA64r:
    return X86::ADD64rr;
  }
}

static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::SUB32rr;
  case X86::LEA64r:
    return X86::SUB64rr;
  }
}

static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
                                       const MachineOperand &Offset) {
  bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
  case X86::LEA64r:
    return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
  }
}

static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsINC ? X86::INC32r : X86::DEC32r;
  case X86::LEA64r:
    return IsINC ? X86::INC64r : X86::DEC64r;
  }
}

MachineBasicBlock::iterator
FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
                            MachineBasicBlock &MBB) const {
  const int InstrDistanceThreshold = 5;
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst = std::next(I);

  unsigned LEAOpcode = I->getOpcode();
  unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
  unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
  Register DestReg = I->getOperand(0).getReg();

  while (CurInst != MBB.end()) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > InstrDistanceThreshold)
      break;

    // Check if the lea dest register is used in an add/sub instruction only.
    for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
      MachineOperand &Opnd = CurInst->getOperand(I);
      if (Opnd.isReg()) {
        if (Opnd.getReg() == DestReg) {
          if (Opnd.isDef() || !Opnd.isKill())
            return MachineBasicBlock::iterator();

          unsigned AluOpcode = CurInst->getOpcode();
          if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
            return MachineBasicBlock::iterator();

          MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
          MachineOperand AluDest = CurInst->getOperand(0);
          if (Opnd2.getReg() != AluDest.getReg())
            return MachineBasicBlock::iterator();

          // X - (Y + Z) may generate different flags than (X - Y) - Z when
          // there is overflow. So we can't change the alu instruction if the
          // flags register is live.
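          // (Illustrative 8-bit example: 0 - (127 + 1) wraps the LEA sum to
          // -128, and the following SUB sets OF and CF, whereas the rewritten
          // (0 - 127) - 1 leaves both clear; hence the rewrite is only done
          // when the ALU's EFLAGS def below is dead.)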
          if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
            return MachineBasicBlock::iterator();

          return CurInst;
        }
        if (TRI->regsOverlap(DestReg, Opnd.getReg()))
          return MachineBasicBlock::iterator();
      }
    }

    InstrDistance++;
    ++CurInst;
  }
  return MachineBasicBlock::iterator();
}

void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
                                 MachineBasicBlock::iterator &AluI,
                                 bool &BaseIndexDef, bool &AluDestRef,
                                 MachineOperand **KilledBase,
                                 MachineOperand **KilledIndex) const {
  BaseIndexDef = AluDestRef = false;
  *KilledBase = *KilledIndex = nullptr;
  Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
  Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
  Register AluDestReg = AluI->getOperand(0).getReg();

  MachineBasicBlock::iterator CurInst = std::next(LeaI);
  while (CurInst != AluI) {
    for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
      MachineOperand &Opnd = CurInst->getOperand(I);
      if (!Opnd.isReg())
        continue;
      Register Reg = Opnd.getReg();
      if (TRI->regsOverlap(Reg, AluDestReg))
        AluDestRef = true;
      if (TRI->regsOverlap(Reg, BaseReg)) {
        if (Opnd.isDef())
          BaseIndexDef = true;
        else if (Opnd.isKill())
          *KilledBase = &Opnd;
      }
      if (TRI->regsOverlap(Reg, IndexReg)) {
        if (Opnd.isDef())
          BaseIndexDef = true;
        else if (Opnd.isKill())
          *KilledIndex = &Opnd;
      }
    }
    ++CurInst;
  }
}

bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
                             MachineBasicBlock &MBB) const {
  // Look for an add/sub instruction which uses the result of lea.
  MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
  if (AluI == MachineBasicBlock::iterator())
    return false;

  // Check if there is any related register usage between lea and alu.
  bool BaseIndexDef, AluDestRef;
  MachineOperand *KilledBase, *KilledIndex;
  checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);

  MachineBasicBlock::iterator InsertPos = AluI;
  if (BaseIndexDef) {
    if (AluDestRef)
      return false;
    InsertPos = I;
    KilledBase = KilledIndex = nullptr;
  }

  // Check if any of the registers are the same.
  Register AluDestReg = AluI->getOperand(0).getReg();
  Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
  Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
  if (I->getOpcode() == X86::LEA64_32r) {
    BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }
  if (AluDestReg == IndexReg) {
    if (BaseReg == IndexReg)
      return false;
    std::swap(BaseReg, IndexReg);
    std::swap(KilledBase, KilledIndex);
  }
  if (BaseReg == IndexReg)
    KilledBase = nullptr;

  // Now it's safe to change instructions.
  MachineInstr *NewMI1, *NewMI2;
  unsigned NewOpcode = AluI->getOpcode();
  NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
                   AluDestReg)
               .addReg(AluDestReg, RegState::Kill)
               .addReg(BaseReg, KilledBase ? RegState::Kill : 0);
  NewMI1->addRegisterDead(X86::EFLAGS, TRI);
  NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
                   AluDestReg)
               .addReg(AluDestReg, RegState::Kill)
               .addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
  NewMI2->addRegisterDead(X86::EFLAGS, TRI);

  // Clear the old Kill flags.
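  // (Those kills were transferred to the operands of the two instructions
  // built above, so the earlier operands must drop them to keep the
  // liveness information consistent.)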
  if (KilledBase)
    KilledBase->setIsKill(false);
  if (KilledIndex)
    KilledIndex->setIsKill(false);

  MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI1, 1);
  MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
  MBB.erase(I);
  MBB.erase(AluI);
  I = NewMI1;
  return true;
}

bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec,
                                 bool UseLEAForSP) const {
  MachineInstr &MI = *I;

  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
          MachineBasicBlock::LQR_Dead)
    return false;

  Register DestReg = MI.getOperand(0).getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  // Don't change stack adjustment LEAs.
  if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
    return false;

  // LEA64_32 has 64-bit operands but 32-bit result.
  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  MachineInstr *NewMI = nullptr;

  // Case 1.
  // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
  // which can be turned into add %reg2, %reg1
  if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
      (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (DestReg == BaseReg && IndexReg == 0) {
    // Case 2.
    // This is an LEA with only a base register and a displacement.
    // We can use ADDri or INC/DEC.

    // Does this LEA have one of these forms:
    //   lea  %reg, 1(%reg)
    //   lea  %reg, -1(%reg)
    if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
      bool IsINC = Disp.getImm() == 1;
      unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);

      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg);
      }
    } else {
      unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
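        // (As in the cases above: LEA64_32r reads the full 64-bit base
        // register, so the implicit use conservatively keeps the 64-bit
        // super register live across the narrowing rewrite.)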
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addImm(Disp.getImm())
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addImm(Disp.getImm());
      }
    }
  } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
    // Case 3.
    // Look for and transform the sequence
    //     lea (reg1, reg2), reg3
    //     sub reg3, reg4
    return optLEAALU(I, MBB);
  } else
    return false;

  MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
  MBB.erase(I);
  I = NewMI;
  return true;
}

void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
                                      MachineBasicBlock &MBB) {
  // Process a load, store, or LEA instruction.
  MachineInstr &MI = *I;
  const MCInstrDesc &Desc = MI.getDesc();
  int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (AddrOffset >= 0) {
    AddrOffset += X86II::getOperandBias(Desc);
    MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
    if (p.isReg() && p.getReg() != X86::ESP) {
      seekLEAFixup(p, I, MBB);
    }
    MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
    if (q.isReg() && q.getReg() != X86::ESP) {
      seekLEAFixup(q, I, MBB);
    }
  }
}

void FixupLEAPass::seekLEAFixup(MachineOperand &p,
                                MachineBasicBlock::iterator &I,
                                MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
  if (MBI != MachineBasicBlock::iterator()) {
    MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
    if (NewMI) {
      ++NumLEAs;
      LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
      // now to replace with an equivalent LEA...
      LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
      MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
      MBB.erase(MBI);
      MachineBasicBlock::iterator J =
          static_cast<MachineBasicBlock::iterator>(NewMI);
      processInstruction(J, MBB);
    }
  }
}

void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                                MachineBasicBlock &MBB) {
  MachineInstr &MI = *I;
  const unsigned Opcode = MI.getOpcode();

  const MachineOperand &Dst = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (Segment.getReg() != 0 || !Offset.isImm() ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
          MachineBasicBlock::LQR_Dead)
    return;
  const Register DstR = Dst.getReg();
  const Register SrcR1 = Base.getReg();
  const Register SrcR2 = Index.getReg();
  if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
    return;
  if (Scale.getImm() > 1)
    return;
  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
  MachineInstr *NewMI = nullptr;
  // Make ADD instruction for two registers writing to LEA's destination
  if (SrcR1 != 0 && SrcR2 != 0) {
    const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
    const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
    NewMI =
        BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
    LLVM_DEBUG(NewMI->dump(););
  }
  // Make ADD instruction for immediate
  if (Offset.getImm() != 0) {
    const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(Opcode, Offset));
    const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
    NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
                .add(SrcR)
                .addImm(Offset.getImm());
    LLVM_DEBUG(NewMI->dump(););
  }
  if (NewMI) {
    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
  }
}

void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                             MachineBasicBlock &MBB,
                                             bool OptIncDec) {
  MachineInstr &MI = *I;
  const unsigned LEAOpcode = MI.getOpcode();

  const MachineOperand &Dest = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
          MachineBasicBlock::LQR_Dead ||
      Segment.getReg() != X86::NoRegister)
    return;

  Register DestReg = Dest.getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  bool IsScale1 = Scale.getImm() == 1;
  bool IsInefficientBase = isInefficientLEAReg(BaseReg);
  bool IsInefficientIndex = isInefficientLEAReg(IndexReg);

  // Skip these cases since it takes more than 2 instructions
  // to replace the LEA instruction.
  if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
    return;

  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);

  MachineInstr *NewMI = nullptr;

  // First try to replace LEA with one or two (for the 3-op LEA case)
  // add instructions:
  // 1. lea (%base,%index,1), %base => add %index,%base
  // 2. lea (%base,%index,1), %index => add %base,%index
  if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
    // If the base is inefficient try switching the index and base operands,
    // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
    // lea offset(%base,%index,scale),%dst =>
    // lea (%base,%index,scale); add offset,%dst
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
                .add(Dest)
                .add(IsInefficientBase ? Index : Base)
                .add(Scale)
                .add(IsInefficientBase ? Base : Index)
                .addImm(0)
                .add(Segment);
    LLVM_DEBUG(NewMI->dump(););
  }

  // If either replacement succeeded above, add the offset if needed, then
  // replace the instruction.
  if (NewMI) {
    // Create ADD instruction for the Offset in case of 3-Ops LEA.
    if (hasLEAOffset(Offset)) {
      if (OptIncDec && Offset.isImm() &&
          (Offset.getImm() == 1 || Offset.getImm() == -1)) {
        unsigned NewOpc =
            getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg);
        LLVM_DEBUG(NewMI->dump(););
      } else {
        unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg)
                    .add(Offset);
        LLVM_DEBUG(NewMI->dump(););
      }
    }

    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
    return;
  }

  // Handle the rest of the cases with inefficient base register:
  assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
  assert(IsInefficientBase && "efficient base should be handled already!");

  // FIXME: Handle LEA64_32r.
  if (LEAOpcode == X86::LEA64_32r)
    return;

  // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
  if (IsScale1 && !hasLEAOffset(Offset)) {
    bool BIK = Base.isKill() && BaseReg != IndexReg;
    TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
    LLVM_DEBUG(MI.getPrevNode()->dump(););

    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                .addReg(DestReg)
                .add(Index);
    LLVM_DEBUG(NewMI->dump(););

    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
    return;
  }

  // lea offset(%base,%index,scale), %dst =>
  // lea offset( ,%index,scale), %dst; add %base,%dst
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
              .add(Dest)
              .addReg(0)
              .add(Scale)
              .add(Index)
              .add(Offset)
              .add(Segment);
  LLVM_DEBUG(NewMI->dump(););

  unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
              .addReg(DestReg)
              .add(Base);
  LLVM_DEBUG(NewMI->dump(););

  MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
  MBB.erase(I);
  I = NewMI;
}