//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass that finds instructions that can be
// re-written as LEA instructions in order to reduce pipeline delays.
// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define FIXUPLEA_DESC "X86 LEA Fixup"
#define FIXUPLEA_NAME "x86-fixup-LEAs"

#define DEBUG_TYPE FIXUPLEA_NAME

STATISTIC(NumLEAs, "Number of LEA instructions created");

namespace {
class FixupLEAPass : public MachineFunctionPass {
  enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };

  /// Given a machine register, look for the instruction
  /// which writes it in the current basic block. If found,
  /// try to replace it with an equivalent LEA instruction.
  /// If replacement succeeds, then also process the newly created
  /// instruction.
  void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
                    MachineBasicBlock &MBB);

  /// Given a memory access or LEA instruction
  /// whose address mode uses a base and/or index register, look for
  /// an opportunity to replace the instruction which sets the base or index
  /// register with an equivalent LEA instruction.
  void processInstruction(MachineBasicBlock::iterator &I,
                          MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
  void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SNB+ try to replace it with other instructions.
  /// According to Intel's Optimization Reference Manual:
  /// " For LEA instructions with three source operands and some specific
  ///   situations, instruction latency has increased to 3 cycles, and must
  ///   dispatch via port 1:
  /// - LEA that has all three source operands: base, index, and offset
  /// - LEA that uses base and index registers where the base is EBP, RBP,
  ///   or R13
  /// - LEA that uses RIP relative addressing mode
  /// - LEA that uses 16-bit addressing mode "
  /// This function currently handles the first 2 cases only.
  void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec);

  /// Look for LEAs that are really two address LEAs that we might be able to
  /// turn into regular ADD instructions.
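  /// For example, an LEA whose destination matches one of its sources, such
  /// as (illustrative only, AT&T syntax):
  ///     leaq (%rdi,%rsi), %rdi   =>   addq %rsi, %rdi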
  bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
                     MachineBasicBlock &MBB, bool OptIncDec,
                     bool UseLEAForSP) const;

  /// Look for and transform the sequence
  ///     lea (reg1, reg2), reg3
  ///     sub reg3, reg4
  /// to
  ///     sub reg1, reg4
  ///     sub reg2, reg4
  /// It can also optimize the sequence lea/add similarly.
  bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;

  /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
  /// the dest register of LEA instruction I.
  MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
                                            MachineBasicBlock &MBB) const;

  /// Check instructions between LeaI and AluI (exclusive).
  /// Set BaseIndexDef to true if base or index register from LeaI is defined.
  /// Set AluDestRef to true if the dest register of AluI is used or defined.
  /// *KilledBase is set to the killed base register usage.
  /// *KilledIndex is set to the killed index register usage.
  void checkRegUsage(MachineBasicBlock::iterator &LeaI,
                     MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
                     bool &AluDestRef, MachineOperand **KilledBase,
                     MachineOperand **KilledIndex) const;

  /// Determine if an instruction references a machine register
  /// and, if so, whether it reads or writes the register.
  RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);

  /// Step backwards through a basic block, looking
  /// for an instruction which writes a register within
  /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
  MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
                                              MachineBasicBlock::iterator &I,
                                              MachineBasicBlock &MBB);

  /// If an instruction can be converted to an
  /// equivalent LEA, insert the new instruction into the basic block
  /// and return a pointer to it. Otherwise, return nullptr.
  MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI) const;

public:
  static char ID;

  StringRef getPassName() const override { return FIXUPLEA_DESC; }

  FixupLEAPass() : MachineFunctionPass(ID) {}

  /// Loop over all of the basic blocks,
  /// replacing instructions by equivalent LEA instructions
  /// if needed and when possible.
  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  TargetSchedModel TSM;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
};
} // end anonymous namespace

char FixupLEAPass::ID = 0;

INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)

MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &MBBI) const {
  MachineInstr &MI = *MBBI;
  switch (MI.getOpcode()) {
  case X86::MOV32rr:
  case X86::MOV64rr: {
    const MachineOperand &Src = MI.getOperand(1);
    const MachineOperand &Dest = MI.getOperand(0);
    MachineInstr *NewMI =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
                                                        : X86::LEA64r))
            .add(Dest)
            .add(Src)
            .addImm(1)
            .addReg(0)
            .addImm(0)
            .addReg(0);
    return NewMI;
  }
  }

  if (!MI.isConvertibleTo3Addr())
    return nullptr;

  switch (MI.getOpcode()) {
  default:
    // Only convert instructions that we've verified are safe.
    return nullptr;
  case X86::ADD64ri32:
  case X86::ADD64ri8:
  case X86::ADD64ri32_DB:
  case X86::ADD64ri8_DB:
  case X86::ADD32ri:
  case X86::ADD32ri8:
  case X86::ADD32ri_DB:
  case X86::ADD32ri8_DB:
    if (!MI.getOperand(2).isImm()) {
      // convertToThreeAddress will call getImm()
      // which requires isImm() to be true
      return nullptr;
    }
    break;
  case X86::SHL64ri:
  case X86::SHL32ri:
  case X86::INC64r:
  case X86::INC32r:
  case X86::DEC64r:
  case X86::DEC32r:
  case X86::ADD64rr:
  case X86::ADD64rr_DB:
  case X86::ADD32rr:
  case X86::ADD32rr_DB:
    // These instructions are all fine to convert.
    break;
  }
  MachineFunction::iterator MFI = MBB.getIterator();
  return TII->convertToThreeAddress(MFI, MI, nullptr);
}

FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }

static bool isLEA(unsigned Opcode) {
  return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
         Opcode == X86::LEA64_32r;
}

bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  bool IsSlowLEA = ST.slowLEA();
  bool IsSlow3OpsLEA = ST.slow3OpsLEA();
  bool LEAUsesAG = ST.LEAusesAG();

  bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
  bool UseLEAForSP = ST.useLeaForSP();

  TSM.init(&ST);
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();
  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto *MBFI = (PSI && PSI->hasProfileSummary())
                   ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
                   : nullptr;

  LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
  for (MachineBasicBlock &MBB : MF) {
    // First pass. Try to remove or optimize existing LEAs.
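    // For example (illustrative only), a two-address LEA such as
    //   leaq 4(%rax), %rax
    // can be rewritten as the equivalent, cheaper
    //   addq $4, %rax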
    bool OptIncDecPerBB =
        OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
      if (!isLEA(I->getOpcode()))
        continue;

      if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
        continue;

      if (IsSlowLEA)
        processInstructionForSlowLEA(I, MBB);
      else if (IsSlow3OpsLEA)
        processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
    }

    // Second pass for creating LEAs. This may reverse some of the
    // transformations above.
    if (LEAUsesAG) {
      for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
        processInstruction(I, MBB);
    }
  }

  LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);

  return true;
}

FixupLEAPass::RegUsageState
FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
  RegUsageState RegUsage = RU_NotUsed;
  MachineInstr &MI = *I;

  for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
    MachineOperand &opnd = MI.getOperand(i);
    if (opnd.isReg() && opnd.getReg() == p.getReg()) {
      if (opnd.isDef())
        return RU_Write;
      RegUsage = RU_Read;
    }
  }
  return RegUsage;
}

/// getPreviousInstr - Given a reference to an instruction in a basic
/// block, return a reference to the previous instruction in the block,
/// wrapping around to the last instruction of the block if the block
/// branches to itself.
static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB) {
  if (I == MBB.begin()) {
    if (MBB.isPredecessor(&MBB)) {
      I = --MBB.end();
      return true;
    } else
      return false;
  }
  --I;
  return true;
}

MachineBasicBlock::iterator
FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
                              MachineBasicBlock &MBB) {
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst;
  static const int INSTR_DISTANCE_THRESHOLD = 5;

  CurInst = I;
  bool Found;
  Found = getPreviousInstr(CurInst, MBB);
  while (Found && I != CurInst) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
      break; // too far back to make a difference
    if (usesRegister(p, CurInst) == RU_Write) {
      return CurInst;
    }
    InstrDistance += TSM.computeInstrLatency(&*CurInst);
    Found = getPreviousInstr(CurInst, MBB);
  }
  return MachineBasicBlock::iterator();
}

static inline bool isInefficientLEAReg(unsigned Reg) {
  return Reg == X86::EBP || Reg == X86::RBP ||
         Reg == X86::R13D || Reg == X86::R13;
}

/// Returns true if this LEA uses base and index registers, and the base
/// register is known to be inefficient for the subtarget.
// TODO: use a variant scheduling class to model the latency profile
// of LEA instructions, and implement this logic as a scheduling predicate.
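// For example (illustrative, per the Intel guidance quoted above), an LEA
// such as
//   leaq (%rbp,%rax), %rcx
// uses EBP/RBP as the base together with an index register, so on SNB+ it
// takes the 3-cycle, port-1-only path.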
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
                                            const MachineOperand &Index) {
  return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
         Index.getReg() != X86::NoRegister;
}

static inline bool hasLEAOffset(const MachineOperand &Offset) {
  return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
}

static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::ADD32rr;
  case X86::LEA64r:
    return X86::ADD64rr;
  }
}

static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::SUB32rr;
  case X86::LEA64r:
    return X86::SUB64rr;
  }
}

static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
                                       const MachineOperand &Offset) {
  bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
  case X86::LEA64r:
    return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
  }
}

static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsINC ? X86::INC32r : X86::DEC32r;
  case X86::LEA64r:
    return IsINC ? X86::INC64r : X86::DEC64r;
  }
}

MachineBasicBlock::iterator
FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
                            MachineBasicBlock &MBB) const {
  const int InstrDistanceThreshold = 5;
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst = std::next(I);

  unsigned LEAOpcode = I->getOpcode();
  unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
  unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
  Register DestReg = I->getOperand(0).getReg();

  while (CurInst != MBB.end()) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > InstrDistanceThreshold)
      break;

    // Check that the lea dest register is used only in an add/sub
    // instruction.
    for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
      MachineOperand &Opnd = CurInst->getOperand(I);
      if (Opnd.isReg()) {
        if (Opnd.getReg() == DestReg) {
          if (Opnd.isDef() || !Opnd.isKill())
            return MachineBasicBlock::iterator();

          unsigned AluOpcode = CurInst->getOpcode();
          if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
            return MachineBasicBlock::iterator();

          MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
          MachineOperand AluDest = CurInst->getOperand(0);
          if (Opnd2.getReg() != AluDest.getReg())
            return MachineBasicBlock::iterator();

          // X - (Y + Z) may generate different flags than (X - Y) - Z when
          // there is overflow. So we can't change the alu instruction if the
          // flags register is live.
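          // Worked example (illustrative): with X = 1, Y = 1, Z = -1, the
          // fused form computes X - (Y + Z) = 1 - 0 and leaves CF clear,
          // while the split form's final sub computes (X - Y) - Z =
          // 0 - (-1), which borrows and sets CF.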
          if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
            return MachineBasicBlock::iterator();

          return CurInst;
        }
        if (TRI->regsOverlap(DestReg, Opnd.getReg()))
          return MachineBasicBlock::iterator();
      }
    }

    InstrDistance++;
    ++CurInst;
  }
  return MachineBasicBlock::iterator();
}

void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
                                 MachineBasicBlock::iterator &AluI,
                                 bool &BaseIndexDef, bool &AluDestRef,
                                 MachineOperand **KilledBase,
                                 MachineOperand **KilledIndex) const {
  BaseIndexDef = AluDestRef = false;
  *KilledBase = *KilledIndex = nullptr;
  Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
  Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
  Register AluDestReg = AluI->getOperand(0).getReg();

  MachineBasicBlock::iterator CurInst = std::next(LeaI);
  while (CurInst != AluI) {
    for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
      MachineOperand &Opnd = CurInst->getOperand(I);
      if (!Opnd.isReg())
        continue;
      Register Reg = Opnd.getReg();
      if (TRI->regsOverlap(Reg, AluDestReg))
        AluDestRef = true;
      if (TRI->regsOverlap(Reg, BaseReg)) {
        if (Opnd.isDef())
          BaseIndexDef = true;
        else if (Opnd.isKill())
          *KilledBase = &Opnd;
      }
      if (TRI->regsOverlap(Reg, IndexReg)) {
        if (Opnd.isDef())
          BaseIndexDef = true;
        else if (Opnd.isKill())
          *KilledIndex = &Opnd;
      }
    }
    ++CurInst;
  }
}

bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
                             MachineBasicBlock &MBB) const {
  // Look for an add/sub instruction which uses the result of lea.
  MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
  if (AluI == MachineBasicBlock::iterator())
    return false;

  // Check for any related register usage between the lea and the alu.
  bool BaseIndexDef, AluDestRef;
  MachineOperand *KilledBase, *KilledIndex;
  checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);

  MachineBasicBlock::iterator InsertPos = AluI;
  if (BaseIndexDef) {
    if (AluDestRef)
      return false;
    InsertPos = I;
    KilledBase = KilledIndex = nullptr;
  }

  // Check whether any of the registers coincide.
  Register AluDestReg = AluI->getOperand(0).getReg();
  Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
  Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
  if (I->getOpcode() == X86::LEA64_32r) {
    BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }
  if (AluDestReg == IndexReg) {
    if (BaseReg == IndexReg)
      return false;
    std::swap(BaseReg, IndexReg);
    std::swap(KilledBase, KilledIndex);
  }
  if (BaseReg == IndexReg)
    KilledBase = nullptr;

  // Now it's safe to change instructions.
  MachineInstr *NewMI1, *NewMI2;
  unsigned NewOpcode = AluI->getOpcode();
  NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
                   AluDestReg)
               .addReg(AluDestReg, RegState::Kill)
               .addReg(BaseReg, KilledBase ? RegState::Kill : 0);
  NewMI1->addRegisterDead(X86::EFLAGS, TRI);
  NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
                   AluDestReg)
               .addReg(AluDestReg, RegState::Kill)
               .addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
  NewMI2->addRegisterDead(X86::EFLAGS, TRI);

  // Clear the old kill flags.
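  // The uses built above extend the live ranges of the base/index registers
  // past the instructions that previously killed them, so those stale kill
  // flags must be dropped.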
  if (KilledBase)
    KilledBase->setIsKill(false);
  if (KilledIndex)
    KilledIndex->setIsKill(false);

  MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI1, 1);
  MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
  MBB.erase(I);
  MBB.erase(AluI);
  I = NewMI1;
  return true;
}

bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec,
                                 bool UseLEAForSP) const {
  MachineInstr &MI = *I;

  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
          MachineBasicBlock::LQR_Dead)
    return false;

  Register DestReg = MI.getOperand(0).getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  // Don't change stack adjustment LEAs.
  if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
    return false;

  // LEA64_32 has 64-bit operands but 32-bit result.
  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  MachineInstr *NewMI = nullptr;

  // Case 1.
  // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
  // which can be turned into add %reg2, %reg1
  if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
      (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (DestReg == BaseReg && IndexReg == 0) {
    // Case 2.
    // This is an LEA with only a base register and a displacement;
    // we can use an ADDri or INC/DEC.

    // Does this LEA have one of these forms:
    //   lea %reg, 1(%reg)
    //   lea %reg, -1(%reg)
    if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
      bool IsINC = Disp.getImm() == 1;
      unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);

      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg);
      }
    } else {
      unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
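        // For example (illustrative): for LEA64_32r, something like
        //   leal 16(%rax), %eax
        // becomes
        //   addl $16, %eax
        // with %rax kept as an implicit use.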
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addImm(Disp.getImm())
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addImm(Disp.getImm());
      }
    }
  } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
    // Case 3.
    // Look for and transform the sequence
    //     lea (reg1, reg2), reg3
    //     sub reg3, reg4
    return optLEAALU(I, MBB);
  } else
    return false;

  MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
  MBB.erase(I);
  I = NewMI;
  return true;
}

void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
                                      MachineBasicBlock &MBB) {
  // Process a load, store, or LEA instruction.
  MachineInstr &MI = *I;
  const MCInstrDesc &Desc = MI.getDesc();
  int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (AddrOffset >= 0) {
    AddrOffset += X86II::getOperandBias(Desc);
    MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
    if (p.isReg() && p.getReg() != X86::ESP) {
      seekLEAFixup(p, I, MBB);
    }
    MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
    if (q.isReg() && q.getReg() != X86::ESP) {
      seekLEAFixup(q, I, MBB);
    }
  }
}

void FixupLEAPass::seekLEAFixup(MachineOperand &p,
                                MachineBasicBlock::iterator &I,
                                MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
  if (MBI != MachineBasicBlock::iterator()) {
    MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
    if (NewMI) {
      ++NumLEAs;
      LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
      // Now to replace with an equivalent LEA...
      LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
      MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
      MBB.erase(MBI);
      MachineBasicBlock::iterator J =
          static_cast<MachineBasicBlock::iterator>(NewMI);
      processInstruction(J, MBB);
    }
  }
}

void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                                MachineBasicBlock &MBB) {
  MachineInstr &MI = *I;
  const unsigned Opcode = MI.getOpcode();

  const MachineOperand &Dst = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (Segment.getReg() != 0 || !Offset.isImm() ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
          MachineBasicBlock::LQR_Dead)
    return;
  const Register DstR = Dst.getReg();
  const Register SrcR1 = Base.getReg();
  const Register SrcR2 = Index.getReg();
  if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
    return;
  if (Scale.getImm() > 1)
    return;
  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
  MachineInstr *NewMI = nullptr;
  // Make an ADD instruction for the two registers writing to LEA's
  // destination.
  if (SrcR1 != 0 && SrcR2 != 0) {
    const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
    const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
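    // The destination already holds one addend, so it suffices to add the
    // other source register (Src) to it.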
    NewMI =
        BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
    LLVM_DEBUG(NewMI->dump(););
  }
  // Make an ADD instruction for the immediate.
  if (Offset.getImm() != 0) {
    const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(Opcode, Offset));
    const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
    NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
                .add(SrcR)
                .addImm(Offset.getImm());
    LLVM_DEBUG(NewMI->dump(););
  }
  if (NewMI) {
    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
  }
}

void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                             MachineBasicBlock &MBB,
                                             bool OptIncDec) {
  MachineInstr &MI = *I;
  const unsigned LEAOpcode = MI.getOpcode();

  const MachineOperand &Dest = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
          MachineBasicBlock::LQR_Dead ||
      Segment.getReg() != X86::NoRegister)
    return;

  Register DestReg = Dest.getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  bool IsScale1 = Scale.getImm() == 1;
  bool IsInefficientBase = isInefficientLEAReg(BaseReg);
  bool IsInefficientIndex = isInefficientLEAReg(IndexReg);

  // Skip this case since it takes more than 2 instructions
  // to replace the LEA instruction.
  if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
    return;

  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);

  MachineInstr *NewMI = nullptr;

  // First try to replace LEA with one or two (for the 3-op LEA case)
  // add instructions:
  //   1. lea (%base,%index,1), %base  =>  add %index,%base
  //   2. lea (%base,%index,1), %index =>  add %base,%index
  if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
    // If the base is inefficient, try switching the index and base operands;
    // otherwise just break the 3-op LEA into a 2-op LEA + ADD instruction:
    //   lea offset(%base,%index,scale), %dst =>
    //   lea (%base,%index,scale), %dst; add $offset, %dst
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
                .add(Dest)
                .add(IsInefficientBase ? Index : Base)
                .add(Scale)
                .add(IsInefficientBase ? Base : Index)
                .addImm(0)
                .add(Segment);
    LLVM_DEBUG(NewMI->dump(););
  }

  // If either replacement succeeded above, add the offset if needed, then
  // replace the instruction.
  if (NewMI) {
    // Create an ADD instruction for the offset in case of a 3-op LEA.
    if (hasLEAOffset(Offset)) {
      if (OptIncDec && Offset.isImm() &&
          (Offset.getImm() == 1 || Offset.getImm() == -1)) {
        unsigned NewOpc =
            getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg);
        LLVM_DEBUG(NewMI->dump(););
      } else {
        unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg)
                    .add(Offset);
        LLVM_DEBUG(NewMI->dump(););
      }
    }

    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
    return;
  }

  // Handle the rest of the cases with an inefficient base register:
  assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
  assert(IsInefficientBase && "efficient base should be handled already!");

  // FIXME: Handle LEA64_32r.
  if (LEAOpcode == X86::LEA64_32r)
    return;

  // lea (%base,%index,1), %dst  =>  mov %base,%dst; add %index,%dst
  if (IsScale1 && !hasLEAOffset(Offset)) {
    bool BIK = Base.isKill() && BaseReg != IndexReg;
    TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
    LLVM_DEBUG(MI.getPrevNode()->dump(););

    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                .addReg(DestReg)
                .add(Index);
    LLVM_DEBUG(NewMI->dump(););

    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
    return;
  }

  // lea offset(%base,%index,scale), %dst =>
  // lea offset( ,%index,scale), %dst; add %base,%dst
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
              .add(Dest)
              .addReg(0)
              .add(Scale)
              .add(Index)
              .add(Offset)
              .add(Segment);
  LLVM_DEBUG(NewMI->dump(););

  unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
              .addReg(DestReg)
              .add(Base);
  LLVM_DEBUG(NewMI->dump(););

  MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
  MBB.erase(I);
  I = NewMI;
}
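// Note (a sketch under assumption, not part of this file): the pass is
// created via createX86FixupLEAs() and is expected to be scheduled late in
// the X86 codegen pipeline, after register allocation, along these lines:
//
//   // In X86TargetMachine.cpp (illustrative):
//   void X86PassConfig::addPreEmitPass() {
//     if (getOptLevel() != CodeGenOpt::None)
//       addPass(createX86FixupLEAs());
//   }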