1 //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the pass that finds instructions that can be 10 // re-written as LEA instructions in order to reduce pipeline delays. 11 // It replaces LEAs with ADD/INC/DEC when that is better for size/speed. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "X86.h" 16 #include "X86InstrInfo.h" 17 #include "X86Subtarget.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/Analysis/ProfileSummaryInfo.h" 20 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineSizeOpts.h" 24 #include "llvm/CodeGen/Passes.h" 25 #include "llvm/CodeGen/TargetSchedule.h" 26 #include "llvm/Support/Debug.h" 27 #include "llvm/Support/raw_ostream.h" 28 using namespace llvm; 29 30 #define FIXUPLEA_DESC "X86 LEA Fixup" 31 #define FIXUPLEA_NAME "x86-fixup-LEAs" 32 33 #define DEBUG_TYPE FIXUPLEA_NAME 34 35 STATISTIC(NumLEAs, "Number of LEA instructions created"); 36 37 namespace { 38 class FixupLEAPass : public MachineFunctionPass { 39 enum RegUsageState { RU_NotUsed, RU_Write, RU_Read }; 40 41 /// Given a machine register, look for the instruction 42 /// which writes it in the current basic block. If found, 43 /// try to replace it with an equivalent LEA instruction. 44 /// If replacement succeeds, then also process the newly created 45 /// instruction. 46 void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I, 47 MachineBasicBlock &MBB); 48 49 /// Given a memory access or LEA instruction 50 /// whose address mode uses a base and/or index register, look for 51 /// an opportunity to replace the instruction which sets the base or index 52 /// register with an equivalent LEA instruction. 53 void processInstruction(MachineBasicBlock::iterator &I, 54 MachineBasicBlock &MBB); 55 56 /// Given a LEA instruction which is unprofitable 57 /// on SlowLEA targets try to replace it with an equivalent ADD instruction. 58 void processInstructionForSlowLEA(MachineBasicBlock::iterator &I, 59 MachineBasicBlock &MBB); 60 61 /// Given a LEA instruction which is unprofitable 62 /// on SNB+ try to replace it with other instructions. 63 /// According to Intel's Optimization Reference Manual: 64 /// " For LEA instructions with three source operands and some specific 65 /// situations, instruction latency has increased to 3 cycles, and must 66 /// dispatch via port 1: 67 /// - LEA that has all three source operands: base, index, and offset 68 /// - LEA that uses base and index registers where the base is EBP, RBP, 69 /// or R13 70 /// - LEA that uses RIP relative addressing mode 71 /// - LEA that uses 16-bit addressing mode " 72 /// This function currently handles the first 2 cases only. 73 void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I, 74 MachineBasicBlock &MBB, bool OptIncDec); 75 76 /// Look for LEAs that are really two address LEAs that we might be able to 77 /// turn into regular ADD instructions. 78 bool optTwoAddrLEA(MachineBasicBlock::iterator &I, 79 MachineBasicBlock &MBB, bool OptIncDec, 80 bool UseLEAForSP) const; 81 82 /// Look for and transform the sequence 83 /// lea (reg1, reg2), reg3 84 /// sub reg3, reg4 85 /// to 86 /// sub reg1, reg4 87 /// sub reg2, reg4 88 /// It can also optimize the sequence lea/add similarly. 89 bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const; 90 91 /// Step forwards in MBB, looking for an ADD/SUB instruction which uses 92 /// the dest register of LEA instruction I. 93 MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I, 94 MachineBasicBlock &MBB) const; 95 96 /// Check instructions between LeaI and AluI (exclusively). 97 /// Set BaseIndexDef to true if base or index register from LeaI is defined. 98 /// Set AluDestRef to true if the dest register of AluI is used or defined. 99 /// *KilledBase is set to the killed base register usage. 100 /// *KilledIndex is set to the killed index register usage. 101 void checkRegUsage(MachineBasicBlock::iterator &LeaI, 102 MachineBasicBlock::iterator &AluI, bool &BaseIndexDef, 103 bool &AluDestRef, MachineOperand **KilledBase, 104 MachineOperand **KilledIndex) const; 105 106 /// Determine if an instruction references a machine register 107 /// and, if so, whether it reads or writes the register. 108 RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I); 109 110 /// Step backwards through a basic block, looking 111 /// for an instruction which writes a register within 112 /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles. 113 MachineBasicBlock::iterator searchBackwards(MachineOperand &p, 114 MachineBasicBlock::iterator &I, 115 MachineBasicBlock &MBB); 116 117 /// if an instruction can be converted to an 118 /// equivalent LEA, insert the new instruction into the basic block 119 /// and return a pointer to it. Otherwise, return zero. 120 MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB, 121 MachineBasicBlock::iterator &MBBI) const; 122 123 public: 124 static char ID; 125 126 StringRef getPassName() const override { return FIXUPLEA_DESC; } 127 128 FixupLEAPass() : MachineFunctionPass(ID) { } 129 130 /// Loop over all of the basic blocks, 131 /// replacing instructions by equivalent LEA instructions 132 /// if needed and when possible. 133 bool runOnMachineFunction(MachineFunction &MF) override; 134 135 // This pass runs after regalloc and doesn't support VReg operands. 136 MachineFunctionProperties getRequiredProperties() const override { 137 return MachineFunctionProperties().set( 138 MachineFunctionProperties::Property::NoVRegs); 139 } 140 141 void getAnalysisUsage(AnalysisUsage &AU) const override { 142 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 143 AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); 144 MachineFunctionPass::getAnalysisUsage(AU); 145 } 146 147 private: 148 TargetSchedModel TSM; 149 const X86InstrInfo *TII = nullptr; 150 const X86RegisterInfo *TRI = nullptr; 151 }; 152 } 153 154 char FixupLEAPass::ID = 0; 155 156 INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false) 157 158 MachineInstr * 159 FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB, 160 MachineBasicBlock::iterator &MBBI) const { 161 MachineInstr &MI = *MBBI; 162 switch (MI.getOpcode()) { 163 case X86::MOV32rr: 164 case X86::MOV64rr: { 165 const MachineOperand &Src = MI.getOperand(1); 166 const MachineOperand &Dest = MI.getOperand(0); 167 MachineInstr *NewMI = 168 BuildMI(MBB, MBBI, MI.getDebugLoc(), 169 TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r 170 : X86::LEA64r)) 171 .add(Dest) 172 .add(Src) 173 .addImm(1) 174 .addReg(0) 175 .addImm(0) 176 .addReg(0); 177 return NewMI; 178 } 179 } 180 181 if (!MI.isConvertibleTo3Addr()) 182 return nullptr; 183 184 switch (MI.getOpcode()) { 185 default: 186 // Only convert instructions that we've verified are safe. 187 return nullptr; 188 case X86::ADD64ri32: 189 case X86::ADD64ri32_DB: 190 case X86::ADD32ri: 191 case X86::ADD32ri_DB: 192 if (!MI.getOperand(2).isImm()) { 193 // convertToThreeAddress will call getImm() 194 // which requires isImm() to be true 195 return nullptr; 196 } 197 break; 198 case X86::SHL64ri: 199 case X86::SHL32ri: 200 case X86::INC64r: 201 case X86::INC32r: 202 case X86::DEC64r: 203 case X86::DEC32r: 204 case X86::ADD64rr: 205 case X86::ADD64rr_DB: 206 case X86::ADD32rr: 207 case X86::ADD32rr_DB: 208 // These instructions are all fine to convert. 209 break; 210 } 211 return TII->convertToThreeAddress(MI, nullptr, nullptr); 212 } 213 214 FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); } 215 216 static bool isLEA(unsigned Opcode) { 217 return Opcode == X86::LEA32r || Opcode == X86::LEA64r || 218 Opcode == X86::LEA64_32r; 219 } 220 221 bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) { 222 if (skipFunction(MF.getFunction())) 223 return false; 224 225 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); 226 bool IsSlowLEA = ST.slowLEA(); 227 bool IsSlow3OpsLEA = ST.slow3OpsLEA(); 228 bool LEAUsesAG = ST.leaUsesAG(); 229 230 bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize(); 231 bool UseLEAForSP = ST.useLeaForSP(); 232 233 TSM.init(&ST); 234 TII = ST.getInstrInfo(); 235 TRI = ST.getRegisterInfo(); 236 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 237 auto *MBFI = (PSI && PSI->hasProfileSummary()) 238 ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() 239 : nullptr; 240 241 LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";); 242 for (MachineBasicBlock &MBB : MF) { 243 // First pass. Try to remove or optimize existing LEAs. 244 bool OptIncDecPerBB = 245 OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI); 246 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { 247 if (!isLEA(I->getOpcode())) 248 continue; 249 250 if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP)) 251 continue; 252 253 if (IsSlowLEA) 254 processInstructionForSlowLEA(I, MBB); 255 else if (IsSlow3OpsLEA) 256 processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB); 257 } 258 259 // Second pass for creating LEAs. This may reverse some of the 260 // transformations above. 261 if (LEAUsesAG) { 262 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) 263 processInstruction(I, MBB); 264 } 265 } 266 267 LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";); 268 269 return true; 270 } 271 272 FixupLEAPass::RegUsageState 273 FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) { 274 RegUsageState RegUsage = RU_NotUsed; 275 MachineInstr &MI = *I; 276 277 for (const MachineOperand &MO : MI.operands()) { 278 if (MO.isReg() && MO.getReg() == p.getReg()) { 279 if (MO.isDef()) 280 return RU_Write; 281 RegUsage = RU_Read; 282 } 283 } 284 return RegUsage; 285 } 286 287 /// getPreviousInstr - Given a reference to an instruction in a basic 288 /// block, return a reference to the previous instruction in the block, 289 /// wrapping around to the last instruction of the block if the block 290 /// branches to itself. 291 static inline bool getPreviousInstr(MachineBasicBlock::iterator &I, 292 MachineBasicBlock &MBB) { 293 if (I == MBB.begin()) { 294 if (MBB.isPredecessor(&MBB)) { 295 I = --MBB.end(); 296 return true; 297 } else 298 return false; 299 } 300 --I; 301 return true; 302 } 303 304 MachineBasicBlock::iterator 305 FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I, 306 MachineBasicBlock &MBB) { 307 int InstrDistance = 1; 308 MachineBasicBlock::iterator CurInst; 309 static const int INSTR_DISTANCE_THRESHOLD = 5; 310 311 CurInst = I; 312 bool Found; 313 Found = getPreviousInstr(CurInst, MBB); 314 while (Found && I != CurInst) { 315 if (CurInst->isCall() || CurInst->isInlineAsm()) 316 break; 317 if (InstrDistance > INSTR_DISTANCE_THRESHOLD) 318 break; // too far back to make a difference 319 if (usesRegister(p, CurInst) == RU_Write) { 320 return CurInst; 321 } 322 InstrDistance += TSM.computeInstrLatency(&*CurInst); 323 Found = getPreviousInstr(CurInst, MBB); 324 } 325 return MachineBasicBlock::iterator(); 326 } 327 328 static inline bool isInefficientLEAReg(unsigned Reg) { 329 return Reg == X86::EBP || Reg == X86::RBP || 330 Reg == X86::R13D || Reg == X86::R13; 331 } 332 333 /// Returns true if this LEA uses base and index registers, and the base 334 /// register is known to be inefficient for the subtarget. 335 // TODO: use a variant scheduling class to model the latency profile 336 // of LEA instructions, and implement this logic as a scheduling predicate. 337 static inline bool hasInefficientLEABaseReg(const MachineOperand &Base, 338 const MachineOperand &Index) { 339 return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() && 340 Index.getReg() != X86::NoRegister; 341 } 342 343 static inline bool hasLEAOffset(const MachineOperand &Offset) { 344 return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal() || 345 Offset.isBlockAddress(); 346 } 347 348 static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) { 349 switch (LEAOpcode) { 350 default: 351 llvm_unreachable("Unexpected LEA instruction"); 352 case X86::LEA32r: 353 case X86::LEA64_32r: 354 return X86::ADD32rr; 355 case X86::LEA64r: 356 return X86::ADD64rr; 357 } 358 } 359 360 static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) { 361 switch (LEAOpcode) { 362 default: 363 llvm_unreachable("Unexpected LEA instruction"); 364 case X86::LEA32r: 365 case X86::LEA64_32r: 366 return X86::SUB32rr; 367 case X86::LEA64r: 368 return X86::SUB64rr; 369 } 370 } 371 372 static inline unsigned getADDriFromLEA(unsigned LEAOpcode, 373 const MachineOperand &Offset) { 374 switch (LEAOpcode) { 375 default: 376 llvm_unreachable("Unexpected LEA instruction"); 377 case X86::LEA32r: 378 case X86::LEA64_32r: 379 return X86::ADD32ri; 380 case X86::LEA64r: 381 return X86::ADD64ri32; 382 } 383 } 384 385 static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) { 386 switch (LEAOpcode) { 387 default: 388 llvm_unreachable("Unexpected LEA instruction"); 389 case X86::LEA32r: 390 case X86::LEA64_32r: 391 return IsINC ? X86::INC32r : X86::DEC32r; 392 case X86::LEA64r: 393 return IsINC ? X86::INC64r : X86::DEC64r; 394 } 395 } 396 397 MachineBasicBlock::iterator 398 FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I, 399 MachineBasicBlock &MBB) const { 400 const int InstrDistanceThreshold = 5; 401 int InstrDistance = 1; 402 MachineBasicBlock::iterator CurInst = std::next(I); 403 404 unsigned LEAOpcode = I->getOpcode(); 405 unsigned AddOpcode = getADDrrFromLEA(LEAOpcode); 406 unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode); 407 Register DestReg = I->getOperand(0).getReg(); 408 409 while (CurInst != MBB.end()) { 410 if (CurInst->isCall() || CurInst->isInlineAsm()) 411 break; 412 if (InstrDistance > InstrDistanceThreshold) 413 break; 414 415 // Check if the lea dest register is used in an add/sub instruction only. 416 for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) { 417 MachineOperand &Opnd = CurInst->getOperand(I); 418 if (Opnd.isReg()) { 419 if (Opnd.getReg() == DestReg) { 420 if (Opnd.isDef() || !Opnd.isKill()) 421 return MachineBasicBlock::iterator(); 422 423 unsigned AluOpcode = CurInst->getOpcode(); 424 if (AluOpcode != AddOpcode && AluOpcode != SubOpcode) 425 return MachineBasicBlock::iterator(); 426 427 MachineOperand &Opnd2 = CurInst->getOperand(3 - I); 428 MachineOperand AluDest = CurInst->getOperand(0); 429 if (Opnd2.getReg() != AluDest.getReg()) 430 return MachineBasicBlock::iterator(); 431 432 // X - (Y + Z) may generate different flags than (X - Y) - Z when 433 // there is overflow. So we can't change the alu instruction if the 434 // flags register is live. 435 if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI)) 436 return MachineBasicBlock::iterator(); 437 438 return CurInst; 439 } 440 if (TRI->regsOverlap(DestReg, Opnd.getReg())) 441 return MachineBasicBlock::iterator(); 442 } 443 } 444 445 InstrDistance++; 446 ++CurInst; 447 } 448 return MachineBasicBlock::iterator(); 449 } 450 451 void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI, 452 MachineBasicBlock::iterator &AluI, 453 bool &BaseIndexDef, bool &AluDestRef, 454 MachineOperand **KilledBase, 455 MachineOperand **KilledIndex) const { 456 BaseIndexDef = AluDestRef = false; 457 *KilledBase = *KilledIndex = nullptr; 458 Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg(); 459 Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg(); 460 Register AluDestReg = AluI->getOperand(0).getReg(); 461 462 for (MachineInstr &CurInst : llvm::make_range(std::next(LeaI), AluI)) { 463 for (MachineOperand &Opnd : CurInst.operands()) { 464 if (!Opnd.isReg()) 465 continue; 466 Register Reg = Opnd.getReg(); 467 if (TRI->regsOverlap(Reg, AluDestReg)) 468 AluDestRef = true; 469 if (TRI->regsOverlap(Reg, BaseReg)) { 470 if (Opnd.isDef()) 471 BaseIndexDef = true; 472 else if (Opnd.isKill()) 473 *KilledBase = &Opnd; 474 } 475 if (TRI->regsOverlap(Reg, IndexReg)) { 476 if (Opnd.isDef()) 477 BaseIndexDef = true; 478 else if (Opnd.isKill()) 479 *KilledIndex = &Opnd; 480 } 481 } 482 } 483 } 484 485 bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I, 486 MachineBasicBlock &MBB) const { 487 // Look for an add/sub instruction which uses the result of lea. 488 MachineBasicBlock::iterator AluI = searchALUInst(I, MBB); 489 if (AluI == MachineBasicBlock::iterator()) 490 return false; 491 492 // Check if there are any related register usage between lea and alu. 493 bool BaseIndexDef, AluDestRef; 494 MachineOperand *KilledBase, *KilledIndex; 495 checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex); 496 497 MachineBasicBlock::iterator InsertPos = AluI; 498 if (BaseIndexDef) { 499 if (AluDestRef) 500 return false; 501 InsertPos = I; 502 KilledBase = KilledIndex = nullptr; 503 } 504 505 // Check if there are same registers. 506 Register AluDestReg = AluI->getOperand(0).getReg(); 507 Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg(); 508 Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg(); 509 if (I->getOpcode() == X86::LEA64_32r) { 510 BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit); 511 IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit); 512 } 513 if (AluDestReg == IndexReg) { 514 if (BaseReg == IndexReg) 515 return false; 516 std::swap(BaseReg, IndexReg); 517 std::swap(KilledBase, KilledIndex); 518 } 519 if (BaseReg == IndexReg) 520 KilledBase = nullptr; 521 522 // Now it's safe to change instructions. 523 MachineInstr *NewMI1, *NewMI2; 524 unsigned NewOpcode = AluI->getOpcode(); 525 NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode), 526 AluDestReg) 527 .addReg(AluDestReg, RegState::Kill) 528 .addReg(BaseReg, KilledBase ? RegState::Kill : 0); 529 NewMI1->addRegisterDead(X86::EFLAGS, TRI); 530 NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode), 531 AluDestReg) 532 .addReg(AluDestReg, RegState::Kill) 533 .addReg(IndexReg, KilledIndex ? RegState::Kill : 0); 534 NewMI2->addRegisterDead(X86::EFLAGS, TRI); 535 536 // Clear the old Kill flags. 537 if (KilledBase) 538 KilledBase->setIsKill(false); 539 if (KilledIndex) 540 KilledIndex->setIsKill(false); 541 542 MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1); 543 MBB.erase(I); 544 MBB.erase(AluI); 545 I = NewMI1; 546 return true; 547 } 548 549 bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I, 550 MachineBasicBlock &MBB, bool OptIncDec, 551 bool UseLEAForSP) const { 552 MachineInstr &MI = *I; 553 554 const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg); 555 const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt); 556 const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg); 557 const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp); 558 const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg); 559 560 if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 || 561 MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) != 562 MachineBasicBlock::LQR_Dead) 563 return false; 564 565 Register DestReg = MI.getOperand(0).getReg(); 566 Register BaseReg = Base.getReg(); 567 Register IndexReg = Index.getReg(); 568 569 // Don't change stack adjustment LEAs. 570 if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP)) 571 return false; 572 573 // LEA64_32 has 64-bit operands but 32-bit result. 574 if (MI.getOpcode() == X86::LEA64_32r) { 575 if (BaseReg != 0) 576 BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit); 577 if (IndexReg != 0) 578 IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit); 579 } 580 581 MachineInstr *NewMI = nullptr; 582 583 // Case 1. 584 // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1 585 // which can be turned into add %reg2, %reg1 586 if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 && 587 (DestReg == BaseReg || DestReg == IndexReg)) { 588 unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode()); 589 if (DestReg != BaseReg) 590 std::swap(BaseReg, IndexReg); 591 592 if (MI.getOpcode() == X86::LEA64_32r) { 593 // TODO: Do we need the super register implicit use? 594 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) 595 .addReg(BaseReg).addReg(IndexReg) 596 .addReg(Base.getReg(), RegState::Implicit) 597 .addReg(Index.getReg(), RegState::Implicit); 598 } else { 599 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) 600 .addReg(BaseReg).addReg(IndexReg); 601 } 602 } else if (DestReg == BaseReg && IndexReg == 0) { 603 // Case 2. 604 // This is an LEA with only a base register and a displacement, 605 // We can use ADDri or INC/DEC. 606 607 // Does this LEA have one these forms: 608 // lea %reg, 1(%reg) 609 // lea %reg, -1(%reg) 610 if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) { 611 bool IsINC = Disp.getImm() == 1; 612 unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC); 613 614 if (MI.getOpcode() == X86::LEA64_32r) { 615 // TODO: Do we need the super register implicit use? 616 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) 617 .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit); 618 } else { 619 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) 620 .addReg(BaseReg); 621 } 622 } else { 623 unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp); 624 if (MI.getOpcode() == X86::LEA64_32r) { 625 // TODO: Do we need the super register implicit use? 626 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) 627 .addReg(BaseReg).addImm(Disp.getImm()) 628 .addReg(Base.getReg(), RegState::Implicit); 629 } else { 630 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) 631 .addReg(BaseReg).addImm(Disp.getImm()); 632 } 633 } 634 } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) { 635 // Case 3. 636 // Look for and transform the sequence 637 // lea (reg1, reg2), reg3 638 // sub reg3, reg4 639 return optLEAALU(I, MBB); 640 } else 641 return false; 642 643 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1); 644 MBB.erase(I); 645 I = NewMI; 646 return true; 647 } 648 649 void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I, 650 MachineBasicBlock &MBB) { 651 // Process a load, store, or LEA instruction. 652 MachineInstr &MI = *I; 653 const MCInstrDesc &Desc = MI.getDesc(); 654 int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags); 655 if (AddrOffset >= 0) { 656 AddrOffset += X86II::getOperandBias(Desc); 657 MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg); 658 if (p.isReg() && p.getReg() != X86::ESP) { 659 seekLEAFixup(p, I, MBB); 660 } 661 MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg); 662 if (q.isReg() && q.getReg() != X86::ESP) { 663 seekLEAFixup(q, I, MBB); 664 } 665 } 666 } 667 668 void FixupLEAPass::seekLEAFixup(MachineOperand &p, 669 MachineBasicBlock::iterator &I, 670 MachineBasicBlock &MBB) { 671 MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB); 672 if (MBI != MachineBasicBlock::iterator()) { 673 MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI); 674 if (NewMI) { 675 ++NumLEAs; 676 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump();); 677 // now to replace with an equivalent LEA... 678 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump();); 679 MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1); 680 MBB.erase(MBI); 681 MachineBasicBlock::iterator J = 682 static_cast<MachineBasicBlock::iterator>(NewMI); 683 processInstruction(J, MBB); 684 } 685 } 686 } 687 688 void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I, 689 MachineBasicBlock &MBB) { 690 MachineInstr &MI = *I; 691 const unsigned Opcode = MI.getOpcode(); 692 693 const MachineOperand &Dst = MI.getOperand(0); 694 const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg); 695 const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt); 696 const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg); 697 const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp); 698 const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg); 699 700 if (Segment.getReg() != 0 || !Offset.isImm() || 701 MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) != 702 MachineBasicBlock::LQR_Dead) 703 return; 704 const Register DstR = Dst.getReg(); 705 const Register SrcR1 = Base.getReg(); 706 const Register SrcR2 = Index.getReg(); 707 if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR)) 708 return; 709 if (Scale.getImm() > 1) 710 return; 711 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump();); 712 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";); 713 MachineInstr *NewMI = nullptr; 714 // Make ADD instruction for two registers writing to LEA's destination 715 if (SrcR1 != 0 && SrcR2 != 0) { 716 const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode)); 717 const MachineOperand &Src = SrcR1 == DstR ? Index : Base; 718 NewMI = 719 BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src); 720 LLVM_DEBUG(NewMI->dump();); 721 } 722 // Make ADD instruction for immediate 723 if (Offset.getImm() != 0) { 724 const MCInstrDesc &ADDri = 725 TII->get(getADDriFromLEA(Opcode, Offset)); 726 const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index; 727 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR) 728 .add(SrcR) 729 .addImm(Offset.getImm()); 730 LLVM_DEBUG(NewMI->dump();); 731 } 732 if (NewMI) { 733 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1); 734 MBB.erase(I); 735 I = NewMI; 736 } 737 } 738 739 void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I, 740 MachineBasicBlock &MBB, 741 bool OptIncDec) { 742 MachineInstr &MI = *I; 743 const unsigned LEAOpcode = MI.getOpcode(); 744 745 const MachineOperand &Dest = MI.getOperand(0); 746 const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg); 747 const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt); 748 const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg); 749 const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp); 750 const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg); 751 752 if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) || 753 MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) != 754 MachineBasicBlock::LQR_Dead || 755 Segment.getReg() != X86::NoRegister) 756 return; 757 758 Register DestReg = Dest.getReg(); 759 Register BaseReg = Base.getReg(); 760 Register IndexReg = Index.getReg(); 761 762 if (MI.getOpcode() == X86::LEA64_32r) { 763 if (BaseReg != 0) 764 BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit); 765 if (IndexReg != 0) 766 IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit); 767 } 768 769 bool IsScale1 = Scale.getImm() == 1; 770 bool IsInefficientBase = isInefficientLEAReg(BaseReg); 771 bool IsInefficientIndex = isInefficientLEAReg(IndexReg); 772 773 // Skip these cases since it takes more than 2 instructions 774 // to replace the LEA instruction. 775 if (IsInefficientBase && DestReg == BaseReg && !IsScale1) 776 return; 777 778 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump();); 779 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";); 780 781 MachineInstr *NewMI = nullptr; 782 bool BaseOrIndexIsDst = DestReg == BaseReg || DestReg == IndexReg; 783 // First try and remove the base while sticking with LEA iff base == index and 784 // scale == 1. We can handle: 785 // 1. lea D(%base,%index,1) -> lea D(,%index,2) 786 // 2. lea D(%r13/%rbp,%index) -> lea D(,%index,2) 787 // Only do this if the LEA would otherwise be split into 2-instruction 788 // (either it has a an Offset or neither base nor index are dst) 789 if (IsScale1 && BaseReg == IndexReg && 790 (hasLEAOffset(Offset) || (IsInefficientBase && !BaseOrIndexIsDst))) { 791 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode)) 792 .add(Dest) 793 .addReg(0) 794 .addImm(2) 795 .add(Index) 796 .add(Offset) 797 .add(Segment); 798 LLVM_DEBUG(NewMI->dump();); 799 800 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1); 801 MBB.erase(I); 802 I = NewMI; 803 return; 804 } else if (IsScale1 && BaseOrIndexIsDst) { 805 // Try to replace LEA with one or two (for the 3-op LEA case) 806 // add instructions: 807 // 1.lea (%base,%index,1), %base => add %index,%base 808 // 2.lea (%base,%index,1), %index => add %base,%index 809 810 unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); 811 if (DestReg != BaseReg) 812 std::swap(BaseReg, IndexReg); 813 814 if (MI.getOpcode() == X86::LEA64_32r) { 815 // TODO: Do we need the super register implicit use? 816 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) 817 .addReg(BaseReg) 818 .addReg(IndexReg) 819 .addReg(Base.getReg(), RegState::Implicit) 820 .addReg(Index.getReg(), RegState::Implicit); 821 } else { 822 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) 823 .addReg(BaseReg) 824 .addReg(IndexReg); 825 } 826 } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) { 827 // If the base is inefficient try switching the index and base operands, 828 // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction: 829 // lea offset(%base,%index,scale),%dst => 830 // lea (%base,%index,scale); add offset,%dst 831 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode)) 832 .add(Dest) 833 .add(IsInefficientBase ? Index : Base) 834 .add(Scale) 835 .add(IsInefficientBase ? Base : Index) 836 .addImm(0) 837 .add(Segment); 838 LLVM_DEBUG(NewMI->dump();); 839 } 840 841 // If either replacement succeeded above, add the offset if needed, then 842 // replace the instruction. 843 if (NewMI) { 844 // Create ADD instruction for the Offset in case of 3-Ops LEA. 845 if (hasLEAOffset(Offset)) { 846 if (OptIncDec && Offset.isImm() && 847 (Offset.getImm() == 1 || Offset.getImm() == -1)) { 848 unsigned NewOpc = 849 getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1); 850 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) 851 .addReg(DestReg); 852 LLVM_DEBUG(NewMI->dump();); 853 } else { 854 unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset); 855 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) 856 .addReg(DestReg) 857 .add(Offset); 858 LLVM_DEBUG(NewMI->dump();); 859 } 860 } 861 862 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1); 863 MBB.erase(I); 864 I = NewMI; 865 return; 866 } 867 868 // Handle the rest of the cases with inefficient base register: 869 assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!"); 870 assert(IsInefficientBase && "efficient base should be handled already!"); 871 872 // FIXME: Handle LEA64_32r. 873 if (LEAOpcode == X86::LEA64_32r) 874 return; 875 876 // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst 877 if (IsScale1 && !hasLEAOffset(Offset)) { 878 bool BIK = Base.isKill() && BaseReg != IndexReg; 879 TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK); 880 LLVM_DEBUG(MI.getPrevNode()->dump();); 881 882 unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); 883 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg) 884 .addReg(DestReg) 885 .add(Index); 886 LLVM_DEBUG(NewMI->dump();); 887 888 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1); 889 MBB.erase(I); 890 I = NewMI; 891 return; 892 } 893 894 // lea offset(%base,%index,scale), %dst => 895 // lea offset( ,%index,scale), %dst; add %base,%dst 896 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode)) 897 .add(Dest) 898 .addReg(0) 899 .add(Scale) 900 .add(Index) 901 .add(Offset) 902 .add(Segment); 903 LLVM_DEBUG(NewMI->dump();); 904 905 unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); 906 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg) 907 .addReg(DestReg) 908 .add(Base); 909 LLVM_DEBUG(NewMI->dump();); 910 911 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1); 912 MBB.erase(I); 913 I = NewMI; 914 } 915