//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass that finds instructions that can be
// re-written as LEA instructions in order to reduce pipeline delays.
// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define FIXUPLEA_DESC "X86 LEA Fixup"
#define FIXUPLEA_NAME "x86-fixup-LEAs"

#define DEBUG_TYPE FIXUPLEA_NAME

STATISTIC(NumLEAs, "Number of LEA instructions created");

namespace {
class FixupLEAPass : public MachineFunctionPass {
  enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };

  /// Given a machine register, look for the instruction
  /// which writes it in the current basic block. If found,
  /// try to replace it with an equivalent LEA instruction.
  /// If replacement succeeds, then also process the newly created
  /// instruction.
  void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
                    MachineBasicBlock &MBB);

  /// Given a memory access or LEA instruction
  /// whose address mode uses a base and/or index register, look for
  /// an opportunity to replace the instruction which sets the base or index
  /// register with an equivalent LEA instruction.
  void processInstruction(MachineBasicBlock::iterator &I,
                          MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
  void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SNB+ try to replace it with other instructions.
  /// According to Intel's Optimization Reference Manual:
  /// " For LEA instructions with three source operands and some specific
  ///   situations, instruction latency has increased to 3 cycles, and must
  ///   dispatch via port 1:
  /// - LEA that has all three source operands: base, index, and offset
  /// - LEA that uses base and index registers where the base is EBP, RBP,
  ///   or R13
  /// - LEA that uses RIP relative addressing mode
  /// - LEA that uses 16-bit addressing mode "
  /// This function currently handles the first 2 cases only.
  void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec);

  /// Look for LEAs that are really two address LEAs that we might be able to
  /// turn into regular ADD instructions.
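  /// For example (illustrative; the exact conditions are checked in the
  /// function body, e.g. that EFLAGS is dead):
  ///     lea (%reg1, %reg2), %reg1
  ///   =>
  ///     add %reg2, %reg1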
  bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
                     MachineBasicBlock &MBB, bool OptIncDec,
                     bool UseLEAForSP) const;

  /// Look for and transform the sequence
  ///     lea (reg1, reg2), reg3
  ///     sub reg3, reg4
  /// to
  ///     sub reg1, reg4
  ///     sub reg2, reg4
  /// It can also optimize the sequence lea/add similarly.
  bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;

  /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
  /// the dest register of LEA instruction I.
  MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
                                            MachineBasicBlock &MBB) const;

  /// Check instructions between LeaI and AluI (exclusive).
  /// Set BaseIndexDef to true if base or index register from LeaI is defined.
  /// Set AluDestRef to true if the dest register of AluI is used or defined.
  /// *KilledBase is set to the killed base register usage.
  /// *KilledIndex is set to the killed index register usage.
  void checkRegUsage(MachineBasicBlock::iterator &LeaI,
                     MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
                     bool &AluDestRef, MachineOperand **KilledBase,
                     MachineOperand **KilledIndex) const;

  /// Determine if an instruction references a machine register
  /// and, if so, whether it reads or writes the register.
  RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);

  /// Step backwards through a basic block, looking
  /// for an instruction which writes a register within
  /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
  MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
                                              MachineBasicBlock::iterator &I,
                                              MachineBasicBlock &MBB);

  /// If an instruction can be converted to an
  /// equivalent LEA, insert the new instruction into the basic block
  /// and return a pointer to it. Otherwise, return nullptr.
  MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI) const;

public:
  static char ID;

  StringRef getPassName() const override { return FIXUPLEA_DESC; }

  FixupLEAPass() : MachineFunctionPass(ID) {}

  /// Loop over all of the basic blocks,
  /// replacing instructions by equivalent LEA instructions
  /// if needed and when possible.
  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  TargetSchedModel TSM;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
};
} // end anonymous namespace

char FixupLEAPass::ID = 0;

INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)

MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &MBBI) const {
  MachineInstr &MI = *MBBI;
  switch (MI.getOpcode()) {
  case X86::MOV32rr:
  case X86::MOV64rr: {
    const MachineOperand &Src = MI.getOperand(1);
    const MachineOperand &Dest = MI.getOperand(0);
    MachineInstr *NewMI =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
                                                        : X86::LEA64r))
            .add(Dest)
            .add(Src)
            .addImm(1)
            .addReg(0)
            .addImm(0)
            .addReg(0);
    return NewMI;
  }
  }

  if (!MI.isConvertibleTo3Addr())
    return nullptr;

  switch (MI.getOpcode()) {
  default:
    // Only convert instructions that we've verified are safe.
    return nullptr;
  case X86::ADD64ri32:
  case X86::ADD64ri8:
  case X86::ADD64ri32_DB:
  case X86::ADD64ri8_DB:
  case X86::ADD32ri:
  case X86::ADD32ri8:
  case X86::ADD32ri_DB:
  case X86::ADD32ri8_DB:
    if (!MI.getOperand(2).isImm()) {
      // convertToThreeAddress will call getImm()
      // which requires isImm() to be true
      return nullptr;
    }
    break;
  case X86::SHL64ri:
  case X86::SHL32ri:
  case X86::INC64r:
  case X86::INC32r:
  case X86::DEC64r:
  case X86::DEC32r:
  case X86::ADD64rr:
  case X86::ADD64rr_DB:
  case X86::ADD32rr:
  case X86::ADD32rr_DB:
    // These instructions are all fine to convert.
    break;
  }
  return TII->convertToThreeAddress(MI, nullptr, nullptr);
}

FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }

static bool isLEA(unsigned Opcode) {
  return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
         Opcode == X86::LEA64_32r;
}

bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  bool IsSlowLEA = ST.slowLEA();
  bool IsSlow3OpsLEA = ST.slow3OpsLEA();
  bool LEAUsesAG = ST.LEAusesAG();

  bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
  bool UseLEAForSP = ST.useLeaForSP();

  TSM.init(&ST);
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();
  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto *MBFI = (PSI && PSI->hasProfileSummary())
                   ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
                   : nullptr;

  LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
  for (MachineBasicBlock &MBB : MF) {
    // First pass. Try to remove or optimize existing LEAs.
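    // For instance, when EFLAGS is dead at the LEA, a two-address LEA such
    // as "lea 1(%reg), %reg" can be rewritten by optTwoAddrLEA below as
    // "inc %reg" (or "add $1, %reg").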
    bool OptIncDecPerBB =
        OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
      if (!isLEA(I->getOpcode()))
        continue;

      if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
        continue;

      if (IsSlowLEA)
        processInstructionForSlowLEA(I, MBB);
      else if (IsSlow3OpsLEA)
        processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
    }

    // Second pass for creating LEAs. This may reverse some of the
    // transformations above.
    if (LEAUsesAG) {
      for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
        processInstruction(I, MBB);
    }
  }

  LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);

  return true;
}

FixupLEAPass::RegUsageState
FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
  RegUsageState RegUsage = RU_NotUsed;
  MachineInstr &MI = *I;

  for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
    MachineOperand &opnd = MI.getOperand(i);
    if (opnd.isReg() && opnd.getReg() == p.getReg()) {
      if (opnd.isDef())
        return RU_Write;
      RegUsage = RU_Read;
    }
  }
  return RegUsage;
}

/// getPreviousInstr - Given a reference to an instruction in a basic
/// block, return a reference to the previous instruction in the block,
/// wrapping around to the last instruction of the block if the block
/// branches to itself.
static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB) {
  if (I == MBB.begin()) {
    if (MBB.isPredecessor(&MBB)) {
      I = --MBB.end();
      return true;
    } else
      return false;
  }
  --I;
  return true;
}

MachineBasicBlock::iterator
FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
                              MachineBasicBlock &MBB) {
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst;
  static const int INSTR_DISTANCE_THRESHOLD = 5;

  CurInst = I;
  bool Found;
  Found = getPreviousInstr(CurInst, MBB);
  while (Found && I != CurInst) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
      break; // too far back to make a difference
    if (usesRegister(p, CurInst) == RU_Write) {
      return CurInst;
    }
    InstrDistance += TSM.computeInstrLatency(&*CurInst);
    Found = getPreviousInstr(CurInst, MBB);
  }
  return MachineBasicBlock::iterator();
}

static inline bool isInefficientLEAReg(unsigned Reg) {
  return Reg == X86::EBP || Reg == X86::RBP ||
         Reg == X86::R13D || Reg == X86::R13;
}

/// Returns true if this LEA uses base and index registers, and the base
/// register is known to be inefficient for the subtarget.
// TODO: use a variant scheduling class to model the latency profile
// of LEA instructions, and implement this logic as a scheduling predicate.
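// For example (illustrative, per the Intel manual excerpt quoted above), an
// LEA such as
//   lea (%rbp,%rcx), %rax
// pays the increased 3-cycle latency on SNB+ because its base register is RBP.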
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
                                            const MachineOperand &Index) {
  return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
         Index.getReg() != X86::NoRegister;
}

static inline bool hasLEAOffset(const MachineOperand &Offset) {
  return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
}

static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::ADD32rr;
  case X86::LEA64r:
    return X86::ADD64rr;
  }
}

static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::SUB32rr;
  case X86::LEA64r:
    return X86::SUB64rr;
  }
}

static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
                                       const MachineOperand &Offset) {
  bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
  case X86::LEA64r:
    return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
  }
}

static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsINC ? X86::INC32r : X86::DEC32r;
  case X86::LEA64r:
    return IsINC ? X86::INC64r : X86::DEC64r;
  }
}

MachineBasicBlock::iterator
FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
                            MachineBasicBlock &MBB) const {
  const int InstrDistanceThreshold = 5;
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst = std::next(I);

  unsigned LEAOpcode = I->getOpcode();
  unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
  unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
  Register DestReg = I->getOperand(0).getReg();

  while (CurInst != MBB.end()) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > InstrDistanceThreshold)
      break;

    // Check if the lea dest register is used in an add/sub instruction only.
    for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
      MachineOperand &Opnd = CurInst->getOperand(I);
      if (Opnd.isReg()) {
        if (Opnd.getReg() == DestReg) {
          if (Opnd.isDef() || !Opnd.isKill())
            return MachineBasicBlock::iterator();

          unsigned AluOpcode = CurInst->getOpcode();
          if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
            return MachineBasicBlock::iterator();

          MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
          MachineOperand AluDest = CurInst->getOperand(0);
          if (Opnd2.getReg() != AluDest.getReg())
            return MachineBasicBlock::iterator();

          // X - (Y + Z) may generate different flags than (X - Y) - Z when
          // there is overflow. So we can't change the alu instruction if the
          // flags register is live.
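          // For instance (an illustrative sketch with X = 0, Y = 1, Z = 1):
          // subtracting Y + Z = 2 from X borrows and sets CF, while the split
          // form "sub Y, X; sub Z, X" ends with -1 - 1, whose final
          // subtraction does not borrow, leaving CF clear.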
          if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
            return MachineBasicBlock::iterator();

          return CurInst;
        }
        if (TRI->regsOverlap(DestReg, Opnd.getReg()))
          return MachineBasicBlock::iterator();
      }
    }

    InstrDistance++;
    ++CurInst;
  }
  return MachineBasicBlock::iterator();
}

void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
                                 MachineBasicBlock::iterator &AluI,
                                 bool &BaseIndexDef, bool &AluDestRef,
                                 MachineOperand **KilledBase,
                                 MachineOperand **KilledIndex) const {
  BaseIndexDef = AluDestRef = false;
  *KilledBase = *KilledIndex = nullptr;
  Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
  Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
  Register AluDestReg = AluI->getOperand(0).getReg();

  MachineBasicBlock::iterator CurInst = std::next(LeaI);
  while (CurInst != AluI) {
    for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
      MachineOperand &Opnd = CurInst->getOperand(I);
      if (!Opnd.isReg())
        continue;
      Register Reg = Opnd.getReg();
      if (TRI->regsOverlap(Reg, AluDestReg))
        AluDestRef = true;
      if (TRI->regsOverlap(Reg, BaseReg)) {
        if (Opnd.isDef())
          BaseIndexDef = true;
        else if (Opnd.isKill())
          *KilledBase = &Opnd;
      }
      if (TRI->regsOverlap(Reg, IndexReg)) {
        if (Opnd.isDef())
          BaseIndexDef = true;
        else if (Opnd.isKill())
          *KilledIndex = &Opnd;
      }
    }
    ++CurInst;
  }
}

bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
                             MachineBasicBlock &MBB) const {
  // Look for an add/sub instruction which uses the result of lea.
  MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
  if (AluI == MachineBasicBlock::iterator())
    return false;

  // Check if there is any related register usage between lea and alu.
  bool BaseIndexDef, AluDestRef;
  MachineOperand *KilledBase, *KilledIndex;
  checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);

  MachineBasicBlock::iterator InsertPos = AluI;
  if (BaseIndexDef) {
    if (AluDestRef)
      return false;
    InsertPos = I;
    KilledBase = KilledIndex = nullptr;
  }

  // Check if any of the registers are the same.
  Register AluDestReg = AluI->getOperand(0).getReg();
  Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
  Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
  if (I->getOpcode() == X86::LEA64_32r) {
    BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }
  if (AluDestReg == IndexReg) {
    if (BaseReg == IndexReg)
      return false;
    std::swap(BaseReg, IndexReg);
    std::swap(KilledBase, KilledIndex);
  }
  if (BaseReg == IndexReg)
    KilledBase = nullptr;

  // Now it's safe to change instructions.
  MachineInstr *NewMI1, *NewMI2;
  unsigned NewOpcode = AluI->getOpcode();
  NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
                   AluDestReg)
               .addReg(AluDestReg, RegState::Kill)
               .addReg(BaseReg, KilledBase ? RegState::Kill : 0);
  NewMI1->addRegisterDead(X86::EFLAGS, TRI);
  NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
                   AluDestReg)
               .addReg(AluDestReg, RegState::Kill)
               .addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
  NewMI2->addRegisterDead(X86::EFLAGS, TRI);

  // Clear the old Kill flags.
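  // The last uses of the base and index registers are now on the newly built
  // ALU instructions, so the flags on the intervening operands must be
  // dropped to keep the liveness information consistent.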
  if (KilledBase)
    KilledBase->setIsKill(false);
  if (KilledIndex)
    KilledIndex->setIsKill(false);

  MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI1, 1);
  MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
  MBB.erase(I);
  MBB.erase(AluI);
  I = NewMI1;
  return true;
}

bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec,
                                 bool UseLEAForSP) const {
  MachineInstr &MI = *I;

  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
          MachineBasicBlock::LQR_Dead)
    return false;

  Register DestReg = MI.getOperand(0).getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  // Don't change stack adjustment LEAs.
  if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
    return false;

  // LEA64_32 has 64-bit operands but 32-bit result.
  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  MachineInstr *NewMI = nullptr;

  // Case 1.
  // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
  // which can be turned into add %reg2, %reg1
  if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
      (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (DestReg == BaseReg && IndexReg == 0) {
    // Case 2.
    // This is an LEA with only a base register and a displacement;
    // we can use ADDri or INC/DEC.

    // Does this LEA have one of these forms:
    // lea  %reg, 1(%reg)
    // lea  %reg, -1(%reg)
    if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
      bool IsINC = Disp.getImm() == 1;
      unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);

      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg);
      }
    } else {
      unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addImm(Disp.getImm())
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addImm(Disp.getImm());
      }
    }
  } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
    // Case 3.
    // Look for and transform the sequence
    //     lea (reg1, reg2), reg3
    //     sub reg3, reg4
    return optLEAALU(I, MBB);
  } else
    return false;

  MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
  MBB.erase(I);
  I = NewMI;
  return true;
}

void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
                                      MachineBasicBlock &MBB) {
  // Process a load, store, or LEA instruction.
  MachineInstr &MI = *I;
  const MCInstrDesc &Desc = MI.getDesc();
  int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (AddrOffset >= 0) {
    AddrOffset += X86II::getOperandBias(Desc);
    MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
    if (p.isReg() && p.getReg() != X86::ESP) {
      seekLEAFixup(p, I, MBB);
    }
    MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
    if (q.isReg() && q.getReg() != X86::ESP) {
      seekLEAFixup(q, I, MBB);
    }
  }
}

void FixupLEAPass::seekLEAFixup(MachineOperand &p,
                                MachineBasicBlock::iterator &I,
                                MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
  if (MBI != MachineBasicBlock::iterator()) {
    MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
    if (NewMI) {
      ++NumLEAs;
      LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
      // now to replace with an equivalent LEA...
      LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
      MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
      MBB.erase(MBI);
      MachineBasicBlock::iterator J =
          static_cast<MachineBasicBlock::iterator>(NewMI);
      processInstruction(J, MBB);
    }
  }
}

void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                                MachineBasicBlock &MBB) {
  MachineInstr &MI = *I;
  const unsigned Opcode = MI.getOpcode();

  const MachineOperand &Dst = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (Segment.getReg() != 0 || !Offset.isImm() ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
          MachineBasicBlock::LQR_Dead)
    return;
  const Register DstR = Dst.getReg();
  const Register SrcR1 = Base.getReg();
  const Register SrcR2 = Index.getReg();
  if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
    return;
  if (Scale.getImm() > 1)
    return;
  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
  MachineInstr *NewMI = nullptr;
  // Make ADD instruction for two registers writing to LEA's destination
  if (SrcR1 != 0 && SrcR2 != 0) {
    const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
    const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
    NewMI =
        BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
    LLVM_DEBUG(NewMI->dump(););
  }
  // Make ADD instruction for immediate
  if (Offset.getImm() != 0) {
    const MCInstrDesc &ADDri =
        TII->get(getADDriFromLEA(Opcode, Offset));
    const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
    NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
                .add(SrcR)
                .addImm(Offset.getImm());
    LLVM_DEBUG(NewMI->dump(););
  }
  if (NewMI) {
    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
  }
}

void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                             MachineBasicBlock &MBB,
                                             bool OptIncDec) {
  MachineInstr &MI = *I;
  const unsigned LEAOpcode = MI.getOpcode();

  const MachineOperand &Dest = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (!(TII->isThreeOperandsLEA(MI) ||
        hasInefficientLEABaseReg(Base, Index)) ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
          MachineBasicBlock::LQR_Dead ||
      Segment.getReg() != X86::NoRegister)
    return;

  Register DestReg = Dest.getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  bool IsScale1 = Scale.getImm() == 1;
  bool IsInefficientBase = isInefficientLEAReg(BaseReg);
  bool IsInefficientIndex = isInefficientLEAReg(IndexReg);

  // Skip these cases since it takes more than 2 instructions
  // to replace the LEA instruction.
  if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
    return;

  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);

  MachineInstr *NewMI = nullptr;

  // First try to replace LEA with one or two (for the 3-op LEA case)
  // add instructions:
  // 1.lea (%base,%index,1), %base => add %index,%base
  // 2.lea (%base,%index,1), %index => add %base,%index
  if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
    // If the base is inefficient try switching the index and base operands,
    // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
    // lea offset(%base,%index,scale),%dst =>
    // lea (%base,%index,scale),%dst; add offset,%dst
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
                .add(Dest)
                .add(IsInefficientBase ? Index : Base)
                .add(Scale)
                .add(IsInefficientBase ? Base : Index)
                .addImm(0)
                .add(Segment);
    LLVM_DEBUG(NewMI->dump(););
  }

  // If either replacement succeeded above, add the offset if needed, then
  // replace the instruction.
  if (NewMI) {
    // Create ADD instruction for the Offset in case of 3-Ops LEA.
    if (hasLEAOffset(Offset)) {
      if (OptIncDec && Offset.isImm() &&
          (Offset.getImm() == 1 || Offset.getImm() == -1)) {
        unsigned NewOpc =
            getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg);
        LLVM_DEBUG(NewMI->dump(););
      } else {
        unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg)
                    .add(Offset);
        LLVM_DEBUG(NewMI->dump(););
      }
    }

    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
    return;
  }

  // Handle the rest of the cases with inefficient base register:
  assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
  assert(IsInefficientBase && "efficient base should be handled already!");

  // FIXME: Handle LEA64_32r.
  if (LEAOpcode == X86::LEA64_32r)
    return;

  // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
  if (IsScale1 && !hasLEAOffset(Offset)) {
    bool BIK = Base.isKill() && BaseReg != IndexReg;
    TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
    LLVM_DEBUG(MI.getPrevNode()->dump(););

    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                .addReg(DestReg)
                .add(Index);
    LLVM_DEBUG(NewMI->dump(););

    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
    return;
  }

  // lea offset(%base,%index,scale), %dst =>
  // lea offset( ,%index,scale), %dst; add %base,%dst
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
              .add(Dest)
              .addReg(0)
              .add(Scale)
              .add(Index)
              .add(Offset)
              .add(Segment);
  LLVM_DEBUG(NewMI->dump(););

  unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
              .addReg(DestReg)
              .add(Base);
  LLVM_DEBUG(NewMI->dump(););

  MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
  MBB.erase(I);
  I = NewMI;
}