//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass that finds instructions that can be
// re-written as LEA instructions in order to reduce pipeline delays.
// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define FIXUPLEA_DESC "X86 LEA Fixup"
#define FIXUPLEA_NAME "x86-fixup-LEAs"

#define DEBUG_TYPE FIXUPLEA_NAME

STATISTIC(NumLEAs, "Number of LEA instructions created");

namespace {
class FixupLEAPass : public MachineFunctionPass {
  enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };

  /// Given a machine register, look for the instruction
  /// which writes it in the current basic block. If found,
  /// try to replace it with an equivalent LEA instruction.
  /// If replacement succeeds, then also process the newly created
  /// instruction.
  void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
                    MachineBasicBlock &MBB);

  /// Given a memory access or LEA instruction
  /// whose address mode uses a base and/or index register, look for
  /// an opportunity to replace the instruction which sets the base or index
  /// register with an equivalent LEA instruction.
  void processInstruction(MachineBasicBlock::iterator &I,
                          MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
  void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SNB+ try to replace it with other instructions.
  /// According to Intel's Optimization Reference Manual:
  /// " For LEA instructions with three source operands and some specific
  ///   situations, instruction latency has increased to 3 cycles, and must
  ///   dispatch via port 1:
  /// - LEA that has all three source operands: base, index, and offset
  /// - LEA that uses base and index registers where the base is EBP, RBP,
  ///   or R13
  /// - LEA that uses RIP relative addressing mode
  /// - LEA that uses 16-bit addressing mode "
  /// This function currently handles the first 2 cases only.
  void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec);

  /// Look for LEAs that are really two address LEAs that we might be able to
  /// turn into regular ADD instructions.
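  /// For example, lea 1(%rdx), %rdx can become inc %rdx, and
  /// lea (%rax,%rbx), %rax can become add %rbx, %rax.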
  bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
                     MachineBasicBlock &MBB, bool OptIncDec,
                     bool UseLEAForSP) const;

  /// Look for and transform the sequence
  ///     lea (reg1, reg2), reg3
  ///     sub reg3, reg4
  /// to
  ///     sub reg1, reg4
  ///     sub reg2, reg4
  /// It can also optimize the sequence lea/add similarly.
  bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;

  /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
  /// the dest register of LEA instruction I.
  MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
                                            MachineBasicBlock &MBB) const;

  /// Check instructions between LeaI and AluI (exclusive).
  /// Set BaseIndexDef to true if base or index register from LeaI is defined.
  /// Set AluDestRef to true if the dest register of AluI is used or defined.
  /// *KilledBase is set to the killed base register usage.
  /// *KilledIndex is set to the killed index register usage.
  void checkRegUsage(MachineBasicBlock::iterator &LeaI,
                     MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
                     bool &AluDestRef, MachineOperand **KilledBase,
                     MachineOperand **KilledIndex) const;

  /// Determine if an instruction references a machine register
  /// and, if so, whether it reads or writes the register.
  RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);

  /// Step backwards through a basic block, looking
  /// for an instruction which writes a register within
  /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
  MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
                                              MachineBasicBlock::iterator &I,
                                              MachineBasicBlock &MBB);

  /// If an instruction can be converted to an
  /// equivalent LEA, insert the new instruction into the basic block
  /// and return a pointer to it. Otherwise, return nullptr.
  MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI) const;

public:
  static char ID;

  StringRef getPassName() const override { return FIXUPLEA_DESC; }

  FixupLEAPass() : MachineFunctionPass(ID) {}

  /// Loop over all of the basic blocks,
  /// replacing instructions by equivalent LEA instructions
  /// if needed and when possible.
  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
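  // Requiring the NoVRegs property below documents that and lets the pass
  // machinery verify it.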
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  TargetSchedModel TSM;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
};
} // end anonymous namespace

char FixupLEAPass::ID = 0;

INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)

MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &MBBI) const {
  MachineInstr &MI = *MBBI;
  switch (MI.getOpcode()) {
  case X86::MOV32rr:
  case X86::MOV64rr: {
    const MachineOperand &Src = MI.getOperand(1);
    const MachineOperand &Dest = MI.getOperand(0);
    MachineInstr *NewMI =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
                                                        : X86::LEA64r))
            .add(Dest)
            .add(Src)
            .addImm(1)
            .addReg(0)
            .addImm(0)
            .addReg(0);
    return NewMI;
  }
  }

  if (!MI.isConvertibleTo3Addr())
    return nullptr;

  switch (MI.getOpcode()) {
  default:
    // Only convert instructions that we've verified are safe.
    return nullptr;
  case X86::ADD64ri32:
  case X86::ADD64ri8:
  case X86::ADD64ri32_DB:
  case X86::ADD64ri8_DB:
  case X86::ADD32ri:
  case X86::ADD32ri8:
  case X86::ADD32ri_DB:
  case X86::ADD32ri8_DB:
    if (!MI.getOperand(2).isImm()) {
      // convertToThreeAddress will call getImm()
      // which requires isImm() to be true
      return nullptr;
    }
    break;
  case X86::SHL64ri:
  case X86::SHL32ri:
  case X86::INC64r:
  case X86::INC32r:
  case X86::DEC64r:
  case X86::DEC32r:
  case X86::ADD64rr:
  case X86::ADD64rr_DB:
  case X86::ADD32rr:
  case X86::ADD32rr_DB:
    // These instructions are all fine to convert.
    break;
  }
  return TII->convertToThreeAddress(MI, nullptr, nullptr);
}

FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }

static bool isLEA(unsigned Opcode) {
  return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
         Opcode == X86::LEA64_32r;
}

bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  bool IsSlowLEA = ST.slowLEA();
  bool IsSlow3OpsLEA = ST.slow3OpsLEA();
  bool LEAUsesAG = ST.leaUsesAG();

  bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
  bool UseLEAForSP = ST.useLeaForSP();

  TSM.init(&ST);
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();
  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto *MBFI = (PSI && PSI->hasProfileSummary())
                   ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
                   : nullptr;

  LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
  for (MachineBasicBlock &MBB : MF) {
    // First pass. Try to remove or optimize existing LEAs.
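    // INC/DEC encode smaller than the equivalent ADD/SUB with an immediate,
    // so also use them when this particular block is optimized for size, even
    // if the subtarget otherwise considers them slow.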
    bool OptIncDecPerBB =
        OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
      if (!isLEA(I->getOpcode()))
        continue;

      if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
        continue;

      if (IsSlowLEA)
        processInstructionForSlowLEA(I, MBB);
      else if (IsSlow3OpsLEA)
        processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
    }

    // Second pass for creating LEAs. This may reverse some of the
    // transformations above.
    if (LEAUsesAG) {
      for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
        processInstruction(I, MBB);
    }
  }

  LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);

  return true;
}

FixupLEAPass::RegUsageState
FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
  RegUsageState RegUsage = RU_NotUsed;
  MachineInstr &MI = *I;

  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isReg() && MO.getReg() == p.getReg()) {
      if (MO.isDef())
        return RU_Write;
      RegUsage = RU_Read;
    }
  }
  return RegUsage;
}

/// getPreviousInstr - Given a reference to an instruction in a basic
/// block, return a reference to the previous instruction in the block,
/// wrapping around to the last instruction of the block if the block
/// branches to itself.
static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB) {
  if (I == MBB.begin()) {
    if (MBB.isPredecessor(&MBB)) {
      I = --MBB.end();
      return true;
    } else
      return false;
  }
  --I;
  return true;
}

MachineBasicBlock::iterator
FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
                              MachineBasicBlock &MBB) {
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst;
  static const int INSTR_DISTANCE_THRESHOLD = 5;

  CurInst = I;
  bool Found;
  Found = getPreviousInstr(CurInst, MBB);
  while (Found && I != CurInst) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
      break; // too far back to make a difference
    if (usesRegister(p, CurInst) == RU_Write) {
      return CurInst;
    }
    InstrDistance += TSM.computeInstrLatency(&*CurInst);
    Found = getPreviousInstr(CurInst, MBB);
  }
  return MachineBasicBlock::iterator();
}

static inline bool isInefficientLEAReg(unsigned Reg) {
  return Reg == X86::EBP || Reg == X86::RBP ||
         Reg == X86::R13D || Reg == X86::R13;
}

/// Returns true if this LEA uses base and index registers, and the base
/// register is known to be inefficient for the subtarget.
// TODO: use a variant scheduling class to model the latency profile
// of LEA instructions, and implement this logic as a scheduling predicate.
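// EBP/RBP/R13 can never be encoded as a base register without a displacement
// byte, which is what pushes such LEAs onto the slow path quoted from the
// Intel manual above.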
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
                                            const MachineOperand &Index) {
  return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
         Index.getReg() != X86::NoRegister;
}

static inline bool hasLEAOffset(const MachineOperand &Offset) {
  return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
}

static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::ADD32rr;
  case X86::LEA64r:
    return X86::ADD64rr;
  }
}

static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::SUB32rr;
  case X86::LEA64r:
    return X86::SUB64rr;
  }
}

static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
                                       const MachineOperand &Offset) {
  bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
  case X86::LEA64r:
    return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
  }
}

static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsINC ? X86::INC32r : X86::DEC32r;
  case X86::LEA64r:
    return IsINC ? X86::INC64r : X86::DEC64r;
  }
}

MachineBasicBlock::iterator
FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
                            MachineBasicBlock &MBB) const {
  const int InstrDistanceThreshold = 5;
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst = std::next(I);

  unsigned LEAOpcode = I->getOpcode();
  unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
  unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
  Register DestReg = I->getOperand(0).getReg();

  while (CurInst != MBB.end()) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > InstrDistanceThreshold)
      break;

    // Check if the lea dest register is used in an add/sub instruction only.
    for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
      MachineOperand &Opnd = CurInst->getOperand(I);
      if (Opnd.isReg()) {
        if (Opnd.getReg() == DestReg) {
          if (Opnd.isDef() || !Opnd.isKill())
            return MachineBasicBlock::iterator();

          unsigned AluOpcode = CurInst->getOpcode();
          if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
            return MachineBasicBlock::iterator();

          MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
          MachineOperand AluDest = CurInst->getOperand(0);
          if (Opnd2.getReg() != AluDest.getReg())
            return MachineBasicBlock::iterator();

          // X - (Y + Z) may generate different flags than (X - Y) - Z when
          // there is overflow. So we can't change the alu instruction if the
          // flags register is live.
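          // E.g. with X = 0 and Y = Z = 1: the combined sub of (Y + Z) = 2
          // borrows and sets CF, while the last sub of the split sequence,
          // -1 - 1, does not borrow and leaves CF clear.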
          if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
            return MachineBasicBlock::iterator();

          return CurInst;
        }
        if (TRI->regsOverlap(DestReg, Opnd.getReg()))
          return MachineBasicBlock::iterator();
      }
    }

    InstrDistance++;
    ++CurInst;
  }
  return MachineBasicBlock::iterator();
}

void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
                                 MachineBasicBlock::iterator &AluI,
                                 bool &BaseIndexDef, bool &AluDestRef,
                                 MachineOperand **KilledBase,
                                 MachineOperand **KilledIndex) const {
  BaseIndexDef = AluDestRef = false;
  *KilledBase = *KilledIndex = nullptr;
  Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
  Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
  Register AluDestReg = AluI->getOperand(0).getReg();

  MachineBasicBlock::iterator CurInst = std::next(LeaI);
  while (CurInst != AluI) {
    for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
      MachineOperand &Opnd = CurInst->getOperand(I);
      if (!Opnd.isReg())
        continue;
      Register Reg = Opnd.getReg();
      if (TRI->regsOverlap(Reg, AluDestReg))
        AluDestRef = true;
      if (TRI->regsOverlap(Reg, BaseReg)) {
        if (Opnd.isDef())
          BaseIndexDef = true;
        else if (Opnd.isKill())
          *KilledBase = &Opnd;
      }
      if (TRI->regsOverlap(Reg, IndexReg)) {
        if (Opnd.isDef())
          BaseIndexDef = true;
        else if (Opnd.isKill())
          *KilledIndex = &Opnd;
      }
    }
    ++CurInst;
  }
}

bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
                             MachineBasicBlock &MBB) const {
  // Look for an add/sub instruction which uses the result of lea.
  MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
  if (AluI == MachineBasicBlock::iterator())
    return false;

  // Check for any related register usage between the lea and the alu.
  bool BaseIndexDef, AluDestRef;
  MachineOperand *KilledBase, *KilledIndex;
  checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);

  MachineBasicBlock::iterator InsertPos = AluI;
  if (BaseIndexDef) {
    if (AluDestRef)
      return false;
    InsertPos = I;
    KilledBase = KilledIndex = nullptr;
  }

  // Check whether any of the registers are the same.
  Register AluDestReg = AluI->getOperand(0).getReg();
  Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
  Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
  if (I->getOpcode() == X86::LEA64_32r) {
    BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }
  if (AluDestReg == IndexReg) {
    if (BaseReg == IndexReg)
      return false;
    std::swap(BaseReg, IndexReg);
    std::swap(KilledBase, KilledIndex);
  }
  if (BaseReg == IndexReg)
    KilledBase = nullptr;

  // Now it's safe to change instructions.
  MachineInstr *NewMI1, *NewMI2;
  unsigned NewOpcode = AluI->getOpcode();
  NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
                   AluDestReg)
               .addReg(AluDestReg, RegState::Kill)
               .addReg(BaseReg, KilledBase ? RegState::Kill : 0);
  NewMI1->addRegisterDead(X86::EFLAGS, TRI);
  NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
                   AluDestReg)
               .addReg(AluDestReg, RegState::Kill)
               .addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
  NewMI2->addRegisterDead(X86::EFLAGS, TRI);

  // Clear the old Kill flags.
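  // The base and index registers are read again by the new ALU instructions
  // built above, so kill flags on any earlier uses would now be stale.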
  if (KilledBase)
    KilledBase->setIsKill(false);
  if (KilledIndex)
    KilledIndex->setIsKill(false);

  MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
  MBB.erase(I);
  MBB.erase(AluI);
  I = NewMI1;
  return true;
}

bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec,
                                 bool UseLEAForSP) const {
  MachineInstr &MI = *I;

  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
          MachineBasicBlock::LQR_Dead)
    return false;

  Register DestReg = MI.getOperand(0).getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  // Don't change stack adjustment LEAs.
  if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
    return false;

  // LEA64_32 has 64-bit operands but 32-bit result.
  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  MachineInstr *NewMI = nullptr;

  // Case 1.
  // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
  // which can be turned into add %reg2, %reg1
  if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
      (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (DestReg == BaseReg && IndexReg == 0) {
    // Case 2.
    // This is an LEA with only a base register and a displacement;
    // we can use ADDri or INC/DEC.

    // Does this LEA have one of these forms:
    //   lea  %reg, 1(%reg)
    //   lea  %reg, -1(%reg)
    if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
      bool IsINC = Disp.getImm() == 1;
      unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);

      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg);
      }
    } else {
      unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addImm(Disp.getImm())
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addImm(Disp.getImm());
      }
    }
  } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
    // Case 3.
    // Look for and transform the sequence
    //     lea (reg1, reg2), reg3
    //     sub reg3, reg4
    return optLEAALU(I, MBB);
  } else
    return false;

  MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
  MBB.erase(I);
  I = NewMI;
  return true;
}

void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
                                      MachineBasicBlock &MBB) {
  // Process a load, store, or LEA instruction.
  MachineInstr &MI = *I;
  const MCInstrDesc &Desc = MI.getDesc();
  int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (AddrOffset >= 0) {
    AddrOffset += X86II::getOperandBias(Desc);
    MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
    if (p.isReg() && p.getReg() != X86::ESP) {
      seekLEAFixup(p, I, MBB);
    }
    MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
    if (q.isReg() && q.getReg() != X86::ESP) {
      seekLEAFixup(q, I, MBB);
    }
  }
}

void FixupLEAPass::seekLEAFixup(MachineOperand &p,
                                MachineBasicBlock::iterator &I,
                                MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
  if (MBI != MachineBasicBlock::iterator()) {
    MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
    if (NewMI) {
      ++NumLEAs;
      LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
      // now to replace with an equivalent LEA...
      LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
      MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
      MBB.erase(MBI);
      MachineBasicBlock::iterator J =
          static_cast<MachineBasicBlock::iterator>(NewMI);
      processInstruction(J, MBB);
    }
  }
}

void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                                MachineBasicBlock &MBB) {
  MachineInstr &MI = *I;
  const unsigned Opcode = MI.getOpcode();

  const MachineOperand &Dst = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (Segment.getReg() != 0 || !Offset.isImm() ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
          MachineBasicBlock::LQR_Dead)
    return;
  const Register DstR = Dst.getReg();
  const Register SrcR1 = Base.getReg();
  const Register SrcR2 = Index.getReg();
  if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
    return;
  if (Scale.getImm() > 1)
    return;
  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
  MachineInstr *NewMI = nullptr;
  // Make an ADD instruction for the two registers writing to LEA's dest.
  if (SrcR1 != 0 && SrcR2 != 0) {
    const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
    const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
    NewMI =
        BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
    LLVM_DEBUG(NewMI->dump(););
  }
  // Make an ADD instruction for the immediate.
  if (Offset.getImm() != 0) {
    const MCInstrDesc &ADDri =
        TII->get(getADDriFromLEA(Opcode, Offset));
    const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
    NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
                .add(SrcR)
                .addImm(Offset.getImm());
    LLVM_DEBUG(NewMI->dump(););
  }
  if (NewMI) {
    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
  }
}

void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                             MachineBasicBlock &MBB,
                                             bool OptIncDec) {
  MachineInstr &MI = *I;
  const unsigned LEAOpcode = MI.getOpcode();

  const MachineOperand &Dest = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
          MachineBasicBlock::LQR_Dead ||
      Segment.getReg() != X86::NoRegister)
    return;

  Register DestReg = Dest.getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  bool IsScale1 = Scale.getImm() == 1;
  bool IsInefficientBase = isInefficientLEAReg(BaseReg);
  bool IsInefficientIndex = isInefficientLEAReg(IndexReg);

  // Skip these cases since it takes more than 2 instructions
  // to replace the LEA instruction.
  if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
    return;

  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);

  MachineInstr *NewMI = nullptr;

  // First try to replace LEA with one or two (for the 3-op LEA case)
  // add instructions:
  // 1. lea (%base,%index,1), %base  => add %index,%base
  // 2. lea (%base,%index,1), %index => add %base,%index
  if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
    // If the base is inefficient, try switching the index and base operands;
    // otherwise just break the 3-op LEA into a 2-op LEA and an ADD:
    //   lea offset(%base,%index,scale), %dst =>
    //   lea (%base,%index,scale), %dst; add offset, %dst
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
                .add(Dest)
                .add(IsInefficientBase ? Index : Base)
                .add(Scale)
                .add(IsInefficientBase ? Base : Index)
                .addImm(0)
                .add(Segment);
    LLVM_DEBUG(NewMI->dump(););
  }

  // If either replacement succeeded above, add the offset if needed, then
  // replace the instruction.
  if (NewMI) {
    // Create an ADD instruction for the Offset in case of 3-op LEA.
    if (hasLEAOffset(Offset)) {
      if (OptIncDec && Offset.isImm() &&
          (Offset.getImm() == 1 || Offset.getImm() == -1)) {
        unsigned NewOpc =
            getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg);
        LLVM_DEBUG(NewMI->dump(););
      } else {
        unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg)
                    .add(Offset);
        LLVM_DEBUG(NewMI->dump(););
      }
    }

    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
    return;
  }

  // Handle the rest of the cases with an inefficient base register:
  assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
  assert(IsInefficientBase && "efficient base should be handled already!");

  // FIXME: Handle LEA64_32r.
  if (LEAOpcode == X86::LEA64_32r)
    return;

  // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
  if (IsScale1 && !hasLEAOffset(Offset)) {
    bool BIK = Base.isKill() && BaseReg != IndexReg;
    TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
    LLVM_DEBUG(MI.getPrevNode()->dump(););

    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                .addReg(DestReg)
                .add(Index);
    LLVM_DEBUG(NewMI->dump(););

    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
    return;
  }

  // lea offset(%base,%index,scale), %dst =>
  // lea offset( ,%index,scale), %dst; add %base,%dst
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
              .add(Dest)
              .addReg(0)
              .add(Scale)
              .add(Index)
              .add(Offset)
              .add(Segment);
  LLVM_DEBUG(NewMI->dump(););

  unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
              .addReg(DestReg)
              .add(Base);
  LLVM_DEBUG(NewMI->dump(););

  MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
  MBB.erase(I);
  I = NewMI;
}