//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// The pass tries to use the 32-bit encoding for instructions when possible.
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

#define DEBUG_TYPE "si-shrink-instructions"

STATISTIC(NumInstructionsShrunk,
          "Number of 64-bit instructions reduced to 32-bit.");
STATISTIC(NumLiteralConstantsFolded,
          "Number of literal constants folded into 32-bit instructions.");

using namespace llvm;

namespace {

class SIShrinkInstructions : public MachineFunctionPass {
  MachineFunction *MF;
  MachineRegisterInfo *MRI;
  const GCNSubtarget *ST;
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;

public:
  static char ID;

public:
  SIShrinkInstructions() : MachineFunctionPass(ID) {
  }

  bool foldImmediates(MachineInstr &MI, bool TryToCommute = true) const;
  bool shouldShrinkTrue16(MachineInstr &MI) const;
  bool isKImmOperand(const MachineOperand &Src) const;
  bool isKUImmOperand(const MachineOperand &Src) const;
  bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const;
  bool isReverseInlineImm(const MachineOperand &Src, int32_t &ReverseImm) const;
  void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &MI) const;
  void shrinkScalarCompare(MachineInstr &MI) const;
  void shrinkMIMG(MachineInstr &MI) const;
  void shrinkMadFma(MachineInstr &MI) const;
  bool shrinkScalarLogicOp(MachineInstr &MI) const;
  bool tryReplaceDeadSDST(MachineInstr &MI) const;
  bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
                     Register Reg, unsigned SubReg) const;
  bool instReadsReg(const MachineInstr *MI, unsigned Reg,
                    unsigned SubReg) const;
  bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
                       unsigned SubReg) const;
  TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
                                                   unsigned I) const;
  void dropInstructionKeepingImpDefs(MachineInstr &MI) const;
  MachineInstr *matchSwap(MachineInstr &MovT) const;

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Shrink Instructions"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIShrinkInstructions, DEBUG_TYPE,
                "SI Shrink Instructions", false, false)

char SIShrinkInstructions::ID = 0;

FunctionPass *llvm::createSIShrinkInstructionsPass() {
  return new SIShrinkInstructions();
}

/// This function checks \p MI for operands defined by a move immediate
/// instruction and then folds the literal constant into the instruction if it
/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
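/// For example (illustrative, assuming the fold is otherwise legal):
///   %0 = V_MOV_B32_e32 0x40490fdb
///   %1 = V_ADD_F32_e32 %0, %2
/// becomes
///   %1 = V_ADD_F32_e32 0x40490fdb, %2
/// and the V_MOV_B32 is erased once its value has no remaining uses.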
bool SIShrinkInstructions::foldImmediates(MachineInstr &MI,
                                          bool TryToCommute) const {
  assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));

  int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

  // Try to fold Src0
  MachineOperand &Src0 = MI.getOperand(Src0Idx);
  if (Src0.isReg()) {
    Register Reg = Src0.getReg();
    if (Reg.isVirtual()) {
      MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
      if (Def && Def->isMoveImmediate()) {
        MachineOperand &MovSrc = Def->getOperand(1);
        bool ConstantFolded = false;

        if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) {
          if (MovSrc.isImm() &&
              (isInt<32>(MovSrc.getImm()) || isUInt<32>(MovSrc.getImm()))) {
            Src0.ChangeToImmediate(MovSrc.getImm());
            ConstantFolded = true;
          } else if (MovSrc.isFI()) {
            Src0.ChangeToFrameIndex(MovSrc.getIndex());
            ConstantFolded = true;
          } else if (MovSrc.isGlobal()) {
            Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
                            MovSrc.getTargetFlags());
            ConstantFolded = true;
          }
        }

        if (ConstantFolded) {
          if (MRI->use_nodbg_empty(Reg))
            Def->eraseFromParent();
          ++NumLiteralConstantsFolded;
          return true;
        }
      }
    }
  }

  // We have failed to fold src0, so commute the instruction and try again.
  if (TryToCommute && MI.isCommutable()) {
    if (TII->commuteInstruction(MI)) {
      if (foldImmediates(MI, false))
        return true;

      // Commute back.
      TII->commuteInstruction(MI);
    }
  }

  return false;
}

/// Do not shrink the instruction if its registers are not expressible in the
/// shrunk encoding.
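/// For example (illustrative): true16 operands can only address the low 128
/// VGPRs (the VGPR_32_Lo128 class), so a VGPR_32 operand allocated outside
/// that range makes the instruction unshrinkable.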
bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
  for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
    const MachineOperand &MO = MI.getOperand(I);
    if (MO.isReg()) {
      Register Reg = MO.getReg();
      assert(!Reg.isVirtual() && "Prior checks should ensure we only shrink "
                                 "True16 Instructions post-RA");
      if (AMDGPU::VGPR_32RegClass.contains(Reg) &&
          !AMDGPU::VGPR_32_Lo128RegClass.contains(Reg))
        return false;
    }
  }
  return true;
}

bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const {
  return isInt<16>(Src.getImm()) &&
         !TII->isInlineConstant(*Src.getParent(),
                                Src.getParent()->getOperandNo(&Src));
}

bool SIShrinkInstructions::isKUImmOperand(const MachineOperand &Src) const {
  return isUInt<16>(Src.getImm()) &&
         !TII->isInlineConstant(*Src.getParent(),
                                Src.getParent()->getOperandNo(&Src));
}

bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src,
                                                bool &IsUnsigned) const {
  if (isInt<16>(Src.getImm())) {
    IsUnsigned = false;
    return !TII->isInlineConstant(Src);
  }

  if (isUInt<16>(Src.getImm())) {
    IsUnsigned = true;
    return !TII->isInlineConstant(Src);
  }

  return false;
}

/// \returns true if the constant in \p Src should be replaced with a bitreverse
/// of an inline immediate.
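/// For example (illustrative): 0x80000000 is not an inline constant, but
/// reverseBits(0x80000000) == 1 is, so
///   v_mov_b32 v0, 0x80000000   ; 4-byte opcode + 4-byte literal
/// can instead be encoded as
///   v_bfrev_b32 v0, 1          ; 4 bytes, inline immediate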
bool SIShrinkInstructions::isReverseInlineImm(const MachineOperand &Src,
                                              int32_t &ReverseImm) const {
  if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src))
    return false;

  ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Src.getImm()));
  return ReverseImm >= -16 && ReverseImm <= 64;
}

/// Copy implicit register operands from specified instruction to this
/// instruction that are not part of the instruction definition.
void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
                                                MachineInstr &MI) const {
  MachineFunction &MF = *MI.getMF();
  for (unsigned i = MI.getDesc().getNumOperands() +
                    MI.getDesc().implicit_uses().size() +
                    MI.getDesc().implicit_defs().size(),
                e = MI.getNumOperands();
       i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
      NewMI.addOperand(MF, MO);
  }
}

void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
  // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
  // get constants on the RHS.
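  // For example (illustrative): the 16-bit immediate fits in the SOPK encoding
  // itself, so
  //   s_cmp_eq_u32 s0, 0x1234    ; SOPC + 32-bit literal, 8 bytes
  // can become
  //   s_cmpk_eq_u32 s0, 0x1234   ; SOPK with imm16, 4 bytes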
  if (!MI.getOperand(0).isReg())
    TII->commuteInstruction(MI, false, 0, 1);

  // cmpk requires src0 to be a register
  const MachineOperand &Src0 = MI.getOperand(0);
  if (!Src0.isReg())
    return;

  const MachineOperand &Src1 = MI.getOperand(1);
  if (!Src1.isImm())
    return;

  int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
  if (SOPKOpc == -1)
    return;

  // eq/ne is special because the imm16 can be treated as signed or unsigned,
  // and initially selected to the unsigned versions.
  if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
    bool HasUImm;
    if (isKImmOrKUImmOperand(Src1, HasUImm)) {
      if (!HasUImm) {
        SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
          AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
      }

      MI.setDesc(TII->get(SOPKOpc));
    }

    return;
  }

  const MCInstrDesc &NewDesc = TII->get(SOPKOpc);

  if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(Src1)) ||
      (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(Src1))) {
    MI.setDesc(NewDesc);
  }
}

// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  if (!Info)
    return;

  uint8_t NewEncoding;
  switch (Info->MIMGEncoding) {
  case AMDGPU::MIMGEncGfx10NSA:
    NewEncoding = AMDGPU::MIMGEncGfx10Default;
    break;
  case AMDGPU::MIMGEncGfx11NSA:
    NewEncoding = AMDGPU::MIMGEncGfx11Default;
    break;
  default:
    return;
  }

  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  unsigned NewAddrDwords = Info->VAddrDwords;
  const TargetRegisterClass *RC;

  if (Info->VAddrDwords == 2) {
    RC = &AMDGPU::VReg_64RegClass;
  } else if (Info->VAddrDwords == 3) {
    RC = &AMDGPU::VReg_96RegClass;
  } else if (Info->VAddrDwords == 4) {
    RC = &AMDGPU::VReg_128RegClass;
  } else if (Info->VAddrDwords == 5) {
    RC = &AMDGPU::VReg_160RegClass;
  } else if (Info->VAddrDwords == 6) {
    RC = &AMDGPU::VReg_192RegClass;
  } else if (Info->VAddrDwords == 7) {
    RC = &AMDGPU::VReg_224RegClass;
  } else if (Info->VAddrDwords == 8) {
    RC = &AMDGPU::VReg_256RegClass;
  } else if (Info->VAddrDwords == 9) {
    RC = &AMDGPU::VReg_288RegClass;
  } else if (Info->VAddrDwords == 10) {
    RC = &AMDGPU::VReg_320RegClass;
  } else if (Info->VAddrDwords == 11) {
    RC = &AMDGPU::VReg_352RegClass;
  } else if (Info->VAddrDwords == 12) {
    RC = &AMDGPU::VReg_384RegClass;
  } else {
    RC = &AMDGPU::VReg_512RegClass;
    NewAddrDwords = 16;
  }

  unsigned VgprBase = 0;
  unsigned NextVgpr = 0;
  bool IsUndef = true;
  bool IsKill = NewAddrDwords == Info->VAddrDwords;
  for (unsigned Idx = 0; Idx < Info->VAddrOperands; ++Idx) {
    const MachineOperand &Op = MI.getOperand(VAddr0Idx + Idx);
    unsigned Vgpr = TRI->getHWRegIndex(Op.getReg());
    unsigned Dwords = TRI->getRegSizeInBits(Op.getReg(), *MRI) / 32;
    assert(Dwords > 0 && "Unimplemented for less-than-32-bit regs");

    if (Idx == 0) {
      VgprBase = Vgpr;
      NextVgpr = Vgpr + Dwords;
    } else if (Vgpr == NextVgpr) {
      NextVgpr = Vgpr + Dwords;
    } else {
      return;
    }

    if (!Op.isUndef())
      IsUndef = false;
    if (!Op.isKill())
      IsKill = false;
  }

  if (VgprBase + NewAddrDwords > 256)
    return;

  // Further check for implicit tied operands - these may be present if TFE is
  // enabled
  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
  int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
  unsigned TFEVal = (TFEIdx == -1) ? 0 : MI.getOperand(TFEIdx).getImm();
  unsigned LWEVal = (LWEIdx == -1) ? 0 : MI.getOperand(LWEIdx).getImm();
  int ToUntie = -1;
  if (TFEVal || LWEVal) {
    // TFE/LWE is enabled so we need to deal with an implicit tied operand
    for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
      if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
          MI.getOperand(i).isImplicit()) {
        // This is the tied operand
        assert(
            ToUntie == -1 &&
            "found more than one tied implicit operand when expecting only 1");
        ToUntie = i;
        MI.untieRegOperand(ToUntie);
      }
    }
  }

  unsigned NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, NewEncoding,
                                             Info->VDataDwords, NewAddrDwords);
  MI.setDesc(TII->get(NewOpcode));
  MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
  MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
  MI.getOperand(VAddr0Idx).setIsKill(IsKill);

  for (int i = 1; i < Info->VAddrOperands; ++i)
    MI.removeOperand(VAddr0Idx + 1);

  if (ToUntie >= 0) {
    MI.tieOperands(
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
        ToUntie - (Info->VAddrOperands - 1));
  }
}

// Shrink MAD to MADAK/MADMK and FMA to FMAAK/FMAMK.
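// For example (illustrative): on targets where the VOP3 form carries a 32-bit
// literal,
//   v_fma_f32 v0, v1, v2, 0x42f60000    ; VOP3 + literal, 12 bytes
// becomes the VOP2 form
//   v_fmaak_f32 v0, v1, v2, 0x42f60000  ; 8 bytes, dst = src0 * src1 + K
// while a non-inline multiplier selects the MK form:
//   v_fmamk_f32 v0, v1, 0x42f60000, v2  ; dst = src0 * K + src1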
void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
  // Pre-GFX10 VOP3 instructions like MAD/FMA cannot take a literal operand so
  // there is no reason to try to shrink them.
  if (!ST->hasVOP3Literal())
    return;

  // There is no advantage to doing this pre-RA.
  if (!MF->getProperties().hasProperty(
          MachineFunctionProperties::Property::NoVRegs))
    return;

  if (TII->hasAnyModifiersSet(MI))
    return;

  const unsigned Opcode = MI.getOpcode();
  MachineOperand &Src0 = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  MachineOperand &Src1 = *TII->getNamedOperand(MI, AMDGPU::OpName::src1);
  MachineOperand &Src2 = *TII->getNamedOperand(MI, AMDGPU::OpName::src2);
  unsigned NewOpcode = AMDGPU::INSTRUCTION_LIST_END;

  bool Swap;

  // Detect "Dst = VSrc * VGPR + Imm" and convert to AK form.
  if (Src2.isImm() && !TII->isInlineConstant(Src2)) {
    if (Src1.isReg() && TRI->isVGPR(*MRI, Src1.getReg()))
      Swap = false;
    else if (Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg()))
      Swap = true;
    else
      return;

    switch (Opcode) {
    default:
      llvm_unreachable("Unexpected mad/fma opcode!");
    case AMDGPU::V_MAD_F32_e64:
      NewOpcode = AMDGPU::V_MADAK_F32;
      break;
    case AMDGPU::V_FMA_F32_e64:
      NewOpcode = AMDGPU::V_FMAAK_F32;
      break;
    case AMDGPU::V_MAD_F16_e64:
      NewOpcode = AMDGPU::V_MADAK_F16;
      break;
    case AMDGPU::V_FMA_F16_e64:
    case AMDGPU::V_FMA_F16_gfx9_e64:
      NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16
                                          : AMDGPU::V_FMAAK_F16;
      break;
    }
  }

  // Detect "Dst = VSrc * Imm + VGPR" and convert to MK form.
  if (Src2.isReg() && TRI->isVGPR(*MRI, Src2.getReg())) {
    if (Src1.isImm() && !TII->isInlineConstant(Src1))
      Swap = false;
    else if (Src0.isImm() && !TII->isInlineConstant(Src0))
      Swap = true;
    else
      return;

    switch (Opcode) {
    default:
      llvm_unreachable("Unexpected mad/fma opcode!");
    case AMDGPU::V_MAD_F32_e64:
      NewOpcode = AMDGPU::V_MADMK_F32;
      break;
    case AMDGPU::V_FMA_F32_e64:
      NewOpcode = AMDGPU::V_FMAMK_F32;
      break;
    case AMDGPU::V_MAD_F16_e64:
      NewOpcode = AMDGPU::V_MADMK_F16;
      break;
    case AMDGPU::V_FMA_F16_e64:
    case AMDGPU::V_FMA_F16_gfx9_e64:
      NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16
                                          : AMDGPU::V_FMAMK_F16;
      break;
    }
  }

  if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
    return;

  if (AMDGPU::isTrue16Inst(NewOpcode) && !shouldShrinkTrue16(MI))
    return;

  if (Swap) {
    // Swap Src0 and Src1 by building a new instruction.
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(NewOpcode),
            MI.getOperand(0).getReg())
        .add(Src1)
        .add(Src0)
        .add(Src2)
        .setMIFlags(MI.getFlags());
    MI.eraseFromParent();
  } else {
    TII->removeModOperands(MI);
    MI.setDesc(TII->get(NewOpcode));
  }
}

/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
/// XNOR (as a ^ b == ~(a ^ ~b)).
/// \returns true if the caller should continue the machine function iterator.
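/// For example (illustrative):
///   s_and_b32 s0, s0, 0xfffeffff  ->  s_bitset0_b32 s0, 16   (clear bit 16)
///   s_or_b32  s0, s0, 0x00010000  ->  s_bitset1_b32 s0, 16   (set bit 16)
///   s_xor_b32 s0, s0, 0xffffffc0  ->  s_xnor_b32 s0, s0, 63  (~imm is inline)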
bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  const MachineOperand *Dest = &MI.getOperand(0);
  MachineOperand *Src0 = &MI.getOperand(1);
  MachineOperand *Src1 = &MI.getOperand(2);
  MachineOperand *SrcReg = Src0;
  MachineOperand *SrcImm = Src1;

  if (!SrcImm->isImm() ||
      AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST->hasInv2PiInlineImm()))
    return false;

  uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
  uint32_t NewImm = 0;

  if (Opc == AMDGPU::S_AND_B32) {
    if (isPowerOf2_32(~Imm)) {
      NewImm = countTrailingOnes(Imm);
      Opc = AMDGPU::S_BITSET0_B32;
    } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
      NewImm = ~Imm;
      Opc = AMDGPU::S_ANDN2_B32;
    }
  } else if (Opc == AMDGPU::S_OR_B32) {
    if (isPowerOf2_32(Imm)) {
      NewImm = countTrailingZeros(Imm);
      Opc = AMDGPU::S_BITSET1_B32;
    } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
      NewImm = ~Imm;
      Opc = AMDGPU::S_ORN2_B32;
    }
  } else if (Opc == AMDGPU::S_XOR_B32) {
    if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
      NewImm = ~Imm;
      Opc = AMDGPU::S_XNOR_B32;
    }
  } else {
    llvm_unreachable("unexpected opcode");
  }

  if (NewImm != 0) {
    if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
      MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
      MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
      return true;
    }

    if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
      const bool IsUndef = SrcReg->isUndef();
      const bool IsKill = SrcReg->isKill();
      MI.setDesc(TII->get(Opc));
      if (Opc == AMDGPU::S_BITSET0_B32 ||
          Opc == AMDGPU::S_BITSET1_B32) {
        Src0->ChangeToImmediate(NewImm);
        // Remove the immediate and add the tied input.
        MI.getOperand(2).ChangeToRegister(Dest->getReg(), /*IsDef*/ false,
                                          /*isImp*/ false, IsKill,
                                          /*isDead*/ false, IsUndef);
        MI.tieOperands(0, 2);
      } else {
        SrcImm->setImm(NewImm);
      }
    }
  }

  return false;
}

// This is the same as MachineInstr::readsRegister/modifiesRegister except
// it takes subregs into account.
bool SIShrinkInstructions::instAccessReg(
    iterator_range<MachineInstr::const_mop_iterator> &&R, Register Reg,
    unsigned SubReg) const {
  for (const MachineOperand &MO : R) {
    if (!MO.isReg())
      continue;

    if (Reg.isPhysical() && MO.getReg().isPhysical()) {
      if (TRI->regsOverlap(Reg, MO.getReg()))
        return true;
    } else if (MO.getReg() == Reg && Reg.isVirtual()) {
      LaneBitmask Overlap = TRI->getSubRegIndexLaneMask(SubReg) &
                            TRI->getSubRegIndexLaneMask(MO.getSubReg());
      if (Overlap.any())
        return true;
    }
  }
  return false;
}

bool SIShrinkInstructions::instReadsReg(const MachineInstr *MI, unsigned Reg,
                                        unsigned SubReg) const {
  return instAccessReg(MI->uses(), Reg, SubReg);
}

bool SIShrinkInstructions::instModifiesReg(const MachineInstr *MI, unsigned Reg,
                                           unsigned SubReg) const {
  return instAccessReg(MI->defs(), Reg, SubReg);
}

TargetInstrInfo::RegSubRegPair
SIShrinkInstructions::getSubRegForIndex(Register Reg, unsigned Sub,
                                        unsigned I) const {
  if (TRI->getRegSizeInBits(Reg, *MRI) != 32) {
    if (Reg.isPhysical()) {
      Reg = TRI->getSubReg(Reg, TRI->getSubRegFromChannel(I));
    } else {
      Sub = TRI->getSubRegFromChannel(I + TRI->getChannelFromSubReg(Sub));
    }
  }
  return TargetInstrInfo::RegSubRegPair(Reg, Sub);
}

void SIShrinkInstructions::dropInstructionKeepingImpDefs(
    MachineInstr &MI) const {
  for (unsigned i = MI.getDesc().getNumOperands() +
                    MI.getDesc().implicit_uses().size() +
                    MI.getDesc().implicit_defs().size(),
                e = MI.getNumOperands();
       i != e; ++i) {
    const MachineOperand &Op = MI.getOperand(i);
    if (!Op.isDef())
      continue;
    BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
            TII->get(AMDGPU::IMPLICIT_DEF), Op.getReg());
  }

  MI.eraseFromParent();
}

// Match:
// mov t, x
// mov x, y
// mov y, t
//
// =>
//
// mov t, x (t is potentially dead and move eliminated)
// v_swap_b32 x, y
//
// Returns a pointer to the next valid instruction if it was able to create
// v_swap_b32.
//
// This must not be done too early, so as not to prevent possible folding that
// may remove the matched moves. It should preferably be done before RA to
// release the saved registers, and possibly again after RA, which can insert
// copies too.
//
// This is really just a generic peephole that is not a canonical shrinking,
// although requirements match the pass placement and it reduces code size too.
MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
  assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
         MovT.getOpcode() == AMDGPU::COPY);

  Register T = MovT.getOperand(0).getReg();
  unsigned Tsub = MovT.getOperand(0).getSubReg();
  MachineOperand &Xop = MovT.getOperand(1);

  if (!Xop.isReg())
    return nullptr;
  Register X = Xop.getReg();
  unsigned Xsub = Xop.getSubReg();

  unsigned Size = TII->getOpSize(MovT, 0) / 4;

  if (!TRI->isVGPR(*MRI, X))
    return nullptr;

  const unsigned SearchLimit = 16;
  unsigned Count = 0;
  bool KilledT = false;
  for (auto Iter = std::next(MovT.getIterator()),
            E = MovT.getParent()->instr_end();
       Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) {

    MachineInstr *MovY = &*Iter;
    KilledT = MovY->killsRegister(T, TRI);

    if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
         MovY->getOpcode() != AMDGPU::COPY) ||
        !MovY->getOperand(1).isReg() ||
        MovY->getOperand(1).getReg() != T ||
        MovY->getOperand(1).getSubReg() != Tsub)
      continue;

    Register Y = MovY->getOperand(0).getReg();
    unsigned Ysub = MovY->getOperand(0).getSubReg();

    if (!TRI->isVGPR(*MRI, Y))
      continue;

    MachineInstr *MovX = nullptr;
    for (auto IY = MovY->getIterator(), I = std::next(MovT.getIterator());
         I != IY; ++I) {
      if (instReadsReg(&*I, X, Xsub) || instModifiesReg(&*I, Y, Ysub) ||
          instModifiesReg(&*I, T, Tsub) ||
          (MovX && instModifiesReg(&*I, X, Xsub))) {
        MovX = nullptr;
        break;
      }
      if (!instReadsReg(&*I, Y, Ysub)) {
        if (!MovX && instModifiesReg(&*I, X, Xsub)) {
          MovX = nullptr;
          break;
        }
        continue;
      }
      if (MovX ||
          (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
           I->getOpcode() != AMDGPU::COPY) ||
          I->getOperand(0).getReg() != X ||
          I->getOperand(0).getSubReg() != Xsub) {
        MovX = nullptr;
        break;
      }

      if (Size > 1 && (I->getNumImplicitOperands() > (I->isCopy() ? 0U : 1U)))
        continue;

      MovX = &*I;
    }

    if (!MovX)
      continue;

    LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << *MovY);

    for (unsigned I = 0; I < Size; ++I) {
      TargetInstrInfo::RegSubRegPair X1, Y1;
      X1 = getSubRegForIndex(X, Xsub, I);
      Y1 = getSubRegForIndex(Y, Ysub, I);
      MachineBasicBlock &MBB = *MovT.getParent();
      auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
                         TII->get(AMDGPU::V_SWAP_B32))
                     .addDef(X1.Reg, 0, X1.SubReg)
                     .addDef(Y1.Reg, 0, Y1.SubReg)
                     .addReg(Y1.Reg, 0, Y1.SubReg)
                     .addReg(X1.Reg, 0, X1.SubReg)
                     .getInstr();
      if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
        // Drop implicit EXEC.
        MIB->removeOperand(MIB->getNumExplicitOperands());
        MIB->copyImplicitOps(*MBB.getParent(), *MovX);
      }
    }
    MovX->eraseFromParent();
    dropInstructionKeepingImpDefs(*MovY);
    MachineInstr *Next = &*std::next(MovT.getIterator());

    if (T.isVirtual() && MRI->use_nodbg_empty(T)) {
      dropInstructionKeepingImpDefs(MovT);
    } else {
      Xop.setIsKill(false);
      for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I) {
        unsigned OpNo = MovT.getNumExplicitOperands() + I;
        const MachineOperand &Op = MovT.getOperand(OpNo);
        if (Op.isKill() && TRI->regsOverlap(X, Op.getReg()))
          MovT.removeOperand(OpNo);
      }
    }

    return Next;
  }

  return nullptr;
}

// If an instruction has a dead sdst, replace it with the NULL register on
// gfx1030+.
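// For example (illustrative): if the carry-out of
//   v_add_co_u32 v0, s[4:5], v1, v2
// is never read, the SGPR pair can be released:
//   v_add_co_u32 v0, null, v1, v2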
bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
  if (!ST->hasGFX10_3Insts())
    return false;

  MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
  if (!Op)
    return false;
  Register SDstReg = Op->getReg();
  if (SDstReg.isPhysical() || !MRI->use_nodbg_empty(SDstReg))
    return false;

  Op->setReg(ST->isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64);
  return true;
}

bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  this->MF = &MF;
  MRI = &MF.getRegInfo();
  ST = &MF.getSubtarget<GCNSubtarget>();
  TII = ST->getInstrInfo();
  TRI = &TII->getRegisterInfo();

  unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;

  std::vector<unsigned> I1Defs;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
        // If this has a literal constant source that is the same as the
        // reversed bits of an inline immediate, replace with a bitreverse of
        // that constant. This saves 4 bytes in the common case of materializing
        // sign bits.

        // Test if we are after regalloc. We only want to do this after any
        // optimizations happen because this will confuse them.
        // XXX - not exactly a check for post-regalloc run.
        MachineOperand &Src = MI.getOperand(1);
        if (Src.isImm() && MI.getOperand(0).getReg().isPhysical()) {
          int32_t ReverseImm;
          if (isReverseInlineImm(Src, ReverseImm)) {
            MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
            Src.setImm(ReverseImm);
            continue;
          }
        }
      }

      if (ST->hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
                            MI.getOpcode() == AMDGPU::COPY)) {
        if (auto *NextMI = matchSwap(MI)) {
          Next = NextMI->getIterator();
          continue;
        }
      }

      // Try to use S_ADDK_I32 and S_MULK_I32.
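      // For example (illustrative), when the destination is already tied to
      // one source:
      //   s_add_i32 s0, s0, 0x1234   ; SOP2 + 32-bit literal, 8 bytes
      // becomes
      //   s_addk_i32 s0, 0x1234      ; SOPK with imm16, 4 bytes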
      if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
          MI.getOpcode() == AMDGPU::S_MUL_I32) {
        const MachineOperand *Dest = &MI.getOperand(0);
        MachineOperand *Src0 = &MI.getOperand(1);
        MachineOperand *Src1 = &MI.getOperand(2);

        if (!Src0->isReg() && Src1->isReg()) {
          if (TII->commuteInstruction(MI, false, 1, 2))
            std::swap(Src0, Src1);
        }

        // FIXME: This could work better if hints worked with subregisters. If
        // we have a vector add of a constant, we usually don't get the correct
        // allocation due to the subregister usage.
        if (Dest->getReg().isVirtual() && Src0->isReg()) {
          MRI->setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
          MRI->setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
          continue;
        }

        if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
          if (Src1->isImm() && isKImmOperand(*Src1)) {
            unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
              AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;

            MI.setDesc(TII->get(Opc));
            MI.tieOperands(0, 1);
          }
        }
      }

      // Try to use s_cmpk_*
      if (MI.isCompare() && TII->isSOPC(MI)) {
        shrinkScalarCompare(MI);
        continue;
      }

      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
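      // For example (illustrative):
      //   s_mov_b32 s0, 0x1234   ; SOP1 + 32-bit literal, 8 bytes
      // becomes
      //   s_movk_i32 s0, 0x1234  ; SOPK with imm16, 4 bytes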
      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
        const MachineOperand &Dst = MI.getOperand(0);
        MachineOperand &Src = MI.getOperand(1);

        if (Src.isImm() && Dst.getReg().isPhysical()) {
          int32_t ReverseImm;
          if (isKImmOperand(Src))
            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
          else if (isReverseInlineImm(Src, ReverseImm)) {
            MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
            Src.setImm(ReverseImm);
          }
        }

        continue;
      }

      // Shrink scalar logic operations.
      if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
          MI.getOpcode() == AMDGPU::S_OR_B32 ||
          MI.getOpcode() == AMDGPU::S_XOR_B32) {
        if (shrinkScalarLogicOp(MI))
          continue;
      }

      if (TII->isMIMG(MI.getOpcode()) &&
          ST->getGeneration() >= AMDGPUSubtarget::GFX10 &&
          MF.getProperties().hasProperty(
              MachineFunctionProperties::Property::NoVRegs)) {
        shrinkMIMG(MI);
        continue;
      }

      if (!TII->isVOP3(MI))
        continue;

      if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
          MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64) {
        shrinkMadFma(MI);
        continue;
      }

      if (!TII->hasVALU32BitEncoding(MI.getOpcode())) {
        // If there is no chance we will shrink it and use VCC as sdst to get a
        // 32-bit form, try to replace a dead sdst with NULL.
        tryReplaceDeadSDST(MI);
        continue;
      }

      if (!TII->canShrink(MI, *MRI)) {
        // Try commuting the instruction and see if that enables us to shrink
        // it.
        if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
            !TII->canShrink(MI, *MRI)) {
          tryReplaceDeadSDST(MI);
          continue;
        }
      }

      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

      if (TII->isVOPC(Op32)) {
        MachineOperand &Op0 = MI.getOperand(0);
        if (Op0.isReg()) {
          // Exclude VOPCX instructions as these don't explicitly write a
          // dst.
          Register DstReg = Op0.getReg();
          if (DstReg.isVirtual()) {
            // VOPC instructions can only write to the VCC register. We can't
            // force them to use VCC here, because this is only one register and
            // cannot deal with sequences which would require multiple copies of
            // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
            //
            // So, instead of forcing the instruction to write to VCC, we
            // provide a hint to the register allocator to use VCC and then we
            // will run this pass again after RA and shrink it if it outputs to
            // VCC.
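            // For example (illustrative, wave64):
            //   v_cmp_lt_f32_e64 s[0:1], v0, v1   ; VOP3, 8 bytes
            // can only shrink to the 4-byte
            //   v_cmp_lt_f32_e32 vcc, v0, v1
            // if the allocator actually assigns the result to VCC.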
            MRI->setRegAllocationHint(DstReg, 0, VCCReg);
            continue;
          }
          if (DstReg != VCCReg)
            continue;
        }
      }

      if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
        // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
        // instructions.
        const MachineOperand *Src2 =
            TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        if (!Src2->isReg())
          continue;
        Register SReg = Src2->getReg();
        if (SReg.isVirtual()) {
          MRI->setRegAllocationHint(SReg, 0, VCCReg);
          continue;
        }
        if (SReg != VCCReg)
          continue;
      }

      // Check for the bool flag output for instructions like V_ADD_I32_e64.
      const MachineOperand *SDst = TII->getNamedOperand(MI,
                                                        AMDGPU::OpName::sdst);

      if (SDst) {
        bool Next = false;

        if (SDst->getReg() != VCCReg) {
          if (SDst->getReg().isVirtual())
            MRI->setRegAllocationHint(SDst->getReg(), 0, VCCReg);
          Next = true;
        }

        // All of the instructions with carry outs also have an SGPR input in
        // src2.
        const MachineOperand *Src2 = TII->getNamedOperand(MI,
                                                          AMDGPU::OpName::src2);
        if (Src2 && Src2->getReg() != VCCReg) {
          if (Src2->getReg().isVirtual())
            MRI->setRegAllocationHint(Src2->getReg(), 0, VCCReg);
          Next = true;
        }

        if (Next)
          continue;
      }

      // Pre-GFX10, shrinking VOP3 instructions pre-RA gave us the chance to
      // fold an immediate into the shrunk instruction as a literal operand. In
      // GFX10 VOP3 instructions can take a literal operand anyway, so there is
      // no advantage to doing this.
      if (ST->hasVOP3Literal() &&
          !MF.getProperties().hasProperty(
              MachineFunctionProperties::Property::NoVRegs))
        continue;

      if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
          !shouldShrinkTrue16(MI))
        continue;

      // We can shrink this instruction
      LLVM_DEBUG(dbgs() << "Shrinking " << MI);

      MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
      ++NumInstructionsShrunk;

      // Copy extra operands not present in the instruction definition.
      copyExtraImplicitOps(*Inst32, MI);

      // Copy deadness from the old explicit vcc def to the new implicit def.
      if (SDst && SDst->isDead())
        Inst32->findRegisterDefOperand(VCCReg)->setIsDead();

      MI.eraseFromParent();
      foldImmediates(*Inst32);

      LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
    }
  }
  return false;
}