//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// The pass tries to use the 32-bit encoding for instructions when possible.
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

#define DEBUG_TYPE "si-shrink-instructions"

STATISTIC(NumInstructionsShrunk,
          "Number of 64-bit instructions reduced to 32-bit.");
STATISTIC(NumLiteralConstantsFolded,
          "Number of literal constants folded into 32-bit instructions.");

using namespace llvm;

namespace {

class SIShrinkInstructions : public MachineFunctionPass {
  MachineFunction *MF;
  MachineRegisterInfo *MRI;
  const GCNSubtarget *ST;
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;

public:
  static char ID;

public:
  SIShrinkInstructions() : MachineFunctionPass(ID) {
  }

  bool foldImmediates(MachineInstr &MI, bool TryToCommute = true) const;
  bool shouldShrinkTrue16(MachineInstr &MI) const;
  bool isKImmOperand(const MachineOperand &Src) const;
  bool isKUImmOperand(const MachineOperand &Src) const;
  bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const;
  bool isReverseInlineImm(const MachineOperand &Src, int32_t &ReverseImm) const;
  void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &MI) const;
  void shrinkScalarCompare(MachineInstr &MI) const;
  void shrinkMIMG(MachineInstr &MI) const;
  void shrinkMadFma(MachineInstr &MI) const;
  bool shrinkScalarLogicOp(MachineInstr &MI) const;
  bool tryReplaceDeadSDST(MachineInstr &MI) const;
  bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
                     Register Reg, unsigned SubReg) const;
  bool instReadsReg(const MachineInstr *MI, unsigned Reg,
                    unsigned SubReg) const;
  bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
                       unsigned SubReg) const;
  TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
                                                   unsigned I) const;
  void dropInstructionKeepingImpDefs(MachineInstr &MI) const;
  MachineInstr *matchSwap(MachineInstr &MovT) const;

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Shrink Instructions"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIShrinkInstructions, DEBUG_TYPE,
                "SI Shrink Instructions", false, false)

char SIShrinkInstructions::ID = 0;

FunctionPass *llvm::createSIShrinkInstructionsPass() {
  return new SIShrinkInstructions();
}

/// This function checks \p MI for operands defined by a move immediate
/// instruction and then folds the literal constant into the instruction if it
/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
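/// For example (illustrative, not exact MIR syntax): a V_MOV_B32 that
/// materializes a 32-bit literal feeding src0 can have that literal folded
/// directly into the use, and the mov is erased once it has no remaining
/// non-debug users.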
bool SIShrinkInstructions::foldImmediates(MachineInstr &MI,
                                          bool TryToCommute) const {
  assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));

  int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

  // Try to fold Src0
  MachineOperand &Src0 = MI.getOperand(Src0Idx);
  if (Src0.isReg()) {
    Register Reg = Src0.getReg();
    if (Reg.isVirtual()) {
      MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
      if (Def && Def->isMoveImmediate()) {
        MachineOperand &MovSrc = Def->getOperand(1);
        bool ConstantFolded = false;

        if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) {
          if (MovSrc.isImm() &&
              (isInt<32>(MovSrc.getImm()) || isUInt<32>(MovSrc.getImm()))) {
            Src0.ChangeToImmediate(MovSrc.getImm());
            ConstantFolded = true;
          } else if (MovSrc.isFI()) {
            Src0.ChangeToFrameIndex(MovSrc.getIndex());
            ConstantFolded = true;
          } else if (MovSrc.isGlobal()) {
            Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
                            MovSrc.getTargetFlags());
            ConstantFolded = true;
          }
        }

        if (ConstantFolded) {
          if (MRI->use_nodbg_empty(Reg))
            Def->eraseFromParent();
          ++NumLiteralConstantsFolded;
          return true;
        }
      }
    }
  }

  // We have failed to fold src0, so commute the instruction and try again.
  if (TryToCommute && MI.isCommutable()) {
    if (TII->commuteInstruction(MI)) {
      if (foldImmediates(MI, false))
        return true;

      // Commute back.
      TII->commuteInstruction(MI);
    }
  }

  return false;
}

/// Do not shrink the instruction if its registers are not expressible in the
/// shrunk encoding.
bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
  for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
    const MachineOperand &MO = MI.getOperand(I);
    if (MO.isReg()) {
      Register Reg = MO.getReg();
      assert(!Reg.isVirtual() && "Prior checks should ensure we only shrink "
                                 "True16 Instructions post-RA");
      if (AMDGPU::VGPR_32RegClass.contains(Reg) &&
          !AMDGPU::VGPR_32_Lo128RegClass.contains(Reg))
        return false;
    }
  }
  return true;
}

bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const {
  return isInt<16>(Src.getImm()) &&
    !TII->isInlineConstant(*Src.getParent(),
                           Src.getParent()->getOperandNo(&Src));
}

bool SIShrinkInstructions::isKUImmOperand(const MachineOperand &Src) const {
  return isUInt<16>(Src.getImm()) &&
    !TII->isInlineConstant(*Src.getParent(),
                           Src.getParent()->getOperandNo(&Src));
}

bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src,
                                                bool &IsUnsigned) const {
  if (isInt<16>(Src.getImm())) {
    IsUnsigned = false;
    return !TII->isInlineConstant(Src);
  }

  if (isUInt<16>(Src.getImm())) {
    IsUnsigned = true;
    return !TII->isInlineConstant(Src);
  }

  return false;
}

/// \returns true if the constant in \p Src should be replaced with a bitreverse
/// of an inline immediate.
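/// For example, a literal with only a few high bits set often bit-reverses
/// into the inline range [-16, 64], so it can be rematerialized as a
/// bitreverse of an inline immediate instead of carrying a 32-bit literal
/// (an illustrative note; profitability also depends on isInlineConstant).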
bool SIShrinkInstructions::isReverseInlineImm(const MachineOperand &Src,
                                              int32_t &ReverseImm) const {
  if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src))
    return false;

  ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Src.getImm()));
  return ReverseImm >= -16 && ReverseImm <= 64;
}

/// Copy implicit register operands from the specified instruction to this
/// instruction that are not part of the instruction definition.
void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
                                                MachineInstr &MI) const {
  MachineFunction &MF = *MI.getMF();
  for (unsigned i = MI.getDesc().getNumOperands() +
                    MI.getDesc().implicit_uses().size() +
                    MI.getDesc().implicit_defs().size(),
                e = MI.getNumOperands();
       i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
      NewMI.addOperand(MF, MO);
  }
}

void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
  // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
  // get constants on the RHS.
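  // e.g. "s_cmp_lg_u32 0x1234, s0" is commuted to "s_cmp_lg_u32 s0, 0x1234"
  // so it can later become "s_cmpk_lg_u32 s0, 0x1234" (illustrative).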
  if (!MI.getOperand(0).isReg())
    TII->commuteInstruction(MI, false, 0, 1);

  // cmpk requires src0 to be a register
  const MachineOperand &Src0 = MI.getOperand(0);
  if (!Src0.isReg())
    return;

  const MachineOperand &Src1 = MI.getOperand(1);
  if (!Src1.isImm())
    return;

  int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
  if (SOPKOpc == -1)
    return;

  // eq/ne is special because the imm16 can be treated as signed or unsigned,
  // and is initially selected to the unsigned versions.
  if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
    bool HasUImm;
    if (isKImmOrKUImmOperand(Src1, HasUImm)) {
      if (!HasUImm) {
        SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
          AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
      }

      MI.setDesc(TII->get(SOPKOpc));
    }

    return;
  }

  const MCInstrDesc &NewDesc = TII->get(SOPKOpc);

  if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(Src1)) ||
      (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(Src1))) {
    MI.setDesc(NewDesc);
  }
}

// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
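// e.g. an image_sample whose NSA address operands happen to live in
// consecutive VGPRs (v0, v1, v2, ...) can be re-encoded with a single packed
// vaddr tuple instead of the longer NSA form (illustrative).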
void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  if (!Info)
    return;

  uint8_t NewEncoding;
  switch (Info->MIMGEncoding) {
  case AMDGPU::MIMGEncGfx10NSA:
    NewEncoding = AMDGPU::MIMGEncGfx10Default;
    break;
  case AMDGPU::MIMGEncGfx11NSA:
    NewEncoding = AMDGPU::MIMGEncGfx11Default;
    break;
  default:
    return;
  }

  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  unsigned NewAddrDwords = Info->VAddrDwords;
  const TargetRegisterClass *RC;

  if (Info->VAddrDwords == 2) {
    RC = &AMDGPU::VReg_64RegClass;
  } else if (Info->VAddrDwords == 3) {
    RC = &AMDGPU::VReg_96RegClass;
  } else if (Info->VAddrDwords == 4) {
    RC = &AMDGPU::VReg_128RegClass;
  } else if (Info->VAddrDwords == 5) {
    RC = &AMDGPU::VReg_160RegClass;
  } else if (Info->VAddrDwords == 6) {
    RC = &AMDGPU::VReg_192RegClass;
  } else if (Info->VAddrDwords == 7) {
    RC = &AMDGPU::VReg_224RegClass;
  } else if (Info->VAddrDwords == 8) {
    RC = &AMDGPU::VReg_256RegClass;
  } else if (Info->VAddrDwords == 9) {
    RC = &AMDGPU::VReg_288RegClass;
  } else if (Info->VAddrDwords == 10) {
    RC = &AMDGPU::VReg_320RegClass;
  } else if (Info->VAddrDwords == 11) {
    RC = &AMDGPU::VReg_352RegClass;
  } else if (Info->VAddrDwords == 12) {
    RC = &AMDGPU::VReg_384RegClass;
  } else {
    RC = &AMDGPU::VReg_512RegClass;
    NewAddrDwords = 16;
  }

  unsigned VgprBase = 0;
  unsigned NextVgpr = 0;
  bool IsUndef = true;
  bool IsKill = NewAddrDwords == Info->VAddrDwords;
  for (unsigned Idx = 0; Idx < Info->VAddrOperands; ++Idx) {
    const MachineOperand &Op = MI.getOperand(VAddr0Idx + Idx);
    unsigned Vgpr = TRI->getHWRegIndex(Op.getReg());
    unsigned Dwords = TRI->getRegSizeInBits(Op.getReg(), *MRI) / 32;
    assert(Dwords > 0 && "Un-implemented for less than 32 bit regs");

    if (Idx == 0) {
      VgprBase = Vgpr;
      NextVgpr = Vgpr + Dwords;
    } else if (Vgpr == NextVgpr) {
      NextVgpr = Vgpr + Dwords;
    } else {
      return;
    }

    if (!Op.isUndef())
      IsUndef = false;
    if (!Op.isKill())
      IsKill = false;
  }

  if (VgprBase + NewAddrDwords > 256)
    return;

  // Further check for an implicit tied operand - this may be present if TFE
  // is enabled.
  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
  int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
  unsigned TFEVal = (TFEIdx == -1) ? 0 : MI.getOperand(TFEIdx).getImm();
  unsigned LWEVal = (LWEIdx == -1) ? 0 : MI.getOperand(LWEIdx).getImm();
  int ToUntie = -1;
  if (TFEVal || LWEVal) {
    // TFE/LWE is enabled so we need to deal with an implicit tied operand
    for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
      if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
          MI.getOperand(i).isImplicit()) {
        // This is the tied operand
        assert(
            ToUntie == -1 &&
            "found more than one tied implicit operand when expecting only 1");
        ToUntie = i;
        MI.untieRegOperand(ToUntie);
      }
    }
  }

  unsigned NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, NewEncoding,
                                             Info->VDataDwords, NewAddrDwords);
  MI.setDesc(TII->get(NewOpcode));
  MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
  MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
  MI.getOperand(VAddr0Idx).setIsKill(IsKill);

  for (int i = 1; i < Info->VAddrOperands; ++i)
    MI.removeOperand(VAddr0Idx + 1);

  if (ToUntie >= 0) {
    MI.tieOperands(
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
        ToUntie - (Info->VAddrOperands - 1));
  }
}

// Shrink MAD to MADAK/MADMK and FMA to FMAAK/FMAMK.
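// e.g. "v_fma_f32 v0, v1, v2, <32-bit literal>" becomes
// "v_fmaak_f32 v0, v1, v2, <literal>", and "v_fma_f32 v0, v1, <literal>, v2"
// becomes "v_fmamk_f32 v0, v1, <literal>, v2" (illustrative).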
void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
  // Pre-GFX10 VOP3 instructions like MAD/FMA cannot take a literal operand, so
  // there is no reason to try to shrink them.
  if (!ST->hasVOP3Literal())
    return;

  // There is no advantage to doing this pre-RA.
  if (!MF->getProperties().hasProperty(
          MachineFunctionProperties::Property::NoVRegs))
    return;

  if (TII->hasAnyModifiersSet(MI))
    return;

  const unsigned Opcode = MI.getOpcode();
  MachineOperand &Src0 = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  MachineOperand &Src1 = *TII->getNamedOperand(MI, AMDGPU::OpName::src1);
  MachineOperand &Src2 = *TII->getNamedOperand(MI, AMDGPU::OpName::src2);
  unsigned NewOpcode = AMDGPU::INSTRUCTION_LIST_END;

  bool Swap;

  // Detect "Dst = VSrc * VGPR + Imm" and convert to AK form.
  if (Src2.isImm() && !TII->isInlineConstant(Src2)) {
    if (Src1.isReg() && TRI->isVGPR(*MRI, Src1.getReg()))
      Swap = false;
    else if (Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg()))
      Swap = true;
    else
      return;

    switch (Opcode) {
    default:
      llvm_unreachable("Unexpected mad/fma opcode!");
    case AMDGPU::V_MAD_F32_e64:
      NewOpcode = AMDGPU::V_MADAK_F32;
      break;
    case AMDGPU::V_FMA_F32_e64:
      NewOpcode = AMDGPU::V_FMAAK_F32;
      break;
    case AMDGPU::V_MAD_F16_e64:
      NewOpcode = AMDGPU::V_MADAK_F16;
      break;
    case AMDGPU::V_FMA_F16_e64:
    case AMDGPU::V_FMA_F16_gfx9_e64:
      NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16
                                          : AMDGPU::V_FMAAK_F16;
      break;
    }
  }

  // Detect "Dst = VSrc * Imm + VGPR" and convert to MK form.
  if (Src2.isReg() && TRI->isVGPR(*MRI, Src2.getReg())) {
    if (Src1.isImm() && !TII->isInlineConstant(Src1))
      Swap = false;
    else if (Src0.isImm() && !TII->isInlineConstant(Src0))
      Swap = true;
    else
      return;

    switch (Opcode) {
    default:
      llvm_unreachable("Unexpected mad/fma opcode!");
    case AMDGPU::V_MAD_F32_e64:
      NewOpcode = AMDGPU::V_MADMK_F32;
      break;
    case AMDGPU::V_FMA_F32_e64:
      NewOpcode = AMDGPU::V_FMAMK_F32;
      break;
    case AMDGPU::V_MAD_F16_e64:
      NewOpcode = AMDGPU::V_MADMK_F16;
      break;
    case AMDGPU::V_FMA_F16_e64:
    case AMDGPU::V_FMA_F16_gfx9_e64:
      NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16
                                          : AMDGPU::V_FMAMK_F16;
      break;
    }
  }

  if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
    return;

  if (AMDGPU::isTrue16Inst(NewOpcode) && !shouldShrinkTrue16(MI))
    return;

  if (Swap) {
    // Swap Src0 and Src1 by building a new instruction.
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(NewOpcode),
            MI.getOperand(0).getReg())
        .add(Src1)
        .add(Src0)
        .add(Src2)
        .setMIFlags(MI.getFlags());
    MI.eraseFromParent();
  } else {
    TII->removeModOperands(MI);
    MI.setDesc(TII->get(NewOpcode));
  }
}

/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
/// XNOR (as a ^ b == ~(a ^ ~b)).
/// \returns true if the caller should continue the machine function iterator
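///
/// e.g. "s_and_b32 s0, s0, 0xffffefff" (clear bit 12) becomes
/// "s_bitset0_b32 s0, 12", and an AND whose inverted mask is an inline
/// constant becomes "s_andn2_b32" with that inverted mask (illustrative).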
bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  const MachineOperand *Dest = &MI.getOperand(0);
  MachineOperand *Src0 = &MI.getOperand(1);
  MachineOperand *Src1 = &MI.getOperand(2);
  MachineOperand *SrcReg = Src0;
  MachineOperand *SrcImm = Src1;

  if (!SrcImm->isImm() ||
      AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST->hasInv2PiInlineImm()))
    return false;

  uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
  uint32_t NewImm = 0;

  if (Opc == AMDGPU::S_AND_B32) {
    if (isPowerOf2_32(~Imm)) {
      NewImm = countTrailingOnes(Imm);
      Opc = AMDGPU::S_BITSET0_B32;
    } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
      NewImm = ~Imm;
      Opc = AMDGPU::S_ANDN2_B32;
    }
  } else if (Opc == AMDGPU::S_OR_B32) {
    if (isPowerOf2_32(Imm)) {
      NewImm = countTrailingZeros(Imm);
      Opc = AMDGPU::S_BITSET1_B32;
    } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
      NewImm = ~Imm;
      Opc = AMDGPU::S_ORN2_B32;
    }
  } else if (Opc == AMDGPU::S_XOR_B32) {
    if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
      NewImm = ~Imm;
      Opc = AMDGPU::S_XNOR_B32;
    }
  } else {
    llvm_unreachable("unexpected opcode");
  }

  if (NewImm != 0) {
    if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
      MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
      MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
      return true;
    }

    if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
      const bool IsUndef = SrcReg->isUndef();
      const bool IsKill = SrcReg->isKill();
      MI.setDesc(TII->get(Opc));
      if (Opc == AMDGPU::S_BITSET0_B32 ||
          Opc == AMDGPU::S_BITSET1_B32) {
        Src0->ChangeToImmediate(NewImm);
        // Remove the immediate and add the tied input.
        MI.getOperand(2).ChangeToRegister(Dest->getReg(), /*IsDef*/ false,
                                          /*isImp*/ false, IsKill,
                                          /*isDead*/ false, IsUndef);
        MI.tieOperands(0, 2);
      } else {
        SrcImm->setImm(NewImm);
      }
    }
  }

  return false;
}

// This is the same as MachineInstr::readsRegister/modifiesRegister except
// it takes subregs into account.
bool SIShrinkInstructions::instAccessReg(
    iterator_range<MachineInstr::const_mop_iterator> &&R, Register Reg,
    unsigned SubReg) const {
  for (const MachineOperand &MO : R) {
    if (!MO.isReg())
      continue;

    if (Reg.isPhysical() && MO.getReg().isPhysical()) {
      if (TRI->regsOverlap(Reg, MO.getReg()))
        return true;
    } else if (MO.getReg() == Reg && Reg.isVirtual()) {
      LaneBitmask Overlap = TRI->getSubRegIndexLaneMask(SubReg) &
                            TRI->getSubRegIndexLaneMask(MO.getSubReg());
      if (Overlap.any())
        return true;
    }
  }
  return false;
}

bool SIShrinkInstructions::instReadsReg(const MachineInstr *MI, unsigned Reg,
                                        unsigned SubReg) const {
  return instAccessReg(MI->uses(), Reg, SubReg);
}

bool SIShrinkInstructions::instModifiesReg(const MachineInstr *MI, unsigned Reg,
                                           unsigned SubReg) const {
  return instAccessReg(MI->defs(), Reg, SubReg);
}

TargetInstrInfo::RegSubRegPair
SIShrinkInstructions::getSubRegForIndex(Register Reg, unsigned Sub,
                                        unsigned I) const {
  if (TRI->getRegSizeInBits(Reg, *MRI) != 32) {
    if (Reg.isPhysical()) {
      Reg = TRI->getSubReg(Reg, TRI->getSubRegFromChannel(I));
    } else {
      Sub = TRI->getSubRegFromChannel(I + TRI->getChannelFromSubReg(Sub));
    }
  }
  return TargetInstrInfo::RegSubRegPair(Reg, Sub);
}

void SIShrinkInstructions::dropInstructionKeepingImpDefs(
    MachineInstr &MI) const {
  for (unsigned i = MI.getDesc().getNumOperands() +
                    MI.getDesc().implicit_uses().size() +
                    MI.getDesc().implicit_defs().size(),
                e = MI.getNumOperands();
       i != e; ++i) {
    const MachineOperand &Op = MI.getOperand(i);
    if (!Op.isDef())
      continue;
    BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
            TII->get(AMDGPU::IMPLICIT_DEF), Op.getReg());
  }

  MI.eraseFromParent();
}

// Match:
// mov t, x
// mov x, y
// mov y, t
//
// =>
//
// mov t, x (t is potentially dead and the move is then eliminated)
// v_swap_b32 x, y
//
// Returns a pointer to the next valid instruction if it was able to create a
// v_swap_b32.
//
// This must not be done too early, so as not to prevent folding that could
// remove the matched moves. It should preferably be done before RA to release
// the saved registers, but possibly also after RA, which can insert copies
// too.
//
// This is really just a generic peephole rather than a canonical shrinking,
// although its requirements match this pass's placement and it reduces code
// size too.
MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
  assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
         MovT.getOpcode() == AMDGPU::COPY);

  Register T = MovT.getOperand(0).getReg();
  unsigned Tsub = MovT.getOperand(0).getSubReg();
  MachineOperand &Xop = MovT.getOperand(1);

  if (!Xop.isReg())
    return nullptr;
  Register X = Xop.getReg();
  unsigned Xsub = Xop.getSubReg();

  unsigned Size = TII->getOpSize(MovT, 0) / 4;

  if (!TRI->isVGPR(*MRI, X))
    return nullptr;

  const unsigned SearchLimit = 16;
  unsigned Count = 0;
  bool KilledT = false;
  for (auto Iter = std::next(MovT.getIterator()),
            E = MovT.getParent()->instr_end();
       Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) {

    MachineInstr *MovY = &*Iter;
    KilledT = MovY->killsRegister(T, TRI);

    if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
         MovY->getOpcode() != AMDGPU::COPY) ||
        !MovY->getOperand(1).isReg()        ||
        MovY->getOperand(1).getReg() != T   ||
        MovY->getOperand(1).getSubReg() != Tsub)
      continue;

    Register Y = MovY->getOperand(0).getReg();
    unsigned Ysub = MovY->getOperand(0).getSubReg();

    if (!TRI->isVGPR(*MRI, Y))
      continue;

    MachineInstr *MovX = nullptr;
    for (auto IY = MovY->getIterator(), I = std::next(MovT.getIterator());
         I != IY; ++I) {
      if (instReadsReg(&*I, X, Xsub) || instModifiesReg(&*I, Y, Ysub) ||
          instModifiesReg(&*I, T, Tsub) ||
          (MovX && instModifiesReg(&*I, X, Xsub))) {
        MovX = nullptr;
        break;
      }
      if (!instReadsReg(&*I, Y, Ysub)) {
        if (!MovX && instModifiesReg(&*I, X, Xsub)) {
          MovX = nullptr;
          break;
        }
        continue;
      }
      if (MovX ||
          (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
           I->getOpcode() != AMDGPU::COPY) ||
          I->getOperand(0).getReg() != X ||
          I->getOperand(0).getSubReg() != Xsub) {
        MovX = nullptr;
        break;
      }

      if (Size > 1 && (I->getNumImplicitOperands() > (I->isCopy() ? 0U : 1U)))
        continue;

      MovX = &*I;
    }

    if (!MovX)
      continue;

    LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << *MovY);

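    // Expand the match into one V_SWAP_B32 per dword; Size is the operand
    // size in dwords, so e.g. a 64-bit move pair becomes two 32-bit swaps.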
    for (unsigned I = 0; I < Size; ++I) {
      TargetInstrInfo::RegSubRegPair X1, Y1;
      X1 = getSubRegForIndex(X, Xsub, I);
      Y1 = getSubRegForIndex(Y, Ysub, I);
      MachineBasicBlock &MBB = *MovT.getParent();
      auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
                         TII->get(AMDGPU::V_SWAP_B32))
        .addDef(X1.Reg, 0, X1.SubReg)
        .addDef(Y1.Reg, 0, Y1.SubReg)
        .addReg(Y1.Reg, 0, Y1.SubReg)
        .addReg(X1.Reg, 0, X1.SubReg).getInstr();
      if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
        // Drop implicit EXEC.
        MIB->removeOperand(MIB->getNumExplicitOperands());
        MIB->copyImplicitOps(*MBB.getParent(), *MovX);
      }
    }
    MovX->eraseFromParent();
    dropInstructionKeepingImpDefs(*MovY);
    MachineInstr *Next = &*std::next(MovT.getIterator());

    if (T.isVirtual() && MRI->use_nodbg_empty(T)) {
      dropInstructionKeepingImpDefs(MovT);
    } else {
      Xop.setIsKill(false);
      for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I) {
        unsigned OpNo = MovT.getNumExplicitOperands() + I;
        const MachineOperand &Op = MovT.getOperand(OpNo);
        if (Op.isKill() && TRI->regsOverlap(X, Op.getReg()))
          MovT.removeOperand(OpNo);
      }
    }

    return Next;
  }

  return nullptr;
}

// If an instruction has a dead sdst, replace it with the NULL register on
// gfx1030+.
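// e.g. an instruction whose sdst result has no non-debug readers can write
// the null register instead of occupying a real SGPR (illustrative).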
bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
  if (!ST->hasGFX10_3Insts())
    return false;

  MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
  if (!Op)
    return false;
  Register SDstReg = Op->getReg();
  if (SDstReg.isPhysical() || !MRI->use_nodbg_empty(SDstReg))
    return false;

  Op->setReg(ST->isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64);
  return true;
}

bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  this->MF = &MF;
  MRI = &MF.getRegInfo();
  ST = &MF.getSubtarget<GCNSubtarget>();
  TII = ST->getInstrInfo();
  TRI = &TII->getRegisterInfo();

  unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;

  std::vector<unsigned> I1Defs;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
        // If this has a literal constant source that is the same as the
        // reversed bits of an inline immediate, replace with a bitreverse of
        // that constant. This saves 4 bytes in the common case of materializing
        // sign bits.

        // Test if we are after regalloc. We only want to do this after other
        // optimizations have run, because this transform would confuse them.
        // XXX - not exactly a check for post-regalloc run.
        MachineOperand &Src = MI.getOperand(1);
        if (Src.isImm() && MI.getOperand(0).getReg().isPhysical()) {
          int32_t ReverseImm;
          if (isReverseInlineImm(Src, ReverseImm)) {
            MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
            Src.setImm(ReverseImm);
            continue;
          }
        }
      }

      if (ST->hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
                            MI.getOpcode() == AMDGPU::COPY)) {
        if (auto *NextMI = matchSwap(MI)) {
          Next = NextMI->getIterator();
          continue;
        }
      }

      // Try to use S_ADDK_I32 and S_MULK_I32.
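      // e.g. "s_add_i32 s0, s0, 0x4321" becomes "s_addk_i32 s0, 0x4321" when
      // the destination can be tied to src0 (illustrative).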
      if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
          MI.getOpcode() == AMDGPU::S_MUL_I32) {
        const MachineOperand *Dest = &MI.getOperand(0);
        MachineOperand *Src0 = &MI.getOperand(1);
        MachineOperand *Src1 = &MI.getOperand(2);

        if (!Src0->isReg() && Src1->isReg()) {
          if (TII->commuteInstruction(MI, false, 1, 2))
            std::swap(Src0, Src1);
        }

        // FIXME: This could work better if hints worked with subregisters. If
        // we have a vector add of a constant, we usually don't get the correct
        // allocation due to the subregister usage.
        if (Dest->getReg().isVirtual() && Src0->isReg()) {
          MRI->setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
          MRI->setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
          continue;
        }

        if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
          if (Src1->isImm() && isKImmOperand(*Src1)) {
            unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
              AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;

            MI.setDesc(TII->get(Opc));
            MI.tieOperands(0, 1);
          }
        }
      }

      // Try to use s_cmpk_*
      if (MI.isCompare() && TII->isSOPC(MI)) {
        shrinkScalarCompare(MI);
        continue;
      }

      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
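      // e.g. "s_mov_b32 s0, 0x1234" becomes "s_movk_i32 s0, 0x1234", and a
      // literal whose bit-reverse is an inline immediate becomes
      // "s_brev_b32" of that inline immediate (illustrative).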
      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
        const MachineOperand &Dst = MI.getOperand(0);
        MachineOperand &Src = MI.getOperand(1);

        if (Src.isImm() && Dst.getReg().isPhysical()) {
          int32_t ReverseImm;
          if (isKImmOperand(Src))
            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
          else if (isReverseInlineImm(Src, ReverseImm)) {
            MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
            Src.setImm(ReverseImm);
          }
        }

        continue;
      }

      // Shrink scalar logic operations.
      if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
          MI.getOpcode() == AMDGPU::S_OR_B32 ||
          MI.getOpcode() == AMDGPU::S_XOR_B32) {
        if (shrinkScalarLogicOp(MI))
          continue;
      }

      if (TII->isMIMG(MI.getOpcode()) &&
          ST->getGeneration() >= AMDGPUSubtarget::GFX10 &&
          MF.getProperties().hasProperty(
              MachineFunctionProperties::Property::NoVRegs)) {
        shrinkMIMG(MI);
        continue;
      }

      if (!TII->isVOP3(MI))
        continue;

      if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
          MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64) {
        shrinkMadFma(MI);
        continue;
      }

      if (!TII->hasVALU32BitEncoding(MI.getOpcode())) {
        // If there is no chance we will shrink it and use VCC as sdst to get
        // a 32-bit form, try to replace a dead sdst with NULL.
        tryReplaceDeadSDST(MI);
        continue;
      }
90581ad6265SDimitry Andric 
90681ad6265SDimitry Andric       if (!TII->canShrink(MI, *MRI)) {
9070b57cec5SDimitry Andric         // Try commuting the instruction and see if that enables us to shrink
9080b57cec5SDimitry Andric         // it.
9090b57cec5SDimitry Andric         if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
91081ad6265SDimitry Andric             !TII->canShrink(MI, *MRI)) {
91181ad6265SDimitry Andric           tryReplaceDeadSDST(MI);
9120b57cec5SDimitry Andric           continue;
9130b57cec5SDimitry Andric         }
91481ad6265SDimitry Andric       }
9150b57cec5SDimitry Andric 
9160b57cec5SDimitry Andric       int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
9170b57cec5SDimitry Andric 
9180b57cec5SDimitry Andric       if (TII->isVOPC(Op32)) {
91981ad6265SDimitry Andric         MachineOperand &Op0 = MI.getOperand(0);
92081ad6265SDimitry Andric         if (Op0.isReg()) {
92181ad6265SDimitry Andric           // Exclude VOPCX instructions as these don't explicitly write a
92281ad6265SDimitry Andric           // dst.
92381ad6265SDimitry Andric           Register DstReg = Op0.getReg();
924e8d8bef9SDimitry Andric           if (DstReg.isVirtual()) {
9250b57cec5SDimitry Andric             // VOPC instructions can only write to the VCC register. We can't
9260b57cec5SDimitry Andric             // force them to use VCC here, because this is only one register and
9270b57cec5SDimitry Andric             // cannot deal with sequences which would require multiple copies of
9280b57cec5SDimitry Andric             // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
9290b57cec5SDimitry Andric             //
93081ad6265SDimitry Andric             // So, instead of forcing the instruction to write to VCC, we
93181ad6265SDimitry Andric             // provide a hint to the register allocator to use VCC and then we
93281ad6265SDimitry Andric             // will run this pass again after RA and shrink it if it outputs to
93381ad6265SDimitry Andric             // VCC.
93481ad6265SDimitry Andric             MRI->setRegAllocationHint(DstReg, 0, VCCReg);
9350b57cec5SDimitry Andric             continue;
9360b57cec5SDimitry Andric           }
9370b57cec5SDimitry Andric           if (DstReg != VCCReg)
9380b57cec5SDimitry Andric             continue;
9390b57cec5SDimitry Andric         }
94081ad6265SDimitry Andric       }
9410b57cec5SDimitry Andric 
9420b57cec5SDimitry Andric       if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
9430b57cec5SDimitry Andric         // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
9440b57cec5SDimitry Andric         // instructions.
9450b57cec5SDimitry Andric         const MachineOperand *Src2 =
9460b57cec5SDimitry Andric             TII->getNamedOperand(MI, AMDGPU::OpName::src2);
9470b57cec5SDimitry Andric         if (!Src2->isReg())
9480b57cec5SDimitry Andric           continue;
9498bcb0991SDimitry Andric         Register SReg = Src2->getReg();
950e8d8bef9SDimitry Andric         if (SReg.isVirtual()) {
95181ad6265SDimitry Andric           MRI->setRegAllocationHint(SReg, 0, VCCReg);
9520b57cec5SDimitry Andric           continue;
9530b57cec5SDimitry Andric         }
9540b57cec5SDimitry Andric         if (SReg != VCCReg)
9550b57cec5SDimitry Andric           continue;
9560b57cec5SDimitry Andric       }
9570b57cec5SDimitry Andric 
9580b57cec5SDimitry Andric       // Check for the bool flag output for instructions like V_ADD_I32_e64.
9590b57cec5SDimitry Andric       const MachineOperand *SDst = TII->getNamedOperand(MI,
9600b57cec5SDimitry Andric                                                         AMDGPU::OpName::sdst);
9610b57cec5SDimitry Andric 
9620b57cec5SDimitry Andric       if (SDst) {
9630b57cec5SDimitry Andric         bool Next = false;
9640b57cec5SDimitry Andric 
9650b57cec5SDimitry Andric         if (SDst->getReg() != VCCReg) {
966e8d8bef9SDimitry Andric           if (SDst->getReg().isVirtual())
96781ad6265SDimitry Andric             MRI->setRegAllocationHint(SDst->getReg(), 0, VCCReg);
9680b57cec5SDimitry Andric           Next = true;
9690b57cec5SDimitry Andric         }
9700b57cec5SDimitry Andric 
9710b57cec5SDimitry Andric         // All of the instructions with carry outs also have an SGPR input in
9720b57cec5SDimitry Andric         // src2.
9730eae32dcSDimitry Andric         const MachineOperand *Src2 = TII->getNamedOperand(MI,
9740eae32dcSDimitry Andric                                                           AMDGPU::OpName::src2);
9750b57cec5SDimitry Andric         if (Src2 && Src2->getReg() != VCCReg) {
976e8d8bef9SDimitry Andric           if (Src2->getReg().isVirtual())
97781ad6265SDimitry Andric             MRI->setRegAllocationHint(Src2->getReg(), 0, VCCReg);
9780b57cec5SDimitry Andric           Next = true;
9790b57cec5SDimitry Andric         }
9800b57cec5SDimitry Andric 
9810b57cec5SDimitry Andric         if (Next)
9820b57cec5SDimitry Andric           continue;
9830b57cec5SDimitry Andric       }
9840b57cec5SDimitry Andric 
985*bdd1243dSDimitry Andric       // Pre-GFX10, shrinking VOP3 instructions pre-RA gave us the chance to
986*bdd1243dSDimitry Andric       // fold an immediate into the shrunk instruction as a literal operand. In
987*bdd1243dSDimitry Andric       // GFX10 VOP3 instructions can take a literal operand anyway, so there is
988*bdd1243dSDimitry Andric       // no advantage to doing this.
989*bdd1243dSDimitry Andric       if (ST->hasVOP3Literal() &&
990*bdd1243dSDimitry Andric           !MF.getProperties().hasProperty(
991*bdd1243dSDimitry Andric               MachineFunctionProperties::Property::NoVRegs))
992*bdd1243dSDimitry Andric         continue;
993*bdd1243dSDimitry Andric 
994*bdd1243dSDimitry Andric       if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
995*bdd1243dSDimitry Andric           !shouldShrinkTrue16(MI))
996*bdd1243dSDimitry Andric         continue;
997*bdd1243dSDimitry Andric 
9980b57cec5SDimitry Andric       // We can shrink this instruction
9990b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Shrinking " << MI);
10000b57cec5SDimitry Andric 
10010b57cec5SDimitry Andric       MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
10020b57cec5SDimitry Andric       ++NumInstructionsShrunk;
10030b57cec5SDimitry Andric 
10040b57cec5SDimitry Andric       // Copy extra operands not present in the instruction definition.
100581ad6265SDimitry Andric       copyExtraImplicitOps(*Inst32, MI);
10060b57cec5SDimitry Andric 
1007349cc55cSDimitry Andric       // Copy deadness from the old explicit vcc def to the new implicit def.
1008349cc55cSDimitry Andric       if (SDst && SDst->isDead())
1009349cc55cSDimitry Andric         Inst32->findRegisterDefOperand(VCCReg)->setIsDead();
1010349cc55cSDimitry Andric 
10110b57cec5SDimitry Andric       MI.eraseFromParent();
101281ad6265SDimitry Andric       foldImmediates(*Inst32);
10130b57cec5SDimitry Andric 
10140b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
10150b57cec5SDimitry Andric     }
10160b57cec5SDimitry Andric   }
10170b57cec5SDimitry Andric   return false;
10180b57cec5SDimitry Andric }
1019