xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===-- SIPreEmitPeephole.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass performs the peephole optimizations before code emission.
///
//===----------------------------------------------------------------------===//
135ffd83dbSDimitry Andric 
145ffd83dbSDimitry Andric #include "AMDGPU.h"
15e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
165ffd83dbSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
175ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
185ffd83dbSDimitry Andric 
195ffd83dbSDimitry Andric using namespace llvm;
205ffd83dbSDimitry Andric 
215ffd83dbSDimitry Andric #define DEBUG_TYPE "si-pre-emit-peephole"
225ffd83dbSDimitry Andric 
23fe6060f1SDimitry Andric static unsigned SkipThreshold;
24fe6060f1SDimitry Andric 
25fe6060f1SDimitry Andric static cl::opt<unsigned, true> SkipThresholdFlag(
26fe6060f1SDimitry Andric     "amdgpu-skip-threshold", cl::Hidden,
27fe6060f1SDimitry Andric     cl::desc(
28fe6060f1SDimitry Andric         "Number of instructions before jumping over divergent control flow"),
29fe6060f1SDimitry Andric     cl::location(SkipThreshold), cl::init(12));
30fe6060f1SDimitry Andric 
315ffd83dbSDimitry Andric namespace {
325ffd83dbSDimitry Andric 
335ffd83dbSDimitry Andric class SIPreEmitPeephole : public MachineFunctionPass {
345ffd83dbSDimitry Andric private:
355ffd83dbSDimitry Andric   const SIInstrInfo *TII = nullptr;
365ffd83dbSDimitry Andric   const SIRegisterInfo *TRI = nullptr;
375ffd83dbSDimitry Andric 
385ffd83dbSDimitry Andric   bool optimizeVccBranch(MachineInstr &MI) const;
395ffd83dbSDimitry Andric   bool optimizeSetGPR(MachineInstr &First, MachineInstr &MI) const;
40fe6060f1SDimitry Andric   bool getBlockDestinations(MachineBasicBlock &SrcMBB,
41fe6060f1SDimitry Andric                             MachineBasicBlock *&TrueMBB,
42fe6060f1SDimitry Andric                             MachineBasicBlock *&FalseMBB,
43fe6060f1SDimitry Andric                             SmallVectorImpl<MachineOperand> &Cond);
44fe6060f1SDimitry Andric   bool mustRetainExeczBranch(const MachineBasicBlock &From,
45fe6060f1SDimitry Andric                              const MachineBasicBlock &To) const;
46fe6060f1SDimitry Andric   bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
475ffd83dbSDimitry Andric 
485ffd83dbSDimitry Andric public:
495ffd83dbSDimitry Andric   static char ID;
505ffd83dbSDimitry Andric 
SIPreEmitPeephole()515ffd83dbSDimitry Andric   SIPreEmitPeephole() : MachineFunctionPass(ID) {
525ffd83dbSDimitry Andric     initializeSIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
535ffd83dbSDimitry Andric   }
545ffd83dbSDimitry Andric 
555ffd83dbSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
565ffd83dbSDimitry Andric };
575ffd83dbSDimitry Andric 
585ffd83dbSDimitry Andric } // End anonymous namespace.
595ffd83dbSDimitry Andric 
605ffd83dbSDimitry Andric INITIALIZE_PASS(SIPreEmitPeephole, DEBUG_TYPE,
615ffd83dbSDimitry Andric                 "SI peephole optimizations", false, false)
625ffd83dbSDimitry Andric 
635ffd83dbSDimitry Andric char SIPreEmitPeephole::ID = 0;
645ffd83dbSDimitry Andric 
655ffd83dbSDimitry Andric char &llvm::SIPreEmitPeepholeID = SIPreEmitPeephole::ID;
665ffd83dbSDimitry Andric 
optimizeVccBranch(MachineInstr & MI) const675ffd83dbSDimitry Andric bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
685ffd83dbSDimitry Andric   // Match:
695ffd83dbSDimitry Andric   // sreg = -1 or 0
705ffd83dbSDimitry Andric   // vcc = S_AND_B64 exec, sreg or S_ANDN2_B64 exec, sreg
715ffd83dbSDimitry Andric   // S_CBRANCH_VCC[N]Z
725ffd83dbSDimitry Andric   // =>
735ffd83dbSDimitry Andric   // S_CBRANCH_EXEC[N]Z
745ffd83dbSDimitry Andric   // We end up with this pattern sometimes after basic block placement.
755ffd83dbSDimitry Andric   // It happens while combining a block which assigns -1 or 0 to a saved mask
765ffd83dbSDimitry Andric   // and another block which consumes that saved mask and then a branch.
7781ad6265SDimitry Andric   //
7881ad6265SDimitry Andric   // While searching this also performs the following substitution:
7981ad6265SDimitry Andric   // vcc = V_CMP
8081ad6265SDimitry Andric   // vcc = S_AND exec, vcc
8181ad6265SDimitry Andric   // S_CBRANCH_VCC[N]Z
8281ad6265SDimitry Andric   // =>
8381ad6265SDimitry Andric   // vcc = V_CMP
8481ad6265SDimitry Andric   // S_CBRANCH_VCC[N]Z
8581ad6265SDimitry Andric 
865ffd83dbSDimitry Andric   bool Changed = false;
875ffd83dbSDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
885ffd83dbSDimitry Andric   const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
895ffd83dbSDimitry Andric   const bool IsWave32 = ST.isWave32();
905ffd83dbSDimitry Andric   const unsigned CondReg = TRI->getVCC();
915ffd83dbSDimitry Andric   const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
925ffd83dbSDimitry Andric   const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
935ffd83dbSDimitry Andric   const unsigned AndN2 = IsWave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
94e8d8bef9SDimitry Andric   const unsigned Mov = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
955ffd83dbSDimitry Andric 
965ffd83dbSDimitry Andric   MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
975ffd83dbSDimitry Andric                                       E = MBB.rend();
985ffd83dbSDimitry Andric   bool ReadsCond = false;
995ffd83dbSDimitry Andric   unsigned Threshold = 5;
1005ffd83dbSDimitry Andric   for (++A; A != E; ++A) {
1015ffd83dbSDimitry Andric     if (!--Threshold)
1025ffd83dbSDimitry Andric       return false;
1035ffd83dbSDimitry Andric     if (A->modifiesRegister(ExecReg, TRI))
1045ffd83dbSDimitry Andric       return false;
1055ffd83dbSDimitry Andric     if (A->modifiesRegister(CondReg, TRI)) {
1065ffd83dbSDimitry Andric       if (!A->definesRegister(CondReg, TRI) ||
1075ffd83dbSDimitry Andric           (A->getOpcode() != And && A->getOpcode() != AndN2))
1085ffd83dbSDimitry Andric         return false;
1095ffd83dbSDimitry Andric       break;
1105ffd83dbSDimitry Andric     }
1115ffd83dbSDimitry Andric     ReadsCond |= A->readsRegister(CondReg, TRI);
1125ffd83dbSDimitry Andric   }
1135ffd83dbSDimitry Andric   if (A == E)
1145ffd83dbSDimitry Andric     return false;
1155ffd83dbSDimitry Andric 
1165ffd83dbSDimitry Andric   MachineOperand &Op1 = A->getOperand(1);
1175ffd83dbSDimitry Andric   MachineOperand &Op2 = A->getOperand(2);
1185ffd83dbSDimitry Andric   if (Op1.getReg() != ExecReg && Op2.isReg() && Op2.getReg() == ExecReg) {
1195ffd83dbSDimitry Andric     TII->commuteInstruction(*A);
1205ffd83dbSDimitry Andric     Changed = true;
1215ffd83dbSDimitry Andric   }
1225ffd83dbSDimitry Andric   if (Op1.getReg() != ExecReg)
1235ffd83dbSDimitry Andric     return Changed;
1245ffd83dbSDimitry Andric   if (Op2.isImm() && !(Op2.getImm() == -1 || Op2.getImm() == 0))
1255ffd83dbSDimitry Andric     return Changed;
1265ffd83dbSDimitry Andric 
1275ffd83dbSDimitry Andric   int64_t MaskValue = 0;
1285ffd83dbSDimitry Andric   Register SReg;
1295ffd83dbSDimitry Andric   if (Op2.isReg()) {
1305ffd83dbSDimitry Andric     SReg = Op2.getReg();
1315ffd83dbSDimitry Andric     auto M = std::next(A);
1325ffd83dbSDimitry Andric     bool ReadsSreg = false;
13381ad6265SDimitry Andric     bool ModifiesExec = false;
1345ffd83dbSDimitry Andric     for (; M != E; ++M) {
1355ffd83dbSDimitry Andric       if (M->definesRegister(SReg, TRI))
1365ffd83dbSDimitry Andric         break;
1375ffd83dbSDimitry Andric       if (M->modifiesRegister(SReg, TRI))
1385ffd83dbSDimitry Andric         return Changed;
1395ffd83dbSDimitry Andric       ReadsSreg |= M->readsRegister(SReg, TRI);
14081ad6265SDimitry Andric       ModifiesExec |= M->modifiesRegister(ExecReg, TRI);
1415ffd83dbSDimitry Andric     }
14281ad6265SDimitry Andric     if (M == E)
14381ad6265SDimitry Andric       return Changed;
14481ad6265SDimitry Andric     // If SReg is VCC and SReg definition is a VALU comparison.
14581ad6265SDimitry Andric     // This means S_AND with EXEC is not required.
14681ad6265SDimitry Andric     // Erase the S_AND and return.
14781ad6265SDimitry Andric     // Note: isVOPC is used instead of isCompare to catch V_CMP_CLASS
14881ad6265SDimitry Andric     if (A->getOpcode() == And && SReg == CondReg && !ModifiesExec &&
14981ad6265SDimitry Andric         TII->isVOPC(*M)) {
15081ad6265SDimitry Andric       A->eraseFromParent();
15181ad6265SDimitry Andric       return true;
15281ad6265SDimitry Andric     }
15381ad6265SDimitry Andric     if (!M->isMoveImmediate() || !M->getOperand(1).isImm() ||
1545ffd83dbSDimitry Andric         (M->getOperand(1).getImm() != -1 && M->getOperand(1).getImm() != 0))
1555ffd83dbSDimitry Andric       return Changed;
1565ffd83dbSDimitry Andric     MaskValue = M->getOperand(1).getImm();
1575ffd83dbSDimitry Andric     // First if sreg is only used in the AND instruction fold the immediate
15881ad6265SDimitry Andric     // into the AND.
1595ffd83dbSDimitry Andric     if (!ReadsSreg && Op2.isKill()) {
1605ffd83dbSDimitry Andric       A->getOperand(2).ChangeToImmediate(MaskValue);
1615ffd83dbSDimitry Andric       M->eraseFromParent();
1625ffd83dbSDimitry Andric     }
1635ffd83dbSDimitry Andric   } else if (Op2.isImm()) {
1645ffd83dbSDimitry Andric     MaskValue = Op2.getImm();
1655ffd83dbSDimitry Andric   } else {
1665ffd83dbSDimitry Andric     llvm_unreachable("Op2 must be register or immediate");
1675ffd83dbSDimitry Andric   }
1685ffd83dbSDimitry Andric 
1695ffd83dbSDimitry Andric   // Invert mask for s_andn2
1705ffd83dbSDimitry Andric   assert(MaskValue == 0 || MaskValue == -1);
1715ffd83dbSDimitry Andric   if (A->getOpcode() == AndN2)
1725ffd83dbSDimitry Andric     MaskValue = ~MaskValue;
1735ffd83dbSDimitry Andric 
174*0fca6ea1SDimitry Andric   if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) {
175e8d8bef9SDimitry Andric     if (!MI.killsRegister(CondReg, TRI)) {
176e8d8bef9SDimitry Andric       // Replace AND with MOV
177e8d8bef9SDimitry Andric       if (MaskValue == 0) {
178e8d8bef9SDimitry Andric         BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg)
179e8d8bef9SDimitry Andric             .addImm(0);
180e8d8bef9SDimitry Andric       } else {
181e8d8bef9SDimitry Andric         BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg)
182e8d8bef9SDimitry Andric             .addReg(ExecReg);
183e8d8bef9SDimitry Andric       }
184e8d8bef9SDimitry Andric     }
185e8d8bef9SDimitry Andric     // Remove AND instruction
1865ffd83dbSDimitry Andric     A->eraseFromParent();
187e8d8bef9SDimitry Andric   }
1885ffd83dbSDimitry Andric 
1895ffd83dbSDimitry Andric   bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
1905ffd83dbSDimitry Andric   if (SReg == ExecReg) {
1915ffd83dbSDimitry Andric     // EXEC is updated directly
1925ffd83dbSDimitry Andric     if (IsVCCZ) {
1935ffd83dbSDimitry Andric       MI.eraseFromParent();
1945ffd83dbSDimitry Andric       return true;
1955ffd83dbSDimitry Andric     }
1965ffd83dbSDimitry Andric     MI.setDesc(TII->get(AMDGPU::S_BRANCH));
1975ffd83dbSDimitry Andric   } else if (IsVCCZ && MaskValue == 0) {
1985ffd83dbSDimitry Andric     // Will always branch
199349cc55cSDimitry Andric     // Remove all successors shadowed by new unconditional branch
2005ffd83dbSDimitry Andric     MachineBasicBlock *Parent = MI.getParent();
2015ffd83dbSDimitry Andric     SmallVector<MachineInstr *, 4> ToRemove;
2025ffd83dbSDimitry Andric     bool Found = false;
2035ffd83dbSDimitry Andric     for (MachineInstr &Term : Parent->terminators()) {
2045ffd83dbSDimitry Andric       if (Found) {
2055ffd83dbSDimitry Andric         if (Term.isBranch())
2065ffd83dbSDimitry Andric           ToRemove.push_back(&Term);
2075ffd83dbSDimitry Andric       } else {
2085ffd83dbSDimitry Andric         Found = Term.isIdenticalTo(MI);
2095ffd83dbSDimitry Andric       }
2105ffd83dbSDimitry Andric     }
2115ffd83dbSDimitry Andric     assert(Found && "conditional branch is not terminator");
212bdd1243dSDimitry Andric     for (auto *BranchMI : ToRemove) {
2135ffd83dbSDimitry Andric       MachineOperand &Dst = BranchMI->getOperand(0);
2145ffd83dbSDimitry Andric       assert(Dst.isMBB() && "destination is not basic block");
2155ffd83dbSDimitry Andric       Parent->removeSuccessor(Dst.getMBB());
2165ffd83dbSDimitry Andric       BranchMI->eraseFromParent();
2175ffd83dbSDimitry Andric     }
2185ffd83dbSDimitry Andric 
2195ffd83dbSDimitry Andric     if (MachineBasicBlock *Succ = Parent->getFallThrough()) {
2205ffd83dbSDimitry Andric       Parent->removeSuccessor(Succ);
2215ffd83dbSDimitry Andric     }
2225ffd83dbSDimitry Andric 
2235ffd83dbSDimitry Andric     // Rewrite to unconditional branch
2245ffd83dbSDimitry Andric     MI.setDesc(TII->get(AMDGPU::S_BRANCH));
2255ffd83dbSDimitry Andric   } else if (!IsVCCZ && MaskValue == 0) {
2265ffd83dbSDimitry Andric     // Will never branch
2275ffd83dbSDimitry Andric     MachineOperand &Dst = MI.getOperand(0);
2285ffd83dbSDimitry Andric     assert(Dst.isMBB() && "destination is not basic block");
2295ffd83dbSDimitry Andric     MI.getParent()->removeSuccessor(Dst.getMBB());
2305ffd83dbSDimitry Andric     MI.eraseFromParent();
2315ffd83dbSDimitry Andric     return true;
2325ffd83dbSDimitry Andric   } else if (MaskValue == -1) {
2335ffd83dbSDimitry Andric     // Depends only on EXEC
2345ffd83dbSDimitry Andric     MI.setDesc(
2355ffd83dbSDimitry Andric         TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ : AMDGPU::S_CBRANCH_EXECNZ));
2365ffd83dbSDimitry Andric   }
2375ffd83dbSDimitry Andric 
238*0fca6ea1SDimitry Andric   MI.removeOperand(MI.findRegisterUseOperandIdx(CondReg, TRI, false /*Kill*/));
2395ffd83dbSDimitry Andric   MI.addImplicitDefUseOperands(*MBB.getParent());
2405ffd83dbSDimitry Andric 
2415ffd83dbSDimitry Andric   return true;
2425ffd83dbSDimitry Andric }
2435ffd83dbSDimitry Andric 
optimizeSetGPR(MachineInstr & First,MachineInstr & MI) const2445ffd83dbSDimitry Andric bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
2455ffd83dbSDimitry Andric                                        MachineInstr &MI) const {
2465ffd83dbSDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
2475ffd83dbSDimitry Andric   const MachineFunction &MF = *MBB.getParent();
2485ffd83dbSDimitry Andric   const MachineRegisterInfo &MRI = MF.getRegInfo();
2495ffd83dbSDimitry Andric   MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
2505ffd83dbSDimitry Andric   Register IdxReg = Idx->isReg() ? Idx->getReg() : Register();
2515ffd83dbSDimitry Andric   SmallVector<MachineInstr *, 4> ToRemove;
2525ffd83dbSDimitry Andric   bool IdxOn = true;
2535ffd83dbSDimitry Andric 
2545ffd83dbSDimitry Andric   if (!MI.isIdenticalTo(First))
2555ffd83dbSDimitry Andric     return false;
2565ffd83dbSDimitry Andric 
2575ffd83dbSDimitry Andric   // Scan back to find an identical S_SET_GPR_IDX_ON
258fe6060f1SDimitry Andric   for (MachineBasicBlock::instr_iterator I = std::next(First.getIterator()),
259fe6060f1SDimitry Andric                                          E = MI.getIterator();
260fe6060f1SDimitry Andric        I != E; ++I) {
261fe6060f1SDimitry Andric     if (I->isBundle())
262fe6060f1SDimitry Andric       continue;
2635ffd83dbSDimitry Andric     switch (I->getOpcode()) {
2645ffd83dbSDimitry Andric     case AMDGPU::S_SET_GPR_IDX_MODE:
2655ffd83dbSDimitry Andric       return false;
2665ffd83dbSDimitry Andric     case AMDGPU::S_SET_GPR_IDX_OFF:
2675ffd83dbSDimitry Andric       IdxOn = false;
2685ffd83dbSDimitry Andric       ToRemove.push_back(&*I);
2695ffd83dbSDimitry Andric       break;
2705ffd83dbSDimitry Andric     default:
2715ffd83dbSDimitry Andric       if (I->modifiesRegister(AMDGPU::M0, TRI))
2725ffd83dbSDimitry Andric         return false;
2735ffd83dbSDimitry Andric       if (IdxReg && I->modifiesRegister(IdxReg, TRI))
2745ffd83dbSDimitry Andric         return false;
2755ffd83dbSDimitry Andric       if (llvm::any_of(I->operands(),
2765ffd83dbSDimitry Andric                        [&MRI, this](const MachineOperand &MO) {
2775ffd83dbSDimitry Andric                          return MO.isReg() &&
2785ffd83dbSDimitry Andric                                 TRI->isVectorRegister(MRI, MO.getReg());
2795ffd83dbSDimitry Andric                        })) {
2805ffd83dbSDimitry Andric         // The only exception allowed here is another indirect vector move
2815ffd83dbSDimitry Andric         // with the same mode.
282349cc55cSDimitry Andric         if (!IdxOn || !(I->getOpcode() == AMDGPU::V_MOV_B32_indirect_write ||
283349cc55cSDimitry Andric                         I->getOpcode() == AMDGPU::V_MOV_B32_indirect_read))
2845ffd83dbSDimitry Andric           return false;
2855ffd83dbSDimitry Andric       }
2865ffd83dbSDimitry Andric     }
2875ffd83dbSDimitry Andric   }
2885ffd83dbSDimitry Andric 
289fe6060f1SDimitry Andric   MI.eraseFromBundle();
2905ffd83dbSDimitry Andric   for (MachineInstr *RI : ToRemove)
291fe6060f1SDimitry Andric     RI->eraseFromBundle();
292fe6060f1SDimitry Andric   return true;
293fe6060f1SDimitry Andric }
294fe6060f1SDimitry Andric 
getBlockDestinations(MachineBasicBlock & SrcMBB,MachineBasicBlock * & TrueMBB,MachineBasicBlock * & FalseMBB,SmallVectorImpl<MachineOperand> & Cond)295fe6060f1SDimitry Andric bool SIPreEmitPeephole::getBlockDestinations(
296fe6060f1SDimitry Andric     MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
297fe6060f1SDimitry Andric     MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
298fe6060f1SDimitry Andric   if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond))
299fe6060f1SDimitry Andric     return false;
300fe6060f1SDimitry Andric 
301fe6060f1SDimitry Andric   if (!FalseMBB)
302fe6060f1SDimitry Andric     FalseMBB = SrcMBB.getNextNode();
303fe6060f1SDimitry Andric 
304fe6060f1SDimitry Andric   return true;
305fe6060f1SDimitry Andric }
306fe6060f1SDimitry Andric 
mustRetainExeczBranch(const MachineBasicBlock & From,const MachineBasicBlock & To) const307fe6060f1SDimitry Andric bool SIPreEmitPeephole::mustRetainExeczBranch(
308fe6060f1SDimitry Andric     const MachineBasicBlock &From, const MachineBasicBlock &To) const {
309fe6060f1SDimitry Andric   unsigned NumInstr = 0;
310fe6060f1SDimitry Andric   const MachineFunction *MF = From.getParent();
311fe6060f1SDimitry Andric 
312fe6060f1SDimitry Andric   for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
313fe6060f1SDimitry Andric        MBBI != End && MBBI != ToI; ++MBBI) {
314fe6060f1SDimitry Andric     const MachineBasicBlock &MBB = *MBBI;
315fe6060f1SDimitry Andric 
3164824e7fdSDimitry Andric     for (const MachineInstr &MI : MBB) {
317fe6060f1SDimitry Andric       // When a uniform loop is inside non-uniform control flow, the branch
318fe6060f1SDimitry Andric       // leaving the loop might never be taken when EXEC = 0.
319fe6060f1SDimitry Andric       // Hence we should retain cbranch out of the loop lest it become infinite.
3204824e7fdSDimitry Andric       if (MI.isConditionalBranch())
321fe6060f1SDimitry Andric         return true;
322fe6060f1SDimitry Andric 
3235f757f3fSDimitry Andric       if (MI.isMetaInstruction())
3245f757f3fSDimitry Andric         continue;
3255f757f3fSDimitry Andric 
3264824e7fdSDimitry Andric       if (TII->hasUnwantedEffectsWhenEXECEmpty(MI))
327fe6060f1SDimitry Andric         return true;
328fe6060f1SDimitry Andric 
329fe6060f1SDimitry Andric       // These instructions are potentially expensive even if EXEC = 0.
3304824e7fdSDimitry Andric       if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
331*0fca6ea1SDimitry Andric           TII->isDS(MI) || TII->isWaitcnt(MI.getOpcode()))
332fe6060f1SDimitry Andric         return true;
333fe6060f1SDimitry Andric 
334fe6060f1SDimitry Andric       ++NumInstr;
335fe6060f1SDimitry Andric       if (NumInstr >= SkipThreshold)
336fe6060f1SDimitry Andric         return true;
337fe6060f1SDimitry Andric     }
338fe6060f1SDimitry Andric   }
339fe6060f1SDimitry Andric 
340fe6060f1SDimitry Andric   return false;
341fe6060f1SDimitry Andric }
342fe6060f1SDimitry Andric 
343fe6060f1SDimitry Andric // Returns true if the skip branch instruction is removed.
removeExeczBranch(MachineInstr & MI,MachineBasicBlock & SrcMBB)344fe6060f1SDimitry Andric bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
345fe6060f1SDimitry Andric                                           MachineBasicBlock &SrcMBB) {
346fe6060f1SDimitry Andric   MachineBasicBlock *TrueMBB = nullptr;
347fe6060f1SDimitry Andric   MachineBasicBlock *FalseMBB = nullptr;
348fe6060f1SDimitry Andric   SmallVector<MachineOperand, 1> Cond;
349fe6060f1SDimitry Andric 
350fe6060f1SDimitry Andric   if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))
351fe6060f1SDimitry Andric     return false;
352fe6060f1SDimitry Andric 
353fe6060f1SDimitry Andric   // Consider only the forward branches.
354fe6060f1SDimitry Andric   if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
355fe6060f1SDimitry Andric       mustRetainExeczBranch(*FalseMBB, *TrueMBB))
356fe6060f1SDimitry Andric     return false;
357fe6060f1SDimitry Andric 
358fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
359fe6060f1SDimitry Andric   MI.eraseFromParent();
360fe6060f1SDimitry Andric   SrcMBB.removeSuccessor(TrueMBB);
361fe6060f1SDimitry Andric 
3625ffd83dbSDimitry Andric   return true;
3635ffd83dbSDimitry Andric }
3645ffd83dbSDimitry Andric 
runOnMachineFunction(MachineFunction & MF)3655ffd83dbSDimitry Andric bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
3665ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
3675ffd83dbSDimitry Andric   TII = ST.getInstrInfo();
3685ffd83dbSDimitry Andric   TRI = &TII->getRegisterInfo();
3695ffd83dbSDimitry Andric   bool Changed = false;
3705ffd83dbSDimitry Andric 
371fe6060f1SDimitry Andric   MF.RenumberBlocks();
372fe6060f1SDimitry Andric 
3735ffd83dbSDimitry Andric   for (MachineBasicBlock &MBB : MF) {
374fe6060f1SDimitry Andric     MachineBasicBlock::iterator TermI = MBB.getFirstTerminator();
375fe6060f1SDimitry Andric     // Check first terminator for branches to optimize
376eaeb601bSDimitry Andric     if (TermI != MBB.end()) {
377eaeb601bSDimitry Andric       MachineInstr &MI = *TermI;
3785ffd83dbSDimitry Andric       switch (MI.getOpcode()) {
3795ffd83dbSDimitry Andric       case AMDGPU::S_CBRANCH_VCCZ:
3805ffd83dbSDimitry Andric       case AMDGPU::S_CBRANCH_VCCNZ:
3815ffd83dbSDimitry Andric         Changed |= optimizeVccBranch(MI);
382fe6060f1SDimitry Andric         break;
383fe6060f1SDimitry Andric       case AMDGPU::S_CBRANCH_EXECZ:
384fe6060f1SDimitry Andric         Changed |= removeExeczBranch(MI, MBB);
385eaeb601bSDimitry Andric         break;
386eaeb601bSDimitry Andric       }
387eaeb601bSDimitry Andric     }
3885ffd83dbSDimitry Andric 
3895ffd83dbSDimitry Andric     if (!ST.hasVGPRIndexMode())
3905ffd83dbSDimitry Andric       continue;
3915ffd83dbSDimitry Andric 
3925ffd83dbSDimitry Andric     MachineInstr *SetGPRMI = nullptr;
3935ffd83dbSDimitry Andric     const unsigned Threshold = 20;
3945ffd83dbSDimitry Andric     unsigned Count = 0;
3955ffd83dbSDimitry Andric     // Scan the block for two S_SET_GPR_IDX_ON instructions to see if a
3965ffd83dbSDimitry Andric     // second is not needed. Do expensive checks in the optimizeSetGPR()
3975ffd83dbSDimitry Andric     // and limit the distance to 20 instructions for compile time purposes.
398fe6060f1SDimitry Andric     // Note: this needs to work on bundles as S_SET_GPR_IDX* instructions
399fe6060f1SDimitry Andric     // may be bundled with the instructions they modify.
400*0fca6ea1SDimitry Andric     for (auto &MI : make_early_inc_range(MBB.instrs())) {
4015ffd83dbSDimitry Andric       if (Count == Threshold)
4025ffd83dbSDimitry Andric         SetGPRMI = nullptr;
4035ffd83dbSDimitry Andric       else
4045ffd83dbSDimitry Andric         ++Count;
4055ffd83dbSDimitry Andric 
4065ffd83dbSDimitry Andric       if (MI.getOpcode() != AMDGPU::S_SET_GPR_IDX_ON)
4075ffd83dbSDimitry Andric         continue;
4085ffd83dbSDimitry Andric 
4095ffd83dbSDimitry Andric       Count = 0;
4105ffd83dbSDimitry Andric       if (!SetGPRMI) {
4115ffd83dbSDimitry Andric         SetGPRMI = &MI;
4125ffd83dbSDimitry Andric         continue;
4135ffd83dbSDimitry Andric       }
4145ffd83dbSDimitry Andric 
4155ffd83dbSDimitry Andric       if (optimizeSetGPR(*SetGPRMI, MI))
4165ffd83dbSDimitry Andric         Changed = true;
4175ffd83dbSDimitry Andric       else
4185ffd83dbSDimitry Andric         SetGPRMI = &MI;
4195ffd83dbSDimitry Andric     }
4205ffd83dbSDimitry Andric   }
4215ffd83dbSDimitry Andric 
4225ffd83dbSDimitry Andric   return Changed;
4235ffd83dbSDimitry Andric }
424