//===-- SIPreEmitPeephole.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass performs the peephole optimizations before code emission.
///
//===----------------------------------------------------------------------===//
135ffd83dbSDimitry Andric
145ffd83dbSDimitry Andric #include "AMDGPU.h"
15e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
165ffd83dbSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
175ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
185ffd83dbSDimitry Andric
195ffd83dbSDimitry Andric using namespace llvm;
205ffd83dbSDimitry Andric
215ffd83dbSDimitry Andric #define DEBUG_TYPE "si-pre-emit-peephole"
225ffd83dbSDimitry Andric
23fe6060f1SDimitry Andric static unsigned SkipThreshold;
24fe6060f1SDimitry Andric
25fe6060f1SDimitry Andric static cl::opt<unsigned, true> SkipThresholdFlag(
26fe6060f1SDimitry Andric "amdgpu-skip-threshold", cl::Hidden,
27fe6060f1SDimitry Andric cl::desc(
28fe6060f1SDimitry Andric "Number of instructions before jumping over divergent control flow"),
29fe6060f1SDimitry Andric cl::location(SkipThreshold), cl::init(12));
30fe6060f1SDimitry Andric
315ffd83dbSDimitry Andric namespace {
325ffd83dbSDimitry Andric
335ffd83dbSDimitry Andric class SIPreEmitPeephole : public MachineFunctionPass {
345ffd83dbSDimitry Andric private:
355ffd83dbSDimitry Andric const SIInstrInfo *TII = nullptr;
365ffd83dbSDimitry Andric const SIRegisterInfo *TRI = nullptr;
375ffd83dbSDimitry Andric
385ffd83dbSDimitry Andric bool optimizeVccBranch(MachineInstr &MI) const;
395ffd83dbSDimitry Andric bool optimizeSetGPR(MachineInstr &First, MachineInstr &MI) const;
40fe6060f1SDimitry Andric bool getBlockDestinations(MachineBasicBlock &SrcMBB,
41fe6060f1SDimitry Andric MachineBasicBlock *&TrueMBB,
42fe6060f1SDimitry Andric MachineBasicBlock *&FalseMBB,
43fe6060f1SDimitry Andric SmallVectorImpl<MachineOperand> &Cond);
44fe6060f1SDimitry Andric bool mustRetainExeczBranch(const MachineBasicBlock &From,
45fe6060f1SDimitry Andric const MachineBasicBlock &To) const;
46fe6060f1SDimitry Andric bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
475ffd83dbSDimitry Andric
485ffd83dbSDimitry Andric public:
495ffd83dbSDimitry Andric static char ID;
505ffd83dbSDimitry Andric
SIPreEmitPeephole()515ffd83dbSDimitry Andric SIPreEmitPeephole() : MachineFunctionPass(ID) {
525ffd83dbSDimitry Andric initializeSIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
535ffd83dbSDimitry Andric }
545ffd83dbSDimitry Andric
555ffd83dbSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override;
565ffd83dbSDimitry Andric };
575ffd83dbSDimitry Andric
585ffd83dbSDimitry Andric } // End anonymous namespace.
595ffd83dbSDimitry Andric
605ffd83dbSDimitry Andric INITIALIZE_PASS(SIPreEmitPeephole, DEBUG_TYPE,
615ffd83dbSDimitry Andric "SI peephole optimizations", false, false)
625ffd83dbSDimitry Andric
635ffd83dbSDimitry Andric char SIPreEmitPeephole::ID = 0;
645ffd83dbSDimitry Andric
655ffd83dbSDimitry Andric char &llvm::SIPreEmitPeepholeID = SIPreEmitPeephole::ID;
665ffd83dbSDimitry Andric
optimizeVccBranch(MachineInstr & MI) const675ffd83dbSDimitry Andric bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
685ffd83dbSDimitry Andric // Match:
695ffd83dbSDimitry Andric // sreg = -1 or 0
705ffd83dbSDimitry Andric // vcc = S_AND_B64 exec, sreg or S_ANDN2_B64 exec, sreg
715ffd83dbSDimitry Andric // S_CBRANCH_VCC[N]Z
725ffd83dbSDimitry Andric // =>
735ffd83dbSDimitry Andric // S_CBRANCH_EXEC[N]Z
745ffd83dbSDimitry Andric // We end up with this pattern sometimes after basic block placement.
755ffd83dbSDimitry Andric // It happens while combining a block which assigns -1 or 0 to a saved mask
765ffd83dbSDimitry Andric // and another block which consumes that saved mask and then a branch.
7781ad6265SDimitry Andric //
7881ad6265SDimitry Andric // While searching this also performs the following substitution:
7981ad6265SDimitry Andric // vcc = V_CMP
8081ad6265SDimitry Andric // vcc = S_AND exec, vcc
8181ad6265SDimitry Andric // S_CBRANCH_VCC[N]Z
8281ad6265SDimitry Andric // =>
8381ad6265SDimitry Andric // vcc = V_CMP
8481ad6265SDimitry Andric // S_CBRANCH_VCC[N]Z
8581ad6265SDimitry Andric
865ffd83dbSDimitry Andric bool Changed = false;
875ffd83dbSDimitry Andric MachineBasicBlock &MBB = *MI.getParent();
885ffd83dbSDimitry Andric const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
895ffd83dbSDimitry Andric const bool IsWave32 = ST.isWave32();
905ffd83dbSDimitry Andric const unsigned CondReg = TRI->getVCC();
915ffd83dbSDimitry Andric const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
925ffd83dbSDimitry Andric const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
935ffd83dbSDimitry Andric const unsigned AndN2 = IsWave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
94e8d8bef9SDimitry Andric const unsigned Mov = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
955ffd83dbSDimitry Andric
965ffd83dbSDimitry Andric MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
975ffd83dbSDimitry Andric E = MBB.rend();
985ffd83dbSDimitry Andric bool ReadsCond = false;
995ffd83dbSDimitry Andric unsigned Threshold = 5;
1005ffd83dbSDimitry Andric for (++A; A != E; ++A) {
1015ffd83dbSDimitry Andric if (!--Threshold)
1025ffd83dbSDimitry Andric return false;
1035ffd83dbSDimitry Andric if (A->modifiesRegister(ExecReg, TRI))
1045ffd83dbSDimitry Andric return false;
1055ffd83dbSDimitry Andric if (A->modifiesRegister(CondReg, TRI)) {
1065ffd83dbSDimitry Andric if (!A->definesRegister(CondReg, TRI) ||
1075ffd83dbSDimitry Andric (A->getOpcode() != And && A->getOpcode() != AndN2))
1085ffd83dbSDimitry Andric return false;
1095ffd83dbSDimitry Andric break;
1105ffd83dbSDimitry Andric }
1115ffd83dbSDimitry Andric ReadsCond |= A->readsRegister(CondReg, TRI);
1125ffd83dbSDimitry Andric }
1135ffd83dbSDimitry Andric if (A == E)
1145ffd83dbSDimitry Andric return false;
1155ffd83dbSDimitry Andric
1165ffd83dbSDimitry Andric MachineOperand &Op1 = A->getOperand(1);
1175ffd83dbSDimitry Andric MachineOperand &Op2 = A->getOperand(2);
1185ffd83dbSDimitry Andric if (Op1.getReg() != ExecReg && Op2.isReg() && Op2.getReg() == ExecReg) {
1195ffd83dbSDimitry Andric TII->commuteInstruction(*A);
1205ffd83dbSDimitry Andric Changed = true;
1215ffd83dbSDimitry Andric }
1225ffd83dbSDimitry Andric if (Op1.getReg() != ExecReg)
1235ffd83dbSDimitry Andric return Changed;
1245ffd83dbSDimitry Andric if (Op2.isImm() && !(Op2.getImm() == -1 || Op2.getImm() == 0))
1255ffd83dbSDimitry Andric return Changed;
1265ffd83dbSDimitry Andric
1275ffd83dbSDimitry Andric int64_t MaskValue = 0;
1285ffd83dbSDimitry Andric Register SReg;
1295ffd83dbSDimitry Andric if (Op2.isReg()) {
1305ffd83dbSDimitry Andric SReg = Op2.getReg();
1315ffd83dbSDimitry Andric auto M = std::next(A);
1325ffd83dbSDimitry Andric bool ReadsSreg = false;
13381ad6265SDimitry Andric bool ModifiesExec = false;
1345ffd83dbSDimitry Andric for (; M != E; ++M) {
1355ffd83dbSDimitry Andric if (M->definesRegister(SReg, TRI))
1365ffd83dbSDimitry Andric break;
1375ffd83dbSDimitry Andric if (M->modifiesRegister(SReg, TRI))
1385ffd83dbSDimitry Andric return Changed;
1395ffd83dbSDimitry Andric ReadsSreg |= M->readsRegister(SReg, TRI);
14081ad6265SDimitry Andric ModifiesExec |= M->modifiesRegister(ExecReg, TRI);
1415ffd83dbSDimitry Andric }
14281ad6265SDimitry Andric if (M == E)
14381ad6265SDimitry Andric return Changed;
14481ad6265SDimitry Andric // If SReg is VCC and SReg definition is a VALU comparison.
14581ad6265SDimitry Andric // This means S_AND with EXEC is not required.
14681ad6265SDimitry Andric // Erase the S_AND and return.
14781ad6265SDimitry Andric // Note: isVOPC is used instead of isCompare to catch V_CMP_CLASS
14881ad6265SDimitry Andric if (A->getOpcode() == And && SReg == CondReg && !ModifiesExec &&
14981ad6265SDimitry Andric TII->isVOPC(*M)) {
15081ad6265SDimitry Andric A->eraseFromParent();
15181ad6265SDimitry Andric return true;
15281ad6265SDimitry Andric }
15381ad6265SDimitry Andric if (!M->isMoveImmediate() || !M->getOperand(1).isImm() ||
1545ffd83dbSDimitry Andric (M->getOperand(1).getImm() != -1 && M->getOperand(1).getImm() != 0))
1555ffd83dbSDimitry Andric return Changed;
1565ffd83dbSDimitry Andric MaskValue = M->getOperand(1).getImm();
1575ffd83dbSDimitry Andric // First if sreg is only used in the AND instruction fold the immediate
15881ad6265SDimitry Andric // into the AND.
1595ffd83dbSDimitry Andric if (!ReadsSreg && Op2.isKill()) {
1605ffd83dbSDimitry Andric A->getOperand(2).ChangeToImmediate(MaskValue);
1615ffd83dbSDimitry Andric M->eraseFromParent();
1625ffd83dbSDimitry Andric }
1635ffd83dbSDimitry Andric } else if (Op2.isImm()) {
1645ffd83dbSDimitry Andric MaskValue = Op2.getImm();
1655ffd83dbSDimitry Andric } else {
1665ffd83dbSDimitry Andric llvm_unreachable("Op2 must be register or immediate");
1675ffd83dbSDimitry Andric }
1685ffd83dbSDimitry Andric
1695ffd83dbSDimitry Andric // Invert mask for s_andn2
1705ffd83dbSDimitry Andric assert(MaskValue == 0 || MaskValue == -1);
1715ffd83dbSDimitry Andric if (A->getOpcode() == AndN2)
1725ffd83dbSDimitry Andric MaskValue = ~MaskValue;
1735ffd83dbSDimitry Andric
174*0fca6ea1SDimitry Andric if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) {
175e8d8bef9SDimitry Andric if (!MI.killsRegister(CondReg, TRI)) {
176e8d8bef9SDimitry Andric // Replace AND with MOV
177e8d8bef9SDimitry Andric if (MaskValue == 0) {
178e8d8bef9SDimitry Andric BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg)
179e8d8bef9SDimitry Andric .addImm(0);
180e8d8bef9SDimitry Andric } else {
181e8d8bef9SDimitry Andric BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg)
182e8d8bef9SDimitry Andric .addReg(ExecReg);
183e8d8bef9SDimitry Andric }
184e8d8bef9SDimitry Andric }
185e8d8bef9SDimitry Andric // Remove AND instruction
1865ffd83dbSDimitry Andric A->eraseFromParent();
187e8d8bef9SDimitry Andric }
1885ffd83dbSDimitry Andric
1895ffd83dbSDimitry Andric bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
1905ffd83dbSDimitry Andric if (SReg == ExecReg) {
1915ffd83dbSDimitry Andric // EXEC is updated directly
1925ffd83dbSDimitry Andric if (IsVCCZ) {
1935ffd83dbSDimitry Andric MI.eraseFromParent();
1945ffd83dbSDimitry Andric return true;
1955ffd83dbSDimitry Andric }
1965ffd83dbSDimitry Andric MI.setDesc(TII->get(AMDGPU::S_BRANCH));
1975ffd83dbSDimitry Andric } else if (IsVCCZ && MaskValue == 0) {
1985ffd83dbSDimitry Andric // Will always branch
199349cc55cSDimitry Andric // Remove all successors shadowed by new unconditional branch
2005ffd83dbSDimitry Andric MachineBasicBlock *Parent = MI.getParent();
2015ffd83dbSDimitry Andric SmallVector<MachineInstr *, 4> ToRemove;
2025ffd83dbSDimitry Andric bool Found = false;
2035ffd83dbSDimitry Andric for (MachineInstr &Term : Parent->terminators()) {
2045ffd83dbSDimitry Andric if (Found) {
2055ffd83dbSDimitry Andric if (Term.isBranch())
2065ffd83dbSDimitry Andric ToRemove.push_back(&Term);
2075ffd83dbSDimitry Andric } else {
2085ffd83dbSDimitry Andric Found = Term.isIdenticalTo(MI);
2095ffd83dbSDimitry Andric }
2105ffd83dbSDimitry Andric }
2115ffd83dbSDimitry Andric assert(Found && "conditional branch is not terminator");
212bdd1243dSDimitry Andric for (auto *BranchMI : ToRemove) {
2135ffd83dbSDimitry Andric MachineOperand &Dst = BranchMI->getOperand(0);
2145ffd83dbSDimitry Andric assert(Dst.isMBB() && "destination is not basic block");
2155ffd83dbSDimitry Andric Parent->removeSuccessor(Dst.getMBB());
2165ffd83dbSDimitry Andric BranchMI->eraseFromParent();
2175ffd83dbSDimitry Andric }
2185ffd83dbSDimitry Andric
2195ffd83dbSDimitry Andric if (MachineBasicBlock *Succ = Parent->getFallThrough()) {
2205ffd83dbSDimitry Andric Parent->removeSuccessor(Succ);
2215ffd83dbSDimitry Andric }
2225ffd83dbSDimitry Andric
2235ffd83dbSDimitry Andric // Rewrite to unconditional branch
2245ffd83dbSDimitry Andric MI.setDesc(TII->get(AMDGPU::S_BRANCH));
2255ffd83dbSDimitry Andric } else if (!IsVCCZ && MaskValue == 0) {
2265ffd83dbSDimitry Andric // Will never branch
2275ffd83dbSDimitry Andric MachineOperand &Dst = MI.getOperand(0);
2285ffd83dbSDimitry Andric assert(Dst.isMBB() && "destination is not basic block");
2295ffd83dbSDimitry Andric MI.getParent()->removeSuccessor(Dst.getMBB());
2305ffd83dbSDimitry Andric MI.eraseFromParent();
2315ffd83dbSDimitry Andric return true;
2325ffd83dbSDimitry Andric } else if (MaskValue == -1) {
2335ffd83dbSDimitry Andric // Depends only on EXEC
2345ffd83dbSDimitry Andric MI.setDesc(
2355ffd83dbSDimitry Andric TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ : AMDGPU::S_CBRANCH_EXECNZ));
2365ffd83dbSDimitry Andric }
2375ffd83dbSDimitry Andric
238*0fca6ea1SDimitry Andric MI.removeOperand(MI.findRegisterUseOperandIdx(CondReg, TRI, false /*Kill*/));
2395ffd83dbSDimitry Andric MI.addImplicitDefUseOperands(*MBB.getParent());
2405ffd83dbSDimitry Andric
2415ffd83dbSDimitry Andric return true;
2425ffd83dbSDimitry Andric }
2435ffd83dbSDimitry Andric
optimizeSetGPR(MachineInstr & First,MachineInstr & MI) const2445ffd83dbSDimitry Andric bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
2455ffd83dbSDimitry Andric MachineInstr &MI) const {
2465ffd83dbSDimitry Andric MachineBasicBlock &MBB = *MI.getParent();
2475ffd83dbSDimitry Andric const MachineFunction &MF = *MBB.getParent();
2485ffd83dbSDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo();
2495ffd83dbSDimitry Andric MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
2505ffd83dbSDimitry Andric Register IdxReg = Idx->isReg() ? Idx->getReg() : Register();
2515ffd83dbSDimitry Andric SmallVector<MachineInstr *, 4> ToRemove;
2525ffd83dbSDimitry Andric bool IdxOn = true;
2535ffd83dbSDimitry Andric
2545ffd83dbSDimitry Andric if (!MI.isIdenticalTo(First))
2555ffd83dbSDimitry Andric return false;
2565ffd83dbSDimitry Andric
2575ffd83dbSDimitry Andric // Scan back to find an identical S_SET_GPR_IDX_ON
258fe6060f1SDimitry Andric for (MachineBasicBlock::instr_iterator I = std::next(First.getIterator()),
259fe6060f1SDimitry Andric E = MI.getIterator();
260fe6060f1SDimitry Andric I != E; ++I) {
261fe6060f1SDimitry Andric if (I->isBundle())
262fe6060f1SDimitry Andric continue;
2635ffd83dbSDimitry Andric switch (I->getOpcode()) {
2645ffd83dbSDimitry Andric case AMDGPU::S_SET_GPR_IDX_MODE:
2655ffd83dbSDimitry Andric return false;
2665ffd83dbSDimitry Andric case AMDGPU::S_SET_GPR_IDX_OFF:
2675ffd83dbSDimitry Andric IdxOn = false;
2685ffd83dbSDimitry Andric ToRemove.push_back(&*I);
2695ffd83dbSDimitry Andric break;
2705ffd83dbSDimitry Andric default:
2715ffd83dbSDimitry Andric if (I->modifiesRegister(AMDGPU::M0, TRI))
2725ffd83dbSDimitry Andric return false;
2735ffd83dbSDimitry Andric if (IdxReg && I->modifiesRegister(IdxReg, TRI))
2745ffd83dbSDimitry Andric return false;
2755ffd83dbSDimitry Andric if (llvm::any_of(I->operands(),
2765ffd83dbSDimitry Andric [&MRI, this](const MachineOperand &MO) {
2775ffd83dbSDimitry Andric return MO.isReg() &&
2785ffd83dbSDimitry Andric TRI->isVectorRegister(MRI, MO.getReg());
2795ffd83dbSDimitry Andric })) {
2805ffd83dbSDimitry Andric // The only exception allowed here is another indirect vector move
2815ffd83dbSDimitry Andric // with the same mode.
282349cc55cSDimitry Andric if (!IdxOn || !(I->getOpcode() == AMDGPU::V_MOV_B32_indirect_write ||
283349cc55cSDimitry Andric I->getOpcode() == AMDGPU::V_MOV_B32_indirect_read))
2845ffd83dbSDimitry Andric return false;
2855ffd83dbSDimitry Andric }
2865ffd83dbSDimitry Andric }
2875ffd83dbSDimitry Andric }
2885ffd83dbSDimitry Andric
289fe6060f1SDimitry Andric MI.eraseFromBundle();
2905ffd83dbSDimitry Andric for (MachineInstr *RI : ToRemove)
291fe6060f1SDimitry Andric RI->eraseFromBundle();
292fe6060f1SDimitry Andric return true;
293fe6060f1SDimitry Andric }
294fe6060f1SDimitry Andric
getBlockDestinations(MachineBasicBlock & SrcMBB,MachineBasicBlock * & TrueMBB,MachineBasicBlock * & FalseMBB,SmallVectorImpl<MachineOperand> & Cond)295fe6060f1SDimitry Andric bool SIPreEmitPeephole::getBlockDestinations(
296fe6060f1SDimitry Andric MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
297fe6060f1SDimitry Andric MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
298fe6060f1SDimitry Andric if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond))
299fe6060f1SDimitry Andric return false;
300fe6060f1SDimitry Andric
301fe6060f1SDimitry Andric if (!FalseMBB)
302fe6060f1SDimitry Andric FalseMBB = SrcMBB.getNextNode();
303fe6060f1SDimitry Andric
304fe6060f1SDimitry Andric return true;
305fe6060f1SDimitry Andric }
306fe6060f1SDimitry Andric
mustRetainExeczBranch(const MachineBasicBlock & From,const MachineBasicBlock & To) const307fe6060f1SDimitry Andric bool SIPreEmitPeephole::mustRetainExeczBranch(
308fe6060f1SDimitry Andric const MachineBasicBlock &From, const MachineBasicBlock &To) const {
309fe6060f1SDimitry Andric unsigned NumInstr = 0;
310fe6060f1SDimitry Andric const MachineFunction *MF = From.getParent();
311fe6060f1SDimitry Andric
312fe6060f1SDimitry Andric for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
313fe6060f1SDimitry Andric MBBI != End && MBBI != ToI; ++MBBI) {
314fe6060f1SDimitry Andric const MachineBasicBlock &MBB = *MBBI;
315fe6060f1SDimitry Andric
3164824e7fdSDimitry Andric for (const MachineInstr &MI : MBB) {
317fe6060f1SDimitry Andric // When a uniform loop is inside non-uniform control flow, the branch
318fe6060f1SDimitry Andric // leaving the loop might never be taken when EXEC = 0.
319fe6060f1SDimitry Andric // Hence we should retain cbranch out of the loop lest it become infinite.
3204824e7fdSDimitry Andric if (MI.isConditionalBranch())
321fe6060f1SDimitry Andric return true;
322fe6060f1SDimitry Andric
3235f757f3fSDimitry Andric if (MI.isMetaInstruction())
3245f757f3fSDimitry Andric continue;
3255f757f3fSDimitry Andric
3264824e7fdSDimitry Andric if (TII->hasUnwantedEffectsWhenEXECEmpty(MI))
327fe6060f1SDimitry Andric return true;
328fe6060f1SDimitry Andric
329fe6060f1SDimitry Andric // These instructions are potentially expensive even if EXEC = 0.
3304824e7fdSDimitry Andric if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
331*0fca6ea1SDimitry Andric TII->isDS(MI) || TII->isWaitcnt(MI.getOpcode()))
332fe6060f1SDimitry Andric return true;
333fe6060f1SDimitry Andric
334fe6060f1SDimitry Andric ++NumInstr;
335fe6060f1SDimitry Andric if (NumInstr >= SkipThreshold)
336fe6060f1SDimitry Andric return true;
337fe6060f1SDimitry Andric }
338fe6060f1SDimitry Andric }
339fe6060f1SDimitry Andric
340fe6060f1SDimitry Andric return false;
341fe6060f1SDimitry Andric }
342fe6060f1SDimitry Andric
343fe6060f1SDimitry Andric // Returns true if the skip branch instruction is removed.
removeExeczBranch(MachineInstr & MI,MachineBasicBlock & SrcMBB)344fe6060f1SDimitry Andric bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
345fe6060f1SDimitry Andric MachineBasicBlock &SrcMBB) {
346fe6060f1SDimitry Andric MachineBasicBlock *TrueMBB = nullptr;
347fe6060f1SDimitry Andric MachineBasicBlock *FalseMBB = nullptr;
348fe6060f1SDimitry Andric SmallVector<MachineOperand, 1> Cond;
349fe6060f1SDimitry Andric
350fe6060f1SDimitry Andric if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))
351fe6060f1SDimitry Andric return false;
352fe6060f1SDimitry Andric
353fe6060f1SDimitry Andric // Consider only the forward branches.
354fe6060f1SDimitry Andric if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
355fe6060f1SDimitry Andric mustRetainExeczBranch(*FalseMBB, *TrueMBB))
356fe6060f1SDimitry Andric return false;
357fe6060f1SDimitry Andric
358fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
359fe6060f1SDimitry Andric MI.eraseFromParent();
360fe6060f1SDimitry Andric SrcMBB.removeSuccessor(TrueMBB);
361fe6060f1SDimitry Andric
3625ffd83dbSDimitry Andric return true;
3635ffd83dbSDimitry Andric }
3645ffd83dbSDimitry Andric
runOnMachineFunction(MachineFunction & MF)3655ffd83dbSDimitry Andric bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
3665ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
3675ffd83dbSDimitry Andric TII = ST.getInstrInfo();
3685ffd83dbSDimitry Andric TRI = &TII->getRegisterInfo();
3695ffd83dbSDimitry Andric bool Changed = false;
3705ffd83dbSDimitry Andric
371fe6060f1SDimitry Andric MF.RenumberBlocks();
372fe6060f1SDimitry Andric
3735ffd83dbSDimitry Andric for (MachineBasicBlock &MBB : MF) {
374fe6060f1SDimitry Andric MachineBasicBlock::iterator TermI = MBB.getFirstTerminator();
375fe6060f1SDimitry Andric // Check first terminator for branches to optimize
376eaeb601bSDimitry Andric if (TermI != MBB.end()) {
377eaeb601bSDimitry Andric MachineInstr &MI = *TermI;
3785ffd83dbSDimitry Andric switch (MI.getOpcode()) {
3795ffd83dbSDimitry Andric case AMDGPU::S_CBRANCH_VCCZ:
3805ffd83dbSDimitry Andric case AMDGPU::S_CBRANCH_VCCNZ:
3815ffd83dbSDimitry Andric Changed |= optimizeVccBranch(MI);
382fe6060f1SDimitry Andric break;
383fe6060f1SDimitry Andric case AMDGPU::S_CBRANCH_EXECZ:
384fe6060f1SDimitry Andric Changed |= removeExeczBranch(MI, MBB);
385eaeb601bSDimitry Andric break;
386eaeb601bSDimitry Andric }
387eaeb601bSDimitry Andric }
3885ffd83dbSDimitry Andric
3895ffd83dbSDimitry Andric if (!ST.hasVGPRIndexMode())
3905ffd83dbSDimitry Andric continue;
3915ffd83dbSDimitry Andric
3925ffd83dbSDimitry Andric MachineInstr *SetGPRMI = nullptr;
3935ffd83dbSDimitry Andric const unsigned Threshold = 20;
3945ffd83dbSDimitry Andric unsigned Count = 0;
3955ffd83dbSDimitry Andric // Scan the block for two S_SET_GPR_IDX_ON instructions to see if a
3965ffd83dbSDimitry Andric // second is not needed. Do expensive checks in the optimizeSetGPR()
3975ffd83dbSDimitry Andric // and limit the distance to 20 instructions for compile time purposes.
398fe6060f1SDimitry Andric // Note: this needs to work on bundles as S_SET_GPR_IDX* instructions
399fe6060f1SDimitry Andric // may be bundled with the instructions they modify.
400*0fca6ea1SDimitry Andric for (auto &MI : make_early_inc_range(MBB.instrs())) {
4015ffd83dbSDimitry Andric if (Count == Threshold)
4025ffd83dbSDimitry Andric SetGPRMI = nullptr;
4035ffd83dbSDimitry Andric else
4045ffd83dbSDimitry Andric ++Count;
4055ffd83dbSDimitry Andric
4065ffd83dbSDimitry Andric if (MI.getOpcode() != AMDGPU::S_SET_GPR_IDX_ON)
4075ffd83dbSDimitry Andric continue;
4085ffd83dbSDimitry Andric
4095ffd83dbSDimitry Andric Count = 0;
4105ffd83dbSDimitry Andric if (!SetGPRMI) {
4115ffd83dbSDimitry Andric SetGPRMI = &MI;
4125ffd83dbSDimitry Andric continue;
4135ffd83dbSDimitry Andric }
4145ffd83dbSDimitry Andric
4155ffd83dbSDimitry Andric if (optimizeSetGPR(*SetGPRMI, MI))
4165ffd83dbSDimitry Andric Changed = true;
4175ffd83dbSDimitry Andric else
4185ffd83dbSDimitry Andric SetGPRMI = &MI;
4195ffd83dbSDimitry Andric }
4205ffd83dbSDimitry Andric }
4215ffd83dbSDimitry Andric
4225ffd83dbSDimitry Andric return Changed;
4235ffd83dbSDimitry Andric }
424