//===-- AMDGPUGlobalISelDivergenceLowering.cpp ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// GlobalISel pass that selects divergent i1 phis as lane mask phis.
/// Lane mask merging uses the same algorithm as SDAG in SILowerI1Copies.
/// Handles all cases of temporal divergence.
/// For divergent non-phi i1 and uniform i1 uses outside of the cycle, this
/// pass currently depends on LCSSA to insert phis with one incoming value.
//
//===----------------------------------------------------------------------===//
175f757f3fSDimitry Andric
185f757f3fSDimitry Andric #include "AMDGPU.h"
19*0fca6ea1SDimitry Andric #include "SILowerI1Copies.h"
20*0fca6ea1SDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
215f757f3fSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
22*0fca6ea1SDimitry Andric #include "llvm/CodeGen/MachineUniformityAnalysis.h"
23*0fca6ea1SDimitry Andric #include "llvm/InitializePasses.h"
245f757f3fSDimitry Andric
255f757f3fSDimitry Andric #define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
265f757f3fSDimitry Andric
275f757f3fSDimitry Andric using namespace llvm;
285f757f3fSDimitry Andric
295f757f3fSDimitry Andric namespace {
305f757f3fSDimitry Andric
315f757f3fSDimitry Andric class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {
325f757f3fSDimitry Andric public:
335f757f3fSDimitry Andric static char ID;
345f757f3fSDimitry Andric
355f757f3fSDimitry Andric public:
AMDGPUGlobalISelDivergenceLowering()365f757f3fSDimitry Andric AMDGPUGlobalISelDivergenceLowering() : MachineFunctionPass(ID) {
375f757f3fSDimitry Andric initializeAMDGPUGlobalISelDivergenceLoweringPass(
385f757f3fSDimitry Andric *PassRegistry::getPassRegistry());
395f757f3fSDimitry Andric }
405f757f3fSDimitry Andric
415f757f3fSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override;
425f757f3fSDimitry Andric
getPassName() const435f757f3fSDimitry Andric StringRef getPassName() const override {
445f757f3fSDimitry Andric return "AMDGPU GlobalISel divergence lowering";
455f757f3fSDimitry Andric }
465f757f3fSDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const475f757f3fSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
485f757f3fSDimitry Andric AU.setPreservesCFG();
49*0fca6ea1SDimitry Andric AU.addRequired<MachineDominatorTreeWrapperPass>();
50*0fca6ea1SDimitry Andric AU.addRequired<MachinePostDominatorTreeWrapperPass>();
51*0fca6ea1SDimitry Andric AU.addRequired<MachineUniformityAnalysisPass>();
525f757f3fSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
535f757f3fSDimitry Andric }
545f757f3fSDimitry Andric };
555f757f3fSDimitry Andric
56*0fca6ea1SDimitry Andric class DivergenceLoweringHelper : public PhiLoweringHelper {
57*0fca6ea1SDimitry Andric public:
58*0fca6ea1SDimitry Andric DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
59*0fca6ea1SDimitry Andric MachinePostDominatorTree *PDT,
60*0fca6ea1SDimitry Andric MachineUniformityInfo *MUI);
61*0fca6ea1SDimitry Andric
62*0fca6ea1SDimitry Andric private:
63*0fca6ea1SDimitry Andric MachineUniformityInfo *MUI = nullptr;
64*0fca6ea1SDimitry Andric MachineIRBuilder B;
65*0fca6ea1SDimitry Andric Register buildRegCopyToLaneMask(Register Reg);
66*0fca6ea1SDimitry Andric
67*0fca6ea1SDimitry Andric public:
68*0fca6ea1SDimitry Andric void markAsLaneMask(Register DstReg) const override;
69*0fca6ea1SDimitry Andric void getCandidatesForLowering(
70*0fca6ea1SDimitry Andric SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
71*0fca6ea1SDimitry Andric void collectIncomingValuesFromPhi(
72*0fca6ea1SDimitry Andric const MachineInstr *MI,
73*0fca6ea1SDimitry Andric SmallVectorImpl<Incoming> &Incomings) const override;
74*0fca6ea1SDimitry Andric void replaceDstReg(Register NewReg, Register OldReg,
75*0fca6ea1SDimitry Andric MachineBasicBlock *MBB) override;
76*0fca6ea1SDimitry Andric void buildMergeLaneMasks(MachineBasicBlock &MBB,
77*0fca6ea1SDimitry Andric MachineBasicBlock::iterator I, const DebugLoc &DL,
78*0fca6ea1SDimitry Andric Register DstReg, Register PrevReg,
79*0fca6ea1SDimitry Andric Register CurReg) override;
80*0fca6ea1SDimitry Andric void constrainAsLaneMask(Incoming &In) override;
81*0fca6ea1SDimitry Andric };
82*0fca6ea1SDimitry Andric
DivergenceLoweringHelper(MachineFunction * MF,MachineDominatorTree * DT,MachinePostDominatorTree * PDT,MachineUniformityInfo * MUI)83*0fca6ea1SDimitry Andric DivergenceLoweringHelper::DivergenceLoweringHelper(
84*0fca6ea1SDimitry Andric MachineFunction *MF, MachineDominatorTree *DT,
85*0fca6ea1SDimitry Andric MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
86*0fca6ea1SDimitry Andric : PhiLoweringHelper(MF, DT, PDT), MUI(MUI), B(*MF) {}
87*0fca6ea1SDimitry Andric
88*0fca6ea1SDimitry Andric // _(s1) -> SReg_32/64(s1)
markAsLaneMask(Register DstReg) const89*0fca6ea1SDimitry Andric void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
90*0fca6ea1SDimitry Andric assert(MRI->getType(DstReg) == LLT::scalar(1));
91*0fca6ea1SDimitry Andric
92*0fca6ea1SDimitry Andric if (MRI->getRegClassOrNull(DstReg)) {
93*0fca6ea1SDimitry Andric if (MRI->constrainRegClass(DstReg, ST->getBoolRC()))
94*0fca6ea1SDimitry Andric return;
95*0fca6ea1SDimitry Andric llvm_unreachable("Failed to constrain register class");
96*0fca6ea1SDimitry Andric }
97*0fca6ea1SDimitry Andric
98*0fca6ea1SDimitry Andric MRI->setRegClass(DstReg, ST->getBoolRC());
99*0fca6ea1SDimitry Andric }
100*0fca6ea1SDimitry Andric
getCandidatesForLowering(SmallVectorImpl<MachineInstr * > & Vreg1Phis) const101*0fca6ea1SDimitry Andric void DivergenceLoweringHelper::getCandidatesForLowering(
102*0fca6ea1SDimitry Andric SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
103*0fca6ea1SDimitry Andric LLT S1 = LLT::scalar(1);
104*0fca6ea1SDimitry Andric
105*0fca6ea1SDimitry Andric // Add divergent i1 phis to the list
106*0fca6ea1SDimitry Andric for (MachineBasicBlock &MBB : *MF) {
107*0fca6ea1SDimitry Andric for (MachineInstr &MI : MBB.phis()) {
108*0fca6ea1SDimitry Andric Register Dst = MI.getOperand(0).getReg();
109*0fca6ea1SDimitry Andric if (MRI->getType(Dst) == S1 && MUI->isDivergent(Dst))
110*0fca6ea1SDimitry Andric Vreg1Phis.push_back(&MI);
111*0fca6ea1SDimitry Andric }
112*0fca6ea1SDimitry Andric }
113*0fca6ea1SDimitry Andric }
114*0fca6ea1SDimitry Andric
collectIncomingValuesFromPhi(const MachineInstr * MI,SmallVectorImpl<Incoming> & Incomings) const115*0fca6ea1SDimitry Andric void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
116*0fca6ea1SDimitry Andric const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
117*0fca6ea1SDimitry Andric for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
118*0fca6ea1SDimitry Andric Incomings.emplace_back(MI->getOperand(i).getReg(),
119*0fca6ea1SDimitry Andric MI->getOperand(i + 1).getMBB(), Register());
120*0fca6ea1SDimitry Andric }
121*0fca6ea1SDimitry Andric }
122*0fca6ea1SDimitry Andric
replaceDstReg(Register NewReg,Register OldReg,MachineBasicBlock * MBB)123*0fca6ea1SDimitry Andric void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
124*0fca6ea1SDimitry Andric MachineBasicBlock *MBB) {
125*0fca6ea1SDimitry Andric BuildMI(*MBB, MBB->getFirstNonPHI(), {}, TII->get(AMDGPU::COPY), OldReg)
126*0fca6ea1SDimitry Andric .addReg(NewReg);
127*0fca6ea1SDimitry Andric }
128*0fca6ea1SDimitry Andric
129*0fca6ea1SDimitry Andric // Copy Reg to new lane mask register, insert a copy after instruction that
130*0fca6ea1SDimitry Andric // defines Reg while skipping phis if needed.
buildRegCopyToLaneMask(Register Reg)131*0fca6ea1SDimitry Andric Register DivergenceLoweringHelper::buildRegCopyToLaneMask(Register Reg) {
132*0fca6ea1SDimitry Andric Register LaneMask = createLaneMaskReg(MRI, LaneMaskRegAttrs);
133*0fca6ea1SDimitry Andric MachineInstr *Instr = MRI->getVRegDef(Reg);
134*0fca6ea1SDimitry Andric MachineBasicBlock *MBB = Instr->getParent();
135*0fca6ea1SDimitry Andric B.setInsertPt(*MBB, MBB->SkipPHIsAndLabels(std::next(Instr->getIterator())));
136*0fca6ea1SDimitry Andric B.buildCopy(LaneMask, Reg);
137*0fca6ea1SDimitry Andric return LaneMask;
138*0fca6ea1SDimitry Andric }
139*0fca6ea1SDimitry Andric
140*0fca6ea1SDimitry Andric // bb.previous
141*0fca6ea1SDimitry Andric // %PrevReg = ...
142*0fca6ea1SDimitry Andric //
143*0fca6ea1SDimitry Andric // bb.current
144*0fca6ea1SDimitry Andric // %CurReg = ...
145*0fca6ea1SDimitry Andric //
146*0fca6ea1SDimitry Andric // %DstReg - not defined
147*0fca6ea1SDimitry Andric //
148*0fca6ea1SDimitry Andric // -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
149*0fca6ea1SDimitry Andric //
150*0fca6ea1SDimitry Andric // bb.previous
151*0fca6ea1SDimitry Andric // %PrevReg = ...
152*0fca6ea1SDimitry Andric // %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
153*0fca6ea1SDimitry Andric //
154*0fca6ea1SDimitry Andric // bb.current
155*0fca6ea1SDimitry Andric // %CurReg = ...
156*0fca6ea1SDimitry Andric // %CurRegCopy:sreg_32(s1) = COPY %CurReg
157*0fca6ea1SDimitry Andric // ...
158*0fca6ea1SDimitry Andric // %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes 0
159*0fca6ea1SDimitry Andric // %CurMaskedReg:sreg_32(s1) = AND %ExecReg, CurRegCopy - inactive lanes to 0
160*0fca6ea1SDimitry Andric // %DstReg:sreg_32(s1) = OR %PrevMaskedReg, CurMaskedReg
161*0fca6ea1SDimitry Andric //
162*0fca6ea1SDimitry Andric // DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg
buildMergeLaneMasks(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const DebugLoc & DL,Register DstReg,Register PrevReg,Register CurReg)163*0fca6ea1SDimitry Andric void DivergenceLoweringHelper::buildMergeLaneMasks(
164*0fca6ea1SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
165*0fca6ea1SDimitry Andric Register DstReg, Register PrevReg, Register CurReg) {
166*0fca6ea1SDimitry Andric // DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
167*0fca6ea1SDimitry Andric // TODO: check if inputs are constants or results of a compare.
168*0fca6ea1SDimitry Andric
169*0fca6ea1SDimitry Andric Register PrevRegCopy = buildRegCopyToLaneMask(PrevReg);
170*0fca6ea1SDimitry Andric Register CurRegCopy = buildRegCopyToLaneMask(CurReg);
171*0fca6ea1SDimitry Andric Register PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
172*0fca6ea1SDimitry Andric Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
173*0fca6ea1SDimitry Andric
174*0fca6ea1SDimitry Andric B.setInsertPt(MBB, I);
175*0fca6ea1SDimitry Andric B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
176*0fca6ea1SDimitry Andric B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
177*0fca6ea1SDimitry Andric B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
178*0fca6ea1SDimitry Andric }
179*0fca6ea1SDimitry Andric
180*0fca6ea1SDimitry Andric // GlobalISel has to constrain S1 incoming taken as-is with lane mask register
181*0fca6ea1SDimitry Andric // class. Insert a copy of Incoming.Reg to new lane mask inside Incoming.Block,
182*0fca6ea1SDimitry Andric // Incoming.Reg becomes that new lane mask.
constrainAsLaneMask(Incoming & In)183*0fca6ea1SDimitry Andric void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) {
184*0fca6ea1SDimitry Andric B.setInsertPt(*In.Block, In.Block->getFirstTerminator());
185*0fca6ea1SDimitry Andric
186*0fca6ea1SDimitry Andric auto Copy = B.buildCopy(LLT::scalar(1), In.Reg);
187*0fca6ea1SDimitry Andric MRI->setRegClass(Copy.getReg(0), ST->getBoolRC());
188*0fca6ea1SDimitry Andric In.Reg = Copy.getReg(0);
189*0fca6ea1SDimitry Andric }
190*0fca6ea1SDimitry Andric
1915f757f3fSDimitry Andric } // End anonymous namespace.
1925f757f3fSDimitry Andric
1935f757f3fSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
1945f757f3fSDimitry Andric "AMDGPU GlobalISel divergence lowering", false, false)
195*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
196*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
197*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
1985f757f3fSDimitry Andric INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
1995f757f3fSDimitry Andric "AMDGPU GlobalISel divergence lowering", false, false)
2005f757f3fSDimitry Andric
2015f757f3fSDimitry Andric char AMDGPUGlobalISelDivergenceLowering::ID = 0;
2025f757f3fSDimitry Andric
2035f757f3fSDimitry Andric char &llvm::AMDGPUGlobalISelDivergenceLoweringID =
2045f757f3fSDimitry Andric AMDGPUGlobalISelDivergenceLowering::ID;
2055f757f3fSDimitry Andric
createAMDGPUGlobalISelDivergenceLoweringPass()2065f757f3fSDimitry Andric FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {
2075f757f3fSDimitry Andric return new AMDGPUGlobalISelDivergenceLowering();
2085f757f3fSDimitry Andric }
2095f757f3fSDimitry Andric
runOnMachineFunction(MachineFunction & MF)2105f757f3fSDimitry Andric bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
2115f757f3fSDimitry Andric MachineFunction &MF) {
212*0fca6ea1SDimitry Andric MachineDominatorTree &DT =
213*0fca6ea1SDimitry Andric getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
214*0fca6ea1SDimitry Andric MachinePostDominatorTree &PDT =
215*0fca6ea1SDimitry Andric getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
216*0fca6ea1SDimitry Andric MachineUniformityInfo &MUI =
217*0fca6ea1SDimitry Andric getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
218*0fca6ea1SDimitry Andric
219*0fca6ea1SDimitry Andric DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);
220*0fca6ea1SDimitry Andric
221*0fca6ea1SDimitry Andric return Helper.lowerPhis();
2225f757f3fSDimitry Andric }
223