xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp (revision 1342eb5a832fa10e689a29faab3acb6054e4778c)
1 //===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This file implements the idea to rewrite undef incoming operand for certain
9 // PHIs in structurized CFG. This pass only works on IR that has gone through
10 // StructurizedCFG pass, and this pass has some additional limitation that make
11 // it can only run after SIAnnotateControlFlow.
12 //
13 // To achieve optimal code generation for AMDGPU, we assume that uniformity
14 // analysis reports the PHI in join block of divergent branch as uniform if
15 // it has one unique uniform value plus additional undefined/poisoned incoming
16 // value. That is to say the later compiler pipeline will ensure such PHI always
17 // return uniform value and ensure it work correctly. Let's take a look at two
18 // typical patterns in structured CFG that need to be taken care: (In both
19 // patterns, block %if terminate with divergent branch.)
20 //
21 // Pattern A: Block with undefined incoming value dominates defined predecessor
22 //  %if
23 //  | \
24 //  | %then
25 //  | /
26 //  %endif: %phi = phi [%undef, %if], [%uniform, %then]
27 //
28 //  Pattern B: Block with defined incoming value dominates undefined predecessor
29 //  %if
30 //  | \
31 //  | %then
32 //  | /
33 //  %endif: %phi = phi [%uniform, %if], [%undef, %then]
34 //
35 // For pattern A, by reporting %phi as uniform, the later pipeline need to make
36 // sure it be handled correctly. The backend usually allocates a scalar register
37 // and if any thread in a wave takes %then path, the scalar register will get
38 // the %uniform value.
39 //
40 // For pattern B, we will replace the undef operand with the other defined value
41 // in this pass. So the scalar register allocated for such PHI will get correct
42 // liveness. Without this transformation, the scalar register may be overwritten
43 // in the %then block.
44 //
45 // Limitation note:
46 // If the join block of divergent threads is a loop header, the pass cannot
47 // handle it correctly right now. For below case, the undef in %phi should also
48 // be rewritten. Currently we depend on SIAnnotateControlFlow to split %header
49 // block to get a separate join block, then we can rewrite the undef correctly.
50 //     %if
51 //     | \
52 //     | %then
53 //     | /
54 // -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header]
55 // |   |
56 // \---
57 
58 #include "AMDGPU.h"
59 #include "llvm/Analysis/UniformityAnalysis.h"
60 #include "llvm/IR/BasicBlock.h"
61 #include "llvm/IR/Constants.h"
62 #include "llvm/IR/Dominators.h"
63 #include "llvm/IR/Instructions.h"
64 #include "llvm/InitializePasses.h"
65 
66 using namespace llvm;
67 
68 #define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi"
69 
70 namespace {
71 
72 class AMDGPURewriteUndefForPHILegacy : public FunctionPass {
73 public:
74   static char ID;
75   AMDGPURewriteUndefForPHILegacy() : FunctionPass(ID) {}
76   bool runOnFunction(Function &F) override;
77   StringRef getPassName() const override {
78     return "AMDGPU Rewrite Undef for PHI";
79   }
80 
81   void getAnalysisUsage(AnalysisUsage &AU) const override {
82     AU.addRequired<UniformityInfoWrapperPass>();
83     AU.addRequired<DominatorTreeWrapperPass>();
84 
85     AU.addPreserved<DominatorTreeWrapperPass>();
86     AU.setPreservesCFG();
87   }
88 };
89 
90 } // end anonymous namespace
91 char AMDGPURewriteUndefForPHILegacy::ID = 0;
92 
93 INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE,
94                       "Rewrite undef for PHI", false, false)
95 INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
96 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
97 INITIALIZE_PASS_END(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE,
98                     "Rewrite undef for PHI", false, false)
99 
100 bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT) {
101   bool Changed = false;
102   SmallVector<PHINode *> ToBeDeleted;
103   for (auto &BB : F) {
104     for (auto &PHI : BB.phis()) {
105       if (UA.isDivergent(&PHI))
106         continue;
107 
108       // The unique incoming value except undef/poison for the PHI node.
109       Value *UniqueDefinedIncoming = nullptr;
110       // The divergent block with defined incoming value that dominates all
111       // other block with the same incoming value.
112       BasicBlock *DominateBB = nullptr;
113       // Predecessors with undefined incoming value (excluding loop backedge).
114       SmallVector<BasicBlock *> Undefs;
115 
116       for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
117         Value *Incoming = PHI.getIncomingValue(i);
118         BasicBlock *IncomingBB = PHI.getIncomingBlock(i);
119 
120         if (Incoming == &PHI)
121           continue;
122 
123         if (isa<UndefValue>(Incoming)) {
124           // Undef from loop backedge will not be replaced.
125           if (!DT->dominates(&BB, IncomingBB))
126             Undefs.push_back(IncomingBB);
127           continue;
128         }
129 
130         if (!UniqueDefinedIncoming) {
131           UniqueDefinedIncoming = Incoming;
132           DominateBB = IncomingBB;
133         } else if (Incoming == UniqueDefinedIncoming) {
134           // Update DominateBB if necessary.
135           if (DT->dominates(IncomingBB, DominateBB))
136             DominateBB = IncomingBB;
137         } else {
138           UniqueDefinedIncoming = nullptr;
139           break;
140         }
141       }
142       // We only need to replace the undef for the PHI which is merging
143       // defined/undefined values from divergent threads.
144       // TODO: We should still be able to replace undef value if the unique
145       // value is a Constant.
146       if (!UniqueDefinedIncoming || Undefs.empty() ||
147           !UA.isDivergent(DominateBB->getTerminator()))
148         continue;
149 
150       // We only replace the undef when DominateBB truly dominates all the
151       // other predecessors with undefined incoming value. Make sure DominateBB
152       // dominates BB so that UniqueDefinedIncoming is available in BB and
153       // afterwards.
154       if (DT->dominates(DominateBB, &BB) && all_of(Undefs, [&](BasicBlock *UD) {
155             return DT->dominates(DominateBB, UD);
156           })) {
157         PHI.replaceAllUsesWith(UniqueDefinedIncoming);
158         ToBeDeleted.push_back(&PHI);
159         Changed = true;
160       }
161     }
162   }
163 
164   for (auto *PHI : ToBeDeleted)
165     PHI->eraseFromParent();
166 
167   return Changed;
168 }
169 
170 bool AMDGPURewriteUndefForPHILegacy::runOnFunction(Function &F) {
171   UniformityInfo &UA =
172       getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
173   DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
174   return rewritePHIs(F, UA, DT);
175 }
176 
177 PreservedAnalyses
178 AMDGPURewriteUndefForPHIPass::run(Function &F, FunctionAnalysisManager &AM) {
179   UniformityInfo &UA = AM.getResult<UniformityInfoAnalysis>(F);
180   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
181   bool Changed = rewritePHIs(F, UA, DT);
182   if (Changed) {
183     PreservedAnalyses PA;
184     PA.preserveSet<CFGAnalyses>();
185     return PA;
186   }
187 
188   return PreservedAnalyses::all();
189 }
190 
191 FunctionPass *llvm::createAMDGPURewriteUndefForPHILegacyPass() {
192   return new AMDGPURewriteUndefForPHILegacy();
193 }
194