1bdd1243dSDimitry Andric //===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===// 2bdd1243dSDimitry Andric // 3bdd1243dSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4bdd1243dSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5bdd1243dSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6bdd1243dSDimitry Andric // 7bdd1243dSDimitry Andric //===----------------------------------------------------------------------===// 8bdd1243dSDimitry Andric // This file implements the idea to rewrite undef incoming operand for certain 9bdd1243dSDimitry Andric // PHIs in structurized CFG. This pass only works on IR that has gone through 10bdd1243dSDimitry Andric // StructurizedCFG pass, and this pass has some additional limitation that make 11bdd1243dSDimitry Andric // it can only run after SIAnnotateControlFlow. 12bdd1243dSDimitry Andric // 1306c3fb27SDimitry Andric // To achieve optimal code generation for AMDGPU, we assume that uniformity 14bdd1243dSDimitry Andric // analysis reports the PHI in join block of divergent branch as uniform if 15bdd1243dSDimitry Andric // it has one unique uniform value plus additional undefined/poisoned incoming 16bdd1243dSDimitry Andric // value. That is to say the later compiler pipeline will ensure such PHI always 17bdd1243dSDimitry Andric // return uniform value and ensure it work correctly. Let's take a look at two 18bdd1243dSDimitry Andric // typical patterns in structured CFG that need to be taken care: (In both 19bdd1243dSDimitry Andric // patterns, block %if terminate with divergent branch.) 20bdd1243dSDimitry Andric // 21bdd1243dSDimitry Andric // Pattern A: Block with undefined incoming value dominates defined predecessor 22bdd1243dSDimitry Andric // %if 23bdd1243dSDimitry Andric // | \ 24bdd1243dSDimitry Andric // | %then 25bdd1243dSDimitry Andric // | / 26bdd1243dSDimitry Andric // %endif: %phi = phi [%undef, %if], [%uniform, %then] 27bdd1243dSDimitry Andric // 28bdd1243dSDimitry Andric // Pattern B: Block with defined incoming value dominates undefined predecessor 29bdd1243dSDimitry Andric // %if 30bdd1243dSDimitry Andric // | \ 31bdd1243dSDimitry Andric // | %then 32bdd1243dSDimitry Andric // | / 33bdd1243dSDimitry Andric // %endif: %phi = phi [%uniform, %if], [%undef, %then] 34bdd1243dSDimitry Andric // 35bdd1243dSDimitry Andric // For pattern A, by reporting %phi as uniform, the later pipeline need to make 36bdd1243dSDimitry Andric // sure it be handled correctly. The backend usually allocates a scalar register 37bdd1243dSDimitry Andric // and if any thread in a wave takes %then path, the scalar register will get 38bdd1243dSDimitry Andric // the %uniform value. 39bdd1243dSDimitry Andric // 40bdd1243dSDimitry Andric // For pattern B, we will replace the undef operand with the other defined value 41bdd1243dSDimitry Andric // in this pass. So the scalar register allocated for such PHI will get correct 42bdd1243dSDimitry Andric // liveness. Without this transformation, the scalar register may be overwritten 43bdd1243dSDimitry Andric // in the %then block. 44bdd1243dSDimitry Andric // 45bdd1243dSDimitry Andric // Limitation note: 46bdd1243dSDimitry Andric // If the join block of divergent threads is a loop header, the pass cannot 47bdd1243dSDimitry Andric // handle it correctly right now. For below case, the undef in %phi should also 48bdd1243dSDimitry Andric // be rewritten. Currently we depend on SIAnnotateControlFlow to split %header 49bdd1243dSDimitry Andric // block to get a separate join block, then we can rewrite the undef correctly. 50bdd1243dSDimitry Andric // %if 51bdd1243dSDimitry Andric // | \ 52bdd1243dSDimitry Andric // | %then 53bdd1243dSDimitry Andric // | / 54bdd1243dSDimitry Andric // -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header] 55bdd1243dSDimitry Andric // | | 56bdd1243dSDimitry Andric // \--- 57bdd1243dSDimitry Andric 58bdd1243dSDimitry Andric #include "AMDGPU.h" 5906c3fb27SDimitry Andric #include "llvm/Analysis/UniformityAnalysis.h" 60bdd1243dSDimitry Andric #include "llvm/IR/BasicBlock.h" 61bdd1243dSDimitry Andric #include "llvm/IR/Constants.h" 62bdd1243dSDimitry Andric #include "llvm/IR/Dominators.h" 63bdd1243dSDimitry Andric #include "llvm/IR/Instructions.h" 64bdd1243dSDimitry Andric #include "llvm/InitializePasses.h" 65bdd1243dSDimitry Andric 66bdd1243dSDimitry Andric using namespace llvm; 67bdd1243dSDimitry Andric 68bdd1243dSDimitry Andric #define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi" 69bdd1243dSDimitry Andric 70bdd1243dSDimitry Andric namespace { 71bdd1243dSDimitry Andric 72*5f757f3fSDimitry Andric class AMDGPURewriteUndefForPHILegacy : public FunctionPass { 73bdd1243dSDimitry Andric public: 74bdd1243dSDimitry Andric static char ID; 75*5f757f3fSDimitry Andric AMDGPURewriteUndefForPHILegacy() : FunctionPass(ID) { 76*5f757f3fSDimitry Andric initializeAMDGPURewriteUndefForPHILegacyPass(*PassRegistry::getPassRegistry()); 77bdd1243dSDimitry Andric } 78bdd1243dSDimitry Andric bool runOnFunction(Function &F) override; 79bdd1243dSDimitry Andric StringRef getPassName() const override { 80bdd1243dSDimitry Andric return "AMDGPU Rewrite Undef for PHI"; 81bdd1243dSDimitry Andric } 82bdd1243dSDimitry Andric 83bdd1243dSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 8406c3fb27SDimitry Andric AU.addRequired<UniformityInfoWrapperPass>(); 85bdd1243dSDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 86bdd1243dSDimitry Andric 87bdd1243dSDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>(); 88bdd1243dSDimitry Andric AU.setPreservesCFG(); 89bdd1243dSDimitry Andric } 90bdd1243dSDimitry Andric }; 91bdd1243dSDimitry Andric 92bdd1243dSDimitry Andric } // end anonymous namespace 93*5f757f3fSDimitry Andric char AMDGPURewriteUndefForPHILegacy::ID = 0; 94bdd1243dSDimitry Andric 95*5f757f3fSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE, 96bdd1243dSDimitry Andric "Rewrite undef for PHI", false, false) 9706c3fb27SDimitry Andric INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) 98bdd1243dSDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 99*5f757f3fSDimitry Andric INITIALIZE_PASS_END(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE, 100bdd1243dSDimitry Andric "Rewrite undef for PHI", false, false) 101bdd1243dSDimitry Andric 10206c3fb27SDimitry Andric bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT) { 103bdd1243dSDimitry Andric bool Changed = false; 104bdd1243dSDimitry Andric SmallVector<PHINode *> ToBeDeleted; 105bdd1243dSDimitry Andric for (auto &BB : F) { 106bdd1243dSDimitry Andric for (auto &PHI : BB.phis()) { 10706c3fb27SDimitry Andric if (UA.isDivergent(&PHI)) 108bdd1243dSDimitry Andric continue; 109bdd1243dSDimitry Andric 110bdd1243dSDimitry Andric // The unique incoming value except undef/poison for the PHI node. 111bdd1243dSDimitry Andric Value *UniqueDefinedIncoming = nullptr; 112bdd1243dSDimitry Andric // The divergent block with defined incoming value that dominates all 113bdd1243dSDimitry Andric // other block with the same incoming value. 114bdd1243dSDimitry Andric BasicBlock *DominateBB = nullptr; 115bdd1243dSDimitry Andric // Predecessors with undefined incoming value (excluding loop backedge). 116bdd1243dSDimitry Andric SmallVector<BasicBlock *> Undefs; 117bdd1243dSDimitry Andric 118bdd1243dSDimitry Andric for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) { 119bdd1243dSDimitry Andric Value *Incoming = PHI.getIncomingValue(i); 120bdd1243dSDimitry Andric BasicBlock *IncomingBB = PHI.getIncomingBlock(i); 121bdd1243dSDimitry Andric 122bdd1243dSDimitry Andric if (Incoming == &PHI) 123bdd1243dSDimitry Andric continue; 124bdd1243dSDimitry Andric 125bdd1243dSDimitry Andric if (isa<UndefValue>(Incoming)) { 126bdd1243dSDimitry Andric // Undef from loop backedge will not be replaced. 127bdd1243dSDimitry Andric if (!DT->dominates(&BB, IncomingBB)) 128bdd1243dSDimitry Andric Undefs.push_back(IncomingBB); 129bdd1243dSDimitry Andric continue; 130bdd1243dSDimitry Andric } 131bdd1243dSDimitry Andric 132bdd1243dSDimitry Andric if (!UniqueDefinedIncoming) { 133bdd1243dSDimitry Andric UniqueDefinedIncoming = Incoming; 134bdd1243dSDimitry Andric DominateBB = IncomingBB; 135bdd1243dSDimitry Andric } else if (Incoming == UniqueDefinedIncoming) { 136bdd1243dSDimitry Andric // Update DominateBB if necessary. 137bdd1243dSDimitry Andric if (DT->dominates(IncomingBB, DominateBB)) 138bdd1243dSDimitry Andric DominateBB = IncomingBB; 139bdd1243dSDimitry Andric } else { 140bdd1243dSDimitry Andric UniqueDefinedIncoming = nullptr; 141bdd1243dSDimitry Andric break; 142bdd1243dSDimitry Andric } 143bdd1243dSDimitry Andric } 144bdd1243dSDimitry Andric // We only need to replace the undef for the PHI which is merging 145bdd1243dSDimitry Andric // defined/undefined values from divergent threads. 146bdd1243dSDimitry Andric // TODO: We should still be able to replace undef value if the unique 147bdd1243dSDimitry Andric // value is a Constant. 148bdd1243dSDimitry Andric if (!UniqueDefinedIncoming || Undefs.empty() || 14906c3fb27SDimitry Andric !UA.isDivergent(DominateBB->getTerminator())) 150bdd1243dSDimitry Andric continue; 151bdd1243dSDimitry Andric 152bdd1243dSDimitry Andric // We only replace the undef when DominateBB truly dominates all the 153bdd1243dSDimitry Andric // other predecessors with undefined incoming value. Make sure DominateBB 154bdd1243dSDimitry Andric // dominates BB so that UniqueDefinedIncoming is available in BB and 155bdd1243dSDimitry Andric // afterwards. 156bdd1243dSDimitry Andric if (DT->dominates(DominateBB, &BB) && all_of(Undefs, [&](BasicBlock *UD) { 157bdd1243dSDimitry Andric return DT->dominates(DominateBB, UD); 158bdd1243dSDimitry Andric })) { 159bdd1243dSDimitry Andric PHI.replaceAllUsesWith(UniqueDefinedIncoming); 160bdd1243dSDimitry Andric ToBeDeleted.push_back(&PHI); 161bdd1243dSDimitry Andric Changed = true; 162bdd1243dSDimitry Andric } 163bdd1243dSDimitry Andric } 164bdd1243dSDimitry Andric } 165bdd1243dSDimitry Andric 166bdd1243dSDimitry Andric for (auto *PHI : ToBeDeleted) 167bdd1243dSDimitry Andric PHI->eraseFromParent(); 168bdd1243dSDimitry Andric 169bdd1243dSDimitry Andric return Changed; 170bdd1243dSDimitry Andric } 171bdd1243dSDimitry Andric 172*5f757f3fSDimitry Andric bool AMDGPURewriteUndefForPHILegacy::runOnFunction(Function &F) { 17306c3fb27SDimitry Andric UniformityInfo &UA = 17406c3fb27SDimitry Andric getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo(); 175bdd1243dSDimitry Andric DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 17606c3fb27SDimitry Andric return rewritePHIs(F, UA, DT); 177bdd1243dSDimitry Andric } 178bdd1243dSDimitry Andric 179*5f757f3fSDimitry Andric PreservedAnalyses 180*5f757f3fSDimitry Andric AMDGPURewriteUndefForPHIPass::run(Function &F, FunctionAnalysisManager &AM) { 181*5f757f3fSDimitry Andric UniformityInfo &UA = AM.getResult<UniformityInfoAnalysis>(F); 182*5f757f3fSDimitry Andric DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 183*5f757f3fSDimitry Andric bool Changed = rewritePHIs(F, UA, DT); 184*5f757f3fSDimitry Andric if (Changed) { 185*5f757f3fSDimitry Andric PreservedAnalyses PA; 186*5f757f3fSDimitry Andric PA.preserveSet<CFGAnalyses>(); 187*5f757f3fSDimitry Andric return PA; 188*5f757f3fSDimitry Andric } 189*5f757f3fSDimitry Andric 190*5f757f3fSDimitry Andric return PreservedAnalyses::all(); 191*5f757f3fSDimitry Andric } 192*5f757f3fSDimitry Andric 193*5f757f3fSDimitry Andric FunctionPass *llvm::createAMDGPURewriteUndefForPHILegacyPass() { 194*5f757f3fSDimitry Andric return new AMDGPURewriteUndefForPHILegacy(); 195bdd1243dSDimitry Andric } 196