//===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file implements rewriting of undef incoming operands for certain PHIs
// in a structurized CFG. This pass only works on IR that has gone through the
// StructurizeCFG pass, and it has additional limitations that mean it can
// only run after SIAnnotateControlFlow.
//
// To achieve optimal code generation for AMDGPU, we assume that uniformity
// analysis reports a PHI in the join block of a divergent branch as uniform
// if it has one unique uniform incoming value plus additional
// undefined/poisoned incoming values. That is to say, the later compiler
// pipeline will ensure that such a PHI always returns a uniform value and
// works correctly. Let's take a look at two typical patterns in a
// structurized CFG that need to be taken care of. (In both patterns, block
// %if terminates with a divergent branch.)
//
// Pattern A: Block with undefined incoming value dominates defined predecessor
//     %if
//     | \
//     | %then
//     | /
//     %endif: %phi = phi [%undef, %if], [%uniform, %then]
//
// Pattern B: Block with defined incoming value dominates undefined predecessor
//     %if
//     | \
//     | %then
//     | /
//     %endif: %phi = phi [%uniform, %if], [%undef, %then]
//
// For pattern A, by reporting %phi as uniform, the later pipeline needs to
// make sure it is handled correctly. The backend usually allocates a scalar
// register for it, and if any thread in a wave takes the %then path, the
// scalar register will get the %uniform value.
//
// For pattern B, this pass replaces the undef operand with the other defined
// value, so the scalar register allocated for such a PHI gets correct
// liveness. Without this transformation, the scalar register may be
// overwritten in the %then block.
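//
// As an illustrative sketch of pattern B (the IR below is a made-up example,
// not taken from an actual test), the pass turns
//
//     %endif:
//       %phi = phi i32 [ %uniform, %if ], [ undef, %then ]
//       %use = add i32 %phi, 1
//
// into
//
//     %endif:
//       %use = add i32 %uniform, 1
//
// i.e. all uses of %phi are replaced with %uniform and the PHI itself is
// erased, provided %if terminates with a divergent branch, dominates both
// %then and %endif, and uniformity analysis reports %phi as uniform.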
//
// Limitation note:
// If the join block of divergent threads is a loop header, the pass cannot
// handle it correctly right now. For the case below, the undef in %phi should
// also be rewritten. Currently we depend on SIAnnotateControlFlow to split
// the %header block so that we get a separate join block, and then we can
// rewrite the undef correctly.
//      %if
//      | \
//      | %then
//      | /
// ->   %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header]
// |    |
// \----

#include "AMDGPU.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi"

namespace {

class AMDGPURewriteUndefForPHILegacy : public FunctionPass {
public:
  static char ID;
  AMDGPURewriteUndefForPHILegacy() : FunctionPass(ID) {
    initializeAMDGPURewriteUndefForPHILegacyPass(
        *PassRegistry::getPassRegistry());
  }
  bool runOnFunction(Function &F) override;
  StringRef getPassName() const override {
    return "AMDGPU Rewrite Undef for PHI";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<UniformityInfoWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();

    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace
char AMDGPURewriteUndefForPHILegacy::ID = 0;

INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE,
                      "Rewrite undef for PHI", false, false)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE,
                    "Rewrite undef for PHI", false, false)

bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT) {
  bool Changed = false;
  SmallVector<PHINode *> ToBeDeleted;
  for (auto &BB : F) {
    for (auto &PHI : BB.phis()) {
      if (UA.isDivergent(&PHI))
        continue;

      // The unique incoming value, excluding undef/poison, for the PHI node.
      Value *UniqueDefinedIncoming = nullptr;
      // The divergent block with a defined incoming value that dominates all
      // other blocks with the same incoming value.
      BasicBlock *DominateBB = nullptr;
      // Predecessors with undefined incoming value (excluding loop backedge).
      SmallVector<BasicBlock *> Undefs;

      for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
        Value *Incoming = PHI.getIncomingValue(i);
        BasicBlock *IncomingBB = PHI.getIncomingBlock(i);

        if (Incoming == &PHI)
          continue;

        if (isa<UndefValue>(Incoming)) {
          // Undef coming from a loop backedge will not be replaced.
          if (!DT->dominates(&BB, IncomingBB))
            Undefs.push_back(IncomingBB);
          continue;
        }

        if (!UniqueDefinedIncoming) {
          UniqueDefinedIncoming = Incoming;
          DominateBB = IncomingBB;
        } else if (Incoming == UniqueDefinedIncoming) {
          // Update DominateBB if necessary.
          if (DT->dominates(IncomingBB, DominateBB))
            DominateBB = IncomingBB;
        } else {
          UniqueDefinedIncoming = nullptr;
          break;
        }
      }
      // We only need to replace the undef for a PHI which is merging
      // defined/undefined values from divergent threads.
      // TODO: We should still be able to replace the undef value if the
      // unique value is a Constant.
      if (!UniqueDefinedIncoming || Undefs.empty() ||
          !UA.isDivergent(DominateBB->getTerminator()))
        continue;

      // We only replace the undef when DominateBB truly dominates all the
      // other predecessors with undefined incoming value. Make sure DominateBB
      // dominates BB so that UniqueDefinedIncoming is available in BB and
      // afterwards.
      if (DT->dominates(DominateBB, &BB) && all_of(Undefs, [&](BasicBlock *UD) {
            return DT->dominates(DominateBB, UD);
          })) {
        PHI.replaceAllUsesWith(UniqueDefinedIncoming);
        ToBeDeleted.push_back(&PHI);
        Changed = true;
      }
    }
  }

  for (auto *PHI : ToBeDeleted)
    PHI->eraseFromParent();

  return Changed;
}
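
// Both pass-manager entry points below delegate to rewritePHIs. As a rough
// usage sketch (assuming the pass stays registered under its DEBUG_TYPE name
// in the AMDGPU pass registry), it can be exercised in isolation with:
//   opt -mtriple=amdgcn-- -passes=amdgpu-rewrite-undef-for-phi -S input.ll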

bool AMDGPURewriteUndefForPHILegacy::runOnFunction(Function &F) {
  UniformityInfo &UA =
      getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  return rewritePHIs(F, UA, DT);
}

PreservedAnalyses
AMDGPURewriteUndefForPHIPass::run(Function &F, FunctionAnalysisManager &AM) {
  UniformityInfo &UA = AM.getResult<UniformityInfoAnalysis>(F);
  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  bool Changed = rewritePHIs(F, UA, DT);
  if (Changed) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    return PA;
  }

  return PreservedAnalyses::all();
}

FunctionPass *llvm::createAMDGPURewriteUndefForPHILegacyPass() {
  return new AMDGPURewriteUndefForPHILegacy();
}