//===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass adds amdgpu.uniform metadata to IR values so this information
/// can be used during instruction selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-annotate-uniform"

using namespace llvm;

namespace {

class AMDGPUAnnotateUniformValues : public FunctionPass,
                       public InstVisitor<AMDGPUAnnotateUniformValues> {
  LegacyDivergenceAnalysis *DA;
  MemorySSA *MSSA;
  AliasAnalysis *AA;
  DenseMap<Value*, GetElementPtrInst*> noClobberClones;
  bool isEntryFunc;

public:
  static char ID;
  AMDGPUAnnotateUniformValues() :
    FunctionPass(ID) { }
  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;
  StringRef getPassName() const override {
    return "AMDGPU Annotate Uniform Values";
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LegacyDivergenceAnalysis>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.setPreservesAll();
  }

  void visitBranchInst(BranchInst &I);
  void visitLoadInst(LoadInst &I);
  bool isClobberedInFunction(LoadInst *Load);
};

} // End anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                      "Add AMDGPU uniform metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                    "Add AMDGPU uniform metadata", false, false)

char AMDGPUAnnotateUniformValues::ID = 0;

static void setUniformMetadata(Instruction *I) {
  I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
}
static void setNoClobberMetadata(Instruction *I) {
  I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
}

bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst *Load) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  const auto isReallyAClobber = [this, Load](MemoryDef *Def) -> bool {
    Instruction *DefInst = Def->getMemoryInst();
    LLVM_DEBUG(dbgs() << "  Def: " << *DefInst << '\n');

    if (isa<FenceInst>(DefInst))
      return false;

    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::amdgcn_s_barrier:
      case Intrinsic::amdgcn_wave_barrier:
        return false;
      default:
        break;
      }
    }

    // Ignore atomics not aliasing with the original load, any atomic is a
    // universal MemoryDef from MSSA's point of view too, just like a fence.
    const auto checkNoAlias = [this, Load](auto I) -> bool {
      return I && AA->isNoAlias(I->getPointerOperand(),
                                Load->getPointerOperand());
    };

    if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
        checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
      return false;

    return true;
  };

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access; it will be either
  // live on entry (nothing to do, the load is not clobbered), a MemoryDef, or
  // a MemoryPhi if several MemoryDefs can define this memory state. In that
  // case add all the Defs to the WorkList and continue going up, checking all
  // the definitions of this memory location until the root. When all the defs
  // are exhausted and we have reached the entry state, there is no clobber.
  // Along the scan, ignore barriers and fences, which MemorySSA considers
  // clobbers but which do not really write anything into memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      if (isReallyAClobber(Def)) {
        LLVM_DEBUG(dbgs() << "  -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "  -> no clobber\n");
  return false;
}

void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
  if (DA->isUniform(&I))
    setUniformMetadata(&I);
}

void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
  Value *Ptr = I.getPointerOperand();
  if (!DA->isUniform(Ptr))
    return;
  // We're tracking up to the Function boundaries, and cannot go beyond because
  // of FunctionPass restrictions. We can only ensure that memory is not
  // clobbered for memory operations that are live in to entry points.
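  // For a non-entry function we only mark the uniform pointer below; for an
  // entry function (kernel) we additionally walk MemorySSA to prove that the
  // global load is not clobbered between the function entry and the load, in
  // which case the pointer is also tagged with amdgpu.noclobber.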
  Instruction *PtrI = dyn_cast<Instruction>(Ptr);

  if (!isEntryFunc) {
    if (PtrI)
      setUniformMetadata(PtrI);
    return;
  }

  bool NotClobbered = false;
  bool GlobalLoad = I.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
  if (PtrI)
    NotClobbered = GlobalLoad && !isClobberedInFunction(&I);
  else if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
    if (GlobalLoad && !isClobberedInFunction(&I)) {
      NotClobbered = true;
      // Look up the existing GEP.
      if (noClobberClones.count(Ptr)) {
        PtrI = noClobberClones[Ptr];
      } else {
        // Create a GEP of the Value.
        Function *F = I.getParent()->getParent();
        Value *Idx = Constant::getIntegerValue(
            Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
        // Insert the GEP at the entry to make it dominate all uses.
        PtrI = GetElementPtrInst::Create(I.getType(), Ptr,
                                         ArrayRef<Value *>(Idx), Twine(""),
                                         F->getEntryBlock().getFirstNonPHI());
      }
      I.replaceUsesOfWith(Ptr, PtrI);
    }
  }

  if (PtrI) {
    setUniformMetadata(PtrI);
    if (NotClobbered)
      setNoClobberMetadata(PtrI);
  }
}

bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
  return false;
}

bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  DA = &getAnalysis<LegacyDivergenceAnalysis>();
  MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());

  visit(F);
  noClobberClones.clear();
  return true;
}

FunctionPass *
llvm::createAMDGPUAnnotateUniformValues() {
  return new AMDGPUAnnotateUniformValues();
}
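
// Illustrative example (names are hypothetical, not produced verbatim by this
// file): for a kernel argument pointer %in that the divergence analysis proves
// uniform and whose global load MemorySSA proves unclobbered, the entry-block
// GEP clone created above ends up annotated roughly as
//
//   %in.uniform = getelementptr i32, i32 addrspace(1)* %in, i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
//   %v = load i32, i32 addrspace(1)* %in.uniform
//
// with !0 = !{}, so that instruction selection can pick a scalar (SMEM) load.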