//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass recursively promotes generic pointer arguments of a kernel
/// into the global address space.
///
/// The pass walks the kernel's pointer arguments, then the loads from them. If
/// a loaded value is itself a pointer and the loaded pointer is not modified
/// in the kernel before the load, the loaded pointer is promoted to global.
/// The process then continues recursively on the promoted pointers.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-promote-kernel-arguments"

using namespace llvm;

namespace {

class AMDGPUPromoteKernelArguments : public FunctionPass {
  MemorySSA *MSSA;

  AliasAnalysis *AA;

  Instruction *ArgCastInsertPt;

  SmallVector<Value *> Ptrs;

  void enqueueUsers(Value *Ptr);

  bool promotePointer(Value *Ptr);

  bool promoteLoad(LoadInst *LI);

public:
  static char ID;

  AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}

  bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.setPreservesAll();
  }
};

} // end anonymous namespace

void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
  SmallVector<User *> PtrUsers(Ptr->users());

  while (!PtrUsers.empty()) {
    Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
    if (!U)
      continue;

    switch (U->getOpcode()) {
    default:
      break;
    case Instruction::Load: {
      LoadInst *LD = cast<LoadInst>(U);
      if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
          !AMDGPU::isClobberedInFunction(LD, MSSA, AA))
        Ptrs.push_back(LD);

      break;
    }
    case Instruction::GetElementPtr:
    case Instruction::AddrSpaceCast:
    case Instruction::BitCast:
      if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
        PtrUsers.append(U->user_begin(), U->user_end());
      break;
    }
  }
}

bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
  bool Changed = false;

  LoadInst *LI = dyn_cast<LoadInst>(Ptr);
  if (LI)
    Changed |= promoteLoad(LI);

  PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
  if (!PT)
    return Changed;

  if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
      PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
      PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
    enqueueUsers(Ptr);

  if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
    return Changed;

  IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
                   : ArgCastInsertPt);

  // Cast the pointer to the global address space and back to flat, and let
  // the Infer Address Spaces pass do all the necessary rewriting.
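  // For example (illustrative IR only; the names mirror the ".global" and
  // ".flat" suffixes used below), a flat pointer %p is rewritten as
  //   %p.global = addrspacecast ptr %p to ptr addrspace(1)
  //   %p.flat   = addrspacecast ptr addrspace(1) %p.global to ptr
  // with every use of %p other than %p.global redirected to %p.flat, so
  // InferAddressSpaces can later propagate the global address space.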
  PointerType *NewPT =
      PointerType::getWithSamePointeeType(PT, AMDGPUAS::GLOBAL_ADDRESS);
  Value *Cast =
      B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
  Value *CastBack =
      B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
  Ptr->replaceUsesWithIf(CastBack,
                         [Cast](Use &U) { return U.getUser() != Cast; });

  return true;
}

bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
  if (!LI->isSimple())
    return false;

  LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {}));
  return true;
}

// Skip allocas when choosing the insertion point.
static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
  BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
  for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
    AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);

    // If this is a dynamic alloca, the value may depend on the loaded kernargs,
    // so loads will need to be inserted before it.
    if (!AI || !AI->isStaticAlloca())
      break;
  }

  return InsPt;
}

bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
                                       AliasAnalysis &AA) {
  if (skipFunction(F))
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
    return false;

  ArgCastInsertPt = &*getInsertPt(*F.begin());
  this->MSSA = &MSSA;
  this->AA = &AA;

  for (Argument &Arg : F.args()) {
    if (Arg.use_empty())
      continue;

    PointerType *PT = dyn_cast<PointerType>(Arg.getType());
    if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
                PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
                PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
      continue;

    Ptrs.push_back(&Arg);
  }

  bool Changed = false;
  while (!Ptrs.empty()) {
    Value *Ptr = Ptrs.pop_back_val();
    Changed |= promotePointer(Ptr);
  }

  return Changed;
}

bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
  MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  return run(F, MSSA, AA);
}

INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                      "AMDGPU Promote Kernel Arguments", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                    "AMDGPU Promote Kernel Arguments", false, false)

char AMDGPUPromoteKernelArguments::ID = 0;

FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
  return new AMDGPUPromoteKernelArguments();
}

PreservedAnalyses
AMDGPUPromoteKernelArgumentsPass::run(Function &F,
                                      FunctionAnalysisManager &AM) {
  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    PA.preserve<MemorySSAAnalysis>();
    return PA;
  }
  return PreservedAnalyses::all();
}