//===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "amdgpu-memory-utils"

using namespace llvm;

namespace llvm {

namespace AMDGPU {

// Return the alignment to use for \p GV, falling back to the ABI alignment
// of its value type when no explicit alignment is given.
Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
                                       GV->getValueType());
}

// Collect in \p InstUsers every instruction inside \p F that uses \p U,
// looking through any chain of ConstantExprs.
static void collectFunctionUses(User *U, const Function *F,
                                SetVector<Instruction *> &InstUsers) {
  SmallVector<User *> Stack{U};

  while (!Stack.empty()) {
    U = Stack.pop_back_val();

    if (auto *I = dyn_cast<Instruction>(U)) {
      if (I->getFunction() == F)
        InstUsers.insert(I);
      continue;
    }

    if (!isa<ConstantExpr>(U))
      continue;

    append_range(Stack, U->users());
  }
}

// Rewrite every use of the constant expression \p C inside \p F as an
// equivalent sequence of instructions, so the uses become per-function.
void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F) {
  SetVector<Instruction *> InstUsers;

  collectFunctionUses(C, F, InstUsers);
  for (Instruction *I : InstUsers) {
    convertConstantExprsToInstructions(I, C);
  }
}

// Decide whether the LDS variable \p GV should be moved into a struct. For
// kernel LDS lowering (\p F non-null) that is the case when \p GV is used
// from \p F; for module LDS lowering (\p F null), when it is used from any
// non-kernel function.
static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
                                   const Function *F) {
  // We are not interested in kernel LDS lowering for module LDS itself.
  if (F && GV.getName() == "llvm.amdgcn.module.lds")
    return false;

  bool Ret = false;
  SmallPtrSet<const User *, 8> Visited;
  SmallVector<const User *, 16> Stack(GV.users());

  assert(!F || isKernelCC(F));

  while (!Stack.empty()) {
    const User *V = Stack.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    if (isa<GlobalValue>(V)) {
      // This use of the LDS variable is the initializer of a global variable.
      // That is ill-formed. The address of an LDS variable is kernel dependent
      // and unknown until runtime. It can't be written to a global variable.
      continue;
    }

    if (auto *I = dyn_cast<Instruction>(V)) {
      const Function *UF = I->getFunction();
      if (UF == F) {
        // Used from this kernel, we want to put it into the structure.
        Ret = true;
      } else if (!F) {
        // For module LDS lowering, lowering is required if the user
        // instruction is from a non-kernel function.
        Ret |= !isKernelCC(UF);
      }
      continue;
    }

    // User V should be a constant; recursively visit its users.
    assert(isa<Constant>(V) && "Expected a constant.");
    append_range(Stack, V->users());
  }

  return Ret;
}

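// Collect the LDS (addrspace(3)) variables in \p M that need lowering, either
// for a single kernel \p F or, when \p F is null, for module LDS lowering.
// Extern __shared__ style declarations, initialized variables and constants
// are skipped for the reasons given inline below.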
std::vector<GlobalVariable *> findVariablesToLower(Module &M,
                                                   const Function *F) {
  std::vector<llvm::GlobalVariable *> LocalVars;
  for (auto &GV : M.globals()) {
    if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
      continue;
    }
    if (!GV.hasInitializer()) {
      // An addrspace(3) variable without an initializer implies CUDA/HIP
      // extern __shared__. The semantics for such a variable appear to be
      // that all extern __shared__ variables alias one another, in which
      // case this transform is not required.
      continue;
    }
    if (!isa<UndefValue>(GV.getInitializer())) {
      // Initializers are unimplemented for LDS address space.
      // Leave such variables in place for consistent error reporting.
      continue;
    }
    if (GV.isConstant()) {
      // A constant undef variable can't be written to, and any load is
      // undef, so it should be eliminated by the optimizer. It could be
      // dropped by the back end if not. This pass skips over it.
      continue;
    }
    if (!shouldLowerLDSToStruct(GV, F)) {
      continue;
    }
    LocalVars.push_back(&GV);
  }
  return LocalVars;
}

// A MemoryDef reported by MemorySSA only really clobbers \p Ptr if it may
// write memory aliasing \p Ptr. Fences, AMDGPU barrier intrinsics and
// atomics that do not alias \p Ptr are treated as clobbers by MemorySSA but
// do not modify the location.
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
  Instruction *DefInst = Def->getMemoryInst();

  if (isa<FenceInst>(DefInst))
    return false;

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_s_barrier:
    case Intrinsic::amdgcn_wave_barrier:
    case Intrinsic::amdgcn_sched_barrier:
      return false;
    default:
      break;
    }
  }

  // Ignore atomics that do not alias the original load; from MSSA's point of
  // view any atomic, just like a fence, is a universal MemoryDef.
  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
    return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
  };

  if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
      checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
    return false;

  return true;
}

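// Return true if the memory loaded by \p Load may be clobbered by a store
// anywhere in its function. Walks MemorySSA upwards from the load's nearest
// clobbering access, using isReallyAClobber() to skip barriers, fences and
// non-aliasing atomics that MemorySSA conservatively treats as clobbers.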
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                           AAResults *AA) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access; it will be either
  // live on entry (nothing to do, the load is not clobbered), a MemoryDef, or
  // a MemoryPhi if several MemoryDefs can define this memory state. In the
  // latter case add all the Defs to the WorkList and continue going up,
  // checking all the definitions of this memory location until the root.
  // When all the defs are exhausted and we have reached the entry state,
  // there is no clobber. Along the scan, ignore barriers and fences, which
  // MemorySSA considers clobbers even though they do not really write
  // anything into memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      LLVM_DEBUG(dbgs() << "  Def: " << *Def->getMemoryInst() << '\n');

      if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
        LLVM_DEBUG(dbgs() << "      -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "      -> no clobber\n");
  return false;
}

} // end namespace AMDGPU

} // end namespace llvm
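
// Usage sketch: one plausible way a pass could drive isClobberedInFunction()
// over a function's loads. The surrounding pass, the FunctionAnalysisManager
// `FAM`, and what the caller does with an unclobbered load are assumptions
// for illustration, not part of this file's API:
//
//   MemorySSA &MSSA = FAM.getResult<MemorySSAAnalysis>(F).getMSSA();
//   AAResults &AA = FAM.getResult<AAManager>(F);
//   for (BasicBlock &BB : F)
//     for (Instruction &I : BB)
//       if (auto *LI = dyn_cast<LoadInst>(&I))
//         if (!AMDGPU::isClobberedInFunction(LI, &MSSA, &AA))
//           ; // No store in F can change the loaded value.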