//===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "amdgpu-memory-utils"

using namespace llvm;

namespace llvm {

namespace AMDGPU {

// Return the alignment of the global, falling back to the ABI type alignment
// of its value type when no explicit alignment is known.
Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
                                       GV->getValueType());
}

bool isDynamicLDS(const GlobalVariable &GV) {
  // An external, zero-sized addrspace(3) variable without an initializer
  // corresponds to a CUDA/HIP extern __shared__ variable. The semantics of
  // such a variable appear to be that all extern __shared__ variables alias
  // one another, which requires different handling.
  const Module *M = GV.getParent();
  const DataLayout &DL = M->getDataLayout();
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
    return false;
  }
  uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
  return GV.hasExternalLinkage() && AllocSize == 0;
}

bool isLDSVariableToLower(const GlobalVariable &GV) {
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
    return false;
  }
  if (isDynamicLDS(GV)) {
    return true;
  }
  if (GV.isConstant()) {
    // A constant undef variable can't be written to, and any load is
    // undef, so it should be eliminated by the optimizer. It could be
    // dropped by the back end if not. This pass skips over it.
    return false;
  }
  if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
    // Initializers are unimplemented for the LDS address space.
    // Leave such variables in place for consistent error reporting.
    return false;
  }
  return true;
}

// Decide whether a MemoryDef that MemorySSA reports as clobbering Ptr really
// writes memory that may alias Ptr. Fences and the AMDGPU barrier intrinsics
// only order memory, and atomics proven not to alias Ptr are also ignored.
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
  Instruction *DefInst = Def->getMemoryInst();

  if (isa<FenceInst>(DefInst))
    return false;

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_s_barrier:
    case Intrinsic::amdgcn_s_barrier_signal:
    case Intrinsic::amdgcn_s_barrier_signal_var:
    case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
    case Intrinsic::amdgcn_s_barrier_init:
    case Intrinsic::amdgcn_s_barrier_join:
    case Intrinsic::amdgcn_s_barrier_wait:
    case Intrinsic::amdgcn_s_barrier_leave:
    case Intrinsic::amdgcn_s_get_barrier_state:
    case Intrinsic::amdgcn_s_wakeup_barrier:
    case Intrinsic::amdgcn_wave_barrier:
    case Intrinsic::amdgcn_sched_barrier:
    case Intrinsic::amdgcn_sched_group_barrier:
      return false;
    default:
      break;
    }
  }

  // Ignore atomics that do not alias the original load; from MemorySSA's
  // point of view any atomic is a universal MemoryDef too, just like a fence.
  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
    return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
  };

  if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
      checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
    return false;

  return true;
}

// Walk MemorySSA upwards from Load and report whether any real write may
// clobber the loaded location within this function.
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                           AAResults *AA) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access. It will be either
  // live on entry (nothing to do, the load is not clobbered), a MemoryDef, or
  // a MemoryPhi if several MemoryDefs can define this memory state. In that
  // case add all of the Phi's definitions to the worklist and keep walking
  // up, checking every definition of this memory location, until the entry
  // state is reached. If all defs are exhausted and only the entry state
  // remains, there is no clobber. Along the way ignore barriers and fences,
  // which MemorySSA considers clobbers even though they do not actually write
  // memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      LLVM_DEBUG(dbgs() << "  Def: " << *Def->getMemoryInst() << '\n');

      if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
        LLVM_DEBUG(dbgs() << "      -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (const auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "      -> no clobber\n");
  return false;
}

} // end namespace AMDGPU

} // end namespace llvm
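
// Illustrative sketch (not part of this file's interface): a pass that has
// MemorySSA and AliasAnalysis results available might query the helper above
// roughly as follows. `MSSA`, `AA`, and the LoadInst `LI` are assumed to be
// supplied by the surrounding pass; the exact wiring depends on the pass
// manager in use.
//
//   if (!AMDGPU::isClobberedInFunction(LI, MSSA, AA)) {
//     // No real write clobbers the loaded LDS location between function
//     // entry and the load, so the load observes the value present at entry.
//   }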