//===-- AMDGPUMemoryUtils.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ReplaceConstant.h"

#define DEBUG_TYPE "amdgpu-memory-utils"

using namespace llvm;

namespace llvm::AMDGPU {

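// Return the alignment to use for an LDS global, falling back to the ABI
// alignment of its value type when no explicit alignment is known.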
Align getAlign(const DataLayout &DL, const GlobalVariable *GV) {
  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
                                       GV->getValueType());
}

bool isDynamicLDS(const GlobalVariable &GV) {
  // An external, zero-size addrspace(3) variable without an initializer is
  // dynamic LDS, e.g. @dyn_lds = external addrspace(3) global [0 x i32].
  const Module *M = GV.getParent();
  const DataLayout &DL = M->getDataLayout();
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return false;
  return DL.getTypeAllocSize(GV.getValueType()) == 0;
}

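// Return true if this LDS global should be specialised by the LDS lowering
// passes: either dynamic LDS, or a non-constant LDS variable without a
// meaningful (non-undef) initializer.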
bool isLDSVariableToLower(const GlobalVariable &GV) {
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
    return false;
  }
  if (isDynamicLDS(GV)) {
    return true;
  }
  if (GV.isConstant()) {
    // A constant undef variable can't be written to, and any load is
    // undef, so it should be eliminated by the optimizer. It could be
    // dropped by the back end if not. This pass skips over it.
    return false;
  }
  if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
    // Initializers are unimplemented for LDS address space.
    // Leave such variables in place for consistent error reporting.
    return false;
  }
  return true;
}

bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
  // Constants are uniqued within LLVM, so a ConstantExpr referring to an LDS
  // global may have uses from multiple different functions as a result.
  // This pass specialises LDS variables with respect to the kernel that
  // allocates them.

  // This is semantically equivalent to the following, which is not
  // implemented that way because it would be slow:
  // for (auto &F : M.functions())
  //   for (auto &BB : F)
  //     for (auto &I : BB)
  //       for (Use &Op : I.operands())
  //         if (constantExprUsesLDS(Op))
  //           replaceConstantExprInFunction(I, Op);

  SmallVector<Constant *> LDSGlobals;
  for (auto &GV : M.globals())
    if (AMDGPU::isLDSVariableToLower(GV))
      LDSGlobals.push_back(&GV);
  return convertUsersOfConstantsToInstructions(LDSGlobals);
}

void getUsesOfLDSByFunction(const CallGraph &CG, Module &M,
                            FunctionVariableMap &kernels,
                            FunctionVariableMap &Functions) {
  // Get uses from the current function, excluding uses by called functions.
  // Two output variables to avoid walking the globals list twice.
  for (auto &GV : M.globals()) {
    if (!AMDGPU::isLDSVariableToLower(GV))
      continue;
    for (User *V : GV.users()) {
      if (auto *I = dyn_cast<Instruction>(V)) {
        Function *F = I->getFunction();
        if (isKernelLDS(F))
          kernels[F].insert(&GV);
        else
          Functions[F].insert(&GV);
      }
    }
  }
}

bool isKernelLDS(const Function *F) {
  // Some weirdness here. AMDGPU::isKernelCC does not call into
  // AMDGPU::isKernel with the calling convention; it instead calls into
  // isModuleEntryFunction, which returns true for more calling conventions
  // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
  // There's also a test that checks that the LDS lowering does not hit on
  // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
  // "LDS" is in the name of this function to draw attention to this.
  return AMDGPU::isKernel(F->getCallingConv());
}

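// Compute, for every kernel, which LDS variables it uses directly
// (DirectMapKernel) and which it can reach through calls (IndirectMapKernel).
// Variables used by functions whose address escapes are treated as reachable
// from any unknown call site.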
LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M) {

  FunctionVariableMap DirectMapKernel;
  FunctionVariableMap DirectMapFunction;
  getUsesOfLDSByFunction(CG, M, DirectMapKernel, DirectMapFunction);

  // Collect variables that are used by functions whose address has escaped.
  DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
  for (Function &F : M.functions()) {
    if (!isKernelLDS(&F))
      if (F.hasAddressTaken(nullptr,
                            /* IgnoreCallbackUses */ false,
                            /* IgnoreAssumeLikeCalls */ false,
                            /* IgnoreLLVMUsed */ true,
                            /* IgnoreArcAttachedCall */ false)) {
        set_union(VariablesReachableThroughFunctionPointer,
                  DirectMapFunction[&F]);
      }
  }

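  // A CallRecord whose callee Function is null represents a call through a
  // pointer or another call site the CallGraph could not resolve.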
  auto FunctionMakesUnknownCall = [&](const Function *F) -> bool {
    assert(!F->isDeclaration());
    for (const CallGraphNode::CallRecord &R : *CG[F]) {
      if (!R.second->getFunction())
        return true;
    }
    return false;
  };

  // Work out which variables are reachable through function calls.
  FunctionVariableMap TransitiveMapFunction = DirectMapFunction;

  // If a function makes any unknown call, assume the worst case: it can
  // access all variables accessed by functions whose address escaped.
  for (Function &F : M.functions()) {
    if (!F.isDeclaration() && FunctionMakesUnknownCall(&F)) {
      if (!isKernelLDS(&F)) {
        set_union(TransitiveMapFunction[&F],
                  VariablesReachableThroughFunctionPointer);
      }
    }
  }

  // Direct implementation of collecting all variables reachable from each
  // function.
  for (Function &Func : M.functions()) {
    if (Func.isDeclaration() || isKernelLDS(&Func))
      continue;

    DenseSet<Function *> seen; // catches cycles
    SmallVector<Function *, 4> wip = {&Func};

    while (!wip.empty()) {
      Function *F = wip.pop_back_val();

      // This could be accelerated by consulting the transitive map of
      // functions that have already been computed, though doing so needs
      // more care than this simple version.
      set_union(TransitiveMapFunction[&Func], DirectMapFunction[F]);

      for (const CallGraphNode::CallRecord &R : *CG[F]) {
        Function *Ith = R.second->getFunction();
        if (Ith) {
          if (!seen.contains(Ith)) {
            seen.insert(Ith);
            wip.push_back(Ith);
          }
        }
      }
    }
  }

  // DirectMapKernel lists the variables used directly by each kernel; now
  // find the variables that are used through a function call.
  FunctionVariableMap IndirectMapKernel;

  for (Function &Func : M.functions()) {
    if (Func.isDeclaration() || !isKernelLDS(&Func))
      continue;

    for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
      Function *Ith = R.second->getFunction();
      if (Ith) {
        set_union(IndirectMapKernel[&Func], TransitiveMapFunction[Ith]);
      } else {
        set_union(IndirectMapKernel[&Func],
                  VariablesReachableThroughFunctionPointer);
      }
    }
  }

  // Verify that we fall into one of two cases:
  // - All variables are absolute, or are direct-mapped dynamic LDS that is
  //   not lowered; this is a re-run of the pass, so there is nothing to do.
  // - No variables are absolute.
  std::optional<bool> HasAbsoluteGVs;
  for (auto &Map : {DirectMapKernel, IndirectMapKernel}) {
    for (auto &[Fn, GVs] : Map) {
      for (auto *GV : GVs) {
        bool IsAbsolute = GV->isAbsoluteSymbolRef();
        bool IsDirectMapDynLDSGV =
            AMDGPU::isDynamicLDS(*GV) && DirectMapKernel.contains(Fn);
        if (IsDirectMapDynLDSGV)
          continue;
        if (HasAbsoluteGVs.has_value()) {
          if (*HasAbsoluteGVs != IsAbsolute) {
            report_fatal_error(
                "Module cannot mix absolute and non-absolute LDS GVs");
          }
        } else
          HasAbsoluteGVs = IsAbsolute;
      }
    }
  }

  // If we only had absolute GVs, we have nothing to do; return an empty
  // result.
  if (HasAbsoluteGVs && *HasAbsoluteGVs)
    return {FunctionVariableMap(), FunctionVariableMap()};

  return {std::move(DirectMapKernel), std::move(IndirectMapKernel)};
}

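// Strip the given function attributes from KernelRoot and from every function
// reachable from it through the call graph. If any unknown (indirect) call is
// seen, conservatively strip the attributes from every non-kernel function on
// the call graph's external-calling node as well.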
void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
                               ArrayRef<StringRef> FnAttrs) {
  for (StringRef Attr : FnAttrs)
    KernelRoot->removeFnAttr(Attr);

  SmallVector<Function *> WorkList = {CG[KernelRoot]->getFunction()};
  SmallPtrSet<Function *, 8> Visited;
  bool SeenUnknownCall = false;

  while (!WorkList.empty()) {
    Function *F = WorkList.pop_back_val();

    for (auto &CallRecord : *CG[F]) {
      if (!CallRecord.second)
        continue;

      Function *Callee = CallRecord.second->getFunction();
      if (!Callee) {
        if (!SeenUnknownCall) {
          SeenUnknownCall = true;

          // If we see any indirect calls, assume nothing about potential
          // targets.
          // TODO: This could be refined to possible LDS global users.
          for (auto &ExternalCallRecord : *CG.getExternalCallingNode()) {
            Function *PotentialCallee =
                ExternalCallRecord.second->getFunction();
            assert(PotentialCallee);
            if (!isKernelLDS(PotentialCallee)) {
              for (StringRef Attr : FnAttrs)
                PotentialCallee->removeFnAttr(Attr);
            }
          }
        }
      } else {
        for (StringRef Attr : FnAttrs)
          Callee->removeFnAttr(Attr);
        if (Visited.insert(Callee).second)
          WorkList.push_back(Callee);
      }
    }
  }
}

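// Return true if Def is a MemoryDef that can actually clobber Ptr. MemorySSA
// conservatively models barriers, fences, and atomics as clobbers; filter out
// the ones that cannot write memory aliasing Ptr.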
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
  Instruction *DefInst = Def->getMemoryInst();

  if (isa<FenceInst>(DefInst))
    return false;

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_s_barrier:
    case Intrinsic::amdgcn_s_barrier_signal:
    case Intrinsic::amdgcn_s_barrier_signal_var:
    case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
    case Intrinsic::amdgcn_s_barrier_init:
    case Intrinsic::amdgcn_s_barrier_join:
    case Intrinsic::amdgcn_s_barrier_wait:
    case Intrinsic::amdgcn_s_barrier_leave:
    case Intrinsic::amdgcn_s_get_barrier_state:
    case Intrinsic::amdgcn_s_wakeup_barrier:
    case Intrinsic::amdgcn_wave_barrier:
    case Intrinsic::amdgcn_sched_barrier:
    case Intrinsic::amdgcn_sched_group_barrier:
      return false;
    default:
      break;
    }
  }

  // Ignore atomics that do not alias the original load; any atomic is a
  // universal MemoryDef from MSSA's point of view too, just like a fence.
  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
    return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
  };

  if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
      checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
    return false;

  return true;
}

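// Return true if any real clobber (as defined by isReallyAClobber) of the
// load's memory location can reach it within its function.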
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                           AAResults *AA) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access; it will be either
  // live on entry (nothing to do, the load is not clobbered), a MemoryDef, or
  // a MemoryPhi if several MemoryDefs can define this memory state. In that
  // case add all Defs to the WorkList and continue going up, checking all
  // the definitions of this memory location until the root. When all the
  // defs are exhausted and we have reached the entry state, there is no
  // clobber. Along the scan, ignore barriers and fences, which MemorySSA
  // considers clobbers even though they do not actually write any memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      LLVM_DEBUG(dbgs() << "  Def: " << *Def->getMemoryInst() << '\n');

      if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
        LLVM_DEBUG(dbgs() << "      -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (const auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "      -> no clobber\n");
  return false;
}

} // end namespace llvm::AMDGPU