xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp (revision 79ac3c12a714bcd3f2354c52d948aed9575c46d6)
1 //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass adds amdgpu.uniform metadata to IR values so this information
11 /// can be used during instruction selection.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "llvm/ADT/SetVector.h"
18 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
19 #include "llvm/Analysis/LoopInfo.h"
20 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
21 #include "llvm/IR/InstVisitor.h"
22 #include "llvm/InitializePasses.h"
23 
24 #define DEBUG_TYPE "amdgpu-annotate-uniform"
25 
26 using namespace llvm;
27 
28 namespace {
29 
30 class AMDGPUAnnotateUniformValues : public FunctionPass,
31                        public InstVisitor<AMDGPUAnnotateUniformValues> {
32   LegacyDivergenceAnalysis *DA;
33   MemoryDependenceResults *MDR;
34   LoopInfo *LI;
35   DenseMap<Value*, GetElementPtrInst*> noClobberClones;
36   bool isEntryFunc;
37 
38 public:
39   static char ID;
40   AMDGPUAnnotateUniformValues() :
41     FunctionPass(ID) { }
42   bool doInitialization(Module &M) override;
43   bool runOnFunction(Function &F) override;
44   StringRef getPassName() const override {
45     return "AMDGPU Annotate Uniform Values";
46   }
47   void getAnalysisUsage(AnalysisUsage &AU) const override {
48     AU.addRequired<LegacyDivergenceAnalysis>();
49     AU.addRequired<MemoryDependenceWrapperPass>();
50     AU.addRequired<LoopInfoWrapperPass>();
51     AU.setPreservesAll();
52  }
53 
54   void visitBranchInst(BranchInst &I);
55   void visitLoadInst(LoadInst &I);
56   bool isClobberedInFunction(LoadInst * Load);
57 };
58 
59 } // End anonymous namespace
60 
61 INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
62                       "Add AMDGPU uniform metadata", false, false)
63 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
64 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
65 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
66 INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
67                     "Add AMDGPU uniform metadata", false, false)
68 
69 char AMDGPUAnnotateUniformValues::ID = 0;
70 
71 static void setUniformMetadata(Instruction *I) {
72   I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
73 }
74 static void setNoClobberMetadata(Instruction *I) {
75   I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
76 }
77 
78 static void DFS(BasicBlock *Root, SetVector<BasicBlock*> & Set) {
79   for (auto I : predecessors(Root))
80     if (Set.insert(I))
81       DFS(I, Set);
82 }
83 
84 bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
85   // 1. get Loop for the Load->getparent();
86   // 2. if it exists, collect all the BBs from the most outer
87   // loop and check for the writes. If NOT - start DFS over all preds.
88   // 3. Start DFS over all preds from the most outer loop header.
89   SetVector<BasicBlock *> Checklist;
90   BasicBlock *Start = Load->getParent();
91   Checklist.insert(Start);
92   const Value *Ptr = Load->getPointerOperand();
93   const Loop *L = LI->getLoopFor(Start);
94   if (L) {
95     const Loop *P = L;
96     do {
97       L = P;
98       P = P->getParentLoop();
99     } while (P);
100     Checklist.insert(L->block_begin(), L->block_end());
101     Start = L->getHeader();
102   }
103 
104   DFS(Start, Checklist);
105   for (auto &BB : Checklist) {
106     BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ?
107       BasicBlock::iterator(Load) : BB->end();
108     auto Q = MDR->getPointerDependencyFrom(
109         MemoryLocation::getBeforeOrAfter(Ptr), true, StartIt, BB, Load);
110     if (Q.isClobber() || Q.isUnknown() ||
111         // Store defines the load and thus clobbers it.
112         (Q.isDef() && Q.getInst()->mayWriteToMemory()))
113       return true;
114   }
115   return false;
116 }
117 
118 void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
119   if (DA->isUniform(&I))
120     setUniformMetadata(I.getParent()->getTerminator());
121 }
122 
123 void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
124   Value *Ptr = I.getPointerOperand();
125   if (!DA->isUniform(Ptr))
126     return;
127   auto isGlobalLoad = [&](LoadInst &Load)->bool {
128     return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
129   };
130   // We're tracking up to the Function boundaries, and cannot go beyond because
131   // of FunctionPass restrictions. We can ensure that is memory not clobbered
132   // for memory operations that are live in to entry points only.
133   Instruction *PtrI = dyn_cast<Instruction>(Ptr);
134 
135   if (!isEntryFunc) {
136     if (PtrI)
137       setUniformMetadata(PtrI);
138     return;
139   }
140 
141   bool NotClobbered = false;
142   bool GlobalLoad = isGlobalLoad(I);
143   if (PtrI)
144     NotClobbered = GlobalLoad && !isClobberedInFunction(&I);
145   else if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
146     if (GlobalLoad && !isClobberedInFunction(&I)) {
147       NotClobbered = true;
148       // Lookup for the existing GEP
149       if (noClobberClones.count(Ptr)) {
150         PtrI = noClobberClones[Ptr];
151       } else {
152         // Create GEP of the Value
153         Function *F = I.getParent()->getParent();
154         Value *Idx = Constant::getIntegerValue(
155           Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
156         // Insert GEP at the entry to make it dominate all uses
157         PtrI = GetElementPtrInst::Create(
158           Ptr->getType()->getPointerElementType(), Ptr,
159           ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI());
160       }
161       I.replaceUsesOfWith(Ptr, PtrI);
162     }
163   }
164 
165   if (PtrI) {
166     setUniformMetadata(PtrI);
167     if (NotClobbered)
168       setNoClobberMetadata(PtrI);
169   }
170 }
171 
172 bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
173   return false;
174 }
175 
176 bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
177   if (skipFunction(F))
178     return false;
179 
180   DA  = &getAnalysis<LegacyDivergenceAnalysis>();
181   MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
182   LI  = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
183   isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());
184 
185   visit(F);
186   noClobberClones.clear();
187   return true;
188 }
189 
190 FunctionPass *
191 llvm::createAMDGPUAnnotateUniformValues() {
192   return new AMDGPUAnnotateUniformValues();
193 }
194