xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Insert s_clause instructions to form hard clauses.
11 ///
12 /// Clausing load instructions can give cache coherency benefits. Before gfx10,
13 /// the hardware automatically detected "soft clauses", which were sequences of
14 /// memory instructions of the same type. In gfx10 this detection was removed,
15 /// and the s_clause instruction was introduced to explicitly mark "hard
16 /// clauses".
17 ///
18 /// It's the scheduler's job to form the clauses by putting similar memory
19 /// instructions next to each other. Our job is just to insert an s_clause
20 /// instruction to mark the start of each clause.
21 ///
22 /// Note that hard clauses are very similar to, but logically distinct from, the
23 /// groups of instructions that have to be restartable when XNACK is enabled.
24 /// The rules are slightly different in each case. For example an s_nop
25 /// instruction breaks a restartable group, but can appear in the middle of a
26 /// hard clause. (Before gfx10 there wasn't a distinction, and both were called
27 /// "soft clauses" or just "clauses".)
28 ///
29 /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
30 /// groups, not hard clauses.
31 //
32 //===----------------------------------------------------------------------===//
33 
34 #include "AMDGPU.h"
35 #include "GCNSubtarget.h"
36 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
37 #include "llvm/ADT/SmallVector.h"
38 #include "llvm/CodeGen/MachineFunctionPass.h"
39 
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "si-insert-hard-clauses"
43 
44 namespace {
45 
/// Classification of a machine instruction with respect to hard clauses.
///
/// Enumerators up to and including LAST_REAL_HARDCLAUSE_TYPE name "real"
/// clause types, i.e. kinds of instructions that can be members of a clause.
/// The enumerators after it are bookkeeping categories used while scanning.
enum HardClauseType {
  // --- GFX10 only ---------------------------------------------------------

  // Memory instructions that are texture, buffer, global or scratch accesses.
  HARDCLAUSE_VMEM,
  // Flat memory instructions, excluding the global and scratch variants.
  HARDCLAUSE_FLAT,

  // --- GFX11 only ---------------------------------------------------------

  // Texture memory instructions, split by the kind of access.
  HARDCLAUSE_MIMG_LOAD,
  HARDCLAUSE_MIMG_STORE,
  HARDCLAUSE_MIMG_ATOMIC,
  HARDCLAUSE_MIMG_SAMPLE,
  // Buffer, global or scratch memory instructions, split by kind of access.
  HARDCLAUSE_VMEM_LOAD,
  HARDCLAUSE_VMEM_STORE,
  HARDCLAUSE_VMEM_ATOMIC,
  // Flat memory instructions (not global or scratch), split by kind of
  // access.
  HARDCLAUSE_FLAT_LOAD,
  HARDCLAUSE_FLAT_STORE,
  HARDCLAUSE_FLAT_ATOMIC,
  // BVH instructions.
  HARDCLAUSE_BVH,

  // --- Shared by all generations --------------------------------------------

  // Instructions accessing LDS.
  HARDCLAUSE_LDS,
  // Scalar memory instructions.
  HARDCLAUSE_SMEM,
  // VALU instructions.
  HARDCLAUSE_VALU,
  // Alias for the highest-valued real clause type; anything greater is one of
  // the bookkeeping categories below.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  // Internal instructions. With the exception of s_waitcnt, these may appear
  // in the middle of a hard clause.
  HARDCLAUSE_INTERNAL,
  // Meta instructions, such as KILL, that do not produce any ISA.
  HARDCLAUSE_IGNORE,
  // Everything that must not appear in a hard clause: SALU, export, branch,
  // message, GDS, s_waitcnt and any instruction not covered above.
  HARDCLAUSE_ILLEGAL,
};
91 
92 class SIInsertHardClauses : public MachineFunctionPass {
93 public:
94   static char ID;
95   const GCNSubtarget *ST = nullptr;
96 
SIInsertHardClauses()97   SIInsertHardClauses() : MachineFunctionPass(ID) {}
98 
getAnalysisUsage(AnalysisUsage & AU) const99   void getAnalysisUsage(AnalysisUsage &AU) const override {
100     AU.setPreservesCFG();
101     MachineFunctionPass::getAnalysisUsage(AU);
102   }
103 
getHardClauseType(const MachineInstr & MI)104   HardClauseType getHardClauseType(const MachineInstr &MI) {
105     if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
106       if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
107         if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
108           if (ST->hasNSAClauseBug()) {
109             const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
110             if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
111               return HARDCLAUSE_ILLEGAL;
112           }
113           return HARDCLAUSE_VMEM;
114         }
115         if (SIInstrInfo::isFLAT(MI))
116           return HARDCLAUSE_FLAT;
117       } else {
118         assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
119         if (SIInstrInfo::isMIMG(MI)) {
120           const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
121           const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
122               AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
123           if (BaseInfo->BVH)
124             return HARDCLAUSE_BVH;
125           if (BaseInfo->Sampler)
126             return HARDCLAUSE_MIMG_SAMPLE;
127           return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC
128                                               : HARDCLAUSE_MIMG_LOAD
129                               : HARDCLAUSE_MIMG_STORE;
130         }
131         if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
132           return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
133                                               : HARDCLAUSE_VMEM_LOAD
134                               : HARDCLAUSE_VMEM_STORE;
135         }
136         if (SIInstrInfo::isFLAT(MI)) {
137           return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC
138                                               : HARDCLAUSE_FLAT_LOAD
139                               : HARDCLAUSE_FLAT_STORE;
140         }
141       }
142       // TODO: LDS
143       if (SIInstrInfo::isSMRD(MI))
144         return HARDCLAUSE_SMEM;
145     }
146 
147     // Don't form VALU clauses. It's not clear what benefit they give, if any.
148 
149     // In practice s_nop is the only internal instruction we're likely to see.
150     // It's safe to treat the rest as illegal.
151     if (MI.getOpcode() == AMDGPU::S_NOP)
152       return HARDCLAUSE_INTERNAL;
153     if (MI.isMetaInstruction())
154       return HARDCLAUSE_IGNORE;
155     return HARDCLAUSE_ILLEGAL;
156   }
157 
158   // Track information about a clause as we discover it.
159   struct ClauseInfo {
160     // The type of all (non-internal) instructions in the clause.
161     HardClauseType Type = HARDCLAUSE_ILLEGAL;
162     // The first (necessarily non-internal) instruction in the clause.
163     MachineInstr *First = nullptr;
164     // The last non-internal instruction in the clause.
165     MachineInstr *Last = nullptr;
166     // The length of the clause including any internal instructions in the
167     // middle (but not at the end) of the clause.
168     unsigned Length = 0;
169     // Internal instructions at the and of a clause should not be included in
170     // the clause. Count them in TrailingInternalLength until a new memory
171     // instruction is added.
172     unsigned TrailingInternalLength = 0;
173     // The base operands of *Last.
174     SmallVector<const MachineOperand *, 4> BaseOps;
175   };
176 
emitClause(const ClauseInfo & CI,const SIInstrInfo * SII)177   bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
178     if (CI.First == CI.Last)
179       return false;
180     assert(CI.Length <= ST->maxHardClauseLength() &&
181            "Hard clause is too long!");
182 
183     auto &MBB = *CI.First->getParent();
184     auto ClauseMI =
185         BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
186             .addImm(CI.Length - 1);
187     finalizeBundle(MBB, ClauseMI->getIterator(),
188                    std::next(CI.Last->getIterator()));
189     return true;
190   }
191 
runOnMachineFunction(MachineFunction & MF)192   bool runOnMachineFunction(MachineFunction &MF) override {
193     if (skipFunction(MF.getFunction()))
194       return false;
195 
196     ST = &MF.getSubtarget<GCNSubtarget>();
197     if (!ST->hasHardClauses())
198       return false;
199 
200     const SIInstrInfo *SII = ST->getInstrInfo();
201     const TargetRegisterInfo *TRI = ST->getRegisterInfo();
202 
203     bool Changed = false;
204     for (auto &MBB : MF) {
205       ClauseInfo CI;
206       for (auto &MI : MBB) {
207         HardClauseType Type = getHardClauseType(MI);
208 
209         int64_t Dummy1;
210         bool Dummy2;
211         LocationSize Dummy3 = 0;
212         SmallVector<const MachineOperand *, 4> BaseOps;
213         if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
214           if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
215                                                   Dummy3, TRI)) {
216             // We failed to get the base operands, so we'll never clause this
217             // instruction with any other, so pretend it's illegal.
218             Type = HARDCLAUSE_ILLEGAL;
219           }
220         }
221 
222         if (CI.Length == ST->maxHardClauseLength() ||
223             (CI.Length && Type != HARDCLAUSE_INTERNAL &&
224              Type != HARDCLAUSE_IGNORE &&
225              (Type != CI.Type ||
226               // Note that we lie to shouldClusterMemOps about the size of the
227               // cluster. When shouldClusterMemOps is called from the machine
228               // scheduler it limits the size of the cluster to avoid increasing
229               // register pressure too much, but this pass runs after register
230               // allocation so there is no need for that kind of limit.
231               // We also lie about the Offset and OffsetIsScalable parameters,
232               // as they aren't used in the SIInstrInfo implementation.
233               !SII->shouldClusterMemOps(CI.BaseOps, 0, false, BaseOps, 0, false,
234                                         2, 2)))) {
235           // Finish the current clause.
236           Changed |= emitClause(CI, SII);
237           CI = ClauseInfo();
238         }
239 
240         if (CI.Length) {
241           // Extend the current clause.
242           if (Type != HARDCLAUSE_IGNORE) {
243             if (Type == HARDCLAUSE_INTERNAL) {
244               ++CI.TrailingInternalLength;
245             } else {
246               ++CI.Length;
247               CI.Length += CI.TrailingInternalLength;
248               CI.TrailingInternalLength = 0;
249               CI.Last = &MI;
250               CI.BaseOps = std::move(BaseOps);
251             }
252           }
253         } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
254           // Start a new clause.
255           CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
256         }
257       }
258 
259       // Finish the last clause in the basic block if any.
260       if (CI.Length)
261         Changed |= emitClause(CI, SII);
262     }
263 
264     return Changed;
265   }
266 };
267 
268 } // namespace
269 
270 char SIInsertHardClauses::ID = 0;
271 
272 char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;
273 
274 INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
275                 false, false)
276