// xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp (revision 5ffd83dbcc34f10e07f6d3e968ae6365869615f4)
//===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Insert s_clause instructions to form hard clauses.
///
/// Clausing load instructions can give cache coherency benefits. Before gfx10,
/// the hardware automatically detected "soft clauses", which were sequences of
/// memory instructions of the same type. In gfx10 this detection was removed,
/// and the s_clause instruction was introduced to explicitly mark "hard
/// clauses".
///
/// It's the scheduler's job to form the clauses by putting similar memory
/// instructions next to each other. Our job is just to insert an s_clause
/// instruction to mark the start of each clause.
///
/// Note that hard clauses are very similar to, but logically distinct from, the
/// groups of instructions that have to be restartable when XNACK is enabled.
/// The rules are slightly different in each case. For example an s_nop
/// instruction breaks a restartable group, but can appear in the middle of a
/// hard clause. (Before gfx10 there wasn't a distinction, and both were called
/// "soft clauses" or just "clauses".)
///
/// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
/// groups, not hard clauses.
//
//===----------------------------------------------------------------------===//

34*5ffd83dbSDimitry Andric #include "AMDGPUSubtarget.h"
35*5ffd83dbSDimitry Andric #include "SIInstrInfo.h"
36*5ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h"
37*5ffd83dbSDimitry Andric 
38*5ffd83dbSDimitry Andric using namespace llvm;
39*5ffd83dbSDimitry Andric 
40*5ffd83dbSDimitry Andric #define DEBUG_TYPE "si-insert-hard-clauses"
41*5ffd83dbSDimitry Andric 
namespace {

// Classification of an instruction with respect to hard clauses.
//
// The enumerators are ordered so that every "real" clause type compares
// less than or equal to LAST_REAL_HARDCLAUSE_TYPE; the pass relies on that
// ordering when it tests `Type <= LAST_REAL_HARDCLAUSE_TYPE`.
enum HardClauseType {
  // Texture, buffer, global or scratch memory instructions.
  HARDCLAUSE_VMEM,
  // Flat (not global or scratch) memory instructions.
  HARDCLAUSE_FLAT,
  // Instructions that access LDS.
  HARDCLAUSE_LDS,
  // Scalar memory instructions.
  HARDCLAUSE_SMEM,
  // VALU instructions.
  HARDCLAUSE_VALU,
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  // Internal instructions, which are allowed in the middle of a hard clause,
  // except for s_waitcnt.
  HARDCLAUSE_INTERNAL,
  // Instructions that are not allowed in a hard clause: SALU, export, branch,
  // message, GDS, s_waitcnt and anything else not mentioned above.
  HARDCLAUSE_ILLEGAL,
};

65*5ffd83dbSDimitry Andric HardClauseType getHardClauseType(const MachineInstr &MI) {
66*5ffd83dbSDimitry Andric   // On current architectures we only get a benefit from clausing loads.
67*5ffd83dbSDimitry Andric   if (MI.mayLoad()) {
68*5ffd83dbSDimitry Andric     if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI))
69*5ffd83dbSDimitry Andric       return HARDCLAUSE_VMEM;
70*5ffd83dbSDimitry Andric     if (SIInstrInfo::isFLAT(MI))
71*5ffd83dbSDimitry Andric       return HARDCLAUSE_FLAT;
72*5ffd83dbSDimitry Andric     // TODO: LDS
73*5ffd83dbSDimitry Andric     if (SIInstrInfo::isSMRD(MI))
74*5ffd83dbSDimitry Andric       return HARDCLAUSE_SMEM;
75*5ffd83dbSDimitry Andric   }
76*5ffd83dbSDimitry Andric 
77*5ffd83dbSDimitry Andric   // Don't form VALU clauses. It's not clear what benefit they give, if any.
78*5ffd83dbSDimitry Andric 
79*5ffd83dbSDimitry Andric   // In practice s_nop is the only internal instruction we're likely to see.
80*5ffd83dbSDimitry Andric   // It's safe to treat the rest as illegal.
81*5ffd83dbSDimitry Andric   if (MI.getOpcode() == AMDGPU::S_NOP)
82*5ffd83dbSDimitry Andric     return HARDCLAUSE_INTERNAL;
83*5ffd83dbSDimitry Andric   return HARDCLAUSE_ILLEGAL;
84*5ffd83dbSDimitry Andric }
85*5ffd83dbSDimitry Andric 
86*5ffd83dbSDimitry Andric class SIInsertHardClauses : public MachineFunctionPass {
87*5ffd83dbSDimitry Andric public:
88*5ffd83dbSDimitry Andric   static char ID;
89*5ffd83dbSDimitry Andric 
90*5ffd83dbSDimitry Andric   SIInsertHardClauses() : MachineFunctionPass(ID) {}
91*5ffd83dbSDimitry Andric 
92*5ffd83dbSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
93*5ffd83dbSDimitry Andric     AU.setPreservesCFG();
94*5ffd83dbSDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
95*5ffd83dbSDimitry Andric   }
96*5ffd83dbSDimitry Andric 
97*5ffd83dbSDimitry Andric   // Track information about a clause as we discover it.
98*5ffd83dbSDimitry Andric   struct ClauseInfo {
99*5ffd83dbSDimitry Andric     // The type of all (non-internal) instructions in the clause.
100*5ffd83dbSDimitry Andric     HardClauseType Type = HARDCLAUSE_ILLEGAL;
101*5ffd83dbSDimitry Andric     // The first (necessarily non-internal) instruction in the clause.
102*5ffd83dbSDimitry Andric     MachineInstr *First = nullptr;
103*5ffd83dbSDimitry Andric     // The last non-internal instruction in the clause.
104*5ffd83dbSDimitry Andric     MachineInstr *Last = nullptr;
105*5ffd83dbSDimitry Andric     // The length of the clause including any internal instructions in the
106*5ffd83dbSDimitry Andric     // middle or after the end of the clause.
107*5ffd83dbSDimitry Andric     unsigned Length = 0;
108*5ffd83dbSDimitry Andric     // The base operands of *Last.
109*5ffd83dbSDimitry Andric     SmallVector<const MachineOperand *, 4> BaseOps;
110*5ffd83dbSDimitry Andric   };
111*5ffd83dbSDimitry Andric 
112*5ffd83dbSDimitry Andric   bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
113*5ffd83dbSDimitry Andric     // Get the size of the clause excluding any internal instructions at the
114*5ffd83dbSDimitry Andric     // end.
115*5ffd83dbSDimitry Andric     unsigned Size =
116*5ffd83dbSDimitry Andric         std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1;
117*5ffd83dbSDimitry Andric     if (Size < 2)
118*5ffd83dbSDimitry Andric       return false;
119*5ffd83dbSDimitry Andric     assert(Size <= 64 && "Hard clause is too long!");
120*5ffd83dbSDimitry Andric 
121*5ffd83dbSDimitry Andric     auto &MBB = *CI.First->getParent();
122*5ffd83dbSDimitry Andric     auto ClauseMI =
123*5ffd83dbSDimitry Andric         BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
124*5ffd83dbSDimitry Andric             .addImm(Size - 1);
125*5ffd83dbSDimitry Andric     finalizeBundle(MBB, ClauseMI->getIterator(),
126*5ffd83dbSDimitry Andric                    std::next(CI.Last->getIterator()));
127*5ffd83dbSDimitry Andric     return true;
128*5ffd83dbSDimitry Andric   }
129*5ffd83dbSDimitry Andric 
130*5ffd83dbSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override {
131*5ffd83dbSDimitry Andric     if (skipFunction(MF.getFunction()))
132*5ffd83dbSDimitry Andric       return false;
133*5ffd83dbSDimitry Andric 
134*5ffd83dbSDimitry Andric     const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
135*5ffd83dbSDimitry Andric     if (!ST.hasHardClauses())
136*5ffd83dbSDimitry Andric       return false;
137*5ffd83dbSDimitry Andric 
138*5ffd83dbSDimitry Andric     const SIInstrInfo *SII = ST.getInstrInfo();
139*5ffd83dbSDimitry Andric     const TargetRegisterInfo *TRI = ST.getRegisterInfo();
140*5ffd83dbSDimitry Andric 
141*5ffd83dbSDimitry Andric     bool Changed = false;
142*5ffd83dbSDimitry Andric     for (auto &MBB : MF) {
143*5ffd83dbSDimitry Andric       ClauseInfo CI;
144*5ffd83dbSDimitry Andric       for (auto &MI : MBB) {
145*5ffd83dbSDimitry Andric         HardClauseType Type = getHardClauseType(MI);
146*5ffd83dbSDimitry Andric 
147*5ffd83dbSDimitry Andric         int64_t Dummy1;
148*5ffd83dbSDimitry Andric         bool Dummy2;
149*5ffd83dbSDimitry Andric         unsigned Dummy3;
150*5ffd83dbSDimitry Andric         SmallVector<const MachineOperand *, 4> BaseOps;
151*5ffd83dbSDimitry Andric         if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
152*5ffd83dbSDimitry Andric           if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
153*5ffd83dbSDimitry Andric                                                   Dummy3, TRI)) {
154*5ffd83dbSDimitry Andric             // We failed to get the base operands, so we'll never clause this
155*5ffd83dbSDimitry Andric             // instruction with any other, so pretend it's illegal.
156*5ffd83dbSDimitry Andric             Type = HARDCLAUSE_ILLEGAL;
157*5ffd83dbSDimitry Andric           }
158*5ffd83dbSDimitry Andric         }
159*5ffd83dbSDimitry Andric 
160*5ffd83dbSDimitry Andric         if (CI.Length == 64 ||
161*5ffd83dbSDimitry Andric             (CI.Length && Type != HARDCLAUSE_INTERNAL &&
162*5ffd83dbSDimitry Andric              (Type != CI.Type ||
163*5ffd83dbSDimitry Andric               // Note that we lie to shouldClusterMemOps about the size of the
164*5ffd83dbSDimitry Andric               // cluster. When shouldClusterMemOps is called from the machine
165*5ffd83dbSDimitry Andric               // scheduler it limits the size of the cluster to avoid increasing
166*5ffd83dbSDimitry Andric               // register pressure too much, but this pass runs after register
167*5ffd83dbSDimitry Andric               // allocation so there is no need for that kind of limit.
168*5ffd83dbSDimitry Andric               !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
169*5ffd83dbSDimitry Andric           // Finish the current clause.
170*5ffd83dbSDimitry Andric           Changed |= emitClause(CI, SII);
171*5ffd83dbSDimitry Andric           CI = ClauseInfo();
172*5ffd83dbSDimitry Andric         }
173*5ffd83dbSDimitry Andric 
174*5ffd83dbSDimitry Andric         if (CI.Length) {
175*5ffd83dbSDimitry Andric           // Extend the current clause.
176*5ffd83dbSDimitry Andric           ++CI.Length;
177*5ffd83dbSDimitry Andric           if (Type != HARDCLAUSE_INTERNAL) {
178*5ffd83dbSDimitry Andric             CI.Last = &MI;
179*5ffd83dbSDimitry Andric             CI.BaseOps = std::move(BaseOps);
180*5ffd83dbSDimitry Andric           }
181*5ffd83dbSDimitry Andric         } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
182*5ffd83dbSDimitry Andric           // Start a new clause.
183*5ffd83dbSDimitry Andric           CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)};
184*5ffd83dbSDimitry Andric         }
185*5ffd83dbSDimitry Andric       }
186*5ffd83dbSDimitry Andric 
187*5ffd83dbSDimitry Andric       // Finish the last clause in the basic block if any.
188*5ffd83dbSDimitry Andric       if (CI.Length)
189*5ffd83dbSDimitry Andric         Changed |= emitClause(CI, SII);
190*5ffd83dbSDimitry Andric     }
191*5ffd83dbSDimitry Andric 
192*5ffd83dbSDimitry Andric     return Changed;
193*5ffd83dbSDimitry Andric   }
194*5ffd83dbSDimitry Andric };

} // namespace

198*5ffd83dbSDimitry Andric char SIInsertHardClauses::ID = 0;
199*5ffd83dbSDimitry Andric 
200*5ffd83dbSDimitry Andric char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;
201*5ffd83dbSDimitry Andric 
202*5ffd83dbSDimitry Andric INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
203*5ffd83dbSDimitry Andric                 false, false)
204