xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
//===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Insert s_clause instructions to form hard clauses.
///
/// Clausing load instructions can give cache coherency benefits. Before gfx10,
/// the hardware automatically detected "soft clauses", which were sequences of
/// memory instructions of the same type. In gfx10 this detection was removed,
/// and the s_clause instruction was introduced to explicitly mark "hard
/// clauses".
///
/// It's the scheduler's job to form the clauses by putting similar memory
/// instructions next to each other. Our job is just to insert an s_clause
/// instruction to mark the start of each clause.
///
/// Note that hard clauses are very similar to, but logically distinct from, the
/// groups of instructions that have to be restartable when XNACK is enabled.
/// The rules are slightly different in each case. For example an s_nop
/// instruction breaks a restartable group, but can appear in the middle of a
/// hard clause. (Before gfx10 there wasn't a distinction, and both were called
/// "soft clauses" or just "clauses".)
///
/// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
/// groups, not hard clauses.
//
//===----------------------------------------------------------------------===//
335ffd83dbSDimitry Andric 
34e8d8bef9SDimitry Andric #include "AMDGPU.h"
35e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
36e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
375ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h"
3881ad6265SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
395ffd83dbSDimitry Andric 
405ffd83dbSDimitry Andric using namespace llvm;
415ffd83dbSDimitry Andric 
425ffd83dbSDimitry Andric #define DEBUG_TYPE "si-insert-hard-clauses"
435ffd83dbSDimitry Andric 
445ffd83dbSDimitry Andric namespace {
455ffd83dbSDimitry Andric 
// Upper bound on the number of instructions placed in a single hard clause.
// The s_clause encoding has room for a length of 64, but the hardware
// documentation (at least for GFX11) gives 63 as the largest permitted value,
// so that is the limit used here.
constexpr unsigned MaxInstructionsInClause = 63;
5081ad6265SDimitry Andric 
// Classification of an instruction for the purpose of hard-clause formation.
//
// The enumerator order is significant: every value up to and including
// LAST_REAL_HARDCLAUSE_TYPE names a kind of instruction that can start or
// extend a clause, and the values after it are bookkeeping categories.
enum HardClauseType {
  // ---- GFX10 only ----

  // Texture, buffer, global or scratch memory access.
  HARDCLAUSE_VMEM,
  // Flat memory access that is neither global nor scratch.
  HARDCLAUSE_FLAT,

  // ---- GFX11 only ----

  // Texture memory access, split by the kind of operation.
  HARDCLAUSE_MIMG_LOAD,
  HARDCLAUSE_MIMG_STORE,
  HARDCLAUSE_MIMG_ATOMIC,
  HARDCLAUSE_MIMG_SAMPLE,
  // Buffer, global or scratch memory access, split by the kind of operation.
  HARDCLAUSE_VMEM_LOAD,
  HARDCLAUSE_VMEM_STORE,
  HARDCLAUSE_VMEM_ATOMIC,
  // Flat (neither global nor scratch) memory access, split by the kind of
  // operation.
  HARDCLAUSE_FLAT_LOAD,
  HARDCLAUSE_FLAT_STORE,
  HARDCLAUSE_FLAT_ATOMIC,
  // BVH instructions.
  HARDCLAUSE_BVH,

  // ---- Shared by all generations ----

  // LDS access.
  HARDCLAUSE_LDS,
  // Scalar memory access.
  HARDCLAUSE_SMEM,
  // VALU instructions.
  HARDCLAUSE_VALU,
  // Marker for the last category that denotes a genuine clause member.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  // Internal instructions. Apart from s_waitcnt, these may sit in the middle
  // of a hard clause.
  HARDCLAUSE_INTERNAL,
  // Meta instructions, such as KILL, that produce no ISA at all.
  HARDCLAUSE_IGNORE,
  // Everything that must not appear in a hard clause: SALU, export, branch,
  // message, GDS, s_waitcnt, and any instruction not covered above.
  HARDCLAUSE_ILLEGAL,
};
965ffd83dbSDimitry Andric 
97fe6060f1SDimitry Andric class SIInsertHardClauses : public MachineFunctionPass {
98fe6060f1SDimitry Andric public:
99fe6060f1SDimitry Andric   static char ID;
100fe6060f1SDimitry Andric   const GCNSubtarget *ST = nullptr;
101fe6060f1SDimitry Andric 
102fe6060f1SDimitry Andric   SIInsertHardClauses() : MachineFunctionPass(ID) {}
103fe6060f1SDimitry Andric 
104fe6060f1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
105fe6060f1SDimitry Andric     AU.setPreservesCFG();
106fe6060f1SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
107fe6060f1SDimitry Andric   }
108fe6060f1SDimitry Andric 
1095ffd83dbSDimitry Andric   HardClauseType getHardClauseType(const MachineInstr &MI) {
11081ad6265SDimitry Andric     if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
11181ad6265SDimitry Andric       if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
112fe6060f1SDimitry Andric         if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
113fe6060f1SDimitry Andric           if (ST->hasNSAClauseBug()) {
114fe6060f1SDimitry Andric             const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
115fe6060f1SDimitry Andric             if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
116fe6060f1SDimitry Andric               return HARDCLAUSE_ILLEGAL;
117fe6060f1SDimitry Andric           }
1185ffd83dbSDimitry Andric           return HARDCLAUSE_VMEM;
119fe6060f1SDimitry Andric         }
1205ffd83dbSDimitry Andric         if (SIInstrInfo::isFLAT(MI))
1215ffd83dbSDimitry Andric           return HARDCLAUSE_FLAT;
12281ad6265SDimitry Andric       } else {
12381ad6265SDimitry Andric         assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
12481ad6265SDimitry Andric         if (SIInstrInfo::isMIMG(MI)) {
12581ad6265SDimitry Andric           const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
12681ad6265SDimitry Andric           const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
12781ad6265SDimitry Andric               AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
12881ad6265SDimitry Andric           if (BaseInfo->BVH)
12981ad6265SDimitry Andric             return HARDCLAUSE_BVH;
13081ad6265SDimitry Andric           if (BaseInfo->Sampler)
13181ad6265SDimitry Andric             return HARDCLAUSE_MIMG_SAMPLE;
13281ad6265SDimitry Andric           return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC
13381ad6265SDimitry Andric                                               : HARDCLAUSE_MIMG_LOAD
13481ad6265SDimitry Andric                               : HARDCLAUSE_MIMG_STORE;
13581ad6265SDimitry Andric         }
13681ad6265SDimitry Andric         if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
13781ad6265SDimitry Andric           return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
13881ad6265SDimitry Andric                                               : HARDCLAUSE_VMEM_LOAD
13981ad6265SDimitry Andric                               : HARDCLAUSE_VMEM_STORE;
14081ad6265SDimitry Andric         }
14181ad6265SDimitry Andric         if (SIInstrInfo::isFLAT(MI)) {
14281ad6265SDimitry Andric           return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC
14381ad6265SDimitry Andric                                               : HARDCLAUSE_FLAT_LOAD
14481ad6265SDimitry Andric                               : HARDCLAUSE_FLAT_STORE;
14581ad6265SDimitry Andric         }
14681ad6265SDimitry Andric       }
1475ffd83dbSDimitry Andric       // TODO: LDS
1485ffd83dbSDimitry Andric       if (SIInstrInfo::isSMRD(MI))
1495ffd83dbSDimitry Andric         return HARDCLAUSE_SMEM;
1505ffd83dbSDimitry Andric     }
1515ffd83dbSDimitry Andric 
1525ffd83dbSDimitry Andric     // Don't form VALU clauses. It's not clear what benefit they give, if any.
1535ffd83dbSDimitry Andric 
1545ffd83dbSDimitry Andric     // In practice s_nop is the only internal instruction we're likely to see.
1555ffd83dbSDimitry Andric     // It's safe to treat the rest as illegal.
1565ffd83dbSDimitry Andric     if (MI.getOpcode() == AMDGPU::S_NOP)
1575ffd83dbSDimitry Andric       return HARDCLAUSE_INTERNAL;
158349cc55cSDimitry Andric     if (MI.isMetaInstruction())
159349cc55cSDimitry Andric       return HARDCLAUSE_IGNORE;
1605ffd83dbSDimitry Andric     return HARDCLAUSE_ILLEGAL;
1615ffd83dbSDimitry Andric   }
1625ffd83dbSDimitry Andric 
1635ffd83dbSDimitry Andric   // Track information about a clause as we discover it.
1645ffd83dbSDimitry Andric   struct ClauseInfo {
1655ffd83dbSDimitry Andric     // The type of all (non-internal) instructions in the clause.
1665ffd83dbSDimitry Andric     HardClauseType Type = HARDCLAUSE_ILLEGAL;
1675ffd83dbSDimitry Andric     // The first (necessarily non-internal) instruction in the clause.
1685ffd83dbSDimitry Andric     MachineInstr *First = nullptr;
1695ffd83dbSDimitry Andric     // The last non-internal instruction in the clause.
1705ffd83dbSDimitry Andric     MachineInstr *Last = nullptr;
1715ffd83dbSDimitry Andric     // The length of the clause including any internal instructions in the
172349cc55cSDimitry Andric     // middle (but not at the end) of the clause.
1735ffd83dbSDimitry Andric     unsigned Length = 0;
174349cc55cSDimitry Andric     // Internal instructions at the and of a clause should not be included in
175349cc55cSDimitry Andric     // the clause. Count them in TrailingInternalLength until a new memory
176349cc55cSDimitry Andric     // instruction is added.
177349cc55cSDimitry Andric     unsigned TrailingInternalLength = 0;
1785ffd83dbSDimitry Andric     // The base operands of *Last.
1795ffd83dbSDimitry Andric     SmallVector<const MachineOperand *, 4> BaseOps;
1805ffd83dbSDimitry Andric   };
1815ffd83dbSDimitry Andric 
1825ffd83dbSDimitry Andric   bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
183349cc55cSDimitry Andric     if (CI.First == CI.Last)
1845ffd83dbSDimitry Andric       return false;
18581ad6265SDimitry Andric     assert(CI.Length <= MaxInstructionsInClause && "Hard clause is too long!");
1865ffd83dbSDimitry Andric 
1875ffd83dbSDimitry Andric     auto &MBB = *CI.First->getParent();
1885ffd83dbSDimitry Andric     auto ClauseMI =
1895ffd83dbSDimitry Andric         BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
190349cc55cSDimitry Andric             .addImm(CI.Length - 1);
1915ffd83dbSDimitry Andric     finalizeBundle(MBB, ClauseMI->getIterator(),
1925ffd83dbSDimitry Andric                    std::next(CI.Last->getIterator()));
1935ffd83dbSDimitry Andric     return true;
1945ffd83dbSDimitry Andric   }
1955ffd83dbSDimitry Andric 
1965ffd83dbSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override {
1975ffd83dbSDimitry Andric     if (skipFunction(MF.getFunction()))
1985ffd83dbSDimitry Andric       return false;
1995ffd83dbSDimitry Andric 
200fe6060f1SDimitry Andric     ST = &MF.getSubtarget<GCNSubtarget>();
201fe6060f1SDimitry Andric     if (!ST->hasHardClauses())
2025ffd83dbSDimitry Andric       return false;
2035ffd83dbSDimitry Andric 
204fe6060f1SDimitry Andric     const SIInstrInfo *SII = ST->getInstrInfo();
205fe6060f1SDimitry Andric     const TargetRegisterInfo *TRI = ST->getRegisterInfo();
2065ffd83dbSDimitry Andric 
2075ffd83dbSDimitry Andric     bool Changed = false;
2085ffd83dbSDimitry Andric     for (auto &MBB : MF) {
2095ffd83dbSDimitry Andric       ClauseInfo CI;
2105ffd83dbSDimitry Andric       for (auto &MI : MBB) {
2115ffd83dbSDimitry Andric         HardClauseType Type = getHardClauseType(MI);
2125ffd83dbSDimitry Andric 
2135ffd83dbSDimitry Andric         int64_t Dummy1;
2145ffd83dbSDimitry Andric         bool Dummy2;
2155ffd83dbSDimitry Andric         unsigned Dummy3;
2165ffd83dbSDimitry Andric         SmallVector<const MachineOperand *, 4> BaseOps;
2175ffd83dbSDimitry Andric         if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
2185ffd83dbSDimitry Andric           if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
2195ffd83dbSDimitry Andric                                                   Dummy3, TRI)) {
2205ffd83dbSDimitry Andric             // We failed to get the base operands, so we'll never clause this
2215ffd83dbSDimitry Andric             // instruction with any other, so pretend it's illegal.
2225ffd83dbSDimitry Andric             Type = HARDCLAUSE_ILLEGAL;
2235ffd83dbSDimitry Andric           }
2245ffd83dbSDimitry Andric         }
2255ffd83dbSDimitry Andric 
22681ad6265SDimitry Andric         if (CI.Length == MaxInstructionsInClause ||
2275ffd83dbSDimitry Andric             (CI.Length && Type != HARDCLAUSE_INTERNAL &&
228349cc55cSDimitry Andric              Type != HARDCLAUSE_IGNORE &&
2295ffd83dbSDimitry Andric              (Type != CI.Type ||
2305ffd83dbSDimitry Andric               // Note that we lie to shouldClusterMemOps about the size of the
2315ffd83dbSDimitry Andric               // cluster. When shouldClusterMemOps is called from the machine
2325ffd83dbSDimitry Andric               // scheduler it limits the size of the cluster to avoid increasing
2335ffd83dbSDimitry Andric               // register pressure too much, but this pass runs after register
2345ffd83dbSDimitry Andric               // allocation so there is no need for that kind of limit.
235*5f757f3fSDimitry Andric               // We also lie about the Offset and OffsetIsScalable parameters,
236*5f757f3fSDimitry Andric               // as they aren't used in the SIInstrInfo implementation.
237*5f757f3fSDimitry Andric               !SII->shouldClusterMemOps(CI.BaseOps, 0, false, BaseOps, 0, false,
238*5f757f3fSDimitry Andric                                         2, 2)))) {
2395ffd83dbSDimitry Andric           // Finish the current clause.
2405ffd83dbSDimitry Andric           Changed |= emitClause(CI, SII);
2415ffd83dbSDimitry Andric           CI = ClauseInfo();
2425ffd83dbSDimitry Andric         }
2435ffd83dbSDimitry Andric 
2445ffd83dbSDimitry Andric         if (CI.Length) {
2455ffd83dbSDimitry Andric           // Extend the current clause.
246349cc55cSDimitry Andric           if (Type != HARDCLAUSE_IGNORE) {
247349cc55cSDimitry Andric             if (Type == HARDCLAUSE_INTERNAL) {
248349cc55cSDimitry Andric               ++CI.TrailingInternalLength;
249349cc55cSDimitry Andric             } else {
2505ffd83dbSDimitry Andric               ++CI.Length;
251349cc55cSDimitry Andric               CI.Length += CI.TrailingInternalLength;
252349cc55cSDimitry Andric               CI.TrailingInternalLength = 0;
2535ffd83dbSDimitry Andric               CI.Last = &MI;
2545ffd83dbSDimitry Andric               CI.BaseOps = std::move(BaseOps);
2555ffd83dbSDimitry Andric             }
256349cc55cSDimitry Andric           }
2575ffd83dbSDimitry Andric         } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
2585ffd83dbSDimitry Andric           // Start a new clause.
259349cc55cSDimitry Andric           CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
2605ffd83dbSDimitry Andric         }
2615ffd83dbSDimitry Andric       }
2625ffd83dbSDimitry Andric 
2635ffd83dbSDimitry Andric       // Finish the last clause in the basic block if any.
2645ffd83dbSDimitry Andric       if (CI.Length)
2655ffd83dbSDimitry Andric         Changed |= emitClause(CI, SII);
2665ffd83dbSDimitry Andric     }
2675ffd83dbSDimitry Andric 
2685ffd83dbSDimitry Andric     return Changed;
2695ffd83dbSDimitry Andric   }
2705ffd83dbSDimitry Andric };
2715ffd83dbSDimitry Andric 
2725ffd83dbSDimitry Andric } // namespace
2735ffd83dbSDimitry Andric 
2745ffd83dbSDimitry Andric char SIInsertHardClauses::ID = 0;
2755ffd83dbSDimitry Andric 
2765ffd83dbSDimitry Andric char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;
2775ffd83dbSDimitry Andric 
2785ffd83dbSDimitry Andric INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
2795ffd83dbSDimitry Andric                 false, false)
280