xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
15ffd83dbSDimitry Andric //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
25ffd83dbSDimitry Andric //
35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric //
95ffd83dbSDimitry Andric /// \file
105ffd83dbSDimitry Andric /// Insert s_clause instructions to form hard clauses.
115ffd83dbSDimitry Andric ///
125ffd83dbSDimitry Andric /// Clausing load instructions can give cache coherency benefits. Before gfx10,
135ffd83dbSDimitry Andric /// the hardware automatically detected "soft clauses", which were sequences of
145ffd83dbSDimitry Andric /// memory instructions of the same type. In gfx10 this detection was removed,
155ffd83dbSDimitry Andric /// and the s_clause instruction was introduced to explicitly mark "hard
165ffd83dbSDimitry Andric /// clauses".
175ffd83dbSDimitry Andric ///
185ffd83dbSDimitry Andric /// It's the scheduler's job to form the clauses by putting similar memory
195ffd83dbSDimitry Andric /// instructions next to each other. Our job is just to insert an s_clause
205ffd83dbSDimitry Andric /// instruction to mark the start of each clause.
215ffd83dbSDimitry Andric ///
225ffd83dbSDimitry Andric /// Note that hard clauses are very similar to, but logically distinct from, the
235ffd83dbSDimitry Andric /// groups of instructions that have to be restartable when XNACK is enabled.
245ffd83dbSDimitry Andric /// The rules are slightly different in each case. For example an s_nop
255ffd83dbSDimitry Andric /// instruction breaks a restartable group, but can appear in the middle of a
265ffd83dbSDimitry Andric /// hard clause. (Before gfx10 there wasn't a distinction, and both were called
275ffd83dbSDimitry Andric /// "soft clauses" or just "clauses".)
285ffd83dbSDimitry Andric ///
295ffd83dbSDimitry Andric /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
305ffd83dbSDimitry Andric /// groups, not hard clauses.
315ffd83dbSDimitry Andric //
325ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
335ffd83dbSDimitry Andric 
34*e8d8bef9SDimitry Andric #include "AMDGPU.h"
35*e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
36*e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
375ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h"
385ffd83dbSDimitry Andric 
395ffd83dbSDimitry Andric using namespace llvm;
405ffd83dbSDimitry Andric 
415ffd83dbSDimitry Andric #define DEBUG_TYPE "si-insert-hard-clauses"
425ffd83dbSDimitry Andric 
435ffd83dbSDimitry Andric namespace {
445ffd83dbSDimitry Andric 
// Classification of an instruction with respect to hard clause formation.
// The numeric order matters: every type that can actually make up a clause
// compares less than or equal to LAST_REAL_HARDCLAUSE_TYPE, and the two
// bookkeeping categories come after it.
enum HardClauseType {
  // Texture, buffer, global or scratch memory instructions.
  HARDCLAUSE_VMEM = 0,
  // Flat memory instructions that are neither global nor scratch.
  HARDCLAUSE_FLAT = 1,
  // Instructions that access LDS.
  HARDCLAUSE_LDS = 2,
  // Scalar memory instructions.
  HARDCLAUSE_SMEM = 3,
  // VALU instructions.
  HARDCLAUSE_VALU = 4,
  // Alias for the highest-numbered "real" clause type listed above.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  // Internal instructions. Apart from s_waitcnt, these may sit in the middle
  // of a hard clause without being part of its type.
  HARDCLAUSE_INTERNAL = 5,
  // Everything that must not appear inside a hard clause: SALU, export,
  // branch, message, GDS, s_waitcnt, and any instruction not covered by one
  // of the categories above.
  HARDCLAUSE_ILLEGAL = 6,
};
655ffd83dbSDimitry Andric 
665ffd83dbSDimitry Andric HardClauseType getHardClauseType(const MachineInstr &MI) {
675ffd83dbSDimitry Andric   // On current architectures we only get a benefit from clausing loads.
685ffd83dbSDimitry Andric   if (MI.mayLoad()) {
695ffd83dbSDimitry Andric     if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI))
705ffd83dbSDimitry Andric       return HARDCLAUSE_VMEM;
715ffd83dbSDimitry Andric     if (SIInstrInfo::isFLAT(MI))
725ffd83dbSDimitry Andric       return HARDCLAUSE_FLAT;
735ffd83dbSDimitry Andric     // TODO: LDS
745ffd83dbSDimitry Andric     if (SIInstrInfo::isSMRD(MI))
755ffd83dbSDimitry Andric       return HARDCLAUSE_SMEM;
765ffd83dbSDimitry Andric   }
775ffd83dbSDimitry Andric 
785ffd83dbSDimitry Andric   // Don't form VALU clauses. It's not clear what benefit they give, if any.
795ffd83dbSDimitry Andric 
805ffd83dbSDimitry Andric   // In practice s_nop is the only internal instruction we're likely to see.
815ffd83dbSDimitry Andric   // It's safe to treat the rest as illegal.
825ffd83dbSDimitry Andric   if (MI.getOpcode() == AMDGPU::S_NOP)
835ffd83dbSDimitry Andric     return HARDCLAUSE_INTERNAL;
845ffd83dbSDimitry Andric   return HARDCLAUSE_ILLEGAL;
855ffd83dbSDimitry Andric }
865ffd83dbSDimitry Andric 
875ffd83dbSDimitry Andric class SIInsertHardClauses : public MachineFunctionPass {
885ffd83dbSDimitry Andric public:
895ffd83dbSDimitry Andric   static char ID;
905ffd83dbSDimitry Andric 
915ffd83dbSDimitry Andric   SIInsertHardClauses() : MachineFunctionPass(ID) {}
925ffd83dbSDimitry Andric 
935ffd83dbSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
945ffd83dbSDimitry Andric     AU.setPreservesCFG();
955ffd83dbSDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
965ffd83dbSDimitry Andric   }
975ffd83dbSDimitry Andric 
985ffd83dbSDimitry Andric   // Track information about a clause as we discover it.
995ffd83dbSDimitry Andric   struct ClauseInfo {
1005ffd83dbSDimitry Andric     // The type of all (non-internal) instructions in the clause.
1015ffd83dbSDimitry Andric     HardClauseType Type = HARDCLAUSE_ILLEGAL;
1025ffd83dbSDimitry Andric     // The first (necessarily non-internal) instruction in the clause.
1035ffd83dbSDimitry Andric     MachineInstr *First = nullptr;
1045ffd83dbSDimitry Andric     // The last non-internal instruction in the clause.
1055ffd83dbSDimitry Andric     MachineInstr *Last = nullptr;
1065ffd83dbSDimitry Andric     // The length of the clause including any internal instructions in the
1075ffd83dbSDimitry Andric     // middle or after the end of the clause.
1085ffd83dbSDimitry Andric     unsigned Length = 0;
1095ffd83dbSDimitry Andric     // The base operands of *Last.
1105ffd83dbSDimitry Andric     SmallVector<const MachineOperand *, 4> BaseOps;
1115ffd83dbSDimitry Andric   };
1125ffd83dbSDimitry Andric 
1135ffd83dbSDimitry Andric   bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
1145ffd83dbSDimitry Andric     // Get the size of the clause excluding any internal instructions at the
1155ffd83dbSDimitry Andric     // end.
1165ffd83dbSDimitry Andric     unsigned Size =
1175ffd83dbSDimitry Andric         std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1;
1185ffd83dbSDimitry Andric     if (Size < 2)
1195ffd83dbSDimitry Andric       return false;
1205ffd83dbSDimitry Andric     assert(Size <= 64 && "Hard clause is too long!");
1215ffd83dbSDimitry Andric 
1225ffd83dbSDimitry Andric     auto &MBB = *CI.First->getParent();
1235ffd83dbSDimitry Andric     auto ClauseMI =
1245ffd83dbSDimitry Andric         BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
1255ffd83dbSDimitry Andric             .addImm(Size - 1);
1265ffd83dbSDimitry Andric     finalizeBundle(MBB, ClauseMI->getIterator(),
1275ffd83dbSDimitry Andric                    std::next(CI.Last->getIterator()));
1285ffd83dbSDimitry Andric     return true;
1295ffd83dbSDimitry Andric   }
1305ffd83dbSDimitry Andric 
1315ffd83dbSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override {
1325ffd83dbSDimitry Andric     if (skipFunction(MF.getFunction()))
1335ffd83dbSDimitry Andric       return false;
1345ffd83dbSDimitry Andric 
1355ffd83dbSDimitry Andric     const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1365ffd83dbSDimitry Andric     if (!ST.hasHardClauses())
1375ffd83dbSDimitry Andric       return false;
1385ffd83dbSDimitry Andric 
1395ffd83dbSDimitry Andric     const SIInstrInfo *SII = ST.getInstrInfo();
1405ffd83dbSDimitry Andric     const TargetRegisterInfo *TRI = ST.getRegisterInfo();
1415ffd83dbSDimitry Andric 
1425ffd83dbSDimitry Andric     bool Changed = false;
1435ffd83dbSDimitry Andric     for (auto &MBB : MF) {
1445ffd83dbSDimitry Andric       ClauseInfo CI;
1455ffd83dbSDimitry Andric       for (auto &MI : MBB) {
1465ffd83dbSDimitry Andric         HardClauseType Type = getHardClauseType(MI);
1475ffd83dbSDimitry Andric 
1485ffd83dbSDimitry Andric         int64_t Dummy1;
1495ffd83dbSDimitry Andric         bool Dummy2;
1505ffd83dbSDimitry Andric         unsigned Dummy3;
1515ffd83dbSDimitry Andric         SmallVector<const MachineOperand *, 4> BaseOps;
1525ffd83dbSDimitry Andric         if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
1535ffd83dbSDimitry Andric           if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
1545ffd83dbSDimitry Andric                                                   Dummy3, TRI)) {
1555ffd83dbSDimitry Andric             // We failed to get the base operands, so we'll never clause this
1565ffd83dbSDimitry Andric             // instruction with any other, so pretend it's illegal.
1575ffd83dbSDimitry Andric             Type = HARDCLAUSE_ILLEGAL;
1585ffd83dbSDimitry Andric           }
1595ffd83dbSDimitry Andric         }
1605ffd83dbSDimitry Andric 
1615ffd83dbSDimitry Andric         if (CI.Length == 64 ||
1625ffd83dbSDimitry Andric             (CI.Length && Type != HARDCLAUSE_INTERNAL &&
1635ffd83dbSDimitry Andric              (Type != CI.Type ||
1645ffd83dbSDimitry Andric               // Note that we lie to shouldClusterMemOps about the size of the
1655ffd83dbSDimitry Andric               // cluster. When shouldClusterMemOps is called from the machine
1665ffd83dbSDimitry Andric               // scheduler it limits the size of the cluster to avoid increasing
1675ffd83dbSDimitry Andric               // register pressure too much, but this pass runs after register
1685ffd83dbSDimitry Andric               // allocation so there is no need for that kind of limit.
1695ffd83dbSDimitry Andric               !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
1705ffd83dbSDimitry Andric           // Finish the current clause.
1715ffd83dbSDimitry Andric           Changed |= emitClause(CI, SII);
1725ffd83dbSDimitry Andric           CI = ClauseInfo();
1735ffd83dbSDimitry Andric         }
1745ffd83dbSDimitry Andric 
1755ffd83dbSDimitry Andric         if (CI.Length) {
1765ffd83dbSDimitry Andric           // Extend the current clause.
1775ffd83dbSDimitry Andric           ++CI.Length;
1785ffd83dbSDimitry Andric           if (Type != HARDCLAUSE_INTERNAL) {
1795ffd83dbSDimitry Andric             CI.Last = &MI;
1805ffd83dbSDimitry Andric             CI.BaseOps = std::move(BaseOps);
1815ffd83dbSDimitry Andric           }
1825ffd83dbSDimitry Andric         } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
1835ffd83dbSDimitry Andric           // Start a new clause.
1845ffd83dbSDimitry Andric           CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)};
1855ffd83dbSDimitry Andric         }
1865ffd83dbSDimitry Andric       }
1875ffd83dbSDimitry Andric 
1885ffd83dbSDimitry Andric       // Finish the last clause in the basic block if any.
1895ffd83dbSDimitry Andric       if (CI.Length)
1905ffd83dbSDimitry Andric         Changed |= emitClause(CI, SII);
1915ffd83dbSDimitry Andric     }
1925ffd83dbSDimitry Andric 
1935ffd83dbSDimitry Andric     return Changed;
1945ffd83dbSDimitry Andric   }
1955ffd83dbSDimitry Andric };
1965ffd83dbSDimitry Andric 
1975ffd83dbSDimitry Andric } // namespace
1985ffd83dbSDimitry Andric 
1995ffd83dbSDimitry Andric char SIInsertHardClauses::ID = 0;
2005ffd83dbSDimitry Andric 
2015ffd83dbSDimitry Andric char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;
2025ffd83dbSDimitry Andric 
2035ffd83dbSDimitry Andric INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
2045ffd83dbSDimitry Andric                 false, false)
205