15ffd83dbSDimitry Andric //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric // 95ffd83dbSDimitry Andric /// \file 105ffd83dbSDimitry Andric /// Insert s_clause instructions to form hard clauses. 115ffd83dbSDimitry Andric /// 125ffd83dbSDimitry Andric /// Clausing load instructions can give cache coherency benefits. Before gfx10, 135ffd83dbSDimitry Andric /// the hardware automatically detected "soft clauses", which were sequences of 145ffd83dbSDimitry Andric /// memory instructions of the same type. In gfx10 this detection was removed, 155ffd83dbSDimitry Andric /// and the s_clause instruction was introduced to explicitly mark "hard 165ffd83dbSDimitry Andric /// clauses". 175ffd83dbSDimitry Andric /// 185ffd83dbSDimitry Andric /// It's the scheduler's job to form the clauses by putting similar memory 195ffd83dbSDimitry Andric /// instructions next to each other. Our job is just to insert an s_clause 205ffd83dbSDimitry Andric /// instruction to mark the start of each clause. 215ffd83dbSDimitry Andric /// 225ffd83dbSDimitry Andric /// Note that hard clauses are very similar to, but logically distinct from, the 235ffd83dbSDimitry Andric /// groups of instructions that have to be restartable when XNACK is enabled. 245ffd83dbSDimitry Andric /// The rules are slightly different in each case. For example an s_nop 255ffd83dbSDimitry Andric /// instruction breaks a restartable group, but can appear in the middle of a 265ffd83dbSDimitry Andric /// hard clause. (Before gfx10 there wasn't a distinction, and both were called 275ffd83dbSDimitry Andric /// "soft clauses" or just "clauses".) 285ffd83dbSDimitry Andric /// 295ffd83dbSDimitry Andric /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable 305ffd83dbSDimitry Andric /// groups, not hard clauses. 315ffd83dbSDimitry Andric // 325ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 335ffd83dbSDimitry Andric 34e8d8bef9SDimitry Andric #include "AMDGPU.h" 35e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 36e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 375ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h" 3881ad6265SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 395ffd83dbSDimitry Andric 405ffd83dbSDimitry Andric using namespace llvm; 415ffd83dbSDimitry Andric 425ffd83dbSDimitry Andric #define DEBUG_TYPE "si-insert-hard-clauses" 435ffd83dbSDimitry Andric 445ffd83dbSDimitry Andric namespace { 455ffd83dbSDimitry Andric 4681ad6265SDimitry Andric // A clause length of 64 instructions could be encoded in the s_clause 4781ad6265SDimitry Andric // instruction, but the hardware documentation (at least for GFX11) says that 4881ad6265SDimitry Andric // 63 is the maximum allowed. 4981ad6265SDimitry Andric constexpr unsigned MaxInstructionsInClause = 63; 5081ad6265SDimitry Andric 515ffd83dbSDimitry Andric enum HardClauseType { 5281ad6265SDimitry Andric // For GFX10: 5381ad6265SDimitry Andric 545ffd83dbSDimitry Andric // Texture, buffer, global or scratch memory instructions. 555ffd83dbSDimitry Andric HARDCLAUSE_VMEM, 565ffd83dbSDimitry Andric // Flat (not global or scratch) memory instructions. 575ffd83dbSDimitry Andric HARDCLAUSE_FLAT, 5881ad6265SDimitry Andric 5981ad6265SDimitry Andric // For GFX11: 6081ad6265SDimitry Andric 6181ad6265SDimitry Andric // Texture memory instructions. 6281ad6265SDimitry Andric HARDCLAUSE_MIMG_LOAD, 6381ad6265SDimitry Andric HARDCLAUSE_MIMG_STORE, 6481ad6265SDimitry Andric HARDCLAUSE_MIMG_ATOMIC, 6581ad6265SDimitry Andric HARDCLAUSE_MIMG_SAMPLE, 6681ad6265SDimitry Andric // Buffer, global or scratch memory instructions. 6781ad6265SDimitry Andric HARDCLAUSE_VMEM_LOAD, 6881ad6265SDimitry Andric HARDCLAUSE_VMEM_STORE, 6981ad6265SDimitry Andric HARDCLAUSE_VMEM_ATOMIC, 7081ad6265SDimitry Andric // Flat (not global or scratch) memory instructions. 7181ad6265SDimitry Andric HARDCLAUSE_FLAT_LOAD, 7281ad6265SDimitry Andric HARDCLAUSE_FLAT_STORE, 7381ad6265SDimitry Andric HARDCLAUSE_FLAT_ATOMIC, 7481ad6265SDimitry Andric // BVH instructions. 7581ad6265SDimitry Andric HARDCLAUSE_BVH, 7681ad6265SDimitry Andric 7781ad6265SDimitry Andric // Common: 7881ad6265SDimitry Andric 795ffd83dbSDimitry Andric // Instructions that access LDS. 805ffd83dbSDimitry Andric HARDCLAUSE_LDS, 815ffd83dbSDimitry Andric // Scalar memory instructions. 825ffd83dbSDimitry Andric HARDCLAUSE_SMEM, 835ffd83dbSDimitry Andric // VALU instructions. 845ffd83dbSDimitry Andric HARDCLAUSE_VALU, 855ffd83dbSDimitry Andric LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU, 865ffd83dbSDimitry Andric 875ffd83dbSDimitry Andric // Internal instructions, which are allowed in the middle of a hard clause, 885ffd83dbSDimitry Andric // except for s_waitcnt. 895ffd83dbSDimitry Andric HARDCLAUSE_INTERNAL, 90349cc55cSDimitry Andric // Meta instructions that do not result in any ISA like KILL. 91349cc55cSDimitry Andric HARDCLAUSE_IGNORE, 925ffd83dbSDimitry Andric // Instructions that are not allowed in a hard clause: SALU, export, branch, 935ffd83dbSDimitry Andric // message, GDS, s_waitcnt and anything else not mentioned above. 945ffd83dbSDimitry Andric HARDCLAUSE_ILLEGAL, 955ffd83dbSDimitry Andric }; 965ffd83dbSDimitry Andric 97fe6060f1SDimitry Andric class SIInsertHardClauses : public MachineFunctionPass { 98fe6060f1SDimitry Andric public: 99fe6060f1SDimitry Andric static char ID; 100fe6060f1SDimitry Andric const GCNSubtarget *ST = nullptr; 101fe6060f1SDimitry Andric 102fe6060f1SDimitry Andric SIInsertHardClauses() : MachineFunctionPass(ID) {} 103fe6060f1SDimitry Andric 104fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 105fe6060f1SDimitry Andric AU.setPreservesCFG(); 106fe6060f1SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 107fe6060f1SDimitry Andric } 108fe6060f1SDimitry Andric 1095ffd83dbSDimitry Andric HardClauseType getHardClauseType(const MachineInstr &MI) { 11081ad6265SDimitry Andric if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) { 11181ad6265SDimitry Andric if (ST->getGeneration() == AMDGPUSubtarget::GFX10) { 112fe6060f1SDimitry Andric if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { 113fe6060f1SDimitry Andric if (ST->hasNSAClauseBug()) { 114fe6060f1SDimitry Andric const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); 115fe6060f1SDimitry Andric if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA) 116fe6060f1SDimitry Andric return HARDCLAUSE_ILLEGAL; 117fe6060f1SDimitry Andric } 1185ffd83dbSDimitry Andric return HARDCLAUSE_VMEM; 119fe6060f1SDimitry Andric } 1205ffd83dbSDimitry Andric if (SIInstrInfo::isFLAT(MI)) 1215ffd83dbSDimitry Andric return HARDCLAUSE_FLAT; 12281ad6265SDimitry Andric } else { 12381ad6265SDimitry Andric assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11); 12481ad6265SDimitry Andric if (SIInstrInfo::isMIMG(MI)) { 12581ad6265SDimitry Andric const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); 12681ad6265SDimitry Andric const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo = 12781ad6265SDimitry Andric AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 12881ad6265SDimitry Andric if (BaseInfo->BVH) 12981ad6265SDimitry Andric return HARDCLAUSE_BVH; 13081ad6265SDimitry Andric if (BaseInfo->Sampler) 13181ad6265SDimitry Andric return HARDCLAUSE_MIMG_SAMPLE; 13281ad6265SDimitry Andric return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC 13381ad6265SDimitry Andric : HARDCLAUSE_MIMG_LOAD 13481ad6265SDimitry Andric : HARDCLAUSE_MIMG_STORE; 13581ad6265SDimitry Andric } 13681ad6265SDimitry Andric if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { 13781ad6265SDimitry Andric return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC 13881ad6265SDimitry Andric : HARDCLAUSE_VMEM_LOAD 13981ad6265SDimitry Andric : HARDCLAUSE_VMEM_STORE; 14081ad6265SDimitry Andric } 14181ad6265SDimitry Andric if (SIInstrInfo::isFLAT(MI)) { 14281ad6265SDimitry Andric return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC 14381ad6265SDimitry Andric : HARDCLAUSE_FLAT_LOAD 14481ad6265SDimitry Andric : HARDCLAUSE_FLAT_STORE; 14581ad6265SDimitry Andric } 14681ad6265SDimitry Andric } 1475ffd83dbSDimitry Andric // TODO: LDS 1485ffd83dbSDimitry Andric if (SIInstrInfo::isSMRD(MI)) 1495ffd83dbSDimitry Andric return HARDCLAUSE_SMEM; 1505ffd83dbSDimitry Andric } 1515ffd83dbSDimitry Andric 1525ffd83dbSDimitry Andric // Don't form VALU clauses. It's not clear what benefit they give, if any. 1535ffd83dbSDimitry Andric 1545ffd83dbSDimitry Andric // In practice s_nop is the only internal instruction we're likely to see. 1555ffd83dbSDimitry Andric // It's safe to treat the rest as illegal. 1565ffd83dbSDimitry Andric if (MI.getOpcode() == AMDGPU::S_NOP) 1575ffd83dbSDimitry Andric return HARDCLAUSE_INTERNAL; 158349cc55cSDimitry Andric if (MI.isMetaInstruction()) 159349cc55cSDimitry Andric return HARDCLAUSE_IGNORE; 1605ffd83dbSDimitry Andric return HARDCLAUSE_ILLEGAL; 1615ffd83dbSDimitry Andric } 1625ffd83dbSDimitry Andric 1635ffd83dbSDimitry Andric // Track information about a clause as we discover it. 1645ffd83dbSDimitry Andric struct ClauseInfo { 1655ffd83dbSDimitry Andric // The type of all (non-internal) instructions in the clause. 1665ffd83dbSDimitry Andric HardClauseType Type = HARDCLAUSE_ILLEGAL; 1675ffd83dbSDimitry Andric // The first (necessarily non-internal) instruction in the clause. 1685ffd83dbSDimitry Andric MachineInstr *First = nullptr; 1695ffd83dbSDimitry Andric // The last non-internal instruction in the clause. 1705ffd83dbSDimitry Andric MachineInstr *Last = nullptr; 1715ffd83dbSDimitry Andric // The length of the clause including any internal instructions in the 172349cc55cSDimitry Andric // middle (but not at the end) of the clause. 1735ffd83dbSDimitry Andric unsigned Length = 0; 174349cc55cSDimitry Andric // Internal instructions at the and of a clause should not be included in 175349cc55cSDimitry Andric // the clause. Count them in TrailingInternalLength until a new memory 176349cc55cSDimitry Andric // instruction is added. 177349cc55cSDimitry Andric unsigned TrailingInternalLength = 0; 1785ffd83dbSDimitry Andric // The base operands of *Last. 1795ffd83dbSDimitry Andric SmallVector<const MachineOperand *, 4> BaseOps; 1805ffd83dbSDimitry Andric }; 1815ffd83dbSDimitry Andric 1825ffd83dbSDimitry Andric bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { 183349cc55cSDimitry Andric if (CI.First == CI.Last) 1845ffd83dbSDimitry Andric return false; 18581ad6265SDimitry Andric assert(CI.Length <= MaxInstructionsInClause && "Hard clause is too long!"); 1865ffd83dbSDimitry Andric 1875ffd83dbSDimitry Andric auto &MBB = *CI.First->getParent(); 1885ffd83dbSDimitry Andric auto ClauseMI = 1895ffd83dbSDimitry Andric BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE)) 190349cc55cSDimitry Andric .addImm(CI.Length - 1); 1915ffd83dbSDimitry Andric finalizeBundle(MBB, ClauseMI->getIterator(), 1925ffd83dbSDimitry Andric std::next(CI.Last->getIterator())); 1935ffd83dbSDimitry Andric return true; 1945ffd83dbSDimitry Andric } 1955ffd83dbSDimitry Andric 1965ffd83dbSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override { 1975ffd83dbSDimitry Andric if (skipFunction(MF.getFunction())) 1985ffd83dbSDimitry Andric return false; 1995ffd83dbSDimitry Andric 200fe6060f1SDimitry Andric ST = &MF.getSubtarget<GCNSubtarget>(); 201fe6060f1SDimitry Andric if (!ST->hasHardClauses()) 2025ffd83dbSDimitry Andric return false; 2035ffd83dbSDimitry Andric 204fe6060f1SDimitry Andric const SIInstrInfo *SII = ST->getInstrInfo(); 205fe6060f1SDimitry Andric const TargetRegisterInfo *TRI = ST->getRegisterInfo(); 2065ffd83dbSDimitry Andric 2075ffd83dbSDimitry Andric bool Changed = false; 2085ffd83dbSDimitry Andric for (auto &MBB : MF) { 2095ffd83dbSDimitry Andric ClauseInfo CI; 2105ffd83dbSDimitry Andric for (auto &MI : MBB) { 2115ffd83dbSDimitry Andric HardClauseType Type = getHardClauseType(MI); 2125ffd83dbSDimitry Andric 2135ffd83dbSDimitry Andric int64_t Dummy1; 2145ffd83dbSDimitry Andric bool Dummy2; 2155ffd83dbSDimitry Andric unsigned Dummy3; 2165ffd83dbSDimitry Andric SmallVector<const MachineOperand *, 4> BaseOps; 2175ffd83dbSDimitry Andric if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 2185ffd83dbSDimitry Andric if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2, 2195ffd83dbSDimitry Andric Dummy3, TRI)) { 2205ffd83dbSDimitry Andric // We failed to get the base operands, so we'll never clause this 2215ffd83dbSDimitry Andric // instruction with any other, so pretend it's illegal. 2225ffd83dbSDimitry Andric Type = HARDCLAUSE_ILLEGAL; 2235ffd83dbSDimitry Andric } 2245ffd83dbSDimitry Andric } 2255ffd83dbSDimitry Andric 22681ad6265SDimitry Andric if (CI.Length == MaxInstructionsInClause || 2275ffd83dbSDimitry Andric (CI.Length && Type != HARDCLAUSE_INTERNAL && 228349cc55cSDimitry Andric Type != HARDCLAUSE_IGNORE && 2295ffd83dbSDimitry Andric (Type != CI.Type || 2305ffd83dbSDimitry Andric // Note that we lie to shouldClusterMemOps about the size of the 2315ffd83dbSDimitry Andric // cluster. When shouldClusterMemOps is called from the machine 2325ffd83dbSDimitry Andric // scheduler it limits the size of the cluster to avoid increasing 2335ffd83dbSDimitry Andric // register pressure too much, but this pass runs after register 2345ffd83dbSDimitry Andric // allocation so there is no need for that kind of limit. 235*5f757f3fSDimitry Andric // We also lie about the Offset and OffsetIsScalable parameters, 236*5f757f3fSDimitry Andric // as they aren't used in the SIInstrInfo implementation. 237*5f757f3fSDimitry Andric !SII->shouldClusterMemOps(CI.BaseOps, 0, false, BaseOps, 0, false, 238*5f757f3fSDimitry Andric 2, 2)))) { 2395ffd83dbSDimitry Andric // Finish the current clause. 2405ffd83dbSDimitry Andric Changed |= emitClause(CI, SII); 2415ffd83dbSDimitry Andric CI = ClauseInfo(); 2425ffd83dbSDimitry Andric } 2435ffd83dbSDimitry Andric 2445ffd83dbSDimitry Andric if (CI.Length) { 2455ffd83dbSDimitry Andric // Extend the current clause. 246349cc55cSDimitry Andric if (Type != HARDCLAUSE_IGNORE) { 247349cc55cSDimitry Andric if (Type == HARDCLAUSE_INTERNAL) { 248349cc55cSDimitry Andric ++CI.TrailingInternalLength; 249349cc55cSDimitry Andric } else { 2505ffd83dbSDimitry Andric ++CI.Length; 251349cc55cSDimitry Andric CI.Length += CI.TrailingInternalLength; 252349cc55cSDimitry Andric CI.TrailingInternalLength = 0; 2535ffd83dbSDimitry Andric CI.Last = &MI; 2545ffd83dbSDimitry Andric CI.BaseOps = std::move(BaseOps); 2555ffd83dbSDimitry Andric } 256349cc55cSDimitry Andric } 2575ffd83dbSDimitry Andric } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 2585ffd83dbSDimitry Andric // Start a new clause. 259349cc55cSDimitry Andric CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)}; 2605ffd83dbSDimitry Andric } 2615ffd83dbSDimitry Andric } 2625ffd83dbSDimitry Andric 2635ffd83dbSDimitry Andric // Finish the last clause in the basic block if any. 2645ffd83dbSDimitry Andric if (CI.Length) 2655ffd83dbSDimitry Andric Changed |= emitClause(CI, SII); 2665ffd83dbSDimitry Andric } 2675ffd83dbSDimitry Andric 2685ffd83dbSDimitry Andric return Changed; 2695ffd83dbSDimitry Andric } 2705ffd83dbSDimitry Andric }; 2715ffd83dbSDimitry Andric 2725ffd83dbSDimitry Andric } // namespace 2735ffd83dbSDimitry Andric 2745ffd83dbSDimitry Andric char SIInsertHardClauses::ID = 0; 2755ffd83dbSDimitry Andric 2765ffd83dbSDimitry Andric char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID; 2775ffd83dbSDimitry Andric 2785ffd83dbSDimitry Andric INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses", 2795ffd83dbSDimitry Andric false, false) 280