//===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Insert s_clause instructions to form hard clauses.
///
/// Clausing load instructions can give cache coherency benefits. Before gfx10,
/// the hardware automatically detected "soft clauses", which were sequences of
/// memory instructions of the same type. In gfx10 this detection was removed,
/// and the s_clause instruction was introduced to explicitly mark "hard
/// clauses".
///
/// It's the scheduler's job to form the clauses by putting similar memory
/// instructions next to each other. Our job is just to insert an s_clause
/// instruction to mark the start of each clause.
///
/// Note that hard clauses are very similar to, but logically distinct from, the
/// groups of instructions that have to be restartable when XNACK is enabled.
/// The rules are slightly different in each case. For example an s_nop
/// instruction breaks a restartable group, but can appear in the middle of a
/// hard clause. (Before gfx10 there wasn't a distinction, and both were called
/// "soft clauses" or just "clauses".)
///
/// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
/// groups, not hard clauses.
31 // 32 //===----------------------------------------------------------------------===// 33 34 #include "AMDGPU.h" 35 #include "GCNSubtarget.h" 36 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 37 #include "llvm/ADT/SmallVector.h" 38 #include "llvm/CodeGen/MachineFunctionPass.h" 39 40 using namespace llvm; 41 42 #define DEBUG_TYPE "si-insert-hard-clauses" 43 44 namespace { 45 46 // A clause length of 64 instructions could be encoded in the s_clause 47 // instruction, but the hardware documentation (at least for GFX11) says that 48 // 63 is the maximum allowed. 49 constexpr unsigned MaxInstructionsInClause = 63; 50 51 enum HardClauseType { 52 // For GFX10: 53 54 // Texture, buffer, global or scratch memory instructions. 55 HARDCLAUSE_VMEM, 56 // Flat (not global or scratch) memory instructions. 57 HARDCLAUSE_FLAT, 58 59 // For GFX11: 60 61 // Texture memory instructions. 62 HARDCLAUSE_MIMG_LOAD, 63 HARDCLAUSE_MIMG_STORE, 64 HARDCLAUSE_MIMG_ATOMIC, 65 HARDCLAUSE_MIMG_SAMPLE, 66 // Buffer, global or scratch memory instructions. 67 HARDCLAUSE_VMEM_LOAD, 68 HARDCLAUSE_VMEM_STORE, 69 HARDCLAUSE_VMEM_ATOMIC, 70 // Flat (not global or scratch) memory instructions. 71 HARDCLAUSE_FLAT_LOAD, 72 HARDCLAUSE_FLAT_STORE, 73 HARDCLAUSE_FLAT_ATOMIC, 74 // BVH instructions. 75 HARDCLAUSE_BVH, 76 77 // Common: 78 79 // Instructions that access LDS. 80 HARDCLAUSE_LDS, 81 // Scalar memory instructions. 82 HARDCLAUSE_SMEM, 83 // VALU instructions. 84 HARDCLAUSE_VALU, 85 LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU, 86 87 // Internal instructions, which are allowed in the middle of a hard clause, 88 // except for s_waitcnt. 89 HARDCLAUSE_INTERNAL, 90 // Meta instructions that do not result in any ISA like KILL. 91 HARDCLAUSE_IGNORE, 92 // Instructions that are not allowed in a hard clause: SALU, export, branch, 93 // message, GDS, s_waitcnt and anything else not mentioned above. 
94 HARDCLAUSE_ILLEGAL, 95 }; 96 97 class SIInsertHardClauses : public MachineFunctionPass { 98 public: 99 static char ID; 100 const GCNSubtarget *ST = nullptr; 101 102 SIInsertHardClauses() : MachineFunctionPass(ID) {} 103 104 void getAnalysisUsage(AnalysisUsage &AU) const override { 105 AU.setPreservesCFG(); 106 MachineFunctionPass::getAnalysisUsage(AU); 107 } 108 109 HardClauseType getHardClauseType(const MachineInstr &MI) { 110 if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) { 111 if (ST->getGeneration() == AMDGPUSubtarget::GFX10) { 112 if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { 113 if (ST->hasNSAClauseBug()) { 114 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); 115 if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA) 116 return HARDCLAUSE_ILLEGAL; 117 } 118 return HARDCLAUSE_VMEM; 119 } 120 if (SIInstrInfo::isFLAT(MI)) 121 return HARDCLAUSE_FLAT; 122 } else { 123 assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11); 124 if (SIInstrInfo::isMIMG(MI)) { 125 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); 126 const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo = 127 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 128 if (BaseInfo->BVH) 129 return HARDCLAUSE_BVH; 130 if (BaseInfo->Sampler) 131 return HARDCLAUSE_MIMG_SAMPLE; 132 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC 133 : HARDCLAUSE_MIMG_LOAD 134 : HARDCLAUSE_MIMG_STORE; 135 } 136 if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { 137 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC 138 : HARDCLAUSE_VMEM_LOAD 139 : HARDCLAUSE_VMEM_STORE; 140 } 141 if (SIInstrInfo::isFLAT(MI)) { 142 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC 143 : HARDCLAUSE_FLAT_LOAD 144 : HARDCLAUSE_FLAT_STORE; 145 } 146 } 147 // TODO: LDS 148 if (SIInstrInfo::isSMRD(MI)) 149 return HARDCLAUSE_SMEM; 150 } 151 152 // Don't form VALU clauses. It's not clear what benefit they give, if any. 
153 154 // In practice s_nop is the only internal instruction we're likely to see. 155 // It's safe to treat the rest as illegal. 156 if (MI.getOpcode() == AMDGPU::S_NOP) 157 return HARDCLAUSE_INTERNAL; 158 if (MI.isMetaInstruction()) 159 return HARDCLAUSE_IGNORE; 160 return HARDCLAUSE_ILLEGAL; 161 } 162 163 // Track information about a clause as we discover it. 164 struct ClauseInfo { 165 // The type of all (non-internal) instructions in the clause. 166 HardClauseType Type = HARDCLAUSE_ILLEGAL; 167 // The first (necessarily non-internal) instruction in the clause. 168 MachineInstr *First = nullptr; 169 // The last non-internal instruction in the clause. 170 MachineInstr *Last = nullptr; 171 // The length of the clause including any internal instructions in the 172 // middle (but not at the end) of the clause. 173 unsigned Length = 0; 174 // Internal instructions at the and of a clause should not be included in 175 // the clause. Count them in TrailingInternalLength until a new memory 176 // instruction is added. 177 unsigned TrailingInternalLength = 0; 178 // The base operands of *Last. 
179 SmallVector<const MachineOperand *, 4> BaseOps; 180 }; 181 182 bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { 183 if (CI.First == CI.Last) 184 return false; 185 assert(CI.Length <= MaxInstructionsInClause && "Hard clause is too long!"); 186 187 auto &MBB = *CI.First->getParent(); 188 auto ClauseMI = 189 BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE)) 190 .addImm(CI.Length - 1); 191 finalizeBundle(MBB, ClauseMI->getIterator(), 192 std::next(CI.Last->getIterator())); 193 return true; 194 } 195 196 bool runOnMachineFunction(MachineFunction &MF) override { 197 if (skipFunction(MF.getFunction())) 198 return false; 199 200 ST = &MF.getSubtarget<GCNSubtarget>(); 201 if (!ST->hasHardClauses()) 202 return false; 203 204 const SIInstrInfo *SII = ST->getInstrInfo(); 205 const TargetRegisterInfo *TRI = ST->getRegisterInfo(); 206 207 bool Changed = false; 208 for (auto &MBB : MF) { 209 ClauseInfo CI; 210 for (auto &MI : MBB) { 211 HardClauseType Type = getHardClauseType(MI); 212 213 int64_t Dummy1; 214 bool Dummy2; 215 unsigned Dummy3; 216 SmallVector<const MachineOperand *, 4> BaseOps; 217 if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 218 if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2, 219 Dummy3, TRI)) { 220 // We failed to get the base operands, so we'll never clause this 221 // instruction with any other, so pretend it's illegal. 222 Type = HARDCLAUSE_ILLEGAL; 223 } 224 } 225 226 if (CI.Length == MaxInstructionsInClause || 227 (CI.Length && Type != HARDCLAUSE_INTERNAL && 228 Type != HARDCLAUSE_IGNORE && 229 (Type != CI.Type || 230 // Note that we lie to shouldClusterMemOps about the size of the 231 // cluster. When shouldClusterMemOps is called from the machine 232 // scheduler it limits the size of the cluster to avoid increasing 233 // register pressure too much, but this pass runs after register 234 // allocation so there is no need for that kind of limit. 
235 // We also lie about the Offset and OffsetIsScalable parameters, 236 // as they aren't used in the SIInstrInfo implementation. 237 !SII->shouldClusterMemOps(CI.BaseOps, 0, false, BaseOps, 0, false, 238 2, 2)))) { 239 // Finish the current clause. 240 Changed |= emitClause(CI, SII); 241 CI = ClauseInfo(); 242 } 243 244 if (CI.Length) { 245 // Extend the current clause. 246 if (Type != HARDCLAUSE_IGNORE) { 247 if (Type == HARDCLAUSE_INTERNAL) { 248 ++CI.TrailingInternalLength; 249 } else { 250 ++CI.Length; 251 CI.Length += CI.TrailingInternalLength; 252 CI.TrailingInternalLength = 0; 253 CI.Last = &MI; 254 CI.BaseOps = std::move(BaseOps); 255 } 256 } 257 } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 258 // Start a new clause. 259 CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)}; 260 } 261 } 262 263 // Finish the last clause in the basic block if any. 264 if (CI.Length) 265 Changed |= emitClause(CI, SII); 266 } 267 268 return Changed; 269 } 270 }; 271 272 } // namespace 273 274 char SIInsertHardClauses::ID = 0; 275 276 char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID; 277 278 INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses", 279 false, false) 280