//===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Insert s_clause instructions to form hard clauses.
///
/// Clausing load instructions can give cache coherency benefits. Before gfx10,
/// the hardware automatically detected "soft clauses", which were sequences of
/// memory instructions of the same type. In gfx10 this detection was removed,
/// and the s_clause instruction was introduced to explicitly mark "hard
/// clauses".
///
/// It's the scheduler's job to form the clauses by putting similar memory
/// instructions next to each other. Our job is just to insert an s_clause
/// instruction to mark the start of each clause.
///
/// Note that hard clauses are very similar to, but logically distinct from, the
/// groups of instructions that have to be restartable when XNACK is enabled.
/// The rules are slightly different in each case. For example an s_nop
/// instruction breaks a restartable group, but can appear in the middle of a
/// hard clause. (Before gfx10 there wasn't a distinction, and both were called
/// "soft clauses" or just "clauses".)
///
/// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
/// groups, not hard clauses.
31 // 32 //===----------------------------------------------------------------------===// 33 34 #include "AMDGPU.h" 35 #include "GCNSubtarget.h" 36 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 37 #include "llvm/ADT/SmallVector.h" 38 #include "llvm/CodeGen/MachineFunctionPass.h" 39 #include "llvm/CodeGen/MachinePassManager.h" 40 41 using namespace llvm; 42 43 #define DEBUG_TYPE "si-insert-hard-clauses" 44 45 static cl::opt<unsigned> 46 HardClauseLengthLimit("amdgpu-hard-clause-length-limit", 47 cl::desc("Maximum number of memory instructions to " 48 "place in the same hard clause"), 49 cl::Hidden); 50 51 namespace { 52 53 enum HardClauseType { 54 // For GFX10: 55 56 // Texture, buffer, global or scratch memory instructions. 57 HARDCLAUSE_VMEM, 58 // Flat (not global or scratch) memory instructions. 59 HARDCLAUSE_FLAT, 60 61 // For GFX11: 62 63 // Texture memory instructions. 64 HARDCLAUSE_MIMG_LOAD, 65 HARDCLAUSE_MIMG_STORE, 66 HARDCLAUSE_MIMG_ATOMIC, 67 HARDCLAUSE_MIMG_SAMPLE, 68 // Buffer, global or scratch memory instructions. 69 HARDCLAUSE_VMEM_LOAD, 70 HARDCLAUSE_VMEM_STORE, 71 HARDCLAUSE_VMEM_ATOMIC, 72 // Flat (not global or scratch) memory instructions. 73 HARDCLAUSE_FLAT_LOAD, 74 HARDCLAUSE_FLAT_STORE, 75 HARDCLAUSE_FLAT_ATOMIC, 76 // BVH instructions. 77 HARDCLAUSE_BVH, 78 79 // Common: 80 81 // Instructions that access LDS. 82 HARDCLAUSE_LDS, 83 // Scalar memory instructions. 84 HARDCLAUSE_SMEM, 85 // VALU instructions. 86 HARDCLAUSE_VALU, 87 LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU, 88 89 // Internal instructions, which are allowed in the middle of a hard clause, 90 // except for s_waitcnt. 91 HARDCLAUSE_INTERNAL, 92 // Meta instructions that do not result in any ISA like KILL. 93 HARDCLAUSE_IGNORE, 94 // Instructions that are not allowed in a hard clause: SALU, export, branch, 95 // message, GDS, s_waitcnt and anything else not mentioned above. 
96 HARDCLAUSE_ILLEGAL, 97 }; 98 99 class SIInsertHardClauses { 100 public: 101 const GCNSubtarget *ST = nullptr; 102 103 HardClauseType getHardClauseType(const MachineInstr &MI) { 104 if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) { 105 if (ST->getGeneration() == AMDGPUSubtarget::GFX10) { 106 if ((SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) || 107 SIInstrInfo::isSegmentSpecificFLAT(MI)) { 108 if (ST->hasNSAClauseBug()) { 109 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); 110 if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA) 111 return HARDCLAUSE_ILLEGAL; 112 } 113 return HARDCLAUSE_VMEM; 114 } 115 if (SIInstrInfo::isFLAT(MI)) 116 return HARDCLAUSE_FLAT; 117 } else { 118 assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11); 119 if (SIInstrInfo::isMIMG(MI)) { 120 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); 121 const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo = 122 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 123 if (BaseInfo->BVH) 124 return HARDCLAUSE_BVH; 125 if (BaseInfo->Sampler || BaseInfo->MSAA) 126 return HARDCLAUSE_MIMG_SAMPLE; 127 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC 128 : HARDCLAUSE_MIMG_LOAD 129 : HARDCLAUSE_MIMG_STORE; 130 } 131 if ((SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) || 132 SIInstrInfo::isSegmentSpecificFLAT(MI)) { 133 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC 134 : HARDCLAUSE_VMEM_LOAD 135 : HARDCLAUSE_VMEM_STORE; 136 } 137 if (SIInstrInfo::isFLAT(MI)) { 138 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC 139 : HARDCLAUSE_FLAT_LOAD 140 : HARDCLAUSE_FLAT_STORE; 141 } 142 } 143 // TODO: LDS 144 if (SIInstrInfo::isSMRD(MI)) 145 return HARDCLAUSE_SMEM; 146 } 147 148 // Don't form VALU clauses. It's not clear what benefit they give, if any. 149 150 // In practice s_nop is the only internal instruction we're likely to see. 151 // It's safe to treat the rest as illegal. 
152 if (MI.getOpcode() == AMDGPU::S_NOP) 153 return HARDCLAUSE_INTERNAL; 154 if (MI.isMetaInstruction()) 155 return HARDCLAUSE_IGNORE; 156 return HARDCLAUSE_ILLEGAL; 157 } 158 159 // Track information about a clause as we discover it. 160 struct ClauseInfo { 161 // The type of all (non-internal) instructions in the clause. 162 HardClauseType Type = HARDCLAUSE_ILLEGAL; 163 // The first (necessarily non-internal) instruction in the clause. 164 MachineInstr *First = nullptr; 165 // The last non-internal instruction in the clause. 166 MachineInstr *Last = nullptr; 167 // The length of the clause including any internal instructions in the 168 // middle (but not at the end) of the clause. 169 unsigned Length = 0; 170 // Internal instructions at the and of a clause should not be included in 171 // the clause. Count them in TrailingInternalLength until a new memory 172 // instruction is added. 173 unsigned TrailingInternalLength = 0; 174 // The base operands of *Last. 175 SmallVector<const MachineOperand *, 4> BaseOps; 176 }; 177 178 bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { 179 if (CI.First == CI.Last) 180 return false; 181 assert(CI.Length <= ST->maxHardClauseLength() && 182 "Hard clause is too long!"); 183 184 auto &MBB = *CI.First->getParent(); 185 auto ClauseMI = 186 BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE)) 187 .addImm(CI.Length - 1); 188 finalizeBundle(MBB, ClauseMI->getIterator(), 189 std::next(CI.Last->getIterator())); 190 return true; 191 } 192 193 bool run(MachineFunction &MF) { 194 ST = &MF.getSubtarget<GCNSubtarget>(); 195 if (!ST->hasHardClauses()) 196 return false; 197 198 unsigned MaxClauseLength = MF.getFunction().getFnAttributeAsParsedInteger( 199 "amdgpu-hard-clause-length-limit", 255); 200 if (HardClauseLengthLimit.getNumOccurrences()) 201 MaxClauseLength = HardClauseLengthLimit; 202 MaxClauseLength = std::min(MaxClauseLength, ST->maxHardClauseLength()); 203 if (MaxClauseLength <= 1) 204 return false; 205 206 
const SIInstrInfo *SII = ST->getInstrInfo(); 207 const TargetRegisterInfo *TRI = ST->getRegisterInfo(); 208 209 bool Changed = false; 210 for (auto &MBB : MF) { 211 ClauseInfo CI; 212 for (auto &MI : MBB) { 213 HardClauseType Type = getHardClauseType(MI); 214 215 int64_t Dummy1; 216 bool Dummy2; 217 LocationSize Dummy3 = LocationSize::precise(0); 218 SmallVector<const MachineOperand *, 4> BaseOps; 219 if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 220 if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2, 221 Dummy3, TRI)) { 222 // We failed to get the base operands, so we'll never clause this 223 // instruction with any other, so pretend it's illegal. 224 Type = HARDCLAUSE_ILLEGAL; 225 } 226 } 227 228 if (CI.Length == MaxClauseLength || 229 (CI.Length && Type != HARDCLAUSE_INTERNAL && 230 Type != HARDCLAUSE_IGNORE && 231 (Type != CI.Type || 232 // Note that we lie to shouldClusterMemOps about the size of the 233 // cluster. When shouldClusterMemOps is called from the machine 234 // scheduler it limits the size of the cluster to avoid increasing 235 // register pressure too much, but this pass runs after register 236 // allocation so there is no need for that kind of limit. 237 // We also lie about the Offset and OffsetIsScalable parameters, 238 // as they aren't used in the SIInstrInfo implementation. 239 !SII->shouldClusterMemOps(CI.BaseOps, 0, false, BaseOps, 0, false, 240 2, 2)))) { 241 // Finish the current clause. 242 Changed |= emitClause(CI, SII); 243 CI = ClauseInfo(); 244 } 245 246 if (CI.Length) { 247 // Extend the current clause. 248 if (Type != HARDCLAUSE_IGNORE) { 249 if (Type == HARDCLAUSE_INTERNAL) { 250 ++CI.TrailingInternalLength; 251 } else { 252 ++CI.Length; 253 CI.Length += CI.TrailingInternalLength; 254 CI.TrailingInternalLength = 0; 255 CI.Last = &MI; 256 CI.BaseOps = std::move(BaseOps); 257 } 258 } 259 } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 260 // Start a new clause. 
261 CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)}; 262 } 263 } 264 265 // Finish the last clause in the basic block if any. 266 if (CI.Length) 267 Changed |= emitClause(CI, SII); 268 } 269 270 return Changed; 271 } 272 }; 273 274 class SIInsertHardClausesLegacy : public MachineFunctionPass { 275 public: 276 static char ID; 277 SIInsertHardClausesLegacy() : MachineFunctionPass(ID) {} 278 279 bool runOnMachineFunction(MachineFunction &MF) override { 280 if (skipFunction(MF.getFunction())) 281 return false; 282 283 return SIInsertHardClauses().run(MF); 284 } 285 286 void getAnalysisUsage(AnalysisUsage &AU) const override { 287 AU.setPreservesCFG(); 288 MachineFunctionPass::getAnalysisUsage(AU); 289 } 290 }; 291 292 } // namespace 293 294 PreservedAnalyses 295 llvm::SIInsertHardClausesPass::run(MachineFunction &MF, 296 MachineFunctionAnalysisManager &MFAM) { 297 if (!SIInsertHardClauses().run(MF)) 298 return PreservedAnalyses::all(); 299 300 auto PA = getMachineFunctionPassPreservedAnalyses(); 301 PA.preserveSet<CFGAnalyses>(); 302 return PA; 303 } 304 305 char SIInsertHardClausesLegacy::ID = 0; 306 307 char &llvm::SIInsertHardClausesID = SIInsertHardClausesLegacy::ID; 308 309 INITIALIZE_PASS(SIInsertHardClausesLegacy, DEBUG_TYPE, "SI Insert Hard Clauses", 310 false, false) 311