1 //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Insert s_clause instructions to form hard clauses. 11 /// 12 /// Clausing load instructions can give cache coherency benefits. Before gfx10, 13 /// the hardware automatically detected "soft clauses", which were sequences of 14 /// memory instructions of the same type. In gfx10 this detection was removed, 15 /// and the s_clause instruction was introduced to explicitly mark "hard 16 /// clauses". 17 /// 18 /// It's the scheduler's job to form the clauses by putting similar memory 19 /// instructions next to each other. Our job is just to insert an s_clause 20 /// instruction to mark the start of each clause. 21 /// 22 /// Note that hard clauses are very similar to, but logically distinct from, the 23 /// groups of instructions that have to be restartable when XNACK is enabled. 24 /// The rules are slightly different in each case. For example an s_nop 25 /// instruction breaks a restartable group, but can appear in the middle of a 26 /// hard clause. (Before gfx10 there wasn't a distinction, and both were called 27 /// "soft clauses" or just "clauses".) 28 /// 29 /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable 30 /// groups, not hard clauses. 31 // 32 //===----------------------------------------------------------------------===// 33 34 #include "AMDGPUSubtarget.h" 35 #include "SIInstrInfo.h" 36 #include "llvm/ADT/SmallVector.h" 37 38 using namespace llvm; 39 40 #define DEBUG_TYPE "si-insert-hard-clauses" 41 42 namespace { 43 44 enum HardClauseType { 45 // Texture, buffer, global or scratch memory instructions. 46 HARDCLAUSE_VMEM, 47 // Flat (not global or scratch) memory instructions. 48 HARDCLAUSE_FLAT, 49 // Instructions that access LDS. 50 HARDCLAUSE_LDS, 51 // Scalar memory instructions. 52 HARDCLAUSE_SMEM, 53 // VALU instructions. 54 HARDCLAUSE_VALU, 55 LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU, 56 57 // Internal instructions, which are allowed in the middle of a hard clause, 58 // except for s_waitcnt. 59 HARDCLAUSE_INTERNAL, 60 // Instructions that are not allowed in a hard clause: SALU, export, branch, 61 // message, GDS, s_waitcnt and anything else not mentioned above. 62 HARDCLAUSE_ILLEGAL, 63 }; 64 65 HardClauseType getHardClauseType(const MachineInstr &MI) { 66 // On current architectures we only get a benefit from clausing loads. 67 if (MI.mayLoad()) { 68 if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) 69 return HARDCLAUSE_VMEM; 70 if (SIInstrInfo::isFLAT(MI)) 71 return HARDCLAUSE_FLAT; 72 // TODO: LDS 73 if (SIInstrInfo::isSMRD(MI)) 74 return HARDCLAUSE_SMEM; 75 } 76 77 // Don't form VALU clauses. It's not clear what benefit they give, if any. 78 79 // In practice s_nop is the only internal instruction we're likely to see. 80 // It's safe to treat the rest as illegal. 81 if (MI.getOpcode() == AMDGPU::S_NOP) 82 return HARDCLAUSE_INTERNAL; 83 return HARDCLAUSE_ILLEGAL; 84 } 85 86 class SIInsertHardClauses : public MachineFunctionPass { 87 public: 88 static char ID; 89 90 SIInsertHardClauses() : MachineFunctionPass(ID) {} 91 92 void getAnalysisUsage(AnalysisUsage &AU) const override { 93 AU.setPreservesCFG(); 94 MachineFunctionPass::getAnalysisUsage(AU); 95 } 96 97 // Track information about a clause as we discover it. 98 struct ClauseInfo { 99 // The type of all (non-internal) instructions in the clause. 100 HardClauseType Type = HARDCLAUSE_ILLEGAL; 101 // The first (necessarily non-internal) instruction in the clause. 102 MachineInstr *First = nullptr; 103 // The last non-internal instruction in the clause. 104 MachineInstr *Last = nullptr; 105 // The length of the clause including any internal instructions in the 106 // middle or after the end of the clause. 107 unsigned Length = 0; 108 // The base operands of *Last. 109 SmallVector<const MachineOperand *, 4> BaseOps; 110 }; 111 112 bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { 113 // Get the size of the clause excluding any internal instructions at the 114 // end. 115 unsigned Size = 116 std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1; 117 if (Size < 2) 118 return false; 119 assert(Size <= 64 && "Hard clause is too long!"); 120 121 auto &MBB = *CI.First->getParent(); 122 auto ClauseMI = 123 BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE)) 124 .addImm(Size - 1); 125 finalizeBundle(MBB, ClauseMI->getIterator(), 126 std::next(CI.Last->getIterator())); 127 return true; 128 } 129 130 bool runOnMachineFunction(MachineFunction &MF) override { 131 if (skipFunction(MF.getFunction())) 132 return false; 133 134 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 135 if (!ST.hasHardClauses()) 136 return false; 137 138 const SIInstrInfo *SII = ST.getInstrInfo(); 139 const TargetRegisterInfo *TRI = ST.getRegisterInfo(); 140 141 bool Changed = false; 142 for (auto &MBB : MF) { 143 ClauseInfo CI; 144 for (auto &MI : MBB) { 145 HardClauseType Type = getHardClauseType(MI); 146 147 int64_t Dummy1; 148 bool Dummy2; 149 unsigned Dummy3; 150 SmallVector<const MachineOperand *, 4> BaseOps; 151 if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 152 if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2, 153 Dummy3, TRI)) { 154 // We failed to get the base operands, so we'll never clause this 155 // instruction with any other, so pretend it's illegal. 156 Type = HARDCLAUSE_ILLEGAL; 157 } 158 } 159 160 if (CI.Length == 64 || 161 (CI.Length && Type != HARDCLAUSE_INTERNAL && 162 (Type != CI.Type || 163 // Note that we lie to shouldClusterMemOps about the size of the 164 // cluster. When shouldClusterMemOps is called from the machine 165 // scheduler it limits the size of the cluster to avoid increasing 166 // register pressure too much, but this pass runs after register 167 // allocation so there is no need for that kind of limit. 168 !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) { 169 // Finish the current clause. 170 Changed |= emitClause(CI, SII); 171 CI = ClauseInfo(); 172 } 173 174 if (CI.Length) { 175 // Extend the current clause. 176 ++CI.Length; 177 if (Type != HARDCLAUSE_INTERNAL) { 178 CI.Last = &MI; 179 CI.BaseOps = std::move(BaseOps); 180 } 181 } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 182 // Start a new clause. 183 CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)}; 184 } 185 } 186 187 // Finish the last clause in the basic block if any. 188 if (CI.Length) 189 Changed |= emitClause(CI, SII); 190 } 191 192 return Changed; 193 } 194 }; 195 196 } // namespace 197 198 char SIInsertHardClauses::ID = 0; 199 200 char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID; 201 202 INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses", 203 false, false) 204