1*5ffd83dbSDimitry Andric //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===// 2*5ffd83dbSDimitry Andric // 3*5ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*5ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*5ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*5ffd83dbSDimitry Andric // 7*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 8*5ffd83dbSDimitry Andric // 9*5ffd83dbSDimitry Andric /// \file 10*5ffd83dbSDimitry Andric /// Insert s_clause instructions to form hard clauses. 11*5ffd83dbSDimitry Andric /// 12*5ffd83dbSDimitry Andric /// Clausing load instructions can give cache coherency benefits. Before gfx10, 13*5ffd83dbSDimitry Andric /// the hardware automatically detected "soft clauses", which were sequences of 14*5ffd83dbSDimitry Andric /// memory instructions of the same type. In gfx10 this detection was removed, 15*5ffd83dbSDimitry Andric /// and the s_clause instruction was introduced to explicitly mark "hard 16*5ffd83dbSDimitry Andric /// clauses". 17*5ffd83dbSDimitry Andric /// 18*5ffd83dbSDimitry Andric /// It's the scheduler's job to form the clauses by putting similar memory 19*5ffd83dbSDimitry Andric /// instructions next to each other. Our job is just to insert an s_clause 20*5ffd83dbSDimitry Andric /// instruction to mark the start of each clause. 21*5ffd83dbSDimitry Andric /// 22*5ffd83dbSDimitry Andric /// Note that hard clauses are very similar to, but logically distinct from, the 23*5ffd83dbSDimitry Andric /// groups of instructions that have to be restartable when XNACK is enabled. 24*5ffd83dbSDimitry Andric /// The rules are slightly different in each case. For example an s_nop 25*5ffd83dbSDimitry Andric /// instruction breaks a restartable group, but can appear in the middle of a 26*5ffd83dbSDimitry Andric /// hard clause. (Before gfx10 there wasn't a distinction, and both were called 27*5ffd83dbSDimitry Andric /// "soft clauses" or just "clauses".) 28*5ffd83dbSDimitry Andric /// 29*5ffd83dbSDimitry Andric /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable 30*5ffd83dbSDimitry Andric /// groups, not hard clauses. 31*5ffd83dbSDimitry Andric // 32*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 33*5ffd83dbSDimitry Andric 34*5ffd83dbSDimitry Andric #include "AMDGPUSubtarget.h" 35*5ffd83dbSDimitry Andric #include "SIInstrInfo.h" 36*5ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h" 37*5ffd83dbSDimitry Andric 38*5ffd83dbSDimitry Andric using namespace llvm; 39*5ffd83dbSDimitry Andric 40*5ffd83dbSDimitry Andric #define DEBUG_TYPE "si-insert-hard-clauses" 41*5ffd83dbSDimitry Andric 42*5ffd83dbSDimitry Andric namespace { 43*5ffd83dbSDimitry Andric 44*5ffd83dbSDimitry Andric enum HardClauseType { 45*5ffd83dbSDimitry Andric // Texture, buffer, global or scratch memory instructions. 46*5ffd83dbSDimitry Andric HARDCLAUSE_VMEM, 47*5ffd83dbSDimitry Andric // Flat (not global or scratch) memory instructions. 48*5ffd83dbSDimitry Andric HARDCLAUSE_FLAT, 49*5ffd83dbSDimitry Andric // Instructions that access LDS. 50*5ffd83dbSDimitry Andric HARDCLAUSE_LDS, 51*5ffd83dbSDimitry Andric // Scalar memory instructions. 52*5ffd83dbSDimitry Andric HARDCLAUSE_SMEM, 53*5ffd83dbSDimitry Andric // VALU instructions. 54*5ffd83dbSDimitry Andric HARDCLAUSE_VALU, 55*5ffd83dbSDimitry Andric LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU, 56*5ffd83dbSDimitry Andric 57*5ffd83dbSDimitry Andric // Internal instructions, which are allowed in the middle of a hard clause, 58*5ffd83dbSDimitry Andric // except for s_waitcnt. 59*5ffd83dbSDimitry Andric HARDCLAUSE_INTERNAL, 60*5ffd83dbSDimitry Andric // Instructions that are not allowed in a hard clause: SALU, export, branch, 61*5ffd83dbSDimitry Andric // message, GDS, s_waitcnt and anything else not mentioned above. 62*5ffd83dbSDimitry Andric HARDCLAUSE_ILLEGAL, 63*5ffd83dbSDimitry Andric }; 64*5ffd83dbSDimitry Andric 65*5ffd83dbSDimitry Andric HardClauseType getHardClauseType(const MachineInstr &MI) { 66*5ffd83dbSDimitry Andric // On current architectures we only get a benefit from clausing loads. 67*5ffd83dbSDimitry Andric if (MI.mayLoad()) { 68*5ffd83dbSDimitry Andric if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) 69*5ffd83dbSDimitry Andric return HARDCLAUSE_VMEM; 70*5ffd83dbSDimitry Andric if (SIInstrInfo::isFLAT(MI)) 71*5ffd83dbSDimitry Andric return HARDCLAUSE_FLAT; 72*5ffd83dbSDimitry Andric // TODO: LDS 73*5ffd83dbSDimitry Andric if (SIInstrInfo::isSMRD(MI)) 74*5ffd83dbSDimitry Andric return HARDCLAUSE_SMEM; 75*5ffd83dbSDimitry Andric } 76*5ffd83dbSDimitry Andric 77*5ffd83dbSDimitry Andric // Don't form VALU clauses. It's not clear what benefit they give, if any. 78*5ffd83dbSDimitry Andric 79*5ffd83dbSDimitry Andric // In practice s_nop is the only internal instruction we're likely to see. 80*5ffd83dbSDimitry Andric // It's safe to treat the rest as illegal. 81*5ffd83dbSDimitry Andric if (MI.getOpcode() == AMDGPU::S_NOP) 82*5ffd83dbSDimitry Andric return HARDCLAUSE_INTERNAL; 83*5ffd83dbSDimitry Andric return HARDCLAUSE_ILLEGAL; 84*5ffd83dbSDimitry Andric } 85*5ffd83dbSDimitry Andric 86*5ffd83dbSDimitry Andric class SIInsertHardClauses : public MachineFunctionPass { 87*5ffd83dbSDimitry Andric public: 88*5ffd83dbSDimitry Andric static char ID; 89*5ffd83dbSDimitry Andric 90*5ffd83dbSDimitry Andric SIInsertHardClauses() : MachineFunctionPass(ID) {} 91*5ffd83dbSDimitry Andric 92*5ffd83dbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 93*5ffd83dbSDimitry Andric AU.setPreservesCFG(); 94*5ffd83dbSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 95*5ffd83dbSDimitry Andric } 96*5ffd83dbSDimitry Andric 97*5ffd83dbSDimitry Andric // Track information about a clause as we discover it. 98*5ffd83dbSDimitry Andric struct ClauseInfo { 99*5ffd83dbSDimitry Andric // The type of all (non-internal) instructions in the clause. 100*5ffd83dbSDimitry Andric HardClauseType Type = HARDCLAUSE_ILLEGAL; 101*5ffd83dbSDimitry Andric // The first (necessarily non-internal) instruction in the clause. 102*5ffd83dbSDimitry Andric MachineInstr *First = nullptr; 103*5ffd83dbSDimitry Andric // The last non-internal instruction in the clause. 104*5ffd83dbSDimitry Andric MachineInstr *Last = nullptr; 105*5ffd83dbSDimitry Andric // The length of the clause including any internal instructions in the 106*5ffd83dbSDimitry Andric // middle or after the end of the clause. 107*5ffd83dbSDimitry Andric unsigned Length = 0; 108*5ffd83dbSDimitry Andric // The base operands of *Last. 109*5ffd83dbSDimitry Andric SmallVector<const MachineOperand *, 4> BaseOps; 110*5ffd83dbSDimitry Andric }; 111*5ffd83dbSDimitry Andric 112*5ffd83dbSDimitry Andric bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { 113*5ffd83dbSDimitry Andric // Get the size of the clause excluding any internal instructions at the 114*5ffd83dbSDimitry Andric // end. 115*5ffd83dbSDimitry Andric unsigned Size = 116*5ffd83dbSDimitry Andric std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1; 117*5ffd83dbSDimitry Andric if (Size < 2) 118*5ffd83dbSDimitry Andric return false; 119*5ffd83dbSDimitry Andric assert(Size <= 64 && "Hard clause is too long!"); 120*5ffd83dbSDimitry Andric 121*5ffd83dbSDimitry Andric auto &MBB = *CI.First->getParent(); 122*5ffd83dbSDimitry Andric auto ClauseMI = 123*5ffd83dbSDimitry Andric BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE)) 124*5ffd83dbSDimitry Andric .addImm(Size - 1); 125*5ffd83dbSDimitry Andric finalizeBundle(MBB, ClauseMI->getIterator(), 126*5ffd83dbSDimitry Andric std::next(CI.Last->getIterator())); 127*5ffd83dbSDimitry Andric return true; 128*5ffd83dbSDimitry Andric } 129*5ffd83dbSDimitry Andric 130*5ffd83dbSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override { 131*5ffd83dbSDimitry Andric if (skipFunction(MF.getFunction())) 132*5ffd83dbSDimitry Andric return false; 133*5ffd83dbSDimitry Andric 134*5ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 135*5ffd83dbSDimitry Andric if (!ST.hasHardClauses()) 136*5ffd83dbSDimitry Andric return false; 137*5ffd83dbSDimitry Andric 138*5ffd83dbSDimitry Andric const SIInstrInfo *SII = ST.getInstrInfo(); 139*5ffd83dbSDimitry Andric const TargetRegisterInfo *TRI = ST.getRegisterInfo(); 140*5ffd83dbSDimitry Andric 141*5ffd83dbSDimitry Andric bool Changed = false; 142*5ffd83dbSDimitry Andric for (auto &MBB : MF) { 143*5ffd83dbSDimitry Andric ClauseInfo CI; 144*5ffd83dbSDimitry Andric for (auto &MI : MBB) { 145*5ffd83dbSDimitry Andric HardClauseType Type = getHardClauseType(MI); 146*5ffd83dbSDimitry Andric 147*5ffd83dbSDimitry Andric int64_t Dummy1; 148*5ffd83dbSDimitry Andric bool Dummy2; 149*5ffd83dbSDimitry Andric unsigned Dummy3; 150*5ffd83dbSDimitry Andric SmallVector<const MachineOperand *, 4> BaseOps; 151*5ffd83dbSDimitry Andric if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 152*5ffd83dbSDimitry Andric if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2, 153*5ffd83dbSDimitry Andric Dummy3, TRI)) { 154*5ffd83dbSDimitry Andric // We failed to get the base operands, so we'll never clause this 155*5ffd83dbSDimitry Andric // instruction with any other, so pretend it's illegal. 156*5ffd83dbSDimitry Andric Type = HARDCLAUSE_ILLEGAL; 157*5ffd83dbSDimitry Andric } 158*5ffd83dbSDimitry Andric } 159*5ffd83dbSDimitry Andric 160*5ffd83dbSDimitry Andric if (CI.Length == 64 || 161*5ffd83dbSDimitry Andric (CI.Length && Type != HARDCLAUSE_INTERNAL && 162*5ffd83dbSDimitry Andric (Type != CI.Type || 163*5ffd83dbSDimitry Andric // Note that we lie to shouldClusterMemOps about the size of the 164*5ffd83dbSDimitry Andric // cluster. When shouldClusterMemOps is called from the machine 165*5ffd83dbSDimitry Andric // scheduler it limits the size of the cluster to avoid increasing 166*5ffd83dbSDimitry Andric // register pressure too much, but this pass runs after register 167*5ffd83dbSDimitry Andric // allocation so there is no need for that kind of limit. 168*5ffd83dbSDimitry Andric !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) { 169*5ffd83dbSDimitry Andric // Finish the current clause. 170*5ffd83dbSDimitry Andric Changed |= emitClause(CI, SII); 171*5ffd83dbSDimitry Andric CI = ClauseInfo(); 172*5ffd83dbSDimitry Andric } 173*5ffd83dbSDimitry Andric 174*5ffd83dbSDimitry Andric if (CI.Length) { 175*5ffd83dbSDimitry Andric // Extend the current clause. 176*5ffd83dbSDimitry Andric ++CI.Length; 177*5ffd83dbSDimitry Andric if (Type != HARDCLAUSE_INTERNAL) { 178*5ffd83dbSDimitry Andric CI.Last = &MI; 179*5ffd83dbSDimitry Andric CI.BaseOps = std::move(BaseOps); 180*5ffd83dbSDimitry Andric } 181*5ffd83dbSDimitry Andric } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 182*5ffd83dbSDimitry Andric // Start a new clause. 183*5ffd83dbSDimitry Andric CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)}; 184*5ffd83dbSDimitry Andric } 185*5ffd83dbSDimitry Andric } 186*5ffd83dbSDimitry Andric 187*5ffd83dbSDimitry Andric // Finish the last clause in the basic block if any. 188*5ffd83dbSDimitry Andric if (CI.Length) 189*5ffd83dbSDimitry Andric Changed |= emitClause(CI, SII); 190*5ffd83dbSDimitry Andric } 191*5ffd83dbSDimitry Andric 192*5ffd83dbSDimitry Andric return Changed; 193*5ffd83dbSDimitry Andric } 194*5ffd83dbSDimitry Andric }; 195*5ffd83dbSDimitry Andric 196*5ffd83dbSDimitry Andric } // namespace 197*5ffd83dbSDimitry Andric 198*5ffd83dbSDimitry Andric char SIInsertHardClauses::ID = 0; 199*5ffd83dbSDimitry Andric 200*5ffd83dbSDimitry Andric char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID; 201*5ffd83dbSDimitry Andric 202*5ffd83dbSDimitry Andric INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses", 203*5ffd83dbSDimitry Andric false, false) 204