15ffd83dbSDimitry Andric //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric // 95ffd83dbSDimitry Andric /// \file 105ffd83dbSDimitry Andric /// Insert s_clause instructions to form hard clauses. 115ffd83dbSDimitry Andric /// 125ffd83dbSDimitry Andric /// Clausing load instructions can give cache coherency benefits. Before gfx10, 135ffd83dbSDimitry Andric /// the hardware automatically detected "soft clauses", which were sequences of 145ffd83dbSDimitry Andric /// memory instructions of the same type. In gfx10 this detection was removed, 155ffd83dbSDimitry Andric /// and the s_clause instruction was introduced to explicitly mark "hard 165ffd83dbSDimitry Andric /// clauses". 175ffd83dbSDimitry Andric /// 185ffd83dbSDimitry Andric /// It's the scheduler's job to form the clauses by putting similar memory 195ffd83dbSDimitry Andric /// instructions next to each other. Our job is just to insert an s_clause 205ffd83dbSDimitry Andric /// instruction to mark the start of each clause. 215ffd83dbSDimitry Andric /// 225ffd83dbSDimitry Andric /// Note that hard clauses are very similar to, but logically distinct from, the 235ffd83dbSDimitry Andric /// groups of instructions that have to be restartable when XNACK is enabled. 245ffd83dbSDimitry Andric /// The rules are slightly different in each case. For example an s_nop 255ffd83dbSDimitry Andric /// instruction breaks a restartable group, but can appear in the middle of a 265ffd83dbSDimitry Andric /// hard clause. (Before gfx10 there wasn't a distinction, and both were called 275ffd83dbSDimitry Andric /// "soft clauses" or just "clauses".) 285ffd83dbSDimitry Andric /// 295ffd83dbSDimitry Andric /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable 305ffd83dbSDimitry Andric /// groups, not hard clauses. 315ffd83dbSDimitry Andric // 325ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 335ffd83dbSDimitry Andric 34*e8d8bef9SDimitry Andric #include "AMDGPU.h" 35*e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 36*e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 375ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h" 385ffd83dbSDimitry Andric 395ffd83dbSDimitry Andric using namespace llvm; 405ffd83dbSDimitry Andric 415ffd83dbSDimitry Andric #define DEBUG_TYPE "si-insert-hard-clauses" 425ffd83dbSDimitry Andric 435ffd83dbSDimitry Andric namespace { 445ffd83dbSDimitry Andric 455ffd83dbSDimitry Andric enum HardClauseType { 465ffd83dbSDimitry Andric // Texture, buffer, global or scratch memory instructions. 475ffd83dbSDimitry Andric HARDCLAUSE_VMEM, 485ffd83dbSDimitry Andric // Flat (not global or scratch) memory instructions. 495ffd83dbSDimitry Andric HARDCLAUSE_FLAT, 505ffd83dbSDimitry Andric // Instructions that access LDS. 515ffd83dbSDimitry Andric HARDCLAUSE_LDS, 525ffd83dbSDimitry Andric // Scalar memory instructions. 535ffd83dbSDimitry Andric HARDCLAUSE_SMEM, 545ffd83dbSDimitry Andric // VALU instructions. 555ffd83dbSDimitry Andric HARDCLAUSE_VALU, 565ffd83dbSDimitry Andric LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU, 575ffd83dbSDimitry Andric 585ffd83dbSDimitry Andric // Internal instructions, which are allowed in the middle of a hard clause, 595ffd83dbSDimitry Andric // except for s_waitcnt. 605ffd83dbSDimitry Andric HARDCLAUSE_INTERNAL, 615ffd83dbSDimitry Andric // Instructions that are not allowed in a hard clause: SALU, export, branch, 625ffd83dbSDimitry Andric // message, GDS, s_waitcnt and anything else not mentioned above. 635ffd83dbSDimitry Andric HARDCLAUSE_ILLEGAL, 645ffd83dbSDimitry Andric }; 655ffd83dbSDimitry Andric 665ffd83dbSDimitry Andric HardClauseType getHardClauseType(const MachineInstr &MI) { 675ffd83dbSDimitry Andric // On current architectures we only get a benefit from clausing loads. 685ffd83dbSDimitry Andric if (MI.mayLoad()) { 695ffd83dbSDimitry Andric if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) 705ffd83dbSDimitry Andric return HARDCLAUSE_VMEM; 715ffd83dbSDimitry Andric if (SIInstrInfo::isFLAT(MI)) 725ffd83dbSDimitry Andric return HARDCLAUSE_FLAT; 735ffd83dbSDimitry Andric // TODO: LDS 745ffd83dbSDimitry Andric if (SIInstrInfo::isSMRD(MI)) 755ffd83dbSDimitry Andric return HARDCLAUSE_SMEM; 765ffd83dbSDimitry Andric } 775ffd83dbSDimitry Andric 785ffd83dbSDimitry Andric // Don't form VALU clauses. It's not clear what benefit they give, if any. 795ffd83dbSDimitry Andric 805ffd83dbSDimitry Andric // In practice s_nop is the only internal instruction we're likely to see. 815ffd83dbSDimitry Andric // It's safe to treat the rest as illegal. 825ffd83dbSDimitry Andric if (MI.getOpcode() == AMDGPU::S_NOP) 835ffd83dbSDimitry Andric return HARDCLAUSE_INTERNAL; 845ffd83dbSDimitry Andric return HARDCLAUSE_ILLEGAL; 855ffd83dbSDimitry Andric } 865ffd83dbSDimitry Andric 875ffd83dbSDimitry Andric class SIInsertHardClauses : public MachineFunctionPass { 885ffd83dbSDimitry Andric public: 895ffd83dbSDimitry Andric static char ID; 905ffd83dbSDimitry Andric 915ffd83dbSDimitry Andric SIInsertHardClauses() : MachineFunctionPass(ID) {} 925ffd83dbSDimitry Andric 935ffd83dbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 945ffd83dbSDimitry Andric AU.setPreservesCFG(); 955ffd83dbSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 965ffd83dbSDimitry Andric } 975ffd83dbSDimitry Andric 985ffd83dbSDimitry Andric // Track information about a clause as we discover it. 995ffd83dbSDimitry Andric struct ClauseInfo { 1005ffd83dbSDimitry Andric // The type of all (non-internal) instructions in the clause. 1015ffd83dbSDimitry Andric HardClauseType Type = HARDCLAUSE_ILLEGAL; 1025ffd83dbSDimitry Andric // The first (necessarily non-internal) instruction in the clause. 1035ffd83dbSDimitry Andric MachineInstr *First = nullptr; 1045ffd83dbSDimitry Andric // The last non-internal instruction in the clause. 1055ffd83dbSDimitry Andric MachineInstr *Last = nullptr; 1065ffd83dbSDimitry Andric // The length of the clause including any internal instructions in the 1075ffd83dbSDimitry Andric // middle or after the end of the clause. 1085ffd83dbSDimitry Andric unsigned Length = 0; 1095ffd83dbSDimitry Andric // The base operands of *Last. 1105ffd83dbSDimitry Andric SmallVector<const MachineOperand *, 4> BaseOps; 1115ffd83dbSDimitry Andric }; 1125ffd83dbSDimitry Andric 1135ffd83dbSDimitry Andric bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { 1145ffd83dbSDimitry Andric // Get the size of the clause excluding any internal instructions at the 1155ffd83dbSDimitry Andric // end. 1165ffd83dbSDimitry Andric unsigned Size = 1175ffd83dbSDimitry Andric std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1; 1185ffd83dbSDimitry Andric if (Size < 2) 1195ffd83dbSDimitry Andric return false; 1205ffd83dbSDimitry Andric assert(Size <= 64 && "Hard clause is too long!"); 1215ffd83dbSDimitry Andric 1225ffd83dbSDimitry Andric auto &MBB = *CI.First->getParent(); 1235ffd83dbSDimitry Andric auto ClauseMI = 1245ffd83dbSDimitry Andric BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE)) 1255ffd83dbSDimitry Andric .addImm(Size - 1); 1265ffd83dbSDimitry Andric finalizeBundle(MBB, ClauseMI->getIterator(), 1275ffd83dbSDimitry Andric std::next(CI.Last->getIterator())); 1285ffd83dbSDimitry Andric return true; 1295ffd83dbSDimitry Andric } 1305ffd83dbSDimitry Andric 1315ffd83dbSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override { 1325ffd83dbSDimitry Andric if (skipFunction(MF.getFunction())) 1335ffd83dbSDimitry Andric return false; 1345ffd83dbSDimitry Andric 1355ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1365ffd83dbSDimitry Andric if (!ST.hasHardClauses()) 1375ffd83dbSDimitry Andric return false; 1385ffd83dbSDimitry Andric 1395ffd83dbSDimitry Andric const SIInstrInfo *SII = ST.getInstrInfo(); 1405ffd83dbSDimitry Andric const TargetRegisterInfo *TRI = ST.getRegisterInfo(); 1415ffd83dbSDimitry Andric 1425ffd83dbSDimitry Andric bool Changed = false; 1435ffd83dbSDimitry Andric for (auto &MBB : MF) { 1445ffd83dbSDimitry Andric ClauseInfo CI; 1455ffd83dbSDimitry Andric for (auto &MI : MBB) { 1465ffd83dbSDimitry Andric HardClauseType Type = getHardClauseType(MI); 1475ffd83dbSDimitry Andric 1485ffd83dbSDimitry Andric int64_t Dummy1; 1495ffd83dbSDimitry Andric bool Dummy2; 1505ffd83dbSDimitry Andric unsigned Dummy3; 1515ffd83dbSDimitry Andric SmallVector<const MachineOperand *, 4> BaseOps; 1525ffd83dbSDimitry Andric if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 1535ffd83dbSDimitry Andric if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2, 1545ffd83dbSDimitry Andric Dummy3, TRI)) { 1555ffd83dbSDimitry Andric // We failed to get the base operands, so we'll never clause this 1565ffd83dbSDimitry Andric // instruction with any other, so pretend it's illegal. 1575ffd83dbSDimitry Andric Type = HARDCLAUSE_ILLEGAL; 1585ffd83dbSDimitry Andric } 1595ffd83dbSDimitry Andric } 1605ffd83dbSDimitry Andric 1615ffd83dbSDimitry Andric if (CI.Length == 64 || 1625ffd83dbSDimitry Andric (CI.Length && Type != HARDCLAUSE_INTERNAL && 1635ffd83dbSDimitry Andric (Type != CI.Type || 1645ffd83dbSDimitry Andric // Note that we lie to shouldClusterMemOps about the size of the 1655ffd83dbSDimitry Andric // cluster. When shouldClusterMemOps is called from the machine 1665ffd83dbSDimitry Andric // scheduler it limits the size of the cluster to avoid increasing 1675ffd83dbSDimitry Andric // register pressure too much, but this pass runs after register 1685ffd83dbSDimitry Andric // allocation so there is no need for that kind of limit. 1695ffd83dbSDimitry Andric !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) { 1705ffd83dbSDimitry Andric // Finish the current clause. 1715ffd83dbSDimitry Andric Changed |= emitClause(CI, SII); 1725ffd83dbSDimitry Andric CI = ClauseInfo(); 1735ffd83dbSDimitry Andric } 1745ffd83dbSDimitry Andric 1755ffd83dbSDimitry Andric if (CI.Length) { 1765ffd83dbSDimitry Andric // Extend the current clause. 1775ffd83dbSDimitry Andric ++CI.Length; 1785ffd83dbSDimitry Andric if (Type != HARDCLAUSE_INTERNAL) { 1795ffd83dbSDimitry Andric CI.Last = &MI; 1805ffd83dbSDimitry Andric CI.BaseOps = std::move(BaseOps); 1815ffd83dbSDimitry Andric } 1825ffd83dbSDimitry Andric } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 1835ffd83dbSDimitry Andric // Start a new clause. 1845ffd83dbSDimitry Andric CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)}; 1855ffd83dbSDimitry Andric } 1865ffd83dbSDimitry Andric } 1875ffd83dbSDimitry Andric 1885ffd83dbSDimitry Andric // Finish the last clause in the basic block if any. 1895ffd83dbSDimitry Andric if (CI.Length) 1905ffd83dbSDimitry Andric Changed |= emitClause(CI, SII); 1915ffd83dbSDimitry Andric } 1925ffd83dbSDimitry Andric 1935ffd83dbSDimitry Andric return Changed; 1945ffd83dbSDimitry Andric } 1955ffd83dbSDimitry Andric }; 1965ffd83dbSDimitry Andric 1975ffd83dbSDimitry Andric } // namespace 1985ffd83dbSDimitry Andric 1995ffd83dbSDimitry Andric char SIInsertHardClauses::ID = 0; 2005ffd83dbSDimitry Andric 2015ffd83dbSDimitry Andric char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID; 2025ffd83dbSDimitry Andric 2035ffd83dbSDimitry Andric INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses", 2045ffd83dbSDimitry Andric false, false) 205