1 //===--- AMDGPUExportClusting.cpp - AMDGPU Export Clustering -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file This file contains a DAG scheduling mutation to cluster shader 10 /// exports. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPUExportClustering.h" 15 #include "SIInstrInfo.h" 16 #include "llvm/CodeGen/ScheduleDAGInstrs.h" 17 18 using namespace llvm; 19 20 namespace { 21 22 class ExportClustering : public ScheduleDAGMutation { 23 public: 24 ExportClustering() = default; 25 void apply(ScheduleDAGInstrs *DAG) override; 26 }; 27 28 static bool isExport(const SUnit &SU) { 29 return SIInstrInfo::isEXP(*SU.getInstr()); 30 } 31 32 static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) { 33 const MachineInstr *MI = SU->getInstr(); 34 unsigned Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm(); 35 return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST; 36 } 37 38 static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain, 39 unsigned PosCount) { 40 if (!PosCount || PosCount == Chain.size()) 41 return; 42 43 // Position exports should occur as soon as possible in the shader 44 // for optimal performance. This moves position exports before 45 // other exports while preserving the order within different export 46 // types (pos or other). 47 SmallVector<SUnit *, 8> Copy(Chain); 48 unsigned PosIdx = 0; 49 unsigned OtherIdx = PosCount; 50 for (SUnit *SU : Copy) { 51 if (isPositionExport(TII, SU)) 52 Chain[PosIdx++] = SU; 53 else 54 Chain[OtherIdx++] = SU; 55 } 56 } 57 58 static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) { 59 SUnit *ChainHead = Exports.front(); 60 61 // Now construct cluster from chain by adding new edges. 62 for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) { 63 SUnit *SUa = Exports[Idx]; 64 SUnit *SUb = Exports[Idx + 1]; 65 66 // Copy all dependencies to the head of the chain to avoid any 67 // computation being inserted into the chain. 68 for (const SDep &Pred : SUb->Preds) { 69 SUnit *PredSU = Pred.getSUnit(); 70 if (!isExport(*PredSU) && !Pred.isWeak()) 71 DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial)); 72 } 73 74 // New barrier edge ordering exports 75 DAG->addEdge(SUb, SDep(SUa, SDep::Barrier)); 76 // Also add cluster edge 77 DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)); 78 } 79 } 80 81 static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) { 82 SmallVector<SDep, 2> ToAdd, ToRemove; 83 84 for (const SDep &Pred : SU.Preds) { 85 SUnit *PredSU = Pred.getSUnit(); 86 if (Pred.isBarrier() && isExport(*PredSU)) { 87 ToRemove.push_back(Pred); 88 if (isExport(SU)) 89 continue; 90 91 // If we remove a barrier we need to copy dependencies 92 // from the predecessor to maintain order. 93 for (const SDep &ExportPred : PredSU->Preds) { 94 SUnit *ExportPredSU = ExportPred.getSUnit(); 95 if (ExportPred.isBarrier() && !isExport(*ExportPredSU)) 96 ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier)); 97 } 98 } 99 } 100 101 for (SDep Pred : ToRemove) 102 SU.removePred(Pred); 103 for (SDep Pred : ToAdd) 104 DAG->addEdge(&SU, Pred); 105 } 106 107 void ExportClustering::apply(ScheduleDAGInstrs *DAG) { 108 const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII); 109 110 SmallVector<SUnit *, 8> Chain; 111 112 // Pass through DAG gathering a list of exports and removing barrier edges 113 // creating dependencies on exports. Freeing exports of successor edges 114 // allows more scheduling freedom, and nothing should be order dependent 115 // on exports. Edges will be added later to order the exports. 116 unsigned PosCount = 0; 117 for (SUnit &SU : DAG->SUnits) { 118 if (!isExport(SU)) 119 continue; 120 121 Chain.push_back(&SU); 122 if (isPositionExport(TII, &SU)) 123 PosCount++; 124 125 removeExportDependencies(DAG, SU); 126 127 SmallVector<SDep, 4> Succs(SU.Succs); 128 for (SDep Succ : Succs) 129 removeExportDependencies(DAG, *Succ.getSUnit()); 130 } 131 132 // Apply clustering if there are multiple exports 133 if (Chain.size() > 1) { 134 sortChain(TII, Chain, PosCount); 135 buildCluster(Chain, DAG); 136 } 137 } 138 139 } // end namespace 140 141 std::unique_ptr<ScheduleDAGMutation> 142 llvm::createAMDGPUExportClusteringDAGMutation() { 143 return std::make_unique<ExportClustering>(); 144 } 145