1 //===--- AMDGPUExportClusting.cpp - AMDGPU Export Clustering -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file This file contains a DAG scheduling mutation to cluster shader 10 /// exports. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPUExportClustering.h" 15 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 16 #include "SIInstrInfo.h" 17 #include "llvm/CodeGen/ScheduleDAGInstrs.h" 18 19 using namespace llvm; 20 21 namespace { 22 23 class ExportClustering : public ScheduleDAGMutation { 24 public: 25 ExportClustering() = default; 26 void apply(ScheduleDAGInstrs *DAG) override; 27 }; 28 29 static bool isExport(const SUnit &SU) { 30 return SIInstrInfo::isEXP(*SU.getInstr()); 31 } 32 33 static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) { 34 const MachineInstr *MI = SU->getInstr(); 35 unsigned Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm(); 36 return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST; 37 } 38 39 static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain, 40 unsigned PosCount) { 41 if (!PosCount || PosCount == Chain.size()) 42 return; 43 44 // Position exports should occur as soon as possible in the shader 45 // for optimal performance. This moves position exports before 46 // other exports while preserving the order within different export 47 // types (pos or other). 48 SmallVector<SUnit *, 8> Copy(Chain); 49 unsigned PosIdx = 0; 50 unsigned OtherIdx = PosCount; 51 for (SUnit *SU : Copy) { 52 if (isPositionExport(TII, SU)) 53 Chain[PosIdx++] = SU; 54 else 55 Chain[OtherIdx++] = SU; 56 } 57 } 58 59 static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) { 60 SUnit *ChainHead = Exports.front(); 61 62 // Now construct cluster from chain by adding new edges. 63 for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) { 64 SUnit *SUa = Exports[Idx]; 65 SUnit *SUb = Exports[Idx + 1]; 66 67 // Copy all dependencies to the head of the chain to avoid any 68 // computation being inserted into the chain. 69 for (const SDep &Pred : SUb->Preds) { 70 SUnit *PredSU = Pred.getSUnit(); 71 if (!isExport(*PredSU) && !Pred.isWeak()) 72 DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial)); 73 } 74 75 // New barrier edge ordering exports 76 DAG->addEdge(SUb, SDep(SUa, SDep::Barrier)); 77 // Also add cluster edge 78 DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)); 79 } 80 } 81 82 static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) { 83 SmallVector<SDep, 2> ToAdd, ToRemove; 84 85 for (const SDep &Pred : SU.Preds) { 86 SUnit *PredSU = Pred.getSUnit(); 87 if (Pred.isBarrier() && isExport(*PredSU)) { 88 ToRemove.push_back(Pred); 89 if (isExport(SU)) 90 continue; 91 92 // If we remove a barrier we need to copy dependencies 93 // from the predecessor to maintain order. 94 for (const SDep &ExportPred : PredSU->Preds) { 95 SUnit *ExportPredSU = ExportPred.getSUnit(); 96 if (ExportPred.isBarrier() && !isExport(*ExportPredSU)) 97 ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier)); 98 } 99 } 100 } 101 102 for (SDep Pred : ToRemove) 103 SU.removePred(Pred); 104 for (SDep Pred : ToAdd) 105 DAG->addEdge(&SU, Pred); 106 } 107 108 void ExportClustering::apply(ScheduleDAGInstrs *DAG) { 109 const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII); 110 111 SmallVector<SUnit *, 8> Chain; 112 113 // Pass through DAG gathering a list of exports and removing barrier edges 114 // creating dependencies on exports. Freeing exports of successor edges 115 // allows more scheduling freedom, and nothing should be order dependent 116 // on exports. Edges will be added later to order the exports. 117 unsigned PosCount = 0; 118 for (SUnit &SU : DAG->SUnits) { 119 if (!isExport(SU)) 120 continue; 121 122 Chain.push_back(&SU); 123 if (isPositionExport(TII, &SU)) 124 PosCount++; 125 126 removeExportDependencies(DAG, SU); 127 128 SmallVector<SDep, 4> Succs(SU.Succs); 129 for (SDep Succ : Succs) 130 removeExportDependencies(DAG, *Succ.getSUnit()); 131 } 132 133 // Apply clustering if there are multiple exports 134 if (Chain.size() > 1) { 135 sortChain(TII, Chain, PosCount); 136 buildCluster(Chain, DAG); 137 } 138 } 139 140 } // end namespace 141 142 namespace llvm { 143 144 std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() { 145 return std::make_unique<ExportClustering>(); 146 } 147 148 } // end namespace llvm 149