1 //===--- AMDGPUExportClusting.cpp - AMDGPU Export Clustering -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file This file contains a DAG scheduling mutation to cluster shader 10 /// exports. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPUExportClustering.h" 15 #include "AMDGPUSubtarget.h" 16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 17 #include "SIInstrInfo.h" 18 19 using namespace llvm; 20 21 namespace { 22 23 class ExportClustering : public ScheduleDAGMutation { 24 public: 25 ExportClustering() {} 26 void apply(ScheduleDAGInstrs *DAG) override; 27 }; 28 29 static bool isExport(const SUnit &SU) { 30 const MachineInstr *MI = SU.getInstr(); 31 return MI->getOpcode() == AMDGPU::EXP || 32 MI->getOpcode() == AMDGPU::EXP_DONE; 33 } 34 35 static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) { 36 const MachineInstr *MI = SU->getInstr(); 37 int Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm(); 38 return Imm >= 12 && Imm <= 15; 39 } 40 41 static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain, 42 unsigned PosCount) { 43 if (!PosCount || PosCount == Chain.size()) 44 return; 45 46 // Position exports should occur as soon as possible in the shader 47 // for optimal performance. This moves position exports before 48 // other exports while preserving the order within different export 49 // types (pos or other). 50 SmallVector<SUnit *, 8> Copy(Chain); 51 unsigned PosIdx = 0; 52 unsigned OtherIdx = PosCount; 53 for (SUnit *SU : Copy) { 54 if (isPositionExport(TII, SU)) 55 Chain[PosIdx++] = SU; 56 else 57 Chain[OtherIdx++] = SU; 58 } 59 } 60 61 static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) { 62 SUnit *ChainHead = Exports.front(); 63 64 // Now construct cluster from chain by adding new edges. 65 for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) { 66 SUnit *SUa = Exports[Idx]; 67 SUnit *SUb = Exports[Idx + 1]; 68 69 // Copy all dependencies to the head of the chain to avoid any 70 // computation being inserted into the chain. 71 for (const SDep &Pred : SUb->Preds) { 72 SUnit *PredSU = Pred.getSUnit(); 73 if (!isExport(*PredSU) && !Pred.isWeak()) 74 DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial)); 75 } 76 77 // New barrier edge ordering exports 78 DAG->addEdge(SUb, SDep(SUa, SDep::Barrier)); 79 // Also add cluster edge 80 DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)); 81 } 82 } 83 84 static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) { 85 SmallVector<SDep, 2> ToAdd, ToRemove; 86 87 for (const SDep &Pred : SU.Preds) { 88 SUnit *PredSU = Pred.getSUnit(); 89 if (Pred.isBarrier() && isExport(*PredSU)) { 90 ToRemove.push_back(Pred); 91 if (isExport(SU)) 92 continue; 93 94 // If we remove a barrier we need to copy dependencies 95 // from the predecessor to maintain order. 96 for (const SDep &ExportPred : PredSU->Preds) { 97 SUnit *ExportPredSU = ExportPred.getSUnit(); 98 if (ExportPred.isBarrier() && !isExport(*ExportPredSU)) 99 ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier)); 100 } 101 } 102 } 103 104 for (SDep Pred : ToRemove) 105 SU.removePred(Pred); 106 for (SDep Pred : ToAdd) 107 DAG->addEdge(&SU, Pred); 108 } 109 110 void ExportClustering::apply(ScheduleDAGInstrs *DAG) { 111 const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII); 112 113 SmallVector<SUnit *, 8> Chain; 114 115 // Pass through DAG gathering a list of exports and removing barrier edges 116 // creating dependencies on exports. Freeing exports of successor edges 117 // allows more scheduling freedom, and nothing should be order dependent 118 // on exports. Edges will be added later to order the exports. 119 unsigned PosCount = 0; 120 for (SUnit &SU : DAG->SUnits) { 121 if (!isExport(SU)) 122 continue; 123 124 Chain.push_back(&SU); 125 if (isPositionExport(TII, &SU)) 126 PosCount++; 127 128 removeExportDependencies(DAG, SU); 129 130 SmallVector<SDep, 4> Succs(SU.Succs); 131 for (SDep Succ : Succs) 132 removeExportDependencies(DAG, *Succ.getSUnit()); 133 } 134 135 // Apply clustering if there are multiple exports 136 if (Chain.size() > 1) { 137 sortChain(TII, Chain, PosCount); 138 buildCluster(Chain, DAG); 139 } 140 } 141 142 } // end namespace 143 144 namespace llvm { 145 146 std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() { 147 return std::make_unique<ExportClustering>(); 148 } 149 150 } // end namespace llvm 151