xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
//===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file contains a DAG scheduling mutation to cluster shader
10 ///       exports.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPUExportClustering.h"
15 #include "SIInstrInfo.h"
16 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
17 
18 using namespace llvm;
19 
20 namespace {
21 
22 class ExportClustering : public ScheduleDAGMutation {
23 public:
24   ExportClustering() = default;
25   void apply(ScheduleDAGInstrs *DAG) override;
26 };
27 
28 static bool isExport(const SUnit &SU) {
29   return SIInstrInfo::isEXP(*SU.getInstr());
30 }
31 
32 static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
33   const MachineInstr *MI = SU->getInstr();
34   unsigned Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
35   return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST;
36 }
37 
38 static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
39                       unsigned PosCount) {
40   if (!PosCount || PosCount == Chain.size())
41     return;
42 
43   // Position exports should occur as soon as possible in the shader
44   // for optimal performance.  This moves position exports before
45   // other exports while preserving the order within different export
46   // types (pos or other).
47   SmallVector<SUnit *, 8> Copy(Chain);
48   unsigned PosIdx = 0;
49   unsigned OtherIdx = PosCount;
50   for (SUnit *SU : Copy) {
51     if (isPositionExport(TII, SU))
52       Chain[PosIdx++] = SU;
53     else
54       Chain[OtherIdx++] = SU;
55   }
56 }
57 
58 static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
59   SUnit *ChainHead = Exports.front();
60 
61   // Now construct cluster from chain by adding new edges.
62   for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {
63     SUnit *SUa = Exports[Idx];
64     SUnit *SUb = Exports[Idx + 1];
65 
66     // Copy all dependencies to the head of the chain to avoid any
67     // computation being inserted into the chain.
68     for (const SDep &Pred : SUb->Preds) {
69       SUnit *PredSU = Pred.getSUnit();
70       if (!isExport(*PredSU) && !Pred.isWeak())
71         DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial));
72     }
73 
74     // New barrier edge ordering exports
75     DAG->addEdge(SUb, SDep(SUa, SDep::Barrier));
76     // Also add cluster edge
77     DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
78   }
79 }
80 
81 static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {
82   SmallVector<SDep, 2> ToAdd, ToRemove;
83 
84   for (const SDep &Pred : SU.Preds) {
85     SUnit *PredSU = Pred.getSUnit();
86     if (Pred.isBarrier() && isExport(*PredSU)) {
87       ToRemove.push_back(Pred);
88       if (isExport(SU))
89         continue;
90 
91       // If we remove a barrier we need to copy dependencies
92       // from the predecessor to maintain order.
93       for (const SDep &ExportPred : PredSU->Preds) {
94         SUnit *ExportPredSU = ExportPred.getSUnit();
95         if (ExportPred.isBarrier() && !isExport(*ExportPredSU))
96           ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier));
97       }
98     }
99   }
100 
101   for (SDep Pred : ToRemove)
102     SU.removePred(Pred);
103   for (SDep Pred : ToAdd)
104     DAG->addEdge(&SU, Pred);
105 }
106 
107 void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
108   const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
109 
110   SmallVector<SUnit *, 8> Chain;
111 
112   // Pass through DAG gathering a list of exports and removing barrier edges
113   // creating dependencies on exports. Freeing exports of successor edges
114   // allows more scheduling freedom, and nothing should be order dependent
115   // on exports.  Edges will be added later to order the exports.
116   unsigned PosCount = 0;
117   for (SUnit &SU : DAG->SUnits) {
118     if (!isExport(SU))
119       continue;
120 
121     Chain.push_back(&SU);
122     if (isPositionExport(TII, &SU))
123       PosCount++;
124 
125     removeExportDependencies(DAG, SU);
126 
127     SmallVector<SDep, 4> Succs(SU.Succs);
128     for (SDep Succ : Succs)
129       removeExportDependencies(DAG, *Succ.getSUnit());
130   }
131 
132   // Apply clustering if there are multiple exports
133   if (Chain.size() > 1) {
134     sortChain(TII, Chain, PosCount);
135     buildCluster(Chain, DAG);
136   }
137 }
138 
139 } // end namespace
140 
141 std::unique_ptr<ScheduleDAGMutation>
142 llvm::createAMDGPUExportClusteringDAGMutation() {
143   return std::make_unique<ExportClustering>();
144 }
145