//===--- AMDGPUExportClusting.cpp - AMDGPU Export Clustering -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file This file contains a DAG scheduling mutation to cluster shader /// exports. // //===----------------------------------------------------------------------===// #include "AMDGPUExportClustering.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIInstrInfo.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" using namespace llvm; namespace { class ExportClustering : public ScheduleDAGMutation { public: ExportClustering() = default; void apply(ScheduleDAGInstrs *DAG) override; }; static bool isExport(const SUnit &SU) { return SIInstrInfo::isEXP(*SU.getInstr()); } static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) { const MachineInstr *MI = SU->getInstr(); unsigned Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm(); return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST; } static void sortChain(const SIInstrInfo *TII, SmallVector &Chain, unsigned PosCount) { if (!PosCount || PosCount == Chain.size()) return; // Position exports should occur as soon as possible in the shader // for optimal performance. This moves position exports before // other exports while preserving the order within different export // types (pos or other). SmallVector Copy(Chain); unsigned PosIdx = 0; unsigned OtherIdx = PosCount; for (SUnit *SU : Copy) { if (isPositionExport(TII, SU)) Chain[PosIdx++] = SU; else Chain[OtherIdx++] = SU; } } static void buildCluster(ArrayRef Exports, ScheduleDAGInstrs *DAG) { SUnit *ChainHead = Exports.front(); // Now construct cluster from chain by adding new edges. for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) { SUnit *SUa = Exports[Idx]; SUnit *SUb = Exports[Idx + 1]; // Copy all dependencies to the head of the chain to avoid any // computation being inserted into the chain. for (const SDep &Pred : SUb->Preds) { SUnit *PredSU = Pred.getSUnit(); if (!isExport(*PredSU) && !Pred.isWeak()) DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial)); } // New barrier edge ordering exports DAG->addEdge(SUb, SDep(SUa, SDep::Barrier)); // Also add cluster edge DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)); } } static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) { SmallVector ToAdd, ToRemove; for (const SDep &Pred : SU.Preds) { SUnit *PredSU = Pred.getSUnit(); if (Pred.isBarrier() && isExport(*PredSU)) { ToRemove.push_back(Pred); if (isExport(SU)) continue; // If we remove a barrier we need to copy dependencies // from the predecessor to maintain order. for (const SDep &ExportPred : PredSU->Preds) { SUnit *ExportPredSU = ExportPred.getSUnit(); if (ExportPred.isBarrier() && !isExport(*ExportPredSU)) ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier)); } } } for (SDep Pred : ToRemove) SU.removePred(Pred); for (SDep Pred : ToAdd) DAG->addEdge(&SU, Pred); } void ExportClustering::apply(ScheduleDAGInstrs *DAG) { const SIInstrInfo *TII = static_cast(DAG->TII); SmallVector Chain; // Pass through DAG gathering a list of exports and removing barrier edges // creating dependencies on exports. Freeing exports of successor edges // allows more scheduling freedom, and nothing should be order dependent // on exports. Edges will be added later to order the exports. unsigned PosCount = 0; for (SUnit &SU : DAG->SUnits) { if (!isExport(SU)) continue; Chain.push_back(&SU); if (isPositionExport(TII, &SU)) PosCount++; removeExportDependencies(DAG, SU); SmallVector Succs(SU.Succs); for (SDep Succ : Succs) removeExportDependencies(DAG, *Succ.getSUnit()); } // Apply clustering if there are multiple exports if (Chain.size() > 1) { sortChain(TII, Chain, PosCount); buildCluster(Chain, DAG); } } } // end namespace namespace llvm { std::unique_ptr createAMDGPUExportClusteringDAGMutation() { return std::make_unique(); } } // end namespace llvm