xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp (revision 9c77fb6aaa366cbabc80ee1b834bcfe4df135491)
1 //===-- SIPostRABundler.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass creates bundles of memory instructions to protect adjacent loads
11 /// and stores from being rescheduled apart from each other post-RA.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "SIPostRABundler.h"
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 
21 using namespace llvm;
22 
23 #define DEBUG_TYPE "si-post-ra-bundler"
24 
25 namespace {
26 
27 class SIPostRABundlerLegacy : public MachineFunctionPass {
28 public:
29   static char ID;
30 
31 public:
32   SIPostRABundlerLegacy() : MachineFunctionPass(ID) {
33     initializeSIPostRABundlerLegacyPass(*PassRegistry::getPassRegistry());
34   }
35 
36   bool runOnMachineFunction(MachineFunction &MF) override;
37 
38   StringRef getPassName() const override {
39     return "SI post-RA bundler";
40   }
41 
42   void getAnalysisUsage(AnalysisUsage &AU) const override {
43     AU.setPreservesAll();
44     MachineFunctionPass::getAnalysisUsage(AU);
45   }
46 };
47 
48 class SIPostRABundler {
49 public:
50   bool run(MachineFunction &MF);
51 
52 private:
53   const SIRegisterInfo *TRI;
54 
55   SmallSet<Register, 16> Defs;
56 
57   void collectUsedRegUnits(const MachineInstr &MI,
58                            BitVector &UsedRegUnits) const;
59 
60   bool isBundleCandidate(const MachineInstr &MI) const;
61   bool isDependentLoad(const MachineInstr &MI) const;
62   bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const;
63 };
64 
65 constexpr uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF |
66                               SIInstrFlags::SMRD | SIInstrFlags::DS |
67                               SIInstrFlags::FLAT | SIInstrFlags::MIMG |
68                               SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
69 
70 } // End anonymous namespace.
71 
72 INITIALIZE_PASS(SIPostRABundlerLegacy, DEBUG_TYPE, "SI post-RA bundler", false,
73                 false)
74 
75 char SIPostRABundlerLegacy::ID = 0;
76 
77 char &llvm::SIPostRABundlerLegacyID = SIPostRABundlerLegacy::ID;
78 
79 FunctionPass *llvm::createSIPostRABundlerPass() {
80   return new SIPostRABundlerLegacy();
81 }
82 
83 bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const {
84   if (!MI.mayLoad())
85     return false;
86 
87   for (const MachineOperand &Op : MI.explicit_operands()) {
88     if (!Op.isReg())
89       continue;
90     Register Reg = Op.getReg();
91     for (Register Def : Defs)
92       if (TRI->regsOverlap(Reg, Def))
93         return true;
94   }
95 
96   return false;
97 }
98 
99 void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI,
100                                           BitVector &UsedRegUnits) const {
101   if (MI.isDebugInstr())
102     return;
103 
104   for (const MachineOperand &Op : MI.operands()) {
105     if (!Op.isReg() || !Op.readsReg())
106       continue;
107 
108     Register Reg = Op.getReg();
109     assert(!Op.getSubReg() &&
110            "subregister indexes should not be present after RA");
111 
112     for (MCRegUnit Unit : TRI->regunits(Reg))
113       UsedRegUnits.set(Unit);
114   }
115 }
116 
117 bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const {
118   const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
119   return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled();
120 }
121 
122 bool SIPostRABundler::canBundle(const MachineInstr &MI,
123                                 const MachineInstr &NextMI) const {
124   const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
125 
126   return (IMemFlags != 0 && MI.mayLoadOrStore() && !NextMI.isBundled() &&
127           NextMI.mayLoad() == MI.mayLoad() && NextMI.mayStore() == MI.mayStore() &&
128           ((NextMI.getDesc().TSFlags & MemFlags) == IMemFlags) &&
129           !isDependentLoad(NextMI));
130 }
131 
132 bool SIPostRABundlerLegacy::runOnMachineFunction(MachineFunction &MF) {
133   if (skipFunction(MF.getFunction()))
134     return false;
135   return SIPostRABundler().run(MF);
136 }
137 
138 PreservedAnalyses SIPostRABundlerPass::run(MachineFunction &MF,
139                                            MachineFunctionAnalysisManager &) {
140   SIPostRABundler().run(MF);
141   return PreservedAnalyses::all();
142 }
143 
144 bool SIPostRABundler::run(MachineFunction &MF) {
145 
146   TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
147   BitVector BundleUsedRegUnits(TRI->getNumRegUnits());
148   BitVector KillUsedRegUnits(TRI->getNumRegUnits());
149 
150   bool Changed = false;
151   for (MachineBasicBlock &MBB : MF) {
152     bool HasIGLPInstrs = llvm::any_of(MBB.instrs(), [](MachineInstr &MI) {
153       unsigned Opc = MI.getOpcode();
154       return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
155     });
156 
157     // Don't cluster with IGLP instructions.
158     if (HasIGLPInstrs)
159       continue;
160 
161     MachineBasicBlock::instr_iterator Next;
162     MachineBasicBlock::instr_iterator B = MBB.instr_begin();
163     MachineBasicBlock::instr_iterator E = MBB.instr_end();
164 
165     for (auto I = B; I != E; I = Next) {
166       Next = std::next(I);
167       if (!isBundleCandidate(*I))
168         continue;
169 
170       assert(Defs.empty());
171 
172       if (I->getNumExplicitDefs() != 0)
173         Defs.insert(I->defs().begin()->getReg());
174 
175       MachineBasicBlock::instr_iterator BundleStart = I;
176       MachineBasicBlock::instr_iterator BundleEnd = I;
177       unsigned ClauseLength = 1;
178       for (I = Next; I != E; I = Next) {
179         Next = std::next(I);
180 
181         assert(BundleEnd != I);
182         if (canBundle(*BundleEnd, *I)) {
183           BundleEnd = I;
184           if (I->getNumExplicitDefs() != 0)
185             Defs.insert(I->defs().begin()->getReg());
186           ++ClauseLength;
187         } else if (!I->isMetaInstruction()) {
188           // Allow meta instructions in between bundle candidates, but do not
189           // start or end a bundle on one.
190           //
191           // TODO: It may be better to move meta instructions like dbg_value
192           // after the bundle. We're relying on the memory legalizer to unbundle
193           // these.
194           break;
195         }
196       }
197 
198       Next = std::next(BundleEnd);
199       if (ClauseLength > 1) {
200         Changed = true;
201 
202         // Before register allocation, kills are inserted after potential soft
203         // clauses to hint register allocation. Look for kills that look like
204         // this, and erase them.
205         if (Next != E && Next->isKill()) {
206 
207           // TODO: Should maybe back-propagate kill flags to the bundle.
208           for (const MachineInstr &BundleMI : make_range(BundleStart, Next))
209             collectUsedRegUnits(BundleMI, BundleUsedRegUnits);
210 
211           BundleUsedRegUnits.flip();
212 
213           while (Next != E && Next->isKill()) {
214             MachineInstr &Kill = *Next;
215             collectUsedRegUnits(Kill, KillUsedRegUnits);
216 
217             KillUsedRegUnits &= BundleUsedRegUnits;
218 
219             // Erase the kill if it's a subset of the used registers.
220             //
221             // TODO: Should we just remove all kills? Is there any real reason to
222             // keep them after RA?
223             if (KillUsedRegUnits.none()) {
224               ++Next;
225               Kill.eraseFromParent();
226             } else
227               break;
228 
229             KillUsedRegUnits.reset();
230           }
231 
232           BundleUsedRegUnits.reset();
233         }
234 
235         finalizeBundle(MBB, BundleStart, Next);
236       }
237 
238       Defs.clear();
239     }
240   }
241 
242   return Changed;
243 }
244