xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Packetizer.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// This pass implements instruction packetization for R600. It clears the
/// isLast bit of instructions inside a bundle and substitutes source registers
/// with PreviousVector registers when applicable.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "MCTargetDesc/R600MCTargetDesc.h"
17 #include "R600.h"
18 #include "R600Subtarget.h"
19 #include "llvm/CodeGen/DFAPacketizer.h"
20 #include "llvm/CodeGen/MachineDominators.h"
21 #include "llvm/CodeGen/MachineLoopInfo.h"
22 #include "llvm/CodeGen/ScheduleDAG.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "packets"
27 
28 namespace {
29 
30 class R600Packetizer : public MachineFunctionPass {
31 
32 public:
33   static char ID;
34   R600Packetizer() : MachineFunctionPass(ID) {}
35 
36   void getAnalysisUsage(AnalysisUsage &AU) const override {
37     AU.setPreservesCFG();
38     AU.addRequired<MachineDominatorTreeWrapperPass>();
39     AU.addPreserved<MachineDominatorTreeWrapperPass>();
40     AU.addRequired<MachineLoopInfoWrapperPass>();
41     AU.addPreserved<MachineLoopInfoWrapperPass>();
42     MachineFunctionPass::getAnalysisUsage(AU);
43   }
44 
45   StringRef getPassName() const override { return "R600 Packetizer"; }
46 
47   bool runOnMachineFunction(MachineFunction &Fn) override;
48 };
49 
50 class R600PacketizerList : public VLIWPacketizerList {
51 private:
52   const R600InstrInfo *TII;
53   const R600RegisterInfo &TRI;
54   bool VLIW5;
55   bool ConsideredInstUsesAlreadyWrittenVectorElement;
56 
57   unsigned getSlot(const MachineInstr &MI) const {
58     return TRI.getHWRegChan(MI.getOperand(0).getReg());
59   }
60 
61   /// \returns register to PV chan mapping for bundle/single instructions that
62   /// immediately precedes I.
63   DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
64       const {
65     DenseMap<unsigned, unsigned> Result;
66     I--;
67     if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
68       return Result;
69     MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
70     if (I->isBundle())
71       BI++;
72     int LastDstChan = -1;
73     do {
74       bool isTrans = false;
75       int BISlot = getSlot(*BI);
76       if (LastDstChan >= BISlot)
77         isTrans = true;
78       LastDstChan = BISlot;
79       if (TII->isPredicated(*BI))
80         continue;
81       int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write);
82       if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
83         continue;
84       int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst);
85       if (DstIdx == -1) {
86         continue;
87       }
88       Register Dst = BI->getOperand(DstIdx).getReg();
89       if (isTrans || TII->isTransOnly(*BI)) {
90         Result[Dst] = R600::PS;
91         continue;
92       }
93       if (BI->getOpcode() == R600::DOT4_r600 ||
94           BI->getOpcode() == R600::DOT4_eg) {
95         Result[Dst] = R600::PV_X;
96         continue;
97       }
98       if (Dst == R600::OQAP) {
99         continue;
100       }
101       unsigned PVReg = 0;
102       switch (TRI.getHWRegChan(Dst)) {
103       case 0:
104         PVReg = R600::PV_X;
105         break;
106       case 1:
107         PVReg = R600::PV_Y;
108         break;
109       case 2:
110         PVReg = R600::PV_Z;
111         break;
112       case 3:
113         PVReg = R600::PV_W;
114         break;
115       default:
116         llvm_unreachable("Invalid Chan");
117       }
118       Result[Dst] = PVReg;
119     } while ((++BI)->isBundledWithPred());
120     return Result;
121   }
122 
123   void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs)
124       const {
125     const R600::OpName Ops[] = {R600::OpName::src0, R600::OpName::src1,
126                                 R600::OpName::src2};
127     for (R600::OpName Op : Ops) {
128       int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Op);
129       if (OperandIdx < 0)
130         continue;
131       Register Src = MI.getOperand(OperandIdx).getReg();
132       const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
133       if (It != PVs.end())
134         MI.getOperand(OperandIdx).setReg(It->second);
135     }
136   }
137 public:
138   // Ctor.
139   R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST,
140                      MachineLoopInfo &MLI)
141       : VLIWPacketizerList(MF, MLI, nullptr),
142         TII(ST.getInstrInfo()),
143         TRI(TII->getRegisterInfo()) {
144     VLIW5 = !ST.hasCaymanISA();
145   }
146 
147   // initPacketizerState - initialize some internal flags.
148   void initPacketizerState() override {
149     ConsideredInstUsesAlreadyWrittenVectorElement = false;
150   }
151 
152   // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
153   bool ignorePseudoInstruction(const MachineInstr &MI,
154                                const MachineBasicBlock *MBB) override {
155     return false;
156   }
157 
158   // isSoloInstruction - return true if instruction MI can not be packetized
159   // with any other instruction, which means that MI itself is a packet.
160   bool isSoloInstruction(const MachineInstr &MI) override {
161     if (TII->isVector(MI))
162       return true;
163     if (!TII->isALUInstr(MI.getOpcode()))
164       return true;
165     if (MI.getOpcode() == R600::GROUP_BARRIER)
166       return true;
167     // XXX: This can be removed once the packetizer properly handles all the
168     // LDS instruction group restrictions.
169     return TII->isLDSInstr(MI.getOpcode());
170   }
171 
172   // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
173   // together.
174   bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override {
175     MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
176     if (getSlot(*MII) == getSlot(*MIJ))
177       ConsideredInstUsesAlreadyWrittenVectorElement = true;
178     // Does MII and MIJ share the same pred_sel ?
179     int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel),
180         OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel);
181     Register PredI = (OpI > -1)?MII->getOperand(OpI).getReg() : Register(),
182       PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg() : Register();
183     if (PredI != PredJ)
184       return false;
185     if (SUJ->isSucc(SUI)) {
186       for (const SDep &Dep : SUJ->Succs) {
187         if (Dep.getSUnit() != SUI)
188           continue;
189         if (Dep.getKind() == SDep::Anti)
190           continue;
191         if (Dep.getKind() == SDep::Output)
192           if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
193             continue;
194         return false;
195       }
196     }
197 
198     bool ARDef =
199         TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ);
200     bool ARUse =
201         TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ);
202 
203     return !ARDef || !ARUse;
204   }
205 
206   // isLegalToPruneDependencies - Is it legal to prune dependency between SUI
207   // and SUJ.
208   bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
209     return false;
210   }
211 
212   void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
213     unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last);
214     MI->getOperand(LastOp).setImm(Bit);
215   }
216 
217   bool isBundlableWithCurrentPMI(MachineInstr &MI,
218                                  const DenseMap<unsigned, unsigned> &PV,
219                                  std::vector<R600InstrInfo::BankSwizzle> &BS,
220                                  bool &isTransSlot) {
221     isTransSlot = TII->isTransOnly(MI);
222     assert (!isTransSlot || VLIW5);
223 
224     // Is the dst reg sequence legal ?
225     if (!isTransSlot && !CurrentPacketMIs.empty()) {
226       if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) {
227         if (ConsideredInstUsesAlreadyWrittenVectorElement &&
228             !TII->isVectorOnly(MI) && VLIW5) {
229           isTransSlot = true;
230           LLVM_DEBUG({
231             dbgs() << "Considering as Trans Inst :";
232             MI.dump();
233           });
234         }
235         else
236           return false;
237       }
238     }
239 
240     // Are the Constants limitations met ?
241     CurrentPacketMIs.push_back(&MI);
242     if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
243       LLVM_DEBUG({
244         dbgs() << "Couldn't pack :\n";
245         MI.dump();
246         dbgs() << "with the following packets :\n";
247         for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
248           CurrentPacketMIs[i]->dump();
249           dbgs() << "\n";
250         }
251         dbgs() << "because of Consts read limitations\n";
252       });
253       CurrentPacketMIs.pop_back();
254       return false;
255     }
256 
257     // Is there a BankSwizzle set that meet Read Port limitations ?
258     if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
259             PV, BS, isTransSlot)) {
260       LLVM_DEBUG({
261         dbgs() << "Couldn't pack :\n";
262         MI.dump();
263         dbgs() << "with the following packets :\n";
264         for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
265           CurrentPacketMIs[i]->dump();
266           dbgs() << "\n";
267         }
268         dbgs() << "because of Read port limitations\n";
269       });
270       CurrentPacketMIs.pop_back();
271       return false;
272     }
273 
274     // We cannot read LDS source registers from the Trans slot.
275     if (isTransSlot && TII->readsLDSSrcReg(MI))
276       return false;
277 
278     CurrentPacketMIs.pop_back();
279     return true;
280   }
281 
282   MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override {
283     MachineBasicBlock::iterator FirstInBundle =
284         CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front();
285     const DenseMap<unsigned, unsigned> &PV =
286         getPreviousVector(FirstInBundle);
287     std::vector<R600InstrInfo::BankSwizzle> BS;
288     bool isTransSlot;
289 
290     if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
291       for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
292         MachineInstr *MI = CurrentPacketMIs[i];
293         unsigned Op = TII->getOperandIdx(MI->getOpcode(),
294             R600::OpName::bank_swizzle);
295         MI->getOperand(Op).setImm(BS[i]);
296       }
297       unsigned Op =
298           TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle);
299       MI.getOperand(Op).setImm(BS.back());
300       if (!CurrentPacketMIs.empty())
301         setIsLastBit(CurrentPacketMIs.back(), 0);
302       substitutePV(MI, PV);
303       MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
304       if (isTransSlot) {
305         endPacket(std::next(It)->getParent(), std::next(It));
306       }
307       return It;
308     }
309     endPacket(MI.getParent(), MI);
310     if (TII->isTransOnly(MI))
311       return MI;
312     return VLIWPacketizerList::addToPacket(MI);
313   }
314 };
315 
316 bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
317   const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
318   const R600InstrInfo *TII = ST.getInstrInfo();
319 
320   MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
321 
322   // Instantiate the packetizer.
323   R600PacketizerList Packetizer(Fn, ST, MLI);
324 
325   // DFA state table should not be empty.
326   assert(Packetizer.getResourceTracker() && "Empty DFA table!");
327   assert(Packetizer.getResourceTracker()->getInstrItins());
328 
329   if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty())
330     return false;
331 
332   //
333   // Loop over all basic blocks and remove KILL pseudo-instructions
334   // These instructions confuse the dependence analysis. Consider:
335   // D0 = ...   (Insn 0)
336   // R0 = KILL R0, D0 (Insn 1)
337   // R0 = ... (Insn 2)
338   // Here, Insn 1 will result in the dependence graph not emitting an output
339   // dependence between Insn 0 and Insn 2. This can lead to incorrect
340   // packetization
341   //
342   for (MachineBasicBlock &MBB : Fn) {
343     for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
344       if (MI.isKill() || MI.getOpcode() == R600::IMPLICIT_DEF ||
345           (MI.getOpcode() == R600::CF_ALU && !MI.getOperand(8).getImm()))
346         MBB.erase(MI);
347     }
348   }
349 
350   // Loop over all of the basic blocks.
351   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
352        MBB != MBBe; ++MBB) {
353     // Find scheduling regions and schedule / packetize each region.
354     unsigned RemainingCount = MBB->size();
355     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
356         RegionEnd != MBB->begin();) {
357       // The next region starts above the previous region. Look backward in the
358       // instruction stream until we find the nearest boundary.
359       MachineBasicBlock::iterator I = RegionEnd;
360       for(;I != MBB->begin(); --I, --RemainingCount) {
361         if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn))
362           break;
363       }
364       I = MBB->begin();
365 
366       // Skip empty scheduling regions.
367       if (I == RegionEnd) {
368         RegionEnd = std::prev(RegionEnd);
369         --RemainingCount;
370         continue;
371       }
372       // Skip regions with one instruction.
373       if (I == std::prev(RegionEnd)) {
374         RegionEnd = std::prev(RegionEnd);
375         continue;
376       }
377 
378       Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd);
379       RegionEnd = I;
380     }
381   }
382 
383   return true;
384 
385 }
386 
387 } // end anonymous namespace
388 
389 INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE,
390                      "R600 Packetizer", false, false)
391 INITIALIZE_PASS_END(R600Packetizer, DEBUG_TYPE,
392                     "R600 Packetizer", false, false)
393 
394 char R600Packetizer::ID = 0;
395 
396 char &llvm::R600PacketizerID = R600Packetizer::ID;
397 
398 llvm::FunctionPass *llvm::createR600Packetizer() {
399   return new R600Packetizer();
400 }
401