xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp (revision 5ffd83dbcc34f10e07f6d3e968ae6365869615f4)
//===- R600OptimizeVectorRegisters.cpp ------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
/// \file
/// This pass merges the inputs of swizzlable instructions into vectors that
/// share common data and/or have enough undef subregs, relying on the swizzle
/// abilities of the instructions that consume them.
12 ///
13 /// For instance let's consider the following pseudo code :
14 /// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
15 /// ...
16 /// %7 = REG_SEQ %1, sub0, %3, sub1, undef, sub2, %4, sub3
17 /// (swizzable Inst) %7, SwizzleMask : sub0, sub1, sub2, sub3
18 ///
19 /// is turned into :
20 /// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
21 /// ...
/// %7 = INSERT_SUBREG %5, %4, sub3
23 /// (swizzable Inst) %7, SwizzleMask : sub0, sub2, sub1, sub3
24 ///
/// This allows regalloc to reduce register pressure for vector registers and
/// to reduce the MOV count.
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
30 #include "AMDGPUSubtarget.h"
31 #include "R600Defines.h"
32 #include "R600InstrInfo.h"
33 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/StringRef.h"
37 #include "llvm/CodeGen/MachineBasicBlock.h"
38 #include "llvm/CodeGen/MachineDominators.h"
39 #include "llvm/CodeGen/MachineFunction.h"
40 #include "llvm/CodeGen/MachineFunctionPass.h"
41 #include "llvm/CodeGen/MachineInstr.h"
42 #include "llvm/CodeGen/MachineInstrBuilder.h"
43 #include "llvm/CodeGen/MachineLoopInfo.h"
44 #include "llvm/CodeGen/MachineOperand.h"
45 #include "llvm/CodeGen/MachineRegisterInfo.h"
46 #include "llvm/IR/DebugLoc.h"
47 #include "llvm/Pass.h"
48 #include "llvm/Support/Debug.h"
49 #include "llvm/Support/ErrorHandling.h"
50 #include "llvm/Support/raw_ostream.h"
51 #include <cassert>
52 #include <utility>
53 #include <vector>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "vec-merger"
58 
59 static bool isImplicitlyDef(MachineRegisterInfo &MRI, Register Reg) {
60   if (Reg.isPhysical())
61     return false;
62   const MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
63   return MI && MI->isImplicitDef();
64 }
65 
66 namespace {
67 
68 class RegSeqInfo {
69 public:
70   MachineInstr *Instr;
71   DenseMap<Register, unsigned> RegToChan;
72   std::vector<Register> UndefReg;
73 
74   RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) {
75     assert(MI->getOpcode() == R600::REG_SEQUENCE);
76     for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) {
77       MachineOperand &MO = Instr->getOperand(i);
78       unsigned Chan = Instr->getOperand(i + 1).getImm();
79       if (isImplicitlyDef(MRI, MO.getReg()))
80         UndefReg.push_back(Chan);
81       else
82         RegToChan[MO.getReg()] = Chan;
83     }
84   }
85 
86   RegSeqInfo() = default;
87 
88   bool operator==(const RegSeqInfo &RSI) const {
89     return RSI.Instr == Instr;
90   }
91 };
92 
93 class R600VectorRegMerger : public MachineFunctionPass {
94 private:
95   using InstructionSetMap = DenseMap<unsigned, std::vector<MachineInstr *>>;
96 
97   MachineRegisterInfo *MRI;
98   const R600InstrInfo *TII = nullptr;
99   DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq;
100   InstructionSetMap PreviousRegSeqByReg;
101   InstructionSetMap PreviousRegSeqByUndefCount;
102 
103   bool canSwizzle(const MachineInstr &MI) const;
104   bool areAllUsesSwizzeable(Register Reg) const;
105   void SwizzleInput(MachineInstr &,
106       const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
107   bool tryMergeVector(const RegSeqInfo *Untouched, RegSeqInfo *ToMerge,
108       std::vector<std::pair<unsigned, unsigned>> &Remap) const;
109   bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
110       std::vector<std::pair<unsigned, unsigned>> &RemapChan);
111   bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
112       std::vector<std::pair<unsigned, unsigned>> &RemapChan);
113   MachineInstr *RebuildVector(RegSeqInfo *MI, const RegSeqInfo *BaseVec,
114       const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
115   void RemoveMI(MachineInstr *);
116   void trackRSI(const RegSeqInfo &RSI);
117 
118 public:
119   static char ID;
120 
121   R600VectorRegMerger() : MachineFunctionPass(ID) {}
122 
123   void getAnalysisUsage(AnalysisUsage &AU) const override {
124     AU.setPreservesCFG();
125     AU.addRequired<MachineDominatorTree>();
126     AU.addPreserved<MachineDominatorTree>();
127     AU.addRequired<MachineLoopInfo>();
128     AU.addPreserved<MachineLoopInfo>();
129     MachineFunctionPass::getAnalysisUsage(AU);
130   }
131 
132   MachineFunctionProperties getRequiredProperties() const override {
133     return MachineFunctionProperties()
134       .set(MachineFunctionProperties::Property::IsSSA);
135   }
136 
137   StringRef getPassName() const override {
138     return "R600 Vector Registers Merge Pass";
139   }
140 
141   bool runOnMachineFunction(MachineFunction &Fn) override;
142 };
143 
144 } // end anonymous namespace
145 
146 INITIALIZE_PASS_BEGIN(R600VectorRegMerger, DEBUG_TYPE,
147                      "R600 Vector Reg Merger", false, false)
148 INITIALIZE_PASS_END(R600VectorRegMerger, DEBUG_TYPE,
149                     "R600 Vector Reg Merger", false, false)
150 
151 char R600VectorRegMerger::ID = 0;
152 
153 char &llvm::R600VectorRegMergerID = R600VectorRegMerger::ID;
154 
155 bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI)
156     const {
157   if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
158     return true;
159   switch (MI.getOpcode()) {
160   case R600::R600_ExportSwz:
161   case R600::EG_ExportSwz:
162     return true;
163   default:
164     return false;
165   }
166 }
167 
168 bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched,
169     RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned>> &Remap)
170     const {
171   unsigned CurrentUndexIdx = 0;
172   for (DenseMap<Register, unsigned>::iterator It = ToMerge->RegToChan.begin(),
173       E = ToMerge->RegToChan.end(); It != E; ++It) {
174     DenseMap<Register, unsigned>::const_iterator PosInUntouched =
175         Untouched->RegToChan.find((*It).first);
176     if (PosInUntouched != Untouched->RegToChan.end()) {
177       Remap.push_back(std::pair<unsigned, unsigned>
178           ((*It).second, (*PosInUntouched).second));
179       continue;
180     }
181     if (CurrentUndexIdx >= Untouched->UndefReg.size())
182       return false;
183     Remap.push_back(std::pair<unsigned, unsigned>
184         ((*It).second, Untouched->UndefReg[CurrentUndexIdx++]));
185   }
186 
187   return true;
188 }
189 
190 static
191 unsigned getReassignedChan(
192     const std::vector<std::pair<unsigned, unsigned>> &RemapChan,
193     unsigned Chan) {
194   for (unsigned j = 0, je = RemapChan.size(); j < je; j++) {
195     if (RemapChan[j].first == Chan)
196       return RemapChan[j].second;
197   }
198   llvm_unreachable("Chan wasn't reassigned");
199 }
200 
201 MachineInstr *R600VectorRegMerger::RebuildVector(
202     RegSeqInfo *RSI, const RegSeqInfo *BaseRSI,
203     const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
204   Register Reg = RSI->Instr->getOperand(0).getReg();
205   MachineBasicBlock::iterator Pos = RSI->Instr;
206   MachineBasicBlock &MBB = *Pos->getParent();
207   DebugLoc DL = Pos->getDebugLoc();
208 
209   Register SrcVec = BaseRSI->Instr->getOperand(0).getReg();
210   DenseMap<Register, unsigned> UpdatedRegToChan = BaseRSI->RegToChan;
211   std::vector<Register> UpdatedUndef = BaseRSI->UndefReg;
212   for (DenseMap<Register, unsigned>::iterator It = RSI->RegToChan.begin(),
213       E = RSI->RegToChan.end(); It != E; ++It) {
214     Register DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass);
215     unsigned SubReg = (*It).first;
216     unsigned Swizzle = (*It).second;
217     unsigned Chan = getReassignedChan(RemapChan, Swizzle);
218 
219     MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(R600::INSERT_SUBREG),
220         DstReg)
221         .addReg(SrcVec)
222         .addReg(SubReg)
223         .addImm(Chan);
224     UpdatedRegToChan[SubReg] = Chan;
225     std::vector<Register>::iterator ChanPos = llvm::find(UpdatedUndef, Chan);
226     if (ChanPos != UpdatedUndef.end())
227       UpdatedUndef.erase(ChanPos);
228     assert(!is_contained(UpdatedUndef, Chan) &&
229            "UpdatedUndef shouldn't contain Chan more than once!");
230     LLVM_DEBUG(dbgs() << "    ->"; Tmp->dump(););
231     (void)Tmp;
232     SrcVec = DstReg;
233   }
234   MachineInstr *NewMI =
235       BuildMI(MBB, Pos, DL, TII->get(R600::COPY), Reg).addReg(SrcVec);
236   LLVM_DEBUG(dbgs() << "    ->"; NewMI->dump(););
237 
238   LLVM_DEBUG(dbgs() << "  Updating Swizzle:\n");
239   for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
240       E = MRI->use_instr_end(); It != E; ++It) {
241     LLVM_DEBUG(dbgs() << "    "; (*It).dump(); dbgs() << "    ->");
242     SwizzleInput(*It, RemapChan);
243     LLVM_DEBUG((*It).dump());
244   }
245   RSI->Instr->eraseFromParent();
246 
247   // Update RSI
248   RSI->Instr = NewMI;
249   RSI->RegToChan = UpdatedRegToChan;
250   RSI->UndefReg = UpdatedUndef;
251 
252   return NewMI;
253 }
254 
255 void R600VectorRegMerger::RemoveMI(MachineInstr *MI) {
256   for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(),
257       E = PreviousRegSeqByReg.end(); It != E; ++It) {
258     std::vector<MachineInstr *> &MIs = (*It).second;
259     MIs.erase(llvm::find(MIs, MI), MIs.end());
260   }
261   for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(),
262       E = PreviousRegSeqByUndefCount.end(); It != E; ++It) {
263     std::vector<MachineInstr *> &MIs = (*It).second;
264     MIs.erase(llvm::find(MIs, MI), MIs.end());
265   }
266 }
267 
268 void R600VectorRegMerger::SwizzleInput(MachineInstr &MI,
269     const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
270   unsigned Offset;
271   if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
272     Offset = 2;
273   else
274     Offset = 3;
275   for (unsigned i = 0; i < 4; i++) {
276     unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1;
277     for (unsigned j = 0, e = RemapChan.size(); j < e; j++) {
278       if (RemapChan[j].first == Swizzle) {
279         MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1);
280         break;
281       }
282     }
283   }
284 }
285 
286 bool R600VectorRegMerger::areAllUsesSwizzeable(Register Reg) const {
287   for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
288       E = MRI->use_instr_end(); It != E; ++It) {
289     if (!canSwizzle(*It))
290       return false;
291   }
292   return true;
293 }
294 
295 bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI,
296     RegSeqInfo &CompatibleRSI,
297     std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
298   for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(),
299       MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) {
300     if (!MOp->isReg())
301       continue;
302     if (PreviousRegSeqByReg[MOp->getReg()].empty())
303       continue;
304     for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) {
305       CompatibleRSI = PreviousRegSeq[MI];
306       if (RSI == CompatibleRSI)
307         continue;
308       if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan))
309         return true;
310     }
311   }
312   return false;
313 }
314 
315 bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI,
316     RegSeqInfo &CompatibleRSI,
317     std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
318   unsigned NeededUndefs = 4 - RSI.UndefReg.size();
319   if (PreviousRegSeqByUndefCount[NeededUndefs].empty())
320     return false;
321   std::vector<MachineInstr *> &MIs =
322       PreviousRegSeqByUndefCount[NeededUndefs];
323   CompatibleRSI = PreviousRegSeq[MIs.back()];
324   tryMergeVector(&CompatibleRSI, &RSI, RemapChan);
325   return true;
326 }
327 
328 void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) {
329   for (DenseMap<Register, unsigned>::const_iterator
330   It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) {
331     PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr);
332   }
333   PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr);
334   PreviousRegSeq[RSI.Instr] = RSI;
335 }
336 
337 bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
338   if (skipFunction(Fn.getFunction()))
339     return false;
340 
341   const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
342   TII = ST.getInstrInfo();
343   MRI = &Fn.getRegInfo();
344 
345   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
346        MBB != MBBe; ++MBB) {
347     MachineBasicBlock *MB = &*MBB;
348     PreviousRegSeq.clear();
349     PreviousRegSeqByReg.clear();
350     PreviousRegSeqByUndefCount.clear();
351 
352     for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end();
353          MII != MIIE; ++MII) {
354       MachineInstr &MI = *MII;
355       if (MI.getOpcode() != R600::REG_SEQUENCE) {
356         if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
357           Register Reg = MI.getOperand(1).getReg();
358           for (MachineRegisterInfo::def_instr_iterator
359                It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end();
360                It != E; ++It) {
361             RemoveMI(&(*It));
362           }
363         }
364         continue;
365       }
366 
367       RegSeqInfo RSI(*MRI, &MI);
368 
369       // All uses of MI are swizzeable ?
370       Register Reg = MI.getOperand(0).getReg();
371       if (!areAllUsesSwizzeable(Reg))
372         continue;
373 
374       LLVM_DEBUG({
375         dbgs() << "Trying to optimize ";
376         MI.dump();
377       });
378 
379       RegSeqInfo CandidateRSI;
380       std::vector<std::pair<unsigned, unsigned>> RemapChan;
381       LLVM_DEBUG(dbgs() << "Using common slots...\n";);
382       if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) {
383         // Remove CandidateRSI mapping
384         RemoveMI(CandidateRSI.Instr);
385         MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
386         trackRSI(RSI);
387         continue;
388       }
389       LLVM_DEBUG(dbgs() << "Using free slots...\n";);
390       RemapChan.clear();
391       if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) {
392         RemoveMI(CandidateRSI.Instr);
393         MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
394         trackRSI(RSI);
395         continue;
396       }
397       //Failed to merge
398       trackRSI(RSI);
399     }
400   }
401   return false;
402 }
403 
404 llvm::FunctionPass *llvm::createR600VectorRegMerger() {
405   return new R600VectorRegMerger();
406 }
407