xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1*06c3fb27SDimitry Andric //===-------------- GCNRewritePartialRegUses.cpp --------------------------===//
2*06c3fb27SDimitry Andric //
3*06c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*06c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*06c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*06c3fb27SDimitry Andric //
7*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
8*06c3fb27SDimitry Andric /// \file
9*06c3fb27SDimitry Andric /// RenameIndependentSubregs pass leaves large partially used super registers,
10*06c3fb27SDimitry Andric /// for example:
11*06c3fb27SDimitry Andric ///   undef %0.sub4:VReg_1024 = ...
12*06c3fb27SDimitry Andric ///   %0.sub5:VReg_1024 = ...
13*06c3fb27SDimitry Andric ///   %0.sub6:VReg_1024 = ...
14*06c3fb27SDimitry Andric ///   %0.sub7:VReg_1024 = ...
15*06c3fb27SDimitry Andric ///   use %0.sub4_sub5_sub6_sub7
16*06c3fb27SDimitry Andric ///   use %0.sub6_sub7
17*06c3fb27SDimitry Andric ///
18*06c3fb27SDimitry Andric /// GCNRewritePartialRegUses goes right after RenameIndependentSubregs and
19*06c3fb27SDimitry Andric /// rewrites such partially used super registers with registers of minimal size:
20*06c3fb27SDimitry Andric ///   undef %0.sub0:VReg_128 = ...
21*06c3fb27SDimitry Andric ///   %0.sub1:VReg_128 = ...
22*06c3fb27SDimitry Andric ///   %0.sub2:VReg_128 = ...
23*06c3fb27SDimitry Andric ///   %0.sub3:VReg_128 = ...
24*06c3fb27SDimitry Andric ///   use %0.sub0_sub1_sub2_sub3
25*06c3fb27SDimitry Andric ///   use %0.sub2_sub3
26*06c3fb27SDimitry Andric ///
/// This avoids having to track subreg lanemasks during register pressure
/// calculation and creates more possibilities for code that is unaware of
/// lanemasks.
29*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
30*06c3fb27SDimitry Andric 
31*06c3fb27SDimitry Andric #include "AMDGPU.h"
32*06c3fb27SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
33*06c3fb27SDimitry Andric #include "SIRegisterInfo.h"
34*06c3fb27SDimitry Andric #include "llvm/CodeGen/LiveInterval.h"
35*06c3fb27SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
36*06c3fb27SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
37*06c3fb27SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
38*06c3fb27SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
39*06c3fb27SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
40*06c3fb27SDimitry Andric #include "llvm/InitializePasses.h"
41*06c3fb27SDimitry Andric #include "llvm/Pass.h"
42*06c3fb27SDimitry Andric 
43*06c3fb27SDimitry Andric using namespace llvm;
44*06c3fb27SDimitry Andric 
45*06c3fb27SDimitry Andric #define DEBUG_TYPE "rewrite-partial-reg-uses"
46*06c3fb27SDimitry Andric 
47*06c3fb27SDimitry Andric namespace {
48*06c3fb27SDimitry Andric 
49*06c3fb27SDimitry Andric class GCNRewritePartialRegUses : public MachineFunctionPass {
50*06c3fb27SDimitry Andric public:
51*06c3fb27SDimitry Andric   static char ID;
52*06c3fb27SDimitry Andric   GCNRewritePartialRegUses() : MachineFunctionPass(ID) {}
53*06c3fb27SDimitry Andric 
54*06c3fb27SDimitry Andric   StringRef getPassName() const override {
55*06c3fb27SDimitry Andric     return "Rewrite Partial Register Uses";
56*06c3fb27SDimitry Andric   }
57*06c3fb27SDimitry Andric 
58*06c3fb27SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
59*06c3fb27SDimitry Andric     AU.setPreservesCFG();
60*06c3fb27SDimitry Andric     AU.addPreserved<LiveIntervals>();
61*06c3fb27SDimitry Andric     AU.addPreserved<SlotIndexes>();
62*06c3fb27SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
63*06c3fb27SDimitry Andric   }
64*06c3fb27SDimitry Andric 
65*06c3fb27SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
66*06c3fb27SDimitry Andric 
67*06c3fb27SDimitry Andric private:
68*06c3fb27SDimitry Andric   MachineRegisterInfo *MRI;
69*06c3fb27SDimitry Andric   const SIRegisterInfo *TRI;
70*06c3fb27SDimitry Andric   const TargetInstrInfo *TII;
71*06c3fb27SDimitry Andric   LiveIntervals *LIS;
72*06c3fb27SDimitry Andric 
73*06c3fb27SDimitry Andric   /// Rewrite partially used register Reg by shifting all its subregisters to
74*06c3fb27SDimitry Andric   /// the right and replacing the original register with a register of minimal
75*06c3fb27SDimitry Andric   /// size. Return true if the change has been made.
76*06c3fb27SDimitry Andric   bool rewriteReg(Register Reg) const;
77*06c3fb27SDimitry Andric 
78*06c3fb27SDimitry Andric   /// Value type for SubRegMap below.
79*06c3fb27SDimitry Andric   struct SubRegInfo {
80*06c3fb27SDimitry Andric     /// Register class required to hold the value stored in the SubReg.
81*06c3fb27SDimitry Andric     const TargetRegisterClass *RC;
82*06c3fb27SDimitry Andric 
83*06c3fb27SDimitry Andric     /// Index for the right-shifted subregister. If 0 this is the "covering"
84*06c3fb27SDimitry Andric     /// subreg i.e. subreg that covers all others. Covering subreg becomes the
85*06c3fb27SDimitry Andric     /// whole register after the replacement.
86*06c3fb27SDimitry Andric     unsigned SubReg = AMDGPU::NoSubRegister;
87*06c3fb27SDimitry Andric     SubRegInfo(const TargetRegisterClass *RC_ = nullptr) : RC(RC_) {}
88*06c3fb27SDimitry Andric   };
89*06c3fb27SDimitry Andric 
90*06c3fb27SDimitry Andric   /// Map OldSubReg -> { RC, NewSubReg }. Used as in/out container.
91*06c3fb27SDimitry Andric   typedef SmallDenseMap<unsigned, SubRegInfo> SubRegMap;
92*06c3fb27SDimitry Andric 
93*06c3fb27SDimitry Andric   /// Given register class RC and the set of used subregs as keys in the SubRegs
94*06c3fb27SDimitry Andric   /// map return new register class and indexes of right-shifted subregs as
95*06c3fb27SDimitry Andric   /// values in SubRegs map such that the resulting regclass would contain
96*06c3fb27SDimitry Andric   /// registers of minimal size.
97*06c3fb27SDimitry Andric   const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC,
98*06c3fb27SDimitry Andric                                            SubRegMap &SubRegs) const;
99*06c3fb27SDimitry Andric 
100*06c3fb27SDimitry Andric   /// Given regclass RC and pairs of [OldSubReg, SubRegRC] in SubRegs try to
101*06c3fb27SDimitry Andric   /// find new regclass such that:
102*06c3fb27SDimitry Andric   ///   1. It has subregs obtained by shifting each OldSubReg by RShift number
103*06c3fb27SDimitry Andric   ///      of bits to the right. Every "shifted" subreg should have the same
104*06c3fb27SDimitry Andric   ///      SubRegRC. SubRegRC can be null, in this case it initialized using
105*06c3fb27SDimitry Andric   ///      getSubRegisterClass. If CoverSubregIdx is not zero it's a subreg that
106*06c3fb27SDimitry Andric   ///      "covers" all other subregs in pairs. Basically such subreg becomes a
107*06c3fb27SDimitry Andric   ///      whole register.
108*06c3fb27SDimitry Andric   ///   2. Resulting register class contains registers of minimal size but not
109*06c3fb27SDimitry Andric   ///      less than RegNumBits.
110*06c3fb27SDimitry Andric   ///
111*06c3fb27SDimitry Andric   /// SubRegs is map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out
112*06c3fb27SDimitry Andric   /// parameter:
113*06c3fb27SDimitry Andric   ///   OldSubReg - input parameter,
114*06c3fb27SDimitry Andric   ///   SubRegRC  - in/out, should be changed for unknown regclass,
115*06c3fb27SDimitry Andric   ///   NewSubReg - output, contains shifted subregs on return.
116*06c3fb27SDimitry Andric   const TargetRegisterClass *
117*06c3fb27SDimitry Andric   getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
118*06c3fb27SDimitry Andric                                 unsigned RegNumBits, unsigned CoverSubregIdx,
119*06c3fb27SDimitry Andric                                 SubRegMap &SubRegs) const;
120*06c3fb27SDimitry Andric 
121*06c3fb27SDimitry Andric   /// Update live intervals after rewriting OldReg to NewReg with SubRegs map
122*06c3fb27SDimitry Andric   /// describing OldSubReg -> NewSubReg mapping.
123*06c3fb27SDimitry Andric   void updateLiveIntervals(Register OldReg, Register NewReg,
124*06c3fb27SDimitry Andric                            SubRegMap &SubRegs) const;
125*06c3fb27SDimitry Andric 
126*06c3fb27SDimitry Andric   /// Helper methods.
127*06c3fb27SDimitry Andric 
128*06c3fb27SDimitry Andric   /// Return reg class expected by a MO's parent instruction for a given MO.
129*06c3fb27SDimitry Andric   const TargetRegisterClass *getOperandRegClass(MachineOperand &MO) const;
130*06c3fb27SDimitry Andric 
131*06c3fb27SDimitry Andric   /// Find right-shifted by RShift amount version of the SubReg if it exists,
132*06c3fb27SDimitry Andric   /// return 0 otherwise.
133*06c3fb27SDimitry Andric   unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const;
134*06c3fb27SDimitry Andric 
135*06c3fb27SDimitry Andric   /// Find subreg index with a given Offset and Size, return 0 if there is no
136*06c3fb27SDimitry Andric   /// such subregister index. The result is cached in SubRegs data-member.
137*06c3fb27SDimitry Andric   unsigned getSubReg(unsigned Offset, unsigned Size) const;
138*06c3fb27SDimitry Andric 
139*06c3fb27SDimitry Andric   /// Cache for getSubReg method: {Offset, Size} -> SubReg index.
140*06c3fb27SDimitry Andric   mutable SmallDenseMap<std::pair<unsigned, unsigned>, unsigned> SubRegs;
141*06c3fb27SDimitry Andric 
142*06c3fb27SDimitry Andric   /// Return bit mask that contains all register classes that are projected into
143*06c3fb27SDimitry Andric   /// RC by SubRegIdx. The result is cached in SuperRegMasks data-member.
144*06c3fb27SDimitry Andric   const uint32_t *getSuperRegClassMask(const TargetRegisterClass *RC,
145*06c3fb27SDimitry Andric                                        unsigned SubRegIdx) const;
146*06c3fb27SDimitry Andric 
147*06c3fb27SDimitry Andric   /// Cache for getSuperRegClassMask method: { RC, SubRegIdx } -> Class bitmask.
148*06c3fb27SDimitry Andric   mutable SmallDenseMap<std::pair<const TargetRegisterClass *, unsigned>,
149*06c3fb27SDimitry Andric                         const uint32_t *>
150*06c3fb27SDimitry Andric       SuperRegMasks;
151*06c3fb27SDimitry Andric 
152*06c3fb27SDimitry Andric   /// Return bitmask containing all allocatable register classes with registers
153*06c3fb27SDimitry Andric   /// aligned at AlignNumBits. The result is cached in
154*06c3fb27SDimitry Andric   /// AllocatableAndAlignedRegClassMasks data-member.
155*06c3fb27SDimitry Andric   const BitVector &
156*06c3fb27SDimitry Andric   getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const;
157*06c3fb27SDimitry Andric 
158*06c3fb27SDimitry Andric   /// Cache for getAllocatableAndAlignedRegClassMask method:
159*06c3fb27SDimitry Andric   ///   AlignNumBits -> Class bitmask.
160*06c3fb27SDimitry Andric   mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;
161*06c3fb27SDimitry Andric };
162*06c3fb27SDimitry Andric 
163*06c3fb27SDimitry Andric } // end anonymous namespace
164*06c3fb27SDimitry Andric 
165*06c3fb27SDimitry Andric // TODO: move this to the tablegen and use binary search by Offset.
166*06c3fb27SDimitry Andric unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
167*06c3fb27SDimitry Andric                                              unsigned Size) const {
168*06c3fb27SDimitry Andric   const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
169*06c3fb27SDimitry Andric   if (Inserted) {
170*06c3fb27SDimitry Andric     for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
171*06c3fb27SDimitry Andric       if (TRI->getSubRegIdxOffset(Idx) == Offset &&
172*06c3fb27SDimitry Andric           TRI->getSubRegIdxSize(Idx) == Size) {
173*06c3fb27SDimitry Andric         I->second = Idx;
174*06c3fb27SDimitry Andric         break;
175*06c3fb27SDimitry Andric       }
176*06c3fb27SDimitry Andric     }
177*06c3fb27SDimitry Andric   }
178*06c3fb27SDimitry Andric   return I->second;
179*06c3fb27SDimitry Andric }
180*06c3fb27SDimitry Andric 
181*06c3fb27SDimitry Andric unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg,
182*06c3fb27SDimitry Andric                                                unsigned RShift) const {
183*06c3fb27SDimitry Andric   unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
184*06c3fb27SDimitry Andric   return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
185*06c3fb27SDimitry Andric }
186*06c3fb27SDimitry Andric 
187*06c3fb27SDimitry Andric const uint32_t *
188*06c3fb27SDimitry Andric GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
189*06c3fb27SDimitry Andric                                                unsigned SubRegIdx) const {
190*06c3fb27SDimitry Andric   const auto [I, Inserted] =
191*06c3fb27SDimitry Andric       SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
192*06c3fb27SDimitry Andric   if (Inserted) {
193*06c3fb27SDimitry Andric     for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) {
194*06c3fb27SDimitry Andric       if (RCI.getSubReg() == SubRegIdx) {
195*06c3fb27SDimitry Andric         I->second = RCI.getMask();
196*06c3fb27SDimitry Andric         break;
197*06c3fb27SDimitry Andric       }
198*06c3fb27SDimitry Andric     }
199*06c3fb27SDimitry Andric   }
200*06c3fb27SDimitry Andric   return I->second;
201*06c3fb27SDimitry Andric }
202*06c3fb27SDimitry Andric 
203*06c3fb27SDimitry Andric const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
204*06c3fb27SDimitry Andric     unsigned AlignNumBits) const {
205*06c3fb27SDimitry Andric   const auto [I, Inserted] =
206*06c3fb27SDimitry Andric       AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
207*06c3fb27SDimitry Andric   if (Inserted) {
208*06c3fb27SDimitry Andric     BitVector &BV = I->second;
209*06c3fb27SDimitry Andric     BV.resize(TRI->getNumRegClasses());
210*06c3fb27SDimitry Andric     for (unsigned ClassID = 0; ClassID < TRI->getNumRegClasses(); ++ClassID) {
211*06c3fb27SDimitry Andric       auto *RC = TRI->getRegClass(ClassID);
212*06c3fb27SDimitry Andric       if (RC->isAllocatable() && TRI->isRegClassAligned(RC, AlignNumBits))
213*06c3fb27SDimitry Andric         BV.set(ClassID);
214*06c3fb27SDimitry Andric     }
215*06c3fb27SDimitry Andric   }
216*06c3fb27SDimitry Andric   return I->second;
217*06c3fb27SDimitry Andric }
218*06c3fb27SDimitry Andric 
219*06c3fb27SDimitry Andric const TargetRegisterClass *
220*06c3fb27SDimitry Andric GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
221*06c3fb27SDimitry Andric     const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
222*06c3fb27SDimitry Andric     unsigned CoverSubregIdx, SubRegMap &SubRegs) const {
223*06c3fb27SDimitry Andric 
224*06c3fb27SDimitry Andric   unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC);
225*06c3fb27SDimitry Andric   LLVM_DEBUG(dbgs() << "  Shift " << RShift << ", reg align " << RCAlign
226*06c3fb27SDimitry Andric                     << '\n');
227*06c3fb27SDimitry Andric 
228*06c3fb27SDimitry Andric   BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
229*06c3fb27SDimitry Andric   for (auto &[OldSubReg, SRI] : SubRegs) {
230*06c3fb27SDimitry Andric     auto &[SubRegRC, NewSubReg] = SRI;
231*06c3fb27SDimitry Andric 
232*06c3fb27SDimitry Andric     // Register class may be unknown, for example:
233*06c3fb27SDimitry Andric     //   undef %0.sub4:sgpr_1024 = S_MOV_B32 01
234*06c3fb27SDimitry Andric     //   %0.sub5:sgpr_1024 = S_MOV_B32 02
235*06c3fb27SDimitry Andric     //   %1:vreg_64 = COPY %0.sub4_sub5
236*06c3fb27SDimitry Andric     // Register classes for subregs 'sub4' and 'sub5' are known from the
237*06c3fb27SDimitry Andric     // description of destination operand of S_MOV_B32 instruction but the
238*06c3fb27SDimitry Andric     // class for the subreg 'sub4_sub5' isn't specified by the COPY instruction.
239*06c3fb27SDimitry Andric     if (!SubRegRC)
240*06c3fb27SDimitry Andric       SubRegRC = TRI->getSubRegisterClass(RC, OldSubReg);
241*06c3fb27SDimitry Andric 
242*06c3fb27SDimitry Andric     if (!SubRegRC)
243*06c3fb27SDimitry Andric       return nullptr;
244*06c3fb27SDimitry Andric 
245*06c3fb27SDimitry Andric     LLVM_DEBUG(dbgs() << "  " << TRI->getSubRegIndexName(OldSubReg) << ':'
246*06c3fb27SDimitry Andric                       << TRI->getRegClassName(SubRegRC)
247*06c3fb27SDimitry Andric                       << (SubRegRC->isAllocatable() ? "" : " not alloc")
248*06c3fb27SDimitry Andric                       << " -> ");
249*06c3fb27SDimitry Andric 
250*06c3fb27SDimitry Andric     if (OldSubReg == CoverSubregIdx) {
251*06c3fb27SDimitry Andric       NewSubReg = AMDGPU::NoSubRegister;
252*06c3fb27SDimitry Andric       LLVM_DEBUG(dbgs() << "whole reg");
253*06c3fb27SDimitry Andric     } else {
254*06c3fb27SDimitry Andric       NewSubReg = shiftSubReg(OldSubReg, RShift);
255*06c3fb27SDimitry Andric       if (!NewSubReg) {
256*06c3fb27SDimitry Andric         LLVM_DEBUG(dbgs() << "none\n");
257*06c3fb27SDimitry Andric         return nullptr;
258*06c3fb27SDimitry Andric       }
259*06c3fb27SDimitry Andric       LLVM_DEBUG(dbgs() << TRI->getSubRegIndexName(NewSubReg));
260*06c3fb27SDimitry Andric     }
261*06c3fb27SDimitry Andric 
262*06c3fb27SDimitry Andric     const uint32_t *Mask = NewSubReg ? getSuperRegClassMask(SubRegRC, NewSubReg)
263*06c3fb27SDimitry Andric                                      : SubRegRC->getSubClassMask();
264*06c3fb27SDimitry Andric     if (!Mask)
265*06c3fb27SDimitry Andric       llvm_unreachable("no register class mask?");
266*06c3fb27SDimitry Andric 
267*06c3fb27SDimitry Andric     ClassMask.clearBitsNotInMask(Mask);
268*06c3fb27SDimitry Andric     // Don't try to early exit because checking if ClassMask has set bits isn't
269*06c3fb27SDimitry Andric     // that cheap and we expect it to pass in most cases.
270*06c3fb27SDimitry Andric     LLVM_DEBUG(dbgs() << ", num regclasses " << ClassMask.count() << '\n');
271*06c3fb27SDimitry Andric   }
272*06c3fb27SDimitry Andric 
273*06c3fb27SDimitry Andric   // ClassMask is the set of all register classes such that each class is
274*06c3fb27SDimitry Andric   // allocatable, aligned, has all shifted subregs and each subreg has required
275*06c3fb27SDimitry Andric   // register class (see SubRegRC above). Now select first (that is largest)
276*06c3fb27SDimitry Andric   // register class with registers of minimal but not less than RegNumBits size.
277*06c3fb27SDimitry Andric   // We have to check register size because we may encounter classes of smaller
278*06c3fb27SDimitry Andric   // registers like VReg_1 in some situations.
279*06c3fb27SDimitry Andric   const TargetRegisterClass *MinRC = nullptr;
280*06c3fb27SDimitry Andric   unsigned MinNumBits = std::numeric_limits<unsigned>::max();
281*06c3fb27SDimitry Andric   for (unsigned ClassID : ClassMask.set_bits()) {
282*06c3fb27SDimitry Andric     auto *RC = TRI->getRegClass(ClassID);
283*06c3fb27SDimitry Andric     unsigned NumBits = TRI->getRegSizeInBits(*RC);
284*06c3fb27SDimitry Andric     if (NumBits < MinNumBits && NumBits >= RegNumBits) {
285*06c3fb27SDimitry Andric       MinNumBits = NumBits;
286*06c3fb27SDimitry Andric       MinRC = RC;
287*06c3fb27SDimitry Andric     }
288*06c3fb27SDimitry Andric     if (MinNumBits == RegNumBits)
289*06c3fb27SDimitry Andric       break;
290*06c3fb27SDimitry Andric   }
291*06c3fb27SDimitry Andric #ifndef NDEBUG
292*06c3fb27SDimitry Andric   if (MinRC) {
293*06c3fb27SDimitry Andric     assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign));
294*06c3fb27SDimitry Andric     for (auto [SubReg, SRI] : SubRegs)
295*06c3fb27SDimitry Andric       // Check that all registers in MinRC support SRI.SubReg subregister.
296*06c3fb27SDimitry Andric       assert(MinRC == TRI->getSubClassWithSubReg(MinRC, SRI.SubReg));
297*06c3fb27SDimitry Andric   }
298*06c3fb27SDimitry Andric #endif
299*06c3fb27SDimitry Andric   // There might be zero RShift - in this case we just trying to find smaller
300*06c3fb27SDimitry Andric   // register.
301*06c3fb27SDimitry Andric   return (MinRC != RC || RShift != 0) ? MinRC : nullptr;
302*06c3fb27SDimitry Andric }
303*06c3fb27SDimitry Andric 
304*06c3fb27SDimitry Andric const TargetRegisterClass *
305*06c3fb27SDimitry Andric GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
306*06c3fb27SDimitry Andric                                         SubRegMap &SubRegs) const {
307*06c3fb27SDimitry Andric   unsigned CoverSubreg = AMDGPU::NoSubRegister;
308*06c3fb27SDimitry Andric   unsigned Offset = std::numeric_limits<unsigned>::max();
309*06c3fb27SDimitry Andric   unsigned End = 0;
310*06c3fb27SDimitry Andric   for (auto [SubReg, SRI] : SubRegs) {
311*06c3fb27SDimitry Andric     unsigned SubRegOffset = TRI->getSubRegIdxOffset(SubReg);
312*06c3fb27SDimitry Andric     unsigned SubRegEnd = SubRegOffset + TRI->getSubRegIdxSize(SubReg);
313*06c3fb27SDimitry Andric     if (SubRegOffset < Offset) {
314*06c3fb27SDimitry Andric       Offset = SubRegOffset;
315*06c3fb27SDimitry Andric       CoverSubreg = AMDGPU::NoSubRegister;
316*06c3fb27SDimitry Andric     }
317*06c3fb27SDimitry Andric     if (SubRegEnd > End) {
318*06c3fb27SDimitry Andric       End = SubRegEnd;
319*06c3fb27SDimitry Andric       CoverSubreg = AMDGPU::NoSubRegister;
320*06c3fb27SDimitry Andric     }
321*06c3fb27SDimitry Andric     if (SubRegOffset == Offset && SubRegEnd == End)
322*06c3fb27SDimitry Andric       CoverSubreg = SubReg;
323*06c3fb27SDimitry Andric   }
324*06c3fb27SDimitry Andric   // If covering subreg is found shift everything so the covering subreg would
325*06c3fb27SDimitry Andric   // be in the rightmost position.
326*06c3fb27SDimitry Andric   if (CoverSubreg != AMDGPU::NoSubRegister)
327*06c3fb27SDimitry Andric     return getRegClassWithShiftedSubregs(RC, Offset, End - Offset, CoverSubreg,
328*06c3fb27SDimitry Andric                                          SubRegs);
329*06c3fb27SDimitry Andric 
330*06c3fb27SDimitry Andric   // Otherwise find subreg with maximum required alignment and shift it and all
331*06c3fb27SDimitry Andric   // other subregs to the rightmost possible position with respect to the
332*06c3fb27SDimitry Andric   // alignment.
333*06c3fb27SDimitry Andric   unsigned MaxAlign = 0;
334*06c3fb27SDimitry Andric   for (auto [SubReg, SRI] : SubRegs)
335*06c3fb27SDimitry Andric     MaxAlign = std::max(MaxAlign, TRI->getSubRegAlignmentNumBits(RC, SubReg));
336*06c3fb27SDimitry Andric 
337*06c3fb27SDimitry Andric   unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max();
338*06c3fb27SDimitry Andric   for (auto [SubReg, SRI] : SubRegs) {
339*06c3fb27SDimitry Andric     if (TRI->getSubRegAlignmentNumBits(RC, SubReg) != MaxAlign)
340*06c3fb27SDimitry Andric       continue;
341*06c3fb27SDimitry Andric     FirstMaxAlignedSubRegOffset =
342*06c3fb27SDimitry Andric         std::min(FirstMaxAlignedSubRegOffset, TRI->getSubRegIdxOffset(SubReg));
343*06c3fb27SDimitry Andric     if (FirstMaxAlignedSubRegOffset == Offset)
344*06c3fb27SDimitry Andric       break;
345*06c3fb27SDimitry Andric   }
346*06c3fb27SDimitry Andric 
347*06c3fb27SDimitry Andric   unsigned NewOffsetOfMaxAlignedSubReg =
348*06c3fb27SDimitry Andric       alignTo(FirstMaxAlignedSubRegOffset - Offset, MaxAlign);
349*06c3fb27SDimitry Andric 
350*06c3fb27SDimitry Andric   if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset)
351*06c3fb27SDimitry Andric     llvm_unreachable("misaligned subreg");
352*06c3fb27SDimitry Andric 
353*06c3fb27SDimitry Andric   unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
354*06c3fb27SDimitry Andric   return getRegClassWithShiftedSubregs(RC, RShift, End - RShift, 0, SubRegs);
355*06c3fb27SDimitry Andric }
356*06c3fb27SDimitry Andric 
357*06c3fb27SDimitry Andric // Only the subrange's lanemasks of the original interval need to be modified.
358*06c3fb27SDimitry Andric // Subrange for a covering subreg becomes the main range.
359*06c3fb27SDimitry Andric void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
360*06c3fb27SDimitry Andric                                                    Register NewReg,
361*06c3fb27SDimitry Andric                                                    SubRegMap &SubRegs) const {
362*06c3fb27SDimitry Andric   if (!LIS->hasInterval(OldReg))
363*06c3fb27SDimitry Andric     return;
364*06c3fb27SDimitry Andric 
365*06c3fb27SDimitry Andric   auto &OldLI = LIS->getInterval(OldReg);
366*06c3fb27SDimitry Andric   auto &NewLI = LIS->createEmptyInterval(NewReg);
367*06c3fb27SDimitry Andric 
368*06c3fb27SDimitry Andric   auto &Allocator = LIS->getVNInfoAllocator();
369*06c3fb27SDimitry Andric   NewLI.setWeight(OldLI.weight());
370*06c3fb27SDimitry Andric 
371*06c3fb27SDimitry Andric   for (auto &SR : OldLI.subranges()) {
372*06c3fb27SDimitry Andric     auto I = find_if(SubRegs, [&](auto &P) {
373*06c3fb27SDimitry Andric       return SR.LaneMask == TRI->getSubRegIndexLaneMask(P.first);
374*06c3fb27SDimitry Andric     });
375*06c3fb27SDimitry Andric 
376*06c3fb27SDimitry Andric     if (I == SubRegs.end()) {
377*06c3fb27SDimitry Andric       // There might be a situation when subranges don't exactly match used
378*06c3fb27SDimitry Andric       // subregs, for example:
379*06c3fb27SDimitry Andric       // %120 [160r,1392r:0) 0@160r
380*06c3fb27SDimitry Andric       //    L000000000000C000 [160r,1392r:0) 0@160r
381*06c3fb27SDimitry Andric       //    L0000000000003000 [160r,1392r:0) 0@160r
382*06c3fb27SDimitry Andric       //    L0000000000000C00 [160r,1392r:0) 0@160r
383*06c3fb27SDimitry Andric       //    L0000000000000300 [160r,1392r:0) 0@160r
384*06c3fb27SDimitry Andric       //    L0000000000000003 [160r,1104r:0) 0@160r
385*06c3fb27SDimitry Andric       //    L000000000000000C [160r,1104r:0) 0@160r
386*06c3fb27SDimitry Andric       //    L0000000000000030 [160r,1104r:0) 0@160r
387*06c3fb27SDimitry Andric       //    L00000000000000C0 [160r,1104r:0) 0@160r
388*06c3fb27SDimitry Andric       // but used subregs are:
389*06c3fb27SDimitry Andric       //    sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, L000000000000FFFF
390*06c3fb27SDimitry Andric       //    sub0_sub1_sub2_sub3, L00000000000000FF
391*06c3fb27SDimitry Andric       //    sub4_sub5_sub6_sub7, L000000000000FF00
392*06c3fb27SDimitry Andric       // In this example subregs sub0_sub1_sub2_sub3 and sub4_sub5_sub6_sub7
393*06c3fb27SDimitry Andric       // have several subranges with the same lifetime. For such cases just
394*06c3fb27SDimitry Andric       // recreate the interval.
395*06c3fb27SDimitry Andric       LIS->removeInterval(OldReg);
396*06c3fb27SDimitry Andric       LIS->removeInterval(NewReg);
397*06c3fb27SDimitry Andric       LIS->createAndComputeVirtRegInterval(NewReg);
398*06c3fb27SDimitry Andric       return;
399*06c3fb27SDimitry Andric     }
400*06c3fb27SDimitry Andric 
401*06c3fb27SDimitry Andric     if (unsigned NewSubReg = I->second.SubReg)
402*06c3fb27SDimitry Andric       NewLI.createSubRangeFrom(Allocator,
403*06c3fb27SDimitry Andric                                TRI->getSubRegIndexLaneMask(NewSubReg), SR);
404*06c3fb27SDimitry Andric     else // This is the covering subreg (0 index) - set it as main range.
405*06c3fb27SDimitry Andric       NewLI.assign(SR, Allocator);
406*06c3fb27SDimitry Andric 
407*06c3fb27SDimitry Andric     SubRegs.erase(I);
408*06c3fb27SDimitry Andric   }
409*06c3fb27SDimitry Andric   if (NewLI.empty())
410*06c3fb27SDimitry Andric     NewLI.assign(OldLI, Allocator);
411*06c3fb27SDimitry Andric   NewLI.verify(MRI);
412*06c3fb27SDimitry Andric   LIS->removeInterval(OldReg);
413*06c3fb27SDimitry Andric }
414*06c3fb27SDimitry Andric 
415*06c3fb27SDimitry Andric const TargetRegisterClass *
416*06c3fb27SDimitry Andric GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
417*06c3fb27SDimitry Andric   MachineInstr *MI = MO.getParent();
418*06c3fb27SDimitry Andric   return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI,
419*06c3fb27SDimitry Andric                           *MI->getParent()->getParent());
420*06c3fb27SDimitry Andric }
421*06c3fb27SDimitry Andric 
422*06c3fb27SDimitry Andric bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
423*06c3fb27SDimitry Andric   auto Range = MRI->reg_nodbg_operands(Reg);
424*06c3fb27SDimitry Andric   if (Range.begin() == Range.end())
425*06c3fb27SDimitry Andric     return false;
426*06c3fb27SDimitry Andric 
427*06c3fb27SDimitry Andric   for (MachineOperand &MO : Range) {
428*06c3fb27SDimitry Andric     if (MO.getSubReg() == AMDGPU::NoSubRegister) // Whole reg used, quit.
429*06c3fb27SDimitry Andric       return false;
430*06c3fb27SDimitry Andric   }
431*06c3fb27SDimitry Andric 
432*06c3fb27SDimitry Andric   auto *RC = MRI->getRegClass(Reg);
433*06c3fb27SDimitry Andric   LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
434*06c3fb27SDimitry Andric                     << ':' << TRI->getRegClassName(RC) << '\n');
435*06c3fb27SDimitry Andric 
436*06c3fb27SDimitry Andric   // Collect used subregs and constrained reg classes infered from instruction
437*06c3fb27SDimitry Andric   // operands.
438*06c3fb27SDimitry Andric   SubRegMap SubRegs;
439*06c3fb27SDimitry Andric   for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
440*06c3fb27SDimitry Andric     assert(MO.getSubReg() != AMDGPU::NoSubRegister);
441*06c3fb27SDimitry Andric     auto *OpDescRC = getOperandRegClass(MO);
442*06c3fb27SDimitry Andric     const auto [I, Inserted] = SubRegs.try_emplace(MO.getSubReg(), OpDescRC);
443*06c3fb27SDimitry Andric     if (!Inserted && OpDescRC) {
444*06c3fb27SDimitry Andric       SubRegInfo &SRI = I->second;
445*06c3fb27SDimitry Andric       SRI.RC = SRI.RC ? TRI->getCommonSubClass(SRI.RC, OpDescRC) : OpDescRC;
446*06c3fb27SDimitry Andric       if (!SRI.RC) {
447*06c3fb27SDimitry Andric         LLVM_DEBUG(dbgs() << "  Couldn't find common target regclass\n");
448*06c3fb27SDimitry Andric         return false;
449*06c3fb27SDimitry Andric       }
450*06c3fb27SDimitry Andric     }
451*06c3fb27SDimitry Andric   }
452*06c3fb27SDimitry Andric 
453*06c3fb27SDimitry Andric   auto *NewRC = getMinSizeReg(RC, SubRegs);
454*06c3fb27SDimitry Andric   if (!NewRC) {
455*06c3fb27SDimitry Andric     LLVM_DEBUG(dbgs() << "  No improvement achieved\n");
456*06c3fb27SDimitry Andric     return false;
457*06c3fb27SDimitry Andric   }
458*06c3fb27SDimitry Andric 
459*06c3fb27SDimitry Andric   Register NewReg = MRI->createVirtualRegister(NewRC);
460*06c3fb27SDimitry Andric   LLVM_DEBUG(dbgs() << "  Success " << printReg(Reg, TRI) << ':'
461*06c3fb27SDimitry Andric                     << TRI->getRegClassName(RC) << " -> "
462*06c3fb27SDimitry Andric                     << printReg(NewReg, TRI) << ':'
463*06c3fb27SDimitry Andric                     << TRI->getRegClassName(NewRC) << '\n');
464*06c3fb27SDimitry Andric 
465*06c3fb27SDimitry Andric   for (auto &MO : make_early_inc_range(MRI->reg_operands(Reg))) {
466*06c3fb27SDimitry Andric     MO.setReg(NewReg);
467*06c3fb27SDimitry Andric     // Debug info can refer to the whole reg, just leave it as it is for now.
468*06c3fb27SDimitry Andric     // TODO: create some DI shift expression?
469*06c3fb27SDimitry Andric     if (MO.isDebug() && MO.getSubReg() == 0)
470*06c3fb27SDimitry Andric       continue;
471*06c3fb27SDimitry Andric     unsigned SubReg = SubRegs[MO.getSubReg()].SubReg;
472*06c3fb27SDimitry Andric     MO.setSubReg(SubReg);
473*06c3fb27SDimitry Andric     if (SubReg == AMDGPU::NoSubRegister && MO.isDef())
474*06c3fb27SDimitry Andric       MO.setIsUndef(false);
475*06c3fb27SDimitry Andric   }
476*06c3fb27SDimitry Andric 
477*06c3fb27SDimitry Andric   if (LIS)
478*06c3fb27SDimitry Andric     updateLiveIntervals(Reg, NewReg, SubRegs);
479*06c3fb27SDimitry Andric 
480*06c3fb27SDimitry Andric   return true;
481*06c3fb27SDimitry Andric }
482*06c3fb27SDimitry Andric 
483*06c3fb27SDimitry Andric bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
484*06c3fb27SDimitry Andric   MRI = &MF.getRegInfo();
485*06c3fb27SDimitry Andric   TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
486*06c3fb27SDimitry Andric   TII = MF.getSubtarget().getInstrInfo();
487*06c3fb27SDimitry Andric   LIS = getAnalysisIfAvailable<LiveIntervals>();
488*06c3fb27SDimitry Andric   bool Changed = false;
489*06c3fb27SDimitry Andric   for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
490*06c3fb27SDimitry Andric     Changed |= rewriteReg(Register::index2VirtReg(I));
491*06c3fb27SDimitry Andric   }
492*06c3fb27SDimitry Andric   return Changed;
493*06c3fb27SDimitry Andric }
494*06c3fb27SDimitry Andric 
495*06c3fb27SDimitry Andric char GCNRewritePartialRegUses::ID;
496*06c3fb27SDimitry Andric 
497*06c3fb27SDimitry Andric char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID;
498*06c3fb27SDimitry Andric 
499*06c3fb27SDimitry Andric INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE,
500*06c3fb27SDimitry Andric                       "Rewrite Partial Register Uses", false, false)
501*06c3fb27SDimitry Andric INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE,
502*06c3fb27SDimitry Andric                     "Rewrite Partial Register Uses", false, false)
503