//===-------------- GCNRewritePartialRegUses.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// RenameIndependentSubregs pass leaves large partially used super registers,
/// for example:
///   undef %0.sub4:VReg_1024 = ...
///   %0.sub5:VReg_1024 = ...
///   %0.sub6:VReg_1024 = ...
///   %0.sub7:VReg_1024 = ...
///   use %0.sub4_sub5_sub6_sub7
///   use %0.sub6_sub7
///
/// GCNRewritePartialRegUses goes right after RenameIndependentSubregs and
/// rewrites such partially used super registers with registers of minimal size:
///   undef %0.sub0:VReg_128 = ...
///   %0.sub1:VReg_128 = ...
///   %0.sub2:VReg_128 = ...
///   %0.sub3:VReg_128 = ...
24*06c3fb27SDimitry Andric /// use %0.sub0_sub1_sub2_sub3 25*06c3fb27SDimitry Andric /// use %0.sub2_sub3 26*06c3fb27SDimitry Andric /// 27*06c3fb27SDimitry Andric /// This allows to avoid subreg lanemasks tracking during register pressure 28*06c3fb27SDimitry Andric /// calculation and creates more possibilities for the code unaware of lanemasks 29*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 30*06c3fb27SDimitry Andric 31*06c3fb27SDimitry Andric #include "AMDGPU.h" 32*06c3fb27SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 33*06c3fb27SDimitry Andric #include "SIRegisterInfo.h" 34*06c3fb27SDimitry Andric #include "llvm/CodeGen/LiveInterval.h" 35*06c3fb27SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h" 36*06c3fb27SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 37*06c3fb27SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 38*06c3fb27SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 39*06c3fb27SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 40*06c3fb27SDimitry Andric #include "llvm/InitializePasses.h" 41*06c3fb27SDimitry Andric #include "llvm/Pass.h" 42*06c3fb27SDimitry Andric 43*06c3fb27SDimitry Andric using namespace llvm; 44*06c3fb27SDimitry Andric 45*06c3fb27SDimitry Andric #define DEBUG_TYPE "rewrite-partial-reg-uses" 46*06c3fb27SDimitry Andric 47*06c3fb27SDimitry Andric namespace { 48*06c3fb27SDimitry Andric 49*06c3fb27SDimitry Andric class GCNRewritePartialRegUses : public MachineFunctionPass { 50*06c3fb27SDimitry Andric public: 51*06c3fb27SDimitry Andric static char ID; 52*06c3fb27SDimitry Andric GCNRewritePartialRegUses() : MachineFunctionPass(ID) {} 53*06c3fb27SDimitry Andric 54*06c3fb27SDimitry Andric StringRef getPassName() const override { 55*06c3fb27SDimitry Andric return "Rewrite Partial Register Uses"; 56*06c3fb27SDimitry Andric } 57*06c3fb27SDimitry Andric 58*06c3fb27SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) 
const override { 59*06c3fb27SDimitry Andric AU.setPreservesCFG(); 60*06c3fb27SDimitry Andric AU.addPreserved<LiveIntervals>(); 61*06c3fb27SDimitry Andric AU.addPreserved<SlotIndexes>(); 62*06c3fb27SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 63*06c3fb27SDimitry Andric } 64*06c3fb27SDimitry Andric 65*06c3fb27SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 66*06c3fb27SDimitry Andric 67*06c3fb27SDimitry Andric private: 68*06c3fb27SDimitry Andric MachineRegisterInfo *MRI; 69*06c3fb27SDimitry Andric const SIRegisterInfo *TRI; 70*06c3fb27SDimitry Andric const TargetInstrInfo *TII; 71*06c3fb27SDimitry Andric LiveIntervals *LIS; 72*06c3fb27SDimitry Andric 73*06c3fb27SDimitry Andric /// Rewrite partially used register Reg by shifting all its subregisters to 74*06c3fb27SDimitry Andric /// the right and replacing the original register with a register of minimal 75*06c3fb27SDimitry Andric /// size. Return true if the change has been made. 76*06c3fb27SDimitry Andric bool rewriteReg(Register Reg) const; 77*06c3fb27SDimitry Andric 78*06c3fb27SDimitry Andric /// Value type for SubRegMap below. 79*06c3fb27SDimitry Andric struct SubRegInfo { 80*06c3fb27SDimitry Andric /// Register class required to hold the value stored in the SubReg. 81*06c3fb27SDimitry Andric const TargetRegisterClass *RC; 82*06c3fb27SDimitry Andric 83*06c3fb27SDimitry Andric /// Index for the right-shifted subregister. If 0 this is the "covering" 84*06c3fb27SDimitry Andric /// subreg i.e. subreg that covers all others. Covering subreg becomes the 85*06c3fb27SDimitry Andric /// whole register after the replacement. 86*06c3fb27SDimitry Andric unsigned SubReg = AMDGPU::NoSubRegister; 87*06c3fb27SDimitry Andric SubRegInfo(const TargetRegisterClass *RC_ = nullptr) : RC(RC_) {} 88*06c3fb27SDimitry Andric }; 89*06c3fb27SDimitry Andric 90*06c3fb27SDimitry Andric /// Map OldSubReg -> { RC, NewSubReg }. Used as in/out container. 
91*06c3fb27SDimitry Andric typedef SmallDenseMap<unsigned, SubRegInfo> SubRegMap; 92*06c3fb27SDimitry Andric 93*06c3fb27SDimitry Andric /// Given register class RC and the set of used subregs as keys in the SubRegs 94*06c3fb27SDimitry Andric /// map return new register class and indexes of right-shifted subregs as 95*06c3fb27SDimitry Andric /// values in SubRegs map such that the resulting regclass would contain 96*06c3fb27SDimitry Andric /// registers of minimal size. 97*06c3fb27SDimitry Andric const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC, 98*06c3fb27SDimitry Andric SubRegMap &SubRegs) const; 99*06c3fb27SDimitry Andric 100*06c3fb27SDimitry Andric /// Given regclass RC and pairs of [OldSubReg, SubRegRC] in SubRegs try to 101*06c3fb27SDimitry Andric /// find new regclass such that: 102*06c3fb27SDimitry Andric /// 1. It has subregs obtained by shifting each OldSubReg by RShift number 103*06c3fb27SDimitry Andric /// of bits to the right. Every "shifted" subreg should have the same 104*06c3fb27SDimitry Andric /// SubRegRC. SubRegRC can be null, in this case it initialized using 105*06c3fb27SDimitry Andric /// getSubRegisterClass. If CoverSubregIdx is not zero it's a subreg that 106*06c3fb27SDimitry Andric /// "covers" all other subregs in pairs. Basically such subreg becomes a 107*06c3fb27SDimitry Andric /// whole register. 108*06c3fb27SDimitry Andric /// 2. Resulting register class contains registers of minimal size but not 109*06c3fb27SDimitry Andric /// less than RegNumBits. 110*06c3fb27SDimitry Andric /// 111*06c3fb27SDimitry Andric /// SubRegs is map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out 112*06c3fb27SDimitry Andric /// parameter: 113*06c3fb27SDimitry Andric /// OldSubReg - input parameter, 114*06c3fb27SDimitry Andric /// SubRegRC - in/out, should be changed for unknown regclass, 115*06c3fb27SDimitry Andric /// NewSubReg - output, contains shifted subregs on return. 
116*06c3fb27SDimitry Andric const TargetRegisterClass * 117*06c3fb27SDimitry Andric getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift, 118*06c3fb27SDimitry Andric unsigned RegNumBits, unsigned CoverSubregIdx, 119*06c3fb27SDimitry Andric SubRegMap &SubRegs) const; 120*06c3fb27SDimitry Andric 121*06c3fb27SDimitry Andric /// Update live intervals after rewriting OldReg to NewReg with SubRegs map 122*06c3fb27SDimitry Andric /// describing OldSubReg -> NewSubReg mapping. 123*06c3fb27SDimitry Andric void updateLiveIntervals(Register OldReg, Register NewReg, 124*06c3fb27SDimitry Andric SubRegMap &SubRegs) const; 125*06c3fb27SDimitry Andric 126*06c3fb27SDimitry Andric /// Helper methods. 127*06c3fb27SDimitry Andric 128*06c3fb27SDimitry Andric /// Return reg class expected by a MO's parent instruction for a given MO. 129*06c3fb27SDimitry Andric const TargetRegisterClass *getOperandRegClass(MachineOperand &MO) const; 130*06c3fb27SDimitry Andric 131*06c3fb27SDimitry Andric /// Find right-shifted by RShift amount version of the SubReg if it exists, 132*06c3fb27SDimitry Andric /// return 0 otherwise. 133*06c3fb27SDimitry Andric unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const; 134*06c3fb27SDimitry Andric 135*06c3fb27SDimitry Andric /// Find subreg index with a given Offset and Size, return 0 if there is no 136*06c3fb27SDimitry Andric /// such subregister index. The result is cached in SubRegs data-member. 137*06c3fb27SDimitry Andric unsigned getSubReg(unsigned Offset, unsigned Size) const; 138*06c3fb27SDimitry Andric 139*06c3fb27SDimitry Andric /// Cache for getSubReg method: {Offset, Size} -> SubReg index. 140*06c3fb27SDimitry Andric mutable SmallDenseMap<std::pair<unsigned, unsigned>, unsigned> SubRegs; 141*06c3fb27SDimitry Andric 142*06c3fb27SDimitry Andric /// Return bit mask that contains all register classes that are projected into 143*06c3fb27SDimitry Andric /// RC by SubRegIdx. The result is cached in SuperRegMasks data-member. 
144*06c3fb27SDimitry Andric const uint32_t *getSuperRegClassMask(const TargetRegisterClass *RC, 145*06c3fb27SDimitry Andric unsigned SubRegIdx) const; 146*06c3fb27SDimitry Andric 147*06c3fb27SDimitry Andric /// Cache for getSuperRegClassMask method: { RC, SubRegIdx } -> Class bitmask. 148*06c3fb27SDimitry Andric mutable SmallDenseMap<std::pair<const TargetRegisterClass *, unsigned>, 149*06c3fb27SDimitry Andric const uint32_t *> 150*06c3fb27SDimitry Andric SuperRegMasks; 151*06c3fb27SDimitry Andric 152*06c3fb27SDimitry Andric /// Return bitmask containing all allocatable register classes with registers 153*06c3fb27SDimitry Andric /// aligned at AlignNumBits. The result is cached in 154*06c3fb27SDimitry Andric /// AllocatableAndAlignedRegClassMasks data-member. 155*06c3fb27SDimitry Andric const BitVector & 156*06c3fb27SDimitry Andric getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const; 157*06c3fb27SDimitry Andric 158*06c3fb27SDimitry Andric /// Cache for getAllocatableAndAlignedRegClassMask method: 159*06c3fb27SDimitry Andric /// AlignNumBits -> Class bitmask. 160*06c3fb27SDimitry Andric mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks; 161*06c3fb27SDimitry Andric }; 162*06c3fb27SDimitry Andric 163*06c3fb27SDimitry Andric } // end anonymous namespace 164*06c3fb27SDimitry Andric 165*06c3fb27SDimitry Andric // TODO: move this to the tablegen and use binary search by Offset. 
166*06c3fb27SDimitry Andric unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset, 167*06c3fb27SDimitry Andric unsigned Size) const { 168*06c3fb27SDimitry Andric const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0); 169*06c3fb27SDimitry Andric if (Inserted) { 170*06c3fb27SDimitry Andric for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) { 171*06c3fb27SDimitry Andric if (TRI->getSubRegIdxOffset(Idx) == Offset && 172*06c3fb27SDimitry Andric TRI->getSubRegIdxSize(Idx) == Size) { 173*06c3fb27SDimitry Andric I->second = Idx; 174*06c3fb27SDimitry Andric break; 175*06c3fb27SDimitry Andric } 176*06c3fb27SDimitry Andric } 177*06c3fb27SDimitry Andric } 178*06c3fb27SDimitry Andric return I->second; 179*06c3fb27SDimitry Andric } 180*06c3fb27SDimitry Andric 181*06c3fb27SDimitry Andric unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg, 182*06c3fb27SDimitry Andric unsigned RShift) const { 183*06c3fb27SDimitry Andric unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift; 184*06c3fb27SDimitry Andric return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg)); 185*06c3fb27SDimitry Andric } 186*06c3fb27SDimitry Andric 187*06c3fb27SDimitry Andric const uint32_t * 188*06c3fb27SDimitry Andric GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC, 189*06c3fb27SDimitry Andric unsigned SubRegIdx) const { 190*06c3fb27SDimitry Andric const auto [I, Inserted] = 191*06c3fb27SDimitry Andric SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr); 192*06c3fb27SDimitry Andric if (Inserted) { 193*06c3fb27SDimitry Andric for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) { 194*06c3fb27SDimitry Andric if (RCI.getSubReg() == SubRegIdx) { 195*06c3fb27SDimitry Andric I->second = RCI.getMask(); 196*06c3fb27SDimitry Andric break; 197*06c3fb27SDimitry Andric } 198*06c3fb27SDimitry Andric } 199*06c3fb27SDimitry Andric } 200*06c3fb27SDimitry Andric return I->second; 201*06c3fb27SDimitry Andric } 202*06c3fb27SDimitry Andric 
203*06c3fb27SDimitry Andric const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask( 204*06c3fb27SDimitry Andric unsigned AlignNumBits) const { 205*06c3fb27SDimitry Andric const auto [I, Inserted] = 206*06c3fb27SDimitry Andric AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits); 207*06c3fb27SDimitry Andric if (Inserted) { 208*06c3fb27SDimitry Andric BitVector &BV = I->second; 209*06c3fb27SDimitry Andric BV.resize(TRI->getNumRegClasses()); 210*06c3fb27SDimitry Andric for (unsigned ClassID = 0; ClassID < TRI->getNumRegClasses(); ++ClassID) { 211*06c3fb27SDimitry Andric auto *RC = TRI->getRegClass(ClassID); 212*06c3fb27SDimitry Andric if (RC->isAllocatable() && TRI->isRegClassAligned(RC, AlignNumBits)) 213*06c3fb27SDimitry Andric BV.set(ClassID); 214*06c3fb27SDimitry Andric } 215*06c3fb27SDimitry Andric } 216*06c3fb27SDimitry Andric return I->second; 217*06c3fb27SDimitry Andric } 218*06c3fb27SDimitry Andric 219*06c3fb27SDimitry Andric const TargetRegisterClass * 220*06c3fb27SDimitry Andric GCNRewritePartialRegUses::getRegClassWithShiftedSubregs( 221*06c3fb27SDimitry Andric const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits, 222*06c3fb27SDimitry Andric unsigned CoverSubregIdx, SubRegMap &SubRegs) const { 223*06c3fb27SDimitry Andric 224*06c3fb27SDimitry Andric unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC); 225*06c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << " Shift " << RShift << ", reg align " << RCAlign 226*06c3fb27SDimitry Andric << '\n'); 227*06c3fb27SDimitry Andric 228*06c3fb27SDimitry Andric BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign)); 229*06c3fb27SDimitry Andric for (auto &[OldSubReg, SRI] : SubRegs) { 230*06c3fb27SDimitry Andric auto &[SubRegRC, NewSubReg] = SRI; 231*06c3fb27SDimitry Andric 232*06c3fb27SDimitry Andric // Register class may be unknown, for example: 233*06c3fb27SDimitry Andric // undef %0.sub4:sgpr_1024 = S_MOV_B32 01 234*06c3fb27SDimitry Andric // %0.sub5:sgpr_1024 = 
S_MOV_B32 02 235*06c3fb27SDimitry Andric // %1:vreg_64 = COPY %0.sub4_sub5 236*06c3fb27SDimitry Andric // Register classes for subregs 'sub4' and 'sub5' are known from the 237*06c3fb27SDimitry Andric // description of destination operand of S_MOV_B32 instruction but the 238*06c3fb27SDimitry Andric // class for the subreg 'sub4_sub5' isn't specified by the COPY instruction. 239*06c3fb27SDimitry Andric if (!SubRegRC) 240*06c3fb27SDimitry Andric SubRegRC = TRI->getSubRegisterClass(RC, OldSubReg); 241*06c3fb27SDimitry Andric 242*06c3fb27SDimitry Andric if (!SubRegRC) 243*06c3fb27SDimitry Andric return nullptr; 244*06c3fb27SDimitry Andric 245*06c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(OldSubReg) << ':' 246*06c3fb27SDimitry Andric << TRI->getRegClassName(SubRegRC) 247*06c3fb27SDimitry Andric << (SubRegRC->isAllocatable() ? "" : " not alloc") 248*06c3fb27SDimitry Andric << " -> "); 249*06c3fb27SDimitry Andric 250*06c3fb27SDimitry Andric if (OldSubReg == CoverSubregIdx) { 251*06c3fb27SDimitry Andric NewSubReg = AMDGPU::NoSubRegister; 252*06c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "whole reg"); 253*06c3fb27SDimitry Andric } else { 254*06c3fb27SDimitry Andric NewSubReg = shiftSubReg(OldSubReg, RShift); 255*06c3fb27SDimitry Andric if (!NewSubReg) { 256*06c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "none\n"); 257*06c3fb27SDimitry Andric return nullptr; 258*06c3fb27SDimitry Andric } 259*06c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << TRI->getSubRegIndexName(NewSubReg)); 260*06c3fb27SDimitry Andric } 261*06c3fb27SDimitry Andric 262*06c3fb27SDimitry Andric const uint32_t *Mask = NewSubReg ? 
getSuperRegClassMask(SubRegRC, NewSubReg) 263*06c3fb27SDimitry Andric : SubRegRC->getSubClassMask(); 264*06c3fb27SDimitry Andric if (!Mask) 265*06c3fb27SDimitry Andric llvm_unreachable("no register class mask?"); 266*06c3fb27SDimitry Andric 267*06c3fb27SDimitry Andric ClassMask.clearBitsNotInMask(Mask); 268*06c3fb27SDimitry Andric // Don't try to early exit because checking if ClassMask has set bits isn't 269*06c3fb27SDimitry Andric // that cheap and we expect it to pass in most cases. 270*06c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << ", num regclasses " << ClassMask.count() << '\n'); 271*06c3fb27SDimitry Andric } 272*06c3fb27SDimitry Andric 273*06c3fb27SDimitry Andric // ClassMask is the set of all register classes such that each class is 274*06c3fb27SDimitry Andric // allocatable, aligned, has all shifted subregs and each subreg has required 275*06c3fb27SDimitry Andric // register class (see SubRegRC above). Now select first (that is largest) 276*06c3fb27SDimitry Andric // register class with registers of minimal but not less than RegNumBits size. 277*06c3fb27SDimitry Andric // We have to check register size because we may encounter classes of smaller 278*06c3fb27SDimitry Andric // registers like VReg_1 in some situations. 
279*06c3fb27SDimitry Andric const TargetRegisterClass *MinRC = nullptr; 280*06c3fb27SDimitry Andric unsigned MinNumBits = std::numeric_limits<unsigned>::max(); 281*06c3fb27SDimitry Andric for (unsigned ClassID : ClassMask.set_bits()) { 282*06c3fb27SDimitry Andric auto *RC = TRI->getRegClass(ClassID); 283*06c3fb27SDimitry Andric unsigned NumBits = TRI->getRegSizeInBits(*RC); 284*06c3fb27SDimitry Andric if (NumBits < MinNumBits && NumBits >= RegNumBits) { 285*06c3fb27SDimitry Andric MinNumBits = NumBits; 286*06c3fb27SDimitry Andric MinRC = RC; 287*06c3fb27SDimitry Andric } 288*06c3fb27SDimitry Andric if (MinNumBits == RegNumBits) 289*06c3fb27SDimitry Andric break; 290*06c3fb27SDimitry Andric } 291*06c3fb27SDimitry Andric #ifndef NDEBUG 292*06c3fb27SDimitry Andric if (MinRC) { 293*06c3fb27SDimitry Andric assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign)); 294*06c3fb27SDimitry Andric for (auto [SubReg, SRI] : SubRegs) 295*06c3fb27SDimitry Andric // Check that all registers in MinRC support SRI.SubReg subregister. 296*06c3fb27SDimitry Andric assert(MinRC == TRI->getSubClassWithSubReg(MinRC, SRI.SubReg)); 297*06c3fb27SDimitry Andric } 298*06c3fb27SDimitry Andric #endif 299*06c3fb27SDimitry Andric // There might be zero RShift - in this case we just trying to find smaller 300*06c3fb27SDimitry Andric // register. 301*06c3fb27SDimitry Andric return (MinRC != RC || RShift != 0) ? 
MinRC : nullptr; 302*06c3fb27SDimitry Andric } 303*06c3fb27SDimitry Andric 304*06c3fb27SDimitry Andric const TargetRegisterClass * 305*06c3fb27SDimitry Andric GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC, 306*06c3fb27SDimitry Andric SubRegMap &SubRegs) const { 307*06c3fb27SDimitry Andric unsigned CoverSubreg = AMDGPU::NoSubRegister; 308*06c3fb27SDimitry Andric unsigned Offset = std::numeric_limits<unsigned>::max(); 309*06c3fb27SDimitry Andric unsigned End = 0; 310*06c3fb27SDimitry Andric for (auto [SubReg, SRI] : SubRegs) { 311*06c3fb27SDimitry Andric unsigned SubRegOffset = TRI->getSubRegIdxOffset(SubReg); 312*06c3fb27SDimitry Andric unsigned SubRegEnd = SubRegOffset + TRI->getSubRegIdxSize(SubReg); 313*06c3fb27SDimitry Andric if (SubRegOffset < Offset) { 314*06c3fb27SDimitry Andric Offset = SubRegOffset; 315*06c3fb27SDimitry Andric CoverSubreg = AMDGPU::NoSubRegister; 316*06c3fb27SDimitry Andric } 317*06c3fb27SDimitry Andric if (SubRegEnd > End) { 318*06c3fb27SDimitry Andric End = SubRegEnd; 319*06c3fb27SDimitry Andric CoverSubreg = AMDGPU::NoSubRegister; 320*06c3fb27SDimitry Andric } 321*06c3fb27SDimitry Andric if (SubRegOffset == Offset && SubRegEnd == End) 322*06c3fb27SDimitry Andric CoverSubreg = SubReg; 323*06c3fb27SDimitry Andric } 324*06c3fb27SDimitry Andric // If covering subreg is found shift everything so the covering subreg would 325*06c3fb27SDimitry Andric // be in the rightmost position. 326*06c3fb27SDimitry Andric if (CoverSubreg != AMDGPU::NoSubRegister) 327*06c3fb27SDimitry Andric return getRegClassWithShiftedSubregs(RC, Offset, End - Offset, CoverSubreg, 328*06c3fb27SDimitry Andric SubRegs); 329*06c3fb27SDimitry Andric 330*06c3fb27SDimitry Andric // Otherwise find subreg with maximum required alignment and shift it and all 331*06c3fb27SDimitry Andric // other subregs to the rightmost possible position with respect to the 332*06c3fb27SDimitry Andric // alignment. 
333*06c3fb27SDimitry Andric unsigned MaxAlign = 0; 334*06c3fb27SDimitry Andric for (auto [SubReg, SRI] : SubRegs) 335*06c3fb27SDimitry Andric MaxAlign = std::max(MaxAlign, TRI->getSubRegAlignmentNumBits(RC, SubReg)); 336*06c3fb27SDimitry Andric 337*06c3fb27SDimitry Andric unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max(); 338*06c3fb27SDimitry Andric for (auto [SubReg, SRI] : SubRegs) { 339*06c3fb27SDimitry Andric if (TRI->getSubRegAlignmentNumBits(RC, SubReg) != MaxAlign) 340*06c3fb27SDimitry Andric continue; 341*06c3fb27SDimitry Andric FirstMaxAlignedSubRegOffset = 342*06c3fb27SDimitry Andric std::min(FirstMaxAlignedSubRegOffset, TRI->getSubRegIdxOffset(SubReg)); 343*06c3fb27SDimitry Andric if (FirstMaxAlignedSubRegOffset == Offset) 344*06c3fb27SDimitry Andric break; 345*06c3fb27SDimitry Andric } 346*06c3fb27SDimitry Andric 347*06c3fb27SDimitry Andric unsigned NewOffsetOfMaxAlignedSubReg = 348*06c3fb27SDimitry Andric alignTo(FirstMaxAlignedSubRegOffset - Offset, MaxAlign); 349*06c3fb27SDimitry Andric 350*06c3fb27SDimitry Andric if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset) 351*06c3fb27SDimitry Andric llvm_unreachable("misaligned subreg"); 352*06c3fb27SDimitry Andric 353*06c3fb27SDimitry Andric unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg; 354*06c3fb27SDimitry Andric return getRegClassWithShiftedSubregs(RC, RShift, End - RShift, 0, SubRegs); 355*06c3fb27SDimitry Andric } 356*06c3fb27SDimitry Andric 357*06c3fb27SDimitry Andric // Only the subrange's lanemasks of the original interval need to be modified. 358*06c3fb27SDimitry Andric // Subrange for a covering subreg becomes the main range. 
359*06c3fb27SDimitry Andric void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg, 360*06c3fb27SDimitry Andric Register NewReg, 361*06c3fb27SDimitry Andric SubRegMap &SubRegs) const { 362*06c3fb27SDimitry Andric if (!LIS->hasInterval(OldReg)) 363*06c3fb27SDimitry Andric return; 364*06c3fb27SDimitry Andric 365*06c3fb27SDimitry Andric auto &OldLI = LIS->getInterval(OldReg); 366*06c3fb27SDimitry Andric auto &NewLI = LIS->createEmptyInterval(NewReg); 367*06c3fb27SDimitry Andric 368*06c3fb27SDimitry Andric auto &Allocator = LIS->getVNInfoAllocator(); 369*06c3fb27SDimitry Andric NewLI.setWeight(OldLI.weight()); 370*06c3fb27SDimitry Andric 371*06c3fb27SDimitry Andric for (auto &SR : OldLI.subranges()) { 372*06c3fb27SDimitry Andric auto I = find_if(SubRegs, [&](auto &P) { 373*06c3fb27SDimitry Andric return SR.LaneMask == TRI->getSubRegIndexLaneMask(P.first); 374*06c3fb27SDimitry Andric }); 375*06c3fb27SDimitry Andric 376*06c3fb27SDimitry Andric if (I == SubRegs.end()) { 377*06c3fb27SDimitry Andric // There might be a situation when subranges don't exactly match used 378*06c3fb27SDimitry Andric // subregs, for example: 379*06c3fb27SDimitry Andric // %120 [160r,1392r:0) 0@160r 380*06c3fb27SDimitry Andric // L000000000000C000 [160r,1392r:0) 0@160r 381*06c3fb27SDimitry Andric // L0000000000003000 [160r,1392r:0) 0@160r 382*06c3fb27SDimitry Andric // L0000000000000C00 [160r,1392r:0) 0@160r 383*06c3fb27SDimitry Andric // L0000000000000300 [160r,1392r:0) 0@160r 384*06c3fb27SDimitry Andric // L0000000000000003 [160r,1104r:0) 0@160r 385*06c3fb27SDimitry Andric // L000000000000000C [160r,1104r:0) 0@160r 386*06c3fb27SDimitry Andric // L0000000000000030 [160r,1104r:0) 0@160r 387*06c3fb27SDimitry Andric // L00000000000000C0 [160r,1104r:0) 0@160r 388*06c3fb27SDimitry Andric // but used subregs are: 389*06c3fb27SDimitry Andric // sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, L000000000000FFFF 390*06c3fb27SDimitry Andric // sub0_sub1_sub2_sub3, L00000000000000FF 
391*06c3fb27SDimitry Andric // sub4_sub5_sub6_sub7, L000000000000FF00 392*06c3fb27SDimitry Andric // In this example subregs sub0_sub1_sub2_sub3 and sub4_sub5_sub6_sub7 393*06c3fb27SDimitry Andric // have several subranges with the same lifetime. For such cases just 394*06c3fb27SDimitry Andric // recreate the interval. 395*06c3fb27SDimitry Andric LIS->removeInterval(OldReg); 396*06c3fb27SDimitry Andric LIS->removeInterval(NewReg); 397*06c3fb27SDimitry Andric LIS->createAndComputeVirtRegInterval(NewReg); 398*06c3fb27SDimitry Andric return; 399*06c3fb27SDimitry Andric } 400*06c3fb27SDimitry Andric 401*06c3fb27SDimitry Andric if (unsigned NewSubReg = I->second.SubReg) 402*06c3fb27SDimitry Andric NewLI.createSubRangeFrom(Allocator, 403*06c3fb27SDimitry Andric TRI->getSubRegIndexLaneMask(NewSubReg), SR); 404*06c3fb27SDimitry Andric else // This is the covering subreg (0 index) - set it as main range. 405*06c3fb27SDimitry Andric NewLI.assign(SR, Allocator); 406*06c3fb27SDimitry Andric 407*06c3fb27SDimitry Andric SubRegs.erase(I); 408*06c3fb27SDimitry Andric } 409*06c3fb27SDimitry Andric if (NewLI.empty()) 410*06c3fb27SDimitry Andric NewLI.assign(OldLI, Allocator); 411*06c3fb27SDimitry Andric NewLI.verify(MRI); 412*06c3fb27SDimitry Andric LIS->removeInterval(OldReg); 413*06c3fb27SDimitry Andric } 414*06c3fb27SDimitry Andric 415*06c3fb27SDimitry Andric const TargetRegisterClass * 416*06c3fb27SDimitry Andric GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const { 417*06c3fb27SDimitry Andric MachineInstr *MI = MO.getParent(); 418*06c3fb27SDimitry Andric return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI, 419*06c3fb27SDimitry Andric *MI->getParent()->getParent()); 420*06c3fb27SDimitry Andric } 421*06c3fb27SDimitry Andric 422*06c3fb27SDimitry Andric bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const { 423*06c3fb27SDimitry Andric auto Range = MRI->reg_nodbg_operands(Reg); 424*06c3fb27SDimitry Andric if (Range.begin() == 
Range.end()) 425*06c3fb27SDimitry Andric return false; 426*06c3fb27SDimitry Andric 427*06c3fb27SDimitry Andric for (MachineOperand &MO : Range) { 428*06c3fb27SDimitry Andric if (MO.getSubReg() == AMDGPU::NoSubRegister) // Whole reg used, quit. 429*06c3fb27SDimitry Andric return false; 430*06c3fb27SDimitry Andric } 431*06c3fb27SDimitry Andric 432*06c3fb27SDimitry Andric auto *RC = MRI->getRegClass(Reg); 433*06c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI) 434*06c3fb27SDimitry Andric << ':' << TRI->getRegClassName(RC) << '\n'); 435*06c3fb27SDimitry Andric 436*06c3fb27SDimitry Andric // Collect used subregs and constrained reg classes infered from instruction 437*06c3fb27SDimitry Andric // operands. 438*06c3fb27SDimitry Andric SubRegMap SubRegs; 439*06c3fb27SDimitry Andric for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { 440*06c3fb27SDimitry Andric assert(MO.getSubReg() != AMDGPU::NoSubRegister); 441*06c3fb27SDimitry Andric auto *OpDescRC = getOperandRegClass(MO); 442*06c3fb27SDimitry Andric const auto [I, Inserted] = SubRegs.try_emplace(MO.getSubReg(), OpDescRC); 443*06c3fb27SDimitry Andric if (!Inserted && OpDescRC) { 444*06c3fb27SDimitry Andric SubRegInfo &SRI = I->second; 445*06c3fb27SDimitry Andric SRI.RC = SRI.RC ? 
TRI->getCommonSubClass(SRI.RC, OpDescRC) : OpDescRC; 446*06c3fb27SDimitry Andric if (!SRI.RC) { 447*06c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << " Couldn't find common target regclass\n"); 448*06c3fb27SDimitry Andric return false; 449*06c3fb27SDimitry Andric } 450*06c3fb27SDimitry Andric } 451*06c3fb27SDimitry Andric } 452*06c3fb27SDimitry Andric 453*06c3fb27SDimitry Andric auto *NewRC = getMinSizeReg(RC, SubRegs); 454*06c3fb27SDimitry Andric if (!NewRC) { 455*06c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << " No improvement achieved\n"); 456*06c3fb27SDimitry Andric return false; 457*06c3fb27SDimitry Andric } 458*06c3fb27SDimitry Andric 459*06c3fb27SDimitry Andric Register NewReg = MRI->createVirtualRegister(NewRC); 460*06c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << " Success " << printReg(Reg, TRI) << ':' 461*06c3fb27SDimitry Andric << TRI->getRegClassName(RC) << " -> " 462*06c3fb27SDimitry Andric << printReg(NewReg, TRI) << ':' 463*06c3fb27SDimitry Andric << TRI->getRegClassName(NewRC) << '\n'); 464*06c3fb27SDimitry Andric 465*06c3fb27SDimitry Andric for (auto &MO : make_early_inc_range(MRI->reg_operands(Reg))) { 466*06c3fb27SDimitry Andric MO.setReg(NewReg); 467*06c3fb27SDimitry Andric // Debug info can refer to the whole reg, just leave it as it is for now. 468*06c3fb27SDimitry Andric // TODO: create some DI shift expression? 
469*06c3fb27SDimitry Andric if (MO.isDebug() && MO.getSubReg() == 0) 470*06c3fb27SDimitry Andric continue; 471*06c3fb27SDimitry Andric unsigned SubReg = SubRegs[MO.getSubReg()].SubReg; 472*06c3fb27SDimitry Andric MO.setSubReg(SubReg); 473*06c3fb27SDimitry Andric if (SubReg == AMDGPU::NoSubRegister && MO.isDef()) 474*06c3fb27SDimitry Andric MO.setIsUndef(false); 475*06c3fb27SDimitry Andric } 476*06c3fb27SDimitry Andric 477*06c3fb27SDimitry Andric if (LIS) 478*06c3fb27SDimitry Andric updateLiveIntervals(Reg, NewReg, SubRegs); 479*06c3fb27SDimitry Andric 480*06c3fb27SDimitry Andric return true; 481*06c3fb27SDimitry Andric } 482*06c3fb27SDimitry Andric 483*06c3fb27SDimitry Andric bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) { 484*06c3fb27SDimitry Andric MRI = &MF.getRegInfo(); 485*06c3fb27SDimitry Andric TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo()); 486*06c3fb27SDimitry Andric TII = MF.getSubtarget().getInstrInfo(); 487*06c3fb27SDimitry Andric LIS = getAnalysisIfAvailable<LiveIntervals>(); 488*06c3fb27SDimitry Andric bool Changed = false; 489*06c3fb27SDimitry Andric for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) { 490*06c3fb27SDimitry Andric Changed |= rewriteReg(Register::index2VirtReg(I)); 491*06c3fb27SDimitry Andric } 492*06c3fb27SDimitry Andric return Changed; 493*06c3fb27SDimitry Andric } 494*06c3fb27SDimitry Andric 495*06c3fb27SDimitry Andric char GCNRewritePartialRegUses::ID; 496*06c3fb27SDimitry Andric 497*06c3fb27SDimitry Andric char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID; 498*06c3fb27SDimitry Andric 499*06c3fb27SDimitry Andric INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE, 500*06c3fb27SDimitry Andric "Rewrite Partial Register Uses", false, false) 501*06c3fb27SDimitry Andric INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE, 502*06c3fb27SDimitry Andric "Rewrite Partial Register Uses", false, false) 503