//===-------------- GCNRewritePartialRegUses.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// RenameIndependentSubregs pass leaves large partially used super registers,
/// for example:
///   undef %0.sub4:VReg_1024 = ...
///   %0.sub5:VReg_1024 = ...
///   %0.sub6:VReg_1024 = ...
///   %0.sub7:VReg_1024 = ...
///   use %0.sub4_sub5_sub6_sub7
///   use %0.sub6_sub7
///
/// GCNRewritePartialRegUses goes right after RenameIndependentSubregs and
/// rewrites such partially used super registers with registers of minimal size:
///   undef %0.sub0:VReg_128 = ...
///   %0.sub1:VReg_128 = ...
///   %0.sub2:VReg_128 = ...
///   %0.sub3:VReg_128 = ...
///   use %0.sub0_sub1_sub2_sub3
///   use %0.sub2_sub3
///
/// This avoids the need to track subreg lanemasks during register pressure
/// calculation and creates more opportunities for code that is unaware of
/// lanemasks.
//===----------------------------------------------------------------------===//
3006c3fb27SDimitry Andric
3106c3fb27SDimitry Andric #include "AMDGPU.h"
3206c3fb27SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
3306c3fb27SDimitry Andric #include "SIRegisterInfo.h"
3406c3fb27SDimitry Andric #include "llvm/CodeGen/LiveInterval.h"
3506c3fb27SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
3606c3fb27SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
3706c3fb27SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
3806c3fb27SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
3906c3fb27SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
4006c3fb27SDimitry Andric #include "llvm/InitializePasses.h"
4106c3fb27SDimitry Andric #include "llvm/Pass.h"
4206c3fb27SDimitry Andric
4306c3fb27SDimitry Andric using namespace llvm;
4406c3fb27SDimitry Andric
4506c3fb27SDimitry Andric #define DEBUG_TYPE "rewrite-partial-reg-uses"
4606c3fb27SDimitry Andric
4706c3fb27SDimitry Andric namespace {
4806c3fb27SDimitry Andric
4906c3fb27SDimitry Andric class GCNRewritePartialRegUses : public MachineFunctionPass {
5006c3fb27SDimitry Andric public:
5106c3fb27SDimitry Andric static char ID;
GCNRewritePartialRegUses()5206c3fb27SDimitry Andric GCNRewritePartialRegUses() : MachineFunctionPass(ID) {}
5306c3fb27SDimitry Andric
getPassName() const5406c3fb27SDimitry Andric StringRef getPassName() const override {
5506c3fb27SDimitry Andric return "Rewrite Partial Register Uses";
5606c3fb27SDimitry Andric }
5706c3fb27SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const5806c3fb27SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
5906c3fb27SDimitry Andric AU.setPreservesCFG();
60*0fca6ea1SDimitry Andric AU.addPreserved<LiveIntervalsWrapperPass>();
61*0fca6ea1SDimitry Andric AU.addPreserved<SlotIndexesWrapperPass>();
6206c3fb27SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
6306c3fb27SDimitry Andric }
6406c3fb27SDimitry Andric
6506c3fb27SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override;
6606c3fb27SDimitry Andric
6706c3fb27SDimitry Andric private:
6806c3fb27SDimitry Andric MachineRegisterInfo *MRI;
6906c3fb27SDimitry Andric const SIRegisterInfo *TRI;
7006c3fb27SDimitry Andric const TargetInstrInfo *TII;
7106c3fb27SDimitry Andric LiveIntervals *LIS;
7206c3fb27SDimitry Andric
7306c3fb27SDimitry Andric /// Rewrite partially used register Reg by shifting all its subregisters to
7406c3fb27SDimitry Andric /// the right and replacing the original register with a register of minimal
7506c3fb27SDimitry Andric /// size. Return true if the change has been made.
7606c3fb27SDimitry Andric bool rewriteReg(Register Reg) const;
7706c3fb27SDimitry Andric
7806c3fb27SDimitry Andric /// Value type for SubRegMap below.
7906c3fb27SDimitry Andric struct SubRegInfo {
8006c3fb27SDimitry Andric /// Register class required to hold the value stored in the SubReg.
8106c3fb27SDimitry Andric const TargetRegisterClass *RC;
8206c3fb27SDimitry Andric
8306c3fb27SDimitry Andric /// Index for the right-shifted subregister. If 0 this is the "covering"
8406c3fb27SDimitry Andric /// subreg i.e. subreg that covers all others. Covering subreg becomes the
8506c3fb27SDimitry Andric /// whole register after the replacement.
8606c3fb27SDimitry Andric unsigned SubReg = AMDGPU::NoSubRegister;
SubRegInfo__anonf1e524d80111::GCNRewritePartialRegUses::SubRegInfo8706c3fb27SDimitry Andric SubRegInfo(const TargetRegisterClass *RC_ = nullptr) : RC(RC_) {}
8806c3fb27SDimitry Andric };
8906c3fb27SDimitry Andric
9006c3fb27SDimitry Andric /// Map OldSubReg -> { RC, NewSubReg }. Used as in/out container.
91*0fca6ea1SDimitry Andric using SubRegMap = SmallDenseMap<unsigned, SubRegInfo>;
9206c3fb27SDimitry Andric
9306c3fb27SDimitry Andric /// Given register class RC and the set of used subregs as keys in the SubRegs
9406c3fb27SDimitry Andric /// map return new register class and indexes of right-shifted subregs as
9506c3fb27SDimitry Andric /// values in SubRegs map such that the resulting regclass would contain
9606c3fb27SDimitry Andric /// registers of minimal size.
9706c3fb27SDimitry Andric const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC,
9806c3fb27SDimitry Andric SubRegMap &SubRegs) const;
9906c3fb27SDimitry Andric
10006c3fb27SDimitry Andric /// Given regclass RC and pairs of [OldSubReg, SubRegRC] in SubRegs try to
10106c3fb27SDimitry Andric /// find new regclass such that:
10206c3fb27SDimitry Andric /// 1. It has subregs obtained by shifting each OldSubReg by RShift number
10306c3fb27SDimitry Andric /// of bits to the right. Every "shifted" subreg should have the same
1045f757f3fSDimitry Andric /// SubRegRC. If CoverSubregIdx is not zero it's a subreg that "covers"
1055f757f3fSDimitry Andric /// all other subregs in pairs. Basically such subreg becomes a whole
1065f757f3fSDimitry Andric /// register.
10706c3fb27SDimitry Andric /// 2. Resulting register class contains registers of minimal size but not
10806c3fb27SDimitry Andric /// less than RegNumBits.
10906c3fb27SDimitry Andric ///
11006c3fb27SDimitry Andric /// SubRegs is map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out
11106c3fb27SDimitry Andric /// parameter:
11206c3fb27SDimitry Andric /// OldSubReg - input parameter,
1135f757f3fSDimitry Andric /// SubRegRC - input parameter (cannot be null),
11406c3fb27SDimitry Andric /// NewSubReg - output, contains shifted subregs on return.
11506c3fb27SDimitry Andric const TargetRegisterClass *
11606c3fb27SDimitry Andric getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
11706c3fb27SDimitry Andric unsigned RegNumBits, unsigned CoverSubregIdx,
11806c3fb27SDimitry Andric SubRegMap &SubRegs) const;
11906c3fb27SDimitry Andric
12006c3fb27SDimitry Andric /// Update live intervals after rewriting OldReg to NewReg with SubRegs map
12106c3fb27SDimitry Andric /// describing OldSubReg -> NewSubReg mapping.
12206c3fb27SDimitry Andric void updateLiveIntervals(Register OldReg, Register NewReg,
12306c3fb27SDimitry Andric SubRegMap &SubRegs) const;
12406c3fb27SDimitry Andric
12506c3fb27SDimitry Andric /// Helper methods.
12606c3fb27SDimitry Andric
12706c3fb27SDimitry Andric /// Return reg class expected by a MO's parent instruction for a given MO.
12806c3fb27SDimitry Andric const TargetRegisterClass *getOperandRegClass(MachineOperand &MO) const;
12906c3fb27SDimitry Andric
13006c3fb27SDimitry Andric /// Find right-shifted by RShift amount version of the SubReg if it exists,
13106c3fb27SDimitry Andric /// return 0 otherwise.
13206c3fb27SDimitry Andric unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const;
13306c3fb27SDimitry Andric
13406c3fb27SDimitry Andric /// Find subreg index with a given Offset and Size, return 0 if there is no
13506c3fb27SDimitry Andric /// such subregister index. The result is cached in SubRegs data-member.
13606c3fb27SDimitry Andric unsigned getSubReg(unsigned Offset, unsigned Size) const;
13706c3fb27SDimitry Andric
13806c3fb27SDimitry Andric /// Cache for getSubReg method: {Offset, Size} -> SubReg index.
13906c3fb27SDimitry Andric mutable SmallDenseMap<std::pair<unsigned, unsigned>, unsigned> SubRegs;
14006c3fb27SDimitry Andric
14106c3fb27SDimitry Andric /// Return bit mask that contains all register classes that are projected into
14206c3fb27SDimitry Andric /// RC by SubRegIdx. The result is cached in SuperRegMasks data-member.
14306c3fb27SDimitry Andric const uint32_t *getSuperRegClassMask(const TargetRegisterClass *RC,
14406c3fb27SDimitry Andric unsigned SubRegIdx) const;
14506c3fb27SDimitry Andric
14606c3fb27SDimitry Andric /// Cache for getSuperRegClassMask method: { RC, SubRegIdx } -> Class bitmask.
14706c3fb27SDimitry Andric mutable SmallDenseMap<std::pair<const TargetRegisterClass *, unsigned>,
14806c3fb27SDimitry Andric const uint32_t *>
14906c3fb27SDimitry Andric SuperRegMasks;
15006c3fb27SDimitry Andric
15106c3fb27SDimitry Andric /// Return bitmask containing all allocatable register classes with registers
15206c3fb27SDimitry Andric /// aligned at AlignNumBits. The result is cached in
15306c3fb27SDimitry Andric /// AllocatableAndAlignedRegClassMasks data-member.
15406c3fb27SDimitry Andric const BitVector &
15506c3fb27SDimitry Andric getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const;
15606c3fb27SDimitry Andric
15706c3fb27SDimitry Andric /// Cache for getAllocatableAndAlignedRegClassMask method:
15806c3fb27SDimitry Andric /// AlignNumBits -> Class bitmask.
15906c3fb27SDimitry Andric mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;
16006c3fb27SDimitry Andric };
16106c3fb27SDimitry Andric
16206c3fb27SDimitry Andric } // end anonymous namespace
16306c3fb27SDimitry Andric
16406c3fb27SDimitry Andric // TODO: move this to the tablegen and use binary search by Offset.
getSubReg(unsigned Offset,unsigned Size) const16506c3fb27SDimitry Andric unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
16606c3fb27SDimitry Andric unsigned Size) const {
16706c3fb27SDimitry Andric const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
16806c3fb27SDimitry Andric if (Inserted) {
16906c3fb27SDimitry Andric for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
17006c3fb27SDimitry Andric if (TRI->getSubRegIdxOffset(Idx) == Offset &&
17106c3fb27SDimitry Andric TRI->getSubRegIdxSize(Idx) == Size) {
17206c3fb27SDimitry Andric I->second = Idx;
17306c3fb27SDimitry Andric break;
17406c3fb27SDimitry Andric }
17506c3fb27SDimitry Andric }
17606c3fb27SDimitry Andric }
17706c3fb27SDimitry Andric return I->second;
17806c3fb27SDimitry Andric }
17906c3fb27SDimitry Andric
shiftSubReg(unsigned SubReg,unsigned RShift) const18006c3fb27SDimitry Andric unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg,
18106c3fb27SDimitry Andric unsigned RShift) const {
18206c3fb27SDimitry Andric unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
18306c3fb27SDimitry Andric return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
18406c3fb27SDimitry Andric }
18506c3fb27SDimitry Andric
18606c3fb27SDimitry Andric const uint32_t *
getSuperRegClassMask(const TargetRegisterClass * RC,unsigned SubRegIdx) const18706c3fb27SDimitry Andric GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
18806c3fb27SDimitry Andric unsigned SubRegIdx) const {
18906c3fb27SDimitry Andric const auto [I, Inserted] =
19006c3fb27SDimitry Andric SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
19106c3fb27SDimitry Andric if (Inserted) {
19206c3fb27SDimitry Andric for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) {
19306c3fb27SDimitry Andric if (RCI.getSubReg() == SubRegIdx) {
19406c3fb27SDimitry Andric I->second = RCI.getMask();
19506c3fb27SDimitry Andric break;
19606c3fb27SDimitry Andric }
19706c3fb27SDimitry Andric }
19806c3fb27SDimitry Andric }
19906c3fb27SDimitry Andric return I->second;
20006c3fb27SDimitry Andric }
20106c3fb27SDimitry Andric
getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const20206c3fb27SDimitry Andric const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
20306c3fb27SDimitry Andric unsigned AlignNumBits) const {
20406c3fb27SDimitry Andric const auto [I, Inserted] =
20506c3fb27SDimitry Andric AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
20606c3fb27SDimitry Andric if (Inserted) {
20706c3fb27SDimitry Andric BitVector &BV = I->second;
20806c3fb27SDimitry Andric BV.resize(TRI->getNumRegClasses());
20906c3fb27SDimitry Andric for (unsigned ClassID = 0; ClassID < TRI->getNumRegClasses(); ++ClassID) {
21006c3fb27SDimitry Andric auto *RC = TRI->getRegClass(ClassID);
21106c3fb27SDimitry Andric if (RC->isAllocatable() && TRI->isRegClassAligned(RC, AlignNumBits))
21206c3fb27SDimitry Andric BV.set(ClassID);
21306c3fb27SDimitry Andric }
21406c3fb27SDimitry Andric }
21506c3fb27SDimitry Andric return I->second;
21606c3fb27SDimitry Andric }
21706c3fb27SDimitry Andric
21806c3fb27SDimitry Andric const TargetRegisterClass *
getRegClassWithShiftedSubregs(const TargetRegisterClass * RC,unsigned RShift,unsigned RegNumBits,unsigned CoverSubregIdx,SubRegMap & SubRegs) const21906c3fb27SDimitry Andric GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
22006c3fb27SDimitry Andric const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
22106c3fb27SDimitry Andric unsigned CoverSubregIdx, SubRegMap &SubRegs) const {
22206c3fb27SDimitry Andric
22306c3fb27SDimitry Andric unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC);
22406c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << " Shift " << RShift << ", reg align " << RCAlign
22506c3fb27SDimitry Andric << '\n');
22606c3fb27SDimitry Andric
22706c3fb27SDimitry Andric BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
22806c3fb27SDimitry Andric for (auto &[OldSubReg, SRI] : SubRegs) {
22906c3fb27SDimitry Andric auto &[SubRegRC, NewSubReg] = SRI;
2305f757f3fSDimitry Andric assert(SubRegRC);
23106c3fb27SDimitry Andric
23206c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(OldSubReg) << ':'
23306c3fb27SDimitry Andric << TRI->getRegClassName(SubRegRC)
23406c3fb27SDimitry Andric << (SubRegRC->isAllocatable() ? "" : " not alloc")
23506c3fb27SDimitry Andric << " -> ");
23606c3fb27SDimitry Andric
23706c3fb27SDimitry Andric if (OldSubReg == CoverSubregIdx) {
2385f757f3fSDimitry Andric // Covering subreg will become a full register, RC should be allocatable.
2395f757f3fSDimitry Andric assert(SubRegRC->isAllocatable());
24006c3fb27SDimitry Andric NewSubReg = AMDGPU::NoSubRegister;
24106c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "whole reg");
24206c3fb27SDimitry Andric } else {
24306c3fb27SDimitry Andric NewSubReg = shiftSubReg(OldSubReg, RShift);
24406c3fb27SDimitry Andric if (!NewSubReg) {
24506c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "none\n");
24606c3fb27SDimitry Andric return nullptr;
24706c3fb27SDimitry Andric }
24806c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << TRI->getSubRegIndexName(NewSubReg));
24906c3fb27SDimitry Andric }
25006c3fb27SDimitry Andric
25106c3fb27SDimitry Andric const uint32_t *Mask = NewSubReg ? getSuperRegClassMask(SubRegRC, NewSubReg)
25206c3fb27SDimitry Andric : SubRegRC->getSubClassMask();
25306c3fb27SDimitry Andric if (!Mask)
25406c3fb27SDimitry Andric llvm_unreachable("no register class mask?");
25506c3fb27SDimitry Andric
25606c3fb27SDimitry Andric ClassMask.clearBitsNotInMask(Mask);
25706c3fb27SDimitry Andric // Don't try to early exit because checking if ClassMask has set bits isn't
25806c3fb27SDimitry Andric // that cheap and we expect it to pass in most cases.
25906c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << ", num regclasses " << ClassMask.count() << '\n');
26006c3fb27SDimitry Andric }
26106c3fb27SDimitry Andric
26206c3fb27SDimitry Andric // ClassMask is the set of all register classes such that each class is
26306c3fb27SDimitry Andric // allocatable, aligned, has all shifted subregs and each subreg has required
26406c3fb27SDimitry Andric // register class (see SubRegRC above). Now select first (that is largest)
26506c3fb27SDimitry Andric // register class with registers of minimal but not less than RegNumBits size.
26606c3fb27SDimitry Andric // We have to check register size because we may encounter classes of smaller
26706c3fb27SDimitry Andric // registers like VReg_1 in some situations.
26806c3fb27SDimitry Andric const TargetRegisterClass *MinRC = nullptr;
26906c3fb27SDimitry Andric unsigned MinNumBits = std::numeric_limits<unsigned>::max();
27006c3fb27SDimitry Andric for (unsigned ClassID : ClassMask.set_bits()) {
27106c3fb27SDimitry Andric auto *RC = TRI->getRegClass(ClassID);
27206c3fb27SDimitry Andric unsigned NumBits = TRI->getRegSizeInBits(*RC);
27306c3fb27SDimitry Andric if (NumBits < MinNumBits && NumBits >= RegNumBits) {
27406c3fb27SDimitry Andric MinNumBits = NumBits;
27506c3fb27SDimitry Andric MinRC = RC;
27606c3fb27SDimitry Andric }
27706c3fb27SDimitry Andric if (MinNumBits == RegNumBits)
27806c3fb27SDimitry Andric break;
27906c3fb27SDimitry Andric }
28006c3fb27SDimitry Andric #ifndef NDEBUG
28106c3fb27SDimitry Andric if (MinRC) {
28206c3fb27SDimitry Andric assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign));
28306c3fb27SDimitry Andric for (auto [SubReg, SRI] : SubRegs)
28406c3fb27SDimitry Andric // Check that all registers in MinRC support SRI.SubReg subregister.
28506c3fb27SDimitry Andric assert(MinRC == TRI->getSubClassWithSubReg(MinRC, SRI.SubReg));
28606c3fb27SDimitry Andric }
28706c3fb27SDimitry Andric #endif
28806c3fb27SDimitry Andric // There might be zero RShift - in this case we just trying to find smaller
28906c3fb27SDimitry Andric // register.
29006c3fb27SDimitry Andric return (MinRC != RC || RShift != 0) ? MinRC : nullptr;
29106c3fb27SDimitry Andric }
29206c3fb27SDimitry Andric
29306c3fb27SDimitry Andric const TargetRegisterClass *
getMinSizeReg(const TargetRegisterClass * RC,SubRegMap & SubRegs) const29406c3fb27SDimitry Andric GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
29506c3fb27SDimitry Andric SubRegMap &SubRegs) const {
29606c3fb27SDimitry Andric unsigned CoverSubreg = AMDGPU::NoSubRegister;
29706c3fb27SDimitry Andric unsigned Offset = std::numeric_limits<unsigned>::max();
29806c3fb27SDimitry Andric unsigned End = 0;
29906c3fb27SDimitry Andric for (auto [SubReg, SRI] : SubRegs) {
30006c3fb27SDimitry Andric unsigned SubRegOffset = TRI->getSubRegIdxOffset(SubReg);
30106c3fb27SDimitry Andric unsigned SubRegEnd = SubRegOffset + TRI->getSubRegIdxSize(SubReg);
30206c3fb27SDimitry Andric if (SubRegOffset < Offset) {
30306c3fb27SDimitry Andric Offset = SubRegOffset;
30406c3fb27SDimitry Andric CoverSubreg = AMDGPU::NoSubRegister;
30506c3fb27SDimitry Andric }
30606c3fb27SDimitry Andric if (SubRegEnd > End) {
30706c3fb27SDimitry Andric End = SubRegEnd;
30806c3fb27SDimitry Andric CoverSubreg = AMDGPU::NoSubRegister;
30906c3fb27SDimitry Andric }
31006c3fb27SDimitry Andric if (SubRegOffset == Offset && SubRegEnd == End)
31106c3fb27SDimitry Andric CoverSubreg = SubReg;
31206c3fb27SDimitry Andric }
31306c3fb27SDimitry Andric // If covering subreg is found shift everything so the covering subreg would
31406c3fb27SDimitry Andric // be in the rightmost position.
31506c3fb27SDimitry Andric if (CoverSubreg != AMDGPU::NoSubRegister)
31606c3fb27SDimitry Andric return getRegClassWithShiftedSubregs(RC, Offset, End - Offset, CoverSubreg,
31706c3fb27SDimitry Andric SubRegs);
31806c3fb27SDimitry Andric
31906c3fb27SDimitry Andric // Otherwise find subreg with maximum required alignment and shift it and all
32006c3fb27SDimitry Andric // other subregs to the rightmost possible position with respect to the
32106c3fb27SDimitry Andric // alignment.
32206c3fb27SDimitry Andric unsigned MaxAlign = 0;
32306c3fb27SDimitry Andric for (auto [SubReg, SRI] : SubRegs)
32406c3fb27SDimitry Andric MaxAlign = std::max(MaxAlign, TRI->getSubRegAlignmentNumBits(RC, SubReg));
32506c3fb27SDimitry Andric
32606c3fb27SDimitry Andric unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max();
32706c3fb27SDimitry Andric for (auto [SubReg, SRI] : SubRegs) {
32806c3fb27SDimitry Andric if (TRI->getSubRegAlignmentNumBits(RC, SubReg) != MaxAlign)
32906c3fb27SDimitry Andric continue;
33006c3fb27SDimitry Andric FirstMaxAlignedSubRegOffset =
33106c3fb27SDimitry Andric std::min(FirstMaxAlignedSubRegOffset, TRI->getSubRegIdxOffset(SubReg));
33206c3fb27SDimitry Andric if (FirstMaxAlignedSubRegOffset == Offset)
33306c3fb27SDimitry Andric break;
33406c3fb27SDimitry Andric }
33506c3fb27SDimitry Andric
33606c3fb27SDimitry Andric unsigned NewOffsetOfMaxAlignedSubReg =
33706c3fb27SDimitry Andric alignTo(FirstMaxAlignedSubRegOffset - Offset, MaxAlign);
33806c3fb27SDimitry Andric
33906c3fb27SDimitry Andric if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset)
34006c3fb27SDimitry Andric llvm_unreachable("misaligned subreg");
34106c3fb27SDimitry Andric
34206c3fb27SDimitry Andric unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
34306c3fb27SDimitry Andric return getRegClassWithShiftedSubregs(RC, RShift, End - RShift, 0, SubRegs);
34406c3fb27SDimitry Andric }
34506c3fb27SDimitry Andric
34606c3fb27SDimitry Andric // Only the subrange's lanemasks of the original interval need to be modified.
34706c3fb27SDimitry Andric // Subrange for a covering subreg becomes the main range.
updateLiveIntervals(Register OldReg,Register NewReg,SubRegMap & SubRegs) const34806c3fb27SDimitry Andric void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
34906c3fb27SDimitry Andric Register NewReg,
35006c3fb27SDimitry Andric SubRegMap &SubRegs) const {
35106c3fb27SDimitry Andric if (!LIS->hasInterval(OldReg))
35206c3fb27SDimitry Andric return;
35306c3fb27SDimitry Andric
35406c3fb27SDimitry Andric auto &OldLI = LIS->getInterval(OldReg);
35506c3fb27SDimitry Andric auto &NewLI = LIS->createEmptyInterval(NewReg);
35606c3fb27SDimitry Andric
35706c3fb27SDimitry Andric auto &Allocator = LIS->getVNInfoAllocator();
35806c3fb27SDimitry Andric NewLI.setWeight(OldLI.weight());
35906c3fb27SDimitry Andric
36006c3fb27SDimitry Andric for (auto &SR : OldLI.subranges()) {
36106c3fb27SDimitry Andric auto I = find_if(SubRegs, [&](auto &P) {
36206c3fb27SDimitry Andric return SR.LaneMask == TRI->getSubRegIndexLaneMask(P.first);
36306c3fb27SDimitry Andric });
36406c3fb27SDimitry Andric
36506c3fb27SDimitry Andric if (I == SubRegs.end()) {
36606c3fb27SDimitry Andric // There might be a situation when subranges don't exactly match used
36706c3fb27SDimitry Andric // subregs, for example:
36806c3fb27SDimitry Andric // %120 [160r,1392r:0) 0@160r
36906c3fb27SDimitry Andric // L000000000000C000 [160r,1392r:0) 0@160r
37006c3fb27SDimitry Andric // L0000000000003000 [160r,1392r:0) 0@160r
37106c3fb27SDimitry Andric // L0000000000000C00 [160r,1392r:0) 0@160r
37206c3fb27SDimitry Andric // L0000000000000300 [160r,1392r:0) 0@160r
37306c3fb27SDimitry Andric // L0000000000000003 [160r,1104r:0) 0@160r
37406c3fb27SDimitry Andric // L000000000000000C [160r,1104r:0) 0@160r
37506c3fb27SDimitry Andric // L0000000000000030 [160r,1104r:0) 0@160r
37606c3fb27SDimitry Andric // L00000000000000C0 [160r,1104r:0) 0@160r
37706c3fb27SDimitry Andric // but used subregs are:
37806c3fb27SDimitry Andric // sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, L000000000000FFFF
37906c3fb27SDimitry Andric // sub0_sub1_sub2_sub3, L00000000000000FF
38006c3fb27SDimitry Andric // sub4_sub5_sub6_sub7, L000000000000FF00
38106c3fb27SDimitry Andric // In this example subregs sub0_sub1_sub2_sub3 and sub4_sub5_sub6_sub7
38206c3fb27SDimitry Andric // have several subranges with the same lifetime. For such cases just
38306c3fb27SDimitry Andric // recreate the interval.
38406c3fb27SDimitry Andric LIS->removeInterval(OldReg);
38506c3fb27SDimitry Andric LIS->removeInterval(NewReg);
38606c3fb27SDimitry Andric LIS->createAndComputeVirtRegInterval(NewReg);
38706c3fb27SDimitry Andric return;
38806c3fb27SDimitry Andric }
38906c3fb27SDimitry Andric
39006c3fb27SDimitry Andric if (unsigned NewSubReg = I->second.SubReg)
39106c3fb27SDimitry Andric NewLI.createSubRangeFrom(Allocator,
39206c3fb27SDimitry Andric TRI->getSubRegIndexLaneMask(NewSubReg), SR);
39306c3fb27SDimitry Andric else // This is the covering subreg (0 index) - set it as main range.
39406c3fb27SDimitry Andric NewLI.assign(SR, Allocator);
39506c3fb27SDimitry Andric
39606c3fb27SDimitry Andric SubRegs.erase(I);
39706c3fb27SDimitry Andric }
39806c3fb27SDimitry Andric if (NewLI.empty())
39906c3fb27SDimitry Andric NewLI.assign(OldLI, Allocator);
40006c3fb27SDimitry Andric NewLI.verify(MRI);
40106c3fb27SDimitry Andric LIS->removeInterval(OldReg);
40206c3fb27SDimitry Andric }
40306c3fb27SDimitry Andric
40406c3fb27SDimitry Andric const TargetRegisterClass *
getOperandRegClass(MachineOperand & MO) const40506c3fb27SDimitry Andric GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
40606c3fb27SDimitry Andric MachineInstr *MI = MO.getParent();
40706c3fb27SDimitry Andric return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI,
40806c3fb27SDimitry Andric *MI->getParent()->getParent());
40906c3fb27SDimitry Andric }
41006c3fb27SDimitry Andric
rewriteReg(Register Reg) const41106c3fb27SDimitry Andric bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
41206c3fb27SDimitry Andric auto Range = MRI->reg_nodbg_operands(Reg);
4135f757f3fSDimitry Andric if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
4145f757f3fSDimitry Andric return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
4155f757f3fSDimitry Andric }))
41606c3fb27SDimitry Andric return false;
41706c3fb27SDimitry Andric
41806c3fb27SDimitry Andric auto *RC = MRI->getRegClass(Reg);
41906c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
42006c3fb27SDimitry Andric << ':' << TRI->getRegClassName(RC) << '\n');
42106c3fb27SDimitry Andric
4225f757f3fSDimitry Andric // Collect used subregs and their reg classes infered from instruction
42306c3fb27SDimitry Andric // operands.
42406c3fb27SDimitry Andric SubRegMap SubRegs;
4255f757f3fSDimitry Andric for (MachineOperand &MO : Range) {
4265f757f3fSDimitry Andric const unsigned SubReg = MO.getSubReg();
4275f757f3fSDimitry Andric assert(SubReg != AMDGPU::NoSubRegister); // Due to [1].
4285f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(SubReg) << ':');
4295f757f3fSDimitry Andric
4305f757f3fSDimitry Andric const auto [I, Inserted] = SubRegs.try_emplace(SubReg);
4315f757f3fSDimitry Andric const TargetRegisterClass *&SubRegRC = I->second.RC;
4325f757f3fSDimitry Andric
4335f757f3fSDimitry Andric if (Inserted)
4345f757f3fSDimitry Andric SubRegRC = TRI->getSubRegisterClass(RC, SubReg);
4355f757f3fSDimitry Andric
4365f757f3fSDimitry Andric if (SubRegRC) {
4375f757f3fSDimitry Andric if (const TargetRegisterClass *OpDescRC = getOperandRegClass(MO)) {
4385f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << " & "
4395f757f3fSDimitry Andric << TRI->getRegClassName(OpDescRC) << " = ");
4405f757f3fSDimitry Andric SubRegRC = TRI->getCommonSubClass(SubRegRC, OpDescRC);
4415f757f3fSDimitry Andric }
4425f757f3fSDimitry Andric }
4435f757f3fSDimitry Andric
4445f757f3fSDimitry Andric if (!SubRegRC) {
4455f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "couldn't find target regclass\n");
44606c3fb27SDimitry Andric return false;
44706c3fb27SDimitry Andric }
4485f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << '\n');
44906c3fb27SDimitry Andric }
45006c3fb27SDimitry Andric
45106c3fb27SDimitry Andric auto *NewRC = getMinSizeReg(RC, SubRegs);
45206c3fb27SDimitry Andric if (!NewRC) {
45306c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << " No improvement achieved\n");
45406c3fb27SDimitry Andric return false;
45506c3fb27SDimitry Andric }
45606c3fb27SDimitry Andric
45706c3fb27SDimitry Andric Register NewReg = MRI->createVirtualRegister(NewRC);
45806c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << " Success " << printReg(Reg, TRI) << ':'
45906c3fb27SDimitry Andric << TRI->getRegClassName(RC) << " -> "
46006c3fb27SDimitry Andric << printReg(NewReg, TRI) << ':'
46106c3fb27SDimitry Andric << TRI->getRegClassName(NewRC) << '\n');
46206c3fb27SDimitry Andric
46306c3fb27SDimitry Andric for (auto &MO : make_early_inc_range(MRI->reg_operands(Reg))) {
46406c3fb27SDimitry Andric MO.setReg(NewReg);
46506c3fb27SDimitry Andric // Debug info can refer to the whole reg, just leave it as it is for now.
46606c3fb27SDimitry Andric // TODO: create some DI shift expression?
46706c3fb27SDimitry Andric if (MO.isDebug() && MO.getSubReg() == 0)
46806c3fb27SDimitry Andric continue;
46906c3fb27SDimitry Andric unsigned SubReg = SubRegs[MO.getSubReg()].SubReg;
47006c3fb27SDimitry Andric MO.setSubReg(SubReg);
47106c3fb27SDimitry Andric if (SubReg == AMDGPU::NoSubRegister && MO.isDef())
47206c3fb27SDimitry Andric MO.setIsUndef(false);
47306c3fb27SDimitry Andric }
47406c3fb27SDimitry Andric
47506c3fb27SDimitry Andric if (LIS)
47606c3fb27SDimitry Andric updateLiveIntervals(Reg, NewReg, SubRegs);
47706c3fb27SDimitry Andric
47806c3fb27SDimitry Andric return true;
47906c3fb27SDimitry Andric }
48006c3fb27SDimitry Andric
runOnMachineFunction(MachineFunction & MF)48106c3fb27SDimitry Andric bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
48206c3fb27SDimitry Andric MRI = &MF.getRegInfo();
48306c3fb27SDimitry Andric TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
48406c3fb27SDimitry Andric TII = MF.getSubtarget().getInstrInfo();
485*0fca6ea1SDimitry Andric auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
486*0fca6ea1SDimitry Andric LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
48706c3fb27SDimitry Andric bool Changed = false;
48806c3fb27SDimitry Andric for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
48906c3fb27SDimitry Andric Changed |= rewriteReg(Register::index2VirtReg(I));
49006c3fb27SDimitry Andric }
49106c3fb27SDimitry Andric return Changed;
49206c3fb27SDimitry Andric }
49306c3fb27SDimitry Andric
49406c3fb27SDimitry Andric char GCNRewritePartialRegUses::ID;
49506c3fb27SDimitry Andric
49606c3fb27SDimitry Andric char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID;
49706c3fb27SDimitry Andric
49806c3fb27SDimitry Andric INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE,
49906c3fb27SDimitry Andric "Rewrite Partial Register Uses", false, false)
50006c3fb27SDimitry Andric INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE,
50106c3fb27SDimitry Andric "Rewrite Partial Register Uses", false, false)
502