//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass combines split register tuple initialization into a single
/// pseudo:
///
///   undef %0.sub1:sreg_64 = S_MOV_B32 1
///   %0.sub0:sreg_64 = S_MOV_B32 2
/// =>
///   %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x100000002
///
/// This allows the value to be rematerialized instead of spilled. The pass is
/// meant to run after the register coalescer, so that coalescing is not
/// disturbed, and before actual register allocation, so that the allocator can
/// rematerialize the combined value.
///
/// Right now the pass only handles 64-bit SGPRs with immediate initializers,
/// although the same could be done for other register classes and
/// instructions if necessary.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-pre-ra-optimizations"

namespace {

class GCNPreRAOptimizations : public MachineFunctionPass {
private:
  const SIInstrInfo *TII;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;

  bool processReg(Register Reg);

public:
  static char ID;

  GCNPreRAOptimizations() : MachineFunctionPass(ID) {
    initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AMDGPU Pre-RA optimizations";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervals>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE,
                      "AMDGPU Pre-RA optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE,
                    "AMDGPU Pre-RA optimizations", false, false)

char GCNPreRAOptimizations::ID = 0;

char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID;

FunctionPass *llvm::createGCNPreRAOptimizationsPass() {
  return new GCNPreRAOptimizations();
}

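// Try to fold the pair of 32-bit immediate S_MOV_B32 sub-register definitions
// of the 64-bit SGPR Reg into a single S_MOV_B64_IMM_PSEUDO and update live
// intervals. Returns true if Reg was rewritten.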
bool GCNPreRAOptimizations::processReg(Register Reg) {
  MachineInstr *Def0 = nullptr;
  MachineInstr *Def1 = nullptr;
  uint64_t Init = 0;

  // Collect exactly one S_MOV_B32 immediate per 32-bit half of Reg. Bail out
  // on any other kind of definition.
  for (MachineInstr &I : MRI->def_instructions(Reg)) {
    if (I.getOpcode() != AMDGPU::S_MOV_B32 || I.getOperand(0).getReg() != Reg ||
        !I.getOperand(1).isImm() || I.getNumOperands() != 2)
      return false;

    switch (I.getOperand(0).getSubReg()) {
    default:
      return false;
    case AMDGPU::sub0:
      if (Def0)
        return false;
      Def0 = &I;
      Init |= I.getOperand(1).getImm() & 0xffffffff;
      break;
    case AMDGPU::sub1:
      if (Def1)
        return false;
      Def1 = &I;
      Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
      break;
    }
  }

  // Both halves must be defined, and in the same basic block.
  if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
    return false;

  LLVM_DEBUG(dbgs() << "Combining:\n  " << *Def0 << "  " << *Def1
                    << "    =>\n");

  // Make Def0 the earlier of the two definitions so the new instruction is
  // inserted at the first definition point.
  if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1),
                                LIS->getInstructionIndex(*Def0)))
    std::swap(Def0, Def1);

  // Replace both moves with a single 64-bit immediate pseudo and recompute
  // the live interval for Reg.
  LIS->RemoveMachineInstrFromMaps(*Def0);
  LIS->RemoveMachineInstrFromMaps(*Def1);
  auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(),
                      TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
                  .addImm(Init);

  Def0->eraseFromParent();
  Def1->eraseFromParent();
  LIS->InsertMachineInstrInMaps(*NewI);
  LIS->removeInterval(Reg);
  LIS->createAndComputeVirtRegInterval(Reg);

  LLVM_DEBUG(dbgs() << "  " << *NewI);

  return true;
}

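// Scan all virtual registers and combine split immediate initializers of
// 64-bit SGPR tuples.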
bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();
  LIS = &getAnalysis<LiveIntervals>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  bool Changed = false;

  for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!LIS->hasInterval(Reg))
      continue;
    // Only 64-bit SGPR tuples are handled for now.
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    if (RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC))
      continue;
    Changed |= processReg(Reg);
  }

  return Changed;
}