xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp (revision ec0ea6efa1ad229d75c394c1a9b9cac33af2b1d3)
1 //===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass combines split register tuple initialization into a single pseudo:
11 ///
12 ///   undef %0.sub1:sreg_64 = S_MOV_B32 1
13 ///   %0.sub0:sreg_64 = S_MOV_B32 2
14 /// =>
15 ///   %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001
16 ///
17 /// This is to allow rematerialization of a value instead of spilling. It is
18 /// supposed to be done after register coalescer to allow it to do its job and
19 /// before actual register allocation to allow rematerialization.
20 ///
21 /// Right now the pass only handles 64 bit SGPRs with immediate initializers,
22 /// although the same shall be possible with other register classes and
23 /// instructions if necessary.
24 ///
25 //===----------------------------------------------------------------------===//
26 
27 #include "AMDGPU.h"
28 #include "GCNSubtarget.h"
29 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
30 #include "llvm/CodeGen/LiveIntervals.h"
31 #include "llvm/CodeGen/MachineFunctionPass.h"
32 #include "llvm/InitializePasses.h"
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "amdgpu-pre-ra-optimizations"
37 
38 namespace {
39 
40 class GCNPreRAOptimizations : public MachineFunctionPass {
41 private:
42   const SIInstrInfo *TII;
43   MachineRegisterInfo *MRI;
44   LiveIntervals *LIS;
45 
46   bool processReg(Register Reg);
47 
48 public:
49   static char ID;
50 
51   GCNPreRAOptimizations() : MachineFunctionPass(ID) {
52     initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry());
53   }
54 
55   bool runOnMachineFunction(MachineFunction &MF) override;
56 
57   StringRef getPassName() const override {
58     return "AMDGPU Pre-RA optimizations";
59   }
60 
61   void getAnalysisUsage(AnalysisUsage &AU) const override {
62     AU.addRequired<LiveIntervals>();
63     AU.setPreservesAll();
64     MachineFunctionPass::getAnalysisUsage(AU);
65   }
66 };
67 
68 } // End anonymous namespace.
69 
70 INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE,
71                       "AMDGPU Pre-RA optimizations", false, false)
72 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
73 INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations",
74                     false, false)
75 
76 char GCNPreRAOptimizations::ID = 0;
77 
78 char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID;
79 
80 FunctionPass *llvm::createGCNPreRAOptimizationsPass() {
81   return new GCNPreRAOptimizations();
82 }
83 
84 bool GCNPreRAOptimizations::processReg(Register Reg) {
85   MachineInstr *Def0 = nullptr;
86   MachineInstr *Def1 = nullptr;
87   uint64_t Init = 0;
88 
89   for (MachineInstr &I : MRI->def_instructions(Reg)) {
90     if (I.getOpcode() != AMDGPU::S_MOV_B32 || I.getOperand(0).getReg() != Reg ||
91         !I.getOperand(1).isImm() || I.getNumOperands() != 2)
92       return false;
93 
94     switch (I.getOperand(0).getSubReg()) {
95     default:
96       return false;
97     case AMDGPU::sub0:
98       if (Def0)
99         return false;
100       Def0 = &I;
101       Init |= I.getOperand(1).getImm() & 0xffffffff;
102       break;
103     case AMDGPU::sub1:
104       if (Def1)
105         return false;
106       Def1 = &I;
107       Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
108       break;
109     }
110   }
111 
112   if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
113     return false;
114 
115   LLVM_DEBUG(dbgs() << "Combining:\n  " << *Def0 << "  " << *Def1
116                     << "    =>\n");
117 
118   if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1),
119                                 LIS->getInstructionIndex(*Def0)))
120     std::swap(Def0, Def1);
121 
122   LIS->RemoveMachineInstrFromMaps(*Def0);
123   LIS->RemoveMachineInstrFromMaps(*Def1);
124   auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(),
125                       TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
126                   .addImm(Init);
127 
128   Def0->eraseFromParent();
129   Def1->eraseFromParent();
130   LIS->InsertMachineInstrInMaps(*NewI);
131   LIS->removeInterval(Reg);
132   LIS->createAndComputeVirtRegInterval(Reg);
133 
134   LLVM_DEBUG(dbgs() << "  " << *NewI);
135 
136   return true;
137 }
138 
139 bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
140   if (skipFunction(MF.getFunction()))
141     return false;
142 
143   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
144   TII = ST.getInstrInfo();
145   MRI = &MF.getRegInfo();
146   LIS = &getAnalysis<LiveIntervals>();
147   const SIRegisterInfo *TRI = ST.getRegisterInfo();
148 
149   bool Changed = false;
150 
151   for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
152     Register Reg = Register::index2VirtReg(I);
153     if (!LIS->hasInterval(Reg))
154       continue;
155     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
156     if (RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC))
157       continue;
158     Changed |= processReg(Reg);
159   }
160 
161   return Changed;
162 }
163