1*fe6060f1SDimitry Andric //===-- GCNPreRAOptimizations.cpp -----------------------------------------===// 2*fe6060f1SDimitry Andric // 3*fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*fe6060f1SDimitry Andric // 7*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8*fe6060f1SDimitry Andric // 9*fe6060f1SDimitry Andric /// \file 10*fe6060f1SDimitry Andric /// This pass combines split register tuple initialization into a single psuedo: 11*fe6060f1SDimitry Andric /// 12*fe6060f1SDimitry Andric /// undef %0.sub1:sreg_64 = S_MOV_B32 1 13*fe6060f1SDimitry Andric /// %0.sub0:sreg_64 = S_MOV_B32 2 14*fe6060f1SDimitry Andric /// => 15*fe6060f1SDimitry Andric /// %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001 16*fe6060f1SDimitry Andric /// 17*fe6060f1SDimitry Andric /// This is to allow rematerialization of a value instead of spilling. It is 18*fe6060f1SDimitry Andric /// supposed to be done after register coalescer to allow it to do its job and 19*fe6060f1SDimitry Andric /// before actual register allocation to allow rematerialization. 20*fe6060f1SDimitry Andric /// 21*fe6060f1SDimitry Andric /// Right now the pass only handles 64 bit SGPRs with immediate initializers, 22*fe6060f1SDimitry Andric /// although the same shall be possible with other register classes and 23*fe6060f1SDimitry Andric /// instructions if necessary. 24*fe6060f1SDimitry Andric /// 25*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 26*fe6060f1SDimitry Andric 27*fe6060f1SDimitry Andric #include "AMDGPU.h" 28*fe6060f1SDimitry Andric #include "GCNSubtarget.h" 29*fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 30*fe6060f1SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h" 31*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 32*fe6060f1SDimitry Andric #include "llvm/InitializePasses.h" 33*fe6060f1SDimitry Andric 34*fe6060f1SDimitry Andric using namespace llvm; 35*fe6060f1SDimitry Andric 36*fe6060f1SDimitry Andric #define DEBUG_TYPE "amdgpu-pre-ra-optimizations" 37*fe6060f1SDimitry Andric 38*fe6060f1SDimitry Andric namespace { 39*fe6060f1SDimitry Andric 40*fe6060f1SDimitry Andric class GCNPreRAOptimizations : public MachineFunctionPass { 41*fe6060f1SDimitry Andric private: 42*fe6060f1SDimitry Andric const SIInstrInfo *TII; 43*fe6060f1SDimitry Andric MachineRegisterInfo *MRI; 44*fe6060f1SDimitry Andric LiveIntervals *LIS; 45*fe6060f1SDimitry Andric 46*fe6060f1SDimitry Andric bool processReg(Register Reg); 47*fe6060f1SDimitry Andric 48*fe6060f1SDimitry Andric public: 49*fe6060f1SDimitry Andric static char ID; 50*fe6060f1SDimitry Andric 51*fe6060f1SDimitry Andric GCNPreRAOptimizations() : MachineFunctionPass(ID) { 52*fe6060f1SDimitry Andric initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry()); 53*fe6060f1SDimitry Andric } 54*fe6060f1SDimitry Andric 55*fe6060f1SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 56*fe6060f1SDimitry Andric 57*fe6060f1SDimitry Andric StringRef getPassName() const override { 58*fe6060f1SDimitry Andric return "AMDGPU Pre-RA optimizations"; 59*fe6060f1SDimitry Andric } 60*fe6060f1SDimitry Andric 61*fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 62*fe6060f1SDimitry Andric AU.addRequired<LiveIntervals>(); 63*fe6060f1SDimitry Andric AU.setPreservesAll(); 64*fe6060f1SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 65*fe6060f1SDimitry Andric } 66*fe6060f1SDimitry Andric }; 67*fe6060f1SDimitry Andric 68*fe6060f1SDimitry Andric } // End anonymous namespace. 69*fe6060f1SDimitry Andric 70*fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE, 71*fe6060f1SDimitry Andric "AMDGPU Pre-RA optimizations", false, false) 72*fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 73*fe6060f1SDimitry Andric INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations", 74*fe6060f1SDimitry Andric false, false) 75*fe6060f1SDimitry Andric 76*fe6060f1SDimitry Andric char GCNPreRAOptimizations::ID = 0; 77*fe6060f1SDimitry Andric 78*fe6060f1SDimitry Andric char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID; 79*fe6060f1SDimitry Andric 80*fe6060f1SDimitry Andric FunctionPass *llvm::createGCNPreRAOptimizationsPass() { 81*fe6060f1SDimitry Andric return new GCNPreRAOptimizations(); 82*fe6060f1SDimitry Andric } 83*fe6060f1SDimitry Andric 84*fe6060f1SDimitry Andric bool GCNPreRAOptimizations::processReg(Register Reg) { 85*fe6060f1SDimitry Andric MachineInstr *Def0 = nullptr; 86*fe6060f1SDimitry Andric MachineInstr *Def1 = nullptr; 87*fe6060f1SDimitry Andric uint64_t Init = 0; 88*fe6060f1SDimitry Andric 89*fe6060f1SDimitry Andric for (MachineInstr &I : MRI->def_instructions(Reg)) { 90*fe6060f1SDimitry Andric if (I.getOpcode() != AMDGPU::S_MOV_B32 || I.getOperand(0).getReg() != Reg || 91*fe6060f1SDimitry Andric !I.getOperand(1).isImm() || I.getNumOperands() != 2) 92*fe6060f1SDimitry Andric return false; 93*fe6060f1SDimitry Andric 94*fe6060f1SDimitry Andric switch (I.getOperand(0).getSubReg()) { 95*fe6060f1SDimitry Andric default: 96*fe6060f1SDimitry Andric return false; 97*fe6060f1SDimitry Andric case AMDGPU::sub0: 98*fe6060f1SDimitry Andric if (Def0) 99*fe6060f1SDimitry Andric return false; 100*fe6060f1SDimitry Andric Def0 = &I; 101*fe6060f1SDimitry Andric Init |= I.getOperand(1).getImm() & 0xffffffff; 102*fe6060f1SDimitry Andric break; 103*fe6060f1SDimitry Andric case AMDGPU::sub1: 104*fe6060f1SDimitry Andric if (Def1) 105*fe6060f1SDimitry Andric return false; 106*fe6060f1SDimitry Andric Def1 = &I; 107*fe6060f1SDimitry Andric Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32; 108*fe6060f1SDimitry Andric break; 109*fe6060f1SDimitry Andric } 110*fe6060f1SDimitry Andric } 111*fe6060f1SDimitry Andric 112*fe6060f1SDimitry Andric if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent()) 113*fe6060f1SDimitry Andric return false; 114*fe6060f1SDimitry Andric 115*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1 116*fe6060f1SDimitry Andric << " =>\n"); 117*fe6060f1SDimitry Andric 118*fe6060f1SDimitry Andric if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1), 119*fe6060f1SDimitry Andric LIS->getInstructionIndex(*Def0))) 120*fe6060f1SDimitry Andric std::swap(Def0, Def1); 121*fe6060f1SDimitry Andric 122*fe6060f1SDimitry Andric LIS->RemoveMachineInstrFromMaps(*Def0); 123*fe6060f1SDimitry Andric LIS->RemoveMachineInstrFromMaps(*Def1); 124*fe6060f1SDimitry Andric auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(), 125*fe6060f1SDimitry Andric TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg) 126*fe6060f1SDimitry Andric .addImm(Init); 127*fe6060f1SDimitry Andric 128*fe6060f1SDimitry Andric Def0->eraseFromParent(); 129*fe6060f1SDimitry Andric Def1->eraseFromParent(); 130*fe6060f1SDimitry Andric LIS->InsertMachineInstrInMaps(*NewI); 131*fe6060f1SDimitry Andric LIS->removeInterval(Reg); 132*fe6060f1SDimitry Andric LIS->createAndComputeVirtRegInterval(Reg); 133*fe6060f1SDimitry Andric 134*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " " << *NewI); 135*fe6060f1SDimitry Andric 136*fe6060f1SDimitry Andric return true; 137*fe6060f1SDimitry Andric } 138*fe6060f1SDimitry Andric 139*fe6060f1SDimitry Andric bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) { 140*fe6060f1SDimitry Andric if (skipFunction(MF.getFunction())) 141*fe6060f1SDimitry Andric return false; 142*fe6060f1SDimitry Andric 143*fe6060f1SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 144*fe6060f1SDimitry Andric TII = ST.getInstrInfo(); 145*fe6060f1SDimitry Andric MRI = &MF.getRegInfo(); 146*fe6060f1SDimitry Andric LIS = &getAnalysis<LiveIntervals>(); 147*fe6060f1SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 148*fe6060f1SDimitry Andric 149*fe6060f1SDimitry Andric bool Changed = false; 150*fe6060f1SDimitry Andric 151*fe6060f1SDimitry Andric for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) { 152*fe6060f1SDimitry Andric Register Reg = Register::index2VirtReg(I); 153*fe6060f1SDimitry Andric if (!LIS->hasInterval(Reg)) 154*fe6060f1SDimitry Andric continue; 155*fe6060f1SDimitry Andric const TargetRegisterClass *RC = MRI->getRegClass(Reg); 156*fe6060f1SDimitry Andric if (RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) 157*fe6060f1SDimitry Andric continue; 158*fe6060f1SDimitry Andric Changed |= processReg(Reg); 159*fe6060f1SDimitry Andric } 160*fe6060f1SDimitry Andric 161*fe6060f1SDimitry Andric return Changed; 162*fe6060f1SDimitry Andric } 163