1 //===-- GCNPreRAOptimizations.cpp -----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass combines split register tuple initialization into a single pseudo: 11 /// 12 /// undef %0.sub1:sreg_64 = S_MOV_B32 1 13 /// %0.sub0:sreg_64 = S_MOV_B32 2 14 /// => 15 /// %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001 16 /// 17 /// This is to allow rematerialization of a value instead of spilling. It is 18 /// supposed to be done after register coalescer to allow it to do its job and 19 /// before actual register allocation to allow rematerialization. 20 /// 21 /// Right now the pass only handles 64 bit SGPRs with immediate initializers, 22 /// although the same shall be possible with other register classes and 23 /// instructions if necessary. 24 /// 25 //===----------------------------------------------------------------------===// 26 27 #include "AMDGPU.h" 28 #include "GCNSubtarget.h" 29 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 30 #include "llvm/CodeGen/LiveIntervals.h" 31 #include "llvm/CodeGen/MachineFunctionPass.h" 32 #include "llvm/InitializePasses.h" 33 34 using namespace llvm; 35 36 #define DEBUG_TYPE "amdgpu-pre-ra-optimizations" 37 38 namespace { 39 40 class GCNPreRAOptimizations : public MachineFunctionPass { 41 private: 42 const SIInstrInfo *TII; 43 const SIRegisterInfo *TRI; 44 MachineRegisterInfo *MRI; 45 LiveIntervals *LIS; 46 47 bool processReg(Register Reg); 48 49 public: 50 static char ID; 51 52 GCNPreRAOptimizations() : MachineFunctionPass(ID) { 53 initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry()); 54 } 55 56 bool runOnMachineFunction(MachineFunction &MF) override; 57 58 StringRef getPassName() const override { 59 return "AMDGPU Pre-RA optimizations"; 60 } 61 62 void getAnalysisUsage(AnalysisUsage &AU) const override { 63 AU.addRequired<LiveIntervals>(); 64 AU.setPreservesAll(); 65 MachineFunctionPass::getAnalysisUsage(AU); 66 } 67 }; 68 69 } // End anonymous namespace. 70 71 INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE, 72 "AMDGPU Pre-RA optimizations", false, false) 73 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 74 INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations", 75 false, false) 76 77 char GCNPreRAOptimizations::ID = 0; 78 79 char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID; 80 81 FunctionPass *llvm::createGCNPreRAOptimizationsPass() { 82 return new GCNPreRAOptimizations(); 83 } 84 85 bool GCNPreRAOptimizations::processReg(Register Reg) { 86 MachineInstr *Def0 = nullptr; 87 MachineInstr *Def1 = nullptr; 88 uint64_t Init = 0; 89 bool Changed = false; 90 SmallSet<Register, 32> ModifiedRegs; 91 bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg)); 92 93 for (MachineInstr &I : MRI->def_instructions(Reg)) { 94 switch (I.getOpcode()) { 95 default: 96 return false; 97 case AMDGPU::V_ACCVGPR_WRITE_B32_e64: 98 break; 99 case AMDGPU::COPY: { 100 // Some subtargets cannot do an AGPR to AGPR copy directly, and need an 101 // intermdiate temporary VGPR register. Try to find the defining 102 // accvgpr_write to avoid temporary registers. 103 104 if (!IsAGPRDst) 105 return false; 106 107 Register SrcReg = I.getOperand(1).getReg(); 108 109 if (!SrcReg.isVirtual()) 110 break; 111 112 // Check if source of copy is from another AGPR. 113 bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg)); 114 if (!IsAGPRSrc) 115 break; 116 117 // def_instructions() does not look at subregs so it may give us a 118 // different instruction that defines the same vreg but different subreg 119 // so we have to manually check subreg. 120 Register SrcSubReg = I.getOperand(1).getSubReg(); 121 for (auto &Def : MRI->def_instructions(SrcReg)) { 122 if (SrcSubReg != Def.getOperand(0).getSubReg()) 123 continue; 124 125 if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) { 126 MachineOperand DefSrcMO = Def.getOperand(1); 127 128 // Immediates are not an issue and can be propagated in 129 // postrapseudos pass. Only handle cases where defining 130 // accvgpr_write source is a vreg. 131 if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) { 132 // Propagate source reg of accvgpr write to this copy instruction 133 I.getOperand(1).setReg(DefSrcMO.getReg()); 134 I.getOperand(1).setSubReg(DefSrcMO.getSubReg()); 135 136 // Reg uses were changed, collect unique set of registers to update 137 // live intervals at the end. 138 ModifiedRegs.insert(DefSrcMO.getReg()); 139 ModifiedRegs.insert(SrcReg); 140 141 Changed = true; 142 } 143 144 // Found the defining accvgpr_write, stop looking any further. 145 break; 146 } 147 } 148 break; 149 } 150 case AMDGPU::S_MOV_B32: 151 if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() || 152 I.getNumOperands() != 2) 153 return false; 154 155 switch (I.getOperand(0).getSubReg()) { 156 default: 157 return false; 158 case AMDGPU::sub0: 159 if (Def0) 160 return false; 161 Def0 = &I; 162 Init |= I.getOperand(1).getImm() & 0xffffffff; 163 break; 164 case AMDGPU::sub1: 165 if (Def1) 166 return false; 167 Def1 = &I; 168 Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32; 169 break; 170 } 171 break; 172 } 173 } 174 175 // For AGPR reg, check if live intervals need to be updated. 176 if (IsAGPRDst) { 177 if (Changed) { 178 for (Register RegToUpdate : ModifiedRegs) { 179 LIS->removeInterval(RegToUpdate); 180 LIS->createAndComputeVirtRegInterval(RegToUpdate); 181 } 182 } 183 184 return Changed; 185 } 186 187 // For SGPR reg, check if we can combine instructions. 188 if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent()) 189 return Changed; 190 191 LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1 192 << " =>\n"); 193 194 if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1), 195 LIS->getInstructionIndex(*Def0))) 196 std::swap(Def0, Def1); 197 198 LIS->RemoveMachineInstrFromMaps(*Def0); 199 LIS->RemoveMachineInstrFromMaps(*Def1); 200 auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(), 201 TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg) 202 .addImm(Init); 203 204 Def0->eraseFromParent(); 205 Def1->eraseFromParent(); 206 LIS->InsertMachineInstrInMaps(*NewI); 207 LIS->removeInterval(Reg); 208 LIS->createAndComputeVirtRegInterval(Reg); 209 210 LLVM_DEBUG(dbgs() << " " << *NewI); 211 212 return true; 213 } 214 215 bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) { 216 if (skipFunction(MF.getFunction())) 217 return false; 218 219 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 220 TII = ST.getInstrInfo(); 221 MRI = &MF.getRegInfo(); 222 LIS = &getAnalysis<LiveIntervals>(); 223 TRI = ST.getRegisterInfo(); 224 225 bool Changed = false; 226 227 for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) { 228 Register Reg = Register::index2VirtReg(I); 229 if (!LIS->hasInterval(Reg)) 230 continue; 231 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 232 if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) && 233 (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC))) 234 continue; 235 236 Changed |= processReg(Reg); 237 } 238 239 return Changed; 240 } 241