//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    Mode(MF.getFunction()),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = getMaxWavesPerEU();
  limitOccupancy(MF);
  CallingConv::ID CC = F.getCallingConv();

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
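    // Kernel entry points always receive the X workgroup and workitem IDs;
    // the Y and Z counterparts are only enabled below when the corresponding
    // function attributes ask for them.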
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than the
    // registers required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR33;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR34;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);
    ArgInfo.PrivateSegmentWaveByteOffset =
      ArgDescriptor::createRegister(ScratchWaveOffsetReg);

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
    WorkGroupIDX = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
    WorkGroupIDY = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
    WorkGroupIDZ = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
    WorkItemIDX = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
    WorkItemIDY = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
    WorkItemIDZ = true;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;
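    // The wave byte offset is this wave's offset into the private segment;
    // together with the scratch resource descriptor it forms the base used
    // for per-lane stack addressing.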
    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    PrivateSegmentBuffer = true;

    if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F.hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F.hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
    auto hasNonSpillStackObjects = [&]() {
      // Avoid expensive checking if there are no stack objects.
      if (!HasStackObjects)
        return false;
      for (auto OI = FrameInfo.getObjectIndexBegin(),
                OE = FrameInfo.getObjectIndexEnd(); OI != OE; ++OI)
        if (!FrameInfo.isSpillSlotObjectIndex(OI))
          return true;
      // All stack objects are spill slots.
      return false;
    };
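    // FLAT_SCRATCH has to be initialized before flat instructions can address
    // private (scratch) memory, so record whether this kernel needs the
    // initialization values preloaded.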
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
    if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}
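// Each add* helper below reserves the next unused user SGPRs for one
// preloaded input and records the assignment in ArgInfo. The private segment
// buffer is a 128-bit resource descriptor and takes four SGPRs; the remaining
// inputs are 64-bit values and take two each.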
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \return true if \p NumNeed lanes are available in VGPRs already used for
/// SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}
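// SGPR spills are stored "sideways" in VGPRs: each spilled 32-bit SGPR
// occupies one lane of a spill VGPR, so a single VGPR provides one spill slot
// per lane of the wave (e.g. 64 slots at wave64).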
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid the machine
      // verifier complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
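// Example: with two free lanes left in the current spill VGPR, a 128-bit
// SGPR tuple (four lanes) fills those two lanes and the other two go into a
// freshly picked unused VGPR; VGPRIndex simply wraps modulo the wave size.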
/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPRs are spilled to VGPRs or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);
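  // Also treat registers already handed out as spill destinations as used,
  // so AGPR and VGPR spill assignments never alias each other.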
  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}
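// Once spills have been assigned to register lanes, the stack slots that
// backed them are dead; drop them so they don't consume scratch memory.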
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP spill hasn't been inserted yet, so keep it around.
  for (auto &R : SGPRToVGPRSpills) {
    if (R.first != FramePointerSaveIndex)
      MFI.RemoveStackObject(R.first);
  }

  // All other SGPRs must be allocated on the default stack, so reset the
  // stack ID.
  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
       ++i)
    if (i != FramePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

static yaml::StringValue regToString(unsigned Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}
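// The yaml::SIMachineFunctionInfo mirror below is what lets MIR tests
// serialize this state and round-trip it through the MIR parser.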
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
  const llvm::SIMachineFunctionInfo& MFI,
  const TargetRegisterInfo &TRI)
  : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
    MaxKernArgAlign(MFI.getMaxKernArgAlign()),
    LDSSize(MFI.getLDSSize()),
    IsEntryFunction(MFI.isEntryFunction()),
    NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
    MemoryBound(MFI.isMemoryBound()),
    WaveLimiter(MFI.needsWaveLimiter()),
    ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
    ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
    FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
    StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
    ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
    Mode(MFI.getMode()) {}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}
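// Restore the scalar fields parsed from MIR YAML; register and argument
// state is reconstructed separately during MIR parsing.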
bool SIMachineFunctionInfo::initializeBaseYamlFields(
  const yaml::SIMachineFunctionInfo &YamlMFI) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
  LDSSize = YamlMFI.LDSSize;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  return false;
}