//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"

#define MAX_LANES 64

using namespace llvm;

// Initialize per-function state from the calling convention and function
// attributes: which implicit kernel arguments (dispatch ptr, queue ptr,
// workgroup/workitem IDs, ...) must be enabled, plus occupancy and
// flat-work-group-size limits queried from the subtarget.
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff), // Sentinel default; may be overridden below by the
                            // "amdgpu-git-ptr-high" attribute.
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
  // have any calls.
  const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
                           (!isEntryFunction() || HasCalls);

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    // Kernels always get workgroup/workitem ID X; the kernarg segment pointer
    // is only needed if there are explicit arguments.
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now, other registers
      // required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      // The implicit arguments live past the explicit kernargs, so their
      // alignment requirement feeds into the overall kernarg alignment.
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (UseFixedABI) {
    // Fixed ABI: unconditionally enable every input a callee could need.
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
    ImplicitArgPtr = true;
  } else {
    // Otherwise enable inputs individually, as requested via attributes.
    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    // With flat scratch enabled there is no buffer resource descriptor input.
    if (!ST.enableFlatScratch())
      PrivateSegmentBuffer = true;

    if (UseFixedABI) {
      DispatchPtr = true;
      QueuePtr = true;

      // FIXME: We don't need this?
      DispatchID = true;
    } else {
      if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
        DispatchPtr = true;

      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        QueuePtr = true;

      if (F.hasFnAttribute("amdgpu-dispatch-id"))
        DispatchID = true;
    }
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch())) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls or stack objects that may require it before argument
    // lowering.
    if (HasCalls || HasStackObjects || ST.enableFlatScratch())
      FlatScratchInit = true;
  }

  // Parse optional integer-valued attributes. consumeInteger leaves the
  // default in place when the string is empty or unparsable.
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

// Clamp the tracked occupancy to both the per-EU wave limit and the occupancy
// achievable with this function's LDS usage.
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

// Allocate the next four user SGPRs (as an SGPR_128 tuple) for the private
// segment buffer resource descriptor and record the argument location.
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}
Andric Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) { 2000b57cec5SDimitry Andric ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2010b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2020b57cec5SDimitry Andric NumUserSGPRs += 2; 2030b57cec5SDimitry Andric return ArgInfo.DispatchPtr.getRegister(); 2040b57cec5SDimitry Andric } 2050b57cec5SDimitry Andric 2065ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) { 2070b57cec5SDimitry Andric ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2080b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2090b57cec5SDimitry Andric NumUserSGPRs += 2; 2100b57cec5SDimitry Andric return ArgInfo.QueuePtr.getRegister(); 2110b57cec5SDimitry Andric } 2120b57cec5SDimitry Andric 2135ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) { 2140b57cec5SDimitry Andric ArgInfo.KernargSegmentPtr 2150b57cec5SDimitry Andric = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2160b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2170b57cec5SDimitry Andric NumUserSGPRs += 2; 2180b57cec5SDimitry Andric return ArgInfo.KernargSegmentPtr.getRegister(); 2190b57cec5SDimitry Andric } 2200b57cec5SDimitry Andric 2215ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) { 2220b57cec5SDimitry Andric ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2230b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2240b57cec5SDimitry Andric NumUserSGPRs += 2; 2250b57cec5SDimitry Andric return ArgInfo.DispatchID.getRegister(); 2260b57cec5SDimitry Andric } 2270b57cec5SDimitry Andric 2285ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) { 
2290b57cec5SDimitry Andric ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2300b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2310b57cec5SDimitry Andric NumUserSGPRs += 2; 2320b57cec5SDimitry Andric return ArgInfo.FlatScratchInit.getRegister(); 2330b57cec5SDimitry Andric } 2340b57cec5SDimitry Andric 2355ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) { 2360b57cec5SDimitry Andric ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2370b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2380b57cec5SDimitry Andric NumUserSGPRs += 2; 2390b57cec5SDimitry Andric return ArgInfo.ImplicitBufferPtr.getRegister(); 2400b57cec5SDimitry Andric } 2410b57cec5SDimitry Andric 2425ffd83dbSDimitry Andric bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs, 2435ffd83dbSDimitry Andric MCPhysReg Reg) { 2440b57cec5SDimitry Andric for (unsigned I = 0; CSRegs[I]; ++I) { 2450b57cec5SDimitry Andric if (CSRegs[I] == Reg) 2460b57cec5SDimitry Andric return true; 2470b57cec5SDimitry Andric } 2480b57cec5SDimitry Andric 2490b57cec5SDimitry Andric return false; 2500b57cec5SDimitry Andric } 2510b57cec5SDimitry Andric 2520b57cec5SDimitry Andric /// \p returns true if \p NumLanes slots are available in VGPRs already used for 2530b57cec5SDimitry Andric /// SGPR spilling. 
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  // Each spill VGPR provides WaveSize lanes; compare against lanes used so far.
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
/// Returns false (and rolls back any partial allocation) when not enough VGPR
/// lanes can be found; the SGPR then has to be spilled to memory instead.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  // One 32-bit lane per 4 bytes of the spilled object.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    // Reserve a VGPR (when NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ..) and
    // when one of the two conditions is true:
    // 1. One reserved VGPR being tracked by VGPRReservedForSGPRSpill is not yet
    //    reserved.
    // 2. All spill lanes of reserved VGPR(s) are full and another spill lane is
    //    required.
    if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
      assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
      LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
    } else if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      // If the chosen VGPR is callee-saved, create a stack slot so the
      // prolog/epilog can preserve its original value.
      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      // Continue filling lanes of the most recently reserved VGPR.
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve a VGPR for spilling of SGPRs
bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  Register LaneVGPR = TRI->findUnusedRegister(
      MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
  if (LaneVGPR == Register())
    return false;
  SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, None));
  FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if a \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  // One 32-bit lane per 4 bytes of the spilled object.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  // Spill direction selects which register class provides the lanes.
  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  // Treat call-preserved registers as unavailable.
  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  // Walk the register class once, assigning the first free register to each
  // lane in turn.
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

// Remove the stack objects of SGPR->VGPR and VGPR->AGPR spills that no longer
// need real stack slots, and move the remaining SGPR slots back onto the
// default stack.
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP & BP spills haven't been inserted yet, so keep them around.
  for (auto &R : SGPRToVGPRSpills) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
      MFI.RemoveStackObject(R.first);
  }

  // All other SGPRs must be allocated on the default stack, so reset the stack
  // ID.
  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
       ++i)
    if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}

// Next unallocated SGPR in the user-SGPR block (SGPR0 upward).
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

// Next unallocated SGPR after both the user and system SGPR blocks.
MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

// Return the SGPR carrying the low half of the GIT address on amdpal, or an
// invalid Register elsewhere.
Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

// Render \p Reg as its printable name for YAML serialization.
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

// Convert the in-memory argument info into its YAML form. Returns None when
// no argument is set, so nothing is emitted.
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  // Convert one descriptor; returns whether it was present.
  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

// Build the YAML (MIR-serializable) mirror of a SIMachineFunctionInfo.
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

// Copy the scalar base fields parsed from MIR YAML back into this object.
// Returns false (no error) unconditionally.
bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
  return false;
}

// Remove VGPR which was reserved for SGPR spills if there are no spilled SGPRs
bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
                                                   MachineFunction &MF) {
  for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) {
    if (i->VGPR == ReservedVGPR) {
      SpillVGPRs.erase(i);

      // Also drop the live-in entries added when the VGPR was reserved.
      for (MachineBasicBlock &MBB : MF) {
        MBB.removeLiveIn(ReservedVGPR);
        MBB.sortUniqueLiveIns();
      }
      this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
      return true;
    }
  }
  return false;
}