//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

// Derive the per-function SI/GCN state from the calling convention, function
// attributes, and subtarget: which implicit arguments (user/system SGPRs) are
// required, which registers handle scratch access, and attribute-driven
// overrides such as the GIT pointer high bits and GDS size.
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    Mode(MF.getFunction()),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    // Default sentinel; replaced below if the "amdgpu-git-ptr-high" attribute
    // supplies a value.
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  // Initial occupancy estimate from LDS usage; may be tightened later via
  // limitOccupancy().
  Occupancy = ST.computeOccupancy(MF, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // Kernels implicitly use workgroup/workitem X IDs, and the kernarg segment
  // pointer whenever they take any arguments.
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    // Pixel shaders record their initial PS input address mask.
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other registers
    // required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR33;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR34;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);
    ArgInfo.PrivateSegmentWaveByteOffset =
      ArgDescriptor::createRegister(ScratchWaveOffsetReg);

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    // Entry functions reach implicit arguments through the kernarg segment;
    // widen the kernarg alignment to cover the implicit argument area.
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  // Attribute-requested implicit inputs (set in addition to any defaults
  // enabled above for the calling convention).
  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
    WorkGroupIDX = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
    WorkGroupIDY = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
    WorkGroupIDZ = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
    WorkItemIDX = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
    WorkItemIDY = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
    WorkItemIDZ = true;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    // HSA/Mesa always provide the private segment buffer; the remaining
    // pointers are only added when the corresponding attribute asks for them.
    PrivateSegmentBuffer = true;

    if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F.hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F.hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
    auto hasNonSpillStackObjects = [&]() {
      // Avoid expensive checking if there's no stack objects.
      if (!HasStackObjects)
        return false;
      for (auto OI = FrameInfo.getObjectIndexBegin(),
                OE = FrameInfo.getObjectIndexEnd(); OI != OE; ++OI)
        if (!FrameInfo.isSpillSlotObjectIndex(OI))
          return true;
      // All stack objects are spill slots.
      return false;
    };
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
    if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }

  // Parse optional integer-valued attributes; a missing or empty attribute
  // leaves the initializer-list default in place.
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

// Clamp the occupancy estimate by both the per-EU wave limit and the
// occupancy achievable with this function's LDS usage.
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

// Allocate four user SGPRs (an SGPR_128 tuple starting at the next free user
// SGPR) for the private segment buffer resource descriptor and record the
// resulting register in ArgInfo.
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}
1970b57cec5SDimitry Andric unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) { 1980b57cec5SDimitry Andric ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 1990b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2000b57cec5SDimitry Andric NumUserSGPRs += 2; 2010b57cec5SDimitry Andric return ArgInfo.DispatchPtr.getRegister(); 2020b57cec5SDimitry Andric } 2030b57cec5SDimitry Andric 2040b57cec5SDimitry Andric unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) { 2050b57cec5SDimitry Andric ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2060b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2070b57cec5SDimitry Andric NumUserSGPRs += 2; 2080b57cec5SDimitry Andric return ArgInfo.QueuePtr.getRegister(); 2090b57cec5SDimitry Andric } 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) { 2120b57cec5SDimitry Andric ArgInfo.KernargSegmentPtr 2130b57cec5SDimitry Andric = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2140b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2150b57cec5SDimitry Andric NumUserSGPRs += 2; 2160b57cec5SDimitry Andric return ArgInfo.KernargSegmentPtr.getRegister(); 2170b57cec5SDimitry Andric } 2180b57cec5SDimitry Andric 2190b57cec5SDimitry Andric unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) { 2200b57cec5SDimitry Andric ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2210b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2220b57cec5SDimitry Andric NumUserSGPRs += 2; 2230b57cec5SDimitry Andric return ArgInfo.DispatchID.getRegister(); 2240b57cec5SDimitry Andric } 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo 
&TRI) { 2270b57cec5SDimitry Andric ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2280b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2290b57cec5SDimitry Andric NumUserSGPRs += 2; 2300b57cec5SDimitry Andric return ArgInfo.FlatScratchInit.getRegister(); 2310b57cec5SDimitry Andric } 2320b57cec5SDimitry Andric 2330b57cec5SDimitry Andric unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) { 2340b57cec5SDimitry Andric ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2350b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2360b57cec5SDimitry Andric NumUserSGPRs += 2; 2370b57cec5SDimitry Andric return ArgInfo.ImplicitBufferPtr.getRegister(); 2380b57cec5SDimitry Andric } 2390b57cec5SDimitry Andric 2400b57cec5SDimitry Andric static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) { 2410b57cec5SDimitry Andric for (unsigned I = 0; CSRegs[I]; ++I) { 2420b57cec5SDimitry Andric if (CSRegs[I] == Reg) 2430b57cec5SDimitry Andric return true; 2440b57cec5SDimitry Andric } 2450b57cec5SDimitry Andric 2460b57cec5SDimitry Andric return false; 2470b57cec5SDimitry Andric } 2480b57cec5SDimitry Andric 2490b57cec5SDimitry Andric /// \p returns true if \p NumLanes slots are available in VGPRs already used for 2500b57cec5SDimitry Andric /// SGPR spilling. 
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  // Each reserved VGPR provides WaveSize 4-byte lanes; NumVGPRSpillLanes is
  // the number already handed out.
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  // One lane per 32-bit word of the spilled object (4..64 bytes, i.e. 1..16
  // SGPRs).
  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    // Lane index 0 means the previous VGPR (if any) is full; pick a fresh one.
    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      // If the chosen VGPR is callee-saved and may live across a call (or we
      // are in a non-entry function), reserve a stack slot to preserve its
      // original value.
      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      // Continue filling the most recently reserved VGPR.
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if a \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  // One lane register per 32-bit word of the spilled object.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  // When spilling AGPRs we allocate VGPR lanes, and vice versa.
  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  // Registers that must not be taken as lanes: call-preserved registers and
  // registers already reserved for earlier AGPR/VGPR spills.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  // Greedily take the next allocatable, unused register for each lane;
  // a partial allocation is kept but reported as not fully allocated.
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

// Remove stack objects whose contents now live entirely in registers
// (SGPR-to-VGPR lanes and fully allocated VGPR-to-AGPR spills).
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP spill hasn't been inserted yet, so keep it around.
  for (auto &R : SGPRToVGPRSpills) {
    if (R.first != FramePointerSaveIndex)
      MFI.RemoveStackObject(R.first);
  }

  // All other SGPRs must be allocated on the default stack, so reset the stack
  // ID.
  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
       ++i)
    if (i != FramePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}

// Next free user SGPR; only valid before any system SGPRs have been added,
// since user SGPRs come first.
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

// Next free system SGPR (system SGPRs follow the user SGPRs).
MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

// Render a physical register as its printable name for YAML serialization.
static yaml::StringValue regToString(unsigned Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    // Scope the stream so its destructor flushes into Dest.Value before the
    // return.
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

// Convert the in-memory argument descriptors to their YAML form. Returns None
// when no argument is set so the YAML output can omit the section entirely.
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  // Convert a single descriptor into \p A; returns true if the descriptor was
  // set (used to decide whether any argument info exists at all).
  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

// Build the YAML mirror of an SIMachineFunctionInfo for MIR serialization.
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
  const llvm::SIMachineFunctionInfo& MFI,
  const TargetRegisterInfo &TRI)
  : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
    MaxKernArgAlign(MFI.getMaxKernArgAlign()),
    LDSSize(MFI.getLDSSize()),
    IsEntryFunction(MFI.isEntryFunction()),
    NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
    MemoryBound(MFI.isMemoryBound()),
    WaveLimiter(MFI.needsWaveLimiter()),
    HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
    ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
    ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
    FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
    StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
    ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
    Mode(MFI.getMode()) {}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

// Copy the scalar fields parsed from YAML back into this MachineFunctionInfo.
// Returns false (no error); register/argument fields are handled elsewhere.
bool SIMachineFunctionInfo::initializeBaseYamlFields(
  const yaml::SIMachineFunctionInfo &YamlMFI) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  return false;
}