10b57cec5SDimitry Andric //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h" 105ffd83dbSDimitry Andric #include "AMDGPUTargetMachine.h" 11fe6060f1SDimitry Andric #include "AMDGPUSubtarget.h" 12fe6060f1SDimitry Andric #include "SIRegisterInfo.h" 13fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 14fe6060f1SDimitry Andric #include "Utils/AMDGPUBaseInfo.h" 15fe6060f1SDimitry Andric #include "llvm/ADT/Optional.h" 16fe6060f1SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h" 17fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 18fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 19fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 20fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 21fe6060f1SDimitry Andric #include "llvm/CodeGen/MIRParser/MIParser.h" 22fe6060f1SDimitry Andric #include "llvm/IR/CallingConv.h" 23fe6060f1SDimitry Andric #include "llvm/IR/DiagnosticInfo.h" 24fe6060f1SDimitry Andric #include "llvm/IR/Function.h" 25fe6060f1SDimitry Andric #include <cassert> 26fe6060f1SDimitry Andric #include <vector> 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric #define MAX_LANES 64 290b57cec5SDimitry Andric 300b57cec5SDimitry Andric using namespace llvm; 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) 330b57cec5SDimitry Andric : AMDGPUMachineFunction(MF), 34*81ad6265SDimitry Andric BufferPSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())), 35*81ad6265SDimitry Andric ImagePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())), 36*81ad6265SDimitry Andric GWSResourcePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())), 370b57cec5SDimitry Andric PrivateSegmentBuffer(false), 380b57cec5SDimitry Andric DispatchPtr(false), 390b57cec5SDimitry Andric QueuePtr(false), 400b57cec5SDimitry Andric KernargSegmentPtr(false), 410b57cec5SDimitry Andric DispatchID(false), 420b57cec5SDimitry Andric FlatScratchInit(false), 430b57cec5SDimitry Andric WorkGroupIDX(false), 440b57cec5SDimitry Andric WorkGroupIDY(false), 450b57cec5SDimitry Andric WorkGroupIDZ(false), 460b57cec5SDimitry Andric WorkGroupInfo(false), 470b57cec5SDimitry Andric PrivateSegmentWaveByteOffset(false), 480b57cec5SDimitry Andric WorkItemIDX(false), 490b57cec5SDimitry Andric WorkItemIDY(false), 500b57cec5SDimitry Andric WorkItemIDZ(false), 510b57cec5SDimitry Andric ImplicitBufferPtr(false), 520b57cec5SDimitry Andric ImplicitArgPtr(false), 530b57cec5SDimitry Andric GITPtrHigh(0xffffffff), 54*81ad6265SDimitry Andric HighBitsOf32BitAddress(0) { 550b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 560b57cec5SDimitry Andric const Function &F = MF.getFunction(); 570b57cec5SDimitry Andric FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); 580b57cec5SDimitry Andric WavesPerEU = ST.getWavesPerEU(F); 590b57cec5SDimitry Andric 605ffd83dbSDimitry Andric Occupancy = ST.computeOccupancy(F, getLDSSize()); 610b57cec5SDimitry Andric CallingConv::ID CC = F.getCallingConv(); 620b57cec5SDimitry Andric 635ffd83dbSDimitry Andric // FIXME: Should have analysis or something rather than attribute to detect 645ffd83dbSDimitry Andric // calls. 655ffd83dbSDimitry Andric const bool HasCalls = F.hasFnAttribute("amdgpu-calls"); 665ffd83dbSDimitry Andric 67349cc55cSDimitry Andric const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL || 68349cc55cSDimitry Andric CC == CallingConv::SPIR_KERNEL; 695ffd83dbSDimitry Andric 70349cc55cSDimitry Andric if (IsKernel) { 71349cc55cSDimitry Andric if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0) 720b57cec5SDimitry Andric KernargSegmentPtr = true; 730b57cec5SDimitry Andric WorkGroupIDX = true; 740b57cec5SDimitry Andric WorkItemIDX = true; 750b57cec5SDimitry Andric } else if (CC == CallingConv::AMDGPU_PS) { 760b57cec5SDimitry Andric PSInputAddr = AMDGPU::getInitialPSInputAddr(F); 770b57cec5SDimitry Andric } 780b57cec5SDimitry Andric 79*81ad6265SDimitry Andric MayNeedAGPRs = ST.hasMAIInsts(); 80*81ad6265SDimitry Andric 810b57cec5SDimitry Andric if (!isEntryFunction()) { 820eae32dcSDimitry Andric if (CC != CallingConv::AMDGPU_Gfx) 83fe6060f1SDimitry Andric ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo; 84fe6060f1SDimitry Andric 850b57cec5SDimitry Andric // TODO: Pick a high register, and shift down, similar to a kernel. 865ffd83dbSDimitry Andric FrameOffsetReg = AMDGPU::SGPR33; 870b57cec5SDimitry Andric StackPtrOffsetReg = AMDGPU::SGPR32; 880b57cec5SDimitry Andric 89e8d8bef9SDimitry Andric if (!ST.enableFlatScratch()) { 90e8d8bef9SDimitry Andric // Non-entry functions have no special inputs for now, other registers 91e8d8bef9SDimitry Andric // required for scratch access. 92e8d8bef9SDimitry Andric ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3; 93e8d8bef9SDimitry Andric 940b57cec5SDimitry Andric ArgInfo.PrivateSegmentBuffer = 950b57cec5SDimitry Andric ArgDescriptor::createRegister(ScratchRSrcReg); 96e8d8bef9SDimitry Andric } 970b57cec5SDimitry Andric 98349cc55cSDimitry Andric if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr")) 990b57cec5SDimitry Andric ImplicitArgPtr = true; 1000b57cec5SDimitry Andric } else { 101349cc55cSDimitry Andric ImplicitArgPtr = false; 1020b57cec5SDimitry Andric MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(), 1030b57cec5SDimitry Andric MaxKernArgAlign); 104*81ad6265SDimitry Andric 105*81ad6265SDimitry Andric if (ST.hasGFX90AInsts() && 106*81ad6265SDimitry Andric ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() && 107*81ad6265SDimitry Andric !mayUseAGPRs(MF)) 108*81ad6265SDimitry Andric MayNeedAGPRs = false; // We will select all MAI with VGPR operands. 1090b57cec5SDimitry Andric } 110349cc55cSDimitry Andric 111349cc55cSDimitry Andric bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F); 112349cc55cSDimitry Andric if (isAmdHsaOrMesa && !ST.enableFlatScratch()) 113349cc55cSDimitry Andric PrivateSegmentBuffer = true; 114349cc55cSDimitry Andric else if (ST.isMesaGfxShader(F)) 115349cc55cSDimitry Andric ImplicitBufferPtr = true; 1160b57cec5SDimitry Andric 1170eae32dcSDimitry Andric if (!AMDGPU::isGraphics(CC)) { 118349cc55cSDimitry Andric if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x")) 1190b57cec5SDimitry Andric WorkGroupIDX = true; 1200b57cec5SDimitry Andric 121349cc55cSDimitry Andric if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y")) 1220b57cec5SDimitry Andric WorkGroupIDY = true; 1230b57cec5SDimitry Andric 124349cc55cSDimitry Andric if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z")) 1250b57cec5SDimitry Andric WorkGroupIDZ = true; 1260b57cec5SDimitry Andric 127349cc55cSDimitry Andric if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x")) 1280b57cec5SDimitry Andric WorkItemIDX = true; 1290b57cec5SDimitry Andric 13004eeddc0SDimitry Andric if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") && 13104eeddc0SDimitry Andric ST.getMaxWorkitemID(F, 1) != 0) 1320b57cec5SDimitry Andric WorkItemIDY = true; 1330b57cec5SDimitry Andric 13404eeddc0SDimitry Andric if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") && 13504eeddc0SDimitry Andric ST.getMaxWorkitemID(F, 2) != 0) 1360b57cec5SDimitry Andric WorkItemIDZ = true; 137349cc55cSDimitry Andric 138349cc55cSDimitry Andric if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr")) 139349cc55cSDimitry Andric DispatchPtr = true; 140349cc55cSDimitry Andric 141349cc55cSDimitry Andric if (!F.hasFnAttribute("amdgpu-no-queue-ptr")) 142349cc55cSDimitry Andric QueuePtr = true; 143349cc55cSDimitry Andric 144349cc55cSDimitry Andric if (!F.hasFnAttribute("amdgpu-no-dispatch-id")) 145349cc55cSDimitry Andric DispatchID = true; 1465ffd83dbSDimitry Andric } 1470b57cec5SDimitry Andric 148349cc55cSDimitry Andric // FIXME: This attribute is a hack, we just need an analysis on the function 149349cc55cSDimitry Andric // to look for allocas. 1505ffd83dbSDimitry Andric bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects"); 151349cc55cSDimitry Andric 152349cc55cSDimitry Andric // TODO: This could be refined a lot. The attribute is a poor way of 153349cc55cSDimitry Andric // detecting calls or stack objects that may require it before argument 154349cc55cSDimitry Andric // lowering. 155349cc55cSDimitry Andric if (ST.hasFlatAddressSpace() && isEntryFunction() && 156349cc55cSDimitry Andric (isAmdHsaOrMesa || ST.enableFlatScratch()) && 157349cc55cSDimitry Andric (HasCalls || HasStackObjects || ST.enableFlatScratch()) && 158349cc55cSDimitry Andric !ST.flatScratchIsArchitected()) { 159349cc55cSDimitry Andric FlatScratchInit = true; 160349cc55cSDimitry Andric } 161349cc55cSDimitry Andric 1620b57cec5SDimitry Andric if (isEntryFunction()) { 1630b57cec5SDimitry Andric // X, XY, and XYZ are the only supported combinations, so make sure Y is 1640b57cec5SDimitry Andric // enabled if Z is. 1650b57cec5SDimitry Andric if (WorkItemIDZ) 1660b57cec5SDimitry Andric WorkItemIDY = true; 1670b57cec5SDimitry Andric 168fe6060f1SDimitry Andric if (!ST.flatScratchIsArchitected()) { 1690b57cec5SDimitry Andric PrivateSegmentWaveByteOffset = true; 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric // HS and GS always have the scratch wave offset in SGPR5 on GFX9. 1720b57cec5SDimitry Andric if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 && 1730b57cec5SDimitry Andric (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS)) 1740b57cec5SDimitry Andric ArgInfo.PrivateSegmentWaveByteOffset = 1750b57cec5SDimitry Andric ArgDescriptor::createRegister(AMDGPU::SGPR5); 1760b57cec5SDimitry Andric } 177fe6060f1SDimitry Andric } 1780b57cec5SDimitry Andric 1790b57cec5SDimitry Andric Attribute A = F.getFnAttribute("amdgpu-git-ptr-high"); 1800b57cec5SDimitry Andric StringRef S = A.getValueAsString(); 1810b57cec5SDimitry Andric if (!S.empty()) 1820b57cec5SDimitry Andric S.consumeInteger(0, GITPtrHigh); 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric A = F.getFnAttribute("amdgpu-32bit-address-high-bits"); 1850b57cec5SDimitry Andric S = A.getValueAsString(); 1860b57cec5SDimitry Andric if (!S.empty()) 1870b57cec5SDimitry Andric S.consumeInteger(0, HighBitsOf32BitAddress); 1880b57cec5SDimitry Andric 189*81ad6265SDimitry Andric // On GFX908, in order to guarantee copying between AGPRs, we need a scratch 190*81ad6265SDimitry Andric // VGPR available at all times. For now, reserve highest available VGPR. After 191*81ad6265SDimitry Andric // RA, shift it to the lowest available unused VGPR if the one exist. 192*81ad6265SDimitry Andric if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) { 193*81ad6265SDimitry Andric VGPRForAGPRCopy = 194*81ad6265SDimitry Andric AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1); 195*81ad6265SDimitry Andric } 196*81ad6265SDimitry Andric } 197*81ad6265SDimitry Andric 198*81ad6265SDimitry Andric MachineFunctionInfo *SIMachineFunctionInfo::clone( 199*81ad6265SDimitry Andric BumpPtrAllocator &Allocator, MachineFunction &DestMF, 200*81ad6265SDimitry Andric const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB) 201*81ad6265SDimitry Andric const { 202*81ad6265SDimitry Andric return DestMF.cloneInfo<SIMachineFunctionInfo>(*this); 2030b57cec5SDimitry Andric } 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) { 2060b57cec5SDimitry Andric limitOccupancy(getMaxWavesPerEU()); 2070b57cec5SDimitry Andric const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>(); 2080b57cec5SDimitry Andric limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(), 2090b57cec5SDimitry Andric MF.getFunction())); 2100b57cec5SDimitry Andric } 2110b57cec5SDimitry Andric 2125ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addPrivateSegmentBuffer( 2130b57cec5SDimitry Andric const SIRegisterInfo &TRI) { 2140b57cec5SDimitry Andric ArgInfo.PrivateSegmentBuffer = 2150b57cec5SDimitry Andric ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2168bcb0991SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass)); 2170b57cec5SDimitry Andric NumUserSGPRs += 4; 2180b57cec5SDimitry Andric return ArgInfo.PrivateSegmentBuffer.getRegister(); 2190b57cec5SDimitry Andric } 2200b57cec5SDimitry Andric 2215ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) { 2220b57cec5SDimitry Andric ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2230b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2240b57cec5SDimitry Andric NumUserSGPRs += 2; 2250b57cec5SDimitry Andric return ArgInfo.DispatchPtr.getRegister(); 2260b57cec5SDimitry Andric } 2270b57cec5SDimitry Andric 2285ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) { 2290b57cec5SDimitry Andric ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2300b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2310b57cec5SDimitry Andric NumUserSGPRs += 2; 2320b57cec5SDimitry Andric return ArgInfo.QueuePtr.getRegister(); 2330b57cec5SDimitry Andric } 2340b57cec5SDimitry Andric 2355ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) { 2360b57cec5SDimitry Andric ArgInfo.KernargSegmentPtr 2370b57cec5SDimitry Andric = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2380b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2390b57cec5SDimitry Andric NumUserSGPRs += 2; 2400b57cec5SDimitry Andric return ArgInfo.KernargSegmentPtr.getRegister(); 2410b57cec5SDimitry Andric } 2420b57cec5SDimitry Andric 2435ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) { 2440b57cec5SDimitry Andric ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2450b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2460b57cec5SDimitry Andric NumUserSGPRs += 2; 2470b57cec5SDimitry Andric return ArgInfo.DispatchID.getRegister(); 2480b57cec5SDimitry Andric } 2490b57cec5SDimitry Andric 2505ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) { 2510b57cec5SDimitry Andric ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2520b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2530b57cec5SDimitry Andric NumUserSGPRs += 2; 2540b57cec5SDimitry Andric return ArgInfo.FlatScratchInit.getRegister(); 2550b57cec5SDimitry Andric } 2560b57cec5SDimitry Andric 2575ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) { 2580b57cec5SDimitry Andric ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2590b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2600b57cec5SDimitry Andric NumUserSGPRs += 2; 2610b57cec5SDimitry Andric return ArgInfo.ImplicitBufferPtr.getRegister(); 2620b57cec5SDimitry Andric } 2630b57cec5SDimitry Andric 2645ffd83dbSDimitry Andric bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs, 2655ffd83dbSDimitry Andric MCPhysReg Reg) { 2660b57cec5SDimitry Andric for (unsigned I = 0; CSRegs[I]; ++I) { 2670b57cec5SDimitry Andric if (CSRegs[I] == Reg) 2680b57cec5SDimitry Andric return true; 2690b57cec5SDimitry Andric } 2700b57cec5SDimitry Andric 2710b57cec5SDimitry Andric return false; 2720b57cec5SDimitry Andric } 2730b57cec5SDimitry Andric 2740b57cec5SDimitry Andric /// \p returns true if \p NumLanes slots are available in VGPRs already used for 2750b57cec5SDimitry Andric /// SGPR spilling. 2760b57cec5SDimitry Andric // 2770b57cec5SDimitry Andric // FIXME: This only works after processFunctionBeforeFrameFinalized 2780b57cec5SDimitry Andric bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF, 2790b57cec5SDimitry Andric unsigned NumNeed) const { 2800b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 2810b57cec5SDimitry Andric unsigned WaveSize = ST.getWavefrontSize(); 2820b57cec5SDimitry Andric return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size(); 2830b57cec5SDimitry Andric } 2840b57cec5SDimitry Andric 2850b57cec5SDimitry Andric /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI. 2860b57cec5SDimitry Andric bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, 2870b57cec5SDimitry Andric int FI) { 288*81ad6265SDimitry Andric std::vector<SIRegisterInfo::SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI]; 2890b57cec5SDimitry Andric 2900b57cec5SDimitry Andric // This has already been allocated. 2910b57cec5SDimitry Andric if (!SpillLanes.empty()) 2920b57cec5SDimitry Andric return true; 2930b57cec5SDimitry Andric 2940b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 2950b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 2960b57cec5SDimitry Andric MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 2970b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 2980b57cec5SDimitry Andric unsigned WaveSize = ST.getWavefrontSize(); 2990b57cec5SDimitry Andric 3000b57cec5SDimitry Andric unsigned Size = FrameInfo.getObjectSize(FI); 3015ffd83dbSDimitry Andric unsigned NumLanes = Size / 4; 3020b57cec5SDimitry Andric 3035ffd83dbSDimitry Andric if (NumLanes > WaveSize) 3045ffd83dbSDimitry Andric return false; 3055ffd83dbSDimitry Andric 3065ffd83dbSDimitry Andric assert(Size >= 4 && "invalid sgpr spill size"); 3075ffd83dbSDimitry Andric assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs"); 3080b57cec5SDimitry Andric 3090b57cec5SDimitry Andric // Make sure to handle the case where a wide SGPR spill may span between two 3100b57cec5SDimitry Andric // VGPRs. 3115ffd83dbSDimitry Andric for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) { 3125ffd83dbSDimitry Andric Register LaneVGPR; 3130b57cec5SDimitry Andric unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize); 3140b57cec5SDimitry Andric 31504eeddc0SDimitry Andric if (VGPRIndex == 0) { 3160b57cec5SDimitry Andric LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF); 3170b57cec5SDimitry Andric if (LaneVGPR == AMDGPU::NoRegister) { 3180b57cec5SDimitry Andric // We have no VGPRs left for spilling SGPRs. Reset because we will not 3190b57cec5SDimitry Andric // partially spill the SGPR to VGPRs. 3200b57cec5SDimitry Andric SGPRToVGPRSpills.erase(FI); 3210b57cec5SDimitry Andric NumVGPRSpillLanes -= I; 322fe6060f1SDimitry Andric 32304eeddc0SDimitry Andric // FIXME: We can run out of free registers with split allocation if 32404eeddc0SDimitry Andric // IPRA is enabled and a called function already uses every VGPR. 325fe6060f1SDimitry Andric #if 0 326fe6060f1SDimitry Andric DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(), 327fe6060f1SDimitry Andric "VGPRs for SGPR spilling", 328fe6060f1SDimitry Andric 0, DS_Error); 329fe6060f1SDimitry Andric MF.getFunction().getContext().diagnose(DiagOutOfRegs); 330fe6060f1SDimitry Andric #endif 3310b57cec5SDimitry Andric return false; 3320b57cec5SDimitry Andric } 3330b57cec5SDimitry Andric 334fe6060f1SDimitry Andric Optional<int> SpillFI; 335fe6060f1SDimitry Andric // We need to preserve inactive lanes, so always save, even caller-save 336fe6060f1SDimitry Andric // registers. 337fe6060f1SDimitry Andric if (!isEntryFunction()) { 338fe6060f1SDimitry Andric SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4)); 3390b57cec5SDimitry Andric } 3400b57cec5SDimitry Andric 341fe6060f1SDimitry Andric SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI)); 3420b57cec5SDimitry Andric 343*81ad6265SDimitry Andric // Add this register as live-in to all blocks to avoid machine verifier 3440b57cec5SDimitry Andric // complaining about use of an undefined physical register. 3450b57cec5SDimitry Andric for (MachineBasicBlock &BB : MF) 3460b57cec5SDimitry Andric BB.addLiveIn(LaneVGPR); 3470b57cec5SDimitry Andric } else { 3480b57cec5SDimitry Andric LaneVGPR = SpillVGPRs.back().VGPR; 3490b57cec5SDimitry Andric } 3500b57cec5SDimitry Andric 351*81ad6265SDimitry Andric SpillLanes.push_back(SIRegisterInfo::SpilledReg(LaneVGPR, VGPRIndex)); 3520b57cec5SDimitry Andric } 3530b57cec5SDimitry Andric 3540b57cec5SDimitry Andric return true; 3550b57cec5SDimitry Andric } 3560b57cec5SDimitry Andric 3570b57cec5SDimitry Andric /// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI. 3580b57cec5SDimitry Andric /// Either AGPR is spilled to VGPR to vice versa. 3590b57cec5SDimitry Andric /// Returns true if a \p FI can be eliminated completely. 3600b57cec5SDimitry Andric bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF, 3610b57cec5SDimitry Andric int FI, 3620b57cec5SDimitry Andric bool isAGPRtoVGPR) { 3630b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 3640b57cec5SDimitry Andric MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 3650b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 3660b57cec5SDimitry Andric 3670b57cec5SDimitry Andric assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI)); 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric auto &Spill = VGPRToAGPRSpills[FI]; 3700b57cec5SDimitry Andric 3710b57cec5SDimitry Andric // This has already been allocated. 3720b57cec5SDimitry Andric if (!Spill.Lanes.empty()) 3730b57cec5SDimitry Andric return Spill.FullyAllocated; 3740b57cec5SDimitry Andric 3750b57cec5SDimitry Andric unsigned Size = FrameInfo.getObjectSize(FI); 3760b57cec5SDimitry Andric unsigned NumLanes = Size / 4; 3770b57cec5SDimitry Andric Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister); 3780b57cec5SDimitry Andric 3790b57cec5SDimitry Andric const TargetRegisterClass &RC = 3800b57cec5SDimitry Andric isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass; 3810b57cec5SDimitry Andric auto Regs = RC.getRegisters(); 3820b57cec5SDimitry Andric 3830b57cec5SDimitry Andric auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR; 3840b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 3850b57cec5SDimitry Andric Spill.FullyAllocated = true; 3860b57cec5SDimitry Andric 3870b57cec5SDimitry Andric // FIXME: Move allocation logic out of MachineFunctionInfo and initialize 3880b57cec5SDimitry Andric // once. 3890b57cec5SDimitry Andric BitVector OtherUsedRegs; 3900b57cec5SDimitry Andric OtherUsedRegs.resize(TRI->getNumRegs()); 3910b57cec5SDimitry Andric 3920b57cec5SDimitry Andric const uint32_t *CSRMask = 3930b57cec5SDimitry Andric TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv()); 3940b57cec5SDimitry Andric if (CSRMask) 3950b57cec5SDimitry Andric OtherUsedRegs.setBitsInMask(CSRMask); 3960b57cec5SDimitry Andric 3970b57cec5SDimitry Andric // TODO: Should include register tuples, but doesn't matter with current 3980b57cec5SDimitry Andric // usage. 3990b57cec5SDimitry Andric for (MCPhysReg Reg : SpillAGPR) 4000b57cec5SDimitry Andric OtherUsedRegs.set(Reg); 4010b57cec5SDimitry Andric for (MCPhysReg Reg : SpillVGPR) 4020b57cec5SDimitry Andric OtherUsedRegs.set(Reg); 4030b57cec5SDimitry Andric 4040b57cec5SDimitry Andric SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin(); 405349cc55cSDimitry Andric for (int I = NumLanes - 1; I >= 0; --I) { 4060b57cec5SDimitry Andric NextSpillReg = std::find_if( 4070b57cec5SDimitry Andric NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) { 4080b57cec5SDimitry Andric return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) && 4090b57cec5SDimitry Andric !OtherUsedRegs[Reg]; 4100b57cec5SDimitry Andric }); 4110b57cec5SDimitry Andric 4120b57cec5SDimitry Andric if (NextSpillReg == Regs.end()) { // Registers exhausted 4130b57cec5SDimitry Andric Spill.FullyAllocated = false; 4140b57cec5SDimitry Andric break; 4150b57cec5SDimitry Andric } 4160b57cec5SDimitry Andric 4170b57cec5SDimitry Andric OtherUsedRegs.set(*NextSpillReg); 4180b57cec5SDimitry Andric SpillRegs.push_back(*NextSpillReg); 4190b57cec5SDimitry Andric Spill.Lanes[I] = *NextSpillReg++; 4200b57cec5SDimitry Andric } 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric return Spill.FullyAllocated; 4230b57cec5SDimitry Andric } 4240b57cec5SDimitry Andric 425*81ad6265SDimitry Andric bool SIMachineFunctionInfo::removeDeadFrameIndices( 426*81ad6265SDimitry Andric MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) { 427349cc55cSDimitry Andric // Remove dead frame indices from function frame, however keep FP & BP since 428349cc55cSDimitry Andric // spills for them haven't been inserted yet. And also make sure to remove the 429349cc55cSDimitry Andric // frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could 430349cc55cSDimitry Andric // result in an unexpected side effect and bug, in case of any re-mapping of 431349cc55cSDimitry Andric // freed frame indices by later pass(es) like "stack slot coloring". 432349cc55cSDimitry Andric for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) { 433349cc55cSDimitry Andric if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) { 4340b57cec5SDimitry Andric MFI.RemoveStackObject(R.first); 435349cc55cSDimitry Andric SGPRToVGPRSpills.erase(R.first); 436349cc55cSDimitry Andric } 4370b57cec5SDimitry Andric } 4380b57cec5SDimitry Andric 439*81ad6265SDimitry Andric bool HaveSGPRToMemory = false; 440*81ad6265SDimitry Andric 441*81ad6265SDimitry Andric if (ResetSGPRSpillStackIDs) { 442*81ad6265SDimitry Andric // All other SPGRs must be allocated on the default stack, so reset the 443*81ad6265SDimitry Andric // stack ID. 4440b57cec5SDimitry Andric for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e; 445*81ad6265SDimitry Andric ++i) { 446*81ad6265SDimitry Andric if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) { 447*81ad6265SDimitry Andric if (MFI.getStackID(i) == TargetStackID::SGPRSpill) { 4480b57cec5SDimitry Andric MFI.setStackID(i, TargetStackID::Default); 449*81ad6265SDimitry Andric HaveSGPRToMemory = true; 450*81ad6265SDimitry Andric } 451*81ad6265SDimitry Andric } 452*81ad6265SDimitry Andric } 453*81ad6265SDimitry Andric } 4540b57cec5SDimitry Andric 4550b57cec5SDimitry Andric for (auto &R : VGPRToAGPRSpills) { 4560eae32dcSDimitry Andric if (R.second.IsDead) 4570b57cec5SDimitry Andric MFI.RemoveStackObject(R.first); 4580b57cec5SDimitry Andric } 459*81ad6265SDimitry Andric 460*81ad6265SDimitry Andric return HaveSGPRToMemory; 461*81ad6265SDimitry Andric } 462*81ad6265SDimitry Andric 463*81ad6265SDimitry Andric void SIMachineFunctionInfo::allocateWWMReservedSpillSlots( 464*81ad6265SDimitry Andric MachineFrameInfo &MFI, const SIRegisterInfo &TRI) { 465*81ad6265SDimitry Andric assert(WWMReservedFrameIndexes.empty()); 466*81ad6265SDimitry Andric 467*81ad6265SDimitry Andric WWMReservedFrameIndexes.resize(WWMReservedRegs.size()); 468*81ad6265SDimitry Andric 469*81ad6265SDimitry Andric int I = 0; 470*81ad6265SDimitry Andric for (Register VGPR : WWMReservedRegs) { 471*81ad6265SDimitry Andric const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR); 472*81ad6265SDimitry Andric WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject( 473*81ad6265SDimitry Andric TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC)); 474*81ad6265SDimitry Andric } 4750b57cec5SDimitry Andric } 4760b57cec5SDimitry Andric 477fe6060f1SDimitry Andric int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI, 478fe6060f1SDimitry Andric const SIRegisterInfo &TRI) { 479fe6060f1SDimitry Andric if (ScavengeFI) 480fe6060f1SDimitry Andric return *ScavengeFI; 481fe6060f1SDimitry Andric if (isEntryFunction()) { 482fe6060f1SDimitry Andric ScavengeFI = MFI.CreateFixedObject( 483fe6060f1SDimitry Andric TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false); 484fe6060f1SDimitry Andric } else { 485fe6060f1SDimitry Andric ScavengeFI = MFI.CreateStackObject( 486fe6060f1SDimitry Andric TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 487fe6060f1SDimitry Andric TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false); 488fe6060f1SDimitry Andric } 489fe6060f1SDimitry Andric return *ScavengeFI; 490fe6060f1SDimitry Andric } 491fe6060f1SDimitry Andric 4920b57cec5SDimitry Andric MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const { 4930b57cec5SDimitry Andric assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); 4940b57cec5SDimitry Andric return AMDGPU::SGPR0 + NumUserSGPRs; 4950b57cec5SDimitry Andric } 4960b57cec5SDimitry Andric 4970b57cec5SDimitry Andric MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const { 4980b57cec5SDimitry Andric return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs; 4990b57cec5SDimitry Andric } 5000b57cec5SDimitry Andric 5015ffd83dbSDimitry Andric Register 5025ffd83dbSDimitry Andric SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const { 5035ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 5045ffd83dbSDimitry Andric if (!ST.isAmdPalOS()) 5055ffd83dbSDimitry Andric return Register(); 5065ffd83dbSDimitry Andric Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in 5075ffd83dbSDimitry Andric if (ST.hasMergedShaders()) { 5085ffd83dbSDimitry Andric switch (MF.getFunction().getCallingConv()) { 5095ffd83dbSDimitry Andric case CallingConv::AMDGPU_HS: 5105ffd83dbSDimitry Andric case CallingConv::AMDGPU_GS: 5115ffd83dbSDimitry Andric // Low GIT address is passed in s8 rather than s0 for an LS+HS or 5125ffd83dbSDimitry Andric // ES+GS merged shader on gfx9+. 5135ffd83dbSDimitry Andric GitPtrLo = AMDGPU::SGPR8; 5145ffd83dbSDimitry Andric return GitPtrLo; 5155ffd83dbSDimitry Andric default: 5165ffd83dbSDimitry Andric return GitPtrLo; 5175ffd83dbSDimitry Andric } 5185ffd83dbSDimitry Andric } 5195ffd83dbSDimitry Andric return GitPtrLo; 5205ffd83dbSDimitry Andric } 5215ffd83dbSDimitry Andric 5225ffd83dbSDimitry Andric static yaml::StringValue regToString(Register Reg, 5230b57cec5SDimitry Andric const TargetRegisterInfo &TRI) { 5240b57cec5SDimitry Andric yaml::StringValue Dest; 5250b57cec5SDimitry Andric { 5260b57cec5SDimitry Andric raw_string_ostream OS(Dest.Value); 5270b57cec5SDimitry Andric OS << printReg(Reg, &TRI); 5280b57cec5SDimitry Andric } 5290b57cec5SDimitry Andric return Dest; 5300b57cec5SDimitry Andric } 5310b57cec5SDimitry Andric 5320b57cec5SDimitry Andric static Optional<yaml::SIArgumentInfo> 5330b57cec5SDimitry Andric convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, 5340b57cec5SDimitry Andric const TargetRegisterInfo &TRI) { 5350b57cec5SDimitry Andric yaml::SIArgumentInfo AI; 5360b57cec5SDimitry Andric 5370b57cec5SDimitry Andric auto convertArg = [&](Optional<yaml::SIArgument> &A, 5380b57cec5SDimitry Andric const ArgDescriptor &Arg) { 5390b57cec5SDimitry Andric if (!Arg) 5400b57cec5SDimitry Andric return false; 5410b57cec5SDimitry Andric 5420b57cec5SDimitry Andric // Create a register or stack argument. 5430b57cec5SDimitry Andric yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister()); 5440b57cec5SDimitry Andric if (Arg.isRegister()) { 5450b57cec5SDimitry Andric raw_string_ostream OS(SA.RegisterName.Value); 5460b57cec5SDimitry Andric OS << printReg(Arg.getRegister(), &TRI); 5470b57cec5SDimitry Andric } else 5480b57cec5SDimitry Andric SA.StackOffset = Arg.getStackOffset(); 5490b57cec5SDimitry Andric // Check and update the optional mask. 5500b57cec5SDimitry Andric if (Arg.isMasked()) 5510b57cec5SDimitry Andric SA.Mask = Arg.getMask(); 5520b57cec5SDimitry Andric 5530b57cec5SDimitry Andric A = SA; 5540b57cec5SDimitry Andric return true; 5550b57cec5SDimitry Andric }; 5560b57cec5SDimitry Andric 5570b57cec5SDimitry Andric bool Any = false; 5580b57cec5SDimitry Andric Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer); 5590b57cec5SDimitry Andric Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr); 5600b57cec5SDimitry Andric Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr); 5610b57cec5SDimitry Andric Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr); 5620b57cec5SDimitry Andric Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID); 5630b57cec5SDimitry Andric Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit); 5640b57cec5SDimitry Andric Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize); 5650b57cec5SDimitry Andric Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX); 5660b57cec5SDimitry Andric Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY); 5670b57cec5SDimitry Andric Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ); 5680b57cec5SDimitry Andric Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo); 5690b57cec5SDimitry Andric Any |= convertArg(AI.PrivateSegmentWaveByteOffset, 5700b57cec5SDimitry Andric ArgInfo.PrivateSegmentWaveByteOffset); 5710b57cec5SDimitry Andric Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr); 5720b57cec5SDimitry Andric Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr); 5730b57cec5SDimitry Andric Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX); 5740b57cec5SDimitry Andric Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY); 5750b57cec5SDimitry Andric Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ); 5760b57cec5SDimitry Andric 5770b57cec5SDimitry Andric if (Any) 5780b57cec5SDimitry Andric return AI; 5790b57cec5SDimitry Andric 5800b57cec5SDimitry Andric return None; 5810b57cec5SDimitry Andric } 5820b57cec5SDimitry Andric 5830b57cec5SDimitry Andric yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( 584fe6060f1SDimitry Andric const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI, 585fe6060f1SDimitry Andric const llvm::MachineFunction &MF) 5860b57cec5SDimitry Andric : ExplicitKernArgSize(MFI.getExplicitKernArgSize()), 587e8d8bef9SDimitry Andric MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()), 588*81ad6265SDimitry Andric GDSSize(MFI.getGDSSize()), 589e8d8bef9SDimitry Andric DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()), 5900b57cec5SDimitry Andric NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()), 591e8d8bef9SDimitry Andric MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()), 592e8d8bef9SDimitry Andric HasSpilledSGPRs(MFI.hasSpilledSGPRs()), 593e8d8bef9SDimitry Andric HasSpilledVGPRs(MFI.hasSpilledVGPRs()), 5948bcb0991SDimitry Andric HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()), 595e8d8bef9SDimitry Andric Occupancy(MFI.getOccupancy()), 5960b57cec5SDimitry Andric ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)), 5970b57cec5SDimitry Andric FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)), 5980b57cec5SDimitry Andric StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)), 599*81ad6265SDimitry Andric BytesInStackArgArea(MFI.getBytesInStackArgArea()), 600*81ad6265SDimitry Andric ReturnsVoid(MFI.returnsVoid()), 601e8d8bef9SDimitry Andric ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) { 602*81ad6265SDimitry Andric for (Register Reg : MFI.WWMReservedRegs) 603*81ad6265SDimitry Andric WWMReservedRegs.push_back(regToString(Reg, TRI)); 604*81ad6265SDimitry Andric 605*81ad6265SDimitry Andric if (MFI.getVGPRForAGPRCopy()) 606*81ad6265SDimitry Andric VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI); 607fe6060f1SDimitry Andric auto SFI = MFI.getOptionalScavengeFI(); 608fe6060f1SDimitry Andric if (SFI) 609fe6060f1SDimitry Andric ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo()); 610e8d8bef9SDimitry Andric } 6110b57cec5SDimitry Andric 6120b57cec5SDimitry Andric void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) { 6130b57cec5SDimitry Andric MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this); 6140b57cec5SDimitry Andric } 6150b57cec5SDimitry Andric 6160b57cec5SDimitry Andric bool SIMachineFunctionInfo::initializeBaseYamlFields( 617fe6060f1SDimitry Andric const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, 618fe6060f1SDimitry Andric PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) { 6190b57cec5SDimitry Andric ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize; 620*81ad6265SDimitry Andric MaxKernArgAlign = YamlMFI.MaxKernArgAlign; 6210b57cec5SDimitry Andric LDSSize = YamlMFI.LDSSize; 622*81ad6265SDimitry Andric GDSSize = YamlMFI.GDSSize; 623e8d8bef9SDimitry Andric DynLDSAlign = YamlMFI.DynLDSAlign; 6248bcb0991SDimitry Andric HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress; 625e8d8bef9SDimitry Andric Occupancy = YamlMFI.Occupancy; 6260b57cec5SDimitry Andric IsEntryFunction = YamlMFI.IsEntryFunction; 6270b57cec5SDimitry Andric NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath; 6280b57cec5SDimitry Andric MemoryBound = YamlMFI.MemoryBound; 6290b57cec5SDimitry Andric WaveLimiter = YamlMFI.WaveLimiter; 630e8d8bef9SDimitry Andric HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs; 631e8d8bef9SDimitry Andric HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs; 632*81ad6265SDimitry Andric BytesInStackArgArea = YamlMFI.BytesInStackArgArea; 633*81ad6265SDimitry Andric ReturnsVoid = YamlMFI.ReturnsVoid; 634fe6060f1SDimitry Andric 635fe6060f1SDimitry Andric if (YamlMFI.ScavengeFI) { 636fe6060f1SDimitry Andric auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo()); 637fe6060f1SDimitry Andric if (!FIOrErr) { 638fe6060f1SDimitry Andric // Create a diagnostic for a the frame index. 639fe6060f1SDimitry Andric const MemoryBuffer &Buffer = 640fe6060f1SDimitry Andric *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID()); 641fe6060f1SDimitry Andric 642fe6060f1SDimitry Andric Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1, 643fe6060f1SDimitry Andric SourceMgr::DK_Error, toString(FIOrErr.takeError()), 644fe6060f1SDimitry Andric "", None, None); 645fe6060f1SDimitry Andric SourceRange = YamlMFI.ScavengeFI->SourceRange; 646fe6060f1SDimitry Andric return true; 647fe6060f1SDimitry Andric } 648fe6060f1SDimitry Andric ScavengeFI = *FIOrErr; 649fe6060f1SDimitry Andric } else { 650fe6060f1SDimitry Andric ScavengeFI = None; 651fe6060f1SDimitry Andric } 6520b57cec5SDimitry Andric return false; 6530b57cec5SDimitry Andric } 6545ffd83dbSDimitry Andric 655*81ad6265SDimitry Andric bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const { 656*81ad6265SDimitry Andric for (const BasicBlock &BB : MF.getFunction()) { 657*81ad6265SDimitry Andric for (const Instruction &I : BB) { 658*81ad6265SDimitry Andric const auto *CB = dyn_cast<CallBase>(&I); 659*81ad6265SDimitry Andric if (!CB) 660*81ad6265SDimitry Andric continue; 661*81ad6265SDimitry Andric 662*81ad6265SDimitry Andric if (CB->isInlineAsm()) { 663*81ad6265SDimitry Andric const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand()); 664*81ad6265SDimitry Andric for (const auto &CI : IA->ParseConstraints()) { 665*81ad6265SDimitry Andric for (StringRef Code : CI.Codes) { 666*81ad6265SDimitry Andric Code.consume_front("{"); 667*81ad6265SDimitry Andric if (Code.startswith("a")) 668*81ad6265SDimitry Andric return true; 669*81ad6265SDimitry Andric } 670*81ad6265SDimitry Andric } 671*81ad6265SDimitry Andric continue; 672*81ad6265SDimitry Andric } 673*81ad6265SDimitry Andric 674*81ad6265SDimitry Andric const Function *Callee = 675*81ad6265SDimitry Andric dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts()); 676*81ad6265SDimitry Andric if (!Callee) 677*81ad6265SDimitry Andric return true; 678*81ad6265SDimitry Andric 679*81ad6265SDimitry Andric if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic) 680*81ad6265SDimitry Andric return true; 681*81ad6265SDimitry Andric } 682*81ad6265SDimitry Andric } 683*81ad6265SDimitry Andric 684*81ad6265SDimitry Andric return false; 685*81ad6265SDimitry Andric } 686*81ad6265SDimitry Andric 687349cc55cSDimitry Andric bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const { 688349cc55cSDimitry Andric if (UsesAGPRs) 689349cc55cSDimitry Andric return *UsesAGPRs; 690349cc55cSDimitry Andric 691*81ad6265SDimitry Andric if (!mayNeedAGPRs()) { 692*81ad6265SDimitry Andric UsesAGPRs = false; 693*81ad6265SDimitry Andric return false; 694*81ad6265SDimitry Andric } 695*81ad6265SDimitry Andric 696349cc55cSDimitry Andric if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) || 697349cc55cSDimitry Andric MF.getFrameInfo().hasCalls()) { 698349cc55cSDimitry Andric UsesAGPRs = true; 699349cc55cSDimitry Andric return true; 700349cc55cSDimitry Andric } 701349cc55cSDimitry Andric 702349cc55cSDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 703349cc55cSDimitry Andric 704349cc55cSDimitry Andric for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { 705349cc55cSDimitry Andric const Register Reg = Register::index2VirtReg(I); 706349cc55cSDimitry Andric const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg); 707349cc55cSDimitry Andric if (RC && SIRegisterInfo::isAGPRClass(RC)) { 708349cc55cSDimitry Andric UsesAGPRs = true; 709349cc55cSDimitry Andric return true; 710349cc55cSDimitry Andric } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) { 711349cc55cSDimitry Andric // Defer caching UsesAGPRs, function might not yet been regbank selected. 712349cc55cSDimitry Andric return true; 713349cc55cSDimitry Andric } 714349cc55cSDimitry Andric } 715349cc55cSDimitry Andric 716349cc55cSDimitry Andric for (MCRegister Reg : AMDGPU::AGPR_32RegClass) { 717349cc55cSDimitry Andric if (MRI.isPhysRegUsed(Reg)) { 718349cc55cSDimitry Andric UsesAGPRs = true; 719349cc55cSDimitry Andric return true; 720349cc55cSDimitry Andric } 721349cc55cSDimitry Andric } 722349cc55cSDimitry Andric 723349cc55cSDimitry Andric UsesAGPRs = false; 724349cc55cSDimitry Andric return false; 725349cc55cSDimitry Andric } 726