//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <optional>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

// Reach the GCNTargetMachine through the subtarget's lowering object; used to
// construct the GWS resource pseudo-source-value in the constructor below.
const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
  const SITargetLowering *TLI = STI->getTargetLowering();
  return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());
}

// Derive the per-function state (preloaded-argument flags, reserved registers,
// occupancy bounds) from the IR function's calling convention and its
// "amdgpu-*" attributes.
SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
                                             const GCNSubtarget *STI)
  : AMDGPUMachineFunction(F, *STI),
    Mode(F),
    GWSResourcePSV(getTM(STI)),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    LDSKernelId(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    // Kernels need the kernarg segment pointer whenever there are explicit
    // arguments or implicit kernarg bytes to read.
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();

  if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now, other registers
      // required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(F))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  // For non-graphics conventions, each system-value input is enabled unless
  // the corresponding "amdgpu-no-*" attribute proves it is unused.
  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;

    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;

    if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
      LDSKernelId = true;
  }

  // FIXME: This attribute is a hack, we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  // Both of these attributes carry their value as a string; parse them into
  // the corresponding integer fields if present.
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times. For now, reserve highest available VGPR. After
  // RA, shift it to the lowest available unused VGPR if one exists.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy =
        AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
  }
}

// Copy this function info into a destination machine function (used when a
// MachineFunction is cloned, e.g. by the MIR debugging machinery).
MachineFunctionInfo *SIMachineFunctionInfo::clone(
    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
    const {
  return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
}

// Clamp the tracked occupancy to both the wave-per-EU limit and the limit
// implied by this function's LDS usage.
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                                                 MF.getFunction()));
}

// Each of the add* helpers below reserves user SGPRs for one preloaded
// argument, records the chosen register(s) in ArgInfo, advances NumUserSGPRs
// by the number of registers consumed, and returns the first register.

Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4; // 128-bit resource descriptor takes four SGPRs.
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2; // 64-bit pointer.
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2; // 64-bit pointer.
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2; // 64-bit pointer.
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2; // 64-bit value.
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2; // 64-bit value.
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2; // 64-bit pointer.
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

Register SIMachineFunctionInfo::addLDSKernelId() {
  // Single 32-bit value, so no super-register lookup is needed.
  ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR());
  NumUserSGPRs += 1;
  return ArgInfo.LDSKernelId.getRegister();
}

// Record \p VGPR as a WWM spill register, creating a spill stack slot of the
// given size/alignment for it. No-op for entry functions.
void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,
                                             uint64_t Size, Align Alignment) {
  // Skip if it is an entry function or the register is already added.
  if (isEntryFunction() || WWMSpills.count(VGPR))
    return;

  WWMSpills.insert(std::make_pair(
      VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment)));
}

// Separate out the callee-saved and scratch registers.
void SIMachineFunctionInfo::splitWWMSpillRegisters(
    MachineFunction &MF,
    SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
    SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const {
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  for (auto &Reg : WWMSpills) {
    if (isCalleeSavedReg(CSRegs, Reg.first))
      CalleeSavedRegs.push_back(Reg);
    else
      ScratchRegs.push_back(Reg);
  }
}

// Linear scan of the null-terminated callee-saved register list.
bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) const {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

// Map one lane of the SGPR spill for frame index \p FI onto a VGPR lane.
// Lane 0 picks (and records) a fresh unused VGPR; subsequent lanes reuse the
// most recently chosen one. Returns false if no VGPR is available.
bool SIMachineFunctionInfo::allocateVGPRForSGPRSpills(MachineFunction &MF,
                                                      int FI,
                                                      unsigned LaneIndex) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (LaneVGPR == AMDGPU::NoRegister) {
      // We have no VGPRs left for spilling SGPRs. Reset because we will not
      // partially spill the SGPR to VGPRs.
      SGPRSpillToVGPRLanes.erase(FI);
      return false;
    }

    SpillVGPRs.push_back(LaneVGPR);
    // Add this register as live-in to all blocks to avoid machine verifier
    // complaining about use of an undefined physical register.
    for (MachineBasicBlock &BB : MF)
      BB.addLiveIn(LaneVGPR);
  } else {
    LaneVGPR = SpillVGPRs.back();
  }

  SGPRSpillToVGPRLanes[FI].push_back(
      SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
  return true;
}

// Same as allocateVGPRForSGPRSpills, but for prolog/epilog spills: the chosen
// VGPR is tracked as a WWM spill register instead of in SpillVGPRs.
bool SIMachineFunctionInfo::allocateVGPRForPrologEpilogSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (LaneVGPR == AMDGPU::NoRegister) {
      // We have no VGPRs left for spilling SGPRs. Reset because we will not
      // partially spill the SGPR to VGPRs.
      PrologEpilogSGPRSpillToVGPRLanes.erase(FI);
      return false;
    }

    allocateWWMSpill(MF, LaneVGPR);
  } else {
    LaneVGPR = WWMSpills.back().first;
  }

  PrologEpilogSGPRSpillToVGPRLanes[FI].push_back(
      SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
  return true;
}

// Allocate VGPR lanes for the whole SGPR spill at frame index \p FI (one lane
// per 4 bytes). Returns true on success; on failure the partially-advanced
// lane counter is rolled back.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
                                                        int FI,
                                                        bool IsPrologEpilog) {
  std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
      IsPrologEpilog ? PrologEpilogSGPRSpillToVGPRLanes[FI]
                     : SGPRSpillToVGPRLanes[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  // A single VGPR cannot hold more than WaveSize lanes.
  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
         "not spilling SGPRs to VGPRs");

  unsigned &NumSpillLanes =
      IsPrologEpilog ? NumVGPRPrologEpilogSpillLanes : NumVGPRSpillLanes;

  for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
    unsigned LaneIndex = (NumSpillLanes % WaveSize);

    bool Allocated =
        IsPrologEpilog
            ? allocateVGPRForPrologEpilogSGPRSpills(MF, FI, LaneIndex)
            : allocateVGPRForSGPRSpills(MF, FI, LaneIndex);
    if (!Allocated) {
      // Undo the lanes consumed by this failed request.
      NumSpillLanes -= I;
      return false;
    }
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if a \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    MRI.reserveReg(*NextSpillReg, TRI);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from function frame. And also make sure to remove
  // the frame indices from `SGPRSpillToVGPRLanes` data structure, otherwise, it
  // could result in an unexpected side effect and bug, in case of any
  // re-mapping of freed frame indices by later pass(es) like "stack slot
  // coloring".
  for (auto &R : make_early_inc_range(SGPRSpillToVGPRLanes)) {
    MFI.RemoveStackObject(R.first);
    SGPRSpillToVGPRLanes.erase(R.first);
  }

  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
         ++I) {
      if (!checkIndexInPrologEpilogSGPRSpills(I)) {
        if (MFI.getStackID(I) == TargetStackID::SGPRSpill) {
          MFI.setStackID(I, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}

// Lazily create (and cache) the frame index used by the register scavenger:
// a fixed object at offset 0 for entry functions, a normal stack object
// otherwise.
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;
  if (isEntryFunction()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

// Next free user SGPR; only valid before any system SGPRs are added.
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

// System SGPRs are laid out immediately after the user SGPRs.
MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

// Return the SGPR carrying the low half of the GIT address on amdpal, or a
// null Register on other OSes.
Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

// Render a register as its printable name for MIR YAML serialization.
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

// Convert the in-memory argument-usage table into its YAML form. Returns
// std::nullopt when no argument is set, so the field is omitted entirely.
static std::optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  // Convert one descriptor; returns true if it was present.
  auto convertArg = [&](std::optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return std::nullopt;
}

// Serialize the in-memory function info into its MIR YAML counterpart.
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      GDSSize(MFI.getGDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      BytesInStackArgArea(MFI.getBytesInStackArgArea()),
      ReturnsVoid(MFI.returnsVoid()),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  for (Register Reg : MFI.getWWMReservedRegs())
    WWMReservedRegs.push_back(regToString(Reg, TRI));

  if (MFI.getVGPRForAGPRCopy())
    VGPRForAGPRCopy =
regToString(MFI.getVGPRForAGPRCopy(), TRI); 647fe6060f1SDimitry Andric auto SFI = MFI.getOptionalScavengeFI(); 648fe6060f1SDimitry Andric if (SFI) 649fe6060f1SDimitry Andric ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo()); 650e8d8bef9SDimitry Andric } 6510b57cec5SDimitry Andric 6520b57cec5SDimitry Andric void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) { 6530b57cec5SDimitry Andric MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this); 6540b57cec5SDimitry Andric } 6550b57cec5SDimitry Andric 6560b57cec5SDimitry Andric bool SIMachineFunctionInfo::initializeBaseYamlFields( 657fe6060f1SDimitry Andric const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, 658fe6060f1SDimitry Andric PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) { 6590b57cec5SDimitry Andric ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize; 66081ad6265SDimitry Andric MaxKernArgAlign = YamlMFI.MaxKernArgAlign; 6610b57cec5SDimitry Andric LDSSize = YamlMFI.LDSSize; 66281ad6265SDimitry Andric GDSSize = YamlMFI.GDSSize; 663e8d8bef9SDimitry Andric DynLDSAlign = YamlMFI.DynLDSAlign; 6648bcb0991SDimitry Andric HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress; 665e8d8bef9SDimitry Andric Occupancy = YamlMFI.Occupancy; 6660b57cec5SDimitry Andric IsEntryFunction = YamlMFI.IsEntryFunction; 6670b57cec5SDimitry Andric NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath; 6680b57cec5SDimitry Andric MemoryBound = YamlMFI.MemoryBound; 6690b57cec5SDimitry Andric WaveLimiter = YamlMFI.WaveLimiter; 670e8d8bef9SDimitry Andric HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs; 671e8d8bef9SDimitry Andric HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs; 67281ad6265SDimitry Andric BytesInStackArgArea = YamlMFI.BytesInStackArgArea; 67381ad6265SDimitry Andric ReturnsVoid = YamlMFI.ReturnsVoid; 674fe6060f1SDimitry Andric 675fe6060f1SDimitry Andric if (YamlMFI.ScavengeFI) { 676fe6060f1SDimitry Andric auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo()); 
677fe6060f1SDimitry Andric if (!FIOrErr) { 678fe6060f1SDimitry Andric // Create a diagnostic for a the frame index. 679fe6060f1SDimitry Andric const MemoryBuffer &Buffer = 680fe6060f1SDimitry Andric *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID()); 681fe6060f1SDimitry Andric 682fe6060f1SDimitry Andric Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1, 683fe6060f1SDimitry Andric SourceMgr::DK_Error, toString(FIOrErr.takeError()), 684*bdd1243dSDimitry Andric "", std::nullopt, std::nullopt); 685fe6060f1SDimitry Andric SourceRange = YamlMFI.ScavengeFI->SourceRange; 686fe6060f1SDimitry Andric return true; 687fe6060f1SDimitry Andric } 688fe6060f1SDimitry Andric ScavengeFI = *FIOrErr; 689fe6060f1SDimitry Andric } else { 690*bdd1243dSDimitry Andric ScavengeFI = std::nullopt; 691fe6060f1SDimitry Andric } 6920b57cec5SDimitry Andric return false; 6930b57cec5SDimitry Andric } 6945ffd83dbSDimitry Andric 695*bdd1243dSDimitry Andric bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const { 696*bdd1243dSDimitry Andric for (const BasicBlock &BB : F) { 69781ad6265SDimitry Andric for (const Instruction &I : BB) { 69881ad6265SDimitry Andric const auto *CB = dyn_cast<CallBase>(&I); 69981ad6265SDimitry Andric if (!CB) 70081ad6265SDimitry Andric continue; 70181ad6265SDimitry Andric 70281ad6265SDimitry Andric if (CB->isInlineAsm()) { 70381ad6265SDimitry Andric const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand()); 70481ad6265SDimitry Andric for (const auto &CI : IA->ParseConstraints()) { 70581ad6265SDimitry Andric for (StringRef Code : CI.Codes) { 70681ad6265SDimitry Andric Code.consume_front("{"); 70781ad6265SDimitry Andric if (Code.startswith("a")) 70881ad6265SDimitry Andric return true; 70981ad6265SDimitry Andric } 71081ad6265SDimitry Andric } 71181ad6265SDimitry Andric continue; 71281ad6265SDimitry Andric } 71381ad6265SDimitry Andric 71481ad6265SDimitry Andric const Function *Callee = 71581ad6265SDimitry Andric 
dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts()); 71681ad6265SDimitry Andric if (!Callee) 71781ad6265SDimitry Andric return true; 71881ad6265SDimitry Andric 71981ad6265SDimitry Andric if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic) 72081ad6265SDimitry Andric return true; 72181ad6265SDimitry Andric } 72281ad6265SDimitry Andric } 72381ad6265SDimitry Andric 72481ad6265SDimitry Andric return false; 72581ad6265SDimitry Andric } 72681ad6265SDimitry Andric 727349cc55cSDimitry Andric bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const { 728349cc55cSDimitry Andric if (UsesAGPRs) 729349cc55cSDimitry Andric return *UsesAGPRs; 730349cc55cSDimitry Andric 73181ad6265SDimitry Andric if (!mayNeedAGPRs()) { 73281ad6265SDimitry Andric UsesAGPRs = false; 73381ad6265SDimitry Andric return false; 73481ad6265SDimitry Andric } 73581ad6265SDimitry Andric 736349cc55cSDimitry Andric if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) || 737349cc55cSDimitry Andric MF.getFrameInfo().hasCalls()) { 738349cc55cSDimitry Andric UsesAGPRs = true; 739349cc55cSDimitry Andric return true; 740349cc55cSDimitry Andric } 741349cc55cSDimitry Andric 742349cc55cSDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 743349cc55cSDimitry Andric 744349cc55cSDimitry Andric for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { 745349cc55cSDimitry Andric const Register Reg = Register::index2VirtReg(I); 746349cc55cSDimitry Andric const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg); 747349cc55cSDimitry Andric if (RC && SIRegisterInfo::isAGPRClass(RC)) { 748349cc55cSDimitry Andric UsesAGPRs = true; 749349cc55cSDimitry Andric return true; 750349cc55cSDimitry Andric } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) { 751349cc55cSDimitry Andric // Defer caching UsesAGPRs, function might not yet been regbank selected. 
752349cc55cSDimitry Andric return true; 753349cc55cSDimitry Andric } 754349cc55cSDimitry Andric } 755349cc55cSDimitry Andric 756349cc55cSDimitry Andric for (MCRegister Reg : AMDGPU::AGPR_32RegClass) { 757349cc55cSDimitry Andric if (MRI.isPhysRegUsed(Reg)) { 758349cc55cSDimitry Andric UsesAGPRs = true; 759349cc55cSDimitry Andric return true; 760349cc55cSDimitry Andric } 761349cc55cSDimitry Andric } 762349cc55cSDimitry Andric 763349cc55cSDimitry Andric UsesAGPRs = false; 764349cc55cSDimitry Andric return false; 765349cc55cSDimitry Andric } 766