//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <optional>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
  const SITargetLowering *TLI = STI->getTargetLowering();
  return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());
}

SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
                                             const GCNSubtarget *STI)
    : AMDGPUMachineFunction(F, *STI),
      Mode(F),
      GWSResourcePSV(getTM(STI)),
      PrivateSegmentBuffer(false),
      DispatchPtr(false),
      QueuePtr(false),
      KernargSegmentPtr(false),
      DispatchID(false),
      FlatScratchInit(false),
      WorkGroupIDX(false),
      WorkGroupIDY(false),
      WorkGroupIDZ(false),
      WorkGroupInfo(false),
      LDSKernelId(false),
      PrivateSegmentWaveByteOffset(false),
      WorkItemIDX(false),
      WorkItemIDY(false),
      WorkItemIDZ(false),
      ImplicitBufferPtr(false),
      ImplicitArgPtr(false),
      GITPtrHigh(0xffffffff),
      HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  VRegFlags.reserve(1024);

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();

  if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now, other than the
      // registers required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
          ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(F))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  if (!AMDGPU::isGraphics(CC) ||
      (CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;
  }

  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;

    if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
      LDSKernelId = true;
  }

  // FIXME: This attribute is a hack, we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times. For now, reserve the highest available VGPR.
  // After RA, shift it to the lowest available unused VGPR if one exists.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy =
        AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
  }
}

MachineFunctionInfo *SIMachineFunctionInfo::clone(
    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
    const {
  return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

Register SIMachineFunctionInfo::addLDSKernelId() {
  ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR());
  NumUserSGPRs += 1;
  return ArgInfo.LDSKernelId.getRegister();
}

void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,
                                             uint64_t Size, Align Alignment) {
  // Skip if it is an entry function or the register is already added.
  if (isEntryFunction() || WWMSpills.count(VGPR))
    return;

  WWMSpills.insert(std::make_pair(
      VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment)));
}

// Separate out the callee-saved and scratch registers.
void SIMachineFunctionInfo::splitWWMSpillRegisters(
    MachineFunction &MF,
    SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
    SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const {
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  for (auto &Reg : WWMSpills) {
    if (isCalleeSavedReg(CSRegs, Reg.first))
      CalleeSavedRegs.push_back(Reg);
    else
      ScratchRegs.push_back(Reg);
  }
}

bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) const {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    LaneVGPR = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    SpillVGPRs.push_back(LaneVGPR);
  } else {
    LaneVGPR = SpillVGPRs.back();
  }

  SGPRSpillsToVirtualVGPRLanes[FI].push_back(
      SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
  return true;
}

bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (LaneVGPR == AMDGPU::NoRegister) {
      // We have no VGPRs left for spilling SGPRs. Reset because we will not
      // partially spill the SGPR to VGPRs.
      SGPRSpillsToPhysicalVGPRLanes.erase(FI);
      return false;
    }

    allocateWWMSpill(MF, LaneVGPR);
    reserveWWMRegister(LaneVGPR);
    for (MachineBasicBlock &MBB : MF) {
      MBB.addLiveIn(LaneVGPR);
      MBB.sortUniqueLiveIns();
    }
  } else {
    LaneVGPR = WWMReservedRegs.back();
  }

  SGPRSpillsToPhysicalVGPRLanes[FI].push_back(
      SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
  return true;
}

bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
                                                        int FI,
                                                        bool IsPrologEpilog) {
  std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
      IsPrologEpilog ? SGPRSpillsToPhysicalVGPRLanes[FI]
                     : SGPRSpillsToVirtualVGPRLanes[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
         "not spilling SGPRs to VGPRs");

  unsigned &NumSpillLanes =
      IsPrologEpilog ? NumPhysicalVGPRSpillLanes : NumVirtualVGPRSpillLanes;

  for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
    unsigned LaneIndex = (NumSpillLanes % WaveSize);

    bool Allocated = IsPrologEpilog
                         ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex)
                         : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
    if (!Allocated) {
      NumSpillLanes -= I;
      return false;
    }
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if a \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    MRI.reserveReg(*NextSpillReg, TRI);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from the function frame, but keep FP & BP since
  // spills for them have not been inserted yet. Also make sure to remove the
  // frame indices from the `SGPRSpillsToVirtualVGPRLanes` data structure;
  // otherwise a later pass such as "stack slot coloring" could re-map the
  // freed frame indices and cause unexpected side effects and bugs.
  for (auto &R : make_early_inc_range(SGPRSpillsToVirtualVGPRLanes)) {
    MFI.RemoveStackObject(R.first);
    SGPRSpillsToVirtualVGPRLanes.erase(R.first);
  }

  // Remove the dead frame indices of CSR SGPRs which are spilled to physical
  // VGPR lanes during SILowerSGPRSpills pass.
  if (!ResetSGPRSpillStackIDs) {
    for (auto &R : make_early_inc_range(SGPRSpillsToPhysicalVGPRLanes)) {
      MFI.RemoveStackObject(R.first);
      SGPRSpillsToPhysicalVGPRLanes.erase(R.first);
    }
  }
  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
         ++I) {
      if (!checkIndexInPrologEpilogSGPRSpills(I)) {
        if (MFI.getStackID(I) == TargetStackID::SGPRSpill) {
          MFI.setStackID(I, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}

int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;
  if (isEntryFunction()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(Register Reg) {
  VRegFlags.grow(Reg);
}

void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(Register NewReg,
                                                         Register SrcReg) {
  VRegFlags.grow(NewReg);
  VRegFlags[NewReg] = VRegFlags[SrcReg];
}

Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

static std::optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](std::optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return std::nullopt;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      GDSSize(MFI.getGDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      BytesInStackArgArea(MFI.getBytesInStackArgArea()),
      ReturnsVoid(MFI.returnsVoid()),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
      PSInputAddr(MFI.getPSInputAddr()),
      PSInputEnable(MFI.getPSInputEnable()),
      Mode(MFI.getMode()) {
  for (Register Reg : MFI.getWWMReservedRegs())
    WWMReservedRegs.push_back(regToString(Reg, TRI));

  if (MFI.getLongBranchReservedReg())
    LongBranchReservedReg = regToString(MFI.getLongBranchReservedReg(), TRI);
  if (MFI.getVGPRForAGPRCopy())
    VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);

  if (MFI.getSGPRForEXECCopy())
    SGPRForEXECCopy = regToString(MFI.getSGPRForEXECCopy(), TRI);

  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
  LDSSize = YamlMFI.LDSSize;
  GDSSize = YamlMFI.GDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  PSInputAddr = YamlMFI.PSInputAddr;
  PSInputEnable = YamlMFI.PSInputEnable;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
  BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
  ReturnsVoid = YamlMFI.ReturnsVoid;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", std::nullopt, std::nullopt);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = std::nullopt;
  }
  return false;
}

bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      const auto *CB = dyn_cast<CallBase>(&I);
      if (!CB)
        continue;

      if (CB->isInlineAsm()) {
        const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
        for (const auto &CI : IA->ParseConstraints()) {
          for (StringRef Code : CI.Codes) {
            Code.consume_front("{");
            if (Code.startswith("a"))
              return true;
          }
        }
        continue;
      }

      const Function *Callee =
          dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
      if (!Callee)
        return true;

      if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
        return true;
    }
  }

  return false;
}

bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  if (UsesAGPRs)
    return *UsesAGPRs;

  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs; the function might not have been regbank
      // selected yet.
      return true;
    }
  }

  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {
      UsesAGPRs = true;
      return true;
    }
  }

  UsesAGPRs = false;
  return false;
}