10b57cec5SDimitry Andric //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h" 10fe6060f1SDimitry Andric #include "AMDGPUSubtarget.h" 115f757f3fSDimitry Andric #include "AMDGPUTargetMachine.h" 125f757f3fSDimitry Andric #include "GCNSubtarget.h" 13fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 145f757f3fSDimitry Andric #include "SIRegisterInfo.h" 15fe6060f1SDimitry Andric #include "Utils/AMDGPUBaseInfo.h" 16fe6060f1SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h" 175f757f3fSDimitry Andric #include "llvm/CodeGen/MIRParser/MIParser.h" 18fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 19fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 20fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 21fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 22fe6060f1SDimitry Andric #include "llvm/IR/CallingConv.h" 23fe6060f1SDimitry Andric #include "llvm/IR/DiagnosticInfo.h" 24fe6060f1SDimitry Andric #include "llvm/IR/Function.h" 25fe6060f1SDimitry Andric #include <cassert> 26bdd1243dSDimitry Andric #include <optional> 27fe6060f1SDimitry Andric #include <vector> 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric #define MAX_LANES 64 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric using namespace llvm; 320b57cec5SDimitry Andric 33bdd1243dSDimitry Andric const GCNTargetMachine &getTM(const GCNSubtarget *STI) { 34bdd1243dSDimitry Andric const SITargetLowering *TLI = STI->getTargetLowering(); 35bdd1243dSDimitry Andric return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine()); 36bdd1243dSDimitry Andric } 37bdd1243dSDimitry Andric 38bdd1243dSDimitry Andric SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, 39bdd1243dSDimitry Andric const GCNSubtarget *STI) 405f757f3fSDimitry Andric : AMDGPUMachineFunction(F, *STI), Mode(F, *STI), GWSResourcePSV(getTM(STI)), 415f757f3fSDimitry Andric UserSGPRInfo(F, *STI), WorkGroupIDX(false), WorkGroupIDY(false), 425f757f3fSDimitry Andric WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false), 435f757f3fSDimitry Andric PrivateSegmentWaveByteOffset(false), WorkItemIDX(false), 445f757f3fSDimitry Andric WorkItemIDY(false), WorkItemIDZ(false), ImplicitArgPtr(false), 455f757f3fSDimitry Andric GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) { 46bdd1243dSDimitry Andric const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI); 470b57cec5SDimitry Andric FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); 480b57cec5SDimitry Andric WavesPerEU = ST.getWavesPerEU(F); 490b57cec5SDimitry Andric 505ffd83dbSDimitry Andric Occupancy = ST.computeOccupancy(F, getLDSSize()); 510b57cec5SDimitry Andric CallingConv::ID CC = F.getCallingConv(); 520b57cec5SDimitry Andric 5306c3fb27SDimitry Andric VRegFlags.reserve(1024); 5406c3fb27SDimitry Andric 55349cc55cSDimitry Andric const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL || 56349cc55cSDimitry Andric CC == CallingConv::SPIR_KERNEL; 575ffd83dbSDimitry Andric 58349cc55cSDimitry Andric if (IsKernel) { 590b57cec5SDimitry Andric WorkGroupIDX = true; 600b57cec5SDimitry Andric WorkItemIDX = true; 610b57cec5SDimitry Andric } else if (CC == CallingConv::AMDGPU_PS) { 620b57cec5SDimitry Andric PSInputAddr = AMDGPU::getInitialPSInputAddr(F); 630b57cec5SDimitry Andric } 640b57cec5SDimitry Andric 6581ad6265SDimitry Andric MayNeedAGPRs = ST.hasMAIInsts(); 6681ad6265SDimitry Andric 675f757f3fSDimitry Andric if (AMDGPU::isChainCC(CC)) { 685f757f3fSDimitry Andric // Chain functions don't receive an SP from their caller, but are free to 695f757f3fSDimitry Andric // set one up. For now, we can use s32 to match what amdgpu_gfx functions 705f757f3fSDimitry Andric // would use if called, but this can be revisited. 715f757f3fSDimitry Andric // FIXME: Only reserve this if we actually need it. 725f757f3fSDimitry Andric StackPtrOffsetReg = AMDGPU::SGPR32; 735f757f3fSDimitry Andric 745f757f3fSDimitry Andric ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51; 755f757f3fSDimitry Andric 765f757f3fSDimitry Andric ArgInfo.PrivateSegmentBuffer = 775f757f3fSDimitry Andric ArgDescriptor::createRegister(ScratchRSrcReg); 785f757f3fSDimitry Andric 795f757f3fSDimitry Andric ImplicitArgPtr = false; 805f757f3fSDimitry Andric } else if (!isEntryFunction()) { 810eae32dcSDimitry Andric if (CC != CallingConv::AMDGPU_Gfx) 82fe6060f1SDimitry Andric ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo; 83fe6060f1SDimitry Andric 840b57cec5SDimitry Andric // TODO: Pick a high register, and shift down, similar to a kernel. 855ffd83dbSDimitry Andric FrameOffsetReg = AMDGPU::SGPR33; 860b57cec5SDimitry Andric StackPtrOffsetReg = AMDGPU::SGPR32; 870b57cec5SDimitry Andric 88e8d8bef9SDimitry Andric if (!ST.enableFlatScratch()) { 89e8d8bef9SDimitry Andric // Non-entry functions have no special inputs for now, other registers 90e8d8bef9SDimitry Andric // required for scratch access. 91e8d8bef9SDimitry Andric ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3; 92e8d8bef9SDimitry Andric 930b57cec5SDimitry Andric ArgInfo.PrivateSegmentBuffer = 940b57cec5SDimitry Andric ArgDescriptor::createRegister(ScratchRSrcReg); 95e8d8bef9SDimitry Andric } 960b57cec5SDimitry Andric 97349cc55cSDimitry Andric if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr")) 980b57cec5SDimitry Andric ImplicitArgPtr = true; 990b57cec5SDimitry Andric } else { 100349cc55cSDimitry Andric ImplicitArgPtr = false; 1010b57cec5SDimitry Andric MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(), 1020b57cec5SDimitry Andric MaxKernArgAlign); 10381ad6265SDimitry Andric 10481ad6265SDimitry Andric if (ST.hasGFX90AInsts() && 10581ad6265SDimitry Andric ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() && 106bdd1243dSDimitry Andric !mayUseAGPRs(F)) 10781ad6265SDimitry Andric MayNeedAGPRs = false; // We will select all MAI with VGPR operands. 1080b57cec5SDimitry Andric } 109349cc55cSDimitry Andric 11006c3fb27SDimitry Andric if (!AMDGPU::isGraphics(CC) || 11106c3fb27SDimitry Andric (CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) { 112349cc55cSDimitry Andric if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x")) 1130b57cec5SDimitry Andric WorkGroupIDX = true; 1140b57cec5SDimitry Andric 115349cc55cSDimitry Andric if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y")) 1160b57cec5SDimitry Andric WorkGroupIDY = true; 1170b57cec5SDimitry Andric 118349cc55cSDimitry Andric if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z")) 1190b57cec5SDimitry Andric WorkGroupIDZ = true; 12006c3fb27SDimitry Andric } 1210b57cec5SDimitry Andric 12206c3fb27SDimitry Andric if (!AMDGPU::isGraphics(CC)) { 123349cc55cSDimitry Andric if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x")) 1240b57cec5SDimitry Andric WorkItemIDX = true; 1250b57cec5SDimitry Andric 12604eeddc0SDimitry Andric if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") && 12704eeddc0SDimitry Andric ST.getMaxWorkitemID(F, 1) != 0) 1280b57cec5SDimitry Andric WorkItemIDY = true; 1290b57cec5SDimitry Andric 13004eeddc0SDimitry Andric if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") && 13104eeddc0SDimitry Andric ST.getMaxWorkitemID(F, 2) != 0) 1320b57cec5SDimitry Andric WorkItemIDZ = true; 133349cc55cSDimitry Andric 134fcaf7f86SDimitry Andric if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id")) 135fcaf7f86SDimitry Andric LDSKernelId = true; 1365ffd83dbSDimitry Andric } 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric if (isEntryFunction()) { 1390b57cec5SDimitry Andric // X, XY, and XYZ are the only supported combinations, so make sure Y is 1400b57cec5SDimitry Andric // enabled if Z is. 1410b57cec5SDimitry Andric if (WorkItemIDZ) 1420b57cec5SDimitry Andric WorkItemIDY = true; 1430b57cec5SDimitry Andric 144fe6060f1SDimitry Andric if (!ST.flatScratchIsArchitected()) { 1450b57cec5SDimitry Andric PrivateSegmentWaveByteOffset = true; 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric // HS and GS always have the scratch wave offset in SGPR5 on GFX9. 1480b57cec5SDimitry Andric if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 && 1490b57cec5SDimitry Andric (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS)) 1500b57cec5SDimitry Andric ArgInfo.PrivateSegmentWaveByteOffset = 1510b57cec5SDimitry Andric ArgDescriptor::createRegister(AMDGPU::SGPR5); 1520b57cec5SDimitry Andric } 153fe6060f1SDimitry Andric } 1540b57cec5SDimitry Andric 1550b57cec5SDimitry Andric Attribute A = F.getFnAttribute("amdgpu-git-ptr-high"); 1560b57cec5SDimitry Andric StringRef S = A.getValueAsString(); 1570b57cec5SDimitry Andric if (!S.empty()) 1580b57cec5SDimitry Andric S.consumeInteger(0, GITPtrHigh); 1590b57cec5SDimitry Andric 1600b57cec5SDimitry Andric A = F.getFnAttribute("amdgpu-32bit-address-high-bits"); 1610b57cec5SDimitry Andric S = A.getValueAsString(); 1620b57cec5SDimitry Andric if (!S.empty()) 1630b57cec5SDimitry Andric S.consumeInteger(0, HighBitsOf32BitAddress); 1640b57cec5SDimitry Andric 16581ad6265SDimitry Andric // On GFX908, in order to guarantee copying between AGPRs, we need a scratch 16681ad6265SDimitry Andric // VGPR available at all times. For now, reserve highest available VGPR. After 16781ad6265SDimitry Andric // RA, shift it to the lowest available unused VGPR if the one exist. 16881ad6265SDimitry Andric if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) { 16981ad6265SDimitry Andric VGPRForAGPRCopy = 17081ad6265SDimitry Andric AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1); 17181ad6265SDimitry Andric } 17281ad6265SDimitry Andric } 17381ad6265SDimitry Andric 17481ad6265SDimitry Andric MachineFunctionInfo *SIMachineFunctionInfo::clone( 17581ad6265SDimitry Andric BumpPtrAllocator &Allocator, MachineFunction &DestMF, 17681ad6265SDimitry Andric const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB) 17781ad6265SDimitry Andric const { 17881ad6265SDimitry Andric return DestMF.cloneInfo<SIMachineFunctionInfo>(*this); 1790b57cec5SDimitry Andric } 1800b57cec5SDimitry Andric 1810b57cec5SDimitry Andric void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) { 1820b57cec5SDimitry Andric limitOccupancy(getMaxWavesPerEU()); 1830b57cec5SDimitry Andric const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>(); 1840b57cec5SDimitry Andric limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(), 1850b57cec5SDimitry Andric MF.getFunction())); 1860b57cec5SDimitry Andric } 1870b57cec5SDimitry Andric 1885ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addPrivateSegmentBuffer( 1890b57cec5SDimitry Andric const SIRegisterInfo &TRI) { 1900b57cec5SDimitry Andric ArgInfo.PrivateSegmentBuffer = 1910b57cec5SDimitry Andric ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 1928bcb0991SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass)); 1930b57cec5SDimitry Andric NumUserSGPRs += 4; 1940b57cec5SDimitry Andric return ArgInfo.PrivateSegmentBuffer.getRegister(); 1950b57cec5SDimitry Andric } 1960b57cec5SDimitry Andric 1975ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) { 1980b57cec5SDimitry Andric ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 1990b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2000b57cec5SDimitry Andric NumUserSGPRs += 2; 2010b57cec5SDimitry Andric return ArgInfo.DispatchPtr.getRegister(); 2020b57cec5SDimitry Andric } 2030b57cec5SDimitry Andric 2045ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) { 2050b57cec5SDimitry Andric ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2060b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2070b57cec5SDimitry Andric NumUserSGPRs += 2; 2080b57cec5SDimitry Andric return ArgInfo.QueuePtr.getRegister(); 2090b57cec5SDimitry Andric } 2100b57cec5SDimitry Andric 2115ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) { 2120b57cec5SDimitry Andric ArgInfo.KernargSegmentPtr 2130b57cec5SDimitry Andric = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2140b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2150b57cec5SDimitry Andric NumUserSGPRs += 2; 2160b57cec5SDimitry Andric return ArgInfo.KernargSegmentPtr.getRegister(); 2170b57cec5SDimitry Andric } 2180b57cec5SDimitry Andric 2195ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) { 2200b57cec5SDimitry Andric ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2210b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2220b57cec5SDimitry Andric NumUserSGPRs += 2; 2230b57cec5SDimitry Andric return ArgInfo.DispatchID.getRegister(); 2240b57cec5SDimitry Andric } 2250b57cec5SDimitry Andric 2265ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) { 2270b57cec5SDimitry Andric ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2280b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2290b57cec5SDimitry Andric NumUserSGPRs += 2; 2300b57cec5SDimitry Andric return ArgInfo.FlatScratchInit.getRegister(); 2310b57cec5SDimitry Andric } 2320b57cec5SDimitry Andric 2335ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) { 2340b57cec5SDimitry Andric ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 2350b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 2360b57cec5SDimitry Andric NumUserSGPRs += 2; 2370b57cec5SDimitry Andric return ArgInfo.ImplicitBufferPtr.getRegister(); 2380b57cec5SDimitry Andric } 2390b57cec5SDimitry Andric 240fcaf7f86SDimitry Andric Register SIMachineFunctionInfo::addLDSKernelId() { 241fcaf7f86SDimitry Andric ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR()); 242fcaf7f86SDimitry Andric NumUserSGPRs += 1; 243fcaf7f86SDimitry Andric return ArgInfo.LDSKernelId.getRegister(); 244fcaf7f86SDimitry Andric } 245fcaf7f86SDimitry Andric 2465f757f3fSDimitry Andric SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg( 2475f757f3fSDimitry Andric const SIRegisterInfo &TRI, const TargetRegisterClass *RC, 2485f757f3fSDimitry Andric unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs) { 2495f757f3fSDimitry Andric assert(!ArgInfo.PreloadKernArgs.count(KernArgIdx) && 2505f757f3fSDimitry Andric "Preload kernel argument allocated twice."); 2515f757f3fSDimitry Andric NumUserSGPRs += PaddingSGPRs; 2525f757f3fSDimitry Andric // If the available register tuples are aligned with the kernarg to be 2535f757f3fSDimitry Andric // preloaded use that register, otherwise we need to use a set of SGPRs and 2545f757f3fSDimitry Andric // merge them. 2555f757f3fSDimitry Andric Register PreloadReg = 2565f757f3fSDimitry Andric TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC); 2575f757f3fSDimitry Andric if (PreloadReg && 2585f757f3fSDimitry Andric (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) { 2595f757f3fSDimitry Andric ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(PreloadReg); 2605f757f3fSDimitry Andric NumUserSGPRs += AllocSizeDWord; 2615f757f3fSDimitry Andric } else { 2625f757f3fSDimitry Andric for (unsigned I = 0; I < AllocSizeDWord; ++I) { 2635f757f3fSDimitry Andric ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(getNextUserSGPR()); 2645f757f3fSDimitry Andric NumUserSGPRs++; 2655f757f3fSDimitry Andric } 2665f757f3fSDimitry Andric } 2675f757f3fSDimitry Andric 2685f757f3fSDimitry Andric // Track the actual number of SGPRs that HW will preload to. 2695f757f3fSDimitry Andric UserSGPRInfo.allocKernargPreloadSGPRs(AllocSizeDWord + PaddingSGPRs); 2705f757f3fSDimitry Andric return &ArgInfo.PreloadKernArgs[KernArgIdx].Regs; 2715f757f3fSDimitry Andric } 2725f757f3fSDimitry Andric 273bdd1243dSDimitry Andric void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR, 274bdd1243dSDimitry Andric uint64_t Size, Align Alignment) { 275bdd1243dSDimitry Andric // Skip if it is an entry function or the register is already added. 276bdd1243dSDimitry Andric if (isEntryFunction() || WWMSpills.count(VGPR)) 277bdd1243dSDimitry Andric return; 278bdd1243dSDimitry Andric 2795f757f3fSDimitry Andric // Skip if this is a function with the amdgpu_cs_chain or 2805f757f3fSDimitry Andric // amdgpu_cs_chain_preserve calling convention and this is a scratch register. 2815f757f3fSDimitry Andric // We never need to allocate a spill for these because we don't even need to 2825f757f3fSDimitry Andric // restore the inactive lanes for them (they're scratchier than the usual 2835f757f3fSDimitry Andric // scratch registers). 2845f757f3fSDimitry Andric if (isChainFunction() && SIRegisterInfo::isChainScratchRegister(VGPR)) 2855f757f3fSDimitry Andric return; 2865f757f3fSDimitry Andric 287bdd1243dSDimitry Andric WWMSpills.insert(std::make_pair( 288bdd1243dSDimitry Andric VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment))); 289bdd1243dSDimitry Andric } 290bdd1243dSDimitry Andric 291bdd1243dSDimitry Andric // Separate out the callee-saved and scratch registers. 292bdd1243dSDimitry Andric void SIMachineFunctionInfo::splitWWMSpillRegisters( 293bdd1243dSDimitry Andric MachineFunction &MF, 294bdd1243dSDimitry Andric SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs, 295bdd1243dSDimitry Andric SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const { 296bdd1243dSDimitry Andric const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); 297bdd1243dSDimitry Andric for (auto &Reg : WWMSpills) { 298bdd1243dSDimitry Andric if (isCalleeSavedReg(CSRegs, Reg.first)) 299bdd1243dSDimitry Andric CalleeSavedRegs.push_back(Reg); 300bdd1243dSDimitry Andric else 301bdd1243dSDimitry Andric ScratchRegs.push_back(Reg); 302bdd1243dSDimitry Andric } 303bdd1243dSDimitry Andric } 304bdd1243dSDimitry Andric 3055ffd83dbSDimitry Andric bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs, 306bdd1243dSDimitry Andric MCPhysReg Reg) const { 3070b57cec5SDimitry Andric for (unsigned I = 0; CSRegs[I]; ++I) { 3080b57cec5SDimitry Andric if (CSRegs[I] == Reg) 3090b57cec5SDimitry Andric return true; 3100b57cec5SDimitry Andric } 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric return false; 3130b57cec5SDimitry Andric } 3140b57cec5SDimitry Andric 315*7a6dacacSDimitry Andric void SIMachineFunctionInfo::shiftSpillPhysVGPRsToLowestRange( 316*7a6dacacSDimitry Andric MachineFunction &MF) { 317*7a6dacacSDimitry Andric const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo(); 318*7a6dacacSDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 319*7a6dacacSDimitry Andric for (unsigned I = 0, E = SpillPhysVGPRs.size(); I < E; ++I) { 320*7a6dacacSDimitry Andric Register Reg = SpillPhysVGPRs[I]; 321*7a6dacacSDimitry Andric Register NewReg = 322*7a6dacacSDimitry Andric TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF); 323*7a6dacacSDimitry Andric if (!NewReg || NewReg >= Reg) 324*7a6dacacSDimitry Andric break; 325*7a6dacacSDimitry Andric 326*7a6dacacSDimitry Andric MRI.replaceRegWith(Reg, NewReg); 327*7a6dacacSDimitry Andric 328*7a6dacacSDimitry Andric // Update various tables with the new VGPR. 329*7a6dacacSDimitry Andric SpillPhysVGPRs[I] = NewReg; 330*7a6dacacSDimitry Andric WWMReservedRegs.remove(Reg); 331*7a6dacacSDimitry Andric WWMReservedRegs.insert(NewReg); 332*7a6dacacSDimitry Andric WWMSpills.insert(std::make_pair(NewReg, WWMSpills[Reg])); 333*7a6dacacSDimitry Andric WWMSpills.erase(Reg); 334*7a6dacacSDimitry Andric 335*7a6dacacSDimitry Andric for (MachineBasicBlock &MBB : MF) { 336*7a6dacacSDimitry Andric MBB.removeLiveIn(Reg); 337*7a6dacacSDimitry Andric MBB.sortUniqueLiveIns(); 338*7a6dacacSDimitry Andric } 339*7a6dacacSDimitry Andric } 340*7a6dacacSDimitry Andric } 341*7a6dacacSDimitry Andric 3425f757f3fSDimitry Andric bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills( 3435f757f3fSDimitry Andric MachineFunction &MF, int FI, unsigned LaneIndex) { 344bdd1243dSDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 345bdd1243dSDimitry Andric Register LaneVGPR; 346bdd1243dSDimitry Andric if (!LaneIndex) { 3475f757f3fSDimitry Andric LaneVGPR = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 348bdd1243dSDimitry Andric SpillVGPRs.push_back(LaneVGPR); 349bdd1243dSDimitry Andric } else { 350bdd1243dSDimitry Andric LaneVGPR = SpillVGPRs.back(); 351bdd1243dSDimitry Andric } 352bdd1243dSDimitry Andric 3535f757f3fSDimitry Andric SGPRSpillsToVirtualVGPRLanes[FI].push_back( 354bdd1243dSDimitry Andric SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex)); 355bdd1243dSDimitry Andric return true; 356bdd1243dSDimitry Andric } 357bdd1243dSDimitry Andric 3585f757f3fSDimitry Andric bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills( 359*7a6dacacSDimitry Andric MachineFunction &MF, int FI, unsigned LaneIndex, bool IsPrologEpilog) { 360bdd1243dSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 361bdd1243dSDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 362bdd1243dSDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 363bdd1243dSDimitry Andric Register LaneVGPR; 364bdd1243dSDimitry Andric if (!LaneIndex) { 365*7a6dacacSDimitry Andric // Find the highest available register if called before RA to ensure the 366*7a6dacacSDimitry Andric // lowest registers are available for allocation. The LaneVGPR, in that 367*7a6dacacSDimitry Andric // case, will be shifted back to the lowest range after VGPR allocation. 368*7a6dacacSDimitry Andric LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF, 369*7a6dacacSDimitry Andric !IsPrologEpilog); 370bdd1243dSDimitry Andric if (LaneVGPR == AMDGPU::NoRegister) { 371bdd1243dSDimitry Andric // We have no VGPRs left for spilling SGPRs. Reset because we will not 372bdd1243dSDimitry Andric // partially spill the SGPR to VGPRs. 3735f757f3fSDimitry Andric SGPRSpillsToPhysicalVGPRLanes.erase(FI); 374bdd1243dSDimitry Andric return false; 375bdd1243dSDimitry Andric } 376bdd1243dSDimitry Andric 377bdd1243dSDimitry Andric allocateWWMSpill(MF, LaneVGPR); 3785f757f3fSDimitry Andric reserveWWMRegister(LaneVGPR); 3795f757f3fSDimitry Andric for (MachineBasicBlock &MBB : MF) { 3805f757f3fSDimitry Andric MBB.addLiveIn(LaneVGPR); 3815f757f3fSDimitry Andric MBB.sortUniqueLiveIns(); 3825f757f3fSDimitry Andric } 3835f757f3fSDimitry Andric SpillPhysVGPRs.push_back(LaneVGPR); 384bdd1243dSDimitry Andric } else { 3855f757f3fSDimitry Andric LaneVGPR = SpillPhysVGPRs.back(); 386bdd1243dSDimitry Andric } 387bdd1243dSDimitry Andric 3885f757f3fSDimitry Andric SGPRSpillsToPhysicalVGPRLanes[FI].push_back( 389bdd1243dSDimitry Andric SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex)); 390bdd1243dSDimitry Andric return true; 391bdd1243dSDimitry Andric } 392bdd1243dSDimitry Andric 393*7a6dacacSDimitry Andric bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane( 394*7a6dacacSDimitry Andric MachineFunction &MF, int FI, bool SpillToPhysVGPRLane, 395bdd1243dSDimitry Andric bool IsPrologEpilog) { 396bdd1243dSDimitry Andric std::vector<SIRegisterInfo::SpilledReg> &SpillLanes = 397*7a6dacacSDimitry Andric SpillToPhysVGPRLane ? SGPRSpillsToPhysicalVGPRLanes[FI] 3985f757f3fSDimitry Andric : SGPRSpillsToVirtualVGPRLanes[FI]; 3990b57cec5SDimitry Andric 4000b57cec5SDimitry Andric // This has already been allocated. 4010b57cec5SDimitry Andric if (!SpillLanes.empty()) 4020b57cec5SDimitry Andric return true; 4030b57cec5SDimitry Andric 4040b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 4050b57cec5SDimitry Andric MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 4060b57cec5SDimitry Andric unsigned WaveSize = ST.getWavefrontSize(); 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric unsigned Size = FrameInfo.getObjectSize(FI); 4095ffd83dbSDimitry Andric unsigned NumLanes = Size / 4; 4100b57cec5SDimitry Andric 4115ffd83dbSDimitry Andric if (NumLanes > WaveSize) 4125ffd83dbSDimitry Andric return false; 4135ffd83dbSDimitry Andric 4145ffd83dbSDimitry Andric assert(Size >= 4 && "invalid sgpr spill size"); 415bdd1243dSDimitry Andric assert(ST.getRegisterInfo()->spillSGPRToVGPR() && 416bdd1243dSDimitry Andric "not spilling SGPRs to VGPRs"); 4170b57cec5SDimitry Andric 418*7a6dacacSDimitry Andric unsigned &NumSpillLanes = SpillToPhysVGPRLane ? NumPhysicalVGPRSpillLanes 419*7a6dacacSDimitry Andric : NumVirtualVGPRSpillLanes; 4200b57cec5SDimitry Andric 421bdd1243dSDimitry Andric for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) { 422bdd1243dSDimitry Andric unsigned LaneIndex = (NumSpillLanes % WaveSize); 423fe6060f1SDimitry Andric 424*7a6dacacSDimitry Andric bool Allocated = SpillToPhysVGPRLane 425*7a6dacacSDimitry Andric ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex, 426*7a6dacacSDimitry Andric IsPrologEpilog) 4275f757f3fSDimitry Andric : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex); 428bdd1243dSDimitry Andric if (!Allocated) { 429bdd1243dSDimitry Andric NumSpillLanes -= I; 4300b57cec5SDimitry Andric return false; 4310b57cec5SDimitry Andric } 4320b57cec5SDimitry Andric } 4330b57cec5SDimitry Andric 4340b57cec5SDimitry Andric return true; 4350b57cec5SDimitry Andric } 4360b57cec5SDimitry Andric 4370b57cec5SDimitry Andric /// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI. 4380b57cec5SDimitry Andric /// Either AGPR is spilled to VGPR to vice versa. 4390b57cec5SDimitry Andric /// Returns true if a \p FI can be eliminated completely. 4400b57cec5SDimitry Andric bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF, 4410b57cec5SDimitry Andric int FI, 4420b57cec5SDimitry Andric bool isAGPRtoVGPR) { 4430b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 4440b57cec5SDimitry Andric MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 4450b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 4460b57cec5SDimitry Andric 4470b57cec5SDimitry Andric assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI)); 4480b57cec5SDimitry Andric 4490b57cec5SDimitry Andric auto &Spill = VGPRToAGPRSpills[FI]; 4500b57cec5SDimitry Andric 4510b57cec5SDimitry Andric // This has already been allocated. 4520b57cec5SDimitry Andric if (!Spill.Lanes.empty()) 4530b57cec5SDimitry Andric return Spill.FullyAllocated; 4540b57cec5SDimitry Andric 4550b57cec5SDimitry Andric unsigned Size = FrameInfo.getObjectSize(FI); 4560b57cec5SDimitry Andric unsigned NumLanes = Size / 4; 4570b57cec5SDimitry Andric Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister); 4580b57cec5SDimitry Andric 4590b57cec5SDimitry Andric const TargetRegisterClass &RC = 4600b57cec5SDimitry Andric isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass; 4610b57cec5SDimitry Andric auto Regs = RC.getRegisters(); 4620b57cec5SDimitry Andric 4630b57cec5SDimitry Andric auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR; 4640b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 4650b57cec5SDimitry Andric Spill.FullyAllocated = true; 4660b57cec5SDimitry Andric 4670b57cec5SDimitry Andric // FIXME: Move allocation logic out of MachineFunctionInfo and initialize 4680b57cec5SDimitry Andric // once. 4690b57cec5SDimitry Andric BitVector OtherUsedRegs; 4700b57cec5SDimitry Andric OtherUsedRegs.resize(TRI->getNumRegs()); 4710b57cec5SDimitry Andric 4720b57cec5SDimitry Andric const uint32_t *CSRMask = 4730b57cec5SDimitry Andric TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv()); 4740b57cec5SDimitry Andric if (CSRMask) 4750b57cec5SDimitry Andric OtherUsedRegs.setBitsInMask(CSRMask); 4760b57cec5SDimitry Andric 4770b57cec5SDimitry Andric // TODO: Should include register tuples, but doesn't matter with current 4780b57cec5SDimitry Andric // usage. 4790b57cec5SDimitry Andric for (MCPhysReg Reg : SpillAGPR) 4800b57cec5SDimitry Andric OtherUsedRegs.set(Reg); 4810b57cec5SDimitry Andric for (MCPhysReg Reg : SpillVGPR) 4820b57cec5SDimitry Andric OtherUsedRegs.set(Reg); 4830b57cec5SDimitry Andric 4840b57cec5SDimitry Andric SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin(); 485349cc55cSDimitry Andric for (int I = NumLanes - 1; I >= 0; --I) { 4860b57cec5SDimitry Andric NextSpillReg = std::find_if( 4870b57cec5SDimitry Andric NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) { 4880b57cec5SDimitry Andric return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) && 4890b57cec5SDimitry Andric !OtherUsedRegs[Reg]; 4900b57cec5SDimitry Andric }); 4910b57cec5SDimitry Andric 4920b57cec5SDimitry Andric if (NextSpillReg == Regs.end()) { // Registers exhausted 4930b57cec5SDimitry Andric Spill.FullyAllocated = false; 4940b57cec5SDimitry Andric break; 4950b57cec5SDimitry Andric } 4960b57cec5SDimitry Andric 4970b57cec5SDimitry Andric OtherUsedRegs.set(*NextSpillReg); 4980b57cec5SDimitry Andric SpillRegs.push_back(*NextSpillReg); 499bdd1243dSDimitry Andric MRI.reserveReg(*NextSpillReg, TRI); 5000b57cec5SDimitry Andric Spill.Lanes[I] = *NextSpillReg++; 5010b57cec5SDimitry Andric } 5020b57cec5SDimitry Andric 5030b57cec5SDimitry Andric return Spill.FullyAllocated; 5040b57cec5SDimitry Andric } 5050b57cec5SDimitry Andric 50681ad6265SDimitry Andric bool SIMachineFunctionInfo::removeDeadFrameIndices( 50781ad6265SDimitry Andric MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) { 5085f757f3fSDimitry Andric // Remove dead frame indices from function frame, however keep FP & BP since 5095f757f3fSDimitry Andric // spills for them haven't been inserted yet. And also make sure to remove the 5105f757f3fSDimitry Andric // frame indices from `SGPRSpillsToVirtualVGPRLanes` data structure, 5115f757f3fSDimitry Andric // otherwise, it could result in an unexpected side effect and bug, in case of 5125f757f3fSDimitry Andric // any re-mapping of freed frame indices by later pass(es) like "stack slot 513bdd1243dSDimitry Andric // coloring". 5145f757f3fSDimitry Andric for (auto &R : make_early_inc_range(SGPRSpillsToVirtualVGPRLanes)) { 5150b57cec5SDimitry Andric MFI.RemoveStackObject(R.first); 5165f757f3fSDimitry Andric SGPRSpillsToVirtualVGPRLanes.erase(R.first); 5170b57cec5SDimitry Andric } 5180b57cec5SDimitry Andric 5195f757f3fSDimitry Andric // Remove the dead frame indices of CSR SGPRs which are spilled to physical 5205f757f3fSDimitry Andric // VGPR lanes during SILowerSGPRSpills pass. 5215f757f3fSDimitry Andric if (!ResetSGPRSpillStackIDs) { 5225f757f3fSDimitry Andric for (auto &R : make_early_inc_range(SGPRSpillsToPhysicalVGPRLanes)) { 5235f757f3fSDimitry Andric MFI.RemoveStackObject(R.first); 5245f757f3fSDimitry Andric SGPRSpillsToPhysicalVGPRLanes.erase(R.first); 5255f757f3fSDimitry Andric } 5265f757f3fSDimitry Andric } 52781ad6265SDimitry Andric bool HaveSGPRToMemory = false; 52881ad6265SDimitry Andric 52981ad6265SDimitry Andric if (ResetSGPRSpillStackIDs) { 530bdd1243dSDimitry Andric // All other SGPRs must be allocated on the default stack, so reset the 53181ad6265SDimitry Andric // stack ID. 532bdd1243dSDimitry Andric for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E; 533bdd1243dSDimitry Andric ++I) { 534bdd1243dSDimitry Andric if (!checkIndexInPrologEpilogSGPRSpills(I)) { 535bdd1243dSDimitry Andric if (MFI.getStackID(I) == TargetStackID::SGPRSpill) { 536bdd1243dSDimitry Andric MFI.setStackID(I, TargetStackID::Default); 53781ad6265SDimitry Andric HaveSGPRToMemory = true; 53881ad6265SDimitry Andric } 53981ad6265SDimitry Andric } 54081ad6265SDimitry Andric } 54181ad6265SDimitry Andric } 5420b57cec5SDimitry Andric 5430b57cec5SDimitry Andric for (auto &R : VGPRToAGPRSpills) { 5440eae32dcSDimitry Andric if (R.second.IsDead) 5450b57cec5SDimitry Andric MFI.RemoveStackObject(R.first); 5460b57cec5SDimitry Andric } 54781ad6265SDimitry Andric 54881ad6265SDimitry Andric return HaveSGPRToMemory; 54981ad6265SDimitry Andric } 55081ad6265SDimitry Andric 551fe6060f1SDimitry Andric int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI, 552fe6060f1SDimitry Andric const SIRegisterInfo &TRI) { 553fe6060f1SDimitry Andric if (ScavengeFI) 554fe6060f1SDimitry Andric return *ScavengeFI; 5555f757f3fSDimitry Andric if (isBottomOfStack()) { 556fe6060f1SDimitry Andric ScavengeFI = MFI.CreateFixedObject( 557fe6060f1SDimitry Andric TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false); 558fe6060f1SDimitry Andric } else { 559fe6060f1SDimitry Andric ScavengeFI = MFI.CreateStackObject( 560fe6060f1SDimitry Andric TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 561fe6060f1SDimitry Andric TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false); 562fe6060f1SDimitry Andric } 563fe6060f1SDimitry Andric return *ScavengeFI; 564fe6060f1SDimitry Andric } 565fe6060f1SDimitry Andric 5660b57cec5SDimitry Andric MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const { 5670b57cec5SDimitry Andric assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); 5680b57cec5SDimitry Andric return AMDGPU::SGPR0 + NumUserSGPRs; 5690b57cec5SDimitry Andric } 5700b57cec5SDimitry Andric 5710b57cec5SDimitry Andric MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const { 5720b57cec5SDimitry Andric return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs; 5730b57cec5SDimitry Andric } 5740b57cec5SDimitry Andric 57506c3fb27SDimitry Andric void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(Register Reg) { 57606c3fb27SDimitry Andric VRegFlags.grow(Reg); 57706c3fb27SDimitry Andric } 57806c3fb27SDimitry Andric 57906c3fb27SDimitry Andric void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(Register NewReg, 58006c3fb27SDimitry Andric Register SrcReg) { 58106c3fb27SDimitry Andric VRegFlags.grow(NewReg); 58206c3fb27SDimitry Andric VRegFlags[NewReg] = VRegFlags[SrcReg]; 58306c3fb27SDimitry Andric } 58406c3fb27SDimitry Andric 5855ffd83dbSDimitry Andric Register 5865ffd83dbSDimitry Andric SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const { 5875ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 5885ffd83dbSDimitry Andric if (!ST.isAmdPalOS()) 5895ffd83dbSDimitry Andric return Register(); 5905ffd83dbSDimitry Andric Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in 5915ffd83dbSDimitry Andric if (ST.hasMergedShaders()) { 5925ffd83dbSDimitry Andric switch (MF.getFunction().getCallingConv()) { 5935ffd83dbSDimitry Andric case CallingConv::AMDGPU_HS: 5945ffd83dbSDimitry Andric case CallingConv::AMDGPU_GS: 5955ffd83dbSDimitry Andric // Low GIT address is passed in s8 rather than s0 for an LS+HS or 5965ffd83dbSDimitry Andric // ES+GS merged shader on gfx9+. 5975ffd83dbSDimitry Andric GitPtrLo = AMDGPU::SGPR8; 5985ffd83dbSDimitry Andric return GitPtrLo; 5995ffd83dbSDimitry Andric default: 6005ffd83dbSDimitry Andric return GitPtrLo; 6015ffd83dbSDimitry Andric } 6025ffd83dbSDimitry Andric } 6035ffd83dbSDimitry Andric return GitPtrLo; 6045ffd83dbSDimitry Andric } 6055ffd83dbSDimitry Andric 6065ffd83dbSDimitry Andric static yaml::StringValue regToString(Register Reg, 6070b57cec5SDimitry Andric const TargetRegisterInfo &TRI) { 6080b57cec5SDimitry Andric yaml::StringValue Dest; 6090b57cec5SDimitry Andric { 6100b57cec5SDimitry Andric raw_string_ostream OS(Dest.Value); 6110b57cec5SDimitry Andric OS << printReg(Reg, &TRI); 6120b57cec5SDimitry Andric } 6130b57cec5SDimitry Andric return Dest; 6140b57cec5SDimitry Andric } 6150b57cec5SDimitry Andric 616bdd1243dSDimitry Andric static std::optional<yaml::SIArgumentInfo> 6170b57cec5SDimitry Andric convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, 6180b57cec5SDimitry Andric const TargetRegisterInfo &TRI) { 6190b57cec5SDimitry Andric yaml::SIArgumentInfo AI; 6200b57cec5SDimitry Andric 621bdd1243dSDimitry Andric auto convertArg = [&](std::optional<yaml::SIArgument> &A, 6220b57cec5SDimitry Andric const ArgDescriptor &Arg) { 6230b57cec5SDimitry Andric if (!Arg) 6240b57cec5SDimitry Andric return false; 6250b57cec5SDimitry Andric 6260b57cec5SDimitry Andric // Create a register or stack argument. 6270b57cec5SDimitry Andric yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister()); 6280b57cec5SDimitry Andric if (Arg.isRegister()) { 6290b57cec5SDimitry Andric raw_string_ostream OS(SA.RegisterName.Value); 6300b57cec5SDimitry Andric OS << printReg(Arg.getRegister(), &TRI); 6310b57cec5SDimitry Andric } else 6320b57cec5SDimitry Andric SA.StackOffset = Arg.getStackOffset(); 6330b57cec5SDimitry Andric // Check and update the optional mask. 6340b57cec5SDimitry Andric if (Arg.isMasked()) 6350b57cec5SDimitry Andric SA.Mask = Arg.getMask(); 6360b57cec5SDimitry Andric 6370b57cec5SDimitry Andric A = SA; 6380b57cec5SDimitry Andric return true; 6390b57cec5SDimitry Andric }; 6400b57cec5SDimitry Andric 6415f757f3fSDimitry Andric // TODO: Need to serialize kernarg preloads. 6420b57cec5SDimitry Andric bool Any = false; 6430b57cec5SDimitry Andric Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer); 6440b57cec5SDimitry Andric Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr); 6450b57cec5SDimitry Andric Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr); 6460b57cec5SDimitry Andric Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr); 6470b57cec5SDimitry Andric Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID); 6480b57cec5SDimitry Andric Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit); 649fcaf7f86SDimitry Andric Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId); 6500b57cec5SDimitry Andric Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize); 6510b57cec5SDimitry Andric Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX); 6520b57cec5SDimitry Andric Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY); 6530b57cec5SDimitry Andric Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ); 6540b57cec5SDimitry Andric Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo); 6550b57cec5SDimitry Andric Any |= convertArg(AI.PrivateSegmentWaveByteOffset, 6560b57cec5SDimitry Andric ArgInfo.PrivateSegmentWaveByteOffset); 6570b57cec5SDimitry Andric Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr); 6580b57cec5SDimitry Andric Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr); 6590b57cec5SDimitry Andric Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX); 6600b57cec5SDimitry Andric Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY); 6610b57cec5SDimitry Andric Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ); 6620b57cec5SDimitry Andric 6630b57cec5SDimitry Andric if (Any) 6640b57cec5SDimitry Andric return AI; 6650b57cec5SDimitry Andric 666bdd1243dSDimitry Andric return std::nullopt; 6670b57cec5SDimitry Andric } 6680b57cec5SDimitry Andric 6690b57cec5SDimitry Andric yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( 670fe6060f1SDimitry Andric const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI, 671fe6060f1SDimitry Andric const llvm::MachineFunction &MF) 6720b57cec5SDimitry Andric : ExplicitKernArgSize(MFI.getExplicitKernArgSize()), 673e8d8bef9SDimitry Andric MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()), 67481ad6265SDimitry Andric GDSSize(MFI.getGDSSize()), 675e8d8bef9SDimitry Andric DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()), 6760b57cec5SDimitry Andric NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()), 677e8d8bef9SDimitry Andric MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()), 678e8d8bef9SDimitry Andric HasSpilledSGPRs(MFI.hasSpilledSGPRs()), 679e8d8bef9SDimitry Andric HasSpilledVGPRs(MFI.hasSpilledVGPRs()), 6808bcb0991SDimitry Andric HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()), 681e8d8bef9SDimitry Andric Occupancy(MFI.getOccupancy()), 6820b57cec5SDimitry Andric ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)), 6830b57cec5SDimitry Andric FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)), 6840b57cec5SDimitry Andric StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)), 68581ad6265SDimitry Andric BytesInStackArgArea(MFI.getBytesInStackArgArea()), 68681ad6265SDimitry Andric ReturnsVoid(MFI.returnsVoid()), 68706c3fb27SDimitry Andric ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), 68806c3fb27SDimitry Andric PSInputAddr(MFI.getPSInputAddr()), 68906c3fb27SDimitry Andric PSInputEnable(MFI.getPSInputEnable()), 69006c3fb27SDimitry Andric Mode(MFI.getMode()) { 691bdd1243dSDimitry Andric for (Register Reg : MFI.getWWMReservedRegs()) 69281ad6265SDimitry Andric WWMReservedRegs.push_back(regToString(Reg, TRI)); 69381ad6265SDimitry Andric 69406c3fb27SDimitry Andric if (MFI.getLongBranchReservedReg()) 69506c3fb27SDimitry Andric LongBranchReservedReg = regToString(MFI.getLongBranchReservedReg(), TRI); 69681ad6265SDimitry Andric if (MFI.getVGPRForAGPRCopy()) 69781ad6265SDimitry Andric VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI); 69806c3fb27SDimitry Andric 69906c3fb27SDimitry Andric if (MFI.getSGPRForEXECCopy()) 70006c3fb27SDimitry Andric SGPRForEXECCopy = regToString(MFI.getSGPRForEXECCopy(), TRI); 70106c3fb27SDimitry Andric 702fe6060f1SDimitry Andric auto SFI = MFI.getOptionalScavengeFI(); 703fe6060f1SDimitry Andric if (SFI) 704fe6060f1SDimitry Andric ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo()); 705e8d8bef9SDimitry Andric } 7060b57cec5SDimitry Andric 7070b57cec5SDimitry Andric void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) { 7080b57cec5SDimitry Andric MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this); 7090b57cec5SDimitry Andric } 7100b57cec5SDimitry Andric 7110b57cec5SDimitry Andric bool SIMachineFunctionInfo::initializeBaseYamlFields( 712fe6060f1SDimitry Andric const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, 713fe6060f1SDimitry Andric PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) { 7140b57cec5SDimitry Andric ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize; 71581ad6265SDimitry Andric MaxKernArgAlign = YamlMFI.MaxKernArgAlign; 7160b57cec5SDimitry Andric LDSSize = YamlMFI.LDSSize; 71781ad6265SDimitry Andric GDSSize = YamlMFI.GDSSize; 718e8d8bef9SDimitry Andric DynLDSAlign = YamlMFI.DynLDSAlign; 71906c3fb27SDimitry Andric PSInputAddr = YamlMFI.PSInputAddr; 72006c3fb27SDimitry Andric PSInputEnable = YamlMFI.PSInputEnable; 7218bcb0991SDimitry Andric HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress; 722e8d8bef9SDimitry Andric Occupancy = YamlMFI.Occupancy; 7230b57cec5SDimitry Andric IsEntryFunction = YamlMFI.IsEntryFunction; 7240b57cec5SDimitry Andric NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath; 7250b57cec5SDimitry Andric MemoryBound = YamlMFI.MemoryBound; 7260b57cec5SDimitry Andric WaveLimiter = YamlMFI.WaveLimiter; 727e8d8bef9SDimitry Andric HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs; 728e8d8bef9SDimitry Andric HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs; 72981ad6265SDimitry Andric BytesInStackArgArea = YamlMFI.BytesInStackArgArea; 73081ad6265SDimitry Andric ReturnsVoid = YamlMFI.ReturnsVoid; 731fe6060f1SDimitry Andric 732fe6060f1SDimitry Andric if (YamlMFI.ScavengeFI) { 733fe6060f1SDimitry Andric auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo()); 734fe6060f1SDimitry Andric if (!FIOrErr) { 735fe6060f1SDimitry Andric // Create a diagnostic for a the frame index. 736fe6060f1SDimitry Andric const MemoryBuffer &Buffer = 737fe6060f1SDimitry Andric *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID()); 738fe6060f1SDimitry Andric 739fe6060f1SDimitry Andric Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1, 740fe6060f1SDimitry Andric SourceMgr::DK_Error, toString(FIOrErr.takeError()), 741bdd1243dSDimitry Andric "", std::nullopt, std::nullopt); 742fe6060f1SDimitry Andric SourceRange = YamlMFI.ScavengeFI->SourceRange; 743fe6060f1SDimitry Andric return true; 744fe6060f1SDimitry Andric } 745fe6060f1SDimitry Andric ScavengeFI = *FIOrErr; 746fe6060f1SDimitry Andric } else { 747bdd1243dSDimitry Andric ScavengeFI = std::nullopt; 748fe6060f1SDimitry Andric } 7490b57cec5SDimitry Andric return false; 7500b57cec5SDimitry Andric } 7515ffd83dbSDimitry Andric 752bdd1243dSDimitry Andric bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const { 753bdd1243dSDimitry Andric for (const BasicBlock &BB : F) { 75481ad6265SDimitry Andric for (const Instruction &I : BB) { 75581ad6265SDimitry Andric const auto *CB = dyn_cast<CallBase>(&I); 75681ad6265SDimitry Andric if (!CB) 75781ad6265SDimitry Andric continue; 75881ad6265SDimitry Andric 75981ad6265SDimitry Andric if (CB->isInlineAsm()) { 76081ad6265SDimitry Andric const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand()); 76181ad6265SDimitry Andric for (const auto &CI : IA->ParseConstraints()) { 76281ad6265SDimitry Andric for (StringRef Code : CI.Codes) { 76381ad6265SDimitry Andric Code.consume_front("{"); 7645f757f3fSDimitry Andric if (Code.starts_with("a")) 76581ad6265SDimitry Andric return true; 76681ad6265SDimitry Andric } 76781ad6265SDimitry Andric } 76881ad6265SDimitry Andric continue; 76981ad6265SDimitry Andric } 77081ad6265SDimitry Andric 77181ad6265SDimitry Andric const Function *Callee = 77281ad6265SDimitry Andric dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts()); 77381ad6265SDimitry Andric if (!Callee) 77481ad6265SDimitry Andric return true; 77581ad6265SDimitry Andric 77681ad6265SDimitry Andric if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic) 77781ad6265SDimitry Andric return true; 77881ad6265SDimitry Andric } 77981ad6265SDimitry Andric } 78081ad6265SDimitry Andric 78181ad6265SDimitry Andric return false; 78281ad6265SDimitry Andric } 78381ad6265SDimitry Andric 784349cc55cSDimitry Andric bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const { 785349cc55cSDimitry Andric if (UsesAGPRs) 786349cc55cSDimitry Andric return *UsesAGPRs; 787349cc55cSDimitry Andric 78881ad6265SDimitry Andric if (!mayNeedAGPRs()) { 78981ad6265SDimitry Andric UsesAGPRs = false; 79081ad6265SDimitry Andric return false; 79181ad6265SDimitry Andric } 79281ad6265SDimitry Andric 793349cc55cSDimitry Andric if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) || 794349cc55cSDimitry Andric MF.getFrameInfo().hasCalls()) { 795349cc55cSDimitry Andric UsesAGPRs = true; 796349cc55cSDimitry Andric return true; 797349cc55cSDimitry Andric } 798349cc55cSDimitry Andric 799349cc55cSDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 800349cc55cSDimitry Andric 801349cc55cSDimitry Andric for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { 802349cc55cSDimitry Andric const Register Reg = Register::index2VirtReg(I); 803349cc55cSDimitry Andric const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg); 804349cc55cSDimitry Andric if (RC && SIRegisterInfo::isAGPRClass(RC)) { 805349cc55cSDimitry Andric UsesAGPRs = true; 806349cc55cSDimitry Andric return true; 807349cc55cSDimitry Andric } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) { 808349cc55cSDimitry Andric // Defer caching UsesAGPRs, function might not yet been regbank selected. 809349cc55cSDimitry Andric return true; 810349cc55cSDimitry Andric } 811349cc55cSDimitry Andric } 812349cc55cSDimitry Andric 813349cc55cSDimitry Andric for (MCRegister Reg : AMDGPU::AGPR_32RegClass) { 814349cc55cSDimitry Andric if (MRI.isPhysRegUsed(Reg)) { 815349cc55cSDimitry Andric UsesAGPRs = true; 816349cc55cSDimitry Andric return true; 817349cc55cSDimitry Andric } 818349cc55cSDimitry Andric } 819349cc55cSDimitry Andric 820349cc55cSDimitry Andric UsesAGPRs = false; 821349cc55cSDimitry Andric return false; 822349cc55cSDimitry Andric } 823