10b57cec5SDimitry Andric //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric
90b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
10fe6060f1SDimitry Andric #include "AMDGPUSubtarget.h"
115f757f3fSDimitry Andric #include "AMDGPUTargetMachine.h"
125f757f3fSDimitry Andric #include "GCNSubtarget.h"
13fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
145f757f3fSDimitry Andric #include "SIRegisterInfo.h"
15fe6060f1SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
16fe6060f1SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
175f757f3fSDimitry Andric #include "llvm/CodeGen/MIRParser/MIParser.h"
18fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
19fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
20fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
21fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
22fe6060f1SDimitry Andric #include "llvm/IR/CallingConv.h"
23fe6060f1SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
24fe6060f1SDimitry Andric #include "llvm/IR/Function.h"
25fe6060f1SDimitry Andric #include <cassert>
26bdd1243dSDimitry Andric #include <optional>
27fe6060f1SDimitry Andric #include <vector>
280b57cec5SDimitry Andric
// Maximum number of lanes in a wavefront (wave64).
enum { MAX_LANES = 64 };
300b57cec5SDimitry Andric
310b57cec5SDimitry Andric using namespace llvm;
320b57cec5SDimitry Andric
getTM(const GCNSubtarget * STI)33bdd1243dSDimitry Andric const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
34bdd1243dSDimitry Andric const SITargetLowering *TLI = STI->getTargetLowering();
35bdd1243dSDimitry Andric return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());
36bdd1243dSDimitry Andric }
37bdd1243dSDimitry Andric
// Derive all per-function state from the IR function's calling convention,
// attributes, and subtarget features. The "amdgpu-no-*" attributes indicate an
// input is provably unused; absence of such an attribute means the input may
// be needed and must be made available.
SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
                                             const GCNSubtarget *STI)
    : AMDGPUMachineFunction(F, *STI), Mode(F, *STI), GWSResourcePSV(getTM(STI)),
      UserSGPRInfo(F, *STI), WorkGroupIDX(false), WorkGroupIDY(false),
      WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false),
      PrivateSegmentWaveByteOffset(false), WorkItemIDX(false),
      WorkItemIDY(false), WorkItemIDZ(false), ImplicitArgPtr(false),
      GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);
  MaxNumWorkGroups = ST.getMaxNumWorkGroups(F);
  assert(MaxNumWorkGroups.size() == 3);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // Pre-size the per-virtual-register flag table to avoid early regrowth.
  VRegFlags.reserve(1024);

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  // Kernels always receive workgroup/workitem X IDs; pixel shaders instead
  // get their input-enable mask.
  if (IsKernel) {
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  // Conservatively assume AGPRs may be needed whenever MAI is available;
  // refined below for entry functions on gfx90a+.
  MayNeedAGPRs = ST.hasMAIInsts();

  if (AMDGPU::isChainCC(CC)) {
    // Chain functions don't receive an SP from their caller, but are free to
    // set one up. For now, we can use s32 to match what amdgpu_gfx functions
    // would use if called, but this can be revisited.
    // FIXME: Only reserve this if we actually need it.
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;

    ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);

    ImplicitArgPtr = false;
  } else if (!isEntryFunction()) {
    // Ordinary callable function: fixed ABI argument locations (except for
    // amdgpu_gfx), plus the standard FP/SP assignments.
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now, other registers
      // required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
          ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    // Entry function: no implicit-arg pointer input, but kernarg alignment
    // must account for the implicit arguments appended after explicit ones.
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(F))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  // Workgroup IDs: available to compute conventions generally, and to
  // AMDGPU_CS / AMDGPU_Gfx when the subtarget provides architected SGPRs.
  if (!AMDGPU::isGraphics(CC) ||
      ((CC == CallingConv::AMDGPU_CS || CC == CallingConv::AMDGPU_Gfx) &&
       ST.hasArchitectedSGPRs())) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;
  }

  // Workitem IDs: Y/Z are only needed if the workgroup can actually extend
  // in that dimension.
  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
      LDSKernelId = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  // Optional numeric overrides supplied as string attributes; left at their
  // defaults when the attribute is absent.
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times. For now, reserve highest available VGPR. After
  // RA, shift it to the lowest available unused VGPR if the one exist.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy =
        AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
  }
}
17581ad6265SDimitry Andric
clone(BumpPtrAllocator & Allocator,MachineFunction & DestMF,const DenseMap<MachineBasicBlock *,MachineBasicBlock * > & Src2DstMBB) const17681ad6265SDimitry Andric MachineFunctionInfo *SIMachineFunctionInfo::clone(
17781ad6265SDimitry Andric BumpPtrAllocator &Allocator, MachineFunction &DestMF,
17881ad6265SDimitry Andric const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
17981ad6265SDimitry Andric const {
18081ad6265SDimitry Andric return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
1810b57cec5SDimitry Andric }
1820b57cec5SDimitry Andric
limitOccupancy(const MachineFunction & MF)1830b57cec5SDimitry Andric void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
1840b57cec5SDimitry Andric limitOccupancy(getMaxWavesPerEU());
1850b57cec5SDimitry Andric const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
1860b57cec5SDimitry Andric limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
1870b57cec5SDimitry Andric MF.getFunction()));
1880b57cec5SDimitry Andric }
1890b57cec5SDimitry Andric
addPrivateSegmentBuffer(const SIRegisterInfo & TRI)1905ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
1910b57cec5SDimitry Andric const SIRegisterInfo &TRI) {
1920b57cec5SDimitry Andric ArgInfo.PrivateSegmentBuffer =
1930b57cec5SDimitry Andric ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
1948bcb0991SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
1950b57cec5SDimitry Andric NumUserSGPRs += 4;
1960b57cec5SDimitry Andric return ArgInfo.PrivateSegmentBuffer.getRegister();
1970b57cec5SDimitry Andric }
1980b57cec5SDimitry Andric
addDispatchPtr(const SIRegisterInfo & TRI)1995ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
2000b57cec5SDimitry Andric ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2010b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2020b57cec5SDimitry Andric NumUserSGPRs += 2;
2030b57cec5SDimitry Andric return ArgInfo.DispatchPtr.getRegister();
2040b57cec5SDimitry Andric }
2050b57cec5SDimitry Andric
addQueuePtr(const SIRegisterInfo & TRI)2065ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
2070b57cec5SDimitry Andric ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2080b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2090b57cec5SDimitry Andric NumUserSGPRs += 2;
2100b57cec5SDimitry Andric return ArgInfo.QueuePtr.getRegister();
2110b57cec5SDimitry Andric }
2120b57cec5SDimitry Andric
addKernargSegmentPtr(const SIRegisterInfo & TRI)2135ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
2140b57cec5SDimitry Andric ArgInfo.KernargSegmentPtr
2150b57cec5SDimitry Andric = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2160b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2170b57cec5SDimitry Andric NumUserSGPRs += 2;
2180b57cec5SDimitry Andric return ArgInfo.KernargSegmentPtr.getRegister();
2190b57cec5SDimitry Andric }
2200b57cec5SDimitry Andric
addDispatchID(const SIRegisterInfo & TRI)2215ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
2220b57cec5SDimitry Andric ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2230b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2240b57cec5SDimitry Andric NumUserSGPRs += 2;
2250b57cec5SDimitry Andric return ArgInfo.DispatchID.getRegister();
2260b57cec5SDimitry Andric }
2270b57cec5SDimitry Andric
addFlatScratchInit(const SIRegisterInfo & TRI)2285ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
2290b57cec5SDimitry Andric ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2300b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2310b57cec5SDimitry Andric NumUserSGPRs += 2;
2320b57cec5SDimitry Andric return ArgInfo.FlatScratchInit.getRegister();
2330b57cec5SDimitry Andric }
2340b57cec5SDimitry Andric
addPrivateSegmentSize(const SIRegisterInfo & TRI)235*0fca6ea1SDimitry Andric Register SIMachineFunctionInfo::addPrivateSegmentSize(const SIRegisterInfo &TRI) {
236*0fca6ea1SDimitry Andric ArgInfo.PrivateSegmentSize = ArgDescriptor::createRegister(getNextUserSGPR());
237*0fca6ea1SDimitry Andric NumUserSGPRs += 1;
238*0fca6ea1SDimitry Andric return ArgInfo.PrivateSegmentSize.getRegister();
239*0fca6ea1SDimitry Andric }
240*0fca6ea1SDimitry Andric
addImplicitBufferPtr(const SIRegisterInfo & TRI)2415ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
2420b57cec5SDimitry Andric ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2430b57cec5SDimitry Andric getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2440b57cec5SDimitry Andric NumUserSGPRs += 2;
2450b57cec5SDimitry Andric return ArgInfo.ImplicitBufferPtr.getRegister();
2460b57cec5SDimitry Andric }
2470b57cec5SDimitry Andric
addLDSKernelId()248fcaf7f86SDimitry Andric Register SIMachineFunctionInfo::addLDSKernelId() {
249fcaf7f86SDimitry Andric ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR());
250fcaf7f86SDimitry Andric NumUserSGPRs += 1;
251fcaf7f86SDimitry Andric return ArgInfo.LDSKernelId.getRegister();
252fcaf7f86SDimitry Andric }
253fcaf7f86SDimitry Andric
// Reserve user SGPRs for kernel argument number \p KernArgIdx, which hardware
// preloads into SGPRs before the kernel starts. \p PaddingSGPRs are skipped
// first so the argument lands at its required offset, then \p AllocSizeDWord
// SGPRs are claimed for the value itself. Returns the list of registers
// recorded for this argument.
SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(
    const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
    unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs) {
  assert(!ArgInfo.PreloadKernArgs.count(KernArgIdx) &&
         "Preload kernel argument allocated twice.");
  NumUserSGPRs += PaddingSGPRs;
  // If the available register tuples are aligned with the kernarg to be
  // preloaded use that register, otherwise we need to use a set of SGPRs and
  // merge them.
  Register PreloadReg =
      TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);
  if (PreloadReg &&
      (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {
    // Aligned case: record the tuple as one register.
    ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(PreloadReg);
    NumUserSGPRs += AllocSizeDWord;
  } else {
    // Unaligned (or wider-class) case: record each 32-bit SGPR individually.
    for (unsigned I = 0; I < AllocSizeDWord; ++I) {
      ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(getNextUserSGPR());
      NumUserSGPRs++;
    }
  }

  // Track the actual number of SGPRs that HW will preload to.
  UserSGPRInfo.allocKernargPreloadSGPRs(AllocSizeDWord + PaddingSGPRs);
  return &ArgInfo.PreloadKernArgs[KernArgIdx].Regs;
}
2805f757f3fSDimitry Andric
allocateWWMSpill(MachineFunction & MF,Register VGPR,uint64_t Size,Align Alignment)281bdd1243dSDimitry Andric void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,
282bdd1243dSDimitry Andric uint64_t Size, Align Alignment) {
283bdd1243dSDimitry Andric // Skip if it is an entry function or the register is already added.
284bdd1243dSDimitry Andric if (isEntryFunction() || WWMSpills.count(VGPR))
285bdd1243dSDimitry Andric return;
286bdd1243dSDimitry Andric
2875f757f3fSDimitry Andric // Skip if this is a function with the amdgpu_cs_chain or
2885f757f3fSDimitry Andric // amdgpu_cs_chain_preserve calling convention and this is a scratch register.
2895f757f3fSDimitry Andric // We never need to allocate a spill for these because we don't even need to
2905f757f3fSDimitry Andric // restore the inactive lanes for them (they're scratchier than the usual
2915f757f3fSDimitry Andric // scratch registers).
2925f757f3fSDimitry Andric if (isChainFunction() && SIRegisterInfo::isChainScratchRegister(VGPR))
2935f757f3fSDimitry Andric return;
2945f757f3fSDimitry Andric
295bdd1243dSDimitry Andric WWMSpills.insert(std::make_pair(
296bdd1243dSDimitry Andric VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment)));
297bdd1243dSDimitry Andric }
298bdd1243dSDimitry Andric
299bdd1243dSDimitry Andric // Separate out the callee-saved and scratch registers.
splitWWMSpillRegisters(MachineFunction & MF,SmallVectorImpl<std::pair<Register,int>> & CalleeSavedRegs,SmallVectorImpl<std::pair<Register,int>> & ScratchRegs) const300bdd1243dSDimitry Andric void SIMachineFunctionInfo::splitWWMSpillRegisters(
301bdd1243dSDimitry Andric MachineFunction &MF,
302bdd1243dSDimitry Andric SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
303bdd1243dSDimitry Andric SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const {
304bdd1243dSDimitry Andric const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
305bdd1243dSDimitry Andric for (auto &Reg : WWMSpills) {
306bdd1243dSDimitry Andric if (isCalleeSavedReg(CSRegs, Reg.first))
307bdd1243dSDimitry Andric CalleeSavedRegs.push_back(Reg);
308bdd1243dSDimitry Andric else
309bdd1243dSDimitry Andric ScratchRegs.push_back(Reg);
310bdd1243dSDimitry Andric }
311bdd1243dSDimitry Andric }
312bdd1243dSDimitry Andric
isCalleeSavedReg(const MCPhysReg * CSRegs,MCPhysReg Reg) const3135ffd83dbSDimitry Andric bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
314bdd1243dSDimitry Andric MCPhysReg Reg) const {
3150b57cec5SDimitry Andric for (unsigned I = 0; CSRegs[I]; ++I) {
3160b57cec5SDimitry Andric if (CSRegs[I] == Reg)
3170b57cec5SDimitry Andric return true;
3180b57cec5SDimitry Andric }
3190b57cec5SDimitry Andric
3200b57cec5SDimitry Andric return false;
3210b57cec5SDimitry Andric }
3220b57cec5SDimitry Andric
// After register allocation, migrate each physical VGPR used for SGPR spills
// down to the lowest-numbered VGPR still unused, rewriting all bookkeeping
// (WWM reserved set, WWM spill map, block live-ins) to match.
void SIMachineFunctionInfo::shiftSpillPhysVGPRsToLowestRange(
    MachineFunction &MF) {
  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  for (Register &Reg : SpillPhysVGPRs) {
    Register NewReg =
        TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    // Stop as soon as no strictly lower unused register is available.
    if (!NewReg || NewReg >= Reg)
      break;

    MRI.replaceRegWith(Reg, NewReg);

    // Update various tables with the new VGPR.
    WWMReservedRegs.remove(Reg);
    WWMReservedRegs.insert(NewReg);
    WWMSpills.insert(std::make_pair(NewReg, WWMSpills[Reg]));
    WWMSpills.erase(Reg);

    // The old register is no longer live-in anywhere.
    // NOTE(review): NewReg is not re-added to the live-in lists here —
    // presumably handled elsewhere; confirm.
    for (MachineBasicBlock &MBB : MF) {
      MBB.removeLiveIn(Reg);
      MBB.sortUniqueLiveIns();
    }

    // Record the replacement in SpillPhysVGPRs itself (Reg is a reference).
    Reg = NewReg;
  }
}
3497a6dacacSDimitry Andric
allocateVirtualVGPRForSGPRSpills(MachineFunction & MF,int FI,unsigned LaneIndex)3505f757f3fSDimitry Andric bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
3515f757f3fSDimitry Andric MachineFunction &MF, int FI, unsigned LaneIndex) {
352bdd1243dSDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
353bdd1243dSDimitry Andric Register LaneVGPR;
354bdd1243dSDimitry Andric if (!LaneIndex) {
3555f757f3fSDimitry Andric LaneVGPR = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
356bdd1243dSDimitry Andric SpillVGPRs.push_back(LaneVGPR);
357bdd1243dSDimitry Andric } else {
358bdd1243dSDimitry Andric LaneVGPR = SpillVGPRs.back();
359bdd1243dSDimitry Andric }
360bdd1243dSDimitry Andric
361*0fca6ea1SDimitry Andric SGPRSpillsToVirtualVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
362bdd1243dSDimitry Andric return true;
363bdd1243dSDimitry Andric }
364bdd1243dSDimitry Andric
// Record lane \p LaneIndex of a physical VGPR as holding the SGPR spill for
// frame index \p FI. Lane 0 claims a new physical VGPR (reserving it as a WWM
// register and adding it as a live-in everywhere); later lanes pack into the
// last one claimed. Returns false if no VGPR could be found.
bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex, bool IsPrologEpilog) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    // Find the highest available register if called before RA to ensure the
    // lowest registers are available for allocation. The LaneVGPR, in that
    // case, will be shifted back to the lowest range after VGPR allocation.
    LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF,
                                       !IsPrologEpilog);
    if (LaneVGPR == AMDGPU::NoRegister) {
      // We have no VGPRs left for spilling SGPRs. Reset because we will not
      // partially spill the SGPR to VGPRs.
      SGPRSpillsToPhysicalVGPRLanes.erase(FI);
      return false;
    }

    // Give the new VGPR a WWM spill slot, keep it out of the allocator's
    // hands, and make it live-in to every block.
    allocateWWMSpill(MF, LaneVGPR);
    reserveWWMRegister(LaneVGPR);
    for (MachineBasicBlock &MBB : MF) {
      MBB.addLiveIn(LaneVGPR);
      MBB.sortUniqueLiveIns();
    }
    SpillPhysVGPRs.push_back(LaneVGPR);
  } else {
    LaneVGPR = SpillPhysVGPRs.back();
  }

  SGPRSpillsToPhysicalVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
  return true;
}
398bdd1243dSDimitry Andric
// Reserve VGPR lanes (one per 32-bit word) for the SGPR spill at frame index
// \p FI, using either physical or virtual VGPRs depending on
// \p SpillToPhysVGPRLane. Returns false — and rolls back the lane counter —
// if the spill cannot be fully lane-allocated.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(
    MachineFunction &MF, int FI, bool SpillToPhysVGPRLane,
    bool IsPrologEpilog) {
  std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
      SpillToPhysVGPRLane ? SGPRSpillsToPhysicalVGPRLanes[FI]
                          : SGPRSpillsToVirtualVGPRLanes[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  // One lane holds one 32-bit word of the spilled value.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  // A single spill may not span more lanes than a wave has.
  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
         "not spilling SGPRs to VGPRs");

  // Running lane counter shared across all spills of the chosen kind; lanes
  // wrap modulo the wave size onto a fresh VGPR.
  unsigned &NumSpillLanes = SpillToPhysVGPRLane ? NumPhysicalVGPRSpillLanes
                                                : NumVirtualVGPRSpillLanes;

  for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
    unsigned LaneIndex = (NumSpillLanes % WaveSize);

    bool Allocated = SpillToPhysVGPRLane
                         ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex,
                                                             IsPrologEpilog)
                         : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
    if (!Allocated) {
      // Undo the lanes claimed so far — partial spills are not kept.
      NumSpillLanes -= I;
      return false;
    }
  }

  return true;
}
4420b57cec5SDimitry Andric
/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if a \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  // One register per 32-bit word of the spill slot.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  // Spill into the opposite register file from the source.
  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  // Registers that must not be taken: callee-saved registers plus any
  // register already claimed by an earlier AGPR<->VGPR spill.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  // Scan the class's register list once, handing out free registers to lanes
  // from the last lane down; stop early if the supply runs out.
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    MRI.reserveReg(*NextSpillReg, TRI);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}
5110b57cec5SDimitry Andric
removeDeadFrameIndices(MachineFrameInfo & MFI,bool ResetSGPRSpillStackIDs)51281ad6265SDimitry Andric bool SIMachineFunctionInfo::removeDeadFrameIndices(
51381ad6265SDimitry Andric MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
5145f757f3fSDimitry Andric // Remove dead frame indices from function frame, however keep FP & BP since
5155f757f3fSDimitry Andric // spills for them haven't been inserted yet. And also make sure to remove the
5165f757f3fSDimitry Andric // frame indices from `SGPRSpillsToVirtualVGPRLanes` data structure,
5175f757f3fSDimitry Andric // otherwise, it could result in an unexpected side effect and bug, in case of
5185f757f3fSDimitry Andric // any re-mapping of freed frame indices by later pass(es) like "stack slot
519bdd1243dSDimitry Andric // coloring".
5205f757f3fSDimitry Andric for (auto &R : make_early_inc_range(SGPRSpillsToVirtualVGPRLanes)) {
5210b57cec5SDimitry Andric MFI.RemoveStackObject(R.first);
5225f757f3fSDimitry Andric SGPRSpillsToVirtualVGPRLanes.erase(R.first);
5230b57cec5SDimitry Andric }
5240b57cec5SDimitry Andric
5255f757f3fSDimitry Andric // Remove the dead frame indices of CSR SGPRs which are spilled to physical
5265f757f3fSDimitry Andric // VGPR lanes during SILowerSGPRSpills pass.
5275f757f3fSDimitry Andric if (!ResetSGPRSpillStackIDs) {
5285f757f3fSDimitry Andric for (auto &R : make_early_inc_range(SGPRSpillsToPhysicalVGPRLanes)) {
5295f757f3fSDimitry Andric MFI.RemoveStackObject(R.first);
5305f757f3fSDimitry Andric SGPRSpillsToPhysicalVGPRLanes.erase(R.first);
5315f757f3fSDimitry Andric }
5325f757f3fSDimitry Andric }
53381ad6265SDimitry Andric bool HaveSGPRToMemory = false;
53481ad6265SDimitry Andric
53581ad6265SDimitry Andric if (ResetSGPRSpillStackIDs) {
536bdd1243dSDimitry Andric // All other SGPRs must be allocated on the default stack, so reset the
53781ad6265SDimitry Andric // stack ID.
538bdd1243dSDimitry Andric for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
539bdd1243dSDimitry Andric ++I) {
540bdd1243dSDimitry Andric if (!checkIndexInPrologEpilogSGPRSpills(I)) {
541bdd1243dSDimitry Andric if (MFI.getStackID(I) == TargetStackID::SGPRSpill) {
542bdd1243dSDimitry Andric MFI.setStackID(I, TargetStackID::Default);
54381ad6265SDimitry Andric HaveSGPRToMemory = true;
54481ad6265SDimitry Andric }
54581ad6265SDimitry Andric }
54681ad6265SDimitry Andric }
54781ad6265SDimitry Andric }
5480b57cec5SDimitry Andric
5490b57cec5SDimitry Andric for (auto &R : VGPRToAGPRSpills) {
5500eae32dcSDimitry Andric if (R.second.IsDead)
5510b57cec5SDimitry Andric MFI.RemoveStackObject(R.first);
5520b57cec5SDimitry Andric }
55381ad6265SDimitry Andric
55481ad6265SDimitry Andric return HaveSGPRToMemory;
55581ad6265SDimitry Andric }
55681ad6265SDimitry Andric
getScavengeFI(MachineFrameInfo & MFI,const SIRegisterInfo & TRI)557fe6060f1SDimitry Andric int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
558fe6060f1SDimitry Andric const SIRegisterInfo &TRI) {
559fe6060f1SDimitry Andric if (ScavengeFI)
560fe6060f1SDimitry Andric return *ScavengeFI;
561*0fca6ea1SDimitry Andric
562*0fca6ea1SDimitry Andric ScavengeFI =
563*0fca6ea1SDimitry Andric MFI.CreateStackObject(TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
564fe6060f1SDimitry Andric TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
565fe6060f1SDimitry Andric return *ScavengeFI;
566fe6060f1SDimitry Andric }
567fe6060f1SDimitry Andric
getNextUserSGPR() const5680b57cec5SDimitry Andric MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
5690b57cec5SDimitry Andric assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
5700b57cec5SDimitry Andric return AMDGPU::SGPR0 + NumUserSGPRs;
5710b57cec5SDimitry Andric }
5720b57cec5SDimitry Andric
getNextSystemSGPR() const5730b57cec5SDimitry Andric MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
5740b57cec5SDimitry Andric return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
5750b57cec5SDimitry Andric }
5760b57cec5SDimitry Andric
MRI_NoteNewVirtualRegister(Register Reg)57706c3fb27SDimitry Andric void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(Register Reg) {
57806c3fb27SDimitry Andric VRegFlags.grow(Reg);
57906c3fb27SDimitry Andric }
58006c3fb27SDimitry Andric
MRI_NoteCloneVirtualRegister(Register NewReg,Register SrcReg)58106c3fb27SDimitry Andric void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(Register NewReg,
58206c3fb27SDimitry Andric Register SrcReg) {
58306c3fb27SDimitry Andric VRegFlags.grow(NewReg);
58406c3fb27SDimitry Andric VRegFlags[NewReg] = VRegFlags[SrcReg];
58506c3fb27SDimitry Andric }
58606c3fb27SDimitry Andric
5875ffd83dbSDimitry Andric Register
getGITPtrLoReg(const MachineFunction & MF) const5885ffd83dbSDimitry Andric SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
5895ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
5905ffd83dbSDimitry Andric if (!ST.isAmdPalOS())
5915ffd83dbSDimitry Andric return Register();
5925ffd83dbSDimitry Andric Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
5935ffd83dbSDimitry Andric if (ST.hasMergedShaders()) {
5945ffd83dbSDimitry Andric switch (MF.getFunction().getCallingConv()) {
5955ffd83dbSDimitry Andric case CallingConv::AMDGPU_HS:
5965ffd83dbSDimitry Andric case CallingConv::AMDGPU_GS:
5975ffd83dbSDimitry Andric // Low GIT address is passed in s8 rather than s0 for an LS+HS or
5985ffd83dbSDimitry Andric // ES+GS merged shader on gfx9+.
5995ffd83dbSDimitry Andric GitPtrLo = AMDGPU::SGPR8;
6005ffd83dbSDimitry Andric return GitPtrLo;
6015ffd83dbSDimitry Andric default:
6025ffd83dbSDimitry Andric return GitPtrLo;
6035ffd83dbSDimitry Andric }
6045ffd83dbSDimitry Andric }
6055ffd83dbSDimitry Andric return GitPtrLo;
6065ffd83dbSDimitry Andric }
6075ffd83dbSDimitry Andric
regToString(Register Reg,const TargetRegisterInfo & TRI)6085ffd83dbSDimitry Andric static yaml::StringValue regToString(Register Reg,
6090b57cec5SDimitry Andric const TargetRegisterInfo &TRI) {
6100b57cec5SDimitry Andric yaml::StringValue Dest;
6110b57cec5SDimitry Andric {
6120b57cec5SDimitry Andric raw_string_ostream OS(Dest.Value);
6130b57cec5SDimitry Andric OS << printReg(Reg, &TRI);
6140b57cec5SDimitry Andric }
6150b57cec5SDimitry Andric return Dest;
6160b57cec5SDimitry Andric }
6170b57cec5SDimitry Andric
618bdd1243dSDimitry Andric static std::optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo & ArgInfo,const TargetRegisterInfo & TRI)6190b57cec5SDimitry Andric convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
6200b57cec5SDimitry Andric const TargetRegisterInfo &TRI) {
6210b57cec5SDimitry Andric yaml::SIArgumentInfo AI;
6220b57cec5SDimitry Andric
623bdd1243dSDimitry Andric auto convertArg = [&](std::optional<yaml::SIArgument> &A,
6240b57cec5SDimitry Andric const ArgDescriptor &Arg) {
6250b57cec5SDimitry Andric if (!Arg)
6260b57cec5SDimitry Andric return false;
6270b57cec5SDimitry Andric
6280b57cec5SDimitry Andric // Create a register or stack argument.
6290b57cec5SDimitry Andric yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
6300b57cec5SDimitry Andric if (Arg.isRegister()) {
6310b57cec5SDimitry Andric raw_string_ostream OS(SA.RegisterName.Value);
6320b57cec5SDimitry Andric OS << printReg(Arg.getRegister(), &TRI);
6330b57cec5SDimitry Andric } else
6340b57cec5SDimitry Andric SA.StackOffset = Arg.getStackOffset();
6350b57cec5SDimitry Andric // Check and update the optional mask.
6360b57cec5SDimitry Andric if (Arg.isMasked())
6370b57cec5SDimitry Andric SA.Mask = Arg.getMask();
6380b57cec5SDimitry Andric
6390b57cec5SDimitry Andric A = SA;
6400b57cec5SDimitry Andric return true;
6410b57cec5SDimitry Andric };
6420b57cec5SDimitry Andric
6435f757f3fSDimitry Andric // TODO: Need to serialize kernarg preloads.
6440b57cec5SDimitry Andric bool Any = false;
6450b57cec5SDimitry Andric Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
6460b57cec5SDimitry Andric Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
6470b57cec5SDimitry Andric Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
6480b57cec5SDimitry Andric Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
6490b57cec5SDimitry Andric Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
6500b57cec5SDimitry Andric Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
651fcaf7f86SDimitry Andric Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId);
6520b57cec5SDimitry Andric Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
6530b57cec5SDimitry Andric Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
6540b57cec5SDimitry Andric Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
6550b57cec5SDimitry Andric Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
6560b57cec5SDimitry Andric Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
6570b57cec5SDimitry Andric Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
6580b57cec5SDimitry Andric ArgInfo.PrivateSegmentWaveByteOffset);
6590b57cec5SDimitry Andric Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
6600b57cec5SDimitry Andric Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
6610b57cec5SDimitry Andric Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
6620b57cec5SDimitry Andric Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
6630b57cec5SDimitry Andric Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);
6640b57cec5SDimitry Andric
6650b57cec5SDimitry Andric if (Any)
6660b57cec5SDimitry Andric return AI;
6670b57cec5SDimitry Andric
668bdd1243dSDimitry Andric return std::nullopt;
6690b57cec5SDimitry Andric }
6700b57cec5SDimitry Andric
/// Build the YAML (serializable) view of a SIMachineFunctionInfo. Scalars are
/// copied directly; registers are rendered to strings with regToString, and
/// only-sometimes-present registers / the scavenge frame index are emitted
/// conditionally so absent values stay absent in the MIR output.
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      GDSSize(MFI.getGDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      // Register fields are serialized as their textual names.
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      BytesInStackArgArea(MFI.getBytesInStackArgArea()),
      ReturnsVoid(MFI.returnsVoid()),
      // May be std::nullopt if no arguments were present at all.
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
      PSInputAddr(MFI.getPSInputAddr()),
      PSInputEnable(MFI.getPSInputEnable()),
      Mode(MFI.getMode()) {
  // WWM-reserved registers are emitted as a list of register names.
  for (Register Reg : MFI.getWWMReservedRegs())
    WWMReservedRegs.push_back(regToString(Reg, TRI));

  // The following registers are optional: only serialize them when set
  // (a default Register() is falsy), so round-tripping preserves absence.
  if (MFI.getLongBranchReservedReg())
    LongBranchReservedReg = regToString(MFI.getLongBranchReservedReg(), TRI);
  if (MFI.getVGPRForAGPRCopy())
    VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);

  if (MFI.getSGPRForEXECCopy())
    SGPRForEXECCopy = regToString(MFI.getSGPRForEXECCopy(), TRI);

  // The scavenge slot, if allocated, is serialized as a frame index
  // reference resolved against this function's frame info.
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}
7080b57cec5SDimitry Andric
/// Hook for the generic MIR YAML machinery: delegate to the
/// MappingTraits specialization that lists this class's fields.
void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}
7120b57cec5SDimitry Andric
initializeBaseYamlFields(const yaml::SIMachineFunctionInfo & YamlMFI,const MachineFunction & MF,PerFunctionMIParsingState & PFS,SMDiagnostic & Error,SMRange & SourceRange)7130b57cec5SDimitry Andric bool SIMachineFunctionInfo::initializeBaseYamlFields(
714fe6060f1SDimitry Andric const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
715fe6060f1SDimitry Andric PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
7160b57cec5SDimitry Andric ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
71781ad6265SDimitry Andric MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
7180b57cec5SDimitry Andric LDSSize = YamlMFI.LDSSize;
71981ad6265SDimitry Andric GDSSize = YamlMFI.GDSSize;
720e8d8bef9SDimitry Andric DynLDSAlign = YamlMFI.DynLDSAlign;
72106c3fb27SDimitry Andric PSInputAddr = YamlMFI.PSInputAddr;
72206c3fb27SDimitry Andric PSInputEnable = YamlMFI.PSInputEnable;
7238bcb0991SDimitry Andric HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
724e8d8bef9SDimitry Andric Occupancy = YamlMFI.Occupancy;
7250b57cec5SDimitry Andric IsEntryFunction = YamlMFI.IsEntryFunction;
7260b57cec5SDimitry Andric NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
7270b57cec5SDimitry Andric MemoryBound = YamlMFI.MemoryBound;
7280b57cec5SDimitry Andric WaveLimiter = YamlMFI.WaveLimiter;
729e8d8bef9SDimitry Andric HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
730e8d8bef9SDimitry Andric HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
73181ad6265SDimitry Andric BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
73281ad6265SDimitry Andric ReturnsVoid = YamlMFI.ReturnsVoid;
733fe6060f1SDimitry Andric
734fe6060f1SDimitry Andric if (YamlMFI.ScavengeFI) {
735fe6060f1SDimitry Andric auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
736fe6060f1SDimitry Andric if (!FIOrErr) {
737fe6060f1SDimitry Andric // Create a diagnostic for a the frame index.
738fe6060f1SDimitry Andric const MemoryBuffer &Buffer =
739fe6060f1SDimitry Andric *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
740fe6060f1SDimitry Andric
741fe6060f1SDimitry Andric Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
742fe6060f1SDimitry Andric SourceMgr::DK_Error, toString(FIOrErr.takeError()),
743bdd1243dSDimitry Andric "", std::nullopt, std::nullopt);
744fe6060f1SDimitry Andric SourceRange = YamlMFI.ScavengeFI->SourceRange;
745fe6060f1SDimitry Andric return true;
746fe6060f1SDimitry Andric }
747fe6060f1SDimitry Andric ScavengeFI = *FIOrErr;
748fe6060f1SDimitry Andric } else {
749bdd1243dSDimitry Andric ScavengeFI = std::nullopt;
750fe6060f1SDimitry Andric }
7510b57cec5SDimitry Andric return false;
7520b57cec5SDimitry Andric }
7535ffd83dbSDimitry Andric
mayUseAGPRs(const Function & F) const754bdd1243dSDimitry Andric bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
755*0fca6ea1SDimitry Andric return !F.hasFnAttribute("amdgpu-no-agpr");
75681ad6265SDimitry Andric }
75781ad6265SDimitry Andric
/// Determine whether this function uses any AGPRs. The answer is cached in
/// the mutable UsesAGPRs member — EXCEPT along one path (see below) where
/// caching is deliberately deferred because the function may not have been
/// register-bank selected yet.
bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  // Fast path: already computed.
  if (UsesAGPRs)
    return *UsesAGPRs;

  // If AGPRs can never be needed at all, cache and return false.
  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

  // Non-entry functions, and functions that make calls, are conservatively
  // assumed to use AGPRs.
  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  // Scan all virtual registers for AGPR-class constraints.
  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    }
    // A used vreg with a type but no register class could still end up in
    // an AGPR later; answer "true" but do NOT cache it.
    if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs, function might not yet been regbank selected.
      return true;
    }
  }

  // Also catch direct uses of physical AGPRs.
  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {
      UsesAGPRs = true;
      return true;
    }
  }

  // No AGPR use found; cache the negative result.
  UsesAGPRs = false;
  return false;
}
798