10b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric
90b57cec5SDimitry Andric #include "AMDGPUArgumentUsageInfo.h"
10e8d8bef9SDimitry Andric #include "AMDGPU.h"
115ffd83dbSDimitry Andric #include "AMDGPUTargetMachine.h"
125ffd83dbSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
130b57cec5SDimitry Andric #include "SIRegisterInfo.h"
14e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
15e8d8bef9SDimitry Andric #include "llvm/IR/Function.h"
160b57cec5SDimitry Andric #include "llvm/Support/NativeFormatting.h"
170b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
180b57cec5SDimitry Andric
190b57cec5SDimitry Andric using namespace llvm;
200b57cec5SDimitry Andric
210b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-argument-reg-usage-info"
220b57cec5SDimitry Andric
230b57cec5SDimitry Andric INITIALIZE_PASS(AMDGPUArgumentUsageInfo, DEBUG_TYPE,
240b57cec5SDimitry Andric "Argument Register Usage Information Storage", false, true)
250b57cec5SDimitry Andric
print(raw_ostream & OS,const TargetRegisterInfo * TRI) const260b57cec5SDimitry Andric void ArgDescriptor::print(raw_ostream &OS,
270b57cec5SDimitry Andric const TargetRegisterInfo *TRI) const {
280b57cec5SDimitry Andric if (!isSet()) {
290b57cec5SDimitry Andric OS << "<not set>\n";
300b57cec5SDimitry Andric return;
310b57cec5SDimitry Andric }
320b57cec5SDimitry Andric
330b57cec5SDimitry Andric if (isRegister())
340b57cec5SDimitry Andric OS << "Reg " << printReg(getRegister(), TRI);
350b57cec5SDimitry Andric else
360b57cec5SDimitry Andric OS << "Stack offset " << getStackOffset();
370b57cec5SDimitry Andric
380b57cec5SDimitry Andric if (isMasked()) {
390b57cec5SDimitry Andric OS << " & ";
400b57cec5SDimitry Andric llvm::write_hex(OS, Mask, llvm::HexPrintStyle::PrefixLower);
410b57cec5SDimitry Andric }
420b57cec5SDimitry Andric
430b57cec5SDimitry Andric OS << '\n';
440b57cec5SDimitry Andric }
450b57cec5SDimitry Andric
460b57cec5SDimitry Andric char AMDGPUArgumentUsageInfo::ID = 0;
470b57cec5SDimitry Andric
480b57cec5SDimitry Andric const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::ExternFunctionInfo{};
490b57cec5SDimitry Andric
505ffd83dbSDimitry Andric // Hardcoded registers from fixed function ABI
515ffd83dbSDimitry Andric const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::FixedABIFunctionInfo
525ffd83dbSDimitry Andric = AMDGPUFunctionArgInfo::fixedABILayout();
535ffd83dbSDimitry Andric
doInitialization(Module & M)540b57cec5SDimitry Andric bool AMDGPUArgumentUsageInfo::doInitialization(Module &M) {
550b57cec5SDimitry Andric return false;
560b57cec5SDimitry Andric }
570b57cec5SDimitry Andric
doFinalization(Module & M)580b57cec5SDimitry Andric bool AMDGPUArgumentUsageInfo::doFinalization(Module &M) {
590b57cec5SDimitry Andric ArgInfoMap.clear();
600b57cec5SDimitry Andric return false;
610b57cec5SDimitry Andric }
620b57cec5SDimitry Andric
635f757f3fSDimitry Andric // TODO: Print preload kernargs?
print(raw_ostream & OS,const Module * M) const640b57cec5SDimitry Andric void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
650b57cec5SDimitry Andric for (const auto &FI : ArgInfoMap) {
660b57cec5SDimitry Andric OS << "Arguments for " << FI.first->getName() << '\n'
670b57cec5SDimitry Andric << " PrivateSegmentBuffer: " << FI.second.PrivateSegmentBuffer
680b57cec5SDimitry Andric << " DispatchPtr: " << FI.second.DispatchPtr
690b57cec5SDimitry Andric << " QueuePtr: " << FI.second.QueuePtr
700b57cec5SDimitry Andric << " KernargSegmentPtr: " << FI.second.KernargSegmentPtr
710b57cec5SDimitry Andric << " DispatchID: " << FI.second.DispatchID
720b57cec5SDimitry Andric << " FlatScratchInit: " << FI.second.FlatScratchInit
730b57cec5SDimitry Andric << " PrivateSegmentSize: " << FI.second.PrivateSegmentSize
740b57cec5SDimitry Andric << " WorkGroupIDX: " << FI.second.WorkGroupIDX
750b57cec5SDimitry Andric << " WorkGroupIDY: " << FI.second.WorkGroupIDY
760b57cec5SDimitry Andric << " WorkGroupIDZ: " << FI.second.WorkGroupIDZ
770b57cec5SDimitry Andric << " WorkGroupInfo: " << FI.second.WorkGroupInfo
78fcaf7f86SDimitry Andric << " LDSKernelId: " << FI.second.LDSKernelId
790b57cec5SDimitry Andric << " PrivateSegmentWaveByteOffset: "
800b57cec5SDimitry Andric << FI.second.PrivateSegmentWaveByteOffset
810b57cec5SDimitry Andric << " ImplicitBufferPtr: " << FI.second.ImplicitBufferPtr
820b57cec5SDimitry Andric << " ImplicitArgPtr: " << FI.second.ImplicitArgPtr
830b57cec5SDimitry Andric << " WorkItemIDX " << FI.second.WorkItemIDX
840b57cec5SDimitry Andric << " WorkItemIDY " << FI.second.WorkItemIDY
850b57cec5SDimitry Andric << " WorkItemIDZ " << FI.second.WorkItemIDZ
860b57cec5SDimitry Andric << '\n';
870b57cec5SDimitry Andric }
880b57cec5SDimitry Andric }
890b57cec5SDimitry Andric
905ffd83dbSDimitry Andric std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const910b57cec5SDimitry Andric AMDGPUFunctionArgInfo::getPreloadedValue(
920b57cec5SDimitry Andric AMDGPUFunctionArgInfo::PreloadedValue Value) const {
930b57cec5SDimitry Andric switch (Value) {
940b57cec5SDimitry Andric case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER: {
95bdd1243dSDimitry Andric return std::tuple(PrivateSegmentBuffer ? &PrivateSegmentBuffer : nullptr,
96fe6060f1SDimitry Andric &AMDGPU::SGPR_128RegClass, LLT::fixed_vector(4, 32));
970b57cec5SDimitry Andric }
980b57cec5SDimitry Andric case AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR:
99bdd1243dSDimitry Andric return std::tuple(ImplicitBufferPtr ? &ImplicitBufferPtr : nullptr,
1005ffd83dbSDimitry Andric &AMDGPU::SGPR_64RegClass,
1015ffd83dbSDimitry Andric LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
1020b57cec5SDimitry Andric case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
103bdd1243dSDimitry Andric return std::tuple(WorkGroupIDX ? &WorkGroupIDX : nullptr,
1045ffd83dbSDimitry Andric &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
1050b57cec5SDimitry Andric case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y:
106bdd1243dSDimitry Andric return std::tuple(WorkGroupIDY ? &WorkGroupIDY : nullptr,
1075ffd83dbSDimitry Andric &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
1080b57cec5SDimitry Andric case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
109bdd1243dSDimitry Andric return std::tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
1105ffd83dbSDimitry Andric &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
111fcaf7f86SDimitry Andric case AMDGPUFunctionArgInfo::LDS_KERNEL_ID:
112bdd1243dSDimitry Andric return std::tuple(LDSKernelId ? &LDSKernelId : nullptr,
113fcaf7f86SDimitry Andric &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
1140b57cec5SDimitry Andric case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
115bdd1243dSDimitry Andric return std::tuple(
1160b57cec5SDimitry Andric PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
1175ffd83dbSDimitry Andric &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
118*0fca6ea1SDimitry Andric case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_SIZE:
119*0fca6ea1SDimitry Andric return {PrivateSegmentSize ? &PrivateSegmentSize : nullptr,
120*0fca6ea1SDimitry Andric &AMDGPU::SGPR_32RegClass, LLT::scalar(32)};
1210b57cec5SDimitry Andric case AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR:
122bdd1243dSDimitry Andric return std::tuple(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
1235ffd83dbSDimitry Andric &AMDGPU::SGPR_64RegClass,
1245ffd83dbSDimitry Andric LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
1250b57cec5SDimitry Andric case AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR:
126bdd1243dSDimitry Andric return std::tuple(ImplicitArgPtr ? &ImplicitArgPtr : nullptr,
1275ffd83dbSDimitry Andric &AMDGPU::SGPR_64RegClass,
1285ffd83dbSDimitry Andric LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
1290b57cec5SDimitry Andric case AMDGPUFunctionArgInfo::DISPATCH_ID:
130bdd1243dSDimitry Andric return std::tuple(DispatchID ? &DispatchID : nullptr,
1315ffd83dbSDimitry Andric &AMDGPU::SGPR_64RegClass, LLT::scalar(64));
1320b57cec5SDimitry Andric case AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT:
133bdd1243dSDimitry Andric return std::tuple(FlatScratchInit ? &FlatScratchInit : nullptr,
1345ffd83dbSDimitry Andric &AMDGPU::SGPR_64RegClass, LLT::scalar(64));
1350b57cec5SDimitry Andric case AMDGPUFunctionArgInfo::DISPATCH_PTR:
136bdd1243dSDimitry Andric return std::tuple(DispatchPtr ? &DispatchPtr : nullptr,
1375ffd83dbSDimitry Andric &AMDGPU::SGPR_64RegClass,
1385ffd83dbSDimitry Andric LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
1390b57cec5SDimitry Andric case AMDGPUFunctionArgInfo::QUEUE_PTR:
140bdd1243dSDimitry Andric return std::tuple(QueuePtr ? &QueuePtr : nullptr, &AMDGPU::SGPR_64RegClass,
1415ffd83dbSDimitry Andric LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
1420b57cec5SDimitry Andric case AMDGPUFunctionArgInfo::WORKITEM_ID_X:
143bdd1243dSDimitry Andric return std::tuple(WorkItemIDX ? &WorkItemIDX : nullptr,
1445ffd83dbSDimitry Andric &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
1450b57cec5SDimitry Andric case AMDGPUFunctionArgInfo::WORKITEM_ID_Y:
146bdd1243dSDimitry Andric return std::tuple(WorkItemIDY ? &WorkItemIDY : nullptr,
1475ffd83dbSDimitry Andric &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
1480b57cec5SDimitry Andric case AMDGPUFunctionArgInfo::WORKITEM_ID_Z:
149bdd1243dSDimitry Andric return std::tuple(WorkItemIDZ ? &WorkItemIDZ : nullptr,
1505ffd83dbSDimitry Andric &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
1510b57cec5SDimitry Andric }
1520b57cec5SDimitry Andric llvm_unreachable("unexpected preloaded value type");
1530b57cec5SDimitry Andric }
1545ffd83dbSDimitry Andric
fixedABILayout()1555f757f3fSDimitry Andric AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
1565ffd83dbSDimitry Andric AMDGPUFunctionArgInfo AI;
1575ffd83dbSDimitry Andric AI.PrivateSegmentBuffer
1585ffd83dbSDimitry Andric = ArgDescriptor::createRegister(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3);
1595ffd83dbSDimitry Andric AI.DispatchPtr = ArgDescriptor::createRegister(AMDGPU::SGPR4_SGPR5);
1605ffd83dbSDimitry Andric AI.QueuePtr = ArgDescriptor::createRegister(AMDGPU::SGPR6_SGPR7);
1615ffd83dbSDimitry Andric
1625ffd83dbSDimitry Andric // Do not pass kernarg segment pointer, only pass increment version in its
1635ffd83dbSDimitry Andric // place.
1645ffd83dbSDimitry Andric AI.ImplicitArgPtr = ArgDescriptor::createRegister(AMDGPU::SGPR8_SGPR9);
1655ffd83dbSDimitry Andric AI.DispatchID = ArgDescriptor::createRegister(AMDGPU::SGPR10_SGPR11);
1665ffd83dbSDimitry Andric
1675ffd83dbSDimitry Andric // Skip FlatScratchInit/PrivateSegmentSize
1685ffd83dbSDimitry Andric AI.WorkGroupIDX = ArgDescriptor::createRegister(AMDGPU::SGPR12);
1695ffd83dbSDimitry Andric AI.WorkGroupIDY = ArgDescriptor::createRegister(AMDGPU::SGPR13);
1705ffd83dbSDimitry Andric AI.WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::SGPR14);
171fcaf7f86SDimitry Andric AI.LDSKernelId = ArgDescriptor::createRegister(AMDGPU::SGPR15);
1725ffd83dbSDimitry Andric
1735ffd83dbSDimitry Andric const unsigned Mask = 0x3ff;
1745ffd83dbSDimitry Andric AI.WorkItemIDX = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask);
1755ffd83dbSDimitry Andric AI.WorkItemIDY = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask << 10);
1765ffd83dbSDimitry Andric AI.WorkItemIDZ = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask << 20);
1775ffd83dbSDimitry Andric return AI;
1785ffd83dbSDimitry Andric }
1795ffd83dbSDimitry Andric
1805ffd83dbSDimitry Andric const AMDGPUFunctionArgInfo &
lookupFuncArgInfo(const Function & F) const1815ffd83dbSDimitry Andric AMDGPUArgumentUsageInfo::lookupFuncArgInfo(const Function &F) const {
1825ffd83dbSDimitry Andric auto I = ArgInfoMap.find(&F);
1830eae32dcSDimitry Andric if (I == ArgInfoMap.end())
1845ffd83dbSDimitry Andric return FixedABIFunctionInfo;
1855ffd83dbSDimitry Andric return I->second;
1865ffd83dbSDimitry Andric }
187