//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file provides AMDGPU specific target streamer methods.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric
130b57cec5SDimitry Andric #include "AMDGPUTargetStreamer.h"
14*0fca6ea1SDimitry Andric #include "AMDGPUMCKernelDescriptor.h"
15e8d8bef9SDimitry Andric #include "AMDGPUPTNote.h"
160b57cec5SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
170b57cec5SDimitry Andric #include "Utils/AMDKernelCodeTUtils.h"
180b57cec5SDimitry Andric #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
190b57cec5SDimitry Andric #include "llvm/BinaryFormat/ELF.h"
2081ad6265SDimitry Andric #include "llvm/MC/MCAssembler.h"
210b57cec5SDimitry Andric #include "llvm/MC/MCContext.h"
220b57cec5SDimitry Andric #include "llvm/MC/MCELFStreamer.h"
237a6dacacSDimitry Andric #include "llvm/MC/MCObjectWriter.h"
240b57cec5SDimitry Andric #include "llvm/MC/MCSectionELF.h"
2581ad6265SDimitry Andric #include "llvm/MC/MCSubtargetInfo.h"
26e8d8bef9SDimitry Andric #include "llvm/Support/AMDGPUMetadata.h"
27e8d8bef9SDimitry Andric #include "llvm/Support/AMDHSAKernelDescriptor.h"
2881ad6265SDimitry Andric #include "llvm/Support/Casting.h"
29*0fca6ea1SDimitry Andric #include "llvm/Support/CommandLine.h"
300b57cec5SDimitry Andric #include "llvm/Support/FormattedStream.h"
3106c3fb27SDimitry Andric #include "llvm/TargetParser/TargetParser.h"
320b57cec5SDimitry Andric
330b57cec5SDimitry Andric using namespace llvm;
340b57cec5SDimitry Andric using namespace llvm::AMDGPU;
350b57cec5SDimitry Andric
360b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
370b57cec5SDimitry Andric // AMDGPUTargetStreamer
380b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
390b57cec5SDimitry Andric
40*0fca6ea1SDimitry Andric static cl::opt<unsigned>
41*0fca6ea1SDimitry Andric ForceGenericVersion("amdgpu-force-generic-version",
42*0fca6ea1SDimitry Andric cl::desc("Force a specific generic_v<N> flag to be "
43*0fca6ea1SDimitry Andric "added. For testing purposes only."),
44*0fca6ea1SDimitry Andric cl::ReallyHidden, cl::init(0));
45*0fca6ea1SDimitry Andric
EmitHSAMetadataV3(StringRef HSAMetadataString)460b57cec5SDimitry Andric bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
470b57cec5SDimitry Andric msgpack::Document HSAMetadataDoc;
480b57cec5SDimitry Andric if (!HSAMetadataDoc.fromYAML(HSAMetadataString))
490b57cec5SDimitry Andric return false;
500b57cec5SDimitry Andric return EmitHSAMetadata(HSAMetadataDoc, false);
510b57cec5SDimitry Andric }
520b57cec5SDimitry Andric
getArchNameFromElfMach(unsigned ElfMach)530b57cec5SDimitry Andric StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
540b57cec5SDimitry Andric AMDGPU::GPUKind AK;
550b57cec5SDimitry Andric
565f757f3fSDimitry Andric // clang-format off
570b57cec5SDimitry Andric switch (ElfMach) {
580b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
590b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
600b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
610b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
620b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
630b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
640b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
650b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
660b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
670b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
680b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
690b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
700b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
710b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
720b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
730b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
740b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
750b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
76e8d8bef9SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602: AK = GK_GFX602; break;
770b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
780b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
790b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
800b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
810b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
82e8d8bef9SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705: AK = GK_GFX705; break;
830b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
840b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
850b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
86e8d8bef9SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805: AK = GK_GFX805; break;
870b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
880b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
890b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
900b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
910b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
920b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: AK = GK_GFX908; break;
930b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
94fe6060f1SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
95e8d8bef9SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
9681ad6265SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
9706c3fb27SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break;
9806c3fb27SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
990b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
1000b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
1010b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
102fe6060f1SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: AK = GK_GFX1013; break;
1035ffd83dbSDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break;
104e8d8bef9SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break;
105e8d8bef9SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break;
106e8d8bef9SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break;
107fe6060f1SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034; break;
108fe6060f1SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: AK = GK_GFX1035; break;
10981ad6265SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: AK = GK_GFX1036; break;
11081ad6265SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: AK = GK_GFX1100; break;
11181ad6265SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: AK = GK_GFX1101; break;
11281ad6265SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: AK = GK_GFX1102; break;
11381ad6265SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break;
11406c3fb27SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break;
11506c3fb27SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
116*0fca6ea1SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152: AK = GK_GFX1152; break;
1175f757f3fSDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
1185f757f3fSDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
119*0fca6ea1SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break;
120*0fca6ea1SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break;
121*0fca6ea1SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break;
122*0fca6ea1SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break;
123*0fca6ea1SDimitry Andric case ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC: AK = GK_GFX12_GENERIC; break;
1240b57cec5SDimitry Andric case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
1255f757f3fSDimitry Andric default: AK = GK_NONE; break;
1260b57cec5SDimitry Andric }
1275f757f3fSDimitry Andric // clang-format on
1280b57cec5SDimitry Andric
1290b57cec5SDimitry Andric StringRef GPUName = getArchNameAMDGCN(AK);
1300b57cec5SDimitry Andric if (GPUName != "")
1310b57cec5SDimitry Andric return GPUName;
1320b57cec5SDimitry Andric return getArchNameR600(AK);
1330b57cec5SDimitry Andric }
1340b57cec5SDimitry Andric
getElfMach(StringRef GPU)1350b57cec5SDimitry Andric unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
1360b57cec5SDimitry Andric AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
1370b57cec5SDimitry Andric if (AK == AMDGPU::GPUKind::GK_NONE)
1380b57cec5SDimitry Andric AK = parseArchR600(GPU);
1390b57cec5SDimitry Andric
1405f757f3fSDimitry Andric // clang-format off
1410b57cec5SDimitry Andric switch (AK) {
1420b57cec5SDimitry Andric case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
1430b57cec5SDimitry Andric case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
1440b57cec5SDimitry Andric case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
1450b57cec5SDimitry Andric case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
1460b57cec5SDimitry Andric case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
1470b57cec5SDimitry Andric case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
1480b57cec5SDimitry Andric case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
1490b57cec5SDimitry Andric case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
1500b57cec5SDimitry Andric case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
1510b57cec5SDimitry Andric case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
1520b57cec5SDimitry Andric case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
1530b57cec5SDimitry Andric case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
1540b57cec5SDimitry Andric case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
1550b57cec5SDimitry Andric case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
1560b57cec5SDimitry Andric case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
1570b57cec5SDimitry Andric case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
1580b57cec5SDimitry Andric case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
1590b57cec5SDimitry Andric case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
160e8d8bef9SDimitry Andric case GK_GFX602: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602;
1610b57cec5SDimitry Andric case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
1620b57cec5SDimitry Andric case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
1630b57cec5SDimitry Andric case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
1640b57cec5SDimitry Andric case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
1650b57cec5SDimitry Andric case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
166e8d8bef9SDimitry Andric case GK_GFX705: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705;
1670b57cec5SDimitry Andric case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
1680b57cec5SDimitry Andric case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
1690b57cec5SDimitry Andric case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
170e8d8bef9SDimitry Andric case GK_GFX805: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805;
1710b57cec5SDimitry Andric case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
1720b57cec5SDimitry Andric case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
1730b57cec5SDimitry Andric case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
1740b57cec5SDimitry Andric case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
1750b57cec5SDimitry Andric case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
1760b57cec5SDimitry Andric case GK_GFX908: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
1770b57cec5SDimitry Andric case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
178fe6060f1SDimitry Andric case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
179e8d8bef9SDimitry Andric case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
18081ad6265SDimitry Andric case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
18106c3fb27SDimitry Andric case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
18206c3fb27SDimitry Andric case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
1830b57cec5SDimitry Andric case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
1840b57cec5SDimitry Andric case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
1850b57cec5SDimitry Andric case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
186fe6060f1SDimitry Andric case GK_GFX1013: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013;
1875ffd83dbSDimitry Andric case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030;
188e8d8bef9SDimitry Andric case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031;
189e8d8bef9SDimitry Andric case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032;
190e8d8bef9SDimitry Andric case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033;
191fe6060f1SDimitry Andric case GK_GFX1034: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034;
192fe6060f1SDimitry Andric case GK_GFX1035: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035;
19381ad6265SDimitry Andric case GK_GFX1036: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036;
19481ad6265SDimitry Andric case GK_GFX1100: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100;
19581ad6265SDimitry Andric case GK_GFX1101: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101;
19681ad6265SDimitry Andric case GK_GFX1102: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102;
19781ad6265SDimitry Andric case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103;
19806c3fb27SDimitry Andric case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150;
19906c3fb27SDimitry Andric case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
200*0fca6ea1SDimitry Andric case GK_GFX1152: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152;
2015f757f3fSDimitry Andric case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
2025f757f3fSDimitry Andric case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
203*0fca6ea1SDimitry Andric case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
204*0fca6ea1SDimitry Andric case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC;
205*0fca6ea1SDimitry Andric case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC;
206*0fca6ea1SDimitry Andric case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC;
207*0fca6ea1SDimitry Andric case GK_GFX12_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC;
2080b57cec5SDimitry Andric case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
2090b57cec5SDimitry Andric }
2105f757f3fSDimitry Andric // clang-format on
2110b57cec5SDimitry Andric
2120b57cec5SDimitry Andric llvm_unreachable("unknown GPU");
2130b57cec5SDimitry Andric }
2140b57cec5SDimitry Andric
2150b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
2160b57cec5SDimitry Andric // AMDGPUTargetAsmStreamer
2170b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
2180b57cec5SDimitry Andric
AMDGPUTargetAsmStreamer(MCStreamer & S,formatted_raw_ostream & OS)2190b57cec5SDimitry Andric AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
2200b57cec5SDimitry Andric formatted_raw_ostream &OS)
2210b57cec5SDimitry Andric : AMDGPUTargetStreamer(S), OS(OS) { }
2220b57cec5SDimitry Andric
2230b57cec5SDimitry Andric // A hook for emitting stuff at the end.
2240b57cec5SDimitry Andric // We use it for emitting the accumulated PAL metadata as directives.
225e8d8bef9SDimitry Andric // The PAL metadata is reset after it is emitted.
finish()2260b57cec5SDimitry Andric void AMDGPUTargetAsmStreamer::finish() {
2270b57cec5SDimitry Andric std::string S;
2280b57cec5SDimitry Andric getPALMetadata()->toString(S);
2290b57cec5SDimitry Andric OS << S;
230e8d8bef9SDimitry Andric
231e8d8bef9SDimitry Andric // Reset the pal metadata so its data will not affect a compilation that
232e8d8bef9SDimitry Andric // reuses this object.
233e8d8bef9SDimitry Andric getPALMetadata()->reset();
2340b57cec5SDimitry Andric }
2350b57cec5SDimitry Andric
EmitDirectiveAMDGCNTarget()236fe6060f1SDimitry Andric void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() {
237fe6060f1SDimitry Andric OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
2380b57cec5SDimitry Andric }
2390b57cec5SDimitry Andric
EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)2407a6dacacSDimitry Andric void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
2417a6dacacSDimitry Andric unsigned COV) {
2427a6dacacSDimitry Andric AMDGPUTargetStreamer::EmitDirectiveAMDHSACodeObjectVersion(COV);
2437a6dacacSDimitry Andric OS << "\t.amdhsa_code_object_version " << COV << '\n';
2440b57cec5SDimitry Andric }
2450b57cec5SDimitry Andric
EmitAMDKernelCodeT(AMDGPUMCKernelCodeT & Header)246*0fca6ea1SDimitry Andric void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
2470b57cec5SDimitry Andric OS << "\t.amd_kernel_code_t\n";
248*0fca6ea1SDimitry Andric Header.EmitKernelCodeT(OS, getContext());
2490b57cec5SDimitry Andric OS << "\t.end_amd_kernel_code_t\n";
2500b57cec5SDimitry Andric }
2510b57cec5SDimitry Andric
EmitAMDGPUSymbolType(StringRef SymbolName,unsigned Type)2520b57cec5SDimitry Andric void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
2530b57cec5SDimitry Andric unsigned Type) {
2540b57cec5SDimitry Andric switch (Type) {
2550b57cec5SDimitry Andric default: llvm_unreachable("Invalid AMDGPU symbol type");
2560b57cec5SDimitry Andric case ELF::STT_AMDGPU_HSA_KERNEL:
2570b57cec5SDimitry Andric OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
2580b57cec5SDimitry Andric break;
2590b57cec5SDimitry Andric }
2600b57cec5SDimitry Andric }
2610b57cec5SDimitry Andric
emitAMDGPULDS(MCSymbol * Symbol,unsigned Size,Align Alignment)2620b57cec5SDimitry Andric void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
2635ffd83dbSDimitry Andric Align Alignment) {
2645ffd83dbSDimitry Andric OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
2655ffd83dbSDimitry Andric << Alignment.value() << '\n';
2660b57cec5SDimitry Andric }
2670b57cec5SDimitry Andric
EmitISAVersion()268fe6060f1SDimitry Andric bool AMDGPUTargetAsmStreamer::EmitISAVersion() {
269fe6060f1SDimitry Andric OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
2700b57cec5SDimitry Andric return true;
2710b57cec5SDimitry Andric }
2720b57cec5SDimitry Andric
EmitHSAMetadata(msgpack::Document & HSAMetadataDoc,bool Strict)2730b57cec5SDimitry Andric bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
2740b57cec5SDimitry Andric msgpack::Document &HSAMetadataDoc, bool Strict) {
275e8d8bef9SDimitry Andric HSAMD::V3::MetadataVerifier Verifier(Strict);
2760b57cec5SDimitry Andric if (!Verifier.verify(HSAMetadataDoc.getRoot()))
2770b57cec5SDimitry Andric return false;
2780b57cec5SDimitry Andric
2790b57cec5SDimitry Andric std::string HSAMetadataString;
2800b57cec5SDimitry Andric raw_string_ostream StrOS(HSAMetadataString);
2810b57cec5SDimitry Andric HSAMetadataDoc.toYAML(StrOS);
2820b57cec5SDimitry Andric
283e8d8bef9SDimitry Andric OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n';
2840b57cec5SDimitry Andric OS << StrOS.str() << '\n';
285e8d8bef9SDimitry Andric OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n';
2860b57cec5SDimitry Andric return true;
2870b57cec5SDimitry Andric }
2880b57cec5SDimitry Andric
EmitKernargPreloadHeader(const MCSubtargetInfo & STI,bool TrapEnabled)289*0fca6ea1SDimitry Andric bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader(
290*0fca6ea1SDimitry Andric const MCSubtargetInfo &STI, bool TrapEnabled) {
291*0fca6ea1SDimitry Andric OS << (TrapEnabled ? "\ts_trap 2" : "\ts_endpgm")
292*0fca6ea1SDimitry Andric << " ; Kernarg preload header. Trap with incompatible firmware that "
293*0fca6ea1SDimitry Andric "doesn't support preloading kernel arguments.\n";
294*0fca6ea1SDimitry Andric OS << "\t.fill 63, 4, 0xbf800000 ; s_nop 0\n";
295*0fca6ea1SDimitry Andric return true;
296*0fca6ea1SDimitry Andric }
297*0fca6ea1SDimitry Andric
EmitCodeEnd(const MCSubtargetInfo & STI)298fe6060f1SDimitry Andric bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
2990b57cec5SDimitry Andric const uint32_t Encoded_s_code_end = 0xbf9f0000;
300fe6060f1SDimitry Andric const uint32_t Encoded_s_nop = 0xbf800000;
301fe6060f1SDimitry Andric uint32_t Encoded_pad = Encoded_s_code_end;
302fe6060f1SDimitry Andric
303fe6060f1SDimitry Andric // Instruction cache line size in bytes.
30481ad6265SDimitry Andric const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
305fe6060f1SDimitry Andric const unsigned CacheLineSize = 1u << Log2CacheLineSize;
306fe6060f1SDimitry Andric
307fe6060f1SDimitry Andric // Extra padding amount in bytes to support prefetch mode 3.
308fe6060f1SDimitry Andric unsigned FillSize = 3 * CacheLineSize;
309fe6060f1SDimitry Andric
310fe6060f1SDimitry Andric if (AMDGPU::isGFX90A(STI)) {
311fe6060f1SDimitry Andric Encoded_pad = Encoded_s_nop;
312fe6060f1SDimitry Andric FillSize = 16 * CacheLineSize;
313fe6060f1SDimitry Andric }
314fe6060f1SDimitry Andric
315fe6060f1SDimitry Andric OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
316fe6060f1SDimitry Andric OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
3170b57cec5SDimitry Andric return true;
3180b57cec5SDimitry Andric }
3190b57cec5SDimitry Andric
EmitAmdhsaKernelDescriptor(const MCSubtargetInfo & STI,StringRef KernelName,const MCKernelDescriptor & KD,const MCExpr * NextVGPR,const MCExpr * NextSGPR,const MCExpr * ReserveVCC,const MCExpr * ReserveFlatScr)3200b57cec5SDimitry Andric void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
3210b57cec5SDimitry Andric const MCSubtargetInfo &STI, StringRef KernelName,
322*0fca6ea1SDimitry Andric const MCKernelDescriptor &KD, const MCExpr *NextVGPR,
323*0fca6ea1SDimitry Andric const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
324*0fca6ea1SDimitry Andric const MCExpr *ReserveFlatScr) {
3250b57cec5SDimitry Andric IsaVersion IVersion = getIsaVersion(STI.getCPU());
326*0fca6ea1SDimitry Andric const MCAsmInfo *MAI = getContext().getAsmInfo();
3270b57cec5SDimitry Andric
3280b57cec5SDimitry Andric OS << "\t.amdhsa_kernel " << KernelName << '\n';
3290b57cec5SDimitry Andric
330*0fca6ea1SDimitry Andric auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
331*0fca6ea1SDimitry Andric StringRef Directive) {
332*0fca6ea1SDimitry Andric int64_t IVal;
333*0fca6ea1SDimitry Andric OS << "\t\t" << Directive << ' ';
334*0fca6ea1SDimitry Andric const MCExpr *pgm_rsrc1_bits =
335*0fca6ea1SDimitry Andric MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext());
336*0fca6ea1SDimitry Andric if (pgm_rsrc1_bits->evaluateAsAbsolute(IVal))
337*0fca6ea1SDimitry Andric OS << static_cast<uint64_t>(IVal);
338*0fca6ea1SDimitry Andric else
339*0fca6ea1SDimitry Andric pgm_rsrc1_bits->print(OS, MAI);
340*0fca6ea1SDimitry Andric OS << '\n';
341*0fca6ea1SDimitry Andric };
3420b57cec5SDimitry Andric
343*0fca6ea1SDimitry Andric auto EmitMCExpr = [&](const MCExpr *Value) {
344*0fca6ea1SDimitry Andric int64_t evaluatableValue;
345*0fca6ea1SDimitry Andric if (Value->evaluateAsAbsolute(evaluatableValue)) {
346*0fca6ea1SDimitry Andric OS << static_cast<uint64_t>(evaluatableValue);
347*0fca6ea1SDimitry Andric } else {
348*0fca6ea1SDimitry Andric Value->print(OS, MAI);
3495f757f3fSDimitry Andric }
350*0fca6ea1SDimitry Andric };
351*0fca6ea1SDimitry Andric
352*0fca6ea1SDimitry Andric OS << "\t\t.amdhsa_group_segment_fixed_size ";
353*0fca6ea1SDimitry Andric EmitMCExpr(KD.group_segment_fixed_size);
354*0fca6ea1SDimitry Andric OS << '\n';
355*0fca6ea1SDimitry Andric
356*0fca6ea1SDimitry Andric OS << "\t\t.amdhsa_private_segment_fixed_size ";
357*0fca6ea1SDimitry Andric EmitMCExpr(KD.private_segment_fixed_size);
358*0fca6ea1SDimitry Andric OS << '\n';
359*0fca6ea1SDimitry Andric
360*0fca6ea1SDimitry Andric OS << "\t\t.amdhsa_kernarg_size ";
361*0fca6ea1SDimitry Andric EmitMCExpr(KD.kernarg_size);
362*0fca6ea1SDimitry Andric OS << '\n';
363*0fca6ea1SDimitry Andric
364*0fca6ea1SDimitry Andric PrintField(
365*0fca6ea1SDimitry Andric KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
366*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count");
367*0fca6ea1SDimitry Andric
368*0fca6ea1SDimitry Andric if (!hasArchitectedFlatScratch(STI))
369*0fca6ea1SDimitry Andric PrintField(
370*0fca6ea1SDimitry Andric KD.kernel_code_properties,
371*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
372*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
373*0fca6ea1SDimitry Andric ".amdhsa_user_sgpr_private_segment_buffer");
374*0fca6ea1SDimitry Andric PrintField(KD.kernel_code_properties,
375*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
376*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
377*0fca6ea1SDimitry Andric ".amdhsa_user_sgpr_dispatch_ptr");
378*0fca6ea1SDimitry Andric PrintField(KD.kernel_code_properties,
379*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
380*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
381*0fca6ea1SDimitry Andric ".amdhsa_user_sgpr_queue_ptr");
382*0fca6ea1SDimitry Andric PrintField(KD.kernel_code_properties,
383*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
384*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
385*0fca6ea1SDimitry Andric ".amdhsa_user_sgpr_kernarg_segment_ptr");
386*0fca6ea1SDimitry Andric PrintField(KD.kernel_code_properties,
387*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
388*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
389*0fca6ea1SDimitry Andric ".amdhsa_user_sgpr_dispatch_id");
390*0fca6ea1SDimitry Andric if (!hasArchitectedFlatScratch(STI))
391*0fca6ea1SDimitry Andric PrintField(KD.kernel_code_properties,
392*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
393*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
394*0fca6ea1SDimitry Andric ".amdhsa_user_sgpr_flat_scratch_init");
395*0fca6ea1SDimitry Andric if (hasKernargPreload(STI)) {
396*0fca6ea1SDimitry Andric PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
397*0fca6ea1SDimitry Andric amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
398*0fca6ea1SDimitry Andric ".amdhsa_user_sgpr_kernarg_preload_length");
399*0fca6ea1SDimitry Andric PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
400*0fca6ea1SDimitry Andric amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
401*0fca6ea1SDimitry Andric ".amdhsa_user_sgpr_kernarg_preload_offset");
402*0fca6ea1SDimitry Andric }
403*0fca6ea1SDimitry Andric PrintField(
404*0fca6ea1SDimitry Andric KD.kernel_code_properties,
405*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
406*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
407*0fca6ea1SDimitry Andric ".amdhsa_user_sgpr_private_segment_size");
4080b57cec5SDimitry Andric if (IVersion.Major >= 10)
409*0fca6ea1SDimitry Andric PrintField(KD.kernel_code_properties,
410*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
411*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
412*0fca6ea1SDimitry Andric ".amdhsa_wavefront_size32");
41306c3fb27SDimitry Andric if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
414*0fca6ea1SDimitry Andric PrintField(KD.kernel_code_properties,
415*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
416*0fca6ea1SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
417*0fca6ea1SDimitry Andric ".amdhsa_uses_dynamic_stack");
418*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc2,
419*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
420*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
421fe6060f1SDimitry Andric (hasArchitectedFlatScratch(STI)
422fe6060f1SDimitry Andric ? ".amdhsa_enable_private_segment"
423*0fca6ea1SDimitry Andric : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
424*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc2,
425*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
426*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
427*0fca6ea1SDimitry Andric ".amdhsa_system_sgpr_workgroup_id_x");
428*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc2,
429*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
430*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
431*0fca6ea1SDimitry Andric ".amdhsa_system_sgpr_workgroup_id_y");
432*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc2,
433*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
434*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
435*0fca6ea1SDimitry Andric ".amdhsa_system_sgpr_workgroup_id_z");
436*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc2,
437*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
438*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
439*0fca6ea1SDimitry Andric ".amdhsa_system_sgpr_workgroup_info");
440*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc2,
441*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
442*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
443*0fca6ea1SDimitry Andric ".amdhsa_system_vgpr_workitem_id");
4440b57cec5SDimitry Andric
4450b57cec5SDimitry Andric // These directives are required.
446*0fca6ea1SDimitry Andric OS << "\t\t.amdhsa_next_free_vgpr ";
447*0fca6ea1SDimitry Andric EmitMCExpr(NextVGPR);
448*0fca6ea1SDimitry Andric OS << '\n';
4490b57cec5SDimitry Andric
450*0fca6ea1SDimitry Andric OS << "\t\t.amdhsa_next_free_sgpr ";
451*0fca6ea1SDimitry Andric EmitMCExpr(NextSGPR);
452*0fca6ea1SDimitry Andric OS << '\n';
453fe6060f1SDimitry Andric
454*0fca6ea1SDimitry Andric if (AMDGPU::isGFX90A(STI)) {
455*0fca6ea1SDimitry Andric // MCExpr equivalent of taking the (accum_offset + 1) * 4.
456*0fca6ea1SDimitry Andric const MCExpr *accum_bits = MCKernelDescriptor::bits_get(
457*0fca6ea1SDimitry Andric KD.compute_pgm_rsrc3,
458*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
459*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
460*0fca6ea1SDimitry Andric accum_bits = MCBinaryExpr::createAdd(
461*0fca6ea1SDimitry Andric accum_bits, MCConstantExpr::create(1, getContext()), getContext());
462*0fca6ea1SDimitry Andric accum_bits = MCBinaryExpr::createMul(
463*0fca6ea1SDimitry Andric accum_bits, MCConstantExpr::create(4, getContext()), getContext());
464*0fca6ea1SDimitry Andric OS << "\t\t.amdhsa_accum_offset ";
465*0fca6ea1SDimitry Andric EmitMCExpr(accum_bits);
466*0fca6ea1SDimitry Andric OS << '\n';
467*0fca6ea1SDimitry Andric }
468*0fca6ea1SDimitry Andric
469*0fca6ea1SDimitry Andric OS << "\t\t.amdhsa_reserve_vcc ";
470*0fca6ea1SDimitry Andric EmitMCExpr(ReserveVCC);
471*0fca6ea1SDimitry Andric OS << '\n';
472*0fca6ea1SDimitry Andric
473*0fca6ea1SDimitry Andric if (IVersion.Major >= 7 && !hasArchitectedFlatScratch(STI)) {
474*0fca6ea1SDimitry Andric OS << "\t\t.amdhsa_reserve_flat_scratch ";
475*0fca6ea1SDimitry Andric EmitMCExpr(ReserveFlatScr);
476*0fca6ea1SDimitry Andric OS << '\n';
477*0fca6ea1SDimitry Andric }
478fe6060f1SDimitry Andric
47906c3fb27SDimitry Andric switch (CodeObjectVersion) {
480fe6060f1SDimitry Andric default:
481fe6060f1SDimitry Andric break;
48206c3fb27SDimitry Andric case AMDGPU::AMDHSA_COV4:
48306c3fb27SDimitry Andric case AMDGPU::AMDHSA_COV5:
484fe6060f1SDimitry Andric if (getTargetID()->isXnackSupported())
485fe6060f1SDimitry Andric OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
486fe6060f1SDimitry Andric break;
487fe6060f1SDimitry Andric }
4880b57cec5SDimitry Andric
489*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc1,
490*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
491*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
492*0fca6ea1SDimitry Andric ".amdhsa_float_round_mode_32");
493*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc1,
494*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
495*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
496*0fca6ea1SDimitry Andric ".amdhsa_float_round_mode_16_64");
497*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc1,
498*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
499*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
500*0fca6ea1SDimitry Andric ".amdhsa_float_denorm_mode_32");
501*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc1,
502*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
503*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
504*0fca6ea1SDimitry Andric ".amdhsa_float_denorm_mode_16_64");
5055f757f3fSDimitry Andric if (IVersion.Major < 12) {
506*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc1,
507*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
508*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
509*0fca6ea1SDimitry Andric ".amdhsa_dx10_clamp");
510*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc1,
511*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
512*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
513*0fca6ea1SDimitry Andric ".amdhsa_ieee_mode");
5145f757f3fSDimitry Andric }
515*0fca6ea1SDimitry Andric if (IVersion.Major >= 9) {
516*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc1,
517*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
518*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
519*0fca6ea1SDimitry Andric ".amdhsa_fp16_overflow");
520*0fca6ea1SDimitry Andric }
521fe6060f1SDimitry Andric if (AMDGPU::isGFX90A(STI))
522*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc3,
523*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
524*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
5250b57cec5SDimitry Andric if (IVersion.Major >= 10) {
526*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc1,
527*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
528*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
529*0fca6ea1SDimitry Andric ".amdhsa_workgroup_processor_mode");
530*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc1,
531*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
532*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
533*0fca6ea1SDimitry Andric ".amdhsa_memory_ordered");
534*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc1,
535*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
536*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
537*0fca6ea1SDimitry Andric ".amdhsa_forward_progress");
538647cbc5dSDimitry Andric }
539647cbc5dSDimitry Andric if (IVersion.Major >= 10 && IVersion.Major < 12) {
540*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc3,
541*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
542*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
543*0fca6ea1SDimitry Andric ".amdhsa_shared_vgpr_count");
5440b57cec5SDimitry Andric }
545*0fca6ea1SDimitry Andric if (IVersion.Major >= 12) {
546*0fca6ea1SDimitry Andric PrintField(KD.compute_pgm_rsrc1,
547*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
548*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
549*0fca6ea1SDimitry Andric ".amdhsa_round_robin_scheduling");
550*0fca6ea1SDimitry Andric }
551*0fca6ea1SDimitry Andric PrintField(
552*0fca6ea1SDimitry Andric KD.compute_pgm_rsrc2,
553*0fca6ea1SDimitry Andric amdhsa::
554*0fca6ea1SDimitry Andric COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
555*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
556*0fca6ea1SDimitry Andric ".amdhsa_exception_fp_ieee_invalid_op");
557*0fca6ea1SDimitry Andric PrintField(
558*0fca6ea1SDimitry Andric KD.compute_pgm_rsrc2,
559*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
560*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
561*0fca6ea1SDimitry Andric ".amdhsa_exception_fp_denorm_src");
562*0fca6ea1SDimitry Andric PrintField(
563*0fca6ea1SDimitry Andric KD.compute_pgm_rsrc2,
564*0fca6ea1SDimitry Andric amdhsa::
565*0fca6ea1SDimitry Andric COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
566*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
567*0fca6ea1SDimitry Andric ".amdhsa_exception_fp_ieee_div_zero");
568*0fca6ea1SDimitry Andric PrintField(
569*0fca6ea1SDimitry Andric KD.compute_pgm_rsrc2,
570*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
571*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
572*0fca6ea1SDimitry Andric ".amdhsa_exception_fp_ieee_overflow");
573*0fca6ea1SDimitry Andric PrintField(
574*0fca6ea1SDimitry Andric KD.compute_pgm_rsrc2,
575*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
576*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
577*0fca6ea1SDimitry Andric ".amdhsa_exception_fp_ieee_underflow");
578*0fca6ea1SDimitry Andric PrintField(
579*0fca6ea1SDimitry Andric KD.compute_pgm_rsrc2,
580*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
581*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
582*0fca6ea1SDimitry Andric ".amdhsa_exception_fp_ieee_inexact");
583*0fca6ea1SDimitry Andric PrintField(
584*0fca6ea1SDimitry Andric KD.compute_pgm_rsrc2,
585*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
586*0fca6ea1SDimitry Andric amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
587*0fca6ea1SDimitry Andric ".amdhsa_exception_int_div_zero");
5880b57cec5SDimitry Andric
5890b57cec5SDimitry Andric OS << "\t.end_amdhsa_kernel\n";
5900b57cec5SDimitry Andric }
5910b57cec5SDimitry Andric
5920b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
5930b57cec5SDimitry Andric // AMDGPUTargetELFStreamer
5940b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
5950b57cec5SDimitry Andric
AMDGPUTargetELFStreamer(MCStreamer & S,const MCSubtargetInfo & STI)5965ffd83dbSDimitry Andric AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
5975ffd83dbSDimitry Andric const MCSubtargetInfo &STI)
598fe6060f1SDimitry Andric : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {}
5990b57cec5SDimitry Andric
getStreamer()6000b57cec5SDimitry Andric MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
6010b57cec5SDimitry Andric return static_cast<MCELFStreamer &>(Streamer);
6020b57cec5SDimitry Andric }
6030b57cec5SDimitry Andric
6040b57cec5SDimitry Andric // A hook for emitting stuff at the end.
6050b57cec5SDimitry Andric // We use it for emitting the accumulated PAL metadata as a .note record.
606e8d8bef9SDimitry Andric // The PAL metadata is reset after it is emitted.
finish()6070b57cec5SDimitry Andric void AMDGPUTargetELFStreamer::finish() {
608*0fca6ea1SDimitry Andric ELFObjectWriter &W = getStreamer().getWriter();
609*0fca6ea1SDimitry Andric W.setELFHeaderEFlags(getEFlags());
610*0fca6ea1SDimitry Andric W.setOverrideABIVersion(
6117a6dacacSDimitry Andric getELFABIVersion(STI.getTargetTriple(), CodeObjectVersion));
612fe6060f1SDimitry Andric
6130b57cec5SDimitry Andric std::string Blob;
6140b57cec5SDimitry Andric const char *Vendor = getPALMetadata()->getVendor();
6150b57cec5SDimitry Andric unsigned Type = getPALMetadata()->getType();
6160b57cec5SDimitry Andric getPALMetadata()->toBlob(Type, Blob);
6170b57cec5SDimitry Andric if (Blob.empty())
6180b57cec5SDimitry Andric return;
6190b57cec5SDimitry Andric EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
6205ffd83dbSDimitry Andric [&](MCELFStreamer &OS) { OS.emitBytes(Blob); });
621e8d8bef9SDimitry Andric
622e8d8bef9SDimitry Andric // Reset the pal metadata so its data will not affect a compilation that
623e8d8bef9SDimitry Andric // reuses this object.
624e8d8bef9SDimitry Andric getPALMetadata()->reset();
6250b57cec5SDimitry Andric }
6260b57cec5SDimitry Andric
EmitNote(StringRef Name,const MCExpr * DescSZ,unsigned NoteType,function_ref<void (MCELFStreamer &)> EmitDesc)6270b57cec5SDimitry Andric void AMDGPUTargetELFStreamer::EmitNote(
6280b57cec5SDimitry Andric StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
6290b57cec5SDimitry Andric function_ref<void(MCELFStreamer &)> EmitDesc) {
6300b57cec5SDimitry Andric auto &S = getStreamer();
6310b57cec5SDimitry Andric auto &Context = S.getContext();
6320b57cec5SDimitry Andric
6330b57cec5SDimitry Andric auto NameSZ = Name.size() + 1;
6340b57cec5SDimitry Andric
6355ffd83dbSDimitry Andric unsigned NoteFlags = 0;
6365ffd83dbSDimitry Andric // TODO Apparently, this is currently needed for OpenCL as mentioned in
6375ffd83dbSDimitry Andric // https://reviews.llvm.org/D74995
6385f757f3fSDimitry Andric if (isHsaAbi(STI))
6395ffd83dbSDimitry Andric NoteFlags = ELF::SHF_ALLOC;
6405ffd83dbSDimitry Andric
64181ad6265SDimitry Andric S.pushSection();
64281ad6265SDimitry Andric S.switchSection(
6435ffd83dbSDimitry Andric Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags));
6445ffd83dbSDimitry Andric S.emitInt32(NameSZ); // namesz
6455ffd83dbSDimitry Andric S.emitValue(DescSZ, 4); // descz
6465ffd83dbSDimitry Andric S.emitInt32(NoteType); // type
6475ffd83dbSDimitry Andric S.emitBytes(Name); // name
648bdd1243dSDimitry Andric S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0
6490b57cec5SDimitry Andric EmitDesc(S); // desc
650bdd1243dSDimitry Andric S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0
65181ad6265SDimitry Andric S.popSection();
6520b57cec5SDimitry Andric }
6530b57cec5SDimitry Andric
getEFlags()654fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlags() {
655fe6060f1SDimitry Andric switch (STI.getTargetTriple().getArch()) {
656fe6060f1SDimitry Andric default:
657fe6060f1SDimitry Andric llvm_unreachable("Unsupported Arch");
658fe6060f1SDimitry Andric case Triple::r600:
659fe6060f1SDimitry Andric return getEFlagsR600();
660fe6060f1SDimitry Andric case Triple::amdgcn:
661fe6060f1SDimitry Andric return getEFlagsAMDGCN();
662fe6060f1SDimitry Andric }
663fe6060f1SDimitry Andric }
664fe6060f1SDimitry Andric
getEFlagsR600()665fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
666fe6060f1SDimitry Andric assert(STI.getTargetTriple().getArch() == Triple::r600);
667fe6060f1SDimitry Andric
668fe6060f1SDimitry Andric return getElfMach(STI.getCPU());
669fe6060f1SDimitry Andric }
670fe6060f1SDimitry Andric
getEFlagsAMDGCN()671fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
672fe6060f1SDimitry Andric assert(STI.getTargetTriple().getArch() == Triple::amdgcn);
673fe6060f1SDimitry Andric
674fe6060f1SDimitry Andric switch (STI.getTargetTriple().getOS()) {
675fe6060f1SDimitry Andric default:
676fe6060f1SDimitry Andric // TODO: Why are some tests have "mingw" listed as OS?
677fe6060f1SDimitry Andric // llvm_unreachable("Unsupported OS");
678fe6060f1SDimitry Andric case Triple::UnknownOS:
679fe6060f1SDimitry Andric return getEFlagsUnknownOS();
680fe6060f1SDimitry Andric case Triple::AMDHSA:
681fe6060f1SDimitry Andric return getEFlagsAMDHSA();
682fe6060f1SDimitry Andric case Triple::AMDPAL:
683fe6060f1SDimitry Andric return getEFlagsAMDPAL();
684fe6060f1SDimitry Andric case Triple::Mesa3D:
685fe6060f1SDimitry Andric return getEFlagsMesa3D();
686fe6060f1SDimitry Andric }
687fe6060f1SDimitry Andric }
688fe6060f1SDimitry Andric
getEFlagsUnknownOS()689fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
690fe6060f1SDimitry Andric // TODO: Why are some tests have "mingw" listed as OS?
691fe6060f1SDimitry Andric // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);
692fe6060f1SDimitry Andric
693fe6060f1SDimitry Andric return getEFlagsV3();
694fe6060f1SDimitry Andric }
695fe6060f1SDimitry Andric
getEFlagsAMDHSA()696fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
6975f757f3fSDimitry Andric assert(isHsaAbi(STI));
698fe6060f1SDimitry Andric
699*0fca6ea1SDimitry Andric if (CodeObjectVersion >= 6)
700*0fca6ea1SDimitry Andric return getEFlagsV6();
701fe6060f1SDimitry Andric return getEFlagsV4();
702fe6060f1SDimitry Andric }
703fe6060f1SDimitry Andric
getEFlagsAMDPAL()704fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
705fe6060f1SDimitry Andric assert(STI.getTargetTriple().getOS() == Triple::AMDPAL);
706fe6060f1SDimitry Andric
707fe6060f1SDimitry Andric return getEFlagsV3();
708fe6060f1SDimitry Andric }
709fe6060f1SDimitry Andric
getEFlagsMesa3D()710fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
711fe6060f1SDimitry Andric assert(STI.getTargetTriple().getOS() == Triple::Mesa3D);
712fe6060f1SDimitry Andric
713fe6060f1SDimitry Andric return getEFlagsV3();
714fe6060f1SDimitry Andric }
715fe6060f1SDimitry Andric
getEFlagsV3()716fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
717fe6060f1SDimitry Andric unsigned EFlagsV3 = 0;
718fe6060f1SDimitry Andric
719fe6060f1SDimitry Andric // mach.
720fe6060f1SDimitry Andric EFlagsV3 |= getElfMach(STI.getCPU());
721fe6060f1SDimitry Andric
722fe6060f1SDimitry Andric // xnack.
723fe6060f1SDimitry Andric if (getTargetID()->isXnackOnOrAny())
724fe6060f1SDimitry Andric EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_XNACK_V3;
725fe6060f1SDimitry Andric // sramecc.
726fe6060f1SDimitry Andric if (getTargetID()->isSramEccOnOrAny())
727fe6060f1SDimitry Andric EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3;
728fe6060f1SDimitry Andric
729fe6060f1SDimitry Andric return EFlagsV3;
730fe6060f1SDimitry Andric }
731fe6060f1SDimitry Andric
getEFlagsV4()732fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
733fe6060f1SDimitry Andric unsigned EFlagsV4 = 0;
734fe6060f1SDimitry Andric
735fe6060f1SDimitry Andric // mach.
736fe6060f1SDimitry Andric EFlagsV4 |= getElfMach(STI.getCPU());
737fe6060f1SDimitry Andric
738fe6060f1SDimitry Andric // xnack.
739fe6060f1SDimitry Andric switch (getTargetID()->getXnackSetting()) {
740fe6060f1SDimitry Andric case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
741fe6060f1SDimitry Andric EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4;
742fe6060f1SDimitry Andric break;
743fe6060f1SDimitry Andric case AMDGPU::IsaInfo::TargetIDSetting::Any:
744fe6060f1SDimitry Andric EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4;
745fe6060f1SDimitry Andric break;
746fe6060f1SDimitry Andric case AMDGPU::IsaInfo::TargetIDSetting::Off:
747fe6060f1SDimitry Andric EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4;
748fe6060f1SDimitry Andric break;
749fe6060f1SDimitry Andric case AMDGPU::IsaInfo::TargetIDSetting::On:
750fe6060f1SDimitry Andric EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4;
751fe6060f1SDimitry Andric break;
752fe6060f1SDimitry Andric }
753fe6060f1SDimitry Andric // sramecc.
754fe6060f1SDimitry Andric switch (getTargetID()->getSramEccSetting()) {
755fe6060f1SDimitry Andric case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
756fe6060f1SDimitry Andric EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4;
757fe6060f1SDimitry Andric break;
758fe6060f1SDimitry Andric case AMDGPU::IsaInfo::TargetIDSetting::Any:
759fe6060f1SDimitry Andric EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4;
760fe6060f1SDimitry Andric break;
761fe6060f1SDimitry Andric case AMDGPU::IsaInfo::TargetIDSetting::Off:
762fe6060f1SDimitry Andric EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4;
763fe6060f1SDimitry Andric break;
764fe6060f1SDimitry Andric case AMDGPU::IsaInfo::TargetIDSetting::On:
765fe6060f1SDimitry Andric EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4;
766fe6060f1SDimitry Andric break;
767fe6060f1SDimitry Andric }
768fe6060f1SDimitry Andric
769fe6060f1SDimitry Andric return EFlagsV4;
770fe6060f1SDimitry Andric }
771fe6060f1SDimitry Andric
getEFlagsV6()772*0fca6ea1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
773*0fca6ea1SDimitry Andric unsigned Flags = getEFlagsV4();
774*0fca6ea1SDimitry Andric
775*0fca6ea1SDimitry Andric unsigned Version = ForceGenericVersion;
776*0fca6ea1SDimitry Andric if (!Version) {
777*0fca6ea1SDimitry Andric switch (parseArchAMDGCN(STI.getCPU())) {
778*0fca6ea1SDimitry Andric case AMDGPU::GK_GFX9_GENERIC:
779*0fca6ea1SDimitry Andric Version = GenericVersion::GFX9;
780*0fca6ea1SDimitry Andric break;
781*0fca6ea1SDimitry Andric case AMDGPU::GK_GFX10_1_GENERIC:
782*0fca6ea1SDimitry Andric Version = GenericVersion::GFX10_1;
783*0fca6ea1SDimitry Andric break;
784*0fca6ea1SDimitry Andric case AMDGPU::GK_GFX10_3_GENERIC:
785*0fca6ea1SDimitry Andric Version = GenericVersion::GFX10_3;
786*0fca6ea1SDimitry Andric break;
787*0fca6ea1SDimitry Andric case AMDGPU::GK_GFX11_GENERIC:
788*0fca6ea1SDimitry Andric Version = GenericVersion::GFX11;
789*0fca6ea1SDimitry Andric break;
790*0fca6ea1SDimitry Andric case AMDGPU::GK_GFX12_GENERIC:
791*0fca6ea1SDimitry Andric Version = GenericVersion::GFX12;
792*0fca6ea1SDimitry Andric break;
793*0fca6ea1SDimitry Andric default:
794*0fca6ea1SDimitry Andric break;
795*0fca6ea1SDimitry Andric }
796*0fca6ea1SDimitry Andric }
797*0fca6ea1SDimitry Andric
798*0fca6ea1SDimitry Andric // Versions start at 1.
799*0fca6ea1SDimitry Andric if (Version) {
800*0fca6ea1SDimitry Andric if (Version > ELF::EF_AMDGPU_GENERIC_VERSION_MAX)
801*0fca6ea1SDimitry Andric report_fatal_error("Cannot encode generic code object version " +
802*0fca6ea1SDimitry Andric Twine(Version) +
803*0fca6ea1SDimitry Andric " - no ELF flag can represent this version!");
804*0fca6ea1SDimitry Andric Flags |= (Version << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET);
805*0fca6ea1SDimitry Andric }
806*0fca6ea1SDimitry Andric
807*0fca6ea1SDimitry Andric return Flags;
808*0fca6ea1SDimitry Andric }
809*0fca6ea1SDimitry Andric
EmitDirectiveAMDGCNTarget()810fe6060f1SDimitry Andric void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
8110b57cec5SDimitry Andric
EmitAMDKernelCodeT(AMDGPUMCKernelCodeT & Header)812*0fca6ea1SDimitry Andric void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
8130b57cec5SDimitry Andric MCStreamer &OS = getStreamer();
81481ad6265SDimitry Andric OS.pushSection();
815*0fca6ea1SDimitry Andric Header.EmitKernelCodeT(OS, getContext());
81681ad6265SDimitry Andric OS.popSection();
8170b57cec5SDimitry Andric }
8180b57cec5SDimitry Andric
EmitAMDGPUSymbolType(StringRef SymbolName,unsigned Type)8190b57cec5SDimitry Andric void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
8200b57cec5SDimitry Andric unsigned Type) {
8210b57cec5SDimitry Andric MCSymbolELF *Symbol = cast<MCSymbolELF>(
8220b57cec5SDimitry Andric getStreamer().getContext().getOrCreateSymbol(SymbolName));
8230b57cec5SDimitry Andric Symbol->setType(Type);
8240b57cec5SDimitry Andric }
8250b57cec5SDimitry Andric
emitAMDGPULDS(MCSymbol * Symbol,unsigned Size,Align Alignment)8260b57cec5SDimitry Andric void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
8275ffd83dbSDimitry Andric Align Alignment) {
8280b57cec5SDimitry Andric MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
8290b57cec5SDimitry Andric SymbolELF->setType(ELF::STT_OBJECT);
8300b57cec5SDimitry Andric
831*0fca6ea1SDimitry Andric if (!SymbolELF->isBindingSet())
8320b57cec5SDimitry Andric SymbolELF->setBinding(ELF::STB_GLOBAL);
8330b57cec5SDimitry Andric
834bdd1243dSDimitry Andric if (SymbolELF->declareCommon(Size, Alignment, true)) {
8350b57cec5SDimitry Andric report_fatal_error("Symbol: " + Symbol->getName() +
8360b57cec5SDimitry Andric " redeclared as different type");
8370b57cec5SDimitry Andric }
8380b57cec5SDimitry Andric
8390b57cec5SDimitry Andric SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
8400b57cec5SDimitry Andric SymbolELF->setSize(MCConstantExpr::create(Size, getContext()));
8410b57cec5SDimitry Andric }
8420b57cec5SDimitry Andric
EmitISAVersion()843fe6060f1SDimitry Andric bool AMDGPUTargetELFStreamer::EmitISAVersion() {
8440b57cec5SDimitry Andric // Create two labels to mark the beginning and end of the desc field
8450b57cec5SDimitry Andric // and a MCExpr to calculate the size of the desc field.
8460b57cec5SDimitry Andric auto &Context = getContext();
8470b57cec5SDimitry Andric auto *DescBegin = Context.createTempSymbol();
8480b57cec5SDimitry Andric auto *DescEnd = Context.createTempSymbol();
8490b57cec5SDimitry Andric auto *DescSZ = MCBinaryExpr::createSub(
8500b57cec5SDimitry Andric MCSymbolRefExpr::create(DescEnd, Context),
8510b57cec5SDimitry Andric MCSymbolRefExpr::create(DescBegin, Context), Context);
8520b57cec5SDimitry Andric
853fe6060f1SDimitry Andric EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_ISA_NAME,
8540b57cec5SDimitry Andric [&](MCELFStreamer &OS) {
8555ffd83dbSDimitry Andric OS.emitLabel(DescBegin);
856fe6060f1SDimitry Andric OS.emitBytes(getTargetID()->toString());
8575ffd83dbSDimitry Andric OS.emitLabel(DescEnd);
8580b57cec5SDimitry Andric });
8590b57cec5SDimitry Andric return true;
8600b57cec5SDimitry Andric }
8610b57cec5SDimitry Andric
EmitHSAMetadata(msgpack::Document & HSAMetadataDoc,bool Strict)8620b57cec5SDimitry Andric bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
8630b57cec5SDimitry Andric bool Strict) {
864e8d8bef9SDimitry Andric HSAMD::V3::MetadataVerifier Verifier(Strict);
8650b57cec5SDimitry Andric if (!Verifier.verify(HSAMetadataDoc.getRoot()))
8660b57cec5SDimitry Andric return false;
8670b57cec5SDimitry Andric
8680b57cec5SDimitry Andric std::string HSAMetadataString;
8690b57cec5SDimitry Andric HSAMetadataDoc.writeToBlob(HSAMetadataString);
8700b57cec5SDimitry Andric
8710b57cec5SDimitry Andric // Create two labels to mark the beginning and end of the desc field
8720b57cec5SDimitry Andric // and a MCExpr to calculate the size of the desc field.
8730b57cec5SDimitry Andric auto &Context = getContext();
8740b57cec5SDimitry Andric auto *DescBegin = Context.createTempSymbol();
8750b57cec5SDimitry Andric auto *DescEnd = Context.createTempSymbol();
8760b57cec5SDimitry Andric auto *DescSZ = MCBinaryExpr::createSub(
8770b57cec5SDimitry Andric MCSymbolRefExpr::create(DescEnd, Context),
8780b57cec5SDimitry Andric MCSymbolRefExpr::create(DescBegin, Context), Context);
8790b57cec5SDimitry Andric
8800b57cec5SDimitry Andric EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
8810b57cec5SDimitry Andric [&](MCELFStreamer &OS) {
8825ffd83dbSDimitry Andric OS.emitLabel(DescBegin);
8835ffd83dbSDimitry Andric OS.emitBytes(HSAMetadataString);
8845ffd83dbSDimitry Andric OS.emitLabel(DescEnd);
8850b57cec5SDimitry Andric });
8860b57cec5SDimitry Andric return true;
8870b57cec5SDimitry Andric }
8880b57cec5SDimitry Andric
EmitKernargPreloadHeader(const MCSubtargetInfo & STI,bool TrapEnabled)8895f757f3fSDimitry Andric bool AMDGPUTargetELFStreamer::EmitKernargPreloadHeader(
890*0fca6ea1SDimitry Andric const MCSubtargetInfo &STI, bool TrapEnabled) {
8915f757f3fSDimitry Andric const uint32_t Encoded_s_nop = 0xbf800000;
892*0fca6ea1SDimitry Andric const uint32_t Encoded_s_trap = 0xbf920002;
893*0fca6ea1SDimitry Andric const uint32_t Encoded_s_endpgm = 0xbf810000;
894*0fca6ea1SDimitry Andric const uint32_t TrapInstr = TrapEnabled ? Encoded_s_trap : Encoded_s_endpgm;
8955f757f3fSDimitry Andric MCStreamer &OS = getStreamer();
896*0fca6ea1SDimitry Andric OS.emitInt32(TrapInstr);
897*0fca6ea1SDimitry Andric for (int i = 0; i < 63; ++i) {
8985f757f3fSDimitry Andric OS.emitInt32(Encoded_s_nop);
8995f757f3fSDimitry Andric }
9005f757f3fSDimitry Andric return true;
9015f757f3fSDimitry Andric }
9025f757f3fSDimitry Andric
EmitCodeEnd(const MCSubtargetInfo & STI)903fe6060f1SDimitry Andric bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
9040b57cec5SDimitry Andric const uint32_t Encoded_s_code_end = 0xbf9f0000;
905fe6060f1SDimitry Andric const uint32_t Encoded_s_nop = 0xbf800000;
906fe6060f1SDimitry Andric uint32_t Encoded_pad = Encoded_s_code_end;
907fe6060f1SDimitry Andric
908fe6060f1SDimitry Andric // Instruction cache line size in bytes.
90981ad6265SDimitry Andric const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
910fe6060f1SDimitry Andric const unsigned CacheLineSize = 1u << Log2CacheLineSize;
911fe6060f1SDimitry Andric
912fe6060f1SDimitry Andric // Extra padding amount in bytes to support prefetch mode 3.
913fe6060f1SDimitry Andric unsigned FillSize = 3 * CacheLineSize;
914fe6060f1SDimitry Andric
915fe6060f1SDimitry Andric if (AMDGPU::isGFX90A(STI)) {
916fe6060f1SDimitry Andric Encoded_pad = Encoded_s_nop;
917fe6060f1SDimitry Andric FillSize = 16 * CacheLineSize;
918fe6060f1SDimitry Andric }
9190b57cec5SDimitry Andric
9200b57cec5SDimitry Andric MCStreamer &OS = getStreamer();
92181ad6265SDimitry Andric OS.pushSection();
922bdd1243dSDimitry Andric OS.emitValueToAlignment(Align(CacheLineSize), Encoded_pad, 4);
923fe6060f1SDimitry Andric for (unsigned I = 0; I < FillSize; I += 4)
924fe6060f1SDimitry Andric OS.emitInt32(Encoded_pad);
92581ad6265SDimitry Andric OS.popSection();
9260b57cec5SDimitry Andric return true;
9270b57cec5SDimitry Andric }
9280b57cec5SDimitry Andric
EmitAmdhsaKernelDescriptor(const MCSubtargetInfo & STI,StringRef KernelName,const MCKernelDescriptor & KernelDescriptor,const MCExpr * NextVGPR,const MCExpr * NextSGPR,const MCExpr * ReserveVCC,const MCExpr * ReserveFlatScr)9290b57cec5SDimitry Andric void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
9300b57cec5SDimitry Andric const MCSubtargetInfo &STI, StringRef KernelName,
931*0fca6ea1SDimitry Andric const MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR,
932*0fca6ea1SDimitry Andric const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
933*0fca6ea1SDimitry Andric const MCExpr *ReserveFlatScr) {
9340b57cec5SDimitry Andric auto &Streamer = getStreamer();
9350b57cec5SDimitry Andric auto &Context = Streamer.getContext();
9360b57cec5SDimitry Andric
9370b57cec5SDimitry Andric MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
9380b57cec5SDimitry Andric Context.getOrCreateSymbol(Twine(KernelName)));
9390b57cec5SDimitry Andric MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
9400b57cec5SDimitry Andric Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
9410b57cec5SDimitry Andric
9420b57cec5SDimitry Andric // Copy kernel descriptor symbol's binding, other and visibility from the
9430b57cec5SDimitry Andric // kernel code symbol.
9440b57cec5SDimitry Andric KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
9450b57cec5SDimitry Andric KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
9460b57cec5SDimitry Andric KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
9470b57cec5SDimitry Andric // Kernel descriptor symbol's type and size are fixed.
9480b57cec5SDimitry Andric KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
9490b57cec5SDimitry Andric KernelDescriptorSymbol->setSize(
950*0fca6ea1SDimitry Andric MCConstantExpr::create(sizeof(amdhsa::kernel_descriptor_t), Context));
9510b57cec5SDimitry Andric
9520b57cec5SDimitry Andric // The visibility of the kernel code symbol must be protected or less to allow
9530b57cec5SDimitry Andric // static relocations from the kernel descriptor to be used.
9540b57cec5SDimitry Andric if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
9550b57cec5SDimitry Andric KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
9560b57cec5SDimitry Andric
9575ffd83dbSDimitry Andric Streamer.emitLabel(KernelDescriptorSymbol);
958*0fca6ea1SDimitry Andric Streamer.emitValue(
959*0fca6ea1SDimitry Andric KernelDescriptor.group_segment_fixed_size,
960*0fca6ea1SDimitry Andric sizeof(amdhsa::kernel_descriptor_t::group_segment_fixed_size));
961*0fca6ea1SDimitry Andric Streamer.emitValue(
962*0fca6ea1SDimitry Andric KernelDescriptor.private_segment_fixed_size,
963*0fca6ea1SDimitry Andric sizeof(amdhsa::kernel_descriptor_t::private_segment_fixed_size));
964*0fca6ea1SDimitry Andric Streamer.emitValue(KernelDescriptor.kernarg_size,
965*0fca6ea1SDimitry Andric sizeof(amdhsa::kernel_descriptor_t::kernarg_size));
966fe6060f1SDimitry Andric
967*0fca6ea1SDimitry Andric for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i)
968*0fca6ea1SDimitry Andric Streamer.emitInt8(0u);
969fe6060f1SDimitry Andric
9700b57cec5SDimitry Andric // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
9710b57cec5SDimitry Andric // expression being created is:
9720b57cec5SDimitry Andric // (start of kernel code) - (start of kernel descriptor)
9730b57cec5SDimitry Andric // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
974*0fca6ea1SDimitry Andric Streamer.emitValue(
975*0fca6ea1SDimitry Andric MCBinaryExpr::createSub(
976*0fca6ea1SDimitry Andric MCSymbolRefExpr::create(KernelCodeSymbol,
977*0fca6ea1SDimitry Andric MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
978*0fca6ea1SDimitry Andric MCSymbolRefExpr::create(KernelDescriptorSymbol,
979*0fca6ea1SDimitry Andric MCSymbolRefExpr::VK_None, Context),
9800b57cec5SDimitry Andric Context),
981*0fca6ea1SDimitry Andric sizeof(amdhsa::kernel_descriptor_t::kernel_code_entry_byte_offset));
982*0fca6ea1SDimitry Andric for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i)
983*0fca6ea1SDimitry Andric Streamer.emitInt8(0u);
984*0fca6ea1SDimitry Andric Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3,
985*0fca6ea1SDimitry Andric sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc3));
986*0fca6ea1SDimitry Andric Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1,
987*0fca6ea1SDimitry Andric sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc1));
988*0fca6ea1SDimitry Andric Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2,
989*0fca6ea1SDimitry Andric sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc2));
990*0fca6ea1SDimitry Andric Streamer.emitValue(
991*0fca6ea1SDimitry Andric KernelDescriptor.kernel_code_properties,
992*0fca6ea1SDimitry Andric sizeof(amdhsa::kernel_descriptor_t::kernel_code_properties));
993*0fca6ea1SDimitry Andric Streamer.emitValue(KernelDescriptor.kernarg_preload,
994*0fca6ea1SDimitry Andric sizeof(amdhsa::kernel_descriptor_t::kernarg_preload));
995*0fca6ea1SDimitry Andric for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i)
996*0fca6ea1SDimitry Andric Streamer.emitInt8(0u);
9970b57cec5SDimitry Andric }
998