xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides AMDGPU specific target streamer methods.
//
//===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "AMDGPUTargetStreamer.h"
14*0fca6ea1SDimitry Andric #include "AMDGPUMCKernelDescriptor.h"
15e8d8bef9SDimitry Andric #include "AMDGPUPTNote.h"
160b57cec5SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
170b57cec5SDimitry Andric #include "Utils/AMDKernelCodeTUtils.h"
180b57cec5SDimitry Andric #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
190b57cec5SDimitry Andric #include "llvm/BinaryFormat/ELF.h"
2081ad6265SDimitry Andric #include "llvm/MC/MCAssembler.h"
210b57cec5SDimitry Andric #include "llvm/MC/MCContext.h"
220b57cec5SDimitry Andric #include "llvm/MC/MCELFStreamer.h"
237a6dacacSDimitry Andric #include "llvm/MC/MCObjectWriter.h"
240b57cec5SDimitry Andric #include "llvm/MC/MCSectionELF.h"
2581ad6265SDimitry Andric #include "llvm/MC/MCSubtargetInfo.h"
26e8d8bef9SDimitry Andric #include "llvm/Support/AMDGPUMetadata.h"
27e8d8bef9SDimitry Andric #include "llvm/Support/AMDHSAKernelDescriptor.h"
2881ad6265SDimitry Andric #include "llvm/Support/Casting.h"
29*0fca6ea1SDimitry Andric #include "llvm/Support/CommandLine.h"
300b57cec5SDimitry Andric #include "llvm/Support/FormattedStream.h"
3106c3fb27SDimitry Andric #include "llvm/TargetParser/TargetParser.h"
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric using namespace llvm;
340b57cec5SDimitry Andric using namespace llvm::AMDGPU;
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
370b57cec5SDimitry Andric // AMDGPUTargetStreamer
380b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
390b57cec5SDimitry Andric 
40*0fca6ea1SDimitry Andric static cl::opt<unsigned>
41*0fca6ea1SDimitry Andric     ForceGenericVersion("amdgpu-force-generic-version",
42*0fca6ea1SDimitry Andric                         cl::desc("Force a specific generic_v<N> flag to be "
43*0fca6ea1SDimitry Andric                                  "added. For testing purposes only."),
44*0fca6ea1SDimitry Andric                         cl::ReallyHidden, cl::init(0));
45*0fca6ea1SDimitry Andric 
EmitHSAMetadataV3(StringRef HSAMetadataString)460b57cec5SDimitry Andric bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
470b57cec5SDimitry Andric   msgpack::Document HSAMetadataDoc;
480b57cec5SDimitry Andric   if (!HSAMetadataDoc.fromYAML(HSAMetadataString))
490b57cec5SDimitry Andric     return false;
500b57cec5SDimitry Andric   return EmitHSAMetadata(HSAMetadataDoc, false);
510b57cec5SDimitry Andric }
520b57cec5SDimitry Andric 
getArchNameFromElfMach(unsigned ElfMach)530b57cec5SDimitry Andric StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
540b57cec5SDimitry Andric   AMDGPU::GPUKind AK;
550b57cec5SDimitry Andric 
565f757f3fSDimitry Andric   // clang-format off
570b57cec5SDimitry Andric   switch (ElfMach) {
580b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_R600_R600:      AK = GK_R600;    break;
590b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_R600_R630:      AK = GK_R630;    break;
600b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_R600_RS880:     AK = GK_RS880;   break;
610b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_R600_RV670:     AK = GK_RV670;   break;
620b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_R600_RV710:     AK = GK_RV710;   break;
630b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_R600_RV730:     AK = GK_RV730;   break;
640b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_R600_RV770:     AK = GK_RV770;   break;
650b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_R600_CEDAR:     AK = GK_CEDAR;   break;
660b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_R600_CYPRESS:   AK = GK_CYPRESS; break;
670b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_R600_JUNIPER:   AK = GK_JUNIPER; break;
680b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_R600_REDWOOD:   AK = GK_REDWOOD; break;
690b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_R600_SUMO:      AK = GK_SUMO;    break;
700b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_R600_BARTS:     AK = GK_BARTS;   break;
710b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_R600_CAICOS:    AK = GK_CAICOS;  break;
720b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_R600_CAYMAN:    AK = GK_CAYMAN;  break;
730b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_R600_TURKS:     AK = GK_TURKS;   break;
740b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600:  AK = GK_GFX600;  break;
750b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601:  AK = GK_GFX601;  break;
76e8d8bef9SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602:  AK = GK_GFX602;  break;
770b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700:  AK = GK_GFX700;  break;
780b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701:  AK = GK_GFX701;  break;
790b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702:  AK = GK_GFX702;  break;
800b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703:  AK = GK_GFX703;  break;
810b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704:  AK = GK_GFX704;  break;
82e8d8bef9SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705:  AK = GK_GFX705;  break;
830b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801:  AK = GK_GFX801;  break;
840b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802:  AK = GK_GFX802;  break;
850b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803:  AK = GK_GFX803;  break;
86e8d8bef9SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805:  AK = GK_GFX805;  break;
870b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810:  AK = GK_GFX810;  break;
880b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900:  AK = GK_GFX900;  break;
890b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902:  AK = GK_GFX902;  break;
900b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904:  AK = GK_GFX904;  break;
910b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906:  AK = GK_GFX906;  break;
920b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908:  AK = GK_GFX908;  break;
930b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909:  AK = GK_GFX909;  break;
94fe6060f1SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A:  AK = GK_GFX90A;  break;
95e8d8bef9SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C:  AK = GK_GFX90C;  break;
9681ad6265SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940:  AK = GK_GFX940;  break;
9706c3fb27SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941:  AK = GK_GFX941;  break;
9806c3fb27SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942:  AK = GK_GFX942;  break;
990b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
1000b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
1010b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
102fe6060f1SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: AK = GK_GFX1013; break;
1035ffd83dbSDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break;
104e8d8bef9SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break;
105e8d8bef9SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break;
106e8d8bef9SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break;
107fe6060f1SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034; break;
108fe6060f1SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: AK = GK_GFX1035; break;
10981ad6265SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: AK = GK_GFX1036; break;
11081ad6265SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: AK = GK_GFX1100; break;
11181ad6265SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: AK = GK_GFX1101; break;
11281ad6265SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: AK = GK_GFX1102; break;
11381ad6265SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break;
11406c3fb27SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break;
11506c3fb27SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
116*0fca6ea1SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152: AK = GK_GFX1152; break;
1175f757f3fSDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
1185f757f3fSDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
119*0fca6ea1SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:     AK = GK_GFX9_GENERIC; break;
120*0fca6ea1SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC:  AK = GK_GFX10_1_GENERIC; break;
121*0fca6ea1SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC:  AK = GK_GFX10_3_GENERIC; break;
122*0fca6ea1SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC:    AK = GK_GFX11_GENERIC; break;
123*0fca6ea1SDimitry Andric   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC:    AK = GK_GFX12_GENERIC; break;
1240b57cec5SDimitry Andric   case ELF::EF_AMDGPU_MACH_NONE:           AK = GK_NONE;    break;
1255f757f3fSDimitry Andric   default:                                 AK = GK_NONE;    break;
1260b57cec5SDimitry Andric   }
1275f757f3fSDimitry Andric   // clang-format on
1280b57cec5SDimitry Andric 
1290b57cec5SDimitry Andric   StringRef GPUName = getArchNameAMDGCN(AK);
1300b57cec5SDimitry Andric   if (GPUName != "")
1310b57cec5SDimitry Andric     return GPUName;
1320b57cec5SDimitry Andric   return getArchNameR600(AK);
1330b57cec5SDimitry Andric }
1340b57cec5SDimitry Andric 
getElfMach(StringRef GPU)1350b57cec5SDimitry Andric unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
1360b57cec5SDimitry Andric   AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
1370b57cec5SDimitry Andric   if (AK == AMDGPU::GPUKind::GK_NONE)
1380b57cec5SDimitry Andric     AK = parseArchR600(GPU);
1390b57cec5SDimitry Andric 
1405f757f3fSDimitry Andric   // clang-format off
1410b57cec5SDimitry Andric   switch (AK) {
1420b57cec5SDimitry Andric   case GK_R600:    return ELF::EF_AMDGPU_MACH_R600_R600;
1430b57cec5SDimitry Andric   case GK_R630:    return ELF::EF_AMDGPU_MACH_R600_R630;
1440b57cec5SDimitry Andric   case GK_RS880:   return ELF::EF_AMDGPU_MACH_R600_RS880;
1450b57cec5SDimitry Andric   case GK_RV670:   return ELF::EF_AMDGPU_MACH_R600_RV670;
1460b57cec5SDimitry Andric   case GK_RV710:   return ELF::EF_AMDGPU_MACH_R600_RV710;
1470b57cec5SDimitry Andric   case GK_RV730:   return ELF::EF_AMDGPU_MACH_R600_RV730;
1480b57cec5SDimitry Andric   case GK_RV770:   return ELF::EF_AMDGPU_MACH_R600_RV770;
1490b57cec5SDimitry Andric   case GK_CEDAR:   return ELF::EF_AMDGPU_MACH_R600_CEDAR;
1500b57cec5SDimitry Andric   case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
1510b57cec5SDimitry Andric   case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
1520b57cec5SDimitry Andric   case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
1530b57cec5SDimitry Andric   case GK_SUMO:    return ELF::EF_AMDGPU_MACH_R600_SUMO;
1540b57cec5SDimitry Andric   case GK_BARTS:   return ELF::EF_AMDGPU_MACH_R600_BARTS;
1550b57cec5SDimitry Andric   case GK_CAICOS:  return ELF::EF_AMDGPU_MACH_R600_CAICOS;
1560b57cec5SDimitry Andric   case GK_CAYMAN:  return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
1570b57cec5SDimitry Andric   case GK_TURKS:   return ELF::EF_AMDGPU_MACH_R600_TURKS;
1580b57cec5SDimitry Andric   case GK_GFX600:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
1590b57cec5SDimitry Andric   case GK_GFX601:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
160e8d8bef9SDimitry Andric   case GK_GFX602:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602;
1610b57cec5SDimitry Andric   case GK_GFX700:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
1620b57cec5SDimitry Andric   case GK_GFX701:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
1630b57cec5SDimitry Andric   case GK_GFX702:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
1640b57cec5SDimitry Andric   case GK_GFX703:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
1650b57cec5SDimitry Andric   case GK_GFX704:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
166e8d8bef9SDimitry Andric   case GK_GFX705:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705;
1670b57cec5SDimitry Andric   case GK_GFX801:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
1680b57cec5SDimitry Andric   case GK_GFX802:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
1690b57cec5SDimitry Andric   case GK_GFX803:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
170e8d8bef9SDimitry Andric   case GK_GFX805:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805;
1710b57cec5SDimitry Andric   case GK_GFX810:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
1720b57cec5SDimitry Andric   case GK_GFX900:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
1730b57cec5SDimitry Andric   case GK_GFX902:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
1740b57cec5SDimitry Andric   case GK_GFX904:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
1750b57cec5SDimitry Andric   case GK_GFX906:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
1760b57cec5SDimitry Andric   case GK_GFX908:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
1770b57cec5SDimitry Andric   case GK_GFX909:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
178fe6060f1SDimitry Andric   case GK_GFX90A:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
179e8d8bef9SDimitry Andric   case GK_GFX90C:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
18081ad6265SDimitry Andric   case GK_GFX940:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
18106c3fb27SDimitry Andric   case GK_GFX941:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
18206c3fb27SDimitry Andric   case GK_GFX942:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
1830b57cec5SDimitry Andric   case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
1840b57cec5SDimitry Andric   case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
1850b57cec5SDimitry Andric   case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
186fe6060f1SDimitry Andric   case GK_GFX1013: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013;
1875ffd83dbSDimitry Andric   case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030;
188e8d8bef9SDimitry Andric   case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031;
189e8d8bef9SDimitry Andric   case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032;
190e8d8bef9SDimitry Andric   case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033;
191fe6060f1SDimitry Andric   case GK_GFX1034: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034;
192fe6060f1SDimitry Andric   case GK_GFX1035: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035;
19381ad6265SDimitry Andric   case GK_GFX1036: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036;
19481ad6265SDimitry Andric   case GK_GFX1100: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100;
19581ad6265SDimitry Andric   case GK_GFX1101: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101;
19681ad6265SDimitry Andric   case GK_GFX1102: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102;
19781ad6265SDimitry Andric   case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103;
19806c3fb27SDimitry Andric   case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150;
19906c3fb27SDimitry Andric   case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
200*0fca6ea1SDimitry Andric   case GK_GFX1152: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152;
2015f757f3fSDimitry Andric   case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
2025f757f3fSDimitry Andric   case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
203*0fca6ea1SDimitry Andric   case GK_GFX9_GENERIC:     return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
204*0fca6ea1SDimitry Andric   case GK_GFX10_1_GENERIC:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC;
205*0fca6ea1SDimitry Andric   case GK_GFX10_3_GENERIC:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC;
206*0fca6ea1SDimitry Andric   case GK_GFX11_GENERIC:    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC;
207*0fca6ea1SDimitry Andric   case GK_GFX12_GENERIC:    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC;
2080b57cec5SDimitry Andric   case GK_NONE:    return ELF::EF_AMDGPU_MACH_NONE;
2090b57cec5SDimitry Andric   }
2105f757f3fSDimitry Andric   // clang-format on
2110b57cec5SDimitry Andric 
2120b57cec5SDimitry Andric   llvm_unreachable("unknown GPU");
2130b57cec5SDimitry Andric }
2140b57cec5SDimitry Andric 
2150b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
2160b57cec5SDimitry Andric // AMDGPUTargetAsmStreamer
2170b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
2180b57cec5SDimitry Andric 
AMDGPUTargetAsmStreamer(MCStreamer & S,formatted_raw_ostream & OS)2190b57cec5SDimitry Andric AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
2200b57cec5SDimitry Andric                                                  formatted_raw_ostream &OS)
2210b57cec5SDimitry Andric     : AMDGPUTargetStreamer(S), OS(OS) { }
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric // A hook for emitting stuff at the end.
2240b57cec5SDimitry Andric // We use it for emitting the accumulated PAL metadata as directives.
225e8d8bef9SDimitry Andric // The PAL metadata is reset after it is emitted.
finish()2260b57cec5SDimitry Andric void AMDGPUTargetAsmStreamer::finish() {
2270b57cec5SDimitry Andric   std::string S;
2280b57cec5SDimitry Andric   getPALMetadata()->toString(S);
2290b57cec5SDimitry Andric   OS << S;
230e8d8bef9SDimitry Andric 
231e8d8bef9SDimitry Andric   // Reset the pal metadata so its data will not affect a compilation that
232e8d8bef9SDimitry Andric   // reuses this object.
233e8d8bef9SDimitry Andric   getPALMetadata()->reset();
2340b57cec5SDimitry Andric }
2350b57cec5SDimitry Andric 
EmitDirectiveAMDGCNTarget()236fe6060f1SDimitry Andric void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() {
237fe6060f1SDimitry Andric   OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
2380b57cec5SDimitry Andric }
2390b57cec5SDimitry Andric 
EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)2407a6dacacSDimitry Andric void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
2417a6dacacSDimitry Andric     unsigned COV) {
2427a6dacacSDimitry Andric   AMDGPUTargetStreamer::EmitDirectiveAMDHSACodeObjectVersion(COV);
2437a6dacacSDimitry Andric   OS << "\t.amdhsa_code_object_version " << COV << '\n';
2440b57cec5SDimitry Andric }
2450b57cec5SDimitry Andric 
EmitAMDKernelCodeT(AMDGPUMCKernelCodeT & Header)246*0fca6ea1SDimitry Andric void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
2470b57cec5SDimitry Andric   OS << "\t.amd_kernel_code_t\n";
248*0fca6ea1SDimitry Andric   Header.EmitKernelCodeT(OS, getContext());
2490b57cec5SDimitry Andric   OS << "\t.end_amd_kernel_code_t\n";
2500b57cec5SDimitry Andric }
2510b57cec5SDimitry Andric 
EmitAMDGPUSymbolType(StringRef SymbolName,unsigned Type)2520b57cec5SDimitry Andric void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
2530b57cec5SDimitry Andric                                                    unsigned Type) {
2540b57cec5SDimitry Andric   switch (Type) {
2550b57cec5SDimitry Andric     default: llvm_unreachable("Invalid AMDGPU symbol type");
2560b57cec5SDimitry Andric     case ELF::STT_AMDGPU_HSA_KERNEL:
2570b57cec5SDimitry Andric       OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
2580b57cec5SDimitry Andric       break;
2590b57cec5SDimitry Andric   }
2600b57cec5SDimitry Andric }
2610b57cec5SDimitry Andric 
emitAMDGPULDS(MCSymbol * Symbol,unsigned Size,Align Alignment)2620b57cec5SDimitry Andric void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
2635ffd83dbSDimitry Andric                                             Align Alignment) {
2645ffd83dbSDimitry Andric   OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
2655ffd83dbSDimitry Andric      << Alignment.value() << '\n';
2660b57cec5SDimitry Andric }
2670b57cec5SDimitry Andric 
EmitISAVersion()268fe6060f1SDimitry Andric bool AMDGPUTargetAsmStreamer::EmitISAVersion() {
269fe6060f1SDimitry Andric   OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
2700b57cec5SDimitry Andric   return true;
2710b57cec5SDimitry Andric }
2720b57cec5SDimitry Andric 
EmitHSAMetadata(msgpack::Document & HSAMetadataDoc,bool Strict)2730b57cec5SDimitry Andric bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
2740b57cec5SDimitry Andric     msgpack::Document &HSAMetadataDoc, bool Strict) {
275e8d8bef9SDimitry Andric   HSAMD::V3::MetadataVerifier Verifier(Strict);
2760b57cec5SDimitry Andric   if (!Verifier.verify(HSAMetadataDoc.getRoot()))
2770b57cec5SDimitry Andric     return false;
2780b57cec5SDimitry Andric 
2790b57cec5SDimitry Andric   std::string HSAMetadataString;
2800b57cec5SDimitry Andric   raw_string_ostream StrOS(HSAMetadataString);
2810b57cec5SDimitry Andric   HSAMetadataDoc.toYAML(StrOS);
2820b57cec5SDimitry Andric 
283e8d8bef9SDimitry Andric   OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n';
2840b57cec5SDimitry Andric   OS << StrOS.str() << '\n';
285e8d8bef9SDimitry Andric   OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n';
2860b57cec5SDimitry Andric   return true;
2870b57cec5SDimitry Andric }
2880b57cec5SDimitry Andric 
EmitKernargPreloadHeader(const MCSubtargetInfo & STI,bool TrapEnabled)289*0fca6ea1SDimitry Andric bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader(
290*0fca6ea1SDimitry Andric     const MCSubtargetInfo &STI, bool TrapEnabled) {
291*0fca6ea1SDimitry Andric   OS << (TrapEnabled ? "\ts_trap 2" : "\ts_endpgm")
292*0fca6ea1SDimitry Andric      << " ; Kernarg preload header. Trap with incompatible firmware that "
293*0fca6ea1SDimitry Andric         "doesn't support preloading kernel arguments.\n";
294*0fca6ea1SDimitry Andric   OS << "\t.fill 63, 4, 0xbf800000 ; s_nop 0\n";
295*0fca6ea1SDimitry Andric   return true;
296*0fca6ea1SDimitry Andric }
297*0fca6ea1SDimitry Andric 
EmitCodeEnd(const MCSubtargetInfo & STI)298fe6060f1SDimitry Andric bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
2990b57cec5SDimitry Andric   const uint32_t Encoded_s_code_end = 0xbf9f0000;
300fe6060f1SDimitry Andric   const uint32_t Encoded_s_nop = 0xbf800000;
301fe6060f1SDimitry Andric   uint32_t Encoded_pad = Encoded_s_code_end;
302fe6060f1SDimitry Andric 
303fe6060f1SDimitry Andric   // Instruction cache line size in bytes.
30481ad6265SDimitry Andric   const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
305fe6060f1SDimitry Andric   const unsigned CacheLineSize = 1u << Log2CacheLineSize;
306fe6060f1SDimitry Andric 
307fe6060f1SDimitry Andric   // Extra padding amount in bytes to support prefetch mode 3.
308fe6060f1SDimitry Andric   unsigned FillSize = 3 * CacheLineSize;
309fe6060f1SDimitry Andric 
310fe6060f1SDimitry Andric   if (AMDGPU::isGFX90A(STI)) {
311fe6060f1SDimitry Andric     Encoded_pad = Encoded_s_nop;
312fe6060f1SDimitry Andric     FillSize = 16 * CacheLineSize;
313fe6060f1SDimitry Andric   }
314fe6060f1SDimitry Andric 
315fe6060f1SDimitry Andric   OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
316fe6060f1SDimitry Andric   OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
3170b57cec5SDimitry Andric   return true;
3180b57cec5SDimitry Andric }
3190b57cec5SDimitry Andric 
EmitAmdhsaKernelDescriptor(const MCSubtargetInfo & STI,StringRef KernelName,const MCKernelDescriptor & KD,const MCExpr * NextVGPR,const MCExpr * NextSGPR,const MCExpr * ReserveVCC,const MCExpr * ReserveFlatScr)3200b57cec5SDimitry Andric void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
3210b57cec5SDimitry Andric     const MCSubtargetInfo &STI, StringRef KernelName,
322*0fca6ea1SDimitry Andric     const MCKernelDescriptor &KD, const MCExpr *NextVGPR,
323*0fca6ea1SDimitry Andric     const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
324*0fca6ea1SDimitry Andric     const MCExpr *ReserveFlatScr) {
3250b57cec5SDimitry Andric   IsaVersion IVersion = getIsaVersion(STI.getCPU());
326*0fca6ea1SDimitry Andric   const MCAsmInfo *MAI = getContext().getAsmInfo();
3270b57cec5SDimitry Andric 
3280b57cec5SDimitry Andric   OS << "\t.amdhsa_kernel " << KernelName << '\n';
3290b57cec5SDimitry Andric 
330*0fca6ea1SDimitry Andric   auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
331*0fca6ea1SDimitry Andric                         StringRef Directive) {
332*0fca6ea1SDimitry Andric     int64_t IVal;
333*0fca6ea1SDimitry Andric     OS << "\t\t" << Directive << ' ';
334*0fca6ea1SDimitry Andric     const MCExpr *pgm_rsrc1_bits =
335*0fca6ea1SDimitry Andric         MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext());
336*0fca6ea1SDimitry Andric     if (pgm_rsrc1_bits->evaluateAsAbsolute(IVal))
337*0fca6ea1SDimitry Andric       OS << static_cast<uint64_t>(IVal);
338*0fca6ea1SDimitry Andric     else
339*0fca6ea1SDimitry Andric       pgm_rsrc1_bits->print(OS, MAI);
340*0fca6ea1SDimitry Andric     OS << '\n';
341*0fca6ea1SDimitry Andric   };
3420b57cec5SDimitry Andric 
343*0fca6ea1SDimitry Andric   auto EmitMCExpr = [&](const MCExpr *Value) {
344*0fca6ea1SDimitry Andric     int64_t evaluatableValue;
345*0fca6ea1SDimitry Andric     if (Value->evaluateAsAbsolute(evaluatableValue)) {
346*0fca6ea1SDimitry Andric       OS << static_cast<uint64_t>(evaluatableValue);
347*0fca6ea1SDimitry Andric     } else {
348*0fca6ea1SDimitry Andric       Value->print(OS, MAI);
3495f757f3fSDimitry Andric     }
350*0fca6ea1SDimitry Andric   };
351*0fca6ea1SDimitry Andric 
352*0fca6ea1SDimitry Andric   OS << "\t\t.amdhsa_group_segment_fixed_size ";
353*0fca6ea1SDimitry Andric   EmitMCExpr(KD.group_segment_fixed_size);
354*0fca6ea1SDimitry Andric   OS << '\n';
355*0fca6ea1SDimitry Andric 
356*0fca6ea1SDimitry Andric   OS << "\t\t.amdhsa_private_segment_fixed_size ";
357*0fca6ea1SDimitry Andric   EmitMCExpr(KD.private_segment_fixed_size);
358*0fca6ea1SDimitry Andric   OS << '\n';
359*0fca6ea1SDimitry Andric 
360*0fca6ea1SDimitry Andric   OS << "\t\t.amdhsa_kernarg_size ";
361*0fca6ea1SDimitry Andric   EmitMCExpr(KD.kernarg_size);
362*0fca6ea1SDimitry Andric   OS << '\n';
363*0fca6ea1SDimitry Andric 
364*0fca6ea1SDimitry Andric   PrintField(
365*0fca6ea1SDimitry Andric       KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
366*0fca6ea1SDimitry Andric       amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count");
367*0fca6ea1SDimitry Andric 
368*0fca6ea1SDimitry Andric   if (!hasArchitectedFlatScratch(STI))
369*0fca6ea1SDimitry Andric     PrintField(
370*0fca6ea1SDimitry Andric         KD.kernel_code_properties,
371*0fca6ea1SDimitry Andric         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
372*0fca6ea1SDimitry Andric         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
373*0fca6ea1SDimitry Andric         ".amdhsa_user_sgpr_private_segment_buffer");
374*0fca6ea1SDimitry Andric   PrintField(KD.kernel_code_properties,
375*0fca6ea1SDimitry Andric              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
376*0fca6ea1SDimitry Andric              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
377*0fca6ea1SDimitry Andric              ".amdhsa_user_sgpr_dispatch_ptr");
378*0fca6ea1SDimitry Andric   PrintField(KD.kernel_code_properties,
379*0fca6ea1SDimitry Andric              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
380*0fca6ea1SDimitry Andric              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
381*0fca6ea1SDimitry Andric              ".amdhsa_user_sgpr_queue_ptr");
382*0fca6ea1SDimitry Andric   PrintField(KD.kernel_code_properties,
383*0fca6ea1SDimitry Andric              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
384*0fca6ea1SDimitry Andric              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
385*0fca6ea1SDimitry Andric              ".amdhsa_user_sgpr_kernarg_segment_ptr");
386*0fca6ea1SDimitry Andric   PrintField(KD.kernel_code_properties,
387*0fca6ea1SDimitry Andric              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
388*0fca6ea1SDimitry Andric              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
389*0fca6ea1SDimitry Andric              ".amdhsa_user_sgpr_dispatch_id");
390*0fca6ea1SDimitry Andric   if (!hasArchitectedFlatScratch(STI))
391*0fca6ea1SDimitry Andric     PrintField(KD.kernel_code_properties,
392*0fca6ea1SDimitry Andric                amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
393*0fca6ea1SDimitry Andric                amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
394*0fca6ea1SDimitry Andric                ".amdhsa_user_sgpr_flat_scratch_init");
395*0fca6ea1SDimitry Andric   if (hasKernargPreload(STI)) {
396*0fca6ea1SDimitry Andric     PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
397*0fca6ea1SDimitry Andric                amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
398*0fca6ea1SDimitry Andric                ".amdhsa_user_sgpr_kernarg_preload_length");
399*0fca6ea1SDimitry Andric     PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
400*0fca6ea1SDimitry Andric                amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
401*0fca6ea1SDimitry Andric                ".amdhsa_user_sgpr_kernarg_preload_offset");
402*0fca6ea1SDimitry Andric   }
403*0fca6ea1SDimitry Andric   PrintField(
404*0fca6ea1SDimitry Andric       KD.kernel_code_properties,
405*0fca6ea1SDimitry Andric       amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
406*0fca6ea1SDimitry Andric       amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
407*0fca6ea1SDimitry Andric       ".amdhsa_user_sgpr_private_segment_size");
4080b57cec5SDimitry Andric   if (IVersion.Major >= 10)
409*0fca6ea1SDimitry Andric     PrintField(KD.kernel_code_properties,
410*0fca6ea1SDimitry Andric                amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
411*0fca6ea1SDimitry Andric                amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
412*0fca6ea1SDimitry Andric                ".amdhsa_wavefront_size32");
41306c3fb27SDimitry Andric   if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
414*0fca6ea1SDimitry Andric     PrintField(KD.kernel_code_properties,
415*0fca6ea1SDimitry Andric                amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
416*0fca6ea1SDimitry Andric                amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
417*0fca6ea1SDimitry Andric                ".amdhsa_uses_dynamic_stack");
418*0fca6ea1SDimitry Andric   PrintField(KD.compute_pgm_rsrc2,
419*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
420*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
421fe6060f1SDimitry Andric              (hasArchitectedFlatScratch(STI)
422fe6060f1SDimitry Andric                   ? ".amdhsa_enable_private_segment"
423*0fca6ea1SDimitry Andric                   : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
424*0fca6ea1SDimitry Andric   PrintField(KD.compute_pgm_rsrc2,
425*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
426*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
427*0fca6ea1SDimitry Andric              ".amdhsa_system_sgpr_workgroup_id_x");
428*0fca6ea1SDimitry Andric   PrintField(KD.compute_pgm_rsrc2,
429*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
430*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
431*0fca6ea1SDimitry Andric              ".amdhsa_system_sgpr_workgroup_id_y");
432*0fca6ea1SDimitry Andric   PrintField(KD.compute_pgm_rsrc2,
433*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
434*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
435*0fca6ea1SDimitry Andric              ".amdhsa_system_sgpr_workgroup_id_z");
436*0fca6ea1SDimitry Andric   PrintField(KD.compute_pgm_rsrc2,
437*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
438*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
439*0fca6ea1SDimitry Andric              ".amdhsa_system_sgpr_workgroup_info");
440*0fca6ea1SDimitry Andric   PrintField(KD.compute_pgm_rsrc2,
441*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
442*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
443*0fca6ea1SDimitry Andric              ".amdhsa_system_vgpr_workitem_id");
4440b57cec5SDimitry Andric 
4450b57cec5SDimitry Andric   // These directives are required.
446*0fca6ea1SDimitry Andric   OS << "\t\t.amdhsa_next_free_vgpr ";
447*0fca6ea1SDimitry Andric   EmitMCExpr(NextVGPR);
448*0fca6ea1SDimitry Andric   OS << '\n';
4490b57cec5SDimitry Andric 
450*0fca6ea1SDimitry Andric   OS << "\t\t.amdhsa_next_free_sgpr ";
451*0fca6ea1SDimitry Andric   EmitMCExpr(NextSGPR);
452*0fca6ea1SDimitry Andric   OS << '\n';
453fe6060f1SDimitry Andric 
454*0fca6ea1SDimitry Andric   if (AMDGPU::isGFX90A(STI)) {
455*0fca6ea1SDimitry Andric     // MCExpr equivalent of taking the (accum_offset + 1) * 4.
456*0fca6ea1SDimitry Andric     const MCExpr *accum_bits = MCKernelDescriptor::bits_get(
457*0fca6ea1SDimitry Andric         KD.compute_pgm_rsrc3,
458*0fca6ea1SDimitry Andric         amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
459*0fca6ea1SDimitry Andric         amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
460*0fca6ea1SDimitry Andric     accum_bits = MCBinaryExpr::createAdd(
461*0fca6ea1SDimitry Andric         accum_bits, MCConstantExpr::create(1, getContext()), getContext());
462*0fca6ea1SDimitry Andric     accum_bits = MCBinaryExpr::createMul(
463*0fca6ea1SDimitry Andric         accum_bits, MCConstantExpr::create(4, getContext()), getContext());
464*0fca6ea1SDimitry Andric     OS << "\t\t.amdhsa_accum_offset ";
465*0fca6ea1SDimitry Andric     EmitMCExpr(accum_bits);
466*0fca6ea1SDimitry Andric     OS << '\n';
467*0fca6ea1SDimitry Andric   }
468*0fca6ea1SDimitry Andric 
469*0fca6ea1SDimitry Andric   OS << "\t\t.amdhsa_reserve_vcc ";
470*0fca6ea1SDimitry Andric   EmitMCExpr(ReserveVCC);
471*0fca6ea1SDimitry Andric   OS << '\n';
472*0fca6ea1SDimitry Andric 
473*0fca6ea1SDimitry Andric   if (IVersion.Major >= 7 && !hasArchitectedFlatScratch(STI)) {
474*0fca6ea1SDimitry Andric     OS << "\t\t.amdhsa_reserve_flat_scratch ";
475*0fca6ea1SDimitry Andric     EmitMCExpr(ReserveFlatScr);
476*0fca6ea1SDimitry Andric     OS << '\n';
477*0fca6ea1SDimitry Andric   }
478fe6060f1SDimitry Andric 
47906c3fb27SDimitry Andric   switch (CodeObjectVersion) {
480fe6060f1SDimitry Andric   default:
481fe6060f1SDimitry Andric     break;
48206c3fb27SDimitry Andric   case AMDGPU::AMDHSA_COV4:
48306c3fb27SDimitry Andric   case AMDGPU::AMDHSA_COV5:
484fe6060f1SDimitry Andric     if (getTargetID()->isXnackSupported())
485fe6060f1SDimitry Andric       OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
486fe6060f1SDimitry Andric     break;
487fe6060f1SDimitry Andric   }
4880b57cec5SDimitry Andric 
489*0fca6ea1SDimitry Andric   PrintField(KD.compute_pgm_rsrc1,
490*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
491*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
492*0fca6ea1SDimitry Andric              ".amdhsa_float_round_mode_32");
493*0fca6ea1SDimitry Andric   PrintField(KD.compute_pgm_rsrc1,
494*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
495*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
496*0fca6ea1SDimitry Andric              ".amdhsa_float_round_mode_16_64");
497*0fca6ea1SDimitry Andric   PrintField(KD.compute_pgm_rsrc1,
498*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
499*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
500*0fca6ea1SDimitry Andric              ".amdhsa_float_denorm_mode_32");
501*0fca6ea1SDimitry Andric   PrintField(KD.compute_pgm_rsrc1,
502*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
503*0fca6ea1SDimitry Andric              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
504*0fca6ea1SDimitry Andric              ".amdhsa_float_denorm_mode_16_64");
5055f757f3fSDimitry Andric   if (IVersion.Major < 12) {
506*0fca6ea1SDimitry Andric     PrintField(KD.compute_pgm_rsrc1,
507*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
508*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
509*0fca6ea1SDimitry Andric                ".amdhsa_dx10_clamp");
510*0fca6ea1SDimitry Andric     PrintField(KD.compute_pgm_rsrc1,
511*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
512*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
513*0fca6ea1SDimitry Andric                ".amdhsa_ieee_mode");
5145f757f3fSDimitry Andric   }
515*0fca6ea1SDimitry Andric   if (IVersion.Major >= 9) {
516*0fca6ea1SDimitry Andric     PrintField(KD.compute_pgm_rsrc1,
517*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
518*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
519*0fca6ea1SDimitry Andric                ".amdhsa_fp16_overflow");
520*0fca6ea1SDimitry Andric   }
521fe6060f1SDimitry Andric   if (AMDGPU::isGFX90A(STI))
522*0fca6ea1SDimitry Andric     PrintField(KD.compute_pgm_rsrc3,
523*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
524*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
5250b57cec5SDimitry Andric   if (IVersion.Major >= 10) {
526*0fca6ea1SDimitry Andric     PrintField(KD.compute_pgm_rsrc1,
527*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
528*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
529*0fca6ea1SDimitry Andric                ".amdhsa_workgroup_processor_mode");
530*0fca6ea1SDimitry Andric     PrintField(KD.compute_pgm_rsrc1,
531*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
532*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
533*0fca6ea1SDimitry Andric                ".amdhsa_memory_ordered");
534*0fca6ea1SDimitry Andric     PrintField(KD.compute_pgm_rsrc1,
535*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
536*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
537*0fca6ea1SDimitry Andric                ".amdhsa_forward_progress");
538647cbc5dSDimitry Andric   }
539647cbc5dSDimitry Andric   if (IVersion.Major >= 10 && IVersion.Major < 12) {
540*0fca6ea1SDimitry Andric     PrintField(KD.compute_pgm_rsrc3,
541*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
542*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
543*0fca6ea1SDimitry Andric                ".amdhsa_shared_vgpr_count");
5440b57cec5SDimitry Andric   }
545*0fca6ea1SDimitry Andric   if (IVersion.Major >= 12) {
546*0fca6ea1SDimitry Andric     PrintField(KD.compute_pgm_rsrc1,
547*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
548*0fca6ea1SDimitry Andric                amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
549*0fca6ea1SDimitry Andric                ".amdhsa_round_robin_scheduling");
550*0fca6ea1SDimitry Andric   }
551*0fca6ea1SDimitry Andric   PrintField(
552*0fca6ea1SDimitry Andric       KD.compute_pgm_rsrc2,
553*0fca6ea1SDimitry Andric       amdhsa::
554*0fca6ea1SDimitry Andric           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
555*0fca6ea1SDimitry Andric       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
556*0fca6ea1SDimitry Andric       ".amdhsa_exception_fp_ieee_invalid_op");
557*0fca6ea1SDimitry Andric   PrintField(
558*0fca6ea1SDimitry Andric       KD.compute_pgm_rsrc2,
559*0fca6ea1SDimitry Andric       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
560*0fca6ea1SDimitry Andric       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
561*0fca6ea1SDimitry Andric       ".amdhsa_exception_fp_denorm_src");
562*0fca6ea1SDimitry Andric   PrintField(
563*0fca6ea1SDimitry Andric       KD.compute_pgm_rsrc2,
564*0fca6ea1SDimitry Andric       amdhsa::
565*0fca6ea1SDimitry Andric           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
566*0fca6ea1SDimitry Andric       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
567*0fca6ea1SDimitry Andric       ".amdhsa_exception_fp_ieee_div_zero");
568*0fca6ea1SDimitry Andric   PrintField(
569*0fca6ea1SDimitry Andric       KD.compute_pgm_rsrc2,
570*0fca6ea1SDimitry Andric       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
571*0fca6ea1SDimitry Andric       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
572*0fca6ea1SDimitry Andric       ".amdhsa_exception_fp_ieee_overflow");
573*0fca6ea1SDimitry Andric   PrintField(
574*0fca6ea1SDimitry Andric       KD.compute_pgm_rsrc2,
575*0fca6ea1SDimitry Andric       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
576*0fca6ea1SDimitry Andric       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
577*0fca6ea1SDimitry Andric       ".amdhsa_exception_fp_ieee_underflow");
578*0fca6ea1SDimitry Andric   PrintField(
579*0fca6ea1SDimitry Andric       KD.compute_pgm_rsrc2,
580*0fca6ea1SDimitry Andric       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
581*0fca6ea1SDimitry Andric       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
582*0fca6ea1SDimitry Andric       ".amdhsa_exception_fp_ieee_inexact");
583*0fca6ea1SDimitry Andric   PrintField(
584*0fca6ea1SDimitry Andric       KD.compute_pgm_rsrc2,
585*0fca6ea1SDimitry Andric       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
586*0fca6ea1SDimitry Andric       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
587*0fca6ea1SDimitry Andric       ".amdhsa_exception_int_div_zero");
5880b57cec5SDimitry Andric 
5890b57cec5SDimitry Andric   OS << "\t.end_amdhsa_kernel\n";
5900b57cec5SDimitry Andric }
5910b57cec5SDimitry Andric 
5920b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
5930b57cec5SDimitry Andric // AMDGPUTargetELFStreamer
5940b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
5950b57cec5SDimitry Andric 
AMDGPUTargetELFStreamer(MCStreamer & S,const MCSubtargetInfo & STI)5965ffd83dbSDimitry Andric AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
5975ffd83dbSDimitry Andric                                                  const MCSubtargetInfo &STI)
598fe6060f1SDimitry Andric     : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {}
5990b57cec5SDimitry Andric 
getStreamer()6000b57cec5SDimitry Andric MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
6010b57cec5SDimitry Andric   return static_cast<MCELFStreamer &>(Streamer);
6020b57cec5SDimitry Andric }
6030b57cec5SDimitry Andric 
6040b57cec5SDimitry Andric // A hook for emitting stuff at the end.
6050b57cec5SDimitry Andric // We use it for emitting the accumulated PAL metadata as a .note record.
606e8d8bef9SDimitry Andric // The PAL metadata is reset after it is emitted.
finish()6070b57cec5SDimitry Andric void AMDGPUTargetELFStreamer::finish() {
608*0fca6ea1SDimitry Andric   ELFObjectWriter &W = getStreamer().getWriter();
609*0fca6ea1SDimitry Andric   W.setELFHeaderEFlags(getEFlags());
610*0fca6ea1SDimitry Andric   W.setOverrideABIVersion(
6117a6dacacSDimitry Andric       getELFABIVersion(STI.getTargetTriple(), CodeObjectVersion));
612fe6060f1SDimitry Andric 
6130b57cec5SDimitry Andric   std::string Blob;
6140b57cec5SDimitry Andric   const char *Vendor = getPALMetadata()->getVendor();
6150b57cec5SDimitry Andric   unsigned Type = getPALMetadata()->getType();
6160b57cec5SDimitry Andric   getPALMetadata()->toBlob(Type, Blob);
6170b57cec5SDimitry Andric   if (Blob.empty())
6180b57cec5SDimitry Andric     return;
6190b57cec5SDimitry Andric   EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
6205ffd83dbSDimitry Andric            [&](MCELFStreamer &OS) { OS.emitBytes(Blob); });
621e8d8bef9SDimitry Andric 
622e8d8bef9SDimitry Andric   // Reset the pal metadata so its data will not affect a compilation that
623e8d8bef9SDimitry Andric   // reuses this object.
624e8d8bef9SDimitry Andric   getPALMetadata()->reset();
6250b57cec5SDimitry Andric }
6260b57cec5SDimitry Andric 
EmitNote(StringRef Name,const MCExpr * DescSZ,unsigned NoteType,function_ref<void (MCELFStreamer &)> EmitDesc)6270b57cec5SDimitry Andric void AMDGPUTargetELFStreamer::EmitNote(
6280b57cec5SDimitry Andric     StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
6290b57cec5SDimitry Andric     function_ref<void(MCELFStreamer &)> EmitDesc) {
6300b57cec5SDimitry Andric   auto &S = getStreamer();
6310b57cec5SDimitry Andric   auto &Context = S.getContext();
6320b57cec5SDimitry Andric 
6330b57cec5SDimitry Andric   auto NameSZ = Name.size() + 1;
6340b57cec5SDimitry Andric 
6355ffd83dbSDimitry Andric   unsigned NoteFlags = 0;
6365ffd83dbSDimitry Andric   // TODO Apparently, this is currently needed for OpenCL as mentioned in
6375ffd83dbSDimitry Andric   // https://reviews.llvm.org/D74995
6385f757f3fSDimitry Andric   if (isHsaAbi(STI))
6395ffd83dbSDimitry Andric     NoteFlags = ELF::SHF_ALLOC;
6405ffd83dbSDimitry Andric 
64181ad6265SDimitry Andric   S.pushSection();
64281ad6265SDimitry Andric   S.switchSection(
6435ffd83dbSDimitry Andric       Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags));
6445ffd83dbSDimitry Andric   S.emitInt32(NameSZ);                                        // namesz
6455ffd83dbSDimitry Andric   S.emitValue(DescSZ, 4);                                     // descz
6465ffd83dbSDimitry Andric   S.emitInt32(NoteType);                                      // type
6475ffd83dbSDimitry Andric   S.emitBytes(Name);                                          // name
648bdd1243dSDimitry Andric   S.emitValueToAlignment(Align(4), 0, 1, 0);                  // padding 0
6490b57cec5SDimitry Andric   EmitDesc(S);                                                // desc
650bdd1243dSDimitry Andric   S.emitValueToAlignment(Align(4), 0, 1, 0);                  // padding 0
65181ad6265SDimitry Andric   S.popSection();
6520b57cec5SDimitry Andric }
6530b57cec5SDimitry Andric 
getEFlags()654fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlags() {
655fe6060f1SDimitry Andric   switch (STI.getTargetTriple().getArch()) {
656fe6060f1SDimitry Andric   default:
657fe6060f1SDimitry Andric     llvm_unreachable("Unsupported Arch");
658fe6060f1SDimitry Andric   case Triple::r600:
659fe6060f1SDimitry Andric     return getEFlagsR600();
660fe6060f1SDimitry Andric   case Triple::amdgcn:
661fe6060f1SDimitry Andric     return getEFlagsAMDGCN();
662fe6060f1SDimitry Andric   }
663fe6060f1SDimitry Andric }
664fe6060f1SDimitry Andric 
getEFlagsR600()665fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
666fe6060f1SDimitry Andric   assert(STI.getTargetTriple().getArch() == Triple::r600);
667fe6060f1SDimitry Andric 
668fe6060f1SDimitry Andric   return getElfMach(STI.getCPU());
669fe6060f1SDimitry Andric }
670fe6060f1SDimitry Andric 
getEFlagsAMDGCN()671fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
672fe6060f1SDimitry Andric   assert(STI.getTargetTriple().getArch() == Triple::amdgcn);
673fe6060f1SDimitry Andric 
674fe6060f1SDimitry Andric   switch (STI.getTargetTriple().getOS()) {
675fe6060f1SDimitry Andric   default:
676fe6060f1SDimitry Andric     // TODO: Why are some tests have "mingw" listed as OS?
677fe6060f1SDimitry Andric     // llvm_unreachable("Unsupported OS");
678fe6060f1SDimitry Andric   case Triple::UnknownOS:
679fe6060f1SDimitry Andric     return getEFlagsUnknownOS();
680fe6060f1SDimitry Andric   case Triple::AMDHSA:
681fe6060f1SDimitry Andric     return getEFlagsAMDHSA();
682fe6060f1SDimitry Andric   case Triple::AMDPAL:
683fe6060f1SDimitry Andric     return getEFlagsAMDPAL();
684fe6060f1SDimitry Andric   case Triple::Mesa3D:
685fe6060f1SDimitry Andric     return getEFlagsMesa3D();
686fe6060f1SDimitry Andric   }
687fe6060f1SDimitry Andric }
688fe6060f1SDimitry Andric 
getEFlagsUnknownOS()689fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
690fe6060f1SDimitry Andric   // TODO: Why are some tests have "mingw" listed as OS?
691fe6060f1SDimitry Andric   // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);
692fe6060f1SDimitry Andric 
693fe6060f1SDimitry Andric   return getEFlagsV3();
694fe6060f1SDimitry Andric }
695fe6060f1SDimitry Andric 
getEFlagsAMDHSA()696fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
6975f757f3fSDimitry Andric   assert(isHsaAbi(STI));
698fe6060f1SDimitry Andric 
699*0fca6ea1SDimitry Andric   if (CodeObjectVersion >= 6)
700*0fca6ea1SDimitry Andric     return getEFlagsV6();
701fe6060f1SDimitry Andric   return getEFlagsV4();
702fe6060f1SDimitry Andric }
703fe6060f1SDimitry Andric 
getEFlagsAMDPAL()704fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
705fe6060f1SDimitry Andric   assert(STI.getTargetTriple().getOS() == Triple::AMDPAL);
706fe6060f1SDimitry Andric 
707fe6060f1SDimitry Andric   return getEFlagsV3();
708fe6060f1SDimitry Andric }
709fe6060f1SDimitry Andric 
getEFlagsMesa3D()710fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
711fe6060f1SDimitry Andric   assert(STI.getTargetTriple().getOS() == Triple::Mesa3D);
712fe6060f1SDimitry Andric 
713fe6060f1SDimitry Andric   return getEFlagsV3();
714fe6060f1SDimitry Andric }
715fe6060f1SDimitry Andric 
getEFlagsV3()716fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
717fe6060f1SDimitry Andric   unsigned EFlagsV3 = 0;
718fe6060f1SDimitry Andric 
719fe6060f1SDimitry Andric   // mach.
720fe6060f1SDimitry Andric   EFlagsV3 |= getElfMach(STI.getCPU());
721fe6060f1SDimitry Andric 
722fe6060f1SDimitry Andric   // xnack.
723fe6060f1SDimitry Andric   if (getTargetID()->isXnackOnOrAny())
724fe6060f1SDimitry Andric     EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_XNACK_V3;
725fe6060f1SDimitry Andric   // sramecc.
726fe6060f1SDimitry Andric   if (getTargetID()->isSramEccOnOrAny())
727fe6060f1SDimitry Andric     EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3;
728fe6060f1SDimitry Andric 
729fe6060f1SDimitry Andric   return EFlagsV3;
730fe6060f1SDimitry Andric }
731fe6060f1SDimitry Andric 
getEFlagsV4()732fe6060f1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
733fe6060f1SDimitry Andric   unsigned EFlagsV4 = 0;
734fe6060f1SDimitry Andric 
735fe6060f1SDimitry Andric   // mach.
736fe6060f1SDimitry Andric   EFlagsV4 |= getElfMach(STI.getCPU());
737fe6060f1SDimitry Andric 
738fe6060f1SDimitry Andric   // xnack.
739fe6060f1SDimitry Andric   switch (getTargetID()->getXnackSetting()) {
740fe6060f1SDimitry Andric   case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
741fe6060f1SDimitry Andric     EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4;
742fe6060f1SDimitry Andric     break;
743fe6060f1SDimitry Andric   case AMDGPU::IsaInfo::TargetIDSetting::Any:
744fe6060f1SDimitry Andric     EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4;
745fe6060f1SDimitry Andric     break;
746fe6060f1SDimitry Andric   case AMDGPU::IsaInfo::TargetIDSetting::Off:
747fe6060f1SDimitry Andric     EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4;
748fe6060f1SDimitry Andric     break;
749fe6060f1SDimitry Andric   case AMDGPU::IsaInfo::TargetIDSetting::On:
750fe6060f1SDimitry Andric     EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4;
751fe6060f1SDimitry Andric     break;
752fe6060f1SDimitry Andric   }
753fe6060f1SDimitry Andric   // sramecc.
754fe6060f1SDimitry Andric   switch (getTargetID()->getSramEccSetting()) {
755fe6060f1SDimitry Andric   case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
756fe6060f1SDimitry Andric     EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4;
757fe6060f1SDimitry Andric     break;
758fe6060f1SDimitry Andric   case AMDGPU::IsaInfo::TargetIDSetting::Any:
759fe6060f1SDimitry Andric     EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4;
760fe6060f1SDimitry Andric     break;
761fe6060f1SDimitry Andric   case AMDGPU::IsaInfo::TargetIDSetting::Off:
762fe6060f1SDimitry Andric     EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4;
763fe6060f1SDimitry Andric     break;
764fe6060f1SDimitry Andric   case AMDGPU::IsaInfo::TargetIDSetting::On:
765fe6060f1SDimitry Andric     EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4;
766fe6060f1SDimitry Andric     break;
767fe6060f1SDimitry Andric   }
768fe6060f1SDimitry Andric 
769fe6060f1SDimitry Andric   return EFlagsV4;
770fe6060f1SDimitry Andric }
771fe6060f1SDimitry Andric 
getEFlagsV6()772*0fca6ea1SDimitry Andric unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
773*0fca6ea1SDimitry Andric   unsigned Flags = getEFlagsV4();
774*0fca6ea1SDimitry Andric 
775*0fca6ea1SDimitry Andric   unsigned Version = ForceGenericVersion;
776*0fca6ea1SDimitry Andric   if (!Version) {
777*0fca6ea1SDimitry Andric     switch (parseArchAMDGCN(STI.getCPU())) {
778*0fca6ea1SDimitry Andric     case AMDGPU::GK_GFX9_GENERIC:
779*0fca6ea1SDimitry Andric       Version = GenericVersion::GFX9;
780*0fca6ea1SDimitry Andric       break;
781*0fca6ea1SDimitry Andric     case AMDGPU::GK_GFX10_1_GENERIC:
782*0fca6ea1SDimitry Andric       Version = GenericVersion::GFX10_1;
783*0fca6ea1SDimitry Andric       break;
784*0fca6ea1SDimitry Andric     case AMDGPU::GK_GFX10_3_GENERIC:
785*0fca6ea1SDimitry Andric       Version = GenericVersion::GFX10_3;
786*0fca6ea1SDimitry Andric       break;
787*0fca6ea1SDimitry Andric     case AMDGPU::GK_GFX11_GENERIC:
788*0fca6ea1SDimitry Andric       Version = GenericVersion::GFX11;
789*0fca6ea1SDimitry Andric       break;
790*0fca6ea1SDimitry Andric     case AMDGPU::GK_GFX12_GENERIC:
791*0fca6ea1SDimitry Andric       Version = GenericVersion::GFX12;
792*0fca6ea1SDimitry Andric       break;
793*0fca6ea1SDimitry Andric     default:
794*0fca6ea1SDimitry Andric       break;
795*0fca6ea1SDimitry Andric     }
796*0fca6ea1SDimitry Andric   }
797*0fca6ea1SDimitry Andric 
798*0fca6ea1SDimitry Andric   // Versions start at 1.
799*0fca6ea1SDimitry Andric   if (Version) {
800*0fca6ea1SDimitry Andric     if (Version > ELF::EF_AMDGPU_GENERIC_VERSION_MAX)
801*0fca6ea1SDimitry Andric       report_fatal_error("Cannot encode generic code object version " +
802*0fca6ea1SDimitry Andric                          Twine(Version) +
803*0fca6ea1SDimitry Andric                          " - no ELF flag can represent this version!");
804*0fca6ea1SDimitry Andric     Flags |= (Version << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET);
805*0fca6ea1SDimitry Andric   }
806*0fca6ea1SDimitry Andric 
807*0fca6ea1SDimitry Andric   return Flags;
808*0fca6ea1SDimitry Andric }
809*0fca6ea1SDimitry Andric 
EmitDirectiveAMDGCNTarget()810fe6060f1SDimitry Andric void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
8110b57cec5SDimitry Andric 
EmitAMDKernelCodeT(AMDGPUMCKernelCodeT & Header)812*0fca6ea1SDimitry Andric void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
8130b57cec5SDimitry Andric   MCStreamer &OS = getStreamer();
81481ad6265SDimitry Andric   OS.pushSection();
815*0fca6ea1SDimitry Andric   Header.EmitKernelCodeT(OS, getContext());
81681ad6265SDimitry Andric   OS.popSection();
8170b57cec5SDimitry Andric }
8180b57cec5SDimitry Andric 
EmitAMDGPUSymbolType(StringRef SymbolName,unsigned Type)8190b57cec5SDimitry Andric void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
8200b57cec5SDimitry Andric                                                    unsigned Type) {
8210b57cec5SDimitry Andric   MCSymbolELF *Symbol = cast<MCSymbolELF>(
8220b57cec5SDimitry Andric       getStreamer().getContext().getOrCreateSymbol(SymbolName));
8230b57cec5SDimitry Andric   Symbol->setType(Type);
8240b57cec5SDimitry Andric }
8250b57cec5SDimitry Andric 
emitAMDGPULDS(MCSymbol * Symbol,unsigned Size,Align Alignment)8260b57cec5SDimitry Andric void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
8275ffd83dbSDimitry Andric                                             Align Alignment) {
8280b57cec5SDimitry Andric   MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
8290b57cec5SDimitry Andric   SymbolELF->setType(ELF::STT_OBJECT);
8300b57cec5SDimitry Andric 
831*0fca6ea1SDimitry Andric   if (!SymbolELF->isBindingSet())
8320b57cec5SDimitry Andric     SymbolELF->setBinding(ELF::STB_GLOBAL);
8330b57cec5SDimitry Andric 
834bdd1243dSDimitry Andric   if (SymbolELF->declareCommon(Size, Alignment, true)) {
8350b57cec5SDimitry Andric     report_fatal_error("Symbol: " + Symbol->getName() +
8360b57cec5SDimitry Andric                        " redeclared as different type");
8370b57cec5SDimitry Andric   }
8380b57cec5SDimitry Andric 
8390b57cec5SDimitry Andric   SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
8400b57cec5SDimitry Andric   SymbolELF->setSize(MCConstantExpr::create(Size, getContext()));
8410b57cec5SDimitry Andric }
8420b57cec5SDimitry Andric 
EmitISAVersion()843fe6060f1SDimitry Andric bool AMDGPUTargetELFStreamer::EmitISAVersion() {
8440b57cec5SDimitry Andric   // Create two labels to mark the beginning and end of the desc field
8450b57cec5SDimitry Andric   // and a MCExpr to calculate the size of the desc field.
8460b57cec5SDimitry Andric   auto &Context = getContext();
8470b57cec5SDimitry Andric   auto *DescBegin = Context.createTempSymbol();
8480b57cec5SDimitry Andric   auto *DescEnd = Context.createTempSymbol();
8490b57cec5SDimitry Andric   auto *DescSZ = MCBinaryExpr::createSub(
8500b57cec5SDimitry Andric     MCSymbolRefExpr::create(DescEnd, Context),
8510b57cec5SDimitry Andric     MCSymbolRefExpr::create(DescBegin, Context), Context);
8520b57cec5SDimitry Andric 
853fe6060f1SDimitry Andric   EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_ISA_NAME,
8540b57cec5SDimitry Andric            [&](MCELFStreamer &OS) {
8555ffd83dbSDimitry Andric              OS.emitLabel(DescBegin);
856fe6060f1SDimitry Andric              OS.emitBytes(getTargetID()->toString());
8575ffd83dbSDimitry Andric              OS.emitLabel(DescEnd);
8580b57cec5SDimitry Andric            });
8590b57cec5SDimitry Andric   return true;
8600b57cec5SDimitry Andric }
8610b57cec5SDimitry Andric 
EmitHSAMetadata(msgpack::Document & HSAMetadataDoc,bool Strict)8620b57cec5SDimitry Andric bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
8630b57cec5SDimitry Andric                                               bool Strict) {
864e8d8bef9SDimitry Andric   HSAMD::V3::MetadataVerifier Verifier(Strict);
8650b57cec5SDimitry Andric   if (!Verifier.verify(HSAMetadataDoc.getRoot()))
8660b57cec5SDimitry Andric     return false;
8670b57cec5SDimitry Andric 
8680b57cec5SDimitry Andric   std::string HSAMetadataString;
8690b57cec5SDimitry Andric   HSAMetadataDoc.writeToBlob(HSAMetadataString);
8700b57cec5SDimitry Andric 
8710b57cec5SDimitry Andric   // Create two labels to mark the beginning and end of the desc field
8720b57cec5SDimitry Andric   // and a MCExpr to calculate the size of the desc field.
8730b57cec5SDimitry Andric   auto &Context = getContext();
8740b57cec5SDimitry Andric   auto *DescBegin = Context.createTempSymbol();
8750b57cec5SDimitry Andric   auto *DescEnd = Context.createTempSymbol();
8760b57cec5SDimitry Andric   auto *DescSZ = MCBinaryExpr::createSub(
8770b57cec5SDimitry Andric       MCSymbolRefExpr::create(DescEnd, Context),
8780b57cec5SDimitry Andric       MCSymbolRefExpr::create(DescBegin, Context), Context);
8790b57cec5SDimitry Andric 
8800b57cec5SDimitry Andric   EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
8810b57cec5SDimitry Andric            [&](MCELFStreamer &OS) {
8825ffd83dbSDimitry Andric              OS.emitLabel(DescBegin);
8835ffd83dbSDimitry Andric              OS.emitBytes(HSAMetadataString);
8845ffd83dbSDimitry Andric              OS.emitLabel(DescEnd);
8850b57cec5SDimitry Andric            });
8860b57cec5SDimitry Andric   return true;
8870b57cec5SDimitry Andric }
8880b57cec5SDimitry Andric 
EmitKernargPreloadHeader(const MCSubtargetInfo & STI,bool TrapEnabled)8895f757f3fSDimitry Andric bool AMDGPUTargetELFStreamer::EmitKernargPreloadHeader(
890*0fca6ea1SDimitry Andric     const MCSubtargetInfo &STI, bool TrapEnabled) {
8915f757f3fSDimitry Andric   const uint32_t Encoded_s_nop = 0xbf800000;
892*0fca6ea1SDimitry Andric   const uint32_t Encoded_s_trap = 0xbf920002;
893*0fca6ea1SDimitry Andric   const uint32_t Encoded_s_endpgm = 0xbf810000;
894*0fca6ea1SDimitry Andric   const uint32_t TrapInstr = TrapEnabled ? Encoded_s_trap : Encoded_s_endpgm;
8955f757f3fSDimitry Andric   MCStreamer &OS = getStreamer();
896*0fca6ea1SDimitry Andric   OS.emitInt32(TrapInstr);
897*0fca6ea1SDimitry Andric   for (int i = 0; i < 63; ++i) {
8985f757f3fSDimitry Andric     OS.emitInt32(Encoded_s_nop);
8995f757f3fSDimitry Andric   }
9005f757f3fSDimitry Andric   return true;
9015f757f3fSDimitry Andric }
9025f757f3fSDimitry Andric 
EmitCodeEnd(const MCSubtargetInfo & STI)903fe6060f1SDimitry Andric bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
9040b57cec5SDimitry Andric   const uint32_t Encoded_s_code_end = 0xbf9f0000;
905fe6060f1SDimitry Andric   const uint32_t Encoded_s_nop = 0xbf800000;
906fe6060f1SDimitry Andric   uint32_t Encoded_pad = Encoded_s_code_end;
907fe6060f1SDimitry Andric 
908fe6060f1SDimitry Andric   // Instruction cache line size in bytes.
90981ad6265SDimitry Andric   const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
910fe6060f1SDimitry Andric   const unsigned CacheLineSize = 1u << Log2CacheLineSize;
911fe6060f1SDimitry Andric 
912fe6060f1SDimitry Andric   // Extra padding amount in bytes to support prefetch mode 3.
913fe6060f1SDimitry Andric   unsigned FillSize = 3 * CacheLineSize;
914fe6060f1SDimitry Andric 
915fe6060f1SDimitry Andric   if (AMDGPU::isGFX90A(STI)) {
916fe6060f1SDimitry Andric     Encoded_pad = Encoded_s_nop;
917fe6060f1SDimitry Andric     FillSize = 16 * CacheLineSize;
918fe6060f1SDimitry Andric   }
9190b57cec5SDimitry Andric 
9200b57cec5SDimitry Andric   MCStreamer &OS = getStreamer();
92181ad6265SDimitry Andric   OS.pushSection();
922bdd1243dSDimitry Andric   OS.emitValueToAlignment(Align(CacheLineSize), Encoded_pad, 4);
923fe6060f1SDimitry Andric   for (unsigned I = 0; I < FillSize; I += 4)
924fe6060f1SDimitry Andric     OS.emitInt32(Encoded_pad);
92581ad6265SDimitry Andric   OS.popSection();
9260b57cec5SDimitry Andric   return true;
9270b57cec5SDimitry Andric }
9280b57cec5SDimitry Andric 
EmitAmdhsaKernelDescriptor(const MCSubtargetInfo & STI,StringRef KernelName,const MCKernelDescriptor & KernelDescriptor,const MCExpr * NextVGPR,const MCExpr * NextSGPR,const MCExpr * ReserveVCC,const MCExpr * ReserveFlatScr)9290b57cec5SDimitry Andric void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
9300b57cec5SDimitry Andric     const MCSubtargetInfo &STI, StringRef KernelName,
931*0fca6ea1SDimitry Andric     const MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR,
932*0fca6ea1SDimitry Andric     const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
933*0fca6ea1SDimitry Andric     const MCExpr *ReserveFlatScr) {
9340b57cec5SDimitry Andric   auto &Streamer = getStreamer();
9350b57cec5SDimitry Andric   auto &Context = Streamer.getContext();
9360b57cec5SDimitry Andric 
9370b57cec5SDimitry Andric   MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
9380b57cec5SDimitry Andric       Context.getOrCreateSymbol(Twine(KernelName)));
9390b57cec5SDimitry Andric   MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
9400b57cec5SDimitry Andric       Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
9410b57cec5SDimitry Andric 
9420b57cec5SDimitry Andric   // Copy kernel descriptor symbol's binding, other and visibility from the
9430b57cec5SDimitry Andric   // kernel code symbol.
9440b57cec5SDimitry Andric   KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
9450b57cec5SDimitry Andric   KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
9460b57cec5SDimitry Andric   KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
9470b57cec5SDimitry Andric   // Kernel descriptor symbol's type and size are fixed.
9480b57cec5SDimitry Andric   KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
9490b57cec5SDimitry Andric   KernelDescriptorSymbol->setSize(
950*0fca6ea1SDimitry Andric       MCConstantExpr::create(sizeof(amdhsa::kernel_descriptor_t), Context));
9510b57cec5SDimitry Andric 
9520b57cec5SDimitry Andric   // The visibility of the kernel code symbol must be protected or less to allow
9530b57cec5SDimitry Andric   // static relocations from the kernel descriptor to be used.
9540b57cec5SDimitry Andric   if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
9550b57cec5SDimitry Andric     KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
9560b57cec5SDimitry Andric 
9575ffd83dbSDimitry Andric   Streamer.emitLabel(KernelDescriptorSymbol);
958*0fca6ea1SDimitry Andric   Streamer.emitValue(
959*0fca6ea1SDimitry Andric       KernelDescriptor.group_segment_fixed_size,
960*0fca6ea1SDimitry Andric       sizeof(amdhsa::kernel_descriptor_t::group_segment_fixed_size));
961*0fca6ea1SDimitry Andric   Streamer.emitValue(
962*0fca6ea1SDimitry Andric       KernelDescriptor.private_segment_fixed_size,
963*0fca6ea1SDimitry Andric       sizeof(amdhsa::kernel_descriptor_t::private_segment_fixed_size));
964*0fca6ea1SDimitry Andric   Streamer.emitValue(KernelDescriptor.kernarg_size,
965*0fca6ea1SDimitry Andric                      sizeof(amdhsa::kernel_descriptor_t::kernarg_size));
966fe6060f1SDimitry Andric 
967*0fca6ea1SDimitry Andric   for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i)
968*0fca6ea1SDimitry Andric     Streamer.emitInt8(0u);
969fe6060f1SDimitry Andric 
9700b57cec5SDimitry Andric   // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
9710b57cec5SDimitry Andric   // expression being created is:
9720b57cec5SDimitry Andric   //   (start of kernel code) - (start of kernel descriptor)
9730b57cec5SDimitry Andric   // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
974*0fca6ea1SDimitry Andric   Streamer.emitValue(
975*0fca6ea1SDimitry Andric       MCBinaryExpr::createSub(
976*0fca6ea1SDimitry Andric           MCSymbolRefExpr::create(KernelCodeSymbol,
977*0fca6ea1SDimitry Andric                                   MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
978*0fca6ea1SDimitry Andric           MCSymbolRefExpr::create(KernelDescriptorSymbol,
979*0fca6ea1SDimitry Andric                                   MCSymbolRefExpr::VK_None, Context),
9800b57cec5SDimitry Andric           Context),
981*0fca6ea1SDimitry Andric       sizeof(amdhsa::kernel_descriptor_t::kernel_code_entry_byte_offset));
982*0fca6ea1SDimitry Andric   for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i)
983*0fca6ea1SDimitry Andric     Streamer.emitInt8(0u);
984*0fca6ea1SDimitry Andric   Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3,
985*0fca6ea1SDimitry Andric                      sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc3));
986*0fca6ea1SDimitry Andric   Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1,
987*0fca6ea1SDimitry Andric                      sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc1));
988*0fca6ea1SDimitry Andric   Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2,
989*0fca6ea1SDimitry Andric                      sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc2));
990*0fca6ea1SDimitry Andric   Streamer.emitValue(
991*0fca6ea1SDimitry Andric       KernelDescriptor.kernel_code_properties,
992*0fca6ea1SDimitry Andric       sizeof(amdhsa::kernel_descriptor_t::kernel_code_properties));
993*0fca6ea1SDimitry Andric   Streamer.emitValue(KernelDescriptor.kernarg_preload,
994*0fca6ea1SDimitry Andric                      sizeof(amdhsa::kernel_descriptor_t::kernarg_preload));
995*0fca6ea1SDimitry Andric   for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i)
996*0fca6ea1SDimitry Andric     Streamer.emitInt8(0u);
9970b57cec5SDimitry Andric }
998