1 //===-- AMDGPUTargetStreamer.cpp - Mips Target Streamer Methods -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file provides AMDGPU specific target streamer methods. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPUTargetStreamer.h" 14 #include "AMDGPUMCExpr.h" 15 #include "AMDGPUMCKernelDescriptor.h" 16 #include "AMDGPUPTNote.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" 20 #include "llvm/BinaryFormat/ELF.h" 21 #include "llvm/MC/MCAsmInfo.h" 22 #include "llvm/MC/MCAssembler.h" 23 #include "llvm/MC/MCContext.h" 24 #include "llvm/MC/MCELFObjectWriter.h" 25 #include "llvm/MC/MCELFStreamer.h" 26 #include "llvm/MC/MCSubtargetInfo.h" 27 #include "llvm/Support/AMDGPUMetadata.h" 28 #include "llvm/Support/AMDHSAKernelDescriptor.h" 29 #include "llvm/Support/Casting.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Support/FormattedStream.h" 32 #include "llvm/TargetParser/TargetParser.h" 33 34 using namespace llvm; 35 using namespace llvm::AMDGPU; 36 37 //===----------------------------------------------------------------------===// 38 // AMDGPUTargetStreamer 39 //===----------------------------------------------------------------------===// 40 41 static cl::opt<unsigned> 42 ForceGenericVersion("amdgpu-force-generic-version", 43 cl::desc("Force a specific generic_v<N> flag to be " 44 "added. 
For testing purposes only."), 45 cl::ReallyHidden, cl::init(0)); 46 47 bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) { 48 msgpack::Document HSAMetadataDoc; 49 if (!HSAMetadataDoc.fromYAML(HSAMetadataString)) 50 return false; 51 return EmitHSAMetadata(HSAMetadataDoc, false); 52 } 53 54 StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { 55 AMDGPU::GPUKind AK; 56 57 // clang-format off 58 switch (ElfMach) { 59 case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break; 60 case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break; 61 case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break; 62 case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break; 63 case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break; 64 case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break; 65 case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break; 66 case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break; 67 case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break; 68 case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break; 69 case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break; 70 case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break; 71 case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break; 72 case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break; 73 case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break; 74 case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break; 75 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break; 76 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break; 77 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602: AK = GK_GFX602; break; 78 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break; 79 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break; 80 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break; 81 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break; 82 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break; 83 case 
ELF::EF_AMDGPU_MACH_AMDGCN_GFX705: AK = GK_GFX705; break; 84 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break; 85 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break; 86 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break; 87 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805: AK = GK_GFX805; break; 88 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break; 89 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break; 90 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break; 91 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break; 92 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break; 93 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: AK = GK_GFX908; break; 94 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break; 95 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break; 96 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break; 97 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break; 98 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break; 99 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break; 100 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break; 101 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break; 102 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: AK = GK_GFX1013; break; 103 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break; 104 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break; 105 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break; 106 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break; 107 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034; break; 108 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: AK = GK_GFX1035; break; 109 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: AK = GK_GFX1036; break; 110 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: AK = GK_GFX1100; break; 111 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: AK = GK_GFX1101; break; 112 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: AK = 
GK_GFX1102; break; 113 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break; 114 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break; 115 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break; 116 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152: AK = GK_GFX1152; break; 117 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153: AK = GK_GFX1153; break; 118 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break; 119 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break; 120 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1250: AK = GK_GFX1250; break; 121 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break; 122 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC: AK = GK_GFX9_4_GENERIC; break; 123 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break; 124 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break; 125 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break; 126 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC: AK = GK_GFX12_GENERIC; break; 127 case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; 128 default: AK = GK_NONE; break; 129 } 130 // clang-format on 131 132 StringRef GPUName = getArchNameAMDGCN(AK); 133 if (GPUName != "") 134 return GPUName; 135 return getArchNameR600(AK); 136 } 137 138 unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { 139 AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); 140 if (AK == AMDGPU::GPUKind::GK_NONE) 141 AK = parseArchR600(GPU); 142 143 // clang-format off 144 switch (AK) { 145 case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600; 146 case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630; 147 case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880; 148 case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670; 149 case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710; 150 case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730; 151 case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770; 152 case GK_CEDAR: return 
ELF::EF_AMDGPU_MACH_R600_CEDAR; 153 case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS; 154 case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER; 155 case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD; 156 case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO; 157 case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS; 158 case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS; 159 case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN; 160 case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS; 161 case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600; 162 case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601; 163 case GK_GFX602: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602; 164 case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700; 165 case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701; 166 case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702; 167 case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703; 168 case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704; 169 case GK_GFX705: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705; 170 case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801; 171 case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802; 172 case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803; 173 case GK_GFX805: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805; 174 case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810; 175 case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900; 176 case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902; 177 case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904; 178 case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906; 179 case GK_GFX908: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908; 180 case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909; 181 case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A; 182 case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C; 183 case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942; 184 case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950; 
185 case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010; 186 case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011; 187 case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012; 188 case GK_GFX1013: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013; 189 case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030; 190 case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031; 191 case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032; 192 case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033; 193 case GK_GFX1034: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034; 194 case GK_GFX1035: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035; 195 case GK_GFX1036: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036; 196 case GK_GFX1100: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100; 197 case GK_GFX1101: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101; 198 case GK_GFX1102: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102; 199 case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103; 200 case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150; 201 case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151; 202 case GK_GFX1152: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152; 203 case GK_GFX1153: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153; 204 case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200; 205 case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201; 206 case GK_GFX1250: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1250; 207 case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC; 208 case GK_GFX9_4_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC; 209 case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC; 210 case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC; 211 case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC; 212 case GK_GFX12_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC; 213 case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; 214 } 215 // clang-format on 216 217 llvm_unreachable("unknown GPU"); 218 } 
//===----------------------------------------------------------------------===//
// AMDGPUTargetAsmStreamer
//===----------------------------------------------------------------------===//

AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
                                                 formatted_raw_ostream &OS)
    : AMDGPUTargetStreamer(S), OS(OS) { }

// A hook for emitting stuff at the end.
// We use it for emitting the accumulated PAL metadata as directives.
// The PAL metadata is reset after it is emitted.
void AMDGPUTargetAsmStreamer::finish() {
  std::string S;
  getPALMetadata()->toString(S);
  OS << S;

  // Reset the pal metadata so its data will not affect a compilation that
  // reuses this object.
  getPALMetadata()->reset();
}

/// Emit the .amdgcn_target directive carrying the full target-ID string.
void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() {
  OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
}

/// Record the code object version via the base class, then emit the matching
/// .amdhsa_code_object_version directive.
void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
    unsigned COV) {
  AMDGPUTargetStreamer::EmitDirectiveAMDHSACodeObjectVersion(COV);
  OS << "\t.amdhsa_code_object_version " << COV << '\n';
}

/// Print the kernel code header between .amd_kernel_code_t and
/// .end_amd_kernel_code_t, folding each field expression before printing.
void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
  // Fold the expression as far as possible, then pretty-print it.
  auto FoldAndPrint = [&](const MCExpr *Expr, raw_ostream &OS,
                          const MCAsmInfo *MAI) {
    printAMDGPUMCExpr(foldAMDGPUMCExpr(Expr, getContext()), OS, MAI);
  };

  OS << "\t.amd_kernel_code_t\n";
  Header.EmitKernelCodeT(OS, getContext(), FoldAndPrint);
  OS << "\t.end_amd_kernel_code_t\n";
}

/// Emit the directive declaring an AMDGPU-specific symbol type. Only
/// STT_AMDGPU_HSA_KERNEL is accepted; anything else is a programmer error.
void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
                                                   unsigned Type) {
  switch (Type) {
  default: llvm_unreachable("Invalid AMDGPU symbol type");
  case ELF::STT_AMDGPU_HSA_KERNEL:
    OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n';
    break;
  }
}

/// Emit an .amdgpu_lds directive: symbol name, size in bytes, and alignment
/// (printed as a byte value).
void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
                                            Align Alignment) {
  OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
     << Alignment.value() << '\n';
}

/// Emit one ".set <symbol>, <value>" line (plus a blank line) for each
/// per-function resource-usage symbol, printing each symbol's variable value.
void AMDGPUTargetAsmStreamer::EmitMCResourceInfo(
    const MCSymbol *NumVGPR, const MCSymbol *NumAGPR,
    const MCSymbol *NumExplicitSGPR, const MCSymbol *PrivateSegmentSize,
    const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch,
    const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion,
    const MCSymbol *HasIndirectCall) {
#define PRINT_RES_INFO(ARG)                                                    \
  OS << "\t.set ";                                                             \
  ARG->print(OS, getContext().getAsmInfo());                                   \
  OS << ", ";                                                                  \
  getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue());          \
  Streamer.addBlankLine();

  PRINT_RES_INFO(NumVGPR);
  PRINT_RES_INFO(NumAGPR);
  PRINT_RES_INFO(NumExplicitSGPR);
  PRINT_RES_INFO(PrivateSegmentSize);
  PRINT_RES_INFO(UsesVCC);
  PRINT_RES_INFO(UsesFlatScratch);
  PRINT_RES_INFO(HasDynamicallySizedStack);
  PRINT_RES_INFO(HasRecursion);
  PRINT_RES_INFO(HasIndirectCall);
#undef PRINT_RES_INFO
}

/// Emit ".set" lines for the module-wide register-usage maximum symbols,
/// using the same format as EmitMCResourceInfo.
void AMDGPUTargetAsmStreamer::EmitMCResourceMaximums(const MCSymbol *MaxVGPR,
                                                     const MCSymbol *MaxAGPR,
                                                     const MCSymbol *MaxSGPR) {
#define PRINT_RES_INFO(ARG)                                                    \
  OS << "\t.set ";                                                             \
  ARG->print(OS, getContext().getAsmInfo());                                   \
  OS << ", ";                                                                  \
  getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue());          \
  Streamer.addBlankLine();

  PRINT_RES_INFO(MaxVGPR);
  PRINT_RES_INFO(MaxAGPR);
  PRINT_RES_INFO(MaxSGPR);
#undef PRINT_RES_INFO
}

/// Emit the .amd_amdgpu_isa directive with the target-ID string.
/// Always succeeds.
bool AMDGPUTargetAsmStreamer::EmitISAVersion() {
  OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
  return true;
}

/// Verify the HSA metadata document (strictly when \p Strict is set) and, on
/// success, emit it as YAML bracketed by the V3 metadata assembler
/// directives. \returns false if verification fails.
bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
    msgpack::Document &HSAMetadataDoc, bool Strict) {
  HSAMD::V3::MetadataVerifier Verifier(Strict);
  if (!Verifier.verify(HSAMetadataDoc.getRoot()))
    return false;

  std::string HSAMetadataString;
  raw_string_ostream StrOS(HSAMetadataString);
  HSAMetadataDoc.toYAML(StrOS);

  OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n';
  OS << StrOS.str() << '\n';
  OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n';
  return true;
}

/// Pad the end of the code with encoded s_code_end (or s_nop on gfx90a)
/// instructions: align to the instruction cache line, then fill several
/// cache lines of padding. Always succeeds.
bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
  // Raw instruction encodings used as 32-bit fill values.
  const uint32_t Encoded_s_code_end = 0xbf9f0000;
  const uint32_t Encoded_s_nop = 0xbf800000;
  uint32_t Encoded_pad = Encoded_s_code_end;

  // Instruction cache line size in bytes.
  const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
  const unsigned CacheLineSize = 1u << Log2CacheLineSize;

  // Extra padding amount in bytes to support prefetch mode 3.
  unsigned FillSize = 3 * CacheLineSize;

  // gfx90a pads with s_nop and uses a larger fill region.
  if (AMDGPU::isGFX90A(STI)) {
    Encoded_pad = Encoded_s_nop;
    FillSize = 16 * CacheLineSize;
  }

  OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
  OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
  return true;
}

/// Print the complete .amdhsa_kernel directive block for \p KernelName,
/// expanding each kernel-descriptor bit-field into its assembler directive.
/// Directives are emitted conditionally on ISA major version, code object
/// version, and target features, mirroring what the assembler accepts for
/// the same subtarget.
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const MCKernelDescriptor &KD, const MCExpr *NextVGPR,
    const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
    const MCExpr *ReserveFlatScr) {
  IsaVersion IVersion = getIsaVersion(STI.getCPU());
  const MCAsmInfo *MAI = getContext().getAsmInfo();

  OS << "\t.amdhsa_kernel " << KernelName << '\n';

  // Extract the (Shift, Mask) bit-field of Expr, fold it, and print it after
  // the given directive name.
  auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
                        StringRef Directive) {
    OS << "\t\t" << Directive << ' ';
    const MCExpr *ShiftedAndMaskedExpr =
        MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext());
    const MCExpr *New = foldAMDGPUMCExpr(ShiftedAndMaskedExpr, getContext());
    printAMDGPUMCExpr(New, OS, MAI);
    OS << '\n';
  };

  // Fold and print a whole expression, without bit-field extraction.
  auto EmitMCExpr = [&](const MCExpr *Value) {
    const MCExpr *NewExpr = foldAMDGPUMCExpr(Value, getContext());
    printAMDGPUMCExpr(NewExpr, OS, MAI);
  };

  OS << "\t\t.amdhsa_group_segment_fixed_size ";
  EmitMCExpr(KD.group_segment_fixed_size);
  OS << '\n';

  OS << "\t\t.amdhsa_private_segment_fixed_size ";
  EmitMCExpr(KD.private_segment_fixed_size);
  OS << '\n';

  OS << "\t\t.amdhsa_kernarg_size ";
  EmitMCExpr(KD.kernarg_size);
  OS << '\n';

  PrintField(
      KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count");

  // The private segment buffer user SGPR only exists without architected
  // flat scratch.
  if (!hasArchitectedFlatScratch(STI))
    PrintField(
        KD.kernel_code_properties,
        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
        ".amdhsa_user_sgpr_private_segment_buffer");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
             ".amdhsa_user_sgpr_dispatch_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
             ".amdhsa_user_sgpr_queue_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
             ".amdhsa_user_sgpr_kernarg_segment_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
             ".amdhsa_user_sgpr_dispatch_id");
  if (!hasArchitectedFlatScratch(STI))
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
               ".amdhsa_user_sgpr_flat_scratch_init");
  if (hasKernargPreload(STI)) {
    PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
               amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
               ".amdhsa_user_sgpr_kernarg_preload_length");
    PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
               amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
               ".amdhsa_user_sgpr_kernarg_preload_offset");
  }
  PrintField(
      KD.kernel_code_properties,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
      ".amdhsa_user_sgpr_private_segment_size");
  if (IVersion.Major >= 10)
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
               ".amdhsa_wavefront_size32");
  if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
               ".amdhsa_uses_dynamic_stack");
  // Same bit, but the directive spelling depends on architected flat scratch.
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
             (hasArchitectedFlatScratch(STI)
                  ? ".amdhsa_enable_private_segment"
                  : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
             ".amdhsa_system_sgpr_workgroup_id_x");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
             ".amdhsa_system_sgpr_workgroup_id_y");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
             ".amdhsa_system_sgpr_workgroup_id_z");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
             ".amdhsa_system_sgpr_workgroup_info");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
             ".amdhsa_system_vgpr_workitem_id");

  // These directives are required.
  OS << "\t\t.amdhsa_next_free_vgpr ";
  EmitMCExpr(NextVGPR);
  OS << '\n';

  OS << "\t\t.amdhsa_next_free_sgpr ";
  EmitMCExpr(NextSGPR);
  OS << '\n';

  if (AMDGPU::isGFX90A(STI)) {
    // MCExpr equivalent of taking the (accum_offset + 1) * 4.
    const MCExpr *accum_bits = MCKernelDescriptor::bits_get(
        KD.compute_pgm_rsrc3,
        amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
        amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
    accum_bits = MCBinaryExpr::createAdd(
        accum_bits, MCConstantExpr::create(1, getContext()), getContext());
    accum_bits = MCBinaryExpr::createMul(
        accum_bits, MCConstantExpr::create(4, getContext()), getContext());
    OS << "\t\t.amdhsa_accum_offset ";
    const MCExpr *New = foldAMDGPUMCExpr(accum_bits, getContext());
    printAMDGPUMCExpr(New, OS, MAI);
    OS << '\n';
  }

  OS << "\t\t.amdhsa_reserve_vcc ";
  EmitMCExpr(ReserveVCC);
  OS << '\n';

  if (IVersion.Major >= 7 && !hasArchitectedFlatScratch(STI)) {
    OS << "\t\t.amdhsa_reserve_flat_scratch ";
    EmitMCExpr(ReserveFlatScr);
    OS << '\n';
  }

  // XNACK reservation is only meaningful for COV4/COV5 on XNACK-capable
  // targets.
  switch (CodeObjectVersion) {
  default:
    break;
  case AMDGPU::AMDHSA_COV4:
  case AMDGPU::AMDHSA_COV5:
    if (getTargetID()->isXnackSupported())
      OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
    break;
  }

  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
             ".amdhsa_float_round_mode_32");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
             ".amdhsa_float_round_mode_16_64");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
             ".amdhsa_float_denorm_mode_32");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
             ".amdhsa_float_denorm_mode_16_64");
  // dx10_clamp / ieee_mode only exist through GFX11.
  if (IVersion.Major < 12) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
               ".amdhsa_dx10_clamp");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
               ".amdhsa_ieee_mode");
  }
  if (IVersion.Major >= 9) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
               ".amdhsa_fp16_overflow");
  }
  if (AMDGPU::isGFX90A(STI))
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
  if (IVersion.Major >= 10) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
               ".amdhsa_workgroup_processor_mode");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
               ".amdhsa_memory_ordered");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
               ".amdhsa_forward_progress");
  }
  if (IVersion.Major >= 10 && IVersion.Major < 12) {
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
               ".amdhsa_shared_vgpr_count");
  }
  // .amdhsa_inst_pref_size lives in a different rsrc3 field on GFX11 vs
  // GFX12+, hence the two separate emissions below.
  if (IVersion.Major == 11) {
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE,
               ".amdhsa_inst_pref_size");
  }
  if (IVersion.Major >= 12) {
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE,
               ".amdhsa_inst_pref_size");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
               ".amdhsa_round_robin_scheduling");
  }
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
      ".amdhsa_exception_fp_ieee_invalid_op");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
      ".amdhsa_exception_fp_denorm_src");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
      ".amdhsa_exception_fp_ieee_div_zero");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
      ".amdhsa_exception_fp_ieee_overflow");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
      ".amdhsa_exception_fp_ieee_underflow");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
      ".amdhsa_exception_fp_ieee_inexact");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
      ".amdhsa_exception_int_div_zero");

  OS << "\t.end_amdhsa_kernel\n";
}
//===----------------------------------------------------------------------===//
// AMDGPUTargetELFStreamer
//===----------------------------------------------------------------------===//

AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
                                                 const MCSubtargetInfo &STI)
    : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {}

// The MCStreamer held by this target streamer is used for ELF object
// emission, so the static_cast to MCELFStreamer is assumed safe here.
MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
  return static_cast<MCELFStreamer &>(Streamer);
}

// A hook for emitting stuff at the end.
// We use it for emitting the accumulated PAL metadata as a .note record.
// The PAL metadata is reset after it is emitted.
void AMDGPUTargetELFStreamer::finish() {
  // Finalize the ELF header now that all module-level information (target
  // features, code object version) is known.
  ELFObjectWriter &W = getStreamer().getWriter();
  W.setELFHeaderEFlags(getEFlags());
  W.setOverrideABIVersion(
      getELFABIVersion(STI.getTargetTriple(), CodeObjectVersion));

  std::string Blob;
  const char *Vendor = getPALMetadata()->getVendor();
  unsigned Type = getPALMetadata()->getType();
  getPALMetadata()->toBlob(Type, Blob);
  // No accumulated PAL metadata -> nothing to emit.
  if (Blob.empty())
    return;
  EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
           [&](MCELFStreamer &OS) { OS.emitBytes(Blob); });

  // Reset the pal metadata so its data will not affect a compilation that
  // reuses this object.
  getPALMetadata()->reset();
}

// Emit one ELF note record (namesz / descsz / type / name / desc, with
// 4-byte padding after name and desc) into the AMDGPU note section.
// \p DescSZ is an MCExpr rather than a plain integer so callers may pass a
// label-difference whose value is only known at layout time; \p EmitDesc
// emits the desc payload itself.
void AMDGPUTargetELFStreamer::EmitNote(
    StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
    function_ref<void(MCELFStreamer &)> EmitDesc) {
  auto &S = getStreamer();
  auto &Context = S.getContext();

  // namesz includes the terminating NUL.
  auto NameSZ = Name.size() + 1;

  unsigned NoteFlags = 0;
  // TODO Apparently, this is currently needed for OpenCL as mentioned in
  // https://reviews.llvm.org/D74995
  if (isHsaAbi(STI))
    NoteFlags = ELF::SHF_ALLOC;

  // Preserve the caller's current section around the note emission.
  S.pushSection();
  S.switchSection(
      Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags));
  S.emitInt32(NameSZ);                       // namesz
  S.emitValue(DescSZ, 4);                    // descsz
  S.emitInt32(NoteType);                     // type
  S.emitBytes(Name);                         // name
  S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0
  EmitDesc(S);                               // desc
  S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0
  S.popSection();
}

// Compute the ELF header e_flags, dispatching on the target architecture.
unsigned AMDGPUTargetELFStreamer::getEFlags() {
  switch (STI.getTargetTriple().getArch()) {
  default:
    llvm_unreachable("Unsupported Arch");
  case Triple::r600:
    return getEFlagsR600();
  case Triple::amdgcn:
    return getEFlagsAMDGCN();
  }
}

// R600 e_flags carry only the machine id; no feature or version bits.
unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
  assert(STI.getTargetTriple().getArch() == Triple::r600);

  return getElfMach(STI.getCPU());
}

// AMDGCN e_flags depend on the target OS; unknown OSes (including the
// "mingw" triples some tests use) fall back to the UnknownOS encoding.
unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
  assert(STI.getTargetTriple().isAMDGCN());

  switch (STI.getTargetTriple().getOS()) {
  default:
    // TODO: Why do some tests have "mingw" listed as the OS?
    // llvm_unreachable("Unsupported OS");
  case Triple::UnknownOS:
    return getEFlagsUnknownOS();
  case Triple::AMDHSA:
    return getEFlagsAMDHSA();
  case Triple::AMDPAL:
    return getEFlagsAMDPAL();
  case Triple::Mesa3D:
    return getEFlagsMesa3D();
  }
}

unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
  // TODO: Why do some tests have "mingw" listed as the OS?
  // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);

  return getEFlagsV3();
}

unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
  assert(isHsaAbi(STI));

  // Code object V6 adds the generic-version flag bits; earlier HSA code
  // objects use the V4 encoding.
  if (CodeObjectVersion >= 6)
    return getEFlagsV6();
  return getEFlagsV4();
}

unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
  assert(STI.getTargetTriple().getOS() == Triple::AMDPAL);

  return getEFlagsV3();
}

unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
  assert(STI.getTargetTriple().getOS() == Triple::Mesa3D);

  return getEFlagsV3();
}

// V3 e_flags: machine id plus single on/off bits for xnack and sramecc
// (set when the feature is On or Any).
unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
  unsigned EFlagsV3 = 0;

  // mach.
  EFlagsV3 |= getElfMach(STI.getCPU());

  // xnack.
  if (getTargetID()->isXnackOnOrAny())
    EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_XNACK_V3;
  // sramecc.
  if (getTargetID()->isSramEccOnOrAny())
    EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3;

  return EFlagsV3;
}

// V4 e_flags: machine id plus a four-state (Unsupported/Any/Off/On)
// encoding for each of the xnack and sramecc target features.
unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
  unsigned EFlagsV4 = 0;

  // mach.
  EFlagsV4 |= getElfMach(STI.getCPU());

  // xnack.
  switch (getTargetID()->getXnackSetting()) {
  case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Any:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Off:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::On:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4;
    break;
  }
  // sramecc.
  switch (getTargetID()->getSramEccSetting()) {
  case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Any:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Off:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::On:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4;
    break;
  }

  return EFlagsV4;
}

// V6 e_flags: the V4 encoding plus, for generic (family-wide) targets, the
// generic_v<N> version stored in the EF_AMDGPU_GENERIC_VERSION bit range.
unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
  unsigned Flags = getEFlagsV4();

  // -amdgpu-force-generic-version (testing aid declared at the top of this
  // file) overrides the version derived from the CPU name when non-zero.
  unsigned Version = ForceGenericVersion;
  if (!Version) {
    switch (parseArchAMDGCN(STI.getCPU())) {
    case AMDGPU::GK_GFX9_GENERIC:
      Version = GenericVersion::GFX9;
      break;
    case AMDGPU::GK_GFX9_4_GENERIC:
      Version = GenericVersion::GFX9_4;
      break;
    case AMDGPU::GK_GFX10_1_GENERIC:
      Version = GenericVersion::GFX10_1;
      break;
    case AMDGPU::GK_GFX10_3_GENERIC:
      Version = GenericVersion::GFX10_3;
      break;
    case AMDGPU::GK_GFX11_GENERIC:
      Version = GenericVersion::GFX11;
      break;
    case AMDGPU::GK_GFX12_GENERIC:
      Version = GenericVersion::GFX12;
      break;
    default:
      // Non-generic targets encode no version (Version stays 0).
      break;
    }
  }

  // Versions start at 1.
  if (Version) {
    if (Version > ELF::EF_AMDGPU_GENERIC_VERSION_MAX)
      report_fatal_error("Cannot encode generic code object version " +
                         Twine(Version) +
                         " - no ELF flag can represent this version!");
    Flags |= (Version << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET);
  }

  return Flags;
}

// Intentionally empty: the .amdgcn_target directive has no object-file
// encoding in this streamer (the target is carried by the header e_flags).
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}

// Emit the amd_kernel_code_t header into the current section, preserving
// the caller's section with push/pop.
void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
  MCStreamer &OS = getStreamer();
  OS.pushSection();
  Header.EmitKernelCodeT(OS, getContext());
  OS.popSection();
}

// Set the ELF symbol type for \p SymbolName, creating the symbol in the
// context if it does not exist yet.
void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
                                                   unsigned Type) {
  MCSymbolELF *Symbol = cast<MCSymbolELF>(
      getStreamer().getContext().getOrCreateSymbol(SymbolName));
  Symbol->setType(Type);
}

// Emit an LDS variable as a common-style STT_OBJECT symbol placed in the
// special SHN_AMDGPU_LDS section index.
void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
                                            Align Alignment) {
  MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
  SymbolELF->setType(ELF::STT_OBJECT);

  // Keep any binding already set on the symbol; default to global.
  if (!SymbolELF->isBindingSet())
    SymbolELF->setBinding(ELF::STB_GLOBAL);

  // declareCommon reports a conflict with a previous declaration.
  if (SymbolELF->declareCommon(Size, Alignment, true)) {
    report_fatal_error("Symbol: " + Symbol->getName() +
                       " redeclared as different type");
  }

  SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
  SymbolELF->setSize(MCConstantExpr::create(Size, getContext()));
}

// Emit the target ID string as an NT_AMD_HSA_ISA_NAME note. Always returns
// true.
bool AMDGPUTargetELFStreamer::EmitISAVersion() {
  // Create two labels to mark the beginning and end of the desc field
  // and a MCExpr to calculate the size of the desc field.
  auto &Context = getContext();
  auto *DescBegin = Context.createTempSymbol();
  auto *DescEnd = Context.createTempSymbol();
  auto *DescSZ = MCBinaryExpr::createSub(
      MCSymbolRefExpr::create(DescEnd, Context),
      MCSymbolRefExpr::create(DescBegin, Context), Context);

  EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_ISA_NAME,
           [&](MCELFStreamer &OS) {
             OS.emitLabel(DescBegin);
             OS.emitBytes(getTargetID()->toString());
             OS.emitLabel(DescEnd);
           });
  return true;
}

// Verify the HSA metadata document (strictness per \p Strict), serialize it
// to a msgpack blob, and emit it as an NT_AMDGPU_METADATA note. Returns
// false if verification fails (nothing is emitted in that case).
bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
                                              bool Strict) {
  HSAMD::V3::MetadataVerifier Verifier(Strict);
  if (!Verifier.verify(HSAMetadataDoc.getRoot()))
    return false;

  std::string HSAMetadataString;
  HSAMetadataDoc.writeToBlob(HSAMetadataString);

  // Create two labels to mark the beginning and end of the desc field
  // and a MCExpr to calculate the size of the desc field.
  auto &Context = getContext();
  auto *DescBegin = Context.createTempSymbol();
  auto *DescEnd = Context.createTempSymbol();
  auto *DescSZ = MCBinaryExpr::createSub(
      MCSymbolRefExpr::create(DescEnd, Context),
      MCSymbolRefExpr::create(DescBegin, Context), Context);

  EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
           [&](MCELFStreamer &OS) {
             OS.emitLabel(DescBegin);
             OS.emitBytes(HSAMetadataString);
             OS.emitLabel(DescEnd);
           });
  return true;
}

// Pad past the end of the kernel code with encoded s_code_end (s_nop on
// gfx90a) instructions: align to an instruction cache line, then emit
// FillSize additional bytes of the pad instruction. Always returns true.
bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
  const uint32_t Encoded_s_code_end = 0xbf9f0000;
  const uint32_t Encoded_s_nop = 0xbf800000;
  uint32_t Encoded_pad = Encoded_s_code_end;

  // Instruction cache line size in bytes.
  const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
  const unsigned CacheLineSize = 1u << Log2CacheLineSize;

  // Extra padding amount in bytes to support prefetch mode 3.
  unsigned FillSize = 3 * CacheLineSize;

  // gfx90a pads with s_nop and a larger fill instead.
  if (AMDGPU::isGFX90A(STI)) {
    Encoded_pad = Encoded_s_nop;
    FillSize = 16 * CacheLineSize;
  }

  MCStreamer &OS = getStreamer();
  OS.pushSection();
  OS.emitValueToAlignment(Align(CacheLineSize), Encoded_pad, 4);
  for (unsigned I = 0; I < FillSize; I += 4)
    OS.emitInt32(Encoded_pad);
  OS.popSection();
  return true;
}

// Emit the HSA kernel descriptor object at the current location, defining
// the <KernelName>.kd symbol. Fields are emitted in the exact order and
// with the exact sizes of amdhsa::kernel_descriptor_t; reserved ranges are
// zero-filled. NOTE(review): NextVGPR/NextSGPR/ReserveVCC/ReserveFlatScr
// are not referenced in this body — presumably kept for interface parity
// with the asm streamer; confirm against the base class.
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR,
    const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
    const MCExpr *ReserveFlatScr) {
  auto &Streamer = getStreamer();
  auto &Context = Streamer.getContext();

  MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
      Context.getOrCreateSymbol(Twine(KernelName)));
  MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
      Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));

  // Copy kernel descriptor symbol's binding, other and visibility from the
  // kernel code symbol.
  KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
  KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
  KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
  // Kernel descriptor symbol's type and size are fixed.
  KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
  KernelDescriptorSymbol->setSize(
      MCConstantExpr::create(sizeof(amdhsa::kernel_descriptor_t), Context));

  // The visibility of the kernel code symbol must be protected or less to
  // allow static relocations from the kernel descriptor to be used.
  if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
    KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);

  Streamer.emitLabel(KernelDescriptorSymbol);
  Streamer.emitValue(
      KernelDescriptor.group_segment_fixed_size,
      sizeof(amdhsa::kernel_descriptor_t::group_segment_fixed_size));
  Streamer.emitValue(
      KernelDescriptor.private_segment_fixed_size,
      sizeof(amdhsa::kernel_descriptor_t::private_segment_fixed_size));
  Streamer.emitValue(KernelDescriptor.kernarg_size,
                     sizeof(amdhsa::kernel_descriptor_t::kernarg_size));

  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i)
    Streamer.emitInt8(0u);

  // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
  // expression being created is:
  //   (start of kernel code) - (start of kernel descriptor)
  // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
  Streamer.emitValue(
      MCBinaryExpr::createSub(
          MCSymbolRefExpr::create(KernelCodeSymbol, AMDGPUMCExpr::S_REL64,
                                  Context),
          MCSymbolRefExpr::create(KernelDescriptorSymbol, Context), Context),
      sizeof(amdhsa::kernel_descriptor_t::kernel_code_entry_byte_offset));
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i)
    Streamer.emitInt8(0u);
  Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3,
                     sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc3));
  Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1,
                     sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc1));
  Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2,
                     sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc2));
  Streamer.emitValue(
      KernelDescriptor.kernel_code_properties,
      sizeof(amdhsa::kernel_descriptor_t::kernel_code_properties));
  Streamer.emitValue(KernelDescriptor.kernarg_preload,
                     sizeof(amdhsa::kernel_descriptor_t::kernarg_preload));
  for (uint32_t i = 0;
       i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i)
    Streamer.emitInt8(0u);
}