1 //===-- AMDGPUTargetStreamer.cpp - Mips Target Streamer Methods -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file provides AMDGPU specific target streamer methods. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPUTargetStreamer.h" 14 #include "AMDGPUMCKernelDescriptor.h" 15 #include "AMDGPUPTNote.h" 16 #include "Utils/AMDGPUBaseInfo.h" 17 #include "Utils/AMDKernelCodeTUtils.h" 18 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" 19 #include "llvm/BinaryFormat/ELF.h" 20 #include "llvm/MC/MCAssembler.h" 21 #include "llvm/MC/MCContext.h" 22 #include "llvm/MC/MCELFStreamer.h" 23 #include "llvm/MC/MCObjectWriter.h" 24 #include "llvm/MC/MCSectionELF.h" 25 #include "llvm/MC/MCSubtargetInfo.h" 26 #include "llvm/Support/AMDGPUMetadata.h" 27 #include "llvm/Support/AMDHSAKernelDescriptor.h" 28 #include "llvm/Support/Casting.h" 29 #include "llvm/Support/CommandLine.h" 30 #include "llvm/Support/FormattedStream.h" 31 #include "llvm/TargetParser/TargetParser.h" 32 33 using namespace llvm; 34 using namespace llvm::AMDGPU; 35 36 //===----------------------------------------------------------------------===// 37 // AMDGPUTargetStreamer 38 //===----------------------------------------------------------------------===// 39 40 static cl::opt<unsigned> 41 ForceGenericVersion("amdgpu-force-generic-version", 42 cl::desc("Force a specific generic_v<N> flag to be " 43 "added. For testing purposes only."), 44 cl::ReallyHidden, cl::init(0)); 45 46 bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) { 47 msgpack::Document HSAMetadataDoc; 48 if (!HSAMetadataDoc.fromYAML(HSAMetadataString)) 49 return false; 50 return EmitHSAMetadata(HSAMetadataDoc, false); 51 } 52 53 StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { 54 AMDGPU::GPUKind AK; 55 56 // clang-format off 57 switch (ElfMach) { 58 case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break; 59 case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break; 60 case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break; 61 case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break; 62 case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break; 63 case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break; 64 case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break; 65 case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break; 66 case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break; 67 case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break; 68 case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break; 69 case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break; 70 case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break; 71 case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break; 72 case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break; 73 case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break; 74 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break; 75 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break; 76 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602: AK = GK_GFX602; break; 77 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break; 78 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break; 79 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break; 80 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break; 81 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break; 82 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705: AK = GK_GFX705; break; 83 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break; 84 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break; 85 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break; 86 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805: AK = GK_GFX805; break; 87 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break; 88 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break; 89 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break; 90 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break; 91 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break; 92 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: AK = GK_GFX908; break; 93 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break; 94 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break; 95 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break; 96 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break; 97 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break; 98 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break; 99 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break; 100 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break; 101 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break; 102 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: AK = GK_GFX1013; break; 103 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break; 104 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break; 105 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break; 106 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break; 107 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034; break; 108 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: AK = GK_GFX1035; break; 109 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: AK = GK_GFX1036; break; 110 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: AK = GK_GFX1100; break; 111 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: AK = GK_GFX1101; break; 112 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: AK = GK_GFX1102; break; 113 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break; 114 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break; 115 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break; 116 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152: AK = GK_GFX1152; break; 117 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break; 118 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break; 119 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break; 120 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break; 121 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break; 122 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break; 123 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC: AK = GK_GFX12_GENERIC; break; 124 case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; 125 default: AK = GK_NONE; break; 126 } 127 // clang-format on 128 129 StringRef GPUName = getArchNameAMDGCN(AK); 130 if (GPUName != "") 131 return GPUName; 132 return getArchNameR600(AK); 133 } 134 135 unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { 136 AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); 137 if (AK == AMDGPU::GPUKind::GK_NONE) 138 AK = parseArchR600(GPU); 139 140 // clang-format off 141 switch (AK) { 142 case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600; 143 case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630; 144 case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880; 145 case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670; 146 case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710; 147 case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730; 148 case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770; 149 case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR; 150 case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS; 151 case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER; 152 case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD; 153 case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO; 154 case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS; 155 case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS; 156 case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN; 157 case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS; 158 case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600; 159 case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601; 160 case GK_GFX602: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602; 161 case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700; 162 case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701; 163 case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702; 164 case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703; 165 case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704; 166 case GK_GFX705: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705; 167 case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801; 168 case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802; 169 case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803; 170 case GK_GFX805: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805; 171 case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810; 172 case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900; 173 case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902; 174 case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904; 175 case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906; 176 case GK_GFX908: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908; 177 case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909; 178 case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A; 179 case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C; 180 case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940; 181 case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941; 182 case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942; 183 case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010; 184 case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011; 185 case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012; 186 case GK_GFX1013: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013; 187 case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030; 188 case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031; 189 case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032; 190 case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033; 191 case GK_GFX1034: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034; 192 case GK_GFX1035: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035; 193 case GK_GFX1036: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036; 194 case GK_GFX1100: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100; 195 case GK_GFX1101: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101; 196 case GK_GFX1102: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102; 197 case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103; 198 case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150; 199 case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151; 200 case GK_GFX1152: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152; 201 case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200; 202 case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201; 203 case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC; 204 case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC; 205 case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC; 206 case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC; 207 case GK_GFX12_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC; 208 case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; 209 } 210 // clang-format on 211 212 llvm_unreachable("unknown GPU"); 213 } 214 215 //===----------------------------------------------------------------------===// 216 // AMDGPUTargetAsmStreamer 217 //===----------------------------------------------------------------------===// 218 219 AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S, 220 formatted_raw_ostream &OS) 221 : AMDGPUTargetStreamer(S), OS(OS) { } 222 223 // A hook for emitting stuff at the end. 224 // We use it for emitting the accumulated PAL metadata as directives. 225 // The PAL metadata is reset after it is emitted. 226 void AMDGPUTargetAsmStreamer::finish() { 227 std::string S; 228 getPALMetadata()->toString(S); 229 OS << S; 230 231 // Reset the pal metadata so its data will not affect a compilation that 232 // reuses this object. 233 getPALMetadata()->reset(); 234 } 235 236 void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() { 237 OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n"; 238 } 239 240 void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion( 241 unsigned COV) { 242 AMDGPUTargetStreamer::EmitDirectiveAMDHSACodeObjectVersion(COV); 243 OS << "\t.amdhsa_code_object_version " << COV << '\n'; 244 } 245 246 void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) { 247 OS << "\t.amd_kernel_code_t\n"; 248 Header.EmitKernelCodeT(OS, getContext()); 249 OS << "\t.end_amd_kernel_code_t\n"; 250 } 251 252 void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, 253 unsigned Type) { 254 switch (Type) { 255 default: llvm_unreachable("Invalid AMDGPU symbol type"); 256 case ELF::STT_AMDGPU_HSA_KERNEL: 257 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ; 258 break; 259 } 260 } 261 262 void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size, 263 Align Alignment) { 264 OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", " 265 << Alignment.value() << '\n'; 266 } 267 268 bool AMDGPUTargetAsmStreamer::EmitISAVersion() { 269 OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n"; 270 return true; 271 } 272 273 bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( 274 msgpack::Document &HSAMetadataDoc, bool Strict) { 275 HSAMD::V3::MetadataVerifier Verifier(Strict); 276 if (!Verifier.verify(HSAMetadataDoc.getRoot())) 277 return false; 278 279 std::string HSAMetadataString; 280 raw_string_ostream StrOS(HSAMetadataString); 281 HSAMetadataDoc.toYAML(StrOS); 282 283 OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n'; 284 OS << StrOS.str() << '\n'; 285 OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n'; 286 return true; 287 } 288 289 bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader( 290 const MCSubtargetInfo &STI, bool TrapEnabled) { 291 OS << (TrapEnabled ? "\ts_trap 2" : "\ts_endpgm") 292 << " ; Kernarg preload header. Trap with incompatible firmware that " 293 "doesn't support preloading kernel arguments.\n"; 294 OS << "\t.fill 63, 4, 0xbf800000 ; s_nop 0\n"; 295 return true; 296 } 297 298 bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { 299 const uint32_t Encoded_s_code_end = 0xbf9f0000; 300 const uint32_t Encoded_s_nop = 0xbf800000; 301 uint32_t Encoded_pad = Encoded_s_code_end; 302 303 // Instruction cache line size in bytes. 304 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6; 305 const unsigned CacheLineSize = 1u << Log2CacheLineSize; 306 307 // Extra padding amount in bytes to support prefetch mode 3. 308 unsigned FillSize = 3 * CacheLineSize; 309 310 if (AMDGPU::isGFX90A(STI)) { 311 Encoded_pad = Encoded_s_nop; 312 FillSize = 16 * CacheLineSize; 313 } 314 315 OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n'; 316 OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n'; 317 return true; 318 } 319 320 void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( 321 const MCSubtargetInfo &STI, StringRef KernelName, 322 const MCKernelDescriptor &KD, const MCExpr *NextVGPR, 323 const MCExpr *NextSGPR, const MCExpr *ReserveVCC, 324 const MCExpr *ReserveFlatScr) { 325 IsaVersion IVersion = getIsaVersion(STI.getCPU()); 326 const MCAsmInfo *MAI = getContext().getAsmInfo(); 327 328 OS << "\t.amdhsa_kernel " << KernelName << '\n'; 329 330 auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask, 331 StringRef Directive) { 332 int64_t IVal; 333 OS << "\t\t" << Directive << ' '; 334 const MCExpr *pgm_rsrc1_bits = 335 MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext()); 336 if (pgm_rsrc1_bits->evaluateAsAbsolute(IVal)) 337 OS << static_cast<uint64_t>(IVal); 338 else 339 pgm_rsrc1_bits->print(OS, MAI); 340 OS << '\n'; 341 }; 342 343 auto EmitMCExpr = [&](const MCExpr *Value) { 344 int64_t evaluatableValue; 345 if (Value->evaluateAsAbsolute(evaluatableValue)) { 346 OS << static_cast<uint64_t>(evaluatableValue); 347 } else { 348 Value->print(OS, MAI); 349 } 350 }; 351 352 OS << "\t\t.amdhsa_group_segment_fixed_size "; 353 EmitMCExpr(KD.group_segment_fixed_size); 354 OS << '\n'; 355 356 OS << "\t\t.amdhsa_private_segment_fixed_size "; 357 EmitMCExpr(KD.private_segment_fixed_size); 358 OS << '\n'; 359 360 OS << "\t\t.amdhsa_kernarg_size "; 361 EmitMCExpr(KD.kernarg_size); 362 OS << '\n'; 363 364 PrintField( 365 KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT, 366 amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count"); 367 368 if (!hasArchitectedFlatScratch(STI)) 369 PrintField( 370 KD.kernel_code_properties, 371 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT, 372 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 373 ".amdhsa_user_sgpr_private_segment_buffer"); 374 PrintField(KD.kernel_code_properties, 375 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT, 376 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, 377 ".amdhsa_user_sgpr_dispatch_ptr"); 378 PrintField(KD.kernel_code_properties, 379 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT, 380 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, 381 ".amdhsa_user_sgpr_queue_ptr"); 382 PrintField(KD.kernel_code_properties, 383 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT, 384 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 385 ".amdhsa_user_sgpr_kernarg_segment_ptr"); 386 PrintField(KD.kernel_code_properties, 387 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT, 388 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, 389 ".amdhsa_user_sgpr_dispatch_id"); 390 if (!hasArchitectedFlatScratch(STI)) 391 PrintField(KD.kernel_code_properties, 392 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT, 393 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, 394 ".amdhsa_user_sgpr_flat_scratch_init"); 395 if (hasKernargPreload(STI)) { 396 PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT, 397 amdhsa::KERNARG_PRELOAD_SPEC_LENGTH, 398 ".amdhsa_user_sgpr_kernarg_preload_length"); 399 PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT, 400 amdhsa::KERNARG_PRELOAD_SPEC_OFFSET, 401 ".amdhsa_user_sgpr_kernarg_preload_offset"); 402 } 403 PrintField( 404 KD.kernel_code_properties, 405 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT, 406 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 407 ".amdhsa_user_sgpr_private_segment_size"); 408 if (IVersion.Major >= 10) 409 PrintField(KD.kernel_code_properties, 410 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT, 411 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 412 ".amdhsa_wavefront_size32"); 413 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) 414 PrintField(KD.kernel_code_properties, 415 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT, 416 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, 417 ".amdhsa_uses_dynamic_stack"); 418 PrintField(KD.compute_pgm_rsrc2, 419 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT, 420 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, 421 (hasArchitectedFlatScratch(STI) 422 ? ".amdhsa_enable_private_segment" 423 : ".amdhsa_system_sgpr_private_segment_wavefront_offset")); 424 PrintField(KD.compute_pgm_rsrc2, 425 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT, 426 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 427 ".amdhsa_system_sgpr_workgroup_id_x"); 428 PrintField(KD.compute_pgm_rsrc2, 429 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT, 430 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, 431 ".amdhsa_system_sgpr_workgroup_id_y"); 432 PrintField(KD.compute_pgm_rsrc2, 433 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT, 434 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, 435 ".amdhsa_system_sgpr_workgroup_id_z"); 436 PrintField(KD.compute_pgm_rsrc2, 437 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT, 438 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, 439 ".amdhsa_system_sgpr_workgroup_info"); 440 PrintField(KD.compute_pgm_rsrc2, 441 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT, 442 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, 443 ".amdhsa_system_vgpr_workitem_id"); 444 445 // These directives are required. 446 OS << "\t\t.amdhsa_next_free_vgpr "; 447 EmitMCExpr(NextVGPR); 448 OS << '\n'; 449 450 OS << "\t\t.amdhsa_next_free_sgpr "; 451 EmitMCExpr(NextSGPR); 452 OS << '\n'; 453 454 if (AMDGPU::isGFX90A(STI)) { 455 // MCExpr equivalent of taking the (accum_offset + 1) * 4. 456 const MCExpr *accum_bits = MCKernelDescriptor::bits_get( 457 KD.compute_pgm_rsrc3, 458 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, 459 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext()); 460 accum_bits = MCBinaryExpr::createAdd( 461 accum_bits, MCConstantExpr::create(1, getContext()), getContext()); 462 accum_bits = MCBinaryExpr::createMul( 463 accum_bits, MCConstantExpr::create(4, getContext()), getContext()); 464 OS << "\t\t.amdhsa_accum_offset "; 465 EmitMCExpr(accum_bits); 466 OS << '\n'; 467 } 468 469 OS << "\t\t.amdhsa_reserve_vcc "; 470 EmitMCExpr(ReserveVCC); 471 OS << '\n'; 472 473 if (IVersion.Major >= 7 && !hasArchitectedFlatScratch(STI)) { 474 OS << "\t\t.amdhsa_reserve_flat_scratch "; 475 EmitMCExpr(ReserveFlatScr); 476 OS << '\n'; 477 } 478 479 switch (CodeObjectVersion) { 480 default: 481 break; 482 case AMDGPU::AMDHSA_COV4: 483 case AMDGPU::AMDHSA_COV5: 484 if (getTargetID()->isXnackSupported()) 485 OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n'; 486 break; 487 } 488 489 PrintField(KD.compute_pgm_rsrc1, 490 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT, 491 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, 492 ".amdhsa_float_round_mode_32"); 493 PrintField(KD.compute_pgm_rsrc1, 494 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT, 495 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, 496 ".amdhsa_float_round_mode_16_64"); 497 PrintField(KD.compute_pgm_rsrc1, 498 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT, 499 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, 500 ".amdhsa_float_denorm_mode_32"); 501 PrintField(KD.compute_pgm_rsrc1, 502 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT, 503 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, 504 ".amdhsa_float_denorm_mode_16_64"); 505 if (IVersion.Major < 12) { 506 PrintField(KD.compute_pgm_rsrc1, 507 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT, 508 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 509 ".amdhsa_dx10_clamp"); 510 PrintField(KD.compute_pgm_rsrc1, 511 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT, 512 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 513 ".amdhsa_ieee_mode"); 514 } 515 if (IVersion.Major >= 9) { 516 PrintField(KD.compute_pgm_rsrc1, 517 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT, 518 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, 519 ".amdhsa_fp16_overflow"); 520 } 521 if (AMDGPU::isGFX90A(STI)) 522 PrintField(KD.compute_pgm_rsrc3, 523 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT, 524 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split"); 525 if (IVersion.Major >= 10) { 526 PrintField(KD.compute_pgm_rsrc1, 527 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT, 528 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, 529 ".amdhsa_workgroup_processor_mode"); 530 PrintField(KD.compute_pgm_rsrc1, 531 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT, 532 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 533 ".amdhsa_memory_ordered"); 534 PrintField(KD.compute_pgm_rsrc1, 535 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT, 536 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, 537 ".amdhsa_forward_progress"); 538 } 539 if (IVersion.Major >= 10 && IVersion.Major < 12) { 540 PrintField(KD.compute_pgm_rsrc3, 541 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT, 542 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, 543 ".amdhsa_shared_vgpr_count"); 544 } 545 if (IVersion.Major >= 12) { 546 PrintField(KD.compute_pgm_rsrc1, 547 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT, 548 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 549 ".amdhsa_round_robin_scheduling"); 550 } 551 PrintField( 552 KD.compute_pgm_rsrc2, 553 amdhsa:: 554 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT, 555 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 556 ".amdhsa_exception_fp_ieee_invalid_op"); 557 PrintField( 558 KD.compute_pgm_rsrc2, 559 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT, 560 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 561 ".amdhsa_exception_fp_denorm_src"); 562 PrintField( 563 KD.compute_pgm_rsrc2, 564 amdhsa:: 565 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT, 566 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 567 ".amdhsa_exception_fp_ieee_div_zero"); 568 PrintField( 569 KD.compute_pgm_rsrc2, 570 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT, 571 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 572 ".amdhsa_exception_fp_ieee_overflow"); 573 PrintField( 574 KD.compute_pgm_rsrc2, 575 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT, 576 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 577 ".amdhsa_exception_fp_ieee_underflow"); 578 PrintField( 579 KD.compute_pgm_rsrc2, 580 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT, 581 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 582 ".amdhsa_exception_fp_ieee_inexact"); 583 PrintField( 584 KD.compute_pgm_rsrc2, 585 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT, 586 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 587 ".amdhsa_exception_int_div_zero"); 588 589 OS << "\t.end_amdhsa_kernel\n"; 590 } 591 592 //===----------------------------------------------------------------------===// 593 // AMDGPUTargetELFStreamer 594 //===----------------------------------------------------------------------===// 595 596 AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S, 597 const MCSubtargetInfo &STI) 598 : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {} 599 600 MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() { 601 return static_cast<MCELFStreamer &>(Streamer); 602 } 603 604 // A hook for emitting stuff at the end. 605 // We use it for emitting the accumulated PAL metadata as a .note record. 606 // The PAL metadata is reset after it is emitted. 607 void AMDGPUTargetELFStreamer::finish() { 608 ELFObjectWriter &W = getStreamer().getWriter(); 609 W.setELFHeaderEFlags(getEFlags()); 610 W.setOverrideABIVersion( 611 getELFABIVersion(STI.getTargetTriple(), CodeObjectVersion)); 612 613 std::string Blob; 614 const char *Vendor = getPALMetadata()->getVendor(); 615 unsigned Type = getPALMetadata()->getType(); 616 getPALMetadata()->toBlob(Type, Blob); 617 if (Blob.empty()) 618 return; 619 EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type, 620 [&](MCELFStreamer &OS) { OS.emitBytes(Blob); }); 621 622 // Reset the pal metadata so its data will not affect a compilation that 623 // reuses this object. 624 getPALMetadata()->reset(); 625 } 626 627 void AMDGPUTargetELFStreamer::EmitNote( 628 StringRef Name, const MCExpr *DescSZ, unsigned NoteType, 629 function_ref<void(MCELFStreamer &)> EmitDesc) { 630 auto &S = getStreamer(); 631 auto &Context = S.getContext(); 632 633 auto NameSZ = Name.size() + 1; 634 635 unsigned NoteFlags = 0; 636 // TODO Apparently, this is currently needed for OpenCL as mentioned in 637 // https://reviews.llvm.org/D74995 638 if (isHsaAbi(STI)) 639 NoteFlags = ELF::SHF_ALLOC; 640 641 S.pushSection(); 642 S.switchSection( 643 Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags)); 644 S.emitInt32(NameSZ); // namesz 645 S.emitValue(DescSZ, 4); // descz 646 S.emitInt32(NoteType); // type 647 S.emitBytes(Name); // name 648 S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0 649 EmitDesc(S); // desc 650 S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0 651 S.popSection(); 652 } 653 654 unsigned AMDGPUTargetELFStreamer::getEFlags() { 655 switch (STI.getTargetTriple().getArch()) { 656 default: 657 llvm_unreachable("Unsupported Arch"); 658 case Triple::r600: 659 return getEFlagsR600(); 660 case Triple::amdgcn: 661 return getEFlagsAMDGCN(); 662 } 663 } 664 665 unsigned AMDGPUTargetELFStreamer::getEFlagsR600() { 666 assert(STI.getTargetTriple().getArch() == Triple::r600); 667 668 return getElfMach(STI.getCPU()); 669 } 670 671 unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() { 672 assert(STI.getTargetTriple().getArch() == Triple::amdgcn); 673 674 switch (STI.getTargetTriple().getOS()) { 675 default: 676 // TODO: Why are some tests have "mingw" listed as OS? 677 // llvm_unreachable("Unsupported OS"); 678 case Triple::UnknownOS: 679 return getEFlagsUnknownOS(); 680 case Triple::AMDHSA: 681 return getEFlagsAMDHSA(); 682 case Triple::AMDPAL: 683 return getEFlagsAMDPAL(); 684 case Triple::Mesa3D: 685 return getEFlagsMesa3D(); 686 } 687 } 688 689 unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() { 690 // TODO: Why are some tests have "mingw" listed as OS? 691 // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS); 692 693 return getEFlagsV3(); 694 } 695 696 unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() { 697 assert(isHsaAbi(STI)); 698 699 if (CodeObjectVersion >= 6) 700 return getEFlagsV6(); 701 return getEFlagsV4(); 702 } 703 704 unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() { 705 assert(STI.getTargetTriple().getOS() == Triple::AMDPAL); 706 707 return getEFlagsV3(); 708 } 709 710 unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() { 711 assert(STI.getTargetTriple().getOS() == Triple::Mesa3D); 712 713 return getEFlagsV3(); 714 } 715 716 unsigned AMDGPUTargetELFStreamer::getEFlagsV3() { 717 unsigned EFlagsV3 = 0; 718 719 // mach. 720 EFlagsV3 |= getElfMach(STI.getCPU()); 721 722 // xnack. 723 if (getTargetID()->isXnackOnOrAny()) 724 EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_XNACK_V3; 725 // sramecc. 726 if (getTargetID()->isSramEccOnOrAny()) 727 EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3; 728 729 return EFlagsV3; 730 } 731 732 unsigned AMDGPUTargetELFStreamer::getEFlagsV4() { 733 unsigned EFlagsV4 = 0; 734 735 // mach. 736 EFlagsV4 |= getElfMach(STI.getCPU()); 737 738 // xnack. 739 switch (getTargetID()->getXnackSetting()) { 740 case AMDGPU::IsaInfo::TargetIDSetting::Unsupported: 741 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4; 742 break; 743 case AMDGPU::IsaInfo::TargetIDSetting::Any: 744 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4; 745 break; 746 case AMDGPU::IsaInfo::TargetIDSetting::Off: 747 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4; 748 break; 749 case AMDGPU::IsaInfo::TargetIDSetting::On: 750 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4; 751 break; 752 } 753 // sramecc. 754 switch (getTargetID()->getSramEccSetting()) { 755 case AMDGPU::IsaInfo::TargetIDSetting::Unsupported: 756 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4; 757 break; 758 case AMDGPU::IsaInfo::TargetIDSetting::Any: 759 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4; 760 break; 761 case AMDGPU::IsaInfo::TargetIDSetting::Off: 762 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4; 763 break; 764 case AMDGPU::IsaInfo::TargetIDSetting::On: 765 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4; 766 break; 767 } 768 769 return EFlagsV4; 770 } 771 772 unsigned AMDGPUTargetELFStreamer::getEFlagsV6() { 773 unsigned Flags = getEFlagsV4(); 774 775 unsigned Version = ForceGenericVersion; 776 if (!Version) { 777 switch (parseArchAMDGCN(STI.getCPU())) { 778 case AMDGPU::GK_GFX9_GENERIC: 779 Version = GenericVersion::GFX9; 780 break; 781 case AMDGPU::GK_GFX10_1_GENERIC: 782 Version = GenericVersion::GFX10_1; 783 break; 784 case AMDGPU::GK_GFX10_3_GENERIC: 785 Version = GenericVersion::GFX10_3; 786 break; 787 case AMDGPU::GK_GFX11_GENERIC: 788 Version = GenericVersion::GFX11; 789 break; 790 case AMDGPU::GK_GFX12_GENERIC: 791 Version = GenericVersion::GFX12; 792 break; 793 default: 794 break; 795 } 796 } 797 798 // Versions start at 1. 799 if (Version) { 800 if (Version > ELF::EF_AMDGPU_GENERIC_VERSION_MAX) 801 report_fatal_error("Cannot encode generic code object version " + 802 Twine(Version) + 803 " - no ELF flag can represent this version!"); 804 Flags |= (Version << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET); 805 } 806 807 return Flags; 808 } 809 810 void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {} 811 812 void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) { 813 MCStreamer &OS = getStreamer(); 814 OS.pushSection(); 815 Header.EmitKernelCodeT(OS, getContext()); 816 OS.popSection(); 817 } 818 819 void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, 820 unsigned Type) { 821 MCSymbolELF *Symbol = cast<MCSymbolELF>( 822 getStreamer().getContext().getOrCreateSymbol(SymbolName)); 823 Symbol->setType(Type); 824 } 825 826 void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size, 827 Align Alignment) { 828 MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol); 829 SymbolELF->setType(ELF::STT_OBJECT); 830 831 if (!SymbolELF->isBindingSet()) 832 SymbolELF->setBinding(ELF::STB_GLOBAL); 833 834 if (SymbolELF->declareCommon(Size, Alignment, true)) { 835 report_fatal_error("Symbol: " + Symbol->getName() + 836 " redeclared as different type"); 837 } 838 839 SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS); 840 SymbolELF->setSize(MCConstantExpr::create(Size, getContext())); 841 } 842 843 bool AMDGPUTargetELFStreamer::EmitISAVersion() { 844 // Create two labels to mark the beginning and end of the desc field 845 // and a MCExpr to calculate the size of the desc field. 846 auto &Context = getContext(); 847 auto *DescBegin = Context.createTempSymbol(); 848 auto *DescEnd = Context.createTempSymbol(); 849 auto *DescSZ = MCBinaryExpr::createSub( 850 MCSymbolRefExpr::create(DescEnd, Context), 851 MCSymbolRefExpr::create(DescBegin, Context), Context); 852 853 EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_ISA_NAME, 854 [&](MCELFStreamer &OS) { 855 OS.emitLabel(DescBegin); 856 OS.emitBytes(getTargetID()->toString()); 857 OS.emitLabel(DescEnd); 858 }); 859 return true; 860 } 861 862 bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc, 863 bool Strict) { 864 HSAMD::V3::MetadataVerifier Verifier(Strict); 865 if (!Verifier.verify(HSAMetadataDoc.getRoot())) 866 return false; 867 868 std::string HSAMetadataString; 869 HSAMetadataDoc.writeToBlob(HSAMetadataString); 870 871 // Create two labels to mark the beginning and end of the desc field 872 // and a MCExpr to calculate the size of the desc field. 873 auto &Context = getContext(); 874 auto *DescBegin = Context.createTempSymbol(); 875 auto *DescEnd = Context.createTempSymbol(); 876 auto *DescSZ = MCBinaryExpr::createSub( 877 MCSymbolRefExpr::create(DescEnd, Context), 878 MCSymbolRefExpr::create(DescBegin, Context), Context); 879 880 EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA, 881 [&](MCELFStreamer &OS) { 882 OS.emitLabel(DescBegin); 883 OS.emitBytes(HSAMetadataString); 884 OS.emitLabel(DescEnd); 885 }); 886 return true; 887 } 888 889 bool AMDGPUTargetELFStreamer::EmitKernargPreloadHeader( 890 const MCSubtargetInfo &STI, bool TrapEnabled) { 891 const uint32_t Encoded_s_nop = 0xbf800000; 892 const uint32_t Encoded_s_trap = 0xbf920002; 893 const uint32_t Encoded_s_endpgm = 0xbf810000; 894 const uint32_t TrapInstr = TrapEnabled ? Encoded_s_trap : Encoded_s_endpgm; 895 MCStreamer &OS = getStreamer(); 896 OS.emitInt32(TrapInstr); 897 for (int i = 0; i < 63; ++i) { 898 OS.emitInt32(Encoded_s_nop); 899 } 900 return true; 901 } 902 903 bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { 904 const uint32_t Encoded_s_code_end = 0xbf9f0000; 905 const uint32_t Encoded_s_nop = 0xbf800000; 906 uint32_t Encoded_pad = Encoded_s_code_end; 907 908 // Instruction cache line size in bytes. 909 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6; 910 const unsigned CacheLineSize = 1u << Log2CacheLineSize; 911 912 // Extra padding amount in bytes to support prefetch mode 3. 913 unsigned FillSize = 3 * CacheLineSize; 914 915 if (AMDGPU::isGFX90A(STI)) { 916 Encoded_pad = Encoded_s_nop; 917 FillSize = 16 * CacheLineSize; 918 } 919 920 MCStreamer &OS = getStreamer(); 921 OS.pushSection(); 922 OS.emitValueToAlignment(Align(CacheLineSize), Encoded_pad, 4); 923 for (unsigned I = 0; I < FillSize; I += 4) 924 OS.emitInt32(Encoded_pad); 925 OS.popSection(); 926 return true; 927 } 928 929 void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( 930 const MCSubtargetInfo &STI, StringRef KernelName, 931 const MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, 932 const MCExpr *NextSGPR, const MCExpr *ReserveVCC, 933 const MCExpr *ReserveFlatScr) { 934 auto &Streamer = getStreamer(); 935 auto &Context = Streamer.getContext(); 936 937 MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>( 938 Context.getOrCreateSymbol(Twine(KernelName))); 939 MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>( 940 Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd"))); 941 942 // Copy kernel descriptor symbol's binding, other and visibility from the 943 // kernel code symbol. 944 KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding()); 945 KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther()); 946 KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility()); 947 // Kernel descriptor symbol's type and size are fixed. 948 KernelDescriptorSymbol->setType(ELF::STT_OBJECT); 949 KernelDescriptorSymbol->setSize( 950 MCConstantExpr::create(sizeof(amdhsa::kernel_descriptor_t), Context)); 951 952 // The visibility of the kernel code symbol must be protected or less to allow 953 // static relocations from the kernel descriptor to be used. 954 if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT) 955 KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED); 956 957 Streamer.emitLabel(KernelDescriptorSymbol); 958 Streamer.emitValue( 959 KernelDescriptor.group_segment_fixed_size, 960 sizeof(amdhsa::kernel_descriptor_t::group_segment_fixed_size)); 961 Streamer.emitValue( 962 KernelDescriptor.private_segment_fixed_size, 963 sizeof(amdhsa::kernel_descriptor_t::private_segment_fixed_size)); 964 Streamer.emitValue(KernelDescriptor.kernarg_size, 965 sizeof(amdhsa::kernel_descriptor_t::kernarg_size)); 966 967 for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i) 968 Streamer.emitInt8(0u); 969 970 // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The 971 // expression being created is: 972 // (start of kernel code) - (start of kernel descriptor) 973 // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64. 974 Streamer.emitValue( 975 MCBinaryExpr::createSub( 976 MCSymbolRefExpr::create(KernelCodeSymbol, 977 MCSymbolRefExpr::VK_AMDGPU_REL64, Context), 978 MCSymbolRefExpr::create(KernelDescriptorSymbol, 979 MCSymbolRefExpr::VK_None, Context), 980 Context), 981 sizeof(amdhsa::kernel_descriptor_t::kernel_code_entry_byte_offset)); 982 for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i) 983 Streamer.emitInt8(0u); 984 Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3, 985 sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc3)); 986 Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1, 987 sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc1)); 988 Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2, 989 sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc2)); 990 Streamer.emitValue( 991 KernelDescriptor.kernel_code_properties, 992 sizeof(amdhsa::kernel_descriptor_t::kernel_code_properties)); 993 Streamer.emitValue(KernelDescriptor.kernarg_preload, 994 sizeof(amdhsa::kernel_descriptor_t::kernarg_preload)); 995 for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i) 996 Streamer.emitInt8(0u); 997 } 998