1 //===- AMDKernelCodeTUtils.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file - utility functions to parse/print AMDGPUMCKernelCodeT structure
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AMDKernelCodeTUtils.h"
14 #include "AMDKernelCodeT.h"
15 #include "SIDefines.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/SIDefinesUtils.h"
18 #include "llvm/ADT/IndexedMap.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCParser/MCAsmLexer.h"
23 #include "llvm/MC/MCParser/MCAsmParser.h"
24 #include "llvm/MC/MCStreamer.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27
28 using namespace llvm;
29 using namespace llvm::AMDGPU;
30
// GEN_HAS_MEMBER(member) emits three helper classes for one field name:
// - HasMember##member
//     RESULT is true when AMDGPUMCKernelCodeT declares a member with that
//     name. The probe derives from both AMDGPUMCKernelCodeT and a decoy struct
//     holding `int member`; the name is ambiguous (and the first overload
//     drops out) exactly when AMDGPUMCKernelCodeT has the member as well.
// - IsMCExpr##member
//     RESULT is true when the member exists and has type `const MCExpr *`.
// - GetMember##member
//     Get(C) returns a `const MCExpr *&` to C.member when IsMCExpr##member
//     holds. Otherwise it returns a reference to the static `Phony` pointer
//     (initialized to nullptr) so the reference return type is preserved.
#define GEN_HAS_MEMBER(member)                                                 \
  class HasMember##member {                                                    \
    struct Decoy {                                                             \
      int member;                                                              \
    };                                                                         \
    struct Probe : AMDGPUMCKernelCodeT, Decoy {};                              \
    template <typename U>                                                      \
    static constexpr std::false_type Detect(decltype(U::member) *);            \
    template <typename U> static constexpr std::true_type Detect(...);         \
                                                                               \
  public:                                                                      \
    static constexpr bool RESULT = decltype(Detect<Probe>(nullptr))::value;    \
  };                                                                           \
  class IsMCExpr##member {                                                     \
    template <typename U,                                                      \
              std::enable_if_t<                                                \
                  HasMember##member::RESULT &&                                 \
                      std::is_same_v<decltype(U::member), const MCExpr *>,     \
                  int> = 0>                                                    \
    static constexpr std::true_type Classify(decltype(U::member) *);           \
    template <typename U> static constexpr std::false_type Classify(...);      \
                                                                               \
  public:                                                                      \
    static constexpr bool RESULT =                                             \
        decltype(Classify<AMDGPUMCKernelCodeT>(nullptr))::value;               \
  };                                                                           \
  class GetMember##member {                                                    \
  public:                                                                      \
    static const MCExpr *Phony;                                                \
    template <typename U> static const MCExpr *&Get(U &C) {                    \
      if constexpr (IsMCExpr##member::RESULT)                                  \
        return C.member;                                                       \
      else                                                                     \
        return Phony;                                                          \
    }                                                                          \
  };                                                                           \
  const MCExpr *GetMember##member::Phony = nullptr;
90
91 // Cannot generate class declarations using the table driver approach (see table
92 // in AMDKernelCodeTInfo.h). Luckily, if any are missing here or eventually
93 // added to the table, an error should occur when trying to retrieve the table
94 // in getMCExprIndexTable.
95 GEN_HAS_MEMBER(amd_code_version_major)
GEN_HAS_MEMBER(amd_code_version_minor)96 GEN_HAS_MEMBER(amd_code_version_minor)
97 GEN_HAS_MEMBER(amd_machine_kind)
98 GEN_HAS_MEMBER(amd_machine_version_major)
99 GEN_HAS_MEMBER(amd_machine_version_minor)
100 GEN_HAS_MEMBER(amd_machine_version_stepping)
101
102 GEN_HAS_MEMBER(kernel_code_entry_byte_offset)
103 GEN_HAS_MEMBER(kernel_code_prefetch_byte_size)
104
105 GEN_HAS_MEMBER(granulated_workitem_vgpr_count)
106 GEN_HAS_MEMBER(granulated_wavefront_sgpr_count)
107 GEN_HAS_MEMBER(priority)
108 GEN_HAS_MEMBER(float_mode)
109 GEN_HAS_MEMBER(priv)
110 GEN_HAS_MEMBER(enable_dx10_clamp)
111 GEN_HAS_MEMBER(debug_mode)
112 GEN_HAS_MEMBER(enable_ieee_mode)
113 GEN_HAS_MEMBER(enable_wgp_mode)
114 GEN_HAS_MEMBER(enable_mem_ordered)
115 GEN_HAS_MEMBER(enable_fwd_progress)
116
117 GEN_HAS_MEMBER(enable_sgpr_private_segment_wave_byte_offset)
118 GEN_HAS_MEMBER(user_sgpr_count)
119 GEN_HAS_MEMBER(enable_trap_handler)
120 GEN_HAS_MEMBER(enable_sgpr_workgroup_id_x)
121 GEN_HAS_MEMBER(enable_sgpr_workgroup_id_y)
122 GEN_HAS_MEMBER(enable_sgpr_workgroup_id_z)
123 GEN_HAS_MEMBER(enable_sgpr_workgroup_info)
124 GEN_HAS_MEMBER(enable_vgpr_workitem_id)
125 GEN_HAS_MEMBER(enable_exception_msb)
126 GEN_HAS_MEMBER(granulated_lds_size)
127 GEN_HAS_MEMBER(enable_exception)
128
129 GEN_HAS_MEMBER(enable_sgpr_private_segment_buffer)
130 GEN_HAS_MEMBER(enable_sgpr_dispatch_ptr)
131 GEN_HAS_MEMBER(enable_sgpr_queue_ptr)
132 GEN_HAS_MEMBER(enable_sgpr_kernarg_segment_ptr)
133 GEN_HAS_MEMBER(enable_sgpr_dispatch_id)
134 GEN_HAS_MEMBER(enable_sgpr_flat_scratch_init)
135 GEN_HAS_MEMBER(enable_sgpr_private_segment_size)
136 GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_x)
137 GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_y)
138 GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_z)
139 GEN_HAS_MEMBER(enable_wavefront_size32)
140 GEN_HAS_MEMBER(enable_ordered_append_gds)
141 GEN_HAS_MEMBER(private_element_size)
142 GEN_HAS_MEMBER(is_ptr64)
143 GEN_HAS_MEMBER(is_dynamic_callstack)
144 GEN_HAS_MEMBER(is_debug_enabled)
145 GEN_HAS_MEMBER(is_xnack_enabled)
146
147 GEN_HAS_MEMBER(workitem_private_segment_byte_size)
148 GEN_HAS_MEMBER(workgroup_group_segment_byte_size)
149 GEN_HAS_MEMBER(gds_segment_byte_size)
150 GEN_HAS_MEMBER(kernarg_segment_byte_size)
151 GEN_HAS_MEMBER(workgroup_fbarrier_count)
152 GEN_HAS_MEMBER(wavefront_sgpr_count)
153 GEN_HAS_MEMBER(workitem_vgpr_count)
154 GEN_HAS_MEMBER(reserved_vgpr_first)
155 GEN_HAS_MEMBER(reserved_vgpr_count)
156 GEN_HAS_MEMBER(reserved_sgpr_first)
157 GEN_HAS_MEMBER(reserved_sgpr_count)
158 GEN_HAS_MEMBER(debug_wavefront_private_segment_offset_sgpr)
159 GEN_HAS_MEMBER(debug_private_segment_buffer_sgpr)
160 GEN_HAS_MEMBER(kernarg_segment_alignment)
161 GEN_HAS_MEMBER(group_segment_alignment)
162 GEN_HAS_MEMBER(private_segment_alignment)
163 GEN_HAS_MEMBER(wavefront_size)
164 GEN_HAS_MEMBER(call_convention)
165 GEN_HAS_MEMBER(runtime_loader_kernel_symbol)
166
167 static ArrayRef<StringLiteral> get_amd_kernel_code_t_FldNames() {
168 static constexpr StringLiteral const Table[] = {
169 "", // not found placeholder
170 #define RECORD(name, altName, print, parse) #name
171 #include "Utils/AMDKernelCodeTInfo.h"
172 #undef RECORD
173 };
174 return ArrayRef(Table);
175 }
176
get_amd_kernel_code_t_FldAltNames()177 static ArrayRef<StringLiteral> get_amd_kernel_code_t_FldAltNames() {
178 static constexpr StringLiteral const Table[] = {
179 "", // not found placeholder
180 #define RECORD(name, altName, print, parse) #altName
181 #include "Utils/AMDKernelCodeTInfo.h"
182 #undef RECORD
183 };
184 return ArrayRef(Table);
185 }
186
hasMCExprVersionTable()187 static ArrayRef<bool> hasMCExprVersionTable() {
188 static bool const Table[] = {
189 #define RECORD(name, altName, print, parse) (IsMCExpr##name::RESULT)
190 #include "Utils/AMDKernelCodeTInfo.h"
191 #undef RECORD
192 };
193 return ArrayRef(Table);
194 }
195
196 using RetrieveFx = const MCExpr *&(*)(AMDGPUMCKernelCodeT &);
197
getMCExprIndexTable()198 static ArrayRef<RetrieveFx> getMCExprIndexTable() {
199 static const RetrieveFx Table[] = {
200 #define RECORD(name, altName, print, parse) GetMember##name::Get
201 #include "Utils/AMDKernelCodeTInfo.h"
202 #undef RECORD
203 };
204 return ArrayRef(Table);
205 }
206
createIndexMap(ArrayRef<StringLiteral> names,ArrayRef<StringLiteral> altNames)207 static StringMap<int> createIndexMap(ArrayRef<StringLiteral> names,
208 ArrayRef<StringLiteral> altNames) {
209 StringMap<int> map;
210 assert(names.size() == altNames.size());
211 for (unsigned i = 0; i < names.size(); ++i) {
212 map.insert(std::pair(names[i], i));
213 map.insert(std::pair(altNames[i], i));
214 }
215 return map;
216 }
217
get_amd_kernel_code_t_FieldIndex(StringRef name)218 static int get_amd_kernel_code_t_FieldIndex(StringRef name) {
219 static const auto map = createIndexMap(get_amd_kernel_code_t_FldNames(),
220 get_amd_kernel_code_t_FldAltNames());
221 return map.lookup(name) - 1; // returns -1 if not found
222 }
223
224 class PrintField {
225 public:
226 template <typename T, T AMDGPUMCKernelCodeT::*ptr,
227 typename std::enable_if_t<!std::is_integral_v<T>, T> * = nullptr>
printField(StringRef Name,const AMDGPUMCKernelCodeT & C,raw_ostream & OS,MCContext & Ctx)228 static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C,
229 raw_ostream &OS, MCContext &Ctx) {
230 OS << Name << " = ";
231 const MCExpr *Value = C.*ptr;
232 int64_t Val;
233 if (Value->evaluateAsAbsolute(Val))
234 OS << Val;
235 else
236 Value->print(OS, Ctx.getAsmInfo());
237 }
238
239 template <typename T, T AMDGPUMCKernelCodeT::*ptr,
240 typename std::enable_if_t<std::is_integral_v<T>, T> * = nullptr>
printField(StringRef Name,const AMDGPUMCKernelCodeT & C,raw_ostream & OS,MCContext &)241 static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C,
242 raw_ostream &OS, MCContext &) {
243 OS << Name << " = " << (int)(C.*ptr);
244 }
245 };
246
247 template <typename T, T AMDGPUMCKernelCodeT::*ptr, int shift, int width = 1>
printBitField(StringRef Name,const AMDGPUMCKernelCodeT & C,raw_ostream & OS,MCContext &)248 static void printBitField(StringRef Name, const AMDGPUMCKernelCodeT &C,
249 raw_ostream &OS, MCContext &) {
250 const auto Mask = (static_cast<T>(1) << width) - 1;
251 OS << Name << " = " << (int)((C.*ptr >> shift) & Mask);
252 }
253
254 using PrintFx = void (*)(StringRef, const AMDGPUMCKernelCodeT &, raw_ostream &,
255 MCContext &);
256
getPrinterTable()257 static ArrayRef<PrintFx> getPrinterTable() {
258 static const PrintFx Table[] = {
259 #define COMPPGM1(name, aname, AccMacro) \
260 COMPPGM(name, aname, C_00B848_##AccMacro, S_00B848_##AccMacro, 0)
261 #define COMPPGM2(name, aname, AccMacro) \
262 COMPPGM(name, aname, C_00B84C_##AccMacro, S_00B84C_##AccMacro, 32)
263 #define PRINTFIELD(sname, aname, name) PrintField::printField<FLD_T(name)>
264 #define PRINTCOMP(Complement, PGMType) \
265 [](StringRef Name, const AMDGPUMCKernelCodeT &C, raw_ostream &OS, \
266 MCContext &Ctx) { \
267 OS << Name << " = "; \
268 auto [Shift, Mask] = getShiftMask(Complement); \
269 const MCExpr *Value; \
270 if (PGMType == 0) { \
271 Value = \
272 maskShiftGet(C.compute_pgm_resource1_registers, Mask, Shift, Ctx); \
273 } else { \
274 Value = \
275 maskShiftGet(C.compute_pgm_resource2_registers, Mask, Shift, Ctx); \
276 } \
277 int64_t Val; \
278 if (Value->evaluateAsAbsolute(Val)) \
279 OS << Val; \
280 else \
281 Value->print(OS, Ctx.getAsmInfo()); \
282 }
283 #define RECORD(name, altName, print, parse) print
284 #include "Utils/AMDKernelCodeTInfo.h"
285 #undef RECORD
286 };
287 return ArrayRef(Table);
288 }
289
expectAbsExpression(MCAsmParser & MCParser,int64_t & Value,raw_ostream & Err)290 static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value,
291 raw_ostream &Err) {
292
293 if (MCParser.getLexer().isNot(AsmToken::Equal)) {
294 Err << "expected '='";
295 return false;
296 }
297 MCParser.getLexer().Lex();
298
299 if (MCParser.parseAbsoluteExpression(Value)) {
300 Err << "integer absolute expression expected";
301 return false;
302 }
303 return true;
304 }
305
306 template <typename T, T AMDGPUMCKernelCodeT::*ptr>
parseField(AMDGPUMCKernelCodeT & C,MCAsmParser & MCParser,raw_ostream & Err)307 static bool parseField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser,
308 raw_ostream &Err) {
309 int64_t Value = 0;
310 if (!expectAbsExpression(MCParser, Value, Err))
311 return false;
312 C.*ptr = (T)Value;
313 return true;
314 }
315
316 template <typename T, T AMDGPUMCKernelCodeT::*ptr, int shift, int width = 1>
parseBitField(AMDGPUMCKernelCodeT & C,MCAsmParser & MCParser,raw_ostream & Err)317 static bool parseBitField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser,
318 raw_ostream &Err) {
319 int64_t Value = 0;
320 if (!expectAbsExpression(MCParser, Value, Err))
321 return false;
322 const uint64_t Mask = ((UINT64_C(1) << width) - 1) << shift;
323 C.*ptr &= (T)~Mask;
324 C.*ptr |= (T)((Value << shift) & Mask);
325 return true;
326 }
327
parseExpr(MCAsmParser & MCParser,const MCExpr * & Value,raw_ostream & Err)328 static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value,
329 raw_ostream &Err) {
330 if (MCParser.getLexer().isNot(AsmToken::Equal)) {
331 Err << "expected '='";
332 return false;
333 }
334 MCParser.getLexer().Lex();
335
336 if (MCParser.parseExpression(Value)) {
337 Err << "Could not parse expression";
338 return false;
339 }
340 return true;
341 }
342
343 using ParseFx = bool (*)(AMDGPUMCKernelCodeT &, MCAsmParser &, raw_ostream &);
344
getParserTable()345 static ArrayRef<ParseFx> getParserTable() {
346 static const ParseFx Table[] = {
347 #define COMPPGM1(name, aname, AccMacro) \
348 COMPPGM(name, aname, G_00B848_##AccMacro, C_00B848_##AccMacro, 0)
349 #define COMPPGM2(name, aname, AccMacro) \
350 COMPPGM(name, aname, G_00B84C_##AccMacro, C_00B84C_##AccMacro, 32)
351 #define PARSECOMP(Complement, PGMType) \
352 [](AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser, \
353 raw_ostream &Err) -> bool { \
354 MCContext &Ctx = MCParser.getContext(); \
355 const MCExpr *Value; \
356 if (!parseExpr(MCParser, Value, Err)) \
357 return false; \
358 auto [Shift, Mask] = getShiftMask(Complement); \
359 Value = maskShiftSet(Value, Mask, Shift, Ctx); \
360 const MCExpr *Compl = MCConstantExpr::create(Complement, Ctx); \
361 if (PGMType == 0) { \
362 C.compute_pgm_resource1_registers = MCBinaryExpr::createAnd( \
363 C.compute_pgm_resource1_registers, Compl, Ctx); \
364 C.compute_pgm_resource1_registers = MCBinaryExpr::createOr( \
365 C.compute_pgm_resource1_registers, Value, Ctx); \
366 } else { \
367 C.compute_pgm_resource2_registers = MCBinaryExpr::createAnd( \
368 C.compute_pgm_resource2_registers, Compl, Ctx); \
369 C.compute_pgm_resource2_registers = MCBinaryExpr::createOr( \
370 C.compute_pgm_resource2_registers, Value, Ctx); \
371 } \
372 return true; \
373 }
374 #define RECORD(name, altName, print, parse) parse
375 #include "Utils/AMDKernelCodeTInfo.h"
376 #undef RECORD
377 };
378 return ArrayRef(Table);
379 }
380
printAmdKernelCodeField(const AMDGPUMCKernelCodeT & C,int FldIndex,raw_ostream & OS,MCContext & Ctx)381 static void printAmdKernelCodeField(const AMDGPUMCKernelCodeT &C, int FldIndex,
382 raw_ostream &OS, MCContext &Ctx) {
383 auto Printer = getPrinterTable()[FldIndex];
384 if (Printer)
385 Printer(get_amd_kernel_code_t_FldNames()[FldIndex + 1], C, OS, Ctx);
386 }
387
initDefault(const MCSubtargetInfo * STI,MCContext & Ctx,bool InitMCExpr)388 void AMDGPUMCKernelCodeT::initDefault(const MCSubtargetInfo *STI,
389 MCContext &Ctx, bool InitMCExpr) {
390 AMDGPUMCKernelCodeT();
391
392 AMDGPU::initDefaultAMDKernelCodeT(*this, STI);
393
394 if (InitMCExpr) {
395 const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
396 compute_pgm_resource1_registers =
397 MCConstantExpr::create(Lo_32(compute_pgm_resource_registers), Ctx);
398 compute_pgm_resource2_registers =
399 MCConstantExpr::create(Hi_32(compute_pgm_resource_registers), Ctx);
400 is_dynamic_callstack = ZeroExpr;
401 wavefront_sgpr_count = ZeroExpr;
402 workitem_vgpr_count = ZeroExpr;
403 workitem_private_segment_byte_size = ZeroExpr;
404 }
405 }
406
validate(const MCSubtargetInfo * STI,MCContext & Ctx)407 void AMDGPUMCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) {
408 int64_t Value;
409 if (!compute_pgm_resource1_registers->evaluateAsAbsolute(Value))
410 return;
411
412 if (G_00B848_DX10_CLAMP(Value) && AMDGPU::isGFX12Plus(*STI)) {
413 Ctx.reportError({}, "enable_dx10_clamp=1 is not allowed on GFX12+");
414 return;
415 }
416
417 if (G_00B848_IEEE_MODE(Value) && AMDGPU::isGFX12Plus(*STI)) {
418 Ctx.reportError({}, "enable_ieee_mode=1 is not allowed on GFX12+");
419 return;
420 }
421
422 if (G_00B848_WGP_MODE(Value) && !AMDGPU::isGFX10Plus(*STI)) {
423 Ctx.reportError({}, "enable_wgp_mode=1 is only allowed on GFX10+");
424 return;
425 }
426
427 if (G_00B848_MEM_ORDERED(Value) && !AMDGPU::isGFX10Plus(*STI)) {
428 Ctx.reportError({}, "enable_mem_ordered=1 is only allowed on GFX10+");
429 return;
430 }
431
432 if (G_00B848_FWD_PROGRESS(Value) && !AMDGPU::isGFX10Plus(*STI)) {
433 Ctx.reportError({}, "enable_fwd_progress=1 is only allowed on GFX10+");
434 return;
435 }
436 }
437
getMCExprForIndex(int Index)438 const MCExpr *&AMDGPUMCKernelCodeT::getMCExprForIndex(int Index) {
439 static const auto IndexTable = getMCExprIndexTable();
440 return IndexTable[Index](*this);
441 }
442
ParseKernelCodeT(StringRef ID,MCAsmParser & MCParser,raw_ostream & Err)443 bool AMDGPUMCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
444 raw_ostream &Err) {
445 const int Idx = get_amd_kernel_code_t_FieldIndex(ID);
446 if (Idx < 0) {
447 Err << "unexpected amd_kernel_code_t field name " << ID;
448 return false;
449 }
450
451 if (hasMCExprVersionTable()[Idx]) {
452 const MCExpr *Value;
453 if (!parseExpr(MCParser, Value, Err))
454 return false;
455 getMCExprForIndex(Idx) = Value;
456 return true;
457 }
458 auto Parser = getParserTable()[Idx];
459 return Parser ? Parser(*this, MCParser, Err) : false;
460 }
461
EmitKernelCodeT(raw_ostream & OS,MCContext & Ctx)462 void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx) {
463 const int Size = hasMCExprVersionTable().size();
464 for (int i = 0; i < Size; ++i) {
465 OS << "\t\t";
466 if (hasMCExprVersionTable()[i]) {
467 OS << get_amd_kernel_code_t_FldNames()[i + 1] << " = ";
468 int64_t Val;
469 const MCExpr *Value = getMCExprForIndex(i);
470 if (Value->evaluateAsAbsolute(Val))
471 OS << Val;
472 else
473 Value->print(OS, Ctx.getAsmInfo());
474 } else {
475 printAmdKernelCodeField(*this, i, OS, Ctx);
476 }
477 OS << '\n';
478 }
479 }
480
EmitKernelCodeT(MCStreamer & OS,MCContext & Ctx)481 void AMDGPUMCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) {
482 OS.emitIntValue(amd_kernel_code_version_major, /*Size=*/4);
483 OS.emitIntValue(amd_kernel_code_version_minor, /*Size=*/4);
484 OS.emitIntValue(amd_machine_kind, /*Size=*/2);
485 OS.emitIntValue(amd_machine_version_major, /*Size=*/2);
486 OS.emitIntValue(amd_machine_version_minor, /*Size=*/2);
487 OS.emitIntValue(amd_machine_version_stepping, /*Size=*/2);
488 OS.emitIntValue(kernel_code_entry_byte_offset, /*Size=*/8);
489 OS.emitIntValue(kernel_code_prefetch_byte_offset, /*Size=*/8);
490 OS.emitIntValue(kernel_code_prefetch_byte_size, /*Size=*/8);
491 OS.emitIntValue(reserved0, /*Size=*/8);
492
493 if (compute_pgm_resource1_registers != nullptr)
494 OS.emitValue(compute_pgm_resource1_registers, /*Size=*/4);
495 else
496 OS.emitIntValue(Lo_32(compute_pgm_resource_registers),
497 /*Size=*/4);
498
499 if (compute_pgm_resource2_registers != nullptr)
500 OS.emitValue(compute_pgm_resource2_registers, /*Size=*/4);
501 else
502 OS.emitIntValue(Hi_32(compute_pgm_resource_registers),
503 /*Size=*/4);
504
505 if (is_dynamic_callstack != nullptr) {
506 const MCExpr *CodeProps = MCConstantExpr::create(code_properties, Ctx);
507 CodeProps = MCBinaryExpr::createOr(
508 CodeProps,
509 maskShiftSet(is_dynamic_callstack,
510 (1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1,
511 AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx),
512 Ctx);
513 OS.emitValue(CodeProps, /*Size=*/4);
514 } else
515 OS.emitIntValue(code_properties, /*Size=*/4);
516
517 if (workitem_private_segment_byte_size != nullptr)
518 OS.emitValue(workitem_private_segment_byte_size, /*Size=*/4);
519 else
520 OS.emitIntValue(0, /*Size=*/4);
521
522 OS.emitIntValue(workgroup_group_segment_byte_size, /*Size=*/4);
523 OS.emitIntValue(gds_segment_byte_size, /*Size=*/4);
524 OS.emitIntValue(kernarg_segment_byte_size, /*Size=*/8);
525 OS.emitIntValue(workgroup_fbarrier_count, /*Size=*/4);
526
527 if (wavefront_sgpr_count != nullptr)
528 OS.emitValue(wavefront_sgpr_count, /*Size=*/2);
529 else
530 OS.emitIntValue(0, /*Size=*/2);
531
532 if (workitem_vgpr_count != nullptr)
533 OS.emitValue(workitem_vgpr_count, /*Size=*/2);
534 else
535 OS.emitIntValue(0, /*Size=*/2);
536
537 OS.emitIntValue(reserved_vgpr_first, /*Size=*/2);
538 OS.emitIntValue(reserved_vgpr_count, /*Size=*/2);
539 OS.emitIntValue(reserved_sgpr_first, /*Size=*/2);
540 OS.emitIntValue(reserved_sgpr_count, /*Size=*/2);
541 OS.emitIntValue(debug_wavefront_private_segment_offset_sgpr,
542 /*Size=*/2);
543 OS.emitIntValue(debug_private_segment_buffer_sgpr, /*Size=*/2);
544 OS.emitIntValue(kernarg_segment_alignment, /*Size=*/1);
545 OS.emitIntValue(group_segment_alignment, /*Size=*/1);
546 OS.emitIntValue(private_segment_alignment, /*Size=*/1);
547 OS.emitIntValue(wavefront_size, /*Size=*/1);
548
549 OS.emitIntValue(call_convention, /*Size=*/4);
550 OS.emitBytes(StringRef((const char *)reserved3, /*Size=*/12));
551 OS.emitIntValue(runtime_loader_kernel_symbol, /*Size=*/8);
552 OS.emitBytes(StringRef((const char *)control_directives, /*Size=*/16 * 8));
553 }
554