1 //===- AMDKernelCodeTUtils.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file - utility functions to parse/print AMDGPUMCKernelCodeT structure
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AMDKernelCodeTUtils.h"
14 #include "AMDKernelCodeT.h"
15 #include "SIDefines.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/SIDefinesUtils.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCExpr.h"
21 #include "llvm/MC/MCParser/AsmLexer.h"
22 #include "llvm/MC/MCParser/MCAsmParser.h"
23 #include "llvm/MC/MCStreamer.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26
27 using namespace llvm;
28 using namespace llvm::AMDGPU;
29
// GEN_HAS_MEMBER(member) emits three helper classes for one field name:
// - HasMember<member>
//     RESULT is true when AMDGPUMCKernelCodeT declares a member of that name.
//     This tells which of the original amd_kernel_code_t members are
//     duplicated (if the names don't match, the table driven strategy won't
//     work).
// - IsMCExpr<member>
//     RESULT is true when the AMDGPUMCKernelCodeT member exists and has type
//     `const MCExpr *`, i.e. the field is MCExpr-ified.
// - GetMember<member>
//     Get(C) yields a `const MCExpr *&` to the member when it is MCExpr-ified.
//     Otherwise it yields a reference to the static `Phony` pointer
//     (initialized to nullptr) so a reference can always be returned.
#define GEN_HAS_MEMBER(member)                                                 \
  class HasMember##member {                                                    \
    struct Decoy {                                                             \
      int member;                                                              \
    };                                                                         \
    /* `Probe::member` is ambiguous exactly when AMDGPUMCKernelCodeT also */   \
    /* declares `member`; the ambiguity removes the pointer overload.     */   \
    class Probe : public AMDGPUMCKernelCodeT, public Decoy {};                 \
    template <typename U>                                                      \
    static constexpr std::false_type Check(decltype(U::member) *);             \
    template <typename U> static constexpr std::true_type Check(...);          \
                                                                               \
  public:                                                                      \
    static constexpr bool RESULT = decltype(Check<Probe>(nullptr))::value;     \
  };                                                                           \
  class IsMCExpr##member {                                                     \
    template <typename U,                                                      \
              std::enable_if_t<                                                \
                  HasMember##member::RESULT &&                                 \
                      std::is_same_v<decltype(U::member), const MCExpr *>,     \
                  int> = 0>                                                    \
    static constexpr std::true_type Detect(decltype(U::member) *);             \
    template <typename U> static constexpr std::false_type Detect(...);        \
                                                                               \
  public:                                                                      \
    static constexpr bool RESULT =                                             \
        decltype(Detect<AMDGPUMCKernelCodeT>(nullptr))::value;                 \
  };                                                                           \
  class GetMember##member {                                                    \
  public:                                                                      \
    static const MCExpr *Phony;                                                \
    template <typename U> static const MCExpr *&Get(U &C) {                    \
      if constexpr (!IsMCExpr##member::RESULT)                                 \
        return Phony;                                                          \
      else                                                                     \
        return C.member;                                                       \
    }                                                                          \
  };                                                                           \
  const MCExpr *GetMember##member::Phony = nullptr;
83
84 // Cannot generate class declarations using the table driver approach (see table
85 // in AMDKernelCodeTInfo.h). Luckily, if any are missing here or eventually
86 // added to the table, an error should occur when trying to retrieve the table
87 // in getMCExprIndexTable.
88 GEN_HAS_MEMBER(amd_code_version_major)
GEN_HAS_MEMBER(amd_code_version_minor)89 GEN_HAS_MEMBER(amd_code_version_minor)
90 GEN_HAS_MEMBER(amd_machine_kind)
91 GEN_HAS_MEMBER(amd_machine_version_major)
92 GEN_HAS_MEMBER(amd_machine_version_minor)
93 GEN_HAS_MEMBER(amd_machine_version_stepping)
94
95 GEN_HAS_MEMBER(kernel_code_entry_byte_offset)
96 GEN_HAS_MEMBER(kernel_code_prefetch_byte_size)
97
98 GEN_HAS_MEMBER(granulated_workitem_vgpr_count)
99 GEN_HAS_MEMBER(granulated_wavefront_sgpr_count)
100 GEN_HAS_MEMBER(priority)
101 GEN_HAS_MEMBER(float_mode)
102 GEN_HAS_MEMBER(priv)
103 GEN_HAS_MEMBER(enable_dx10_clamp)
104 GEN_HAS_MEMBER(debug_mode)
105 GEN_HAS_MEMBER(enable_ieee_mode)
106 GEN_HAS_MEMBER(enable_wgp_mode)
107 GEN_HAS_MEMBER(enable_mem_ordered)
108 GEN_HAS_MEMBER(enable_fwd_progress)
109
110 GEN_HAS_MEMBER(enable_sgpr_private_segment_wave_byte_offset)
111 GEN_HAS_MEMBER(user_sgpr_count)
112 GEN_HAS_MEMBER(enable_trap_handler)
113 GEN_HAS_MEMBER(enable_sgpr_workgroup_id_x)
114 GEN_HAS_MEMBER(enable_sgpr_workgroup_id_y)
115 GEN_HAS_MEMBER(enable_sgpr_workgroup_id_z)
116 GEN_HAS_MEMBER(enable_sgpr_workgroup_info)
117 GEN_HAS_MEMBER(enable_vgpr_workitem_id)
118 GEN_HAS_MEMBER(enable_exception_msb)
119 GEN_HAS_MEMBER(granulated_lds_size)
120 GEN_HAS_MEMBER(enable_exception)
121
122 GEN_HAS_MEMBER(enable_sgpr_private_segment_buffer)
123 GEN_HAS_MEMBER(enable_sgpr_dispatch_ptr)
124 GEN_HAS_MEMBER(enable_sgpr_queue_ptr)
125 GEN_HAS_MEMBER(enable_sgpr_kernarg_segment_ptr)
126 GEN_HAS_MEMBER(enable_sgpr_dispatch_id)
127 GEN_HAS_MEMBER(enable_sgpr_flat_scratch_init)
128 GEN_HAS_MEMBER(enable_sgpr_private_segment_size)
129 GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_x)
130 GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_y)
131 GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_z)
132 GEN_HAS_MEMBER(enable_wavefront_size32)
133 GEN_HAS_MEMBER(enable_ordered_append_gds)
134 GEN_HAS_MEMBER(private_element_size)
135 GEN_HAS_MEMBER(is_ptr64)
136 GEN_HAS_MEMBER(is_dynamic_callstack)
137 GEN_HAS_MEMBER(is_debug_enabled)
138 GEN_HAS_MEMBER(is_xnack_enabled)
139
140 GEN_HAS_MEMBER(workitem_private_segment_byte_size)
141 GEN_HAS_MEMBER(workgroup_group_segment_byte_size)
142 GEN_HAS_MEMBER(gds_segment_byte_size)
143 GEN_HAS_MEMBER(kernarg_segment_byte_size)
144 GEN_HAS_MEMBER(workgroup_fbarrier_count)
145 GEN_HAS_MEMBER(wavefront_sgpr_count)
146 GEN_HAS_MEMBER(workitem_vgpr_count)
147 GEN_HAS_MEMBER(reserved_vgpr_first)
148 GEN_HAS_MEMBER(reserved_vgpr_count)
149 GEN_HAS_MEMBER(reserved_sgpr_first)
150 GEN_HAS_MEMBER(reserved_sgpr_count)
151 GEN_HAS_MEMBER(debug_wavefront_private_segment_offset_sgpr)
152 GEN_HAS_MEMBER(debug_private_segment_buffer_sgpr)
153 GEN_HAS_MEMBER(kernarg_segment_alignment)
154 GEN_HAS_MEMBER(group_segment_alignment)
155 GEN_HAS_MEMBER(private_segment_alignment)
156 GEN_HAS_MEMBER(wavefront_size)
157 GEN_HAS_MEMBER(call_convention)
158 GEN_HAS_MEMBER(runtime_loader_kernel_symbol)
159
160 static ArrayRef<StringLiteral> get_amd_kernel_code_t_FldNames() {
161 static constexpr StringLiteral const Table[] = {
162 "", // not found placeholder
163 #define RECORD(name, altName, print, parse) #name
164 #include "Utils/AMDKernelCodeTInfo.h"
165 #undef RECORD
166 };
167 return ArrayRef(Table);
168 }
169
get_amd_kernel_code_t_FldAltNames()170 static ArrayRef<StringLiteral> get_amd_kernel_code_t_FldAltNames() {
171 static constexpr StringLiteral const Table[] = {
172 "", // not found placeholder
173 #define RECORD(name, altName, print, parse) #altName
174 #include "Utils/AMDKernelCodeTInfo.h"
175 #undef RECORD
176 };
177 return ArrayRef(Table);
178 }
179
hasMCExprVersionTable()180 static ArrayRef<bool> hasMCExprVersionTable() {
181 static bool const Table[] = {
182 #define RECORD(name, altName, print, parse) (IsMCExpr##name::RESULT)
183 #include "Utils/AMDKernelCodeTInfo.h"
184 #undef RECORD
185 };
186 return ArrayRef(Table);
187 }
188
189 using RetrieveFx = const MCExpr *&(*)(AMDGPUMCKernelCodeT &);
190
getMCExprIndexTable()191 static ArrayRef<RetrieveFx> getMCExprIndexTable() {
192 static const RetrieveFx Table[] = {
193 #define RECORD(name, altName, print, parse) GetMember##name::Get
194 #include "Utils/AMDKernelCodeTInfo.h"
195 #undef RECORD
196 };
197 return ArrayRef(Table);
198 }
199
createIndexMap(ArrayRef<StringLiteral> names,ArrayRef<StringLiteral> altNames)200 static StringMap<int> createIndexMap(ArrayRef<StringLiteral> names,
201 ArrayRef<StringLiteral> altNames) {
202 StringMap<int> map;
203 assert(names.size() == altNames.size());
204 for (unsigned i = 0; i < names.size(); ++i) {
205 map.insert(std::pair(names[i], i));
206 map.insert(std::pair(altNames[i], i));
207 }
208 return map;
209 }
210
get_amd_kernel_code_t_FieldIndex(StringRef name)211 static int get_amd_kernel_code_t_FieldIndex(StringRef name) {
212 static const auto map = createIndexMap(get_amd_kernel_code_t_FldNames(),
213 get_amd_kernel_code_t_FldAltNames());
214 return map.lookup(name) - 1; // returns -1 if not found
215 }
216
217 class PrintField {
218 public:
219 template <typename T, T AMDGPUMCKernelCodeT::*ptr>
printField(StringRef Name,const AMDGPUMCKernelCodeT & C,raw_ostream & OS,MCContext & Ctx,AMDGPUMCKernelCodeT::PrintHelper Helper)220 static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C,
221 raw_ostream &OS, MCContext &Ctx,
222 AMDGPUMCKernelCodeT::PrintHelper Helper) {
223 if constexpr (!std::is_integral_v<T>) {
224 OS << Name << " = ";
225 const MCExpr *Value = C.*ptr;
226 Helper(Value, OS, Ctx.getAsmInfo());
227 } else {
228 OS << Name << " = " << (int)(C.*ptr);
229 }
230 }
231 };
232
233 template <typename T, T AMDGPUMCKernelCodeT::*ptr, int shift, int width = 1>
printBitField(StringRef Name,const AMDGPUMCKernelCodeT & C,raw_ostream & OS,MCContext &,AMDGPUMCKernelCodeT::PrintHelper)234 static void printBitField(StringRef Name, const AMDGPUMCKernelCodeT &C,
235 raw_ostream &OS, MCContext &,
236 AMDGPUMCKernelCodeT::PrintHelper) {
237 const auto Mask = (static_cast<T>(1) << width) - 1;
238 OS << Name << " = " << (int)((C.*ptr >> shift) & Mask);
239 }
240
241 using PrintFx = void (*)(StringRef, const AMDGPUMCKernelCodeT &, raw_ostream &,
242 MCContext &, AMDGPUMCKernelCodeT::PrintHelper Helper);
243
244 static ArrayRef<PrintFx>
getPrinterTable(AMDGPUMCKernelCodeT::PrintHelper Helper)245 getPrinterTable(AMDGPUMCKernelCodeT::PrintHelper Helper) {
246 static const PrintFx Table[] = {
247 #define COMPPGM1(name, aname, AccMacro) \
248 COMPPGM(name, aname, C_00B848_##AccMacro, S_00B848_##AccMacro, 0)
249 #define COMPPGM2(name, aname, AccMacro) \
250 COMPPGM(name, aname, C_00B84C_##AccMacro, S_00B84C_##AccMacro, 32)
251 #define PRINTFIELD(sname, aname, name) PrintField::printField<FLD_T(name)>
252 #define PRINTCOMP(Complement, PGMType) \
253 [](StringRef Name, const AMDGPUMCKernelCodeT &C, raw_ostream &OS, \
254 MCContext &Ctx, AMDGPUMCKernelCodeT::PrintHelper Helper) { \
255 OS << Name << " = "; \
256 auto [Shift, Mask] = getShiftMask(Complement); \
257 const MCExpr *Value; \
258 if (PGMType == 0) { \
259 Value = \
260 maskShiftGet(C.compute_pgm_resource1_registers, Mask, Shift, Ctx); \
261 } else { \
262 Value = \
263 maskShiftGet(C.compute_pgm_resource2_registers, Mask, Shift, Ctx); \
264 } \
265 Helper(Value, OS, Ctx.getAsmInfo()); \
266 }
267 #define RECORD(name, altName, print, parse) print
268 #include "Utils/AMDKernelCodeTInfo.h"
269 #undef RECORD
270 };
271 return ArrayRef(Table);
272 }
273
expectAbsExpression(MCAsmParser & MCParser,int64_t & Value,raw_ostream & Err)274 static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value,
275 raw_ostream &Err) {
276
277 if (MCParser.getLexer().isNot(AsmToken::Equal)) {
278 Err << "expected '='";
279 return false;
280 }
281 MCParser.getLexer().Lex();
282
283 if (MCParser.parseAbsoluteExpression(Value)) {
284 Err << "integer absolute expression expected";
285 return false;
286 }
287 return true;
288 }
289
290 template <typename T, T AMDGPUMCKernelCodeT::*ptr>
parseField(AMDGPUMCKernelCodeT & C,MCAsmParser & MCParser,raw_ostream & Err)291 static bool parseField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser,
292 raw_ostream &Err) {
293 int64_t Value = 0;
294 if (!expectAbsExpression(MCParser, Value, Err))
295 return false;
296 C.*ptr = (T)Value;
297 return true;
298 }
299
300 template <typename T, T AMDGPUMCKernelCodeT::*ptr, int shift, int width = 1>
parseBitField(AMDGPUMCKernelCodeT & C,MCAsmParser & MCParser,raw_ostream & Err)301 static bool parseBitField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser,
302 raw_ostream &Err) {
303 int64_t Value = 0;
304 if (!expectAbsExpression(MCParser, Value, Err))
305 return false;
306 const uint64_t Mask = ((UINT64_C(1) << width) - 1) << shift;
307 C.*ptr &= (T)~Mask;
308 C.*ptr |= (T)((Value << shift) & Mask);
309 return true;
310 }
311
parseExpr(MCAsmParser & MCParser,const MCExpr * & Value,raw_ostream & Err)312 static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value,
313 raw_ostream &Err) {
314 if (MCParser.getLexer().isNot(AsmToken::Equal)) {
315 Err << "expected '='";
316 return false;
317 }
318 MCParser.getLexer().Lex();
319
320 if (MCParser.parseExpression(Value)) {
321 Err << "Could not parse expression";
322 return false;
323 }
324 return true;
325 }
326
327 using ParseFx = bool (*)(AMDGPUMCKernelCodeT &, MCAsmParser &, raw_ostream &);
328
getParserTable()329 static ArrayRef<ParseFx> getParserTable() {
330 static const ParseFx Table[] = {
331 #define COMPPGM1(name, aname, AccMacro) \
332 COMPPGM(name, aname, G_00B848_##AccMacro, C_00B848_##AccMacro, 0)
333 #define COMPPGM2(name, aname, AccMacro) \
334 COMPPGM(name, aname, G_00B84C_##AccMacro, C_00B84C_##AccMacro, 32)
335 #define PARSECOMP(Complement, PGMType) \
336 [](AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser, \
337 raw_ostream &Err) -> bool { \
338 MCContext &Ctx = MCParser.getContext(); \
339 const MCExpr *Value; \
340 if (!parseExpr(MCParser, Value, Err)) \
341 return false; \
342 auto [Shift, Mask] = getShiftMask(Complement); \
343 Value = maskShiftSet(Value, Mask, Shift, Ctx); \
344 const MCExpr *Compl = MCConstantExpr::create(Complement, Ctx); \
345 if (PGMType == 0) { \
346 C.compute_pgm_resource1_registers = MCBinaryExpr::createAnd( \
347 C.compute_pgm_resource1_registers, Compl, Ctx); \
348 C.compute_pgm_resource1_registers = MCBinaryExpr::createOr( \
349 C.compute_pgm_resource1_registers, Value, Ctx); \
350 } else { \
351 C.compute_pgm_resource2_registers = MCBinaryExpr::createAnd( \
352 C.compute_pgm_resource2_registers, Compl, Ctx); \
353 C.compute_pgm_resource2_registers = MCBinaryExpr::createOr( \
354 C.compute_pgm_resource2_registers, Value, Ctx); \
355 } \
356 return true; \
357 }
358 #define RECORD(name, altName, print, parse) parse
359 #include "Utils/AMDKernelCodeTInfo.h"
360 #undef RECORD
361 };
362 return ArrayRef(Table);
363 }
364
printAmdKernelCodeField(const AMDGPUMCKernelCodeT & C,int FldIndex,raw_ostream & OS,MCContext & Ctx,AMDGPUMCKernelCodeT::PrintHelper Helper)365 static void printAmdKernelCodeField(const AMDGPUMCKernelCodeT &C, int FldIndex,
366 raw_ostream &OS, MCContext &Ctx,
367 AMDGPUMCKernelCodeT::PrintHelper Helper) {
368 auto Printer = getPrinterTable(Helper)[FldIndex];
369 if (Printer)
370 Printer(get_amd_kernel_code_t_FldNames()[FldIndex + 1], C, OS, Ctx, Helper);
371 }
372
initDefault(const MCSubtargetInfo * STI,MCContext & Ctx,bool InitMCExpr)373 void AMDGPUMCKernelCodeT::initDefault(const MCSubtargetInfo *STI,
374 MCContext &Ctx, bool InitMCExpr) {
375 AMDGPUMCKernelCodeT();
376
377 AMDGPU::initDefaultAMDKernelCodeT(*this, STI);
378
379 if (InitMCExpr) {
380 const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
381 compute_pgm_resource1_registers =
382 MCConstantExpr::create(Lo_32(compute_pgm_resource_registers), Ctx);
383 compute_pgm_resource2_registers =
384 MCConstantExpr::create(Hi_32(compute_pgm_resource_registers), Ctx);
385 is_dynamic_callstack = ZeroExpr;
386 wavefront_sgpr_count = ZeroExpr;
387 workitem_vgpr_count = ZeroExpr;
388 workitem_private_segment_byte_size = ZeroExpr;
389 }
390 }
391
validate(const MCSubtargetInfo * STI,MCContext & Ctx)392 void AMDGPUMCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) {
393 int64_t Value;
394 if (!compute_pgm_resource1_registers->evaluateAsAbsolute(Value))
395 return;
396
397 if (G_00B848_DX10_CLAMP(Value) && AMDGPU::isGFX12Plus(*STI)) {
398 Ctx.reportError({}, "enable_dx10_clamp=1 is not allowed on GFX12+");
399 return;
400 }
401
402 if (G_00B848_IEEE_MODE(Value) && AMDGPU::isGFX12Plus(*STI)) {
403 Ctx.reportError({}, "enable_ieee_mode=1 is not allowed on GFX12+");
404 return;
405 }
406
407 if (G_00B848_WGP_MODE(Value) && !AMDGPU::isGFX10Plus(*STI)) {
408 Ctx.reportError({}, "enable_wgp_mode=1 is only allowed on GFX10+");
409 return;
410 }
411
412 if (G_00B848_MEM_ORDERED(Value) && !AMDGPU::isGFX10Plus(*STI)) {
413 Ctx.reportError({}, "enable_mem_ordered=1 is only allowed on GFX10+");
414 return;
415 }
416
417 if (G_00B848_FWD_PROGRESS(Value) && !AMDGPU::isGFX10Plus(*STI)) {
418 Ctx.reportError({}, "enable_fwd_progress=1 is only allowed on GFX10+");
419 return;
420 }
421 }
422
getMCExprForIndex(int Index)423 const MCExpr *&AMDGPUMCKernelCodeT::getMCExprForIndex(int Index) {
424 static const auto IndexTable = getMCExprIndexTable();
425 return IndexTable[Index](*this);
426 }
427
ParseKernelCodeT(StringRef ID,MCAsmParser & MCParser,raw_ostream & Err)428 bool AMDGPUMCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
429 raw_ostream &Err) {
430 const int Idx = get_amd_kernel_code_t_FieldIndex(ID);
431 if (Idx < 0) {
432 Err << "unexpected amd_kernel_code_t field name " << ID;
433 return false;
434 }
435
436 if (hasMCExprVersionTable()[Idx]) {
437 const MCExpr *Value;
438 if (!parseExpr(MCParser, Value, Err))
439 return false;
440 getMCExprForIndex(Idx) = Value;
441 return true;
442 }
443 auto Parser = getParserTable()[Idx];
444 return Parser && Parser(*this, MCParser, Err);
445 }
446
EmitKernelCodeT(raw_ostream & OS,MCContext & Ctx,PrintHelper Helper)447 void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx,
448 PrintHelper Helper) {
449 const int Size = hasMCExprVersionTable().size();
450 for (int i = 0; i < Size; ++i) {
451 OS << "\t\t";
452 if (hasMCExprVersionTable()[i]) {
453 OS << get_amd_kernel_code_t_FldNames()[i + 1] << " = ";
454 const MCExpr *Value = getMCExprForIndex(i);
455 Helper(Value, OS, Ctx.getAsmInfo());
456 } else {
457 printAmdKernelCodeField(*this, i, OS, Ctx, Helper);
458 }
459 OS << '\n';
460 }
461 }
462
EmitKernelCodeT(MCStreamer & OS,MCContext & Ctx)463 void AMDGPUMCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) {
464 OS.emitIntValue(amd_kernel_code_version_major, /*Size=*/4);
465 OS.emitIntValue(amd_kernel_code_version_minor, /*Size=*/4);
466 OS.emitIntValue(amd_machine_kind, /*Size=*/2);
467 OS.emitIntValue(amd_machine_version_major, /*Size=*/2);
468 OS.emitIntValue(amd_machine_version_minor, /*Size=*/2);
469 OS.emitIntValue(amd_machine_version_stepping, /*Size=*/2);
470 OS.emitIntValue(kernel_code_entry_byte_offset, /*Size=*/8);
471 OS.emitIntValue(kernel_code_prefetch_byte_offset, /*Size=*/8);
472 OS.emitIntValue(kernel_code_prefetch_byte_size, /*Size=*/8);
473 OS.emitIntValue(reserved0, /*Size=*/8);
474
475 if (compute_pgm_resource1_registers != nullptr)
476 OS.emitValue(compute_pgm_resource1_registers, /*Size=*/4);
477 else
478 OS.emitIntValue(Lo_32(compute_pgm_resource_registers),
479 /*Size=*/4);
480
481 if (compute_pgm_resource2_registers != nullptr)
482 OS.emitValue(compute_pgm_resource2_registers, /*Size=*/4);
483 else
484 OS.emitIntValue(Hi_32(compute_pgm_resource_registers),
485 /*Size=*/4);
486
487 if (is_dynamic_callstack != nullptr) {
488 const MCExpr *CodeProps = MCConstantExpr::create(code_properties, Ctx);
489 CodeProps = MCBinaryExpr::createOr(
490 CodeProps,
491 maskShiftSet(is_dynamic_callstack,
492 (1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1,
493 AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx),
494 Ctx);
495 OS.emitValue(CodeProps, /*Size=*/4);
496 } else
497 OS.emitIntValue(code_properties, /*Size=*/4);
498
499 if (workitem_private_segment_byte_size != nullptr)
500 OS.emitValue(workitem_private_segment_byte_size, /*Size=*/4);
501 else
502 OS.emitIntValue(0, /*Size=*/4);
503
504 OS.emitIntValue(workgroup_group_segment_byte_size, /*Size=*/4);
505 OS.emitIntValue(gds_segment_byte_size, /*Size=*/4);
506 OS.emitIntValue(kernarg_segment_byte_size, /*Size=*/8);
507 OS.emitIntValue(workgroup_fbarrier_count, /*Size=*/4);
508
509 if (wavefront_sgpr_count != nullptr)
510 OS.emitValue(wavefront_sgpr_count, /*Size=*/2);
511 else
512 OS.emitIntValue(0, /*Size=*/2);
513
514 if (workitem_vgpr_count != nullptr)
515 OS.emitValue(workitem_vgpr_count, /*Size=*/2);
516 else
517 OS.emitIntValue(0, /*Size=*/2);
518
519 OS.emitIntValue(reserved_vgpr_first, /*Size=*/2);
520 OS.emitIntValue(reserved_vgpr_count, /*Size=*/2);
521 OS.emitIntValue(reserved_sgpr_first, /*Size=*/2);
522 OS.emitIntValue(reserved_sgpr_count, /*Size=*/2);
523 OS.emitIntValue(debug_wavefront_private_segment_offset_sgpr,
524 /*Size=*/2);
525 OS.emitIntValue(debug_private_segment_buffer_sgpr, /*Size=*/2);
526 OS.emitIntValue(kernarg_segment_alignment, /*Size=*/1);
527 OS.emitIntValue(group_segment_alignment, /*Size=*/1);
528 OS.emitIntValue(private_segment_alignment, /*Size=*/1);
529 OS.emitIntValue(wavefront_size, /*Size=*/1);
530
531 OS.emitIntValue(call_convention, /*Size=*/4);
532 OS.emitBytes(StringRef((const char *)reserved3, /*Size=*/12));
533 OS.emitIntValue(runtime_loader_kernel_symbol, /*Size=*/8);
534 OS.emitBytes(StringRef((const char *)control_directives, /*Size=*/16 * 8));
535 }
536