xref: /freebsd/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===--- AMDHSAKernelDescriptor.h -----------------------------*- C++ -*---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDHSA kernel descriptor definitions. For more information, visit
11 /// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor
12 ///
13 /// \warning
14 /// Any changes to this file should also be audited for corresponding changes
15 /// needed in both the assembler and disassembler, namely:
16 /// * AMDGPUAsmPrinter.{cpp,h}
17 /// * AMDGPUTargetStreamer.{cpp,h}
18 /// * AMDGPUDisassembler.{cpp,h}
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #ifndef LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
23 #define LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
24 
25 #include <cstddef>
26 #include <cstdint>
27 
28 // Gets offset of specified member in specified type.
// Fallback definition: it is only compiled when no `offsetof` macro is already
// defined at this point of the translation unit.
// NOTE(review): <cstddef> is included just above and normally supplies
// `offsetof`, so this branch is presumably never taken -- confirm before
// relying on it. The fallback forms the address of MEMBER through a null TYPE
// pointer and converts it to size_t.
29 #ifndef offsetof
30 #define offsetof(TYPE, MEMBER) ((size_t)&((TYPE*)0)->MEMBER)
31 #endif // offsetof
32 
33 // Creates enumeration entries used for packing bits into integers. Enumeration
34 // entries include bit shift amount, bit width, and bit mask.
// For a given NAME the expansion yields three enumerators:
//   NAME_SHIFT = SHIFT, NAME_WIDTH = WIDTH, and NAME, a mask made of WIDTH
//   consecutive one bits whose lowest set bit is bit SHIFT.
// No trailing comma is produced; the enum body that uses the macro supplies it.
// The mask is built from the `int` literal 1, so WIDTH has to be smaller than
// the number of bits in `int`.
35 #ifndef AMDHSA_BITS_ENUM_ENTRY
36 #define AMDHSA_BITS_ENUM_ENTRY(NAME, SHIFT, WIDTH) \
37   NAME ## _SHIFT = (SHIFT),                        \
38   NAME ## _WIDTH = (WIDTH),                        \
39   NAME = (((1 << (WIDTH)) - 1) << (SHIFT))
40 #endif // AMDHSA_BITS_ENUM_ENTRY
41 
// Gets bits for specified bit mask from specified source.
//
// SRC is any integer expression. MSK must be the bare name of a mask constant
// for which a matching <MSK>_SHIFT constant exists (the shape produced by
// AMDHSA_BITS_ENUM_ENTRY), because the shift amount is formed by token pasting.
// The result is the selected field moved down so that its lowest bit is bit 0.
//
// SRC and MSK are parenthesized in the expansion so that an argument such as
// `a | b` is masked as a whole instead of binding as `a | (b & MSK)`.
#ifndef AMDHSA_BITS_GET
#define AMDHSA_BITS_GET(SRC, MSK) (((SRC) & (MSK)) >> MSK ## _SHIFT)
#endif // AMDHSA_BITS_GET
46 
// Sets bits for specified bit mask in specified destination.
//
// DST must be a modifiable integer lvalue; it is expanded twice, so it should
// not have side effects. MSK must be the bare name of a mask constant for which
// a matching <MSK>_SHIFT constant exists (the shape produced by
// AMDHSA_BITS_ENUM_ENTRY), because the shift amount is formed by token pasting.
// VAL is evaluated exactly once; after shifting, any bits of VAL that fall
// outside MSK are discarded, and bits of DST outside MSK are left untouched.
//
// The temporary has a macro-specific name so that a VAL expression mentioning
// a caller variable (for example one called `local`) cannot be captured by it,
// and every macro argument is parenthesized in the expansion.
#ifndef AMDHSA_BITS_SET
#define AMDHSA_BITS_SET(DST, MSK, VAL)                                         \
  do {                                                                         \
    auto amdhsa_bits_set_val_ = (VAL);                                         \
    (DST) &= ~(MSK);                                                           \
    (DST) |= ((amdhsa_bits_set_val_ << MSK##_SHIFT) & (MSK));                  \
  } while (0)
#endif // AMDHSA_BITS_SET
56 
57 namespace llvm {
58 namespace amdhsa {
59 
60 // Floating point rounding modes. Must match hardware definition.
// The four encodings occupy the values 0-3, i.e. they fit in two bits.
// NOTE(review): presumably these are the values stored in the 2-bit
// FLOAT_ROUND_MODE_32 / FLOAT_ROUND_MODE_16_64 fields of COMPUTE_PGM_RSRC1
// declared later in this file -- confirm against the users of this header.
61 enum : uint8_t {
62   FLOAT_ROUND_MODE_NEAR_EVEN = 0,
63   FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
64   FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
65   FLOAT_ROUND_MODE_ZERO = 3,
66 };
67 
68 // Floating point denorm modes. Must match hardware definition.
// The four encodings occupy the values 0-3, i.e. they fit in two bits.
// NOTE(review): presumably these are the values stored in the 2-bit
// FLOAT_DENORM_MODE_32 / FLOAT_DENORM_MODE_16_64 fields of COMPUTE_PGM_RSRC1
// declared later in this file -- confirm against the users of this header.
69 enum : uint8_t {
70   FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0,
71   FLOAT_DENORM_MODE_FLUSH_DST = 1,
72   FLOAT_DENORM_MODE_FLUSH_SRC = 2,
73   FLOAT_DENORM_MODE_FLUSH_NONE = 3,
74 };
75 
76 // System VGPR workitem IDs. Must match hardware definition.
// The four encodings occupy the values 0-3, i.e. they fit in two bits.
// NOTE(review): presumably these are the values stored in the 2-bit
// ENABLE_VGPR_WORKITEM_ID field of COMPUTE_PGM_RSRC2 declared later in this
// file -- confirm against the users of this header.
77 enum : uint8_t {
78   SYSTEM_VGPR_WORKITEM_ID_X = 0,
79   SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
80   SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
81   SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3,
82 };
83 
84 // Compute program resource register 1. Must match hardware definition.
// Each helper macro below forwards to AMDHSA_BITS_ENUM_ENTRY and prefixes the
// enumerator name with the range of GPU generations the field applies to, so
// every field yields <PREFIX>_<NAME>, <PREFIX>_<NAME>_SHIFT and
// <PREFIX>_<NAME>_WIDTH. The same bit position may be declared several times
// under different generation prefixes (bits 21, 23, 26 and 29-31 below).
// Taken together the entries cover all 32 bits of the register.
85 // GFX6+.
86 #define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
87   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
88 // [GFX6-GFX8].
89 #define COMPUTE_PGM_RSRC1_GFX6_GFX8(NAME, SHIFT, WIDTH) \
90   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX8_ ## NAME, SHIFT, WIDTH)
91 // [GFX6-GFX9].
92 #define COMPUTE_PGM_RSRC1_GFX6_GFX9(NAME, SHIFT, WIDTH) \
93   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX9_ ## NAME, SHIFT, WIDTH)
94 // [GFX6-GFX11].
95 #define COMPUTE_PGM_RSRC1_GFX6_GFX11(NAME, SHIFT, WIDTH)                       \
96   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX11_##NAME, SHIFT, WIDTH)
97 // GFX9+.
98 #define COMPUTE_PGM_RSRC1_GFX9_PLUS(NAME, SHIFT, WIDTH) \
99   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX9_PLUS_ ## NAME, SHIFT, WIDTH)
100 // GFX10+.
101 #define COMPUTE_PGM_RSRC1_GFX10_PLUS(NAME, SHIFT, WIDTH) \
102   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
103 // GFX12+.
104 #define COMPUTE_PGM_RSRC1_GFX12_PLUS(NAME, SHIFT, WIDTH)                       \
105   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX12_PLUS_##NAME, SHIFT, WIDTH)
// Arguments of every entry are (field name, lowest bit, width in bits).
106 enum : int32_t {
107   COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
108   COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
109   COMPUTE_PGM_RSRC1(PRIORITY, 10, 2),
110   COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_32, 12, 2),
111   COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_16_64, 14, 2),
112   COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_32, 16, 2),
113   COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_16_64, 18, 2),
114   COMPUTE_PGM_RSRC1(PRIV, 20, 1),
115   COMPUTE_PGM_RSRC1_GFX6_GFX11(ENABLE_DX10_CLAMP, 21, 1), // Bit 21 on GFX6-GFX11.
116   COMPUTE_PGM_RSRC1_GFX12_PLUS(ENABLE_WG_RR_EN, 21, 1), // Bit 21 on GFX12+.
117   COMPUTE_PGM_RSRC1(DEBUG_MODE, 22, 1),
118   COMPUTE_PGM_RSRC1_GFX6_GFX11(ENABLE_IEEE_MODE, 23, 1), // Bit 23 on GFX6-GFX11.
119   COMPUTE_PGM_RSRC1_GFX12_PLUS(DISABLE_PERF, 23, 1), // Bit 23 on GFX12+.
120   COMPUTE_PGM_RSRC1(BULKY, 24, 1),
121   COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
122   COMPUTE_PGM_RSRC1_GFX6_GFX8(RESERVED0, 26, 1), // Bit 26 on GFX6-GFX8.
123   COMPUTE_PGM_RSRC1_GFX9_PLUS(FP16_OVFL, 26, 1), // Bit 26 on GFX9+.
124   COMPUTE_PGM_RSRC1(RESERVED1, 27, 2),
125   COMPUTE_PGM_RSRC1_GFX6_GFX9(RESERVED2, 29, 3), // Bits 29-31 on GFX6-GFX9.
126   COMPUTE_PGM_RSRC1_GFX10_PLUS(WGP_MODE, 29, 1), // Bits 29, 30, 31 on GFX10+.
127   COMPUTE_PGM_RSRC1_GFX10_PLUS(MEM_ORDERED, 30, 1),
128   COMPUTE_PGM_RSRC1_GFX10_PLUS(FWD_PROGRESS, 31, 1),
129 };
// Only COMPUTE_PGM_RSRC1 is removed here; the generation-specific helper macros
// defined above are not #undef'd in this section.
130 #undef COMPUTE_PGM_RSRC1
131 
132 // Compute program resource register 2. Must match hardware definition.
// Each helper macro below forwards to AMDHSA_BITS_ENUM_ENTRY and prefixes the
// enumerator name with the range of GPU generations the field applies to.
// Bit 6 is declared twice, once per generation range. Taken together the
// entries cover all 32 bits of the register.
133 // GFX6+.
134 #define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \
135   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH)
136 // [GFX6-GFX11].
137 #define COMPUTE_PGM_RSRC2_GFX6_GFX11(NAME, SHIFT, WIDTH)                       \
138   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX6_GFX11_##NAME, SHIFT, WIDTH)
139 // GFX12+.
140 #define COMPUTE_PGM_RSRC2_GFX12_PLUS(NAME, SHIFT, WIDTH)                       \
141   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX12_PLUS_##NAME, SHIFT, WIDTH)
// Arguments of every entry are (field name, lowest bit, width in bits).
142 enum : int32_t {
143   COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1),
144   COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5),
145   COMPUTE_PGM_RSRC2_GFX6_GFX11(ENABLE_TRAP_HANDLER, 6, 1), // Bit 6 on GFX6-GFX11.
146   COMPUTE_PGM_RSRC2_GFX12_PLUS(RESERVED1, 6, 1), // Bit 6 on GFX12+.
147   COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
148   COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
149   COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
150   COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
151   COMPUTE_PGM_RSRC2(ENABLE_VGPR_WORKITEM_ID, 11, 2),
152   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
153   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_MEMORY, 14, 1),
154   COMPUTE_PGM_RSRC2(GRANULATED_LDS_SIZE, 15, 9),
155   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
156   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
157   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
158   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
159   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
160   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
161   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1),
162   COMPUTE_PGM_RSRC2(RESERVED0, 31, 1),
163 };
// Only COMPUTE_PGM_RSRC2 is removed here; the generation-specific helper macros
// defined above are not #undef'd in this section.
164 #undef COMPUTE_PGM_RSRC2
165 
166 // Compute program resource register 3 for GFX90A+. Must match hardware
167 // definition.
// The helper macro forwards to AMDHSA_BITS_ENUM_ENTRY with the
// COMPUTE_PGM_RSRC3_GFX90A_ name prefix. The four entries (field name, lowest
// bit, width in bits) cover all 32 bits: 0-5, 6-15, 16 and 17-31.
168 #define COMPUTE_PGM_RSRC3_GFX90A(NAME, SHIFT, WIDTH) \
169   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX90A_ ## NAME, SHIFT, WIDTH)
170 enum : int32_t {
171   COMPUTE_PGM_RSRC3_GFX90A(ACCUM_OFFSET, 0, 6),
172   COMPUTE_PGM_RSRC3_GFX90A(RESERVED0, 6, 10),
173   COMPUTE_PGM_RSRC3_GFX90A(TG_SPLIT, 16, 1),
174   COMPUTE_PGM_RSRC3_GFX90A(RESERVED1, 17, 15),
175 };
176 #undef COMPUTE_PGM_RSRC3_GFX90A
177 
178 // Compute program resource register 3 for GFX10+. Must match hardware
179 // definition.
// Each helper macro below forwards to AMDHSA_BITS_ENUM_ENTRY and prefixes the
// enumerator name with the GPU generation(s) the field applies to. Reading the
// entries by prefix gives these per-generation layouts (bit ranges inclusive):
//   GFX10:  0-3 SHARED_VGPR_COUNT, 4-11 RESERVED1, 12 RESERVED2, 13 RESERVED3,
//           14-30 RESERVED4, 31 RESERVED5.
//   GFX11:  0-3 SHARED_VGPR_COUNT, 4-9 INST_PREF_SIZE, 10 TRAP_ON_START,
//           11 TRAP_ON_END, 12 RESERVED2, 13 RESERVED3, 14-30 RESERVED4,
//           31 IMAGE_OP.
//   GFX12+: 0-3 RESERVED0, 4-11 INST_PREF_SIZE, 12 RESERVED2, 13 GLG_EN,
//           14-30 RESERVED4, 31 IMAGE_OP.
180 // GFX10+.
181 #define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
182   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
183 // [GFX10].
184 #define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH)                            \
185   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_##NAME, SHIFT, WIDTH)
186 // [GFX10-GFX11].
187 #define COMPUTE_PGM_RSRC3_GFX10_GFX11(NAME, SHIFT, WIDTH)                      \
188   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_GFX11_##NAME, SHIFT, WIDTH)
189 // GFX11+.
190 #define COMPUTE_PGM_RSRC3_GFX11_PLUS(NAME, SHIFT, WIDTH) \
191   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_PLUS_ ## NAME, SHIFT, WIDTH)
192 // [GFX11].
193 #define COMPUTE_PGM_RSRC3_GFX11(NAME, SHIFT, WIDTH)                            \
194   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_##NAME, SHIFT, WIDTH)
195 // GFX12+.
196 #define COMPUTE_PGM_RSRC3_GFX12_PLUS(NAME, SHIFT, WIDTH)                       \
197   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX12_PLUS_##NAME, SHIFT, WIDTH)
// Arguments of every entry are (field name, lowest bit, width in bits).
198 enum : int32_t {
199   COMPUTE_PGM_RSRC3_GFX10_GFX11(SHARED_VGPR_COUNT, 0, 4),
200   COMPUTE_PGM_RSRC3_GFX12_PLUS(RESERVED0, 0, 4),
201   COMPUTE_PGM_RSRC3_GFX10(RESERVED1, 4, 8),
202   COMPUTE_PGM_RSRC3_GFX11(INST_PREF_SIZE, 4, 6),
203   COMPUTE_PGM_RSRC3_GFX11(TRAP_ON_START, 10, 1),
204   COMPUTE_PGM_RSRC3_GFX11(TRAP_ON_END, 11, 1),
205   COMPUTE_PGM_RSRC3_GFX12_PLUS(INST_PREF_SIZE, 4, 8), // Wider than the GFX11 field.
206   COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED2, 12, 1),
207   COMPUTE_PGM_RSRC3_GFX10_GFX11(RESERVED3, 13, 1),
208   COMPUTE_PGM_RSRC3_GFX12_PLUS(GLG_EN, 13, 1),
209   COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED4, 14, 17),
210   COMPUTE_PGM_RSRC3_GFX10(RESERVED5, 31, 1),
211   COMPUTE_PGM_RSRC3_GFX11_PLUS(IMAGE_OP, 31, 1),
212 };
// Only COMPUTE_PGM_RSRC3_GFX10_PLUS is removed here; the other helper macros
// defined above are not #undef'd in this section.
213 #undef COMPUTE_PGM_RSRC3_GFX10_PLUS
214 
215 // Kernel code properties. Must be kept backwards compatible.
// The helper macro forwards to AMDHSA_BITS_ENUM_ENTRY with the
// KERNEL_CODE_PROPERTY_ name prefix. Arguments of every entry are (field name,
// lowest bit, width in bits); together the entries cover bits 0-15.
// NOTE(review): that matches the uint16_t kernel_code_properties member of
// kernel_descriptor_t by name and size -- confirm against the users.
216 #define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
217   AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
218 enum : int32_t {
219   KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
220   KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_PTR, 1, 1),
221   KERNEL_CODE_PROPERTY(ENABLE_SGPR_QUEUE_PTR, 2, 1),
222   KERNEL_CODE_PROPERTY(ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
223   KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
224   KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
225   KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
226   KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
227   KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+
228   KERNEL_CODE_PROPERTY(USES_DYNAMIC_STACK, 11, 1),
229   KERNEL_CODE_PROPERTY(RESERVED1, 12, 4),
230 };
231 #undef KERNEL_CODE_PROPERTY
232 
233 // Kernarg preload specification.
// The helper macro forwards to AMDHSA_BITS_ENUM_ENTRY with the
// KERNARG_PRELOAD_SPEC_ name prefix. LENGTH occupies bits 0-6 and OFFSET bits
// 7-15, so the two fields together span 16 bits.
// NOTE(review): that matches the uint16_t kernarg_preload member of
// kernel_descriptor_t by name and size -- confirm against the users.
234 #define KERNARG_PRELOAD_SPEC(NAME, SHIFT, WIDTH)                               \
235   AMDHSA_BITS_ENUM_ENTRY(KERNARG_PRELOAD_SPEC_##NAME, SHIFT, WIDTH)
236 enum : int32_t {
237   KERNARG_PRELOAD_SPEC(LENGTH, 0, 7),
238   KERNARG_PRELOAD_SPEC(OFFSET, 7, 9),
239 };
240 #undef KERNARG_PRELOAD_SPEC
241 
242 // Kernel descriptor. Must be kept backwards compatible.
// The struct is 64 bytes; the per-member byte offsets noted below are the ones
// pinned by the *_OFFSET constants and static_asserts that follow the struct.
// There is no member named reserved2: the reserved members are reserved0,
// reserved1 and reserved3.
243 struct kernel_descriptor_t {
244   uint32_t group_segment_fixed_size; // Byte offset 0.
245   uint32_t private_segment_fixed_size; // Byte offset 4.
246   uint32_t kernarg_size; // Byte offset 8.
247   uint8_t reserved0[4]; // Byte offset 12.
248   int64_t kernel_code_entry_byte_offset; // Byte offset 16.
249   uint8_t reserved1[20]; // Byte offset 24.
250   uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+
251   uint32_t compute_pgm_rsrc1; // Byte offset 48.
252   uint32_t compute_pgm_rsrc2; // Byte offset 52.
253   uint16_t kernel_code_properties; // Byte offset 56.
254   uint16_t kernarg_preload; // Byte offset 58.
255   uint8_t reserved3[4]; // Byte offset 60.
256 };
257 
// Byte offsets of the kernel_descriptor_t members, one constant per member in
// declaration order. The static_asserts that follow check each constant
// against offsetof() of the corresponding member.
258 enum : uint32_t {
259   GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0,
260   PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4,
261   KERNARG_SIZE_OFFSET = 8,
262   RESERVED0_OFFSET = 12,
263   KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16,
264   RESERVED1_OFFSET = 24,
265   COMPUTE_PGM_RSRC3_OFFSET = 44,
266   COMPUTE_PGM_RSRC1_OFFSET = 48,
267   COMPUTE_PGM_RSRC2_OFFSET = 52,
268   KERNEL_CODE_PROPERTIES_OFFSET = 56,
269   KERNARG_PRELOAD_OFFSET = 58,
270   RESERVED3_OFFSET = 60
271 };
272 
// Compile-time layout checks: the descriptor must be exactly 64 bytes and every
// member must sit at the byte offset named by its *_OFFSET constant. A change
// to kernel_descriptor_t that moves a member fails the build here.
273 static_assert(
274     sizeof(kernel_descriptor_t) == 64,
275     "invalid size for kernel_descriptor_t");
276 static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
277                   GROUP_SEGMENT_FIXED_SIZE_OFFSET,
278               "invalid offset for group_segment_fixed_size");
279 static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
280                   PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
281               "invalid offset for private_segment_fixed_size");
282 static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
283                   KERNARG_SIZE_OFFSET,
284               "invalid offset for kernarg_size");
285 static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
286               "invalid offset for reserved0");
287 static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
288                   KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
289               "invalid offset for kernel_code_entry_byte_offset");
290 static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
291               "invalid offset for reserved1");
292 static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
293                   COMPUTE_PGM_RSRC3_OFFSET,
294               "invalid offset for compute_pgm_rsrc3");
295 static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
296                   COMPUTE_PGM_RSRC1_OFFSET,
297               "invalid offset for compute_pgm_rsrc1");
298 static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
299                   COMPUTE_PGM_RSRC2_OFFSET,
300               "invalid offset for compute_pgm_rsrc2");
301 static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
302                   KERNEL_CODE_PROPERTIES_OFFSET,
303               "invalid offset for kernel_code_properties");
304 static_assert(offsetof(kernel_descriptor_t, kernarg_preload) ==
305                   KERNARG_PRELOAD_OFFSET,
306               "invalid offset for kernarg_preload");
307 static_assert(offsetof(kernel_descriptor_t, reserved3) == RESERVED3_OFFSET,
308               "invalid offset for reserved3");
309 
310 } // end namespace amdhsa
311 } // end namespace llvm
312 
313 #endif // LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
314