/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
 *
 * Source: linux/drivers/accel/amdxdna/aie2_msg_priv.h
 * (revision 53597deca0e38c30e6cd4ba2114fa42d2bcd85bb)
 */

#ifndef _AIE2_MSG_PRIV_H_
#define _AIE2_MSG_PRIV_H_

/*
 * Message opcodes.
 *
 * The values fall into three numeric ranges (0x2.., 0x101.., 0x301..).
 * Each MSG_OP_MAX_* enumerator has no explicit value, so it evaluates to
 * one more than the opcode listed immediately before it and can serve as
 * an exclusive upper bound for its range.
 *
 * NOTE(review): the "XRT" / "DRV" grouping is taken from the sentinel
 * names only; which component issues each range is not visible here.
 */
enum aie2_msg_opcode {
	MSG_OP_CREATE_CONTEXT              = 0x2,
	MSG_OP_DESTROY_CONTEXT             = 0x3,
	MSG_OP_GET_TELEMETRY               = 0x4,
	MSG_OP_SYNC_BO                     = 0x7,
	MSG_OP_EXECUTE_BUFFER_CF           = 0xC,
	MSG_OP_QUERY_COL_STATUS            = 0xD,
	MSG_OP_QUERY_AIE_TILE_INFO         = 0xE,
	MSG_OP_QUERY_AIE_VERSION           = 0xF,
	MSG_OP_EXEC_DPU                    = 0x10,
	MSG_OP_CONFIG_CU                   = 0x11,
	MSG_OP_CHAIN_EXEC_BUFFER_CF        = 0x12,
	MSG_OP_CHAIN_EXEC_DPU              = 0x13,
	MSG_OP_CONFIG_DEBUG_BO             = 0x14,
	MSG_OP_CHAIN_EXEC_NPU              = 0x18,
	MSG_OP_MAX_XRT_OPCODE,		/* 0x19: one past the last opcode above */
	MSG_OP_SUSPEND                     = 0x101,
	MSG_OP_RESUME                      = 0x102,
	MSG_OP_ASSIGN_MGMT_PASID           = 0x103,
	MSG_OP_INVOKE_SELF_TEST            = 0x104,
	MSG_OP_MAP_HOST_BUFFER             = 0x106,
	MSG_OP_GET_FIRMWARE_VERSION        = 0x108,
	MSG_OP_SET_RUNTIME_CONFIG          = 0x10A,
	MSG_OP_GET_RUNTIME_CONFIG          = 0x10B,
	MSG_OP_REGISTER_ASYNC_EVENT_MSG    = 0x10C,
	MSG_OP_GET_APP_HEALTH              = 0x114,
	MSG_OP_MAX_DRV_OPCODE,		/* 0x115 */
	MSG_OP_GET_PROTOCOL_VERSION        = 0x301,
	MSG_OP_MAX_OPCODE		/* 0x302 */
};

/*
 * Status codes carried in the "status" member of the *_resp structures
 * in this header.
 *
 * Non-zero codes are grouped by base value (0x1000000, 0x2000000,
 * 0x3000000, 0x4000000).  Enumerators without an explicit value continue
 * from the previous one, so each *_MAX_* / MAX_* entry is one past the
 * last code of its group.
 *
 * NOTE(review): response structures embed this enum directly; the storage
 * size of an enum is chosen by the compiler (presumably 4 bytes here) --
 * confirm it matches the layout expected by the message peer.
 */
enum aie2_msg_status {
	AIE2_STATUS_SUCCESS				= 0x0,
	/* AIE Error codes */
	AIE2_STATUS_AIE_SATURATION_ERROR		= 0x1000001,
	AIE2_STATUS_AIE_FP_ERROR			= 0x1000002,
	AIE2_STATUS_AIE_STREAM_ERROR			= 0x1000003,
	AIE2_STATUS_AIE_ACCESS_ERROR			= 0x1000004,
	AIE2_STATUS_AIE_BUS_ERROR			= 0x1000005,
	AIE2_STATUS_AIE_INSTRUCTION_ERROR		= 0x1000006,
	AIE2_STATUS_AIE_ECC_ERROR			= 0x1000007,
	AIE2_STATUS_AIE_LOCK_ERROR			= 0x1000008,
	AIE2_STATUS_AIE_DMA_ERROR			= 0x1000009,
	AIE2_STATUS_AIE_MEM_PARITY_ERROR		= 0x100000a,
	AIE2_STATUS_AIE_PWR_CFG_ERROR			= 0x100000b,
	AIE2_STATUS_AIE_BACKTRACK_ERROR			= 0x100000c,
	AIE2_STATUS_MAX_AIE_STATUS_CODE,
	/* MGMT ERT Error codes */
	AIE2_STATUS_MGMT_ERT_SELF_TEST_FAILURE		= 0x2000001,
	AIE2_STATUS_MGMT_ERT_HASH_MISMATCH,
	AIE2_STATUS_MGMT_ERT_NOAVAIL,
	AIE2_STATUS_MGMT_ERT_INVALID_PARAM,
	AIE2_STATUS_MGMT_ERT_ENTER_SUSPEND_FAILURE,
	AIE2_STATUS_MGMT_ERT_BUSY,
	AIE2_STATUS_MGMT_ERT_APPLICATION_ACTIVE,
	MAX_MGMT_ERT_STATUS_CODE,
	/* APP ERT Error codes */
	AIE2_STATUS_APP_ERT_FIRST_ERROR			= 0x3000001,
	AIE2_STATUS_APP_INVALID_INSTR,
	AIE2_STATUS_APP_LOAD_PDI_FAIL,
	MAX_APP_ERT_STATUS_CODE,
	/* NPU RTOS Error Codes */
	AIE2_STATUS_INVALID_INPUT_BUFFER		= 0x4000001,
	AIE2_STATUS_INVALID_COMMAND,
	AIE2_STATUS_INVALID_PARAM,
	/* 0x4000004 and 0x4000005 are not assigned in this list */
	AIE2_STATUS_INVALID_OPERATION			= 0x4000006,
	AIE2_STATUS_ASYNC_EVENT_MSGS_FULL,
	AIE2_STATUS_MAX_RTOS_STATUS_CODE,
	MAX_AIE2_STATUS_CODE
};

80 struct assign_mgmt_pasid_req {
81 	__u16	pasid;
82 	__u16	reserved;
83 } __packed;
84 
85 struct assign_mgmt_pasid_resp {
86 	enum aie2_msg_status	status;
87 } __packed;
88 
89 struct map_host_buffer_req {
90 	__u32		context_id;
91 	__u64		buf_addr;
92 	__u64		buf_size;
93 } __packed;
94 
95 struct map_host_buffer_resp {
96 	enum aie2_msg_status	status;
97 } __packed;
98 
99 #define MAX_CQ_PAIRS		2
100 struct cq_info {
101 	__u32 head_addr;
102 	__u32 tail_addr;
103 	__u32 buf_addr;
104 	__u32 buf_size;
105 };
106 
107 struct cq_pair {
108 	struct cq_info x2i_q;
109 	struct cq_info i2x_q;
110 };
111 
/*
 * Priority levels, 1 (realtime) through 4 (low).  Presumably the values
 * accepted by create_ctx_req.context_priority -- confirm at the call site.
 */
#define PRIORITY_REALTIME	1
#define PRIORITY_HIGH		2
#define PRIORITY_NORMAL		3
#define PRIORITY_LOW		4

117 struct create_ctx_req {
118 	__u32	aie_type;
119 	__u8	start_col;
120 	__u8	num_col;
121 	__u8    num_unused_col;
122 	__u8	reserved;
123 	__u8	num_cq_pairs_requested;
124 	__u8	reserved1;
125 	__u16	pasid;
126 	__u32	pad[2];
127 	__u32	sec_comm_target_type;
128 	__u32	context_priority;
129 } __packed;
130 
131 struct create_ctx_resp {
132 	enum aie2_msg_status	status;
133 	__u32			context_id;
134 	__u16			msix_id;
135 	__u8			num_cq_pairs_allocated;
136 	__u8			reserved;
137 	struct cq_pair		cq_pair[MAX_CQ_PAIRS];
138 } __packed;
139 
140 struct destroy_ctx_req {
141 	__u32	context_id;
142 } __packed;
143 
144 struct destroy_ctx_resp {
145 	enum aie2_msg_status	status;
146 } __packed;
147 
/*
 * Telemetry categories, numbered from 0 in declaration order.
 * MAX_TELEMETRY_TYPE is the count of real categories (5).
 */
enum telemetry_type {
	TELEMETRY_TYPE_DISABLED,
	TELEMETRY_TYPE_HEALTH,
	TELEMETRY_TYPE_ERROR_INFO,
	TELEMETRY_TYPE_PROFILING,
	TELEMETRY_TYPE_DEBUG,
	MAX_TELEMETRY_TYPE
};

157 struct get_telemetry_req {
158 	enum telemetry_type	type;
159 	__u64	buf_addr;
160 	__u32	buf_size;
161 } __packed;
162 
163 struct get_telemetry_resp {
164 	__u32	major;
165 	__u32	minor;
166 	__u32	size;
167 	enum aie2_msg_status	status;
168 } __packed;
169 
170 struct execute_buffer_req {
171 	__u32	cu_idx;
172 	__u32	payload[19];
173 } __packed;
174 
175 struct exec_dpu_req {
176 	__u64	inst_buf_addr;
177 	__u32	inst_size;
178 	__u32	inst_prop_cnt;
179 	__u32	cu_idx;
180 	__u32	payload[35];
181 } __packed;
182 
/*
 * Kinds of NPU execution, used as the "type" member of
 * struct cmd_chain_slot_npu.  Values start at 1; 0 is not assigned.
 */
enum exec_npu_type {
	EXEC_NPU_TYPE_NON_ELF		= 0x1,
	EXEC_NPU_TYPE_PARTIAL_ELF	= 0x2,
	EXEC_NPU_TYPE_PREEMPT		= 0x3,
	EXEC_NPU_TYPE_ELF		= 0x4,
};

190 union exec_req {
191 	struct execute_buffer_req ebuf;
192 	struct exec_dpu_req dpu_req;
193 };
194 
195 struct execute_buffer_resp {
196 	enum aie2_msg_status	status;
197 } __packed;
198 
199 struct aie_tile_info {
200 	__u32		size;
201 	__u16		major;
202 	__u16		minor;
203 	__u16		cols;
204 	__u16		rows;
205 	__u16		core_rows;
206 	__u16		mem_rows;
207 	__u16		shim_rows;
208 	__u16		core_row_start;
209 	__u16		mem_row_start;
210 	__u16		shim_row_start;
211 	__u16		core_dma_channels;
212 	__u16		mem_dma_channels;
213 	__u16		shim_dma_channels;
214 	__u16		core_locks;
215 	__u16		mem_locks;
216 	__u16		shim_locks;
217 	__u16		core_events;
218 	__u16		mem_events;
219 	__u16		shim_events;
220 	__u16		reserved;
221 };
222 
223 struct aie_tile_info_req {
224 	__u32	reserved;
225 } __packed;
226 
227 struct aie_tile_info_resp {
228 	enum aie2_msg_status	status;
229 	struct aie_tile_info	info;
230 } __packed;
231 
232 struct aie_version_info_req {
233 	__u32		reserved;
234 } __packed;
235 
236 struct aie_version_info_resp {
237 	enum aie2_msg_status	status;
238 	__u16			major;
239 	__u16			minor;
240 } __packed;
241 
242 struct aie_column_info_req {
243 	__u64 dump_buff_addr;
244 	__u32 dump_buff_size;
245 	__u32 num_cols;
246 	__u32 aie_bitmap;
247 } __packed;
248 
249 struct aie_column_info_resp {
250 	enum aie2_msg_status	status;
251 	__u32 size;
252 } __packed;
253 
254 struct suspend_req {
255 	__u32		place_holder;
256 } __packed;
257 
258 struct suspend_resp {
259 	enum aie2_msg_status	status;
260 } __packed;
261 
262 struct resume_req {
263 	__u32		place_holder;
264 } __packed;
265 
266 struct resume_resp {
267 	enum aie2_msg_status	status;
268 } __packed;
269 
270 struct check_header_hash_req {
271 	__u64		hash_high;
272 	__u64		hash_low;
273 } __packed;
274 
275 struct check_header_hash_resp {
276 	enum aie2_msg_status	status;
277 } __packed;
278 
279 struct query_error_req {
280 	__u64		buf_addr;
281 	__u32		buf_size;
282 	__u32		next_row;
283 	__u32		next_column;
284 	__u32		next_module;
285 } __packed;
286 
287 struct query_error_resp {
288 	enum aie2_msg_status	status;
289 	__u32			num_err;
290 	__u32			has_next_err;
291 	__u32			next_row;
292 	__u32			next_column;
293 	__u32			next_module;
294 } __packed;
295 
296 struct protocol_version_req {
297 	__u32		reserved;
298 } __packed;
299 
300 struct protocol_version_resp {
301 	enum aie2_msg_status	status;
302 	__u32			major;
303 	__u32			minor;
304 } __packed;
305 
306 struct firmware_version_req {
307 	__u32		reserved;
308 } __packed;
309 
310 struct firmware_version_resp {
311 	enum aie2_msg_status	status;
312 	__u32			major;
313 	__u32			minor;
314 	__u32			sub;
315 	__u32			build;
316 } __packed;
317 
318 #define MAX_NUM_CUS			32
319 #define AIE2_MSG_CFG_CU_PDI_ADDR	GENMASK(16, 0)
320 #define AIE2_MSG_CFG_CU_FUNC		GENMASK(24, 17)
321 struct config_cu_req {
322 	__u32	num_cus;
323 	__u32	cfgs[MAX_NUM_CUS];
324 } __packed;
325 
326 struct config_cu_resp {
327 	enum aie2_msg_status	status;
328 } __packed;
329 
330 struct set_runtime_cfg_req {
331 	__u32	type;
332 	__u64	value;
333 } __packed;
334 
335 struct set_runtime_cfg_resp {
336 	enum aie2_msg_status	status;
337 } __packed;
338 
339 struct get_runtime_cfg_req {
340 	__u32	type;
341 } __packed;
342 
343 struct get_runtime_cfg_resp {
344 	enum aie2_msg_status	status;
345 	__u64			value;
346 } __packed;
347 
/*
 * Kinds of asynchronous event, numbered from 0 in declaration order.
 * MAX_ASYNC_EVENT_TYPE is the count of real kinds (2).
 */
enum async_event_type {
	ASYNC_EVENT_TYPE_AIE_ERROR,
	ASYNC_EVENT_TYPE_EXCEPTION,
	MAX_ASYNC_EVENT_TYPE
};

354 #define ASYNC_BUF_SIZE SZ_8K
355 struct async_event_msg_req {
356 	__u64 buf_addr;
357 	__u32 buf_size;
358 } __packed;
359 
360 struct async_event_msg_resp {
361 	enum aie2_msg_status	status;
362 	enum async_event_type	type;
363 } __packed;
364 
365 #define MAX_CHAIN_CMDBUF_SIZE SZ_4K
366 
367 struct cmd_chain_slot_execbuf_cf {
368 	__u32 cu_idx;
369 	__u32 arg_cnt;
370 	__u32 args[] __counted_by(arg_cnt);
371 };
372 
373 struct cmd_chain_slot_dpu {
374 	__u64 inst_buf_addr;
375 	__u32 inst_size;
376 	__u32 inst_prop_cnt;
377 	__u32 cu_idx;
378 	__u32 arg_cnt;
379 #define MAX_DPU_ARGS_SIZE (34 * sizeof(__u32))
380 	__u32 args[] __counted_by(arg_cnt);
381 };
382 
383 #define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32))
384 #define AIE2_EXEC_BUFFER_KERNEL_OP_TXN	3
385 struct cmd_chain_slot_npu {
386 	enum exec_npu_type type;
387 	u64 inst_buf_addr;
388 	u64 save_buf_addr;
389 	u64 restore_buf_addr;
390 	u32 inst_size;
391 	u32 save_size;
392 	u32 restore_size;
393 	u32 inst_prop_cnt;
394 	u32 cu_idx;
395 	u32 arg_cnt;
396 	u32 args[] __counted_by(arg_cnt);
397 } __packed;
398 
399 struct cmd_chain_req {
400 	__u64 buf_addr;
401 	__u32 buf_size;
402 	__u32 count;
403 } __packed;
404 
405 struct cmd_chain_npu_req {
406 	u32 flags;
407 	u32 reserved;
408 	u64 buf_addr;
409 	u32 buf_size;
410 	u32 count;
411 } __packed;
412 
413 union exec_chain_req {
414 	struct cmd_chain_npu_req npu_req;
415 	struct cmd_chain_req req;
416 };
417 
418 struct cmd_chain_resp {
419 	enum aie2_msg_status	status;
420 	__u32			fail_cmd_idx;
421 	enum aie2_msg_status	fail_cmd_status;
422 } __packed;
423 
424 #define AIE2_MSG_SYNC_BO_SRC_TYPE	GENMASK(3, 0)
425 #define AIE2_MSG_SYNC_BO_DST_TYPE	GENMASK(7, 4)
426 struct sync_bo_req {
427 	__u64 src_addr;
428 	__u64 dst_addr;
429 	__u32 size;
430 #define SYNC_BO_DEV_MEM  0
431 #define SYNC_BO_HOST_MEM 2
432 	__u32 type;
433 } __packed;
434 
435 struct sync_bo_resp {
436 	enum aie2_msg_status	status;
437 } __packed;
438 
439 #define DEBUG_BO_UNREGISTER 0
440 #define DEBUG_BO_REGISTER   1
441 struct config_debug_bo_req {
442 	__u64	offset;
443 	__u64	size;
444 	/*
445 	 * config operations.
446 	 *   DEBUG_BO_REGISTER: Register debug buffer
447 	 *   DEBUG_BO_UNREGISTER: Unregister debug buffer
448 	 */
449 	__u32	config;
450 } __packed;
451 
452 struct config_debug_bo_resp {
453 	enum aie2_msg_status	status;
454 } __packed;
455 
456 struct fatal_error_info {
457 	__u32 fatal_type;         /* Fatal error type */
458 	__u32 exception_type;     /* Only valid if fatal_type is a specific value */
459 	__u32 exception_argument; /* Argument based on exception type */
460 	__u32 exception_pc;       /* Program Counter at the time of the exception */
461 	__u32 app_module;         /* Error module name */
462 	__u32 task_index;         /* Index of the task in which the error occurred */
463 	__u32 reserved[128];
464 };
465 
466 struct app_health_report {
467 	__u16 major;
468 	__u16 minor;
469 	__u32 size;
470 	__u32 context_id;
471 	/*
472 	 * Program Counter (PC) of the last initiated DPU opcode, as reported by the ERT
473 	 * application. Before execution begins or after successful completion, the value is set
474 	 * to UINT_MAX. If execution halts prematurely due to an error, this field retains the
475 	 * opcode's PC value.
476 	 * Note: To optimize performance, the ERT may simplify certain aspects of reporting.
477 	 * Proper interpretation requires familiarity with the implementation details.
478 	 */
479 	__u32 dpu_pc;
480 	/*
481 	 * Index of the last initiated TXN opcode.
482 	 * Before execution starts or after successful completion, the value is set to UINT_MAX.
483 	 * If execution halts prematurely due to an error, this field retains the opcode's ID.
484 	 * Note: To optimize performance, the ERT may simplify certain aspects of reporting.
485 	 * Proper interpretation requires familiarity with the implementation details.
486 	 */
487 	__u32 txn_op_id;
488 	/* The PC of the context at the time of the report */
489 	__u32 ctx_pc;
490 	struct fatal_error_info		fatal_info;
491 	/* Index of the most recently executed run list entry. */
492 	__u32 run_list_id;
493 };
494 
495 struct get_app_health_req {
496 	__u32 context_id;
497 	__u32 buf_size;
498 	__u64 buf_addr;
499 } __packed;
500 
501 struct get_app_health_resp {
502 	enum aie2_msg_status status;
503 	__u32 required_buffer_size;
504 	__u32 reserved[7];
505 } __packed;
#endif /* _AIE2_MSG_PRIV_H_ */