xref: /linux/drivers/accel/amdxdna/aie2_message.c (revision 509d3f45847627f4c5cdce004c3ec79262b5239c)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
4  */
5 
6 #include <drm/amdxdna_accel.h>
7 #include <drm/drm_cache.h>
8 #include <drm/drm_device.h>
9 #include <drm/drm_gem.h>
10 #include <drm/drm_gem_shmem_helper.h>
11 #include <drm/drm_print.h>
12 #include <drm/gpu_scheduler.h>
13 #include <linux/bitfield.h>
14 #include <linux/errno.h>
15 #include <linux/pci.h>
16 #include <linux/types.h>
17 #include <linux/xarray.h>
18 
19 #include "aie2_msg_priv.h"
20 #include "aie2_pci.h"
21 #include "amdxdna_ctx.h"
22 #include "amdxdna_gem.h"
23 #include "amdxdna_mailbox.h"
24 #include "amdxdna_mailbox_helper.h"
25 #include "amdxdna_pci_drv.h"
26 
27 #define DECLARE_AIE2_MSG(name, op) \
28 	DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE)
29 
30 #define EXEC_MSG_OPS(xdna)	((xdna)->dev_handle->exec_msg_ops)
31 
32 static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
33 				   struct xdna_mailbox_msg *msg)
34 {
35 	struct amdxdna_dev *xdna = ndev->xdna;
36 	struct xdna_notify *hdl = msg->handle;
37 	int ret;
38 
39 	if (!ndev->mgmt_chann)
40 		return -ENODEV;
41 
42 	drm_WARN_ON(&xdna->ddev, xdna->rpm_on && !mutex_is_locked(&xdna->dev_lock));
43 	ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
44 	if (ret == -ETIME) {
45 		xdna_mailbox_stop_channel(ndev->mgmt_chann);
46 		xdna_mailbox_destroy_channel(ndev->mgmt_chann);
47 		ndev->mgmt_chann = NULL;
48 	}
49 
50 	if (!ret && *hdl->data != AIE2_STATUS_SUCCESS) {
51 		XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
52 			 msg->opcode, *hdl->data);
53 		ret = -EINVAL;
54 	}
55 
56 	return ret;
57 }
58 
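/*
 * Illustrative sketch, not part of the driver: how callers below consume
 * aie2_send_mgmt_msg_wait(). A -ETIME return means the management channel
 * has already been stopped and destroyed above, so the command must not be
 * retried on the same channel. The helper name aie2_example_mgmt_call() and
 * its reuse of the suspend request are assumptions for illustration only.
 */
static int __maybe_unused aie2_example_mgmt_call(struct amdxdna_dev_hdl *ndev)
{
	DECLARE_AIE2_MSG(suspend, MSG_OP_SUSPEND);
	int ret;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret == -ETIME)
		return ret; /* mgmt channel is gone; only a reset brings it back */
	if (ret)
		XDNA_ERR(ndev->xdna, "mgmt command failed, ret %d", ret);

	return ret;
}
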
59 int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev)
60 {
61 	DECLARE_AIE2_MSG(suspend, MSG_OP_SUSPEND);
62 
63 	return aie2_send_mgmt_msg_wait(ndev, &msg);
64 }
65 
66 int aie2_resume_fw(struct amdxdna_dev_hdl *ndev)
67 {
68 	DECLARE_AIE2_MSG(suspend, MSG_OP_RESUME);
69 
70 	return aie2_send_mgmt_msg_wait(ndev, &msg);
71 }
72 
73 int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value)
74 {
75 	DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG);
76 	int ret;
77 
78 	req.type = type;
79 	req.value = value;
80 
81 	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
82 	if (ret) {
83 		XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", ret);
84 		return ret;
85 	}
86 
87 	return 0;
88 }
89 
90 int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value)
91 {
92 	DECLARE_AIE2_MSG(get_runtime_cfg, MSG_OP_GET_RUNTIME_CONFIG);
93 	int ret;
94 
95 	req.type = type;
96 	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
97 	if (ret) {
98 		XDNA_ERR(ndev->xdna, "Failed to get runtime config, ret %d", ret);
99 		return ret;
100 	}
101 
102 	*value = resp.value;
103 	return 0;
104 }
105 
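/*
 * Illustrative sketch, not part of the driver: a write-then-read-back check
 * built from the two runtime config helpers above. The helper name is an
 * assumption; type and value are whatever AIE2_RT_CFG_* pair the caller
 * cares about.
 */
static int __maybe_unused aie2_example_rt_cfg_verify(struct amdxdna_dev_hdl *ndev,
						     u32 type, u64 value)
{
	u64 readback;
	int ret;

	ret = aie2_set_runtime_cfg(ndev, type, value);
	if (ret)
		return ret;

	ret = aie2_get_runtime_cfg(ndev, type, &readback);
	if (ret)
		return ret;

	if (readback != value) {
		XDNA_ERR(ndev->xdna, "runtime cfg %d readback mismatch", type);
		return -EINVAL;
	}

	return 0;
}
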
106 int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid)
107 {
108 	DECLARE_AIE2_MSG(assign_mgmt_pasid, MSG_OP_ASSIGN_MGMT_PASID);
109 
110 	req.pasid = pasid;
111 
112 	return aie2_send_mgmt_msg_wait(ndev, &msg);
113 }
114 
115 int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version)
116 {
117 	DECLARE_AIE2_MSG(aie_version_info, MSG_OP_QUERY_AIE_VERSION);
118 	struct amdxdna_dev *xdna = ndev->xdna;
119 	int ret;
120 
121 	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
122 	if (ret)
123 		return ret;
124 
125 	XDNA_DBG(xdna, "Query AIE version - major: %u minor: %u completed",
126 		 resp.major, resp.minor);
127 
128 	version->major = resp.major;
129 	version->minor = resp.minor;
130 
131 	return 0;
132 }
133 
134 int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata)
135 {
136 	DECLARE_AIE2_MSG(aie_tile_info, MSG_OP_QUERY_AIE_TILE_INFO);
137 	int ret;
138 
139 	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
140 	if (ret)
141 		return ret;
142 
143 	metadata->size = resp.info.size;
144 	metadata->cols = resp.info.cols;
145 	metadata->rows = resp.info.rows;
146 
147 	metadata->version.major = resp.info.major;
148 	metadata->version.minor = resp.info.minor;
149 
150 	metadata->core.row_count = resp.info.core_rows;
151 	metadata->core.row_start = resp.info.core_row_start;
152 	metadata->core.dma_channel_count = resp.info.core_dma_channels;
153 	metadata->core.lock_count = resp.info.core_locks;
154 	metadata->core.event_reg_count = resp.info.core_events;
155 
156 	metadata->mem.row_count = resp.info.mem_rows;
157 	metadata->mem.row_start = resp.info.mem_row_start;
158 	metadata->mem.dma_channel_count = resp.info.mem_dma_channels;
159 	metadata->mem.lock_count = resp.info.mem_locks;
160 	metadata->mem.event_reg_count = resp.info.mem_events;
161 
162 	metadata->shim.row_count = resp.info.shim_rows;
163 	metadata->shim.row_start = resp.info.shim_row_start;
164 	metadata->shim.dma_channel_count = resp.info.shim_dma_channels;
165 	metadata->shim.lock_count = resp.info.shim_locks;
166 	metadata->shim.event_reg_count = resp.info.shim_events;
167 
168 	return 0;
169 }
170 
171 int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
172 				struct amdxdna_fw_ver *fw_ver)
173 {
174 	DECLARE_AIE2_MSG(firmware_version, MSG_OP_GET_FIRMWARE_VERSION);
175 	int ret;
176 
177 	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
178 	if (ret)
179 		return ret;
180 
181 	fw_ver->major = resp.major;
182 	fw_ver->minor = resp.minor;
183 	fw_ver->sub = resp.sub;
184 	fw_ver->build = resp.build;
185 
186 	return 0;
187 }
188 
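/*
 * Illustrative sketch, not part of the driver: combining the query helpers
 * above to log what the firmware reports about the device. Field names
 * follow the assignments in the helpers; the function name is an assumption.
 */
static int __maybe_unused aie2_example_log_device_info(struct amdxdna_dev_hdl *ndev)
{
	struct amdxdna_fw_ver fw_ver;
	struct aie_metadata meta;
	struct aie_version ver;
	int ret;

	ret = aie2_query_aie_version(ndev, &ver);
	if (ret)
		return ret;

	ret = aie2_query_aie_metadata(ndev, &meta);
	if (ret)
		return ret;

	ret = aie2_query_firmware_version(ndev, &fw_ver);
	if (ret)
		return ret;

	XDNA_DBG(ndev->xdna, "AIE %u.%u, %u cols x %u rows, firmware %u.%u.%u.%u",
		 ver.major, ver.minor, meta.cols, meta.rows,
		 fw_ver.major, fw_ver.minor, fw_ver.sub, fw_ver.build);

	return 0;
}
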
189 int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
190 {
191 	DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT);
192 	struct amdxdna_dev *xdna = ndev->xdna;
193 	struct xdna_mailbox_chann_res x2i;
194 	struct xdna_mailbox_chann_res i2x;
195 	struct cq_pair *cq_pair;
196 	u32 intr_reg;
197 	int ret;
198 
199 	req.aie_type = 1;
200 	req.start_col = hwctx->start_col;
201 	req.num_col = hwctx->num_col;
202 	req.num_cq_pairs_requested = 1;
203 	req.pasid = hwctx->client->pasid;
204 	req.context_priority = 2;
205 
206 	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
207 	if (ret)
208 		return ret;
209 
210 	hwctx->fw_ctx_id = resp.context_id;
211 	WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id");
212 
213 	if (ndev->force_preempt_enabled) {
214 		ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPT, &hwctx->fw_ctx_id);
215 		if (ret) {
216 			XDNA_ERR(xdna, "failed to enable force preempt %d", ret);
217 			return ret;
218 		}
219 	}
220 
221 	cq_pair = &resp.cq_pair[0];
222 	x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr);
223 	x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr);
224 	x2i.rb_start_addr   = AIE2_SRAM_OFF(ndev, cq_pair->x2i_q.buf_addr);
225 	x2i.rb_size	    = cq_pair->x2i_q.buf_size;
226 
227 	i2x.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.head_addr);
228 	i2x.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.tail_addr);
229 	i2x.rb_start_addr   = AIE2_SRAM_OFF(ndev, cq_pair->i2x_q.buf_addr);
230 	i2x.rb_size	    = cq_pair->i2x_q.buf_size;
231 
232 	ret = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id);
233 	if (ret == -EINVAL) {
234 		XDNA_ERR(xdna, "failed to get IRQ vector, msix_id %u", resp.msix_id);
235 		goto out_destroy_context;
236 	}
237 
238 	intr_reg = i2x.mb_head_ptr_reg + 4;
239 	hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x,
240 							      intr_reg, ret);
241 	if (!hwctx->priv->mbox_chann) {
242 		XDNA_ERR(xdna, "not able to create channel");
243 		ret = -EINVAL;
244 		goto out_destroy_context;
245 	}
246 	ndev->hwctx_num++;
247 
248 	XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d",
249 		 hwctx->name, ret, resp.msix_id);
250 	XDNA_DBG(xdna, "%s created fw ctx %d pasid %d", hwctx->name,
251 		 hwctx->fw_ctx_id, hwctx->client->pasid);
252 
253 	return 0;
254 
255 out_destroy_context:
256 	aie2_destroy_context(ndev, hwctx);
257 	return ret;
258 }
259 
260 int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
261 {
262 	DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT);
263 	struct amdxdna_dev *xdna = ndev->xdna;
264 	int ret;
265 
266 	if (hwctx->fw_ctx_id == -1)
267 		return 0;
268 
269 	xdna_mailbox_stop_channel(hwctx->priv->mbox_chann);
270 
271 	req.context_id = hwctx->fw_ctx_id;
272 	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
273 	if (ret)
274 		XDNA_WARN(xdna, "%s destroy context failed, ret %d", hwctx->name, ret);
275 
276 	xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann);
277 	XDNA_DBG(xdna, "%s destroyed fw ctx %d", hwctx->name,
278 		 hwctx->fw_ctx_id);
279 	hwctx->priv->mbox_chann = NULL;
280 	hwctx->fw_ctx_id = -1;
281 	ndev->hwctx_num--;
282 
283 	return ret;
284 }
285 
286 int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size)
287 {
288 	DECLARE_AIE2_MSG(map_host_buffer, MSG_OP_MAP_HOST_BUFFER);
289 	struct amdxdna_dev *xdna = ndev->xdna;
290 	int ret;
291 
292 	req.context_id = context_id;
293 	req.buf_addr = addr;
294 	req.buf_size = size;
295 	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
296 	if (ret)
297 		return ret;
298 
299 	XDNA_DBG(xdna, "fw ctx %d map host buf addr 0x%llx size 0x%llx",
300 		 context_id, addr, size);
301 
302 	return 0;
303 }
304 
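/*
 * Illustrative sketch, not part of the driver: the typical lifetime driven
 * by the three helpers above - create the firmware context, tell firmware
 * where the host command buffer lives, destroy the context on failure. The
 * heap address and size arguments are placeholders.
 */
static int __maybe_unused aie2_example_ctx_bringup(struct amdxdna_dev_hdl *ndev,
						   struct amdxdna_hwctx *hwctx,
						   u64 heap_addr, u64 heap_size)
{
	int ret;

	ret = aie2_create_context(ndev, hwctx);
	if (ret)
		return ret;

	ret = aie2_map_host_buf(ndev, hwctx->fw_ctx_id, heap_addr, heap_size);
	if (ret)
		aie2_destroy_context(ndev, hwctx);

	return ret;
}
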
305 static int amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx, void *arg)
306 {
307 	u32 *bitmap = arg;
308 
309 	*bitmap |= GENMASK(hwctx->start_col + hwctx->num_col - 1, hwctx->start_col);
310 
311 	return 0;
312 }
313 
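/*
 * Worked example for the column map above: a context occupying columns 1-4
 * (start_col = 1, num_col = 4) contributes GENMASK(4, 1) = 0x1e, so OR-ing
 * every context's mask yields one set bit per AIE column that is in use.
 */
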
314 int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
315 		      u32 size, u32 *cols_filled)
316 {
317 	DECLARE_AIE2_MSG(aie_column_info, MSG_OP_QUERY_COL_STATUS);
318 	struct amdxdna_dev *xdna = ndev->xdna;
319 	struct amdxdna_client *client;
320 	dma_addr_t dma_addr;
321 	u32 aie_bitmap = 0;
322 	u8 *buff_addr;
323 	int ret;
324 
325 	buff_addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr,
326 					  DMA_FROM_DEVICE, GFP_KERNEL);
327 	if (!buff_addr)
328 		return -ENOMEM;
329 
330 	/* Go through each hardware context and mark the AIE columns that are active */
331 	list_for_each_entry(client, &xdna->client_list, node)
332 		amdxdna_hwctx_walk(client, &aie_bitmap, amdxdna_hwctx_col_map);
333 
334 	*cols_filled = 0;
335 	req.dump_buff_addr = dma_addr;
336 	req.dump_buff_size = size;
337 	req.num_cols = hweight32(aie_bitmap);
338 	req.aie_bitmap = aie_bitmap;
339 
340 	drm_clflush_virt_range(buff_addr, size); /* device can access */
341 	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
342 	if (ret) {
343 		XDNA_ERR(xdna, "Error during NPU query, status %d", ret);
344 		goto fail;
345 	}
346 
347 	XDNA_DBG(xdna, "Query NPU status completed");
348 
349 	if (size < resp.size) {
350 		ret = -EINVAL;
351 		XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size);
352 		goto fail;
353 	}
354 
355 	if (copy_to_user(buf, buff_addr, resp.size)) {
356 		ret = -EFAULT;
357 		XDNA_ERR(xdna, "Failed to copy NPU status to user space");
358 		goto fail;
359 	}
360 
361 	*cols_filled = aie_bitmap;
362 
363 fail:
364 	dma_free_noncoherent(xdna->ddev.dev, size, buff_addr, dma_addr, DMA_FROM_DEVICE);
365 	return ret;
366 }
367 
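/*
 * Note on the DMA pattern above: the status buffer comes from
 * dma_alloc_noncoherent() with DMA_FROM_DEVICE, so drm_clflush_virt_range()
 * is called before the request is sent; otherwise dirty cache lines could
 * be written back over the data the firmware places in the buffer while the
 * query is in flight.
 */
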
368 int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
369 			 char __user *buf, u32 size,
370 			 struct amdxdna_drm_query_telemetry_header *header)
371 {
372 	DECLARE_AIE2_MSG(get_telemetry, MSG_OP_GET_TELEMETRY);
373 	struct amdxdna_dev *xdna = ndev->xdna;
374 	dma_addr_t dma_addr;
375 	u8 *addr;
376 	int ret;
377 
378 	if (header->type >= MAX_TELEMETRY_TYPE)
379 		return -EINVAL;
380 
381 	addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr,
382 				     DMA_FROM_DEVICE, GFP_KERNEL);
383 	if (!addr)
384 		return -ENOMEM;
385 
386 	req.buf_addr = dma_addr;
387 	req.buf_size = size;
388 	req.type = header->type;
389 
390 	drm_clflush_virt_range(addr, size); /* device can access */
391 	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
392 	if (ret) {
393 		XDNA_ERR(xdna, "Query telemetry failed, status %d", ret);
394 		goto free_buf;
395 	}
396 
397 	if (size < resp.size) {
398 		ret = -EINVAL;
399 		XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size);
400 		goto free_buf;
401 	}
402 
403 	if (copy_to_user(buf, addr, resp.size)) {
404 		ret = -EFAULT;
405 		XDNA_ERR(xdna, "Failed to copy telemetry to user space");
406 		goto free_buf;
407 	}
408 
409 	header->major = resp.major;
410 	header->minor = resp.minor;
411 
412 free_buf:
413 	dma_free_noncoherent(xdna->ddev.dev, size, addr, dma_addr, DMA_FROM_DEVICE);
414 	return ret;
415 }
416 
417 int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
418 				 void *handle, int (*cb)(void*, void __iomem *, size_t))
419 {
420 	struct async_event_msg_req req = { 0 };
421 	struct xdna_mailbox_msg msg = {
422 		.send_data = (u8 *)&req,
423 		.send_size = sizeof(req),
424 		.handle = handle,
425 		.opcode = MSG_OP_REGISTER_ASYNC_EVENT_MSG,
426 		.notify_cb = cb,
427 	};
428 
429 	req.buf_addr = addr;
430 	req.buf_size = size;
431 
432 	XDNA_DBG(ndev->xdna, "Register addr 0x%llx size 0x%x", addr, size);
433 	return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT);
434 }
435 
436 int aie2_config_cu(struct amdxdna_hwctx *hwctx,
437 		   int (*notify_cb)(void *, void __iomem *, size_t))
438 {
439 	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
440 	struct amdxdna_dev *xdna = hwctx->client->xdna;
441 	u32 shift = xdna->dev_info->dev_mem_buf_shift;
442 	struct config_cu_req req = { 0 };
443 	struct xdna_mailbox_msg msg;
444 	struct drm_gem_object *gobj;
445 	struct amdxdna_gem_obj *abo;
446 	int i;
447 
448 	if (!chann)
449 		return -ENODEV;
450 
451 	if (hwctx->cus->num_cus > MAX_NUM_CUS) {
452 		XDNA_DBG(xdna, "Exceed maximum CU %d", MAX_NUM_CUS);
453 		return -EINVAL;
454 	}
455 
456 	for (i = 0; i < hwctx->cus->num_cus; i++) {
457 		struct amdxdna_cu_config *cu = &hwctx->cus->cu_configs[i];
458 
459 		if (XDNA_MBZ_DBG(xdna, cu->pad, sizeof(cu->pad)))
460 			return -EINVAL;
461 
462 		gobj = drm_gem_object_lookup(hwctx->client->filp, cu->cu_bo);
463 		if (!gobj) {
464 			XDNA_ERR(xdna, "Lookup GEM object failed");
465 			return -EINVAL;
466 		}
467 		abo = to_xdna_obj(gobj);
468 
469 		if (abo->type != AMDXDNA_BO_DEV) {
470 			drm_gem_object_put(gobj);
471 			XDNA_ERR(xdna, "Invalid BO type");
472 			return -EINVAL;
473 		}
474 
475 		req.cfgs[i] = FIELD_PREP(AIE2_MSG_CFG_CU_PDI_ADDR,
476 					 abo->mem.dev_addr >> shift);
477 		req.cfgs[i] |= FIELD_PREP(AIE2_MSG_CFG_CU_FUNC, cu->cu_func);
478 		XDNA_DBG(xdna, "CU %d full addr 0x%llx, cfg 0x%x", i,
479 			 abo->mem.dev_addr, req.cfgs[i]);
480 		drm_gem_object_put(gobj);
481 	}
482 	req.num_cus = hwctx->cus->num_cus;
483 
484 	msg.send_data = (u8 *)&req;
485 	msg.send_size = sizeof(req);
486 	msg.handle = hwctx;
487 	msg.opcode = MSG_OP_CONFIG_CU;
488 	msg.notify_cb = notify_cb;
489 	return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
490 }
491 
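/*
 * Worked example for the CU config packing above, assuming a device memory
 * buffer shift of 16: a PDI at dev_addr 0x20000 with cu_func 1 becomes
 * FIELD_PREP(AIE2_MSG_CFG_CU_PDI_ADDR, 0x20000 >> 16) |
 * FIELD_PREP(AIE2_MSG_CFG_CU_FUNC, 1), i.e. the address field carries the
 * shifted page number (0x2) rather than the raw device address. The actual
 * bit positions come from the field masks in aie2_msg_priv.h.
 */
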
492 static int aie2_init_exec_cu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
493 				 size_t *size, u32 *msg_op)
494 {
495 	struct execute_buffer_req *cu_req = req;
496 	u32 cmd_len;
497 	void *cmd;
498 
499 	cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
500 	if (cmd_len > sizeof(cu_req->payload))
501 		return -EINVAL;
502 
503 	cu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
504 	if (cu_req->cu_idx == INVALID_CU_IDX)
505 		return -EINVAL;
506 
507 	memcpy(cu_req->payload, cmd, cmd_len);
508 
509 	*size = sizeof(*cu_req);
510 	*msg_op = MSG_OP_EXECUTE_BUFFER_CF;
511 	return 0;
512 }
513 
514 static int aie2_init_exec_dpu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
515 				  size_t *size, u32 *msg_op)
516 {
517 	struct exec_dpu_req *dpu_req = req;
518 	struct amdxdna_cmd_start_npu *sn;
519 	u32 cmd_len;
520 
521 	sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
522 	if (cmd_len - sizeof(*sn) > sizeof(dpu_req->payload))
523 		return -EINVAL;
524 
525 	dpu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
526 	if (dpu_req->cu_idx == INVALID_CU_IDX)
527 		return -EINVAL;
528 
529 	dpu_req->inst_buf_addr = sn->buffer;
530 	dpu_req->inst_size = sn->buffer_size;
531 	dpu_req->inst_prop_cnt = sn->prop_count;
532 	memcpy(dpu_req->payload, sn->prop_args, cmd_len - sizeof(*sn));
533 
534 	*size = sizeof(*dpu_req);
535 	*msg_op = MSG_OP_EXEC_DPU;
536 	return 0;
537 }
538 
539 static void aie2_init_exec_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
540 {
541 	struct cmd_chain_req *chain_req = req;
542 
543 	chain_req->buf_addr = slot_addr;
544 	chain_req->buf_size = size;
545 	chain_req->count = cmd_cnt;
546 }
547 
548 static void aie2_init_npu_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
549 {
550 	struct cmd_chain_npu_req *npu_chain_req = req;
551 
552 	npu_chain_req->flags = 0;
553 	npu_chain_req->reserved = 0;
554 	npu_chain_req->buf_addr = slot_addr;
555 	npu_chain_req->buf_size = size;
556 	npu_chain_req->count = cmd_cnt;
557 }
558 
559 static int
560 aie2_cmdlist_fill_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
561 {
562 	struct cmd_chain_slot_execbuf_cf *cf_slot = slot;
563 	u32 cmd_len;
564 	void *cmd;
565 
566 	cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
567 	if (*size < sizeof(*cf_slot) + cmd_len)
568 		return -EINVAL;
569 
570 	cf_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
571 	if (cf_slot->cu_idx == INVALID_CU_IDX)
572 		return -EINVAL;
573 
574 	cf_slot->arg_cnt = cmd_len / sizeof(u32);
575 	memcpy(cf_slot->args, cmd, cmd_len);
576 	/* Report the exact slot size so the firmware copies only what it needs */
577 	*size = sizeof(*cf_slot) + cmd_len;
578 	return 0;
579 }
580 
581 static int
582 aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
583 {
584 	struct cmd_chain_slot_dpu *dpu_slot = slot;
585 	struct amdxdna_cmd_start_npu *sn;
586 	u32 cmd_len;
587 	u32 arg_sz;
588 
589 	sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
590 	arg_sz = cmd_len - sizeof(*sn);
591 	if (cmd_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
592 		return -EINVAL;
593 
594 	if (*size < sizeof(*dpu_slot) + arg_sz)
595 		return -EINVAL;
596 
597 	dpu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
598 	if (dpu_slot->cu_idx == INVALID_CU_IDX)
599 		return -EINVAL;
600 
601 	dpu_slot->inst_buf_addr = sn->buffer;
602 	dpu_slot->inst_size = sn->buffer_size;
603 	dpu_slot->inst_prop_cnt = sn->prop_count;
604 	dpu_slot->arg_cnt = arg_sz / sizeof(u32);
605 	memcpy(dpu_slot->args, sn->prop_args, arg_sz);
606 
607 	/* Report the exact slot size so the firmware copies only what it needs */
608 	*size = sizeof(*dpu_slot) + arg_sz;
609 	return 0;
610 }
611 
612 static int aie2_cmdlist_unsupp(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
613 {
614 	return -EOPNOTSUPP;
615 }
616 
617 static u32 aie2_get_chain_msg_op(u32 cmd_op)
618 {
619 	switch (cmd_op) {
620 	case ERT_START_CU:
621 		return MSG_OP_CHAIN_EXEC_BUFFER_CF;
622 	case ERT_START_NPU:
623 		return MSG_OP_CHAIN_EXEC_DPU;
624 	default:
625 		break;
626 	}
627 
628 	return MSG_OP_MAX_OPCODE;
629 }
630 
631 static struct aie2_exec_msg_ops legacy_exec_message_ops = {
632 	.init_cu_req = aie2_init_exec_cu_req,
633 	.init_dpu_req = aie2_init_exec_dpu_req,
634 	.init_chain_req = aie2_init_exec_chain_req,
635 	.fill_cf_slot = aie2_cmdlist_fill_cf,
636 	.fill_dpu_slot = aie2_cmdlist_fill_dpu,
637 	.fill_preempt_slot = aie2_cmdlist_unsupp,
638 	.fill_elf_slot = aie2_cmdlist_unsupp,
639 	.get_chain_msg_op = aie2_get_chain_msg_op,
640 };
641 
642 static int
643 aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
644 {
645 	struct cmd_chain_slot_npu *npu_slot = slot;
646 	u32 cmd_len;
647 	void *cmd;
648 
649 	cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
650 	if (*size < sizeof(*npu_slot) + cmd_len)
651 		return -EINVAL;
652 
653 	memset(npu_slot, 0, sizeof(*npu_slot));
654 	npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
655 	if (npu_slot->cu_idx == INVALID_CU_IDX)
656 		return -EINVAL;
657 
658 	npu_slot->type = EXEC_NPU_TYPE_NON_ELF;
659 	npu_slot->arg_cnt = cmd_len / sizeof(u32);
660 	memcpy(npu_slot->args, cmd, cmd_len);
661 
662 	*size = sizeof(*npu_slot) + cmd_len;
663 	return 0;
664 }
665 
666 static int
667 aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
668 {
669 	struct cmd_chain_slot_npu *npu_slot = slot;
670 	struct amdxdna_cmd_start_npu *sn;
671 	u32 cmd_len;
672 	u32 arg_sz;
673 
674 	sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
675 	arg_sz = cmd_len - sizeof(*sn);
676 	if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE)
677 		return -EINVAL;
678 
679 	if (*size < sizeof(*npu_slot) + arg_sz)
680 		return -EINVAL;
681 
682 	memset(npu_slot, 0, sizeof(*npu_slot));
683 	npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
684 	if (npu_slot->cu_idx == INVALID_CU_IDX)
685 		return -EINVAL;
686 
687 	npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF;
688 	npu_slot->inst_buf_addr = sn->buffer;
689 	npu_slot->inst_size = sn->buffer_size;
690 	npu_slot->inst_prop_cnt = sn->prop_count;
691 	npu_slot->arg_cnt = arg_sz / sizeof(u32);
692 	memcpy(npu_slot->args, sn->prop_args, arg_sz);
693 
694 	*size = sizeof(*npu_slot) + arg_sz;
695 	return 0;
696 }
697 
698 static int
699 aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
700 {
701 	struct cmd_chain_slot_npu *npu_slot = slot;
702 	struct amdxdna_cmd_preempt_data *pd;
703 	u32 cmd_len;
704 	u32 arg_sz;
705 
706 	pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
707 	arg_sz = cmd_len - sizeof(*pd);
708 	if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
709 		return -EINVAL;
710 
711 	if (*size < sizeof(*npu_slot) + arg_sz)
712 		return -EINVAL;
713 
714 	memset(npu_slot, 0, sizeof(*npu_slot));
715 	npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
716 	if (npu_slot->cu_idx == INVALID_CU_IDX)
717 		return -EINVAL;
718 
719 	npu_slot->type = EXEC_NPU_TYPE_PREEMPT;
720 	npu_slot->inst_buf_addr = pd->inst_buf;
721 	npu_slot->save_buf_addr = pd->save_buf;
722 	npu_slot->restore_buf_addr = pd->restore_buf;
723 	npu_slot->inst_size = pd->inst_size;
724 	npu_slot->save_size = pd->save_size;
725 	npu_slot->restore_size = pd->restore_size;
726 	npu_slot->inst_prop_cnt = pd->inst_prop_cnt;
727 	npu_slot->arg_cnt = arg_sz / sizeof(u32);
728 	memcpy(npu_slot->args, pd->prop_args, arg_sz);
729 
730 	*size = sizeof(*npu_slot) + arg_sz;
731 	return 0;
732 }
733 
734 static int
735 aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
736 {
737 	struct cmd_chain_slot_npu *npu_slot = slot;
738 	struct amdxdna_cmd_preempt_data *pd;
739 	u32 cmd_len;
740 	u32 arg_sz;
741 
742 	pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
743 	arg_sz = cmd_len - sizeof(*pd);
744 	if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
745 		return -EINVAL;
746 
747 	if (*size < sizeof(*npu_slot) + arg_sz)
748 		return -EINVAL;
749 
750 	memset(npu_slot, 0, sizeof(*npu_slot));
751 	npu_slot->type = EXEC_NPU_TYPE_ELF;
752 	npu_slot->inst_buf_addr = pd->inst_buf;
753 	npu_slot->save_buf_addr = pd->save_buf;
754 	npu_slot->restore_buf_addr = pd->restore_buf;
755 	npu_slot->inst_size = pd->inst_size;
756 	npu_slot->save_size = pd->save_size;
757 	npu_slot->restore_size = pd->restore_size;
758 	npu_slot->inst_prop_cnt = pd->inst_prop_cnt;
759 	npu_slot->arg_cnt = 1;
760 	npu_slot->args[0] = AIE2_EXEC_BUFFER_KERNEL_OP_TXN;
761 
762 	*size = struct_size(npu_slot, args, npu_slot->arg_cnt);
763 	return 0;
764 }
765 
766 static u32 aie2_get_npu_chain_msg_op(u32 cmd_op)
767 {
768 	return MSG_OP_CHAIN_EXEC_NPU;
769 }
770 
771 static struct aie2_exec_msg_ops npu_exec_message_ops = {
772 	.init_cu_req = aie2_init_exec_cu_req,
773 	.init_dpu_req = aie2_init_exec_dpu_req,
774 	.init_chain_req = aie2_init_npu_chain_req,
775 	.fill_cf_slot = aie2_cmdlist_fill_npu_cf,
776 	.fill_dpu_slot = aie2_cmdlist_fill_npu_dpu,
777 	.fill_preempt_slot = aie2_cmdlist_fill_npu_preempt,
778 	.fill_elf_slot = aie2_cmdlist_fill_npu_elf,
779 	.get_chain_msg_op = aie2_get_npu_chain_msg_op,
780 };
781 
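/*
 * Note on the two ops tables above: legacy_exec_message_ops keeps the
 * original per-format slots (execbuf_cf and dpu) with a chain opcode chosen
 * per command type, while npu_exec_message_ops packs every command into a
 * single cmd_chain_slot_npu tagged by an EXEC_NPU_TYPE_* value and always
 * uses MSG_OP_CHAIN_EXEC_NPU. aie2_msg_init() below picks the table based
 * on the AIE2_NPU_COMMAND feature.
 */
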
782 static int aie2_init_exec_req(void *req, struct amdxdna_gem_obj *cmd_abo,
783 			      size_t *size, u32 *msg_op)
784 {
785 	struct amdxdna_dev *xdna = cmd_abo->client->xdna;
786 	int ret;
787 	u32 op;
788 
789 
790 	op = amdxdna_cmd_get_op(cmd_abo);
791 	switch (op) {
792 	case ERT_START_CU:
793 		ret = EXEC_MSG_OPS(xdna)->init_cu_req(cmd_abo, req, size, msg_op);
794 		if (ret) {
795 			XDNA_DBG(xdna, "Init CU req failed ret %d", ret);
796 			return ret;
797 		}
798 		break;
799 	case ERT_START_NPU:
800 		ret = EXEC_MSG_OPS(xdna)->init_dpu_req(cmd_abo, req, size, msg_op);
801 		if (ret) {
802 			XDNA_DBG(xdna, "Init DPU req failed ret %d", ret);
803 			return ret;
804 		}
805 
806 		break;
807 	default:
808 		XDNA_ERR(xdna, "Unsupported op %d", op);
809 		ret = -EOPNOTSUPP;
810 		break;
811 	}
812 
813 	return ret;
814 }
815 
816 static int
817 aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo,
818 		       size_t *size, u32 *cmd_op)
819 {
820 	struct amdxdna_dev *xdna = cmd_abo->client->xdna;
821 	int ret;
822 	u32 op;
823 
824 	op = amdxdna_cmd_get_op(cmd_abo);
825 	if (*cmd_op == ERT_INVALID_CMD)
826 		*cmd_op = op;
827 	else if (op != *cmd_op)
828 		return -EINVAL;
829 
830 	switch (op) {
831 	case ERT_START_CU:
832 		ret = EXEC_MSG_OPS(xdna)->fill_cf_slot(cmd_abo, slot, size);
833 		break;
834 	case ERT_START_NPU:
835 		ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size);
836 		break;
837 	case ERT_START_NPU_PREEMPT:
838 		if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT))
839 			return -EOPNOTSUPP;
840 		ret = EXEC_MSG_OPS(xdna)->fill_preempt_slot(cmd_abo, slot, size);
841 		break;
842 	case ERT_START_NPU_PREEMPT_ELF:
843 		if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT))
844 			return -EOPNOTSUPP;
845 		ret = EXEC_MSG_OPS(xdna)->fill_elf_slot(cmd_abo, slot, size);
846 		break;
847 	default:
848 		XDNA_INFO(xdna, "Unsupported op %d", op);
849 		ret = -EOPNOTSUPP;
850 		break;
851 	}
852 
853 	return ret;
854 }
855 
856 void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
857 {
858 	if (AIE2_FEATURE_ON(ndev, AIE2_NPU_COMMAND))
859 		ndev->exec_msg_ops = &npu_exec_message_ops;
860 	else
861 		ndev->exec_msg_ops = &legacy_exec_message_ops;
862 }
863 
864 static inline struct amdxdna_gem_obj *
865 aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
866 {
867 	int idx = get_job_idx(job->seq);
868 
869 	return job->hwctx->priv->cmd_buf[idx];
870 }
871 
872 int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
873 		 int (*notify_cb)(void *, void __iomem *, size_t))
874 {
875 	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
876 	struct amdxdna_dev *xdna = hwctx->client->xdna;
877 	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
878 	struct xdna_mailbox_msg msg;
879 	union exec_req req;
880 	int ret;
881 
882 	if (!chann)
883 		return -ENODEV;
884 
885 	ret = aie2_init_exec_req(&req, cmd_abo, &msg.send_size, &msg.opcode);
886 	if (ret)
887 		return ret;
888 
889 	msg.handle = job;
890 	msg.notify_cb = notify_cb;
891 	msg.send_data = (u8 *)&req;
892 	print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
893 			     0x40, false);
894 
895 	ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
896 	if (ret) {
897 		XDNA_ERR(xdna, "Send message failed");
898 		return ret;
899 	}
900 
901 	return 0;
902 }
903 
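/*
 * Illustrative sketch, not part of the driver: the shape of a notify_cb
 * handler passed to aie2_execbuf(). The mailbox layer calls it with the
 * opaque msg.handle and the response payload. Treating the first 32-bit
 * word as a status code is an assumption made only for this example.
 */
static int __maybe_unused aie2_example_notify_cb(void *handle, void __iomem *data, size_t size)
{
	u32 status = data ? readl(data) : 0; /* assumed layout: first word is a status */

	pr_debug("job %p completed, status 0x%x, resp size %zu\n", handle, status, size);
	return 0;
}
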
904 int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
905 			       struct amdxdna_sched_job *job,
906 			       int (*notify_cb)(void *, void __iomem *, size_t))
907 {
908 	struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
909 	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
910 	struct amdxdna_client *client = hwctx->client;
911 	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
912 	struct amdxdna_dev *xdna = client->xdna;
913 	struct amdxdna_cmd_chain *payload;
914 	struct xdna_mailbox_msg msg;
915 	union exec_chain_req req;
916 	u32 payload_len;
917 	u32 offset = 0;
918 	size_t size;
919 	int ret;
920 	u32 op;
921 	u32 i;
922 
923 	op = amdxdna_cmd_get_op(cmd_abo);
924 	payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
925 	if (op != ERT_CMD_CHAIN || !payload ||
926 	    payload_len < struct_size(payload, data, payload->command_count))
927 		return -EINVAL;
928 
929 	op = ERT_INVALID_CMD;
930 	for (i = 0; i < payload->command_count; i++) {
931 		u32 boh = (u32)(payload->data[i]);
932 		struct amdxdna_gem_obj *abo;
933 
934 		abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD);
935 		if (!abo) {
936 			XDNA_ERR(xdna, "Failed to find cmd BO %d", boh);
937 			return -ENOENT;
938 		}
939 
940 		size = cmdbuf_abo->mem.size - offset;
941 		ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva + offset,
942 					     abo, &size, &op);
943 		amdxdna_gem_put_obj(abo);
944 		if (ret)
945 			return ret;
946 
947 		offset += size;
948 	}
949 	msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
950 	if (msg.opcode == MSG_OP_MAX_OPCODE)
951 		return -EOPNOTSUPP;
952 
953 	/* At this point, offset is the total number of bytes packed into the cmd buffer */
954 	EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
955 					   offset, payload->command_count);
956 	drm_clflush_virt_range(cmdbuf_abo->mem.kva, offset);
957 
958 	msg.handle = job;
959 	msg.notify_cb = notify_cb;
960 	msg.send_data = (u8 *)&req;
961 	msg.send_size = sizeof(req);
962 	ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
963 	if (ret) {
964 		XDNA_ERR(xdna, "Send message failed");
965 		return ret;
966 	}
967 
968 	return 0;
969 }
970 
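/*
 * Layout sketch for the loop above: chained commands are packed back to
 * back into cmdbuf_abo, and each fill helper returns the exact number of
 * bytes it consumed so the next slot starts right after the previous one:
 *
 *   dev_addr + 0        slot 0 (size s0, returned by the fill helper)
 *   dev_addr + s0       slot 1 (size s1)
 *   ...
 *   dev_addr + offset   end of the chain; offset becomes buf_size
 */
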
971 int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
972 				struct amdxdna_sched_job *job,
973 				int (*notify_cb)(void *, void __iomem *, size_t))
974 {
975 	struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
976 	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
977 	struct amdxdna_dev *xdna = hwctx->client->xdna;
978 	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
979 	struct xdna_mailbox_msg msg;
980 	union exec_chain_req req;
981 	u32 op = ERT_INVALID_CMD;
982 	size_t size;
983 	int ret;
984 
985 	size = cmdbuf_abo->mem.size;
986 	ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva, cmd_abo, &size, &op);
987 	if (ret)
988 		return ret;
989 
990 	msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
991 	if (msg.opcode == MSG_OP_MAX_OPCODE)
992 		return -EOPNOTSUPP;
993 
994 	EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
995 					   size, 1);
996 	drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
997 
998 	msg.handle = job;
999 	msg.notify_cb = notify_cb;
1000 	msg.send_data = (u8 *)&req;
1001 	msg.send_size = sizeof(req);
1002 	ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
1003 	if (ret) {
1004 		XDNA_ERR(hwctx->client->xdna, "Send message failed");
1005 		return ret;
1006 	}
1007 
1008 	return 0;
1009 }
1010 
1011 int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
1012 		 int (*notify_cb)(void *, void __iomem *, size_t))
1013 {
1014 	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
1015 	struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]);
1016 	struct amdxdna_dev *xdna = hwctx->client->xdna;
1017 	struct xdna_mailbox_msg msg;
1018 	struct sync_bo_req req;
1019 	int ret = 0;
1020 
1021 	req.src_addr = 0;
1022 	req.dst_addr = amdxdna_dev_bo_offset(abo);
1023 	req.size = abo->mem.size;
1024 
1025 	/* Device to Host */
1026 	req.type = FIELD_PREP(AIE2_MSG_SYNC_BO_SRC_TYPE, SYNC_BO_DEV_MEM) |
1027 		FIELD_PREP(AIE2_MSG_SYNC_BO_DST_TYPE, SYNC_BO_HOST_MEM);
1028 
1029 	XDNA_DBG(xdna, "sync %d bytes src(0x%llx) to dst(0x%llx)",
1030 		 req.size, req.src_addr, req.dst_addr);
1031 
1032 	msg.handle = job;
1033 	msg.notify_cb = notify_cb;
1034 	msg.send_data = (u8 *)&req;
1035 	msg.send_size = sizeof(req);
1036 	msg.opcode = MSG_OP_SYNC_BO;
1037 
1038 	ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
1039 	if (ret) {
1040 		XDNA_ERR(xdna, "Send message failed");
1041 		return ret;
1042 	}
1043 
1044 	return 0;
1045 }
1046 
1047 int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
1048 			 int (*notify_cb)(void *, void __iomem *, size_t))
1049 {
1050 	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
1051 	struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]);
1052 	struct amdxdna_dev *xdna = hwctx->client->xdna;
1053 	struct config_debug_bo_req req;
1054 	struct xdna_mailbox_msg msg;
1055 
1056 	if (job->drv_cmd->opcode == ATTACH_DEBUG_BO)
1057 		req.config = DEBUG_BO_REGISTER;
1058 	else
1059 		req.config = DEBUG_BO_UNREGISTER;
1060 
1061 	req.offset = amdxdna_dev_bo_offset(abo);
1062 	req.size = abo->mem.size;
1063 
1064 	XDNA_DBG(xdna, "offset 0x%llx size 0x%llx config %d",
1065 		 req.offset, req.size, req.config);
1066 
1067 	msg.handle = job;
1068 	msg.notify_cb = notify_cb;
1069 	msg.send_data = (u8 *)&req;
1070 	msg.send_size = sizeof(req);
1071 	msg.opcode = MSG_OP_CONFIG_DEBUG_BO;
1072 
1073 	return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
1074 }
1075