1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. 4 */ 5 6 #include <drm/amdxdna_accel.h> 7 #include <drm/drm_cache.h> 8 #include <drm/drm_device.h> 9 #include <drm/drm_gem.h> 10 #include <drm/drm_gem_shmem_helper.h> 11 #include <drm/drm_print.h> 12 #include <drm/gpu_scheduler.h> 13 #include <linux/bitfield.h> 14 #include <linux/errno.h> 15 #include <linux/pci.h> 16 #include <linux/types.h> 17 #include <linux/xarray.h> 18 19 #include "aie2_msg_priv.h" 20 #include "aie2_pci.h" 21 #include "amdxdna_ctx.h" 22 #include "amdxdna_gem.h" 23 #include "amdxdna_mailbox.h" 24 #include "amdxdna_mailbox_helper.h" 25 #include "amdxdna_pci_drv.h" 26 27 #define DECLARE_AIE2_MSG(name, op) \ 28 DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE) 29 30 #define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops) 31 32 static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev, 33 struct xdna_mailbox_msg *msg) 34 { 35 struct amdxdna_dev *xdna = ndev->xdna; 36 struct xdna_notify *hdl = msg->handle; 37 int ret; 38 39 if (!ndev->mgmt_chann) 40 return -ENODEV; 41 42 ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg); 43 if (ret == -ETIME) { 44 xdna_mailbox_stop_channel(ndev->mgmt_chann); 45 xdna_mailbox_destroy_channel(ndev->mgmt_chann); 46 ndev->mgmt_chann = NULL; 47 } 48 49 if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) { 50 XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x", 51 msg->opcode, *hdl->data); 52 ret = -EINVAL; 53 } 54 55 return ret; 56 } 57 58 void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size, 59 dma_addr_t *dma_addr) 60 { 61 struct amdxdna_dev *xdna = ndev->xdna; 62 int order; 63 64 *size = max(*size, SZ_8K); 65 order = get_order(*size); 66 if (order > MAX_PAGE_ORDER) 67 return NULL; 68 *size = PAGE_SIZE << order; 69 70 return dma_alloc_noncoherent(xdna->ddev.dev, *size, dma_addr, 71 DMA_FROM_DEVICE, GFP_KERNEL); 72 } 73 74 int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev) 75 { 76 DECLARE_AIE2_MSG(suspend, MSG_OP_SUSPEND); 77 int ret; 78 79 ret = aie2_send_mgmt_msg_wait(ndev, &msg); 80 if (ret) { 81 XDNA_ERR(ndev->xdna, "Failed to suspend fw, ret %d", ret); 82 return ret; 83 } 84 85 return aie2_psp_waitmode_poll(ndev->psp_hdl); 86 } 87 88 int aie2_resume_fw(struct amdxdna_dev_hdl *ndev) 89 { 90 DECLARE_AIE2_MSG(suspend, MSG_OP_RESUME); 91 92 return aie2_send_mgmt_msg_wait(ndev, &msg); 93 } 94 95 int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value) 96 { 97 DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG); 98 int ret; 99 100 req.type = type; 101 req.value = value; 102 103 ret = aie2_send_mgmt_msg_wait(ndev, &msg); 104 if (ret) { 105 XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", ret); 106 return ret; 107 } 108 109 return 0; 110 } 111 112 int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value) 113 { 114 DECLARE_AIE2_MSG(get_runtime_cfg, MSG_OP_GET_RUNTIME_CONFIG); 115 int ret; 116 117 req.type = type; 118 ret = aie2_send_mgmt_msg_wait(ndev, &msg); 119 if (ret) { 120 XDNA_ERR(ndev->xdna, "Failed to get runtime config, ret %d", ret); 121 return ret; 122 } 123 124 *value = resp.value; 125 return 0; 126 } 127 128 int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid) 129 { 130 DECLARE_AIE2_MSG(assign_mgmt_pasid, MSG_OP_ASSIGN_MGMT_PASID); 131 132 req.pasid = pasid; 133 134 return aie2_send_mgmt_msg_wait(ndev, &msg); 135 } 136 137 int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version) 138 { 139 DECLARE_AIE2_MSG(aie_version_info, MSG_OP_QUERY_AIE_VERSION); 140 struct amdxdna_dev *xdna = ndev->xdna; 141 int ret; 142 143 ret = aie2_send_mgmt_msg_wait(ndev, &msg); 144 if (ret) 145 return ret; 146 147 XDNA_DBG(xdna, "Query AIE version - major: %u minor: %u completed", 148 resp.major, resp.minor); 149 150 version->major = resp.major; 151 version->minor = resp.minor; 152 153 return 0; 154 } 155 156 int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata) 157 { 158 DECLARE_AIE2_MSG(aie_tile_info, MSG_OP_QUERY_AIE_TILE_INFO); 159 int ret; 160 161 ret = aie2_send_mgmt_msg_wait(ndev, &msg); 162 if (ret) 163 return ret; 164 165 metadata->size = resp.info.size; 166 metadata->cols = resp.info.cols; 167 metadata->rows = resp.info.rows; 168 169 metadata->version.major = resp.info.major; 170 metadata->version.minor = resp.info.minor; 171 172 metadata->core.row_count = resp.info.core_rows; 173 metadata->core.row_start = resp.info.core_row_start; 174 metadata->core.dma_channel_count = resp.info.core_dma_channels; 175 metadata->core.lock_count = resp.info.core_locks; 176 metadata->core.event_reg_count = resp.info.core_events; 177 178 metadata->mem.row_count = resp.info.mem_rows; 179 metadata->mem.row_start = resp.info.mem_row_start; 180 metadata->mem.dma_channel_count = resp.info.mem_dma_channels; 181 metadata->mem.lock_count = resp.info.mem_locks; 182 metadata->mem.event_reg_count = resp.info.mem_events; 183 184 metadata->shim.row_count = resp.info.shim_rows; 185 metadata->shim.row_start = resp.info.shim_row_start; 186 metadata->shim.dma_channel_count = resp.info.shim_dma_channels; 187 metadata->shim.lock_count = resp.info.shim_locks; 188 metadata->shim.event_reg_count = resp.info.shim_events; 189 190 return 0; 191 } 192 193 int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev, 194 struct amdxdna_fw_ver *fw_ver) 195 { 196 DECLARE_AIE2_MSG(firmware_version, MSG_OP_GET_FIRMWARE_VERSION); 197 int ret; 198 199 ret = aie2_send_mgmt_msg_wait(ndev, &msg); 200 if (ret) 201 return ret; 202 203 fw_ver->major = resp.major; 204 fw_ver->minor = resp.minor; 205 fw_ver->sub = resp.sub; 206 fw_ver->build = resp.build; 207 208 return 0; 209 } 210 211 static int aie2_destroy_context_req(struct amdxdna_dev_hdl *ndev, u32 id) 212 { 213 DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT); 214 struct amdxdna_dev *xdna = ndev->xdna; 215 int ret; 216 217 req.context_id = id; 218 ret = aie2_send_mgmt_msg_wait(ndev, &msg); 219 if (ret && ret != -ENODEV) 220 XDNA_WARN(xdna, "Destroy context failed, ret %d", ret); 221 else if (ret == -ENODEV) 222 XDNA_DBG(xdna, "Destroy context: device already stopped"); 223 224 return ret; 225 } 226 227 static u32 aie2_get_context_priority(struct amdxdna_dev_hdl *ndev, 228 struct amdxdna_hwctx *hwctx) 229 { 230 if (!AIE2_FEATURE_ON(ndev, AIE2_PREEMPT)) 231 return PRIORITY_HIGH; 232 233 switch (hwctx->qos.priority) { 234 case AMDXDNA_QOS_REALTIME_PRIORITY: 235 return PRIORITY_REALTIME; 236 case AMDXDNA_QOS_HIGH_PRIORITY: 237 return PRIORITY_HIGH; 238 case AMDXDNA_QOS_NORMAL_PRIORITY: 239 return PRIORITY_NORMAL; 240 case AMDXDNA_QOS_LOW_PRIORITY: 241 return PRIORITY_LOW; 242 default: 243 return PRIORITY_HIGH; 244 } 245 } 246 247 int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx) 248 { 249 DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT); 250 struct amdxdna_dev *xdna = ndev->xdna; 251 struct xdna_mailbox_chann_res x2i; 252 struct xdna_mailbox_chann_res i2x; 253 struct cq_pair *cq_pair; 254 u32 intr_reg; 255 int ret; 256 257 req.aie_type = 1; 258 req.start_col = hwctx->start_col; 259 req.num_col = hwctx->num_col; 260 req.num_unused_col = hwctx->num_unused_col; 261 req.num_cq_pairs_requested = 1; 262 req.pasid = hwctx->client->pasid; 263 req.context_priority = aie2_get_context_priority(ndev, hwctx); 264 265 ret = aie2_send_mgmt_msg_wait(ndev, &msg); 266 if (ret) 267 return ret; 268 269 hwctx->fw_ctx_id = resp.context_id; 270 if (WARN_ON_ONCE(hwctx->fw_ctx_id == -1)) 271 return -EINVAL; 272 273 if (ndev->force_preempt_enabled) { 274 ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPT, &hwctx->fw_ctx_id); 275 if (ret) { 276 XDNA_ERR(xdna, "failed to enable force preempt %d", ret); 277 goto del_ctx_req; 278 } 279 } 280 281 cq_pair = &resp.cq_pair[0]; 282 x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr); 283 x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr); 284 x2i.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->x2i_q.buf_addr); 285 x2i.rb_size = cq_pair->x2i_q.buf_size; 286 287 i2x.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.head_addr); 288 i2x.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.tail_addr); 289 i2x.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->i2x_q.buf_addr); 290 i2x.rb_size = cq_pair->i2x_q.buf_size; 291 292 ret = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id); 293 if (ret == -EINVAL) { 294 XDNA_ERR(xdna, "Alloc IRQ failed %d", ret); 295 goto del_ctx_req; 296 } 297 298 intr_reg = i2x.mb_head_ptr_reg + 4; 299 hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x, 300 intr_reg, ret); 301 if (!hwctx->priv->mbox_chann) { 302 XDNA_ERR(xdna, "Not able to create channel"); 303 ret = -EINVAL; 304 goto del_ctx_req; 305 } 306 ndev->hwctx_num++; 307 308 XDNA_DBG(xdna, "Mailbox channel irq: %d, msix_id: %d", ret, resp.msix_id); 309 XDNA_DBG(xdna, "Created fw ctx %d pasid %d", hwctx->fw_ctx_id, hwctx->client->pasid); 310 311 return 0; 312 313 del_ctx_req: 314 aie2_destroy_context_req(ndev, hwctx->fw_ctx_id); 315 return ret; 316 } 317 318 int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx) 319 { 320 struct amdxdna_dev *xdna = ndev->xdna; 321 int ret; 322 323 if (!hwctx->priv->mbox_chann) 324 return 0; 325 326 xdna_mailbox_stop_channel(hwctx->priv->mbox_chann); 327 ret = aie2_destroy_context_req(ndev, hwctx->fw_ctx_id); 328 xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann); 329 XDNA_DBG(xdna, "Destroyed fw ctx %d", hwctx->fw_ctx_id); 330 hwctx->priv->mbox_chann = NULL; 331 hwctx->fw_ctx_id = -1; 332 ndev->hwctx_num--; 333 334 return ret; 335 } 336 337 int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size) 338 { 339 DECLARE_AIE2_MSG(map_host_buffer, MSG_OP_MAP_HOST_BUFFER); 340 struct amdxdna_dev *xdna = ndev->xdna; 341 int ret; 342 343 req.context_id = context_id; 344 req.buf_addr = addr; 345 req.buf_size = size; 346 ret = aie2_send_mgmt_msg_wait(ndev, &msg); 347 if (ret) 348 return ret; 349 350 XDNA_DBG(xdna, "fw ctx %d map host buf addr 0x%llx size 0x%llx", 351 context_id, addr, size); 352 353 return 0; 354 } 355 356 static int amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx, void *arg) 357 { 358 u32 *bitmap = arg; 359 360 *bitmap |= GENMASK(hwctx->start_col + hwctx->num_col - 1, hwctx->start_col); 361 362 return 0; 363 } 364 365 int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, 366 u32 size, u32 *cols_filled) 367 { 368 DECLARE_AIE2_MSG(aie_column_info, MSG_OP_QUERY_COL_STATUS); 369 struct amdxdna_dev *xdna = ndev->xdna; 370 u32 buf_sz = size, aie_bitmap = 0; 371 struct amdxdna_client *client; 372 dma_addr_t dma_addr; 373 u8 *buff_addr; 374 int ret; 375 376 buff_addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr); 377 if (!buff_addr) 378 return -ENOMEM; 379 380 /* Go through each hardware context and mark the AIE columns that are active */ 381 list_for_each_entry(client, &xdna->client_list, node) 382 amdxdna_hwctx_walk(client, &aie_bitmap, amdxdna_hwctx_col_map); 383 384 *cols_filled = 0; 385 req.dump_buff_addr = dma_addr; 386 req.dump_buff_size = buf_sz; 387 req.num_cols = hweight32(aie_bitmap); 388 req.aie_bitmap = aie_bitmap; 389 390 drm_clflush_virt_range(buff_addr, size); /* device can access */ 391 ret = aie2_send_mgmt_msg_wait(ndev, &msg); 392 if (ret) { 393 XDNA_ERR(xdna, "Error during NPU query, status %d", ret); 394 goto fail; 395 } 396 397 XDNA_DBG(xdna, "Query NPU status completed"); 398 399 if (size < resp.size) { 400 ret = -EINVAL; 401 XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size); 402 goto fail; 403 } 404 405 if (copy_to_user(buf, buff_addr, resp.size)) { 406 ret = -EFAULT; 407 XDNA_ERR(xdna, "Failed to copy NPU status to user space"); 408 goto fail; 409 } 410 411 *cols_filled = aie_bitmap; 412 413 fail: 414 aie2_free_msg_buffer(ndev, buf_sz, buff_addr, dma_addr); 415 return ret; 416 } 417 418 int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev, 419 char __user *buf, u32 size, 420 struct amdxdna_drm_query_telemetry_header *header) 421 { 422 DECLARE_AIE2_MSG(get_telemetry, MSG_OP_GET_TELEMETRY); 423 struct amdxdna_dev *xdna = ndev->xdna; 424 dma_addr_t dma_addr; 425 u32 buf_sz = size; 426 u8 *addr; 427 int ret; 428 429 if (header->type >= MAX_TELEMETRY_TYPE) 430 return -EINVAL; 431 432 addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr); 433 if (!addr) 434 return -ENOMEM; 435 436 req.buf_addr = dma_addr; 437 req.buf_size = buf_sz; 438 req.type = header->type; 439 440 drm_clflush_virt_range(addr, size); /* device can access */ 441 ret = aie2_send_mgmt_msg_wait(ndev, &msg); 442 if (ret) { 443 XDNA_ERR(xdna, "Query telemetry failed, status %d", ret); 444 goto free_buf; 445 } 446 447 if (size < resp.size) { 448 ret = -EINVAL; 449 XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size); 450 goto free_buf; 451 } 452 453 if (copy_to_user(buf, addr, resp.size)) { 454 ret = -EFAULT; 455 XDNA_ERR(xdna, "Failed to copy telemetry to user space"); 456 goto free_buf; 457 } 458 459 header->major = resp.major; 460 header->minor = resp.minor; 461 462 free_buf: 463 aie2_free_msg_buffer(ndev, buf_sz, addr, dma_addr); 464 return ret; 465 } 466 467 int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size, 468 void *handle, int (*cb)(void*, void __iomem *, size_t)) 469 { 470 struct async_event_msg_req req = { 0 }; 471 struct xdna_mailbox_msg msg = { 472 .send_data = (u8 *)&req, 473 .send_size = sizeof(req), 474 .handle = handle, 475 .opcode = MSG_OP_REGISTER_ASYNC_EVENT_MSG, 476 .notify_cb = cb, 477 }; 478 479 req.buf_addr = addr; 480 req.buf_size = size; 481 482 XDNA_DBG(ndev->xdna, "Register addr 0x%llx size 0x%x", addr, size); 483 return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT); 484 } 485 486 int aie2_config_cu(struct amdxdna_hwctx *hwctx, 487 int (*notify_cb)(void *, void __iomem *, size_t)) 488 { 489 struct mailbox_channel *chann = hwctx->priv->mbox_chann; 490 struct amdxdna_dev *xdna = hwctx->client->xdna; 491 u32 shift = xdna->dev_info->dev_mem_buf_shift; 492 struct config_cu_req req = { 0 }; 493 struct xdna_mailbox_msg msg; 494 struct drm_gem_object *gobj; 495 struct amdxdna_gem_obj *abo; 496 int i; 497 498 if (!chann) 499 return -ENODEV; 500 501 if (!hwctx->cus) 502 return 0; 503 504 if (hwctx->cus->num_cus > MAX_NUM_CUS) { 505 XDNA_DBG(xdna, "Exceed maximum CU %d", MAX_NUM_CUS); 506 return -EINVAL; 507 } 508 509 for (i = 0; i < hwctx->cus->num_cus; i++) { 510 struct amdxdna_cu_config *cu = &hwctx->cus->cu_configs[i]; 511 512 if (XDNA_MBZ_DBG(xdna, cu->pad, sizeof(cu->pad))) 513 return -EINVAL; 514 515 gobj = drm_gem_object_lookup(hwctx->client->filp, cu->cu_bo); 516 if (!gobj) { 517 XDNA_ERR(xdna, "Lookup GEM object failed"); 518 return -EINVAL; 519 } 520 abo = to_xdna_obj(gobj); 521 522 if (abo->type != AMDXDNA_BO_DEV) { 523 drm_gem_object_put(gobj); 524 XDNA_ERR(xdna, "Invalid BO type"); 525 return -EINVAL; 526 } 527 528 req.cfgs[i] = FIELD_PREP(AIE2_MSG_CFG_CU_PDI_ADDR, 529 abo->mem.dev_addr >> shift); 530 req.cfgs[i] |= FIELD_PREP(AIE2_MSG_CFG_CU_FUNC, cu->cu_func); 531 XDNA_DBG(xdna, "CU %d full addr 0x%llx, cfg 0x%x", i, 532 abo->mem.dev_addr, req.cfgs[i]); 533 drm_gem_object_put(gobj); 534 } 535 req.num_cus = hwctx->cus->num_cus; 536 537 msg.send_data = (u8 *)&req; 538 msg.send_size = sizeof(req); 539 msg.handle = hwctx; 540 msg.opcode = MSG_OP_CONFIG_CU; 541 msg.notify_cb = notify_cb; 542 return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); 543 } 544 545 static int aie2_init_exec_cu_req(struct amdxdna_gem_obj *cmd_bo, void *req, 546 size_t *size, u32 *msg_op) 547 { 548 struct execute_buffer_req *cu_req = req; 549 u32 cmd_len; 550 void *cmd; 551 552 cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); 553 if (cmd_len > sizeof(cu_req->payload)) 554 return -EINVAL; 555 556 cu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); 557 if (cu_req->cu_idx == INVALID_CU_IDX) 558 return -EINVAL; 559 560 memcpy(cu_req->payload, cmd, cmd_len); 561 562 *size = sizeof(*cu_req); 563 *msg_op = MSG_OP_EXECUTE_BUFFER_CF; 564 return 0; 565 } 566 567 static int aie2_init_exec_dpu_req(struct amdxdna_gem_obj *cmd_bo, void *req, 568 size_t *size, u32 *msg_op) 569 { 570 struct exec_dpu_req *dpu_req = req; 571 struct amdxdna_cmd_start_npu *sn; 572 u32 cmd_len; 573 574 sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); 575 if (cmd_len - sizeof(*sn) > sizeof(dpu_req->payload)) 576 return -EINVAL; 577 578 dpu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); 579 if (dpu_req->cu_idx == INVALID_CU_IDX) 580 return -EINVAL; 581 582 dpu_req->inst_buf_addr = sn->buffer; 583 dpu_req->inst_size = sn->buffer_size; 584 dpu_req->inst_prop_cnt = sn->prop_count; 585 memcpy(dpu_req->payload, sn->prop_args, cmd_len - sizeof(*sn)); 586 587 *size = sizeof(*dpu_req); 588 *msg_op = MSG_OP_EXEC_DPU; 589 return 0; 590 } 591 592 static void aie2_init_exec_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt) 593 { 594 struct cmd_chain_req *chain_req = req; 595 596 chain_req->buf_addr = slot_addr; 597 chain_req->buf_size = size; 598 chain_req->count = cmd_cnt; 599 } 600 601 static void aie2_init_npu_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt) 602 { 603 struct cmd_chain_npu_req *npu_chain_req = req; 604 605 npu_chain_req->flags = 0; 606 npu_chain_req->reserved = 0; 607 npu_chain_req->buf_addr = slot_addr; 608 npu_chain_req->buf_size = size; 609 npu_chain_req->count = cmd_cnt; 610 } 611 612 static int 613 aie2_cmdlist_fill_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) 614 { 615 struct cmd_chain_slot_execbuf_cf *cf_slot = slot; 616 u32 cmd_len; 617 void *cmd; 618 619 cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); 620 if (*size < sizeof(*cf_slot) + cmd_len) 621 return -EINVAL; 622 623 cf_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); 624 if (cf_slot->cu_idx == INVALID_CU_IDX) 625 return -EINVAL; 626 627 cf_slot->arg_cnt = cmd_len / sizeof(u32); 628 memcpy(cf_slot->args, cmd, cmd_len); 629 /* Accurate slot size to hint firmware to do necessary copy */ 630 *size = sizeof(*cf_slot) + cmd_len; 631 return 0; 632 } 633 634 static int 635 aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) 636 { 637 struct cmd_chain_slot_dpu *dpu_slot = slot; 638 struct amdxdna_cmd_start_npu *sn; 639 u32 cmd_len; 640 u32 arg_sz; 641 642 sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); 643 arg_sz = cmd_len - sizeof(*sn); 644 if (cmd_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE) 645 return -EINVAL; 646 647 if (*size < sizeof(*dpu_slot) + arg_sz) 648 return -EINVAL; 649 650 dpu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); 651 if (dpu_slot->cu_idx == INVALID_CU_IDX) 652 return -EINVAL; 653 654 dpu_slot->inst_buf_addr = sn->buffer; 655 dpu_slot->inst_size = sn->buffer_size; 656 dpu_slot->inst_prop_cnt = sn->prop_count; 657 dpu_slot->arg_cnt = arg_sz / sizeof(u32); 658 memcpy(dpu_slot->args, sn->prop_args, arg_sz); 659 660 /* Accurate slot size to hint firmware to do necessary copy */ 661 *size = sizeof(*dpu_slot) + arg_sz; 662 return 0; 663 } 664 665 static int aie2_cmdlist_unsupp(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) 666 { 667 return -EOPNOTSUPP; 668 } 669 670 static u32 aie2_get_chain_msg_op(u32 cmd_op) 671 { 672 switch (cmd_op) { 673 case ERT_START_CU: 674 return MSG_OP_CHAIN_EXEC_BUFFER_CF; 675 case ERT_START_NPU: 676 return MSG_OP_CHAIN_EXEC_DPU; 677 default: 678 break; 679 } 680 681 return MSG_OP_MAX_OPCODE; 682 } 683 684 static struct aie2_exec_msg_ops legacy_exec_message_ops = { 685 .init_cu_req = aie2_init_exec_cu_req, 686 .init_dpu_req = aie2_init_exec_dpu_req, 687 .init_chain_req = aie2_init_exec_chain_req, 688 .fill_cf_slot = aie2_cmdlist_fill_cf, 689 .fill_dpu_slot = aie2_cmdlist_fill_dpu, 690 .fill_preempt_slot = aie2_cmdlist_unsupp, 691 .fill_elf_slot = aie2_cmdlist_unsupp, 692 .get_chain_msg_op = aie2_get_chain_msg_op, 693 }; 694 695 static int 696 aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) 697 { 698 struct cmd_chain_slot_npu *npu_slot = slot; 699 u32 cmd_len; 700 void *cmd; 701 702 cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); 703 if (*size < sizeof(*npu_slot) + cmd_len) 704 return -EINVAL; 705 706 memset(npu_slot, 0, sizeof(*npu_slot)); 707 npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); 708 if (npu_slot->cu_idx == INVALID_CU_IDX) 709 return -EINVAL; 710 711 npu_slot->type = EXEC_NPU_TYPE_NON_ELF; 712 npu_slot->arg_cnt = cmd_len / sizeof(u32); 713 memcpy(npu_slot->args, cmd, cmd_len); 714 715 *size = sizeof(*npu_slot) + cmd_len; 716 return 0; 717 } 718 719 static int 720 aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) 721 { 722 struct cmd_chain_slot_npu *npu_slot = slot; 723 struct amdxdna_cmd_start_npu *sn; 724 u32 cmd_len; 725 u32 arg_sz; 726 727 sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); 728 arg_sz = cmd_len - sizeof(*sn); 729 if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE) 730 return -EINVAL; 731 732 if (*size < sizeof(*npu_slot) + arg_sz) 733 return -EINVAL; 734 735 memset(npu_slot, 0, sizeof(*npu_slot)); 736 npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); 737 if (npu_slot->cu_idx == INVALID_CU_IDX) 738 return -EINVAL; 739 740 npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF; 741 npu_slot->inst_buf_addr = sn->buffer; 742 npu_slot->inst_size = sn->buffer_size; 743 npu_slot->inst_prop_cnt = sn->prop_count; 744 npu_slot->arg_cnt = arg_sz / sizeof(u32); 745 memcpy(npu_slot->args, sn->prop_args, arg_sz); 746 747 *size = sizeof(*npu_slot) + arg_sz; 748 return 0; 749 } 750 751 static int 752 aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) 753 { 754 struct cmd_chain_slot_npu *npu_slot = slot; 755 struct amdxdna_cmd_preempt_data *pd; 756 u32 cmd_len; 757 u32 arg_sz; 758 759 pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); 760 arg_sz = cmd_len - sizeof(*pd); 761 if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) 762 return -EINVAL; 763 764 if (*size < sizeof(*npu_slot) + arg_sz) 765 return -EINVAL; 766 767 memset(npu_slot, 0, sizeof(*npu_slot)); 768 npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); 769 if (npu_slot->cu_idx == INVALID_CU_IDX) 770 return -EINVAL; 771 772 npu_slot->type = EXEC_NPU_TYPE_PREEMPT; 773 npu_slot->inst_buf_addr = pd->inst_buf; 774 npu_slot->save_buf_addr = pd->save_buf; 775 npu_slot->restore_buf_addr = pd->restore_buf; 776 npu_slot->inst_size = pd->inst_size; 777 npu_slot->save_size = pd->save_size; 778 npu_slot->restore_size = pd->restore_size; 779 npu_slot->inst_prop_cnt = pd->inst_prop_cnt; 780 npu_slot->arg_cnt = arg_sz / sizeof(u32); 781 memcpy(npu_slot->args, pd->prop_args, arg_sz); 782 783 *size = sizeof(*npu_slot) + arg_sz; 784 return 0; 785 } 786 787 static int 788 aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) 789 { 790 struct cmd_chain_slot_npu *npu_slot = slot; 791 struct amdxdna_cmd_preempt_data *pd; 792 u32 cmd_len; 793 u32 arg_sz; 794 795 pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); 796 arg_sz = cmd_len - sizeof(*pd); 797 if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) 798 return -EINVAL; 799 800 if (*size < sizeof(*npu_slot) + arg_sz) 801 return -EINVAL; 802 803 memset(npu_slot, 0, sizeof(*npu_slot)); 804 npu_slot->type = EXEC_NPU_TYPE_ELF; 805 npu_slot->inst_buf_addr = pd->inst_buf; 806 npu_slot->save_buf_addr = pd->save_buf; 807 npu_slot->restore_buf_addr = pd->restore_buf; 808 npu_slot->inst_size = pd->inst_size; 809 npu_slot->save_size = pd->save_size; 810 npu_slot->restore_size = pd->restore_size; 811 npu_slot->inst_prop_cnt = pd->inst_prop_cnt; 812 npu_slot->arg_cnt = 1; 813 npu_slot->args[0] = AIE2_EXEC_BUFFER_KERNEL_OP_TXN; 814 815 *size = struct_size(npu_slot, args, npu_slot->arg_cnt); 816 return 0; 817 } 818 819 static u32 aie2_get_npu_chain_msg_op(u32 cmd_op) 820 { 821 return MSG_OP_CHAIN_EXEC_NPU; 822 } 823 824 static struct aie2_exec_msg_ops npu_exec_message_ops = { 825 .init_cu_req = aie2_init_exec_cu_req, 826 .init_dpu_req = aie2_init_exec_dpu_req, 827 .init_chain_req = aie2_init_npu_chain_req, 828 .fill_cf_slot = aie2_cmdlist_fill_npu_cf, 829 .fill_dpu_slot = aie2_cmdlist_fill_npu_dpu, 830 .fill_preempt_slot = aie2_cmdlist_fill_npu_preempt, 831 .fill_elf_slot = aie2_cmdlist_fill_npu_elf, 832 .get_chain_msg_op = aie2_get_npu_chain_msg_op, 833 }; 834 835 static int aie2_init_exec_req(void *req, struct amdxdna_gem_obj *cmd_abo, 836 size_t *size, u32 *msg_op) 837 { 838 struct amdxdna_dev *xdna = cmd_abo->client->xdna; 839 int ret; 840 u32 op; 841 842 843 op = amdxdna_cmd_get_op(cmd_abo); 844 switch (op) { 845 case ERT_START_CU: 846 ret = EXEC_MSG_OPS(xdna)->init_cu_req(cmd_abo, req, size, msg_op); 847 if (ret) { 848 XDNA_DBG(xdna, "Init CU req failed ret %d", ret); 849 return ret; 850 } 851 break; 852 case ERT_START_NPU: 853 ret = EXEC_MSG_OPS(xdna)->init_dpu_req(cmd_abo, req, size, msg_op); 854 if (ret) { 855 XDNA_DBG(xdna, "Init DPU req failed ret %d", ret); 856 return ret; 857 } 858 859 break; 860 default: 861 XDNA_ERR(xdna, "Unsupported op %d", op); 862 ret = -EOPNOTSUPP; 863 break; 864 } 865 866 return ret; 867 } 868 869 static int 870 aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo, 871 size_t *size, u32 *cmd_op) 872 { 873 struct amdxdna_dev *xdna = cmd_abo->client->xdna; 874 int ret; 875 u32 op; 876 877 op = amdxdna_cmd_get_op(cmd_abo); 878 if (*cmd_op == ERT_INVALID_CMD) 879 *cmd_op = op; 880 else if (op != *cmd_op) 881 return -EINVAL; 882 883 switch (op) { 884 case ERT_START_CU: 885 ret = EXEC_MSG_OPS(xdna)->fill_cf_slot(cmd_abo, slot, size); 886 break; 887 case ERT_START_NPU: 888 ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size); 889 break; 890 case ERT_START_NPU_PREEMPT: 891 if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT)) 892 return -EOPNOTSUPP; 893 ret = EXEC_MSG_OPS(xdna)->fill_preempt_slot(cmd_abo, slot, size); 894 break; 895 case ERT_START_NPU_PREEMPT_ELF: 896 if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT)) 897 return -EOPNOTSUPP; 898 ret = EXEC_MSG_OPS(xdna)->fill_elf_slot(cmd_abo, slot, size); 899 break; 900 default: 901 XDNA_INFO(xdna, "Unsupported op %d", op); 902 ret = -EOPNOTSUPP; 903 break; 904 } 905 906 return ret; 907 } 908 909 void aie2_msg_init(struct amdxdna_dev_hdl *ndev) 910 { 911 if (AIE2_FEATURE_ON(ndev, AIE2_NPU_COMMAND)) 912 ndev->exec_msg_ops = &npu_exec_message_ops; 913 else 914 ndev->exec_msg_ops = &legacy_exec_message_ops; 915 } 916 917 static inline struct amdxdna_gem_obj * 918 aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job) 919 { 920 int idx = get_job_idx(job->seq); 921 922 return job->hwctx->priv->cmd_buf[idx]; 923 } 924 925 int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, 926 int (*notify_cb)(void *, void __iomem *, size_t)) 927 { 928 struct mailbox_channel *chann = hwctx->priv->mbox_chann; 929 struct amdxdna_dev *xdna = hwctx->client->xdna; 930 struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; 931 struct xdna_mailbox_msg msg; 932 union exec_req req; 933 int ret; 934 935 if (!chann) 936 return -ENODEV; 937 938 ret = aie2_init_exec_req(&req, cmd_abo, &msg.send_size, &msg.opcode); 939 if (ret) 940 return ret; 941 942 msg.handle = job; 943 msg.notify_cb = notify_cb; 944 msg.send_data = (u8 *)&req; 945 print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req, 946 0x40, false); 947 948 ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); 949 if (ret) { 950 XDNA_ERR(xdna, "Send message failed"); 951 return ret; 952 } 953 954 return 0; 955 } 956 957 int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, 958 struct amdxdna_sched_job *job, 959 int (*notify_cb)(void *, void __iomem *, size_t)) 960 { 961 struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job); 962 struct mailbox_channel *chann = hwctx->priv->mbox_chann; 963 struct amdxdna_client *client = hwctx->client; 964 struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; 965 struct amdxdna_dev *xdna = client->xdna; 966 struct amdxdna_cmd_chain *payload; 967 struct xdna_mailbox_msg msg; 968 union exec_chain_req req; 969 u32 payload_len; 970 u32 offset = 0; 971 size_t size; 972 int ret; 973 u32 op; 974 u32 i; 975 976 op = amdxdna_cmd_get_op(cmd_abo); 977 payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len); 978 if (op != ERT_CMD_CHAIN || !payload || 979 payload_len < struct_size(payload, data, payload->command_count)) 980 return -EINVAL; 981 982 op = ERT_INVALID_CMD; 983 for (i = 0; i < payload->command_count; i++) { 984 u32 boh = (u32)(payload->data[i]); 985 struct amdxdna_gem_obj *abo; 986 987 abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD); 988 if (!abo) { 989 XDNA_ERR(xdna, "Failed to find cmd BO %d", boh); 990 return -ENOENT; 991 } 992 993 size = cmdbuf_abo->mem.size - offset; 994 ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva + offset, 995 abo, &size, &op); 996 amdxdna_gem_put_obj(abo); 997 if (ret) 998 return ret; 999 1000 offset += size; 1001 } 1002 msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op); 1003 if (msg.opcode == MSG_OP_MAX_OPCODE) 1004 return -EOPNOTSUPP; 1005 1006 /* The offset is the accumulated total size of the cmd buffer */ 1007 EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr, 1008 offset, payload->command_count); 1009 drm_clflush_virt_range(cmdbuf_abo->mem.kva, offset); 1010 1011 msg.handle = job; 1012 msg.notify_cb = notify_cb; 1013 msg.send_data = (u8 *)&req; 1014 msg.send_size = sizeof(req); 1015 ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); 1016 if (ret) { 1017 XDNA_ERR(xdna, "Send message failed"); 1018 return ret; 1019 } 1020 1021 return 0; 1022 } 1023 1024 int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx, 1025 struct amdxdna_sched_job *job, 1026 int (*notify_cb)(void *, void __iomem *, size_t)) 1027 { 1028 struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job); 1029 struct mailbox_channel *chann = hwctx->priv->mbox_chann; 1030 struct amdxdna_dev *xdna = hwctx->client->xdna; 1031 struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; 1032 struct xdna_mailbox_msg msg; 1033 union exec_chain_req req; 1034 u32 op = ERT_INVALID_CMD; 1035 size_t size; 1036 int ret; 1037 1038 size = cmdbuf_abo->mem.size; 1039 ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva, cmd_abo, &size, &op); 1040 if (ret) 1041 return ret; 1042 1043 msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op); 1044 if (msg.opcode == MSG_OP_MAX_OPCODE) 1045 return -EOPNOTSUPP; 1046 1047 EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr, 1048 size, 1); 1049 drm_clflush_virt_range(cmdbuf_abo->mem.kva, size); 1050 1051 msg.handle = job; 1052 msg.notify_cb = notify_cb; 1053 msg.send_data = (u8 *)&req; 1054 msg.send_size = sizeof(req); 1055 ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); 1056 if (ret) { 1057 XDNA_ERR(hwctx->client->xdna, "Send message failed"); 1058 return ret; 1059 } 1060 1061 return 0; 1062 } 1063 1064 int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, 1065 int (*notify_cb)(void *, void __iomem *, size_t)) 1066 { 1067 struct mailbox_channel *chann = hwctx->priv->mbox_chann; 1068 struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]); 1069 struct amdxdna_dev *xdna = hwctx->client->xdna; 1070 struct xdna_mailbox_msg msg; 1071 struct sync_bo_req req; 1072 int ret = 0; 1073 1074 req.src_addr = 0; 1075 req.dst_addr = amdxdna_dev_bo_offset(abo); 1076 req.size = abo->mem.size; 1077 1078 /* Device to Host */ 1079 req.type = FIELD_PREP(AIE2_MSG_SYNC_BO_SRC_TYPE, SYNC_BO_DEV_MEM) | 1080 FIELD_PREP(AIE2_MSG_SYNC_BO_DST_TYPE, SYNC_BO_HOST_MEM); 1081 1082 XDNA_DBG(xdna, "sync %d bytes src(0x%llx) to dst(0x%llx) completed", 1083 req.size, req.src_addr, req.dst_addr); 1084 1085 msg.handle = job; 1086 msg.notify_cb = notify_cb; 1087 msg.send_data = (u8 *)&req; 1088 msg.send_size = sizeof(req); 1089 msg.opcode = MSG_OP_SYNC_BO; 1090 1091 ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); 1092 if (ret) { 1093 XDNA_ERR(xdna, "Send message failed"); 1094 return ret; 1095 } 1096 1097 return 0; 1098 } 1099 1100 int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, 1101 int (*notify_cb)(void *, void __iomem *, size_t)) 1102 { 1103 struct mailbox_channel *chann = hwctx->priv->mbox_chann; 1104 struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]); 1105 struct amdxdna_dev *xdna = hwctx->client->xdna; 1106 struct config_debug_bo_req req; 1107 struct xdna_mailbox_msg msg; 1108 1109 if (job->drv_cmd->opcode == ATTACH_DEBUG_BO) 1110 req.config = DEBUG_BO_REGISTER; 1111 else 1112 req.config = DEBUG_BO_UNREGISTER; 1113 1114 req.offset = amdxdna_dev_bo_offset(abo); 1115 req.size = abo->mem.size; 1116 1117 XDNA_DBG(xdna, "offset 0x%llx size 0x%llx config %d", 1118 req.offset, req.size, req.config); 1119 1120 msg.handle = job; 1121 msg.notify_cb = notify_cb; 1122 msg.send_data = (u8 *)&req; 1123 msg.send_size = sizeof(req); 1124 msg.opcode = MSG_OP_CONFIG_DEBUG_BO; 1125 1126 return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); 1127 } 1128