1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2024, Advanced Micro Devices, Inc. 4 */ 5 6 #include <drm/amdxdna_accel.h> 7 #include <drm/drm_device.h> 8 #include <drm/drm_gem.h> 9 #include <drm/drm_gem_shmem_helper.h> 10 #include <drm/drm_print.h> 11 #include <drm/drm_syncobj.h> 12 #include <linux/hmm.h> 13 #include <linux/types.h> 14 #include <linux/xarray.h> 15 #include <trace/events/amdxdna.h> 16 17 #include "aie2_msg_priv.h" 18 #include "aie2_pci.h" 19 #include "aie2_solver.h" 20 #include "amdxdna_ctx.h" 21 #include "amdxdna_gem.h" 22 #include "amdxdna_mailbox.h" 23 #include "amdxdna_pci_drv.h" 24 #include "amdxdna_pm.h" 25 26 static bool force_cmdlist = true; 27 module_param(force_cmdlist, bool, 0600); 28 MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default true)"); 29 30 #define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */ 31 32 static void aie2_job_release(struct kref *ref) 33 { 34 struct amdxdna_sched_job *job; 35 36 job = container_of(ref, struct amdxdna_sched_job, refcnt); 37 amdxdna_sched_job_cleanup(job); 38 atomic64_inc(&job->hwctx->job_free_cnt); 39 wake_up(&job->hwctx->priv->job_free_wq); 40 if (job->out_fence) 41 dma_fence_put(job->out_fence); 42 kfree(job); 43 } 44 45 static void aie2_job_put(struct amdxdna_sched_job *job) 46 { 47 kref_put(&job->refcnt, aie2_job_release); 48 } 49 50 /* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */ 51 static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx, 52 struct drm_sched_job *bad_job) 53 { 54 drm_sched_stop(&hwctx->priv->sched, bad_job); 55 aie2_destroy_context(xdna->dev_handle, hwctx); 56 drm_sched_start(&hwctx->priv->sched, 0); 57 } 58 59 static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx) 60 { 61 struct amdxdna_gem_obj *heap = hwctx->priv->heap; 62 int ret; 63 64 ret = aie2_create_context(xdna->dev_handle, hwctx); 65 if (ret) { 66 XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret); 67 goto out; 68 } 69 70 ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, 71 heap->mem.userptr, heap->mem.size); 72 if (ret) { 73 XDNA_ERR(xdna, "Map host buf failed, ret %d", ret); 74 goto out; 75 } 76 77 ret = aie2_config_cu(hwctx, NULL); 78 if (ret) { 79 XDNA_ERR(xdna, "Config cu failed, ret %d", ret); 80 goto out; 81 } 82 83 out: 84 XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret); 85 return ret; 86 } 87 88 static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq) 89 { 90 struct dma_fence *fence, *out_fence = NULL; 91 int ret; 92 93 fence = drm_syncobj_fence_get(hwctx->priv->syncobj); 94 if (!fence) 95 return NULL; 96 97 ret = dma_fence_chain_find_seqno(&fence, seq); 98 if (ret) 99 goto out; 100 101 out_fence = dma_fence_get(dma_fence_chain_contained(fence)); 102 103 out: 104 dma_fence_put(fence); 105 return out_fence; 106 } 107 108 static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx) 109 { 110 struct dma_fence *fence; 111 112 fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1); 113 if (!fence) 114 return; 115 116 /* Wait up to 2 seconds for fw to finish all pending requests */ 117 dma_fence_wait_timeout(fence, false, msecs_to_jiffies(2000)); 118 dma_fence_put(fence); 119 } 120 121 static int aie2_hwctx_suspend_cb(struct amdxdna_hwctx *hwctx, void *arg) 122 { 123 struct amdxdna_dev *xdna = hwctx->client->xdna; 124 125 aie2_hwctx_wait_for_idle(hwctx); 126 aie2_hwctx_stop(xdna, hwctx, NULL); 127 128 return 0; 129 } 130 131 void aie2_hwctx_suspend(struct amdxdna_client *client) 132 { 133 struct amdxdna_dev *xdna = client->xdna; 134 135 /* 136 * Command timeout is unlikely. But if it happens, it doesn't 137 * break the system. aie2_hwctx_stop() will destroy mailbox 138 * and abort all commands. 139 */ 140 drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); 141 amdxdna_hwctx_walk(client, NULL, aie2_hwctx_suspend_cb); 142 } 143 144 static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg) 145 { 146 struct amdxdna_dev *xdna = hwctx->client->xdna; 147 148 return aie2_hwctx_restart(xdna, hwctx); 149 } 150 151 int aie2_hwctx_resume(struct amdxdna_client *client) 152 { 153 /* 154 * The resume path cannot guarantee that mailbox channel can be 155 * regenerated. If this happen, when submit message to this 156 * mailbox channel, error will return. 157 */ 158 return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb); 159 } 160 161 static void 162 aie2_sched_notify(struct amdxdna_sched_job *job) 163 { 164 struct dma_fence *fence = job->fence; 165 166 trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq); 167 168 job->hwctx->priv->completed++; 169 dma_fence_signal(fence); 170 171 up(&job->hwctx->priv->job_sem); 172 job->job_done = true; 173 mmput_async(job->mm); 174 aie2_job_put(job); 175 } 176 177 static int 178 aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size) 179 { 180 struct amdxdna_sched_job *job = handle; 181 struct amdxdna_gem_obj *cmd_abo; 182 int ret = 0; 183 u32 status; 184 185 cmd_abo = job->cmd_bo; 186 187 if (unlikely(job->job_timeout)) { 188 amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT); 189 ret = -EINVAL; 190 goto out; 191 } 192 193 if (unlikely(!data) || unlikely(size != sizeof(u32))) { 194 amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT); 195 ret = -EINVAL; 196 goto out; 197 } 198 199 status = readl(data); 200 XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status); 201 if (status == AIE2_STATUS_SUCCESS) 202 amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); 203 else 204 amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ERROR); 205 206 out: 207 aie2_sched_notify(job); 208 return ret; 209 } 210 211 static int 212 aie2_sched_drvcmd_resp_handler(void *handle, void __iomem *data, size_t size) 213 { 214 struct amdxdna_sched_job *job = handle; 215 int ret = 0; 216 217 if (unlikely(!data)) 218 goto out; 219 220 if (unlikely(size != sizeof(u32))) { 221 ret = -EINVAL; 222 goto out; 223 } 224 225 job->drv_cmd->result = readl(data); 226 227 out: 228 aie2_sched_notify(job); 229 return ret; 230 } 231 232 static int 233 aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size) 234 { 235 struct amdxdna_sched_job *job = handle; 236 struct amdxdna_gem_obj *cmd_abo; 237 struct amdxdna_dev *xdna; 238 u32 fail_cmd_status; 239 u32 fail_cmd_idx; 240 u32 cmd_status; 241 int ret = 0; 242 243 cmd_abo = job->cmd_bo; 244 245 if (unlikely(job->job_timeout)) { 246 amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT); 247 ret = -EINVAL; 248 goto out; 249 } 250 251 if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) { 252 amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT); 253 ret = -EINVAL; 254 goto out; 255 } 256 257 cmd_status = readl(data + offsetof(struct cmd_chain_resp, status)); 258 xdna = job->hwctx->client->xdna; 259 XDNA_DBG(xdna, "Status 0x%x", cmd_status); 260 if (cmd_status == AIE2_STATUS_SUCCESS) { 261 amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); 262 goto out; 263 } 264 265 /* Slow path to handle error, read from ringbuf on BAR */ 266 fail_cmd_idx = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_idx)); 267 fail_cmd_status = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_status)); 268 XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x", 269 fail_cmd_idx, fail_cmd_status); 270 271 if (fail_cmd_status == AIE2_STATUS_SUCCESS) { 272 amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ABORT); 273 ret = -EINVAL; 274 } else { 275 amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ERROR); 276 } 277 278 out: 279 aie2_sched_notify(job); 280 return ret; 281 } 282 283 static struct dma_fence * 284 aie2_sched_job_run(struct drm_sched_job *sched_job) 285 { 286 struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); 287 struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; 288 struct amdxdna_hwctx *hwctx = job->hwctx; 289 struct dma_fence *fence; 290 int ret; 291 292 if (!hwctx->priv->mbox_chann) 293 return NULL; 294 295 if (!mmget_not_zero(job->mm)) 296 return ERR_PTR(-ESRCH); 297 298 kref_get(&job->refcnt); 299 fence = dma_fence_get(job->fence); 300 301 if (job->drv_cmd) { 302 switch (job->drv_cmd->opcode) { 303 case SYNC_DEBUG_BO: 304 ret = aie2_sync_bo(hwctx, job, aie2_sched_drvcmd_resp_handler); 305 break; 306 case ATTACH_DEBUG_BO: 307 ret = aie2_config_debug_bo(hwctx, job, aie2_sched_drvcmd_resp_handler); 308 break; 309 default: 310 ret = -EINVAL; 311 break; 312 } 313 goto out; 314 } 315 316 amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW); 317 318 if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) 319 ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler); 320 else if (force_cmdlist) 321 ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler); 322 else 323 ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler); 324 325 out: 326 if (ret) { 327 dma_fence_put(job->fence); 328 aie2_job_put(job); 329 mmput(job->mm); 330 fence = ERR_PTR(ret); 331 } 332 trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq); 333 334 return fence; 335 } 336 337 static void aie2_sched_job_free(struct drm_sched_job *sched_job) 338 { 339 struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); 340 struct amdxdna_hwctx *hwctx = job->hwctx; 341 342 trace_xdna_job(sched_job, hwctx->name, "job free", job->seq); 343 if (!job->job_done) 344 up(&hwctx->priv->job_sem); 345 346 drm_sched_job_cleanup(sched_job); 347 aie2_job_put(job); 348 } 349 350 static enum drm_gpu_sched_stat 351 aie2_sched_job_timedout(struct drm_sched_job *sched_job) 352 { 353 struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); 354 struct amdxdna_hwctx *hwctx = job->hwctx; 355 struct amdxdna_dev *xdna; 356 357 xdna = hwctx->client->xdna; 358 trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq); 359 job->job_timeout = true; 360 mutex_lock(&xdna->dev_lock); 361 aie2_hwctx_stop(xdna, hwctx, sched_job); 362 363 aie2_hwctx_restart(xdna, hwctx); 364 mutex_unlock(&xdna->dev_lock); 365 366 return DRM_GPU_SCHED_STAT_RESET; 367 } 368 369 static const struct drm_sched_backend_ops sched_ops = { 370 .run_job = aie2_sched_job_run, 371 .free_job = aie2_sched_job_free, 372 .timedout_job = aie2_sched_job_timedout, 373 }; 374 375 static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx) 376 { 377 struct amdxdna_dev *xdna = hwctx->client->xdna; 378 struct amdxdna_dev_hdl *ndev; 379 int start, end, first, last; 380 u32 width = 1, entries = 0; 381 int i; 382 383 if (!hwctx->num_tiles) { 384 XDNA_ERR(xdna, "Number of tiles is zero"); 385 return -EINVAL; 386 } 387 388 ndev = xdna->dev_handle; 389 if (unlikely(!ndev->metadata.core.row_count)) { 390 XDNA_WARN(xdna, "Core tile row count is zero"); 391 return -EINVAL; 392 } 393 394 hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count; 395 if (!hwctx->num_col || hwctx->num_col > ndev->total_col) { 396 XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col); 397 return -EINVAL; 398 } 399 400 if (ndev->priv->col_align == COL_ALIGN_NATURE) 401 width = hwctx->num_col; 402 403 /* 404 * In range [start, end], find out columns that is multiple of width. 405 * 'first' is the first column, 406 * 'last' is the last column, 407 * 'entries' is the total number of columns. 408 */ 409 start = xdna->dev_info->first_col; 410 end = ndev->total_col - hwctx->num_col; 411 if (start > 0 && end == 0) { 412 XDNA_DBG(xdna, "Force start from col 0"); 413 start = 0; 414 } 415 first = start + (width - start % width) % width; 416 last = end - end % width; 417 if (last >= first) 418 entries = (last - first) / width + 1; 419 XDNA_DBG(xdna, "start %d end %d first %d last %d", 420 start, end, first, last); 421 422 if (unlikely(!entries)) { 423 XDNA_ERR(xdna, "Start %d end %d width %d", 424 start, end, width); 425 return -EINVAL; 426 } 427 428 hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL); 429 if (!hwctx->col_list) 430 return -ENOMEM; 431 432 hwctx->col_list_len = entries; 433 hwctx->col_list[0] = first; 434 for (i = 1; i < entries; i++) 435 hwctx->col_list[i] = hwctx->col_list[i - 1] + width; 436 437 print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list, 438 entries * sizeof(*hwctx->col_list), false); 439 return 0; 440 } 441 442 static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx) 443 { 444 struct amdxdna_dev *xdna = hwctx->client->xdna; 445 struct alloc_requests *xrs_req; 446 int ret; 447 448 if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) { 449 hwctx->num_unused_col = xdna->dev_handle->total_col - hwctx->num_col; 450 hwctx->num_col = xdna->dev_handle->total_col; 451 return aie2_create_context(xdna->dev_handle, hwctx); 452 } 453 454 xrs_req = kzalloc_obj(*xrs_req); 455 if (!xrs_req) 456 return -ENOMEM; 457 458 xrs_req->cdo.start_cols = hwctx->col_list; 459 xrs_req->cdo.cols_len = hwctx->col_list_len; 460 xrs_req->cdo.ncols = hwctx->num_col; 461 xrs_req->cdo.qos_cap.opc = hwctx->max_opc; 462 463 xrs_req->rqos.gops = hwctx->qos.gops; 464 xrs_req->rqos.fps = hwctx->qos.fps; 465 xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth; 466 xrs_req->rqos.latency = hwctx->qos.latency; 467 xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time; 468 xrs_req->rqos.priority = hwctx->qos.priority; 469 470 xrs_req->rid = (uintptr_t)hwctx; 471 472 ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx); 473 if (ret) 474 XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret); 475 476 kfree(xrs_req); 477 return ret; 478 } 479 480 static void aie2_release_resource(struct amdxdna_hwctx *hwctx) 481 { 482 struct amdxdna_dev *xdna = hwctx->client->xdna; 483 int ret; 484 485 if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) { 486 ret = aie2_destroy_context(xdna->dev_handle, hwctx); 487 if (ret && ret != -ENODEV) 488 XDNA_ERR(xdna, "Destroy temporal only context failed, ret %d", ret); 489 } else { 490 ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx); 491 if (ret) 492 XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret); 493 } 494 } 495 496 static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx) 497 { 498 struct amdxdna_dev *xdna = hwctx->client->xdna; 499 struct drm_file *filp = hwctx->client->filp; 500 struct drm_syncobj *syncobj; 501 u32 hdl; 502 int ret; 503 504 hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE; 505 506 ret = drm_syncobj_create(&syncobj, 0, NULL); 507 if (ret) { 508 XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret); 509 return ret; 510 } 511 ret = drm_syncobj_get_handle(filp, syncobj, &hdl); 512 if (ret) { 513 drm_syncobj_put(syncobj); 514 XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret); 515 return ret; 516 } 517 hwctx->priv->syncobj = syncobj; 518 hwctx->syncobj_hdl = hdl; 519 520 return 0; 521 } 522 523 static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx) 524 { 525 /* 526 * The syncobj_hdl is owned by user space and will be cleaned up 527 * separately. 528 */ 529 drm_syncobj_put(hwctx->priv->syncobj); 530 } 531 532 int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) 533 { 534 struct amdxdna_client *client = hwctx->client; 535 struct amdxdna_dev *xdna = client->xdna; 536 const struct drm_sched_init_args args = { 537 .ops = &sched_ops, 538 .num_rqs = DRM_SCHED_PRIORITY_COUNT, 539 .credit_limit = HWCTX_MAX_CMDS, 540 .timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT), 541 .name = "amdxdna_js", 542 .dev = xdna->ddev.dev, 543 }; 544 struct drm_gpu_scheduler *sched; 545 struct amdxdna_hwctx_priv *priv; 546 struct amdxdna_gem_obj *heap; 547 int i, ret; 548 549 priv = kzalloc_obj(*hwctx->priv); 550 if (!priv) 551 return -ENOMEM; 552 hwctx->priv = priv; 553 554 mutex_lock(&client->mm_lock); 555 heap = client->dev_heap; 556 if (!heap) { 557 XDNA_ERR(xdna, "The client dev heap object not exist"); 558 mutex_unlock(&client->mm_lock); 559 ret = -ENOENT; 560 goto free_priv; 561 } 562 drm_gem_object_get(to_gobj(heap)); 563 mutex_unlock(&client->mm_lock); 564 priv->heap = heap; 565 sema_init(&priv->job_sem, HWCTX_MAX_CMDS); 566 567 ret = amdxdna_gem_pin(heap); 568 if (ret) { 569 XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret); 570 goto put_heap; 571 } 572 573 for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { 574 struct amdxdna_gem_obj *abo; 575 struct amdxdna_drm_create_bo args = { 576 .flags = 0, 577 .type = AMDXDNA_BO_DEV, 578 .vaddr = 0, 579 .size = MAX_CHAIN_CMDBUF_SIZE, 580 }; 581 582 abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp); 583 if (IS_ERR(abo)) { 584 ret = PTR_ERR(abo); 585 goto free_cmd_bufs; 586 } 587 588 XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx", 589 i, abo->mem.dev_addr, abo->mem.size); 590 priv->cmd_buf[i] = abo; 591 } 592 593 sched = &priv->sched; 594 mutex_init(&priv->io_lock); 595 596 fs_reclaim_acquire(GFP_KERNEL); 597 might_lock(&priv->io_lock); 598 fs_reclaim_release(GFP_KERNEL); 599 600 ret = drm_sched_init(sched, &args); 601 if (ret) { 602 XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret); 603 goto free_cmd_bufs; 604 } 605 606 ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL, 607 &sched, 1, NULL); 608 if (ret) { 609 XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret); 610 goto free_sched; 611 } 612 613 ret = aie2_hwctx_col_list(hwctx); 614 if (ret) { 615 XDNA_ERR(xdna, "Create col list failed, ret %d", ret); 616 goto free_entity; 617 } 618 619 ret = amdxdna_pm_resume_get_locked(xdna); 620 if (ret) 621 goto free_col_list; 622 623 ret = aie2_alloc_resource(hwctx); 624 if (ret) { 625 XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret); 626 goto suspend_put; 627 } 628 629 ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, 630 heap->mem.userptr, heap->mem.size); 631 if (ret) { 632 XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret); 633 goto release_resource; 634 } 635 636 ret = aie2_ctx_syncobj_create(hwctx); 637 if (ret) { 638 XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret); 639 goto release_resource; 640 } 641 amdxdna_pm_suspend_put(xdna); 642 643 init_waitqueue_head(&priv->job_free_wq); 644 645 XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); 646 647 return 0; 648 649 release_resource: 650 aie2_release_resource(hwctx); 651 suspend_put: 652 amdxdna_pm_suspend_put(xdna); 653 free_col_list: 654 kfree(hwctx->col_list); 655 free_entity: 656 drm_sched_entity_destroy(&priv->entity); 657 free_sched: 658 drm_sched_fini(&priv->sched); 659 free_cmd_bufs: 660 for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { 661 if (!priv->cmd_buf[i]) 662 continue; 663 drm_gem_object_put(to_gobj(priv->cmd_buf[i])); 664 } 665 amdxdna_gem_unpin(heap); 666 put_heap: 667 drm_gem_object_put(to_gobj(heap)); 668 free_priv: 669 kfree(priv); 670 return ret; 671 } 672 673 void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) 674 { 675 struct amdxdna_dev *xdna; 676 int idx; 677 678 xdna = hwctx->client->xdna; 679 680 XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq); 681 aie2_hwctx_wait_for_idle(hwctx); 682 683 /* Request fw to destroy hwctx and cancel the rest pending requests */ 684 drm_sched_stop(&hwctx->priv->sched, NULL); 685 aie2_release_resource(hwctx); 686 drm_sched_start(&hwctx->priv->sched, 0); 687 688 mutex_unlock(&xdna->dev_lock); 689 drm_sched_entity_destroy(&hwctx->priv->entity); 690 691 /* Wait for all submitted jobs to be completed or canceled */ 692 wait_event(hwctx->priv->job_free_wq, 693 atomic64_read(&hwctx->job_submit_cnt) == 694 atomic64_read(&hwctx->job_free_cnt)); 695 mutex_lock(&xdna->dev_lock); 696 697 drm_sched_fini(&hwctx->priv->sched); 698 aie2_ctx_syncobj_destroy(hwctx); 699 700 for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++) 701 drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx])); 702 amdxdna_gem_unpin(hwctx->priv->heap); 703 drm_gem_object_put(to_gobj(hwctx->priv->heap)); 704 705 mutex_destroy(&hwctx->priv->io_lock); 706 kfree(hwctx->col_list); 707 kfree(hwctx->priv); 708 kfree(hwctx->cus); 709 } 710 711 static int aie2_config_cu_resp_handler(void *handle, void __iomem *data, size_t size) 712 { 713 struct amdxdna_hwctx *hwctx = handle; 714 715 amdxdna_pm_suspend_put(hwctx->client->xdna); 716 return 0; 717 } 718 719 static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size) 720 { 721 struct amdxdna_hwctx_param_config_cu *config = buf; 722 struct amdxdna_dev *xdna = hwctx->client->xdna; 723 u32 total_size; 724 int ret; 725 726 XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name); 727 if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad))) 728 return -EINVAL; 729 730 if (hwctx->cus) { 731 XDNA_ERR(xdna, "Not support re-config CU"); 732 return -EINVAL; 733 } 734 735 if (!config->num_cus) { 736 XDNA_ERR(xdna, "Number of CU is zero"); 737 return -EINVAL; 738 } 739 740 total_size = struct_size(config, cu_configs, config->num_cus); 741 if (total_size > size) { 742 XDNA_ERR(xdna, "CU config larger than size"); 743 return -EINVAL; 744 } 745 746 hwctx->cus = kmemdup(config, total_size, GFP_KERNEL); 747 if (!hwctx->cus) 748 return -ENOMEM; 749 750 ret = amdxdna_pm_resume_get_locked(xdna); 751 if (ret) 752 goto free_cus; 753 754 ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler); 755 if (ret) { 756 XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret); 757 goto pm_suspend_put; 758 } 759 760 wmb(); /* To avoid locking in command submit when check status */ 761 762 return 0; 763 764 pm_suspend_put: 765 amdxdna_pm_suspend_put(xdna); 766 free_cus: 767 kfree(hwctx->cus); 768 hwctx->cus = NULL; 769 return ret; 770 } 771 772 static void aie2_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq) 773 { 774 struct dma_fence *out_fence = aie2_cmd_get_out_fence(hwctx, seq); 775 776 if (!out_fence) { 777 XDNA_ERR(hwctx->client->xdna, "Failed to get fence"); 778 return; 779 } 780 781 dma_fence_wait_timeout(out_fence, false, MAX_SCHEDULE_TIMEOUT); 782 dma_fence_put(out_fence); 783 } 784 785 static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx *hwctx, u32 bo_hdl, 786 bool attach) 787 { 788 struct amdxdna_client *client = hwctx->client; 789 struct amdxdna_dev *xdna = client->xdna; 790 struct amdxdna_drv_cmd cmd = { 0 }; 791 struct amdxdna_gem_obj *abo; 792 u64 seq; 793 int ret; 794 795 abo = amdxdna_gem_get_obj(client, bo_hdl, AMDXDNA_BO_DEV); 796 if (!abo) { 797 XDNA_ERR(xdna, "Get bo %d failed", bo_hdl); 798 return -EINVAL; 799 } 800 801 if (attach) { 802 if (abo->assigned_hwctx != AMDXDNA_INVALID_CTX_HANDLE) { 803 ret = -EBUSY; 804 goto put_obj; 805 } 806 cmd.opcode = ATTACH_DEBUG_BO; 807 } else { 808 if (abo->assigned_hwctx != hwctx->id) { 809 ret = -EINVAL; 810 goto put_obj; 811 } 812 cmd.opcode = DETACH_DEBUG_BO; 813 } 814 815 ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE, 816 &bo_hdl, 1, hwctx->id, &seq); 817 if (ret) { 818 XDNA_ERR(xdna, "Submit command failed"); 819 goto put_obj; 820 } 821 822 aie2_cmd_wait(hwctx, seq); 823 if (cmd.result) { 824 XDNA_ERR(xdna, "Response failure 0x%x", cmd.result); 825 goto put_obj; 826 } 827 828 if (attach) 829 abo->assigned_hwctx = hwctx->id; 830 else 831 abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE; 832 833 XDNA_DBG(xdna, "Config debug BO %d to %s", bo_hdl, hwctx->name); 834 835 put_obj: 836 amdxdna_gem_put_obj(abo); 837 return ret; 838 } 839 840 int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size) 841 { 842 struct amdxdna_dev *xdna = hwctx->client->xdna; 843 844 drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); 845 switch (type) { 846 case DRM_AMDXDNA_HWCTX_CONFIG_CU: 847 return aie2_hwctx_cu_config(hwctx, buf, size); 848 case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF: 849 return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, true); 850 case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF: 851 return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, false); 852 default: 853 XDNA_DBG(xdna, "Not supported type %d", type); 854 return -EOPNOTSUPP; 855 } 856 } 857 858 int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl) 859 { 860 struct amdxdna_client *client = hwctx->client; 861 struct amdxdna_dev *xdna = client->xdna; 862 struct amdxdna_drv_cmd cmd = { 0 }; 863 u64 seq; 864 int ret; 865 866 cmd.opcode = SYNC_DEBUG_BO; 867 ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE, 868 &debug_bo_hdl, 1, hwctx->id, &seq); 869 if (ret) { 870 XDNA_ERR(xdna, "Submit command failed"); 871 return ret; 872 } 873 874 aie2_cmd_wait(hwctx, seq); 875 if (cmd.result) { 876 XDNA_ERR(xdna, "Response failure 0x%x", cmd.result); 877 return -EINVAL; 878 } 879 880 return 0; 881 } 882 883 static int aie2_populate_range(struct amdxdna_gem_obj *abo) 884 { 885 struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); 886 struct amdxdna_umap *mapp; 887 unsigned long timeout; 888 struct mm_struct *mm; 889 bool found; 890 int ret; 891 892 timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 893 again: 894 found = false; 895 down_write(&xdna->notifier_lock); 896 list_for_each_entry(mapp, &abo->mem.umap_list, node) { 897 if (mapp->invalid) { 898 found = true; 899 break; 900 } 901 } 902 903 if (!found) { 904 abo->mem.map_invalid = false; 905 up_write(&xdna->notifier_lock); 906 return 0; 907 } 908 kref_get(&mapp->refcnt); 909 up_write(&xdna->notifier_lock); 910 911 XDNA_DBG(xdna, "populate memory range %lx %lx", 912 mapp->vma->vm_start, mapp->vma->vm_end); 913 mm = mapp->notifier.mm; 914 if (!mmget_not_zero(mm)) { 915 amdxdna_umap_put(mapp); 916 return -EFAULT; 917 } 918 919 mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier); 920 mmap_read_lock(mm); 921 ret = hmm_range_fault(&mapp->range); 922 mmap_read_unlock(mm); 923 if (ret) { 924 if (time_after(jiffies, timeout)) { 925 ret = -ETIME; 926 goto put_mm; 927 } 928 929 if (ret == -EBUSY) { 930 amdxdna_umap_put(mapp); 931 goto again; 932 } 933 934 goto put_mm; 935 } 936 937 down_write(&xdna->notifier_lock); 938 if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) { 939 up_write(&xdna->notifier_lock); 940 amdxdna_umap_put(mapp); 941 goto again; 942 } 943 mapp->invalid = false; 944 up_write(&xdna->notifier_lock); 945 amdxdna_umap_put(mapp); 946 goto again; 947 948 put_mm: 949 amdxdna_umap_put(mapp); 950 mmput(mm); 951 return ret; 952 } 953 954 int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq) 955 { 956 struct amdxdna_dev *xdna = hwctx->client->xdna; 957 struct ww_acquire_ctx acquire_ctx; 958 struct dma_fence_chain *chain; 959 struct amdxdna_gem_obj *abo; 960 unsigned long timeout = 0; 961 int ret, i; 962 963 ret = down_interruptible(&hwctx->priv->job_sem); 964 if (ret) { 965 XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret); 966 return ret; 967 } 968 969 chain = dma_fence_chain_alloc(); 970 if (!chain) { 971 XDNA_ERR(xdna, "Alloc fence chain failed"); 972 ret = -ENOMEM; 973 goto up_sem; 974 } 975 976 ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx, 977 hwctx->client->filp->client_id); 978 if (ret) { 979 XDNA_ERR(xdna, "DRM job init failed, ret %d", ret); 980 goto free_chain; 981 } 982 983 retry: 984 ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx); 985 if (ret) { 986 XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret); 987 goto cleanup_job; 988 } 989 990 for (i = 0; i < job->bo_cnt; i++) { 991 ret = dma_resv_reserve_fences(job->bos[i]->resv, 1); 992 if (ret) { 993 XDNA_WARN(xdna, "Failed to reserve fences %d", ret); 994 drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); 995 goto cleanup_job; 996 } 997 } 998 999 down_read(&xdna->notifier_lock); 1000 for (i = 0; i < job->bo_cnt; i++) { 1001 abo = to_xdna_obj(job->bos[i]); 1002 if (abo->mem.map_invalid) { 1003 up_read(&xdna->notifier_lock); 1004 drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); 1005 if (!timeout) { 1006 timeout = jiffies + 1007 msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 1008 } else if (time_after(jiffies, timeout)) { 1009 ret = -ETIME; 1010 goto cleanup_job; 1011 } 1012 1013 ret = aie2_populate_range(abo); 1014 if (ret) 1015 goto cleanup_job; 1016 goto retry; 1017 } 1018 } 1019 1020 mutex_lock(&hwctx->priv->io_lock); 1021 drm_sched_job_arm(&job->base); 1022 job->out_fence = dma_fence_get(&job->base.s_fence->finished); 1023 for (i = 0; i < job->bo_cnt; i++) 1024 dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE); 1025 job->seq = hwctx->priv->seq++; 1026 kref_get(&job->refcnt); 1027 drm_sched_entity_push_job(&job->base); 1028 1029 *seq = job->seq; 1030 drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq); 1031 mutex_unlock(&hwctx->priv->io_lock); 1032 1033 up_read(&xdna->notifier_lock); 1034 drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); 1035 1036 aie2_job_put(job); 1037 atomic64_inc(&hwctx->job_submit_cnt); 1038 1039 return 0; 1040 1041 cleanup_job: 1042 drm_sched_job_cleanup(&job->base); 1043 free_chain: 1044 dma_fence_chain_free(chain); 1045 up_sem: 1046 up(&hwctx->priv->job_sem); 1047 job->job_done = true; 1048 return ret; 1049 } 1050 1051 void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, 1052 unsigned long cur_seq) 1053 { 1054 struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); 1055 struct drm_gem_object *gobj = to_gobj(abo); 1056 long ret; 1057 1058 ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP, 1059 true, MAX_SCHEDULE_TIMEOUT); 1060 if (!ret) 1061 XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret); 1062 else if (ret == -ERESTARTSYS) 1063 XDNA_DBG(xdna, "Wait for bo interrupted by signal"); 1064 } 1065