1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2024, Advanced Micro Devices, Inc. 4 */ 5 6 #include <drm/amdxdna_accel.h> 7 #include <drm/drm_device.h> 8 #include <drm/drm_gem.h> 9 #include <drm/drm_gem_shmem_helper.h> 10 #include <drm/drm_print.h> 11 #include <drm/drm_syncobj.h> 12 #include <linux/hmm.h> 13 #include <linux/types.h> 14 #include <linux/xarray.h> 15 #include <trace/events/amdxdna.h> 16 17 #include "aie2_msg_priv.h" 18 #include "aie2_pci.h" 19 #include "aie2_solver.h" 20 #include "amdxdna_ctx.h" 21 #include "amdxdna_gem.h" 22 #include "amdxdna_mailbox.h" 23 #include "amdxdna_pci_drv.h" 24 #include "amdxdna_pm.h" 25 26 static bool force_cmdlist = true; 27 module_param(force_cmdlist, bool, 0600); 28 MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default true)"); 29 30 #define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */ 31 32 struct aie2_ctx_health { 33 struct amdxdna_ctx_health header; 34 u32 txn_op_idx; 35 u32 ctx_pc; 36 u32 fatal_error_type; 37 u32 fatal_error_exception_type; 38 u32 fatal_error_exception_pc; 39 u32 fatal_error_app_module; 40 }; 41 42 static void aie2_job_release(struct kref *ref) 43 { 44 struct amdxdna_sched_job *job; 45 46 job = container_of(ref, struct amdxdna_sched_job, refcnt); 47 amdxdna_sched_job_cleanup(job); 48 atomic64_inc(&job->hwctx->job_free_cnt); 49 wake_up(&job->hwctx->priv->job_free_wq); 50 if (job->out_fence) 51 dma_fence_put(job->out_fence); 52 kfree(job->aie2_job_health); 53 kfree(job); 54 } 55 56 static void aie2_job_put(struct amdxdna_sched_job *job) 57 { 58 kref_put(&job->refcnt, aie2_job_release); 59 } 60 61 /* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */ 62 static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx, 63 struct drm_sched_job *bad_job) 64 { 65 drm_sched_stop(&hwctx->priv->sched, bad_job); 66 aie2_destroy_context(xdna->dev_handle, hwctx); 67 drm_sched_start(&hwctx->priv->sched, 0); 68 } 69 70 static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx) 71 { 72 struct amdxdna_gem_obj *heap = hwctx->priv->heap; 73 int ret; 74 75 ret = aie2_create_context(xdna->dev_handle, hwctx); 76 if (ret) { 77 XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret); 78 goto out; 79 } 80 81 ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, 82 amdxdna_obj_dma_addr(heap), 83 heap->mem.size); 84 if (ret) { 85 XDNA_ERR(xdna, "Map host buf failed, ret %d", ret); 86 goto out; 87 } 88 89 ret = aie2_config_cu(hwctx, NULL); 90 if (ret) { 91 XDNA_ERR(xdna, "Config cu failed, ret %d", ret); 92 goto out; 93 } 94 95 out: 96 XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret); 97 return ret; 98 } 99 100 static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq) 101 { 102 struct dma_fence *fence, *out_fence = NULL; 103 int ret; 104 105 fence = drm_syncobj_fence_get(hwctx->priv->syncobj); 106 if (!fence) 107 return NULL; 108 109 ret = dma_fence_chain_find_seqno(&fence, seq); 110 if (ret) 111 goto out; 112 113 out_fence = dma_fence_get(dma_fence_chain_contained(fence)); 114 115 out: 116 dma_fence_put(fence); 117 return out_fence; 118 } 119 120 static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx) 121 { 122 struct dma_fence *fence; 123 124 fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1); 125 if (!fence) 126 return; 127 128 /* Wait up to 2 seconds for fw to finish all pending requests */ 129 dma_fence_wait_timeout(fence, false, msecs_to_jiffies(2000)); 130 dma_fence_put(fence); 131 } 132 133 static int aie2_hwctx_suspend_cb(struct amdxdna_hwctx *hwctx, void *arg) 134 { 135 struct amdxdna_dev *xdna = hwctx->client->xdna; 136 137 aie2_hwctx_wait_for_idle(hwctx); 138 aie2_hwctx_stop(xdna, hwctx, NULL); 139 140 return 0; 141 } 142 143 void aie2_hwctx_suspend(struct amdxdna_client *client) 144 { 145 struct amdxdna_dev *xdna = client->xdna; 146 147 /* 148 * Command timeout is unlikely. But if it happens, it doesn't 149 * break the system. aie2_hwctx_stop() will destroy mailbox 150 * and abort all commands. 151 */ 152 drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); 153 amdxdna_hwctx_walk(client, NULL, aie2_hwctx_suspend_cb); 154 } 155 156 static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg) 157 { 158 struct amdxdna_dev *xdna = hwctx->client->xdna; 159 160 return aie2_hwctx_restart(xdna, hwctx); 161 } 162 163 int aie2_hwctx_resume(struct amdxdna_client *client) 164 { 165 /* 166 * The resume path cannot guarantee that mailbox channel can be 167 * regenerated. If this happen, when submit message to this 168 * mailbox channel, error will return. 169 */ 170 return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb); 171 } 172 173 static void 174 aie2_sched_notify(struct amdxdna_sched_job *job) 175 { 176 struct dma_fence *fence = job->fence; 177 178 trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq); 179 180 job->hwctx->priv->completed++; 181 dma_fence_signal(fence); 182 183 up(&job->hwctx->priv->job_sem); 184 job->job_done = true; 185 mmput_async(job->mm); 186 aie2_job_put(job); 187 } 188 189 static void aie2_set_cmd_timeout(struct amdxdna_sched_job *job) 190 { 191 struct aie2_ctx_health *aie2_health __free(kfree) = NULL; 192 struct amdxdna_dev *xdna = job->hwctx->client->xdna; 193 struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; 194 struct app_health_report *report = job->aie2_job_health; 195 u32 fail_cmd_idx = 0; 196 197 if (!report) 198 goto set_timeout; 199 200 XDNA_ERR(xdna, "Firmware timeout state capture:"); 201 XDNA_ERR(xdna, "\tVersion: %d.%d", report->major, report->minor); 202 XDNA_ERR(xdna, "\tReport size: 0x%x", report->size); 203 XDNA_ERR(xdna, "\tContext ID: %d", report->context_id); 204 XDNA_ERR(xdna, "\tDPU PC: 0x%x", report->dpu_pc); 205 XDNA_ERR(xdna, "\tTXN OP ID: 0x%x", report->txn_op_id); 206 XDNA_ERR(xdna, "\tContext PC: 0x%x", report->ctx_pc); 207 XDNA_ERR(xdna, "\tFatal error type: 0x%x", report->fatal_info.fatal_type); 208 XDNA_ERR(xdna, "\tFatal error exception type: 0x%x", report->fatal_info.exception_type); 209 XDNA_ERR(xdna, "\tFatal error exception PC: 0x%x", report->fatal_info.exception_pc); 210 XDNA_ERR(xdna, "\tFatal error app module: 0x%x", report->fatal_info.app_module); 211 XDNA_ERR(xdna, "\tFatal error task ID: %d", report->fatal_info.task_index); 212 XDNA_ERR(xdna, "\tTimed out sub command ID: %d", report->run_list_id); 213 214 fail_cmd_idx = report->run_list_id; 215 aie2_health = kzalloc_obj(*aie2_health); 216 if (!aie2_health) 217 goto set_timeout; 218 219 aie2_health->header.version = AMDXDNA_CMD_CTX_HEALTH_V1; 220 aie2_health->header.npu_gen = AMDXDNA_CMD_CTX_HEALTH_AIE2; 221 aie2_health->txn_op_idx = report->txn_op_id; 222 aie2_health->ctx_pc = report->ctx_pc; 223 aie2_health->fatal_error_type = report->fatal_info.fatal_type; 224 aie2_health->fatal_error_exception_type = report->fatal_info.exception_type; 225 aie2_health->fatal_error_exception_pc = report->fatal_info.exception_pc; 226 aie2_health->fatal_error_app_module = report->fatal_info.app_module; 227 228 set_timeout: 229 amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_TIMEOUT, 230 aie2_health, sizeof(*aie2_health)); 231 } 232 233 static int 234 aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size) 235 { 236 struct amdxdna_sched_job *job = handle; 237 struct amdxdna_gem_obj *cmd_abo; 238 int ret = 0; 239 u32 status; 240 241 cmd_abo = job->cmd_bo; 242 243 if (unlikely(job->job_timeout)) { 244 aie2_set_cmd_timeout(job); 245 ret = -EINVAL; 246 goto out; 247 } 248 249 if (unlikely(!data) || unlikely(size != sizeof(u32))) { 250 amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT, NULL, 0); 251 ret = -EINVAL; 252 goto out; 253 } 254 255 status = readl(data); 256 XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status); 257 if (status == AIE2_STATUS_SUCCESS) 258 amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); 259 else 260 amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ERROR, NULL, 0); 261 262 out: 263 aie2_sched_notify(job); 264 return ret; 265 } 266 267 static int 268 aie2_sched_drvcmd_resp_handler(void *handle, void __iomem *data, size_t size) 269 { 270 struct amdxdna_sched_job *job = handle; 271 int ret = 0; 272 273 if (unlikely(!data)) 274 goto out; 275 276 if (unlikely(size != sizeof(u32))) { 277 ret = -EINVAL; 278 goto out; 279 } 280 281 job->drv_cmd->result = readl(data); 282 283 out: 284 aie2_sched_notify(job); 285 return ret; 286 } 287 288 static int 289 aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size) 290 { 291 struct amdxdna_sched_job *job = handle; 292 struct amdxdna_gem_obj *cmd_abo; 293 struct amdxdna_dev *xdna; 294 u32 fail_cmd_idx = 0; 295 u32 fail_cmd_status; 296 u32 cmd_status; 297 int ret = 0; 298 299 cmd_abo = job->cmd_bo; 300 301 if (unlikely(job->job_timeout)) { 302 aie2_set_cmd_timeout(job); 303 ret = -EINVAL; 304 goto out; 305 } 306 307 if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) { 308 amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT, NULL, 0); 309 ret = -EINVAL; 310 goto out; 311 } 312 313 cmd_status = readl(data + offsetof(struct cmd_chain_resp, status)); 314 xdna = job->hwctx->client->xdna; 315 XDNA_DBG(xdna, "Status 0x%x", cmd_status); 316 if (cmd_status == AIE2_STATUS_SUCCESS) { 317 amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); 318 goto out; 319 } 320 321 /* Slow path to handle error, read from ringbuf on BAR */ 322 fail_cmd_idx = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_idx)); 323 fail_cmd_status = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_status)); 324 XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x", 325 fail_cmd_idx, fail_cmd_status); 326 327 if (fail_cmd_status == AIE2_STATUS_SUCCESS) { 328 amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ABORT, NULL, 0); 329 ret = -EINVAL; 330 } else { 331 amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ERROR, NULL, 0); 332 } 333 334 out: 335 aie2_sched_notify(job); 336 return ret; 337 } 338 339 static struct dma_fence * 340 aie2_sched_job_run(struct drm_sched_job *sched_job) 341 { 342 struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); 343 struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; 344 struct amdxdna_hwctx *hwctx = job->hwctx; 345 struct dma_fence *fence; 346 int ret; 347 348 if (!hwctx->priv->mbox_chann) 349 return NULL; 350 351 if (!mmget_not_zero(job->mm)) 352 return ERR_PTR(-ESRCH); 353 354 kref_get(&job->refcnt); 355 fence = dma_fence_get(job->fence); 356 357 if (job->drv_cmd) { 358 switch (job->drv_cmd->opcode) { 359 case SYNC_DEBUG_BO: 360 ret = aie2_sync_bo(hwctx, job, aie2_sched_drvcmd_resp_handler); 361 break; 362 case ATTACH_DEBUG_BO: 363 ret = aie2_config_debug_bo(hwctx, job, aie2_sched_drvcmd_resp_handler); 364 break; 365 default: 366 ret = -EINVAL; 367 break; 368 } 369 goto out; 370 } 371 372 amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW); 373 374 if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) 375 ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler); 376 else if (force_cmdlist) 377 ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler); 378 else 379 ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler); 380 381 out: 382 if (ret) { 383 dma_fence_put(job->fence); 384 aie2_job_put(job); 385 mmput(job->mm); 386 fence = ERR_PTR(ret); 387 } 388 trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq); 389 390 return fence; 391 } 392 393 static void aie2_sched_job_free(struct drm_sched_job *sched_job) 394 { 395 struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); 396 struct amdxdna_hwctx *hwctx = job->hwctx; 397 398 trace_xdna_job(sched_job, hwctx->name, "job free", job->seq); 399 if (!job->job_done) 400 up(&hwctx->priv->job_sem); 401 402 drm_sched_job_cleanup(sched_job); 403 aie2_job_put(job); 404 } 405 406 static enum drm_gpu_sched_stat 407 aie2_sched_job_timedout(struct drm_sched_job *sched_job) 408 { 409 struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); 410 struct amdxdna_hwctx *hwctx = job->hwctx; 411 struct app_health_report *report; 412 struct amdxdna_dev *xdna; 413 int ret; 414 415 xdna = hwctx->client->xdna; 416 trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq); 417 job->job_timeout = true; 418 419 mutex_lock(&xdna->dev_lock); 420 report = kzalloc_obj(*report); 421 if (!report) 422 goto reset_hwctx; 423 424 ret = aie2_query_app_health(xdna->dev_handle, hwctx->fw_ctx_id, report); 425 if (ret) 426 kfree(report); 427 else 428 job->aie2_job_health = report; 429 430 reset_hwctx: 431 aie2_hwctx_stop(xdna, hwctx, sched_job); 432 433 aie2_hwctx_restart(xdna, hwctx); 434 mutex_unlock(&xdna->dev_lock); 435 436 return DRM_GPU_SCHED_STAT_RESET; 437 } 438 439 static const struct drm_sched_backend_ops sched_ops = { 440 .run_job = aie2_sched_job_run, 441 .free_job = aie2_sched_job_free, 442 .timedout_job = aie2_sched_job_timedout, 443 }; 444 445 static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx) 446 { 447 struct amdxdna_dev *xdna = hwctx->client->xdna; 448 struct amdxdna_dev_hdl *ndev; 449 int start, end, first, last; 450 u32 width = 1, entries = 0; 451 int i; 452 453 if (!hwctx->num_tiles) { 454 XDNA_ERR(xdna, "Number of tiles is zero"); 455 return -EINVAL; 456 } 457 458 ndev = xdna->dev_handle; 459 if (unlikely(!ndev->metadata.core.row_count)) { 460 XDNA_WARN(xdna, "Core tile row count is zero"); 461 return -EINVAL; 462 } 463 464 hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count; 465 if (!hwctx->num_col || hwctx->num_col > ndev->total_col) { 466 XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col); 467 return -EINVAL; 468 } 469 470 if (ndev->priv->col_align == COL_ALIGN_NATURE) 471 width = hwctx->num_col; 472 473 /* 474 * In range [start, end], find out columns that is multiple of width. 475 * 'first' is the first column, 476 * 'last' is the last column, 477 * 'entries' is the total number of columns. 478 */ 479 start = xdna->dev_info->first_col; 480 end = ndev->total_col - hwctx->num_col; 481 if (start > 0 && end == 0) { 482 XDNA_DBG(xdna, "Force start from col 0"); 483 start = 0; 484 } 485 first = start + (width - start % width) % width; 486 last = end - end % width; 487 if (last >= first) 488 entries = (last - first) / width + 1; 489 XDNA_DBG(xdna, "start %d end %d first %d last %d", 490 start, end, first, last); 491 492 if (unlikely(!entries)) { 493 XDNA_ERR(xdna, "Start %d end %d width %d", 494 start, end, width); 495 return -EINVAL; 496 } 497 498 hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL); 499 if (!hwctx->col_list) 500 return -ENOMEM; 501 502 hwctx->col_list_len = entries; 503 hwctx->col_list[0] = first; 504 for (i = 1; i < entries; i++) 505 hwctx->col_list[i] = hwctx->col_list[i - 1] + width; 506 507 print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list, 508 entries * sizeof(*hwctx->col_list), false); 509 return 0; 510 } 511 512 static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx) 513 { 514 struct amdxdna_dev *xdna = hwctx->client->xdna; 515 struct alloc_requests *xrs_req; 516 int ret; 517 518 if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) { 519 hwctx->num_unused_col = xdna->dev_handle->total_col - hwctx->num_col; 520 hwctx->num_col = xdna->dev_handle->total_col; 521 return aie2_create_context(xdna->dev_handle, hwctx); 522 } 523 524 xrs_req = kzalloc_obj(*xrs_req); 525 if (!xrs_req) 526 return -ENOMEM; 527 528 xrs_req->cdo.start_cols = hwctx->col_list; 529 xrs_req->cdo.cols_len = hwctx->col_list_len; 530 xrs_req->cdo.ncols = hwctx->num_col; 531 xrs_req->cdo.qos_cap.opc = hwctx->max_opc; 532 533 xrs_req->rqos.gops = hwctx->qos.gops; 534 xrs_req->rqos.fps = hwctx->qos.fps; 535 xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth; 536 xrs_req->rqos.latency = hwctx->qos.latency; 537 xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time; 538 xrs_req->rqos.priority = hwctx->qos.priority; 539 540 xrs_req->rid = (uintptr_t)hwctx; 541 542 ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx); 543 if (ret) 544 XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret); 545 546 kfree(xrs_req); 547 return ret; 548 } 549 550 static void aie2_release_resource(struct amdxdna_hwctx *hwctx) 551 { 552 struct amdxdna_dev *xdna = hwctx->client->xdna; 553 int ret; 554 555 if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) { 556 ret = aie2_destroy_context(xdna->dev_handle, hwctx); 557 if (ret && ret != -ENODEV) 558 XDNA_ERR(xdna, "Destroy temporal only context failed, ret %d", ret); 559 } else { 560 ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx); 561 if (ret) 562 XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret); 563 } 564 } 565 566 static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx) 567 { 568 struct amdxdna_dev *xdna = hwctx->client->xdna; 569 struct drm_file *filp = hwctx->client->filp; 570 struct drm_syncobj *syncobj; 571 u32 hdl; 572 int ret; 573 574 hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE; 575 576 ret = drm_syncobj_create(&syncobj, 0, NULL); 577 if (ret) { 578 XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret); 579 return ret; 580 } 581 ret = drm_syncobj_get_handle(filp, syncobj, &hdl); 582 if (ret) { 583 drm_syncobj_put(syncobj); 584 XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret); 585 return ret; 586 } 587 hwctx->priv->syncobj = syncobj; 588 hwctx->syncobj_hdl = hdl; 589 590 return 0; 591 } 592 593 static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx) 594 { 595 /* 596 * The syncobj_hdl is owned by user space and will be cleaned up 597 * separately. 598 */ 599 drm_syncobj_put(hwctx->priv->syncobj); 600 } 601 602 int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) 603 { 604 struct amdxdna_client *client = hwctx->client; 605 struct amdxdna_dev *xdna = client->xdna; 606 const struct drm_sched_init_args args = { 607 .ops = &sched_ops, 608 .num_rqs = DRM_SCHED_PRIORITY_COUNT, 609 .credit_limit = HWCTX_MAX_CMDS, 610 .timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT), 611 .name = "amdxdna_js", 612 .dev = xdna->ddev.dev, 613 }; 614 struct drm_gpu_scheduler *sched; 615 struct amdxdna_hwctx_priv *priv; 616 struct amdxdna_gem_obj *heap; 617 int i, ret; 618 619 priv = kzalloc_obj(*hwctx->priv); 620 if (!priv) 621 return -ENOMEM; 622 hwctx->priv = priv; 623 624 mutex_lock(&client->mm_lock); 625 heap = client->dev_heap; 626 if (!heap) { 627 XDNA_ERR(xdna, "The client dev heap object not exist"); 628 mutex_unlock(&client->mm_lock); 629 ret = -ENOENT; 630 goto free_priv; 631 } 632 drm_gem_object_get(to_gobj(heap)); 633 mutex_unlock(&client->mm_lock); 634 priv->heap = heap; 635 sema_init(&priv->job_sem, HWCTX_MAX_CMDS); 636 637 ret = amdxdna_gem_pin(heap); 638 if (ret) { 639 XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret); 640 goto put_heap; 641 } 642 643 for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { 644 struct amdxdna_gem_obj *abo; 645 struct amdxdna_drm_create_bo args = { 646 .flags = 0, 647 .type = AMDXDNA_BO_DEV, 648 .vaddr = 0, 649 .size = MAX_CHAIN_CMDBUF_SIZE, 650 }; 651 652 abo = amdxdna_drm_create_dev_bo(&xdna->ddev, &args, client->filp); 653 if (IS_ERR(abo)) { 654 ret = PTR_ERR(abo); 655 goto free_cmd_bufs; 656 } 657 658 XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx", 659 i, amdxdna_gem_dev_addr(abo), abo->mem.size); 660 priv->cmd_buf[i] = abo; 661 } 662 663 sched = &priv->sched; 664 mutex_init(&priv->io_lock); 665 666 fs_reclaim_acquire(GFP_KERNEL); 667 might_lock(&priv->io_lock); 668 fs_reclaim_release(GFP_KERNEL); 669 670 ret = drm_sched_init(sched, &args); 671 if (ret) { 672 XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret); 673 goto free_cmd_bufs; 674 } 675 676 ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL, 677 &sched, 1, NULL); 678 if (ret) { 679 XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret); 680 goto free_sched; 681 } 682 683 ret = aie2_hwctx_col_list(hwctx); 684 if (ret) { 685 XDNA_ERR(xdna, "Create col list failed, ret %d", ret); 686 goto free_entity; 687 } 688 689 ret = amdxdna_pm_resume_get_locked(xdna); 690 if (ret) 691 goto free_col_list; 692 693 ret = aie2_alloc_resource(hwctx); 694 if (ret) { 695 XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret); 696 goto suspend_put; 697 } 698 699 ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, 700 amdxdna_obj_dma_addr(heap), 701 heap->mem.size); 702 if (ret) { 703 XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret); 704 goto release_resource; 705 } 706 707 ret = aie2_ctx_syncobj_create(hwctx); 708 if (ret) { 709 XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret); 710 goto release_resource; 711 } 712 amdxdna_pm_suspend_put(xdna); 713 714 init_waitqueue_head(&priv->job_free_wq); 715 716 XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); 717 718 return 0; 719 720 release_resource: 721 aie2_release_resource(hwctx); 722 suspend_put: 723 amdxdna_pm_suspend_put(xdna); 724 free_col_list: 725 kfree(hwctx->col_list); 726 free_entity: 727 drm_sched_entity_destroy(&priv->entity); 728 free_sched: 729 drm_sched_fini(&priv->sched); 730 free_cmd_bufs: 731 for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { 732 if (!priv->cmd_buf[i]) 733 continue; 734 drm_gem_object_put(to_gobj(priv->cmd_buf[i])); 735 } 736 amdxdna_gem_unpin(heap); 737 put_heap: 738 drm_gem_object_put(to_gobj(heap)); 739 free_priv: 740 kfree(priv); 741 return ret; 742 } 743 744 void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) 745 { 746 struct amdxdna_dev *xdna; 747 int idx; 748 749 xdna = hwctx->client->xdna; 750 751 XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq); 752 aie2_hwctx_wait_for_idle(hwctx); 753 754 /* Request fw to destroy hwctx and cancel the rest pending requests */ 755 drm_sched_stop(&hwctx->priv->sched, NULL); 756 aie2_release_resource(hwctx); 757 drm_sched_start(&hwctx->priv->sched, 0); 758 759 mutex_unlock(&xdna->dev_lock); 760 drm_sched_entity_destroy(&hwctx->priv->entity); 761 762 /* Wait for all submitted jobs to be completed or canceled */ 763 wait_event(hwctx->priv->job_free_wq, 764 atomic64_read(&hwctx->job_submit_cnt) == 765 atomic64_read(&hwctx->job_free_cnt)); 766 mutex_lock(&xdna->dev_lock); 767 768 drm_sched_fini(&hwctx->priv->sched); 769 aie2_ctx_syncobj_destroy(hwctx); 770 771 for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++) 772 drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx])); 773 amdxdna_gem_unpin(hwctx->priv->heap); 774 drm_gem_object_put(to_gobj(hwctx->priv->heap)); 775 776 mutex_destroy(&hwctx->priv->io_lock); 777 kfree(hwctx->col_list); 778 kfree(hwctx->priv); 779 kfree(hwctx->cus); 780 } 781 782 static int aie2_config_cu_resp_handler(void *handle, void __iomem *data, size_t size) 783 { 784 struct amdxdna_hwctx *hwctx = handle; 785 786 amdxdna_pm_suspend_put(hwctx->client->xdna); 787 return 0; 788 } 789 790 static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size) 791 { 792 struct amdxdna_hwctx_param_config_cu *config = buf; 793 struct amdxdna_dev *xdna = hwctx->client->xdna; 794 u32 total_size; 795 int ret; 796 797 XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name); 798 if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad))) 799 return -EINVAL; 800 801 if (hwctx->cus) { 802 XDNA_ERR(xdna, "Not support re-config CU"); 803 return -EINVAL; 804 } 805 806 if (!config->num_cus) { 807 XDNA_ERR(xdna, "Number of CU is zero"); 808 return -EINVAL; 809 } 810 811 total_size = struct_size(config, cu_configs, config->num_cus); 812 if (total_size > size) { 813 XDNA_ERR(xdna, "CU config larger than size"); 814 return -EINVAL; 815 } 816 817 hwctx->cus = kmemdup(config, total_size, GFP_KERNEL); 818 if (!hwctx->cus) 819 return -ENOMEM; 820 821 ret = amdxdna_pm_resume_get_locked(xdna); 822 if (ret) 823 goto free_cus; 824 825 ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler); 826 if (ret) { 827 XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret); 828 goto pm_suspend_put; 829 } 830 831 wmb(); /* To avoid locking in command submit when check status */ 832 833 return 0; 834 835 pm_suspend_put: 836 amdxdna_pm_suspend_put(xdna); 837 free_cus: 838 kfree(hwctx->cus); 839 hwctx->cus = NULL; 840 return ret; 841 } 842 843 static void aie2_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq) 844 { 845 struct dma_fence *out_fence = aie2_cmd_get_out_fence(hwctx, seq); 846 847 if (!out_fence) { 848 XDNA_ERR(hwctx->client->xdna, "Failed to get fence"); 849 return; 850 } 851 852 dma_fence_wait_timeout(out_fence, false, MAX_SCHEDULE_TIMEOUT); 853 dma_fence_put(out_fence); 854 } 855 856 static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx *hwctx, u32 bo_hdl, 857 bool attach) 858 { 859 struct amdxdna_client *client = hwctx->client; 860 struct amdxdna_dev *xdna = client->xdna; 861 struct amdxdna_drv_cmd cmd = { 0 }; 862 struct amdxdna_gem_obj *abo; 863 u64 seq; 864 int ret; 865 866 abo = amdxdna_gem_get_obj(client, bo_hdl, AMDXDNA_BO_DEV); 867 if (!abo) { 868 XDNA_ERR(xdna, "Get bo %d failed", bo_hdl); 869 return -EINVAL; 870 } 871 872 if (attach) { 873 if (abo->assigned_hwctx != AMDXDNA_INVALID_CTX_HANDLE) { 874 ret = -EBUSY; 875 goto put_obj; 876 } 877 cmd.opcode = ATTACH_DEBUG_BO; 878 } else { 879 if (abo->assigned_hwctx != hwctx->id) { 880 ret = -EINVAL; 881 goto put_obj; 882 } 883 cmd.opcode = DETACH_DEBUG_BO; 884 } 885 886 ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE, 887 &bo_hdl, 1, hwctx->id, &seq); 888 if (ret) { 889 XDNA_ERR(xdna, "Submit command failed"); 890 goto put_obj; 891 } 892 893 aie2_cmd_wait(hwctx, seq); 894 if (cmd.result) { 895 XDNA_ERR(xdna, "Response failure 0x%x", cmd.result); 896 goto put_obj; 897 } 898 899 if (attach) 900 abo->assigned_hwctx = hwctx->id; 901 else 902 abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE; 903 904 XDNA_DBG(xdna, "Config debug BO %d to %s", bo_hdl, hwctx->name); 905 906 put_obj: 907 amdxdna_gem_put_obj(abo); 908 return ret; 909 } 910 911 int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size) 912 { 913 struct amdxdna_dev *xdna = hwctx->client->xdna; 914 915 drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); 916 switch (type) { 917 case DRM_AMDXDNA_HWCTX_CONFIG_CU: 918 return aie2_hwctx_cu_config(hwctx, buf, size); 919 case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF: 920 return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, true); 921 case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF: 922 return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, false); 923 default: 924 XDNA_DBG(xdna, "Not supported type %d", type); 925 return -EOPNOTSUPP; 926 } 927 } 928 929 int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl) 930 { 931 struct amdxdna_client *client = hwctx->client; 932 struct amdxdna_dev *xdna = client->xdna; 933 struct amdxdna_drv_cmd cmd = { 0 }; 934 u64 seq; 935 int ret; 936 937 cmd.opcode = SYNC_DEBUG_BO; 938 ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE, 939 &debug_bo_hdl, 1, hwctx->id, &seq); 940 if (ret) { 941 XDNA_ERR(xdna, "Submit command failed"); 942 return ret; 943 } 944 945 aie2_cmd_wait(hwctx, seq); 946 if (cmd.result) { 947 XDNA_ERR(xdna, "Response failure 0x%x", cmd.result); 948 return -EINVAL; 949 } 950 951 return 0; 952 } 953 954 static int aie2_populate_range(struct amdxdna_gem_obj *abo) 955 { 956 struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); 957 struct amdxdna_umap *mapp; 958 unsigned long timeout; 959 struct mm_struct *mm; 960 bool found; 961 int ret; 962 963 timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 964 again: 965 found = false; 966 down_write(&xdna->notifier_lock); 967 list_for_each_entry(mapp, &abo->mem.umap_list, node) { 968 if (mapp->invalid) { 969 found = true; 970 break; 971 } 972 } 973 974 if (!found) { 975 abo->mem.map_invalid = false; 976 up_write(&xdna->notifier_lock); 977 return 0; 978 } 979 kref_get(&mapp->refcnt); 980 up_write(&xdna->notifier_lock); 981 982 XDNA_DBG(xdna, "populate memory range %lx %lx", 983 mapp->vma->vm_start, mapp->vma->vm_end); 984 mm = mapp->notifier.mm; 985 if (!mmget_not_zero(mm)) { 986 amdxdna_umap_put(mapp); 987 return -EFAULT; 988 } 989 990 mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier); 991 mmap_read_lock(mm); 992 ret = hmm_range_fault(&mapp->range); 993 mmap_read_unlock(mm); 994 if (ret) { 995 if (time_after(jiffies, timeout)) { 996 ret = -ETIME; 997 goto put_mm; 998 } 999 1000 if (ret == -EBUSY) { 1001 amdxdna_umap_put(mapp); 1002 goto again; 1003 } 1004 1005 goto put_mm; 1006 } 1007 1008 down_write(&xdna->notifier_lock); 1009 if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) { 1010 up_write(&xdna->notifier_lock); 1011 amdxdna_umap_put(mapp); 1012 goto again; 1013 } 1014 mapp->invalid = false; 1015 up_write(&xdna->notifier_lock); 1016 amdxdna_umap_put(mapp); 1017 goto again; 1018 1019 put_mm: 1020 amdxdna_umap_put(mapp); 1021 mmput(mm); 1022 return ret; 1023 } 1024 1025 int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq) 1026 { 1027 struct amdxdna_dev *xdna = hwctx->client->xdna; 1028 struct ww_acquire_ctx acquire_ctx; 1029 struct dma_fence_chain *chain; 1030 struct amdxdna_gem_obj *abo; 1031 unsigned long timeout = 0; 1032 int ret, i; 1033 1034 ret = down_interruptible(&hwctx->priv->job_sem); 1035 if (ret) { 1036 XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret); 1037 return ret; 1038 } 1039 1040 chain = dma_fence_chain_alloc(); 1041 if (!chain) { 1042 XDNA_ERR(xdna, "Alloc fence chain failed"); 1043 ret = -ENOMEM; 1044 goto up_sem; 1045 } 1046 1047 ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx, 1048 hwctx->client->filp->client_id); 1049 if (ret) { 1050 XDNA_ERR(xdna, "DRM job init failed, ret %d", ret); 1051 goto free_chain; 1052 } 1053 1054 retry: 1055 ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx); 1056 if (ret) { 1057 XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret); 1058 goto cleanup_job; 1059 } 1060 1061 for (i = 0; i < job->bo_cnt; i++) { 1062 ret = dma_resv_reserve_fences(job->bos[i]->resv, 1); 1063 if (ret) { 1064 XDNA_WARN(xdna, "Failed to reserve fences %d", ret); 1065 drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); 1066 goto cleanup_job; 1067 } 1068 } 1069 1070 down_read(&xdna->notifier_lock); 1071 for (i = 0; i < job->bo_cnt; i++) { 1072 abo = to_xdna_obj(job->bos[i]); 1073 if (abo->mem.map_invalid) { 1074 up_read(&xdna->notifier_lock); 1075 drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); 1076 if (!timeout) { 1077 timeout = jiffies + 1078 msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 1079 } else if (time_after(jiffies, timeout)) { 1080 ret = -ETIME; 1081 goto cleanup_job; 1082 } 1083 1084 ret = aie2_populate_range(abo); 1085 if (ret) 1086 goto cleanup_job; 1087 goto retry; 1088 } 1089 } 1090 1091 mutex_lock(&hwctx->priv->io_lock); 1092 drm_sched_job_arm(&job->base); 1093 job->out_fence = dma_fence_get(&job->base.s_fence->finished); 1094 for (i = 0; i < job->bo_cnt; i++) 1095 dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE); 1096 job->seq = hwctx->priv->seq++; 1097 kref_get(&job->refcnt); 1098 drm_sched_entity_push_job(&job->base); 1099 1100 *seq = job->seq; 1101 drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq); 1102 mutex_unlock(&hwctx->priv->io_lock); 1103 1104 up_read(&xdna->notifier_lock); 1105 drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); 1106 1107 aie2_job_put(job); 1108 atomic64_inc(&hwctx->job_submit_cnt); 1109 1110 return 0; 1111 1112 cleanup_job: 1113 drm_sched_job_cleanup(&job->base); 1114 free_chain: 1115 dma_fence_chain_free(chain); 1116 up_sem: 1117 up(&hwctx->priv->job_sem); 1118 job->job_done = true; 1119 return ret; 1120 } 1121 1122 void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, 1123 unsigned long cur_seq) 1124 { 1125 struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); 1126 struct drm_gem_object *gobj = to_gobj(abo); 1127 long ret; 1128 1129 ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP, 1130 true, MAX_SCHEDULE_TIMEOUT); 1131 if (!ret) 1132 XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret); 1133 else if (ret == -ERESTARTSYS) 1134 XDNA_DBG(xdna, "Wait for bo interrupted by signal"); 1135 } 1136