1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2024, Advanced Micro Devices, Inc. 4 */ 5 6 #include <drm/amdxdna_accel.h> 7 #include <drm/drm_device.h> 8 #include <drm/drm_gem.h> 9 #include <drm/drm_gem_shmem_helper.h> 10 #include <drm/drm_print.h> 11 #include <drm/drm_syncobj.h> 12 #include <linux/hmm.h> 13 #include <linux/types.h> 14 #include <linux/xarray.h> 15 #include <trace/events/amdxdna.h> 16 17 #include "aie2_msg_priv.h" 18 #include "aie2_pci.h" 19 #include "aie2_solver.h" 20 #include "amdxdna_ctx.h" 21 #include "amdxdna_gem.h" 22 #include "amdxdna_mailbox.h" 23 #include "amdxdna_pci_drv.h" 24 #include "amdxdna_pm.h" 25 26 static bool force_cmdlist = true; 27 module_param(force_cmdlist, bool, 0600); 28 MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default true)"); 29 30 uint tdr_timeout_ms = 2000; 31 module_param(tdr_timeout_ms, int, 0400); 32 MODULE_PARM_DESC(tdr_timeout_ms, "TDR (Timeout Detection and Recovery) timeout in milliseconds (0 = disable)"); 33 34 struct aie2_ctx_health { 35 struct amdxdna_ctx_health header; 36 u32 txn_op_idx; 37 u32 ctx_pc; 38 u32 fatal_error_type; 39 u32 fatal_error_exception_type; 40 u32 fatal_error_exception_pc; 41 u32 fatal_error_app_module; 42 }; 43 44 static inline void aie2_tdr_signal(struct amdxdna_dev *xdna) 45 { 46 WRITE_ONCE(xdna->dev_handle->tdr_status, AIE2_TDR_SIGNALED); 47 } 48 49 static bool aie2_tdr_detect(struct amdxdna_dev *xdna) 50 { 51 struct amdxdna_dev_hdl *ndev = xdna->dev_handle; 52 53 if (READ_ONCE(ndev->tdr_status) == AIE2_TDR_WAIT) { 54 XDNA_ERR(xdna, "TDR timeout detected"); 55 return true; 56 } 57 58 WRITE_ONCE(ndev->tdr_status, AIE2_TDR_WAIT); 59 return false; 60 } 61 62 static void aie2_job_release(struct kref *ref) 63 { 64 struct amdxdna_sched_job *job; 65 66 job = container_of(ref, struct amdxdna_sched_job, refcnt); 67 68 amdxdna_sched_job_cleanup(job); 69 atomic64_inc(&job->hwctx->job_free_cnt); 70 wake_up(&job->hwctx->priv->job_free_wq); 71 if (job->out_fence) 72 dma_fence_put(job->out_fence); 73 kfree(job->aie2_job_health); 74 kfree(job); 75 } 76 77 static void aie2_job_put(struct amdxdna_sched_job *job) 78 { 79 kref_put(&job->refcnt, aie2_job_release); 80 } 81 82 /* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */ 83 static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx, 84 struct drm_sched_job *bad_job) 85 { 86 drm_sched_stop(&hwctx->priv->sched, bad_job); 87 aie2_destroy_context(xdna->dev_handle, hwctx); 88 drm_sched_start(&hwctx->priv->sched, 0); 89 } 90 91 static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx) 92 { 93 struct amdxdna_gem_obj *heap = hwctx->priv->heap; 94 unsigned long heap_id; 95 int ret; 96 97 ret = aie2_create_context(xdna->dev_handle, hwctx); 98 if (ret) { 99 XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret); 100 goto out; 101 } 102 103 ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, 104 amdxdna_obj_dma_addr(heap), 105 heap->mem.size); 106 if (ret) { 107 XDNA_ERR(xdna, "Map host buf failed, ret %d", ret); 108 goto out; 109 } 110 111 xa_for_each_range(&hwctx->client->dev_heap_xa, heap_id, heap, 1, 112 hwctx->last_attached_heap) { 113 ret = aie2_add_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, 114 amdxdna_obj_dma_addr(heap), 115 heap->mem.size); 116 if (ret) { 117 XDNA_ERR(xdna, "Add heap %ld failed ret %d", heap_id, ret); 118 goto out; 119 } 120 } 121 122 ret = aie2_config_cu(hwctx, NULL); 123 if (ret) { 124 XDNA_ERR(xdna, "Config cu failed, ret %d", ret); 125 goto out; 126 } 127 128 out: 129 XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret); 130 return ret; 131 } 132 133 static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq) 134 { 135 struct dma_fence *fence, *out_fence = NULL; 136 int ret; 137 138 fence = drm_syncobj_fence_get(hwctx->priv->syncobj); 139 if (!fence) 140 return NULL; 141 142 ret = dma_fence_chain_find_seqno(&fence, seq); 143 if (ret) 144 goto out; 145 146 out_fence = dma_fence_get(dma_fence_chain_contained(fence)); 147 148 out: 149 dma_fence_put(fence); 150 return out_fence; 151 } 152 153 static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx) 154 { 155 struct dma_fence *fence; 156 157 fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1); 158 if (!fence) 159 return; 160 161 /* Wait up to 2 seconds for fw to finish all pending requests */ 162 dma_fence_wait_timeout(fence, false, msecs_to_jiffies(2000)); 163 dma_fence_put(fence); 164 } 165 166 static int aie2_hwctx_suspend_cb(struct amdxdna_hwctx *hwctx, void *arg) 167 { 168 struct amdxdna_dev *xdna = hwctx->client->xdna; 169 170 aie2_hwctx_wait_for_idle(hwctx); 171 aie2_hwctx_stop(xdna, hwctx, NULL); 172 173 return 0; 174 } 175 176 void aie2_hwctx_suspend(struct amdxdna_client *client) 177 { 178 struct amdxdna_dev *xdna = client->xdna; 179 180 /* 181 * Command timeout is unlikely. But if it happens, it doesn't 182 * break the system. aie2_hwctx_stop() will destroy mailbox 183 * and abort all commands. 184 */ 185 drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); 186 amdxdna_hwctx_walk(client, NULL, aie2_hwctx_suspend_cb); 187 } 188 189 static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg) 190 { 191 struct amdxdna_dev *xdna = hwctx->client->xdna; 192 193 return aie2_hwctx_restart(xdna, hwctx); 194 } 195 196 int aie2_hwctx_resume(struct amdxdna_client *client) 197 { 198 /* 199 * The resume path cannot guarantee that mailbox channel can be 200 * regenerated. If this happen, when submit message to this 201 * mailbox channel, error will return. 202 */ 203 return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb); 204 } 205 206 static void 207 aie2_sched_notify(struct amdxdna_sched_job *job) 208 { 209 struct dma_fence *fence = job->fence; 210 211 trace_xdna_job(&job->base, job->hwctx->name, "signaling fence", 212 job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO); 213 214 aie2_tdr_signal(job->hwctx->client->xdna); 215 job->hwctx->priv->completed++; 216 dma_fence_signal(fence); 217 218 up(&job->hwctx->priv->job_sem); 219 job->job_done = true; 220 mmput_async(job->mm); 221 aie2_job_put(job); 222 } 223 224 static void aie2_set_cmd_timeout(struct amdxdna_sched_job *job) 225 { 226 struct aie2_ctx_health *aie2_health __free(kfree) = NULL; 227 struct amdxdna_dev *xdna = job->hwctx->client->xdna; 228 struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; 229 struct app_health_report *report = job->aie2_job_health; 230 u32 fail_cmd_idx = 0; 231 232 if (!report) 233 goto set_timeout; 234 235 XDNA_ERR(xdna, "Firmware timeout state capture:"); 236 XDNA_ERR(xdna, "\tVersion: %d.%d", report->major, report->minor); 237 XDNA_ERR(xdna, "\tReport size: 0x%x", report->size); 238 XDNA_ERR(xdna, "\tContext ID: %d", report->context_id); 239 XDNA_ERR(xdna, "\tDPU PC: 0x%x", report->dpu_pc); 240 XDNA_ERR(xdna, "\tTXN OP ID: 0x%x", report->txn_op_id); 241 XDNA_ERR(xdna, "\tContext PC: 0x%x", report->ctx_pc); 242 XDNA_ERR(xdna, "\tFatal error type: 0x%x", report->fatal_info.fatal_type); 243 XDNA_ERR(xdna, "\tFatal error exception type: 0x%x", report->fatal_info.exception_type); 244 XDNA_ERR(xdna, "\tFatal error exception PC: 0x%x", report->fatal_info.exception_pc); 245 XDNA_ERR(xdna, "\tFatal error app module: 0x%x", report->fatal_info.app_module); 246 XDNA_ERR(xdna, "\tFatal error task ID: %d", report->fatal_info.task_index); 247 XDNA_ERR(xdna, "\tTimed out sub command ID: %d", report->run_list_id); 248 249 fail_cmd_idx = report->run_list_id; 250 aie2_health = kzalloc_obj(*aie2_health); 251 if (!aie2_health) 252 goto set_timeout; 253 254 aie2_health->header.version = AMDXDNA_CMD_CTX_HEALTH_V1; 255 aie2_health->header.npu_gen = AMDXDNA_CMD_CTX_HEALTH_AIE2; 256 aie2_health->txn_op_idx = report->txn_op_id; 257 aie2_health->ctx_pc = report->ctx_pc; 258 aie2_health->fatal_error_type = report->fatal_info.fatal_type; 259 aie2_health->fatal_error_exception_type = report->fatal_info.exception_type; 260 aie2_health->fatal_error_exception_pc = report->fatal_info.exception_pc; 261 aie2_health->fatal_error_app_module = report->fatal_info.app_module; 262 263 set_timeout: 264 amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_TIMEOUT, 265 aie2_health, sizeof(*aie2_health)); 266 } 267 268 static int 269 aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size) 270 { 271 struct amdxdna_sched_job *job = handle; 272 struct amdxdna_gem_obj *cmd_abo; 273 int ret = 0; 274 u32 status; 275 276 cmd_abo = job->cmd_bo; 277 278 if (unlikely(job->job_timeout)) { 279 aie2_set_cmd_timeout(job); 280 ret = -EINVAL; 281 goto out; 282 } 283 284 if (unlikely(!data) || unlikely(size != sizeof(u32))) { 285 amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT, NULL, 0); 286 ret = -EINVAL; 287 goto out; 288 } 289 290 status = readl(data); 291 XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status); 292 if (status == AIE2_STATUS_SUCCESS) 293 amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); 294 else 295 amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ERROR, NULL, 0); 296 297 out: 298 aie2_sched_notify(job); 299 return ret; 300 } 301 302 static int 303 aie2_sched_drvcmd_resp_handler(void *handle, void __iomem *data, size_t size) 304 { 305 struct amdxdna_sched_job *job = handle; 306 int ret = 0; 307 308 if (unlikely(!data || size != sizeof(u32))) { 309 job->drv_cmd->result = U32_MAX; 310 ret = -EINVAL; 311 } else { 312 job->drv_cmd->result = readl(data); 313 } 314 315 aie2_sched_notify(job); 316 return ret; 317 } 318 319 static int 320 aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size) 321 { 322 struct amdxdna_sched_job *job = handle; 323 struct amdxdna_gem_obj *cmd_abo; 324 struct amdxdna_dev *xdna; 325 u32 fail_cmd_idx = 0; 326 u32 fail_cmd_status; 327 u32 cmd_status; 328 int ret = 0; 329 330 cmd_abo = job->cmd_bo; 331 332 if (unlikely(job->job_timeout)) { 333 aie2_set_cmd_timeout(job); 334 ret = -EINVAL; 335 goto out; 336 } 337 338 if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) { 339 amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT, NULL, 0); 340 ret = -EINVAL; 341 goto out; 342 } 343 344 cmd_status = readl(data + offsetof(struct cmd_chain_resp, status)); 345 xdna = job->hwctx->client->xdna; 346 XDNA_DBG(xdna, "Status 0x%x", cmd_status); 347 if (cmd_status == AIE2_STATUS_SUCCESS) { 348 amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); 349 goto out; 350 } 351 352 /* Slow path to handle error, read from ringbuf on BAR */ 353 fail_cmd_idx = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_idx)); 354 fail_cmd_status = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_status)); 355 XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x", 356 fail_cmd_idx, fail_cmd_status); 357 358 if (fail_cmd_status == AIE2_STATUS_SUCCESS) { 359 amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ABORT, NULL, 0); 360 ret = -EINVAL; 361 } else { 362 amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ERROR, NULL, 0); 363 } 364 365 out: 366 aie2_sched_notify(job); 367 return ret; 368 } 369 370 static struct dma_fence * 371 aie2_sched_job_run(struct drm_sched_job *sched_job) 372 { 373 struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); 374 struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; 375 struct amdxdna_hwctx *hwctx = job->hwctx; 376 struct dma_fence *fence; 377 int ret; 378 379 trace_xdna_job(sched_job, hwctx->name, "job run", 380 job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO); 381 382 if (!hwctx->priv->mbox_chann) 383 return NULL; 384 385 if (!mmget_not_zero(job->mm)) 386 return ERR_PTR(-ESRCH); 387 388 kref_get(&job->refcnt); 389 fence = dma_fence_get(job->fence); 390 391 if (job->drv_cmd) { 392 switch (job->drv_cmd->opcode) { 393 case SYNC_DEBUG_BO: 394 ret = aie2_sync_bo(hwctx, job, aie2_sched_drvcmd_resp_handler); 395 break; 396 case ATTACH_DEBUG_BO: 397 case DETACH_DEBUG_BO: 398 ret = aie2_config_debug_bo(hwctx, job, aie2_sched_drvcmd_resp_handler); 399 break; 400 default: 401 ret = -EINVAL; 402 break; 403 } 404 goto out; 405 } 406 407 amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW); 408 409 if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) 410 ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler); 411 else if (force_cmdlist) 412 ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler); 413 else 414 ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler); 415 416 out: 417 if (ret) { 418 dma_fence_put(job->fence); 419 aie2_job_put(job); 420 mmput(job->mm); 421 fence = ERR_PTR(ret); 422 } else { 423 aie2_tdr_signal(hwctx->client->xdna); 424 } 425 trace_xdna_job(sched_job, hwctx->name, "sent to device", 426 job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO); 427 428 return fence; 429 } 430 431 static void aie2_sched_job_free(struct drm_sched_job *sched_job) 432 { 433 struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); 434 struct amdxdna_hwctx *hwctx = job->hwctx; 435 436 /* job->drv_cmd could be freed, so use DEFAULT_IO */ 437 trace_xdna_job(sched_job, hwctx->name, "job free", 438 job->seq, DEFAULT_IO); 439 if (!job->job_done) 440 up(&hwctx->priv->job_sem); 441 442 drm_sched_job_cleanup(sched_job); 443 aie2_job_put(job); 444 } 445 446 static enum drm_gpu_sched_stat 447 aie2_sched_job_timedout(struct drm_sched_job *sched_job) 448 { 449 struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); 450 struct amdxdna_hwctx *hwctx = job->hwctx; 451 struct app_health_report *report; 452 struct amdxdna_dev *xdna; 453 int ret; 454 455 xdna = hwctx->client->xdna; 456 457 guard(mutex)(&xdna->dev_lock); 458 459 if (!aie2_tdr_detect(xdna)) 460 return DRM_GPU_SCHED_STAT_NO_HANG; 461 462 report = kzalloc_obj(*report); 463 if (!report) 464 goto reset_hwctx; 465 466 ret = aie2_query_app_health(xdna->dev_handle, hwctx->fw_ctx_id, report); 467 if (ret) 468 kfree(report); 469 else 470 job->aie2_job_health = report; 471 472 reset_hwctx: 473 job->job_timeout = true; 474 aie2_hwctx_stop(xdna, hwctx, sched_job); 475 476 aie2_hwctx_restart(xdna, hwctx); 477 478 return DRM_GPU_SCHED_STAT_RESET; 479 } 480 481 static const struct drm_sched_backend_ops sched_ops = { 482 .run_job = aie2_sched_job_run, 483 .free_job = aie2_sched_job_free, 484 .timedout_job = aie2_sched_job_timedout, 485 }; 486 487 static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx) 488 { 489 struct amdxdna_dev *xdna = hwctx->client->xdna; 490 struct amdxdna_dev_hdl *ndev; 491 int start, end, first, last; 492 u32 width = 1, entries = 0; 493 int i; 494 495 if (!hwctx->num_tiles) { 496 XDNA_ERR(xdna, "Number of tiles is zero"); 497 return -EINVAL; 498 } 499 500 ndev = xdna->dev_handle; 501 if (unlikely(!ndev->aie.metadata.core.row_count)) { 502 XDNA_WARN(xdna, "Core tile row count is zero"); 503 return -EINVAL; 504 } 505 506 hwctx->num_col = hwctx->num_tiles / ndev->aie.metadata.core.row_count; 507 if (!hwctx->num_col || hwctx->num_col > ndev->total_col) { 508 XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col); 509 return -EINVAL; 510 } 511 512 if (ndev->priv->col_align == COL_ALIGN_NATURE) 513 width = hwctx->num_col; 514 515 /* 516 * In range [start, end], find out columns that is multiple of width. 517 * 'first' is the first column, 518 * 'last' is the last column, 519 * 'entries' is the total number of columns. 520 */ 521 start = xdna->dev_info->first_col; 522 end = ndev->total_col - hwctx->num_col; 523 if (start > 0 && end == 0) { 524 XDNA_DBG(xdna, "Force start from col 0"); 525 start = 0; 526 } 527 first = start + (width - start % width) % width; 528 last = end - end % width; 529 if (last >= first) 530 entries = (last - first) / width + 1; 531 XDNA_DBG(xdna, "start %d end %d first %d last %d", 532 start, end, first, last); 533 534 if (unlikely(!entries)) { 535 XDNA_ERR(xdna, "Start %d end %d width %d", 536 start, end, width); 537 return -EINVAL; 538 } 539 540 hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL); 541 if (!hwctx->col_list) 542 return -ENOMEM; 543 544 hwctx->col_list_len = entries; 545 hwctx->col_list[0] = first; 546 for (i = 1; i < entries; i++) 547 hwctx->col_list[i] = hwctx->col_list[i - 1] + width; 548 549 print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list, 550 entries * sizeof(*hwctx->col_list), false); 551 return 0; 552 } 553 554 static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx) 555 { 556 struct amdxdna_dev *xdna = hwctx->client->xdna; 557 struct alloc_requests *xrs_req; 558 u32 temporal_only_col = 0; 559 int ret; 560 561 xrs_req = kzalloc_obj(*xrs_req); 562 if (!xrs_req) 563 return -ENOMEM; 564 565 if (AIE_FEATURE_ON(&xdna->dev_handle->aie, AIE2_TEMPORAL_ONLY)) { 566 xrs_req->cdo.start_cols = &temporal_only_col; 567 xrs_req->cdo.cols_len = 1; 568 xrs_req->cdo.ncols = xdna->dev_handle->total_col; 569 } else { 570 xrs_req->cdo.start_cols = hwctx->col_list; 571 xrs_req->cdo.cols_len = hwctx->col_list_len; 572 xrs_req->cdo.ncols = hwctx->num_col; 573 } 574 /* Use platform opc */ 575 xrs_req->cdo.qos_cap.opc = xdna->dev_handle->priv->col_opc * hwctx->num_col; 576 577 xrs_req->rqos.gops = hwctx->qos.gops; 578 xrs_req->rqos.fps = hwctx->qos.fps; 579 xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth; 580 xrs_req->rqos.latency = hwctx->qos.latency; 581 xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time; 582 xrs_req->rqos.priority = hwctx->qos.priority; 583 584 xrs_req->rid = (uintptr_t)hwctx; 585 586 ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx); 587 if (ret) 588 XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret); 589 590 kfree(xrs_req); 591 return ret; 592 } 593 594 static void aie2_release_resource(struct amdxdna_hwctx *hwctx) 595 { 596 struct amdxdna_dev *xdna = hwctx->client->xdna; 597 int ret; 598 599 ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx); 600 if (ret) 601 XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret); 602 } 603 604 static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx) 605 { 606 struct amdxdna_dev *xdna = hwctx->client->xdna; 607 struct drm_file *filp = hwctx->client->filp; 608 struct drm_syncobj *syncobj; 609 u32 hdl; 610 int ret; 611 612 hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE; 613 614 ret = drm_syncobj_create(&syncobj, 0, NULL); 615 if (ret) { 616 XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret); 617 return ret; 618 } 619 ret = drm_syncobj_get_handle(filp, syncobj, &hdl); 620 if (ret) { 621 drm_syncobj_put(syncobj); 622 XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret); 623 return ret; 624 } 625 hwctx->priv->syncobj = syncobj; 626 hwctx->syncobj_hdl = hdl; 627 628 return 0; 629 } 630 631 static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx) 632 { 633 /* 634 * The syncobj_hdl is owned by user space and will be cleaned up 635 * separately. 636 */ 637 drm_syncobj_put(hwctx->priv->syncobj); 638 } 639 640 int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) 641 { 642 struct amdxdna_client *client = hwctx->client; 643 struct amdxdna_dev *xdna = client->xdna; 644 const struct drm_sched_init_args args = { 645 .ops = &sched_ops, 646 .credit_limit = HWCTX_MAX_CMDS, 647 .timeout = msecs_to_jiffies(tdr_timeout_ms), 648 .name = "amdxdna_js", 649 .dev = xdna->ddev.dev, 650 }; 651 struct drm_gpu_scheduler *sched; 652 struct amdxdna_hwctx_priv *priv; 653 struct amdxdna_gem_obj *heap; 654 int i, ret; 655 656 priv = kzalloc_obj(*hwctx->priv); 657 if (!priv) 658 return -ENOMEM; 659 hwctx->priv = priv; 660 661 mutex_lock(&client->mm_lock); 662 heap = xa_load(&client->dev_heap_xa, 0); 663 if (!heap) { 664 XDNA_ERR(xdna, "The client dev heap object not exist"); 665 mutex_unlock(&client->mm_lock); 666 ret = -ENOENT; 667 goto free_priv; 668 } 669 drm_gem_object_get(to_gobj(heap)); 670 mutex_unlock(&client->mm_lock); 671 priv->heap = heap; 672 sema_init(&priv->job_sem, HWCTX_MAX_CMDS); 673 674 ret = amdxdna_gem_pin(heap); 675 if (ret) { 676 XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret); 677 goto put_heap; 678 } 679 680 for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { 681 struct amdxdna_gem_obj *abo; 682 struct amdxdna_drm_create_bo args = { 683 .flags = 0, 684 .type = AMDXDNA_BO_DEV, 685 .vaddr = 0, 686 .size = MAX_CHAIN_CMDBUF_SIZE, 687 }; 688 689 abo = amdxdna_drm_create_dev_bo(&xdna->ddev, &args, client->filp); 690 if (IS_ERR(abo)) { 691 ret = PTR_ERR(abo); 692 goto free_cmd_bufs; 693 } 694 695 XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx", 696 i, amdxdna_gem_dev_addr(abo), abo->mem.size); 697 priv->cmd_buf[i] = abo; 698 } 699 700 sched = &priv->sched; 701 mutex_init(&priv->io_lock); 702 703 fs_reclaim_acquire(GFP_KERNEL); 704 might_lock(&priv->io_lock); 705 fs_reclaim_release(GFP_KERNEL); 706 707 ret = drm_sched_init(sched, &args); 708 if (ret) { 709 XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret); 710 goto free_cmd_bufs; 711 } 712 713 ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL, 714 &sched, 1, NULL); 715 if (ret) { 716 XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret); 717 goto free_sched; 718 } 719 720 ret = aie2_hwctx_col_list(hwctx); 721 if (ret) { 722 XDNA_ERR(xdna, "Create col list failed, ret %d", ret); 723 goto free_entity; 724 } 725 726 ret = amdxdna_pm_resume_get_locked(xdna); 727 if (ret) 728 goto free_col_list; 729 730 ret = aie2_alloc_resource(hwctx); 731 if (ret) { 732 XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret); 733 goto suspend_put; 734 } 735 736 ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, 737 amdxdna_obj_dma_addr(heap), 738 heap->mem.size); 739 if (ret) { 740 XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret); 741 goto release_resource; 742 } 743 744 ret = amdxdna_update_heap(client, hwctx); 745 if (ret) { 746 XDNA_ERR(xdna, "Update heap failed, ret %d", ret); 747 goto release_resource; 748 } 749 750 ret = aie2_ctx_syncobj_create(hwctx); 751 if (ret) { 752 XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret); 753 goto release_resource; 754 } 755 amdxdna_pm_suspend_put(xdna); 756 757 init_waitqueue_head(&priv->job_free_wq); 758 759 XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); 760 761 return 0; 762 763 release_resource: 764 aie2_release_resource(hwctx); 765 suspend_put: 766 amdxdna_pm_suspend_put(xdna); 767 free_col_list: 768 kfree(hwctx->col_list); 769 free_entity: 770 drm_sched_entity_destroy(&priv->entity); 771 free_sched: 772 drm_sched_fini(&priv->sched); 773 free_cmd_bufs: 774 for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { 775 if (!priv->cmd_buf[i]) 776 continue; 777 drm_gem_object_put(to_gobj(priv->cmd_buf[i])); 778 } 779 amdxdna_gem_unpin(heap); 780 put_heap: 781 drm_gem_object_put(to_gobj(heap)); 782 free_priv: 783 kfree(priv); 784 return ret; 785 } 786 787 void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) 788 { 789 struct amdxdna_dev *xdna; 790 int idx; 791 792 xdna = hwctx->client->xdna; 793 794 XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq); 795 aie2_hwctx_wait_for_idle(hwctx); 796 797 /* Request fw to destroy hwctx and cancel the rest pending requests */ 798 drm_sched_stop(&hwctx->priv->sched, NULL); 799 aie2_release_resource(hwctx); 800 drm_sched_start(&hwctx->priv->sched, 0); 801 802 mutex_unlock(&xdna->dev_lock); 803 drm_sched_entity_destroy(&hwctx->priv->entity); 804 805 /* Wait for all submitted jobs to be completed or canceled */ 806 wait_event(hwctx->priv->job_free_wq, 807 atomic64_read(&hwctx->job_submit_cnt) == 808 atomic64_read(&hwctx->job_free_cnt)); 809 mutex_lock(&xdna->dev_lock); 810 811 drm_sched_fini(&hwctx->priv->sched); 812 aie2_ctx_syncobj_destroy(hwctx); 813 814 for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++) 815 drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx])); 816 amdxdna_gem_unpin(hwctx->priv->heap); 817 drm_gem_object_put(to_gobj(hwctx->priv->heap)); 818 819 mutex_destroy(&hwctx->priv->io_lock); 820 kfree(hwctx->col_list); 821 kfree(hwctx->priv); 822 kfree(hwctx->cus); 823 } 824 825 static int aie2_config_cu_resp_handler(void *handle, void __iomem *data, size_t size) 826 { 827 struct amdxdna_hwctx *hwctx = handle; 828 829 amdxdna_pm_suspend_put(hwctx->client->xdna); 830 return 0; 831 } 832 833 static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size) 834 { 835 struct amdxdna_hwctx_param_config_cu *config = buf; 836 struct amdxdna_dev *xdna = hwctx->client->xdna; 837 u32 total_size; 838 int ret; 839 840 XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name); 841 if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad))) 842 return -EINVAL; 843 844 if (hwctx->cus) { 845 XDNA_ERR(xdna, "Not support re-config CU"); 846 return -EINVAL; 847 } 848 849 if (!config->num_cus) { 850 XDNA_ERR(xdna, "Number of CU is zero"); 851 return -EINVAL; 852 } 853 854 total_size = struct_size(config, cu_configs, config->num_cus); 855 if (total_size > size) { 856 XDNA_ERR(xdna, "CU config larger than size"); 857 return -EINVAL; 858 } 859 860 hwctx->cus = kmemdup(config, total_size, GFP_KERNEL); 861 if (!hwctx->cus) 862 return -ENOMEM; 863 864 ret = amdxdna_pm_resume_get_locked(xdna); 865 if (ret) 866 goto free_cus; 867 868 ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler); 869 if (ret) { 870 XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret); 871 goto pm_suspend_put; 872 } 873 874 wmb(); /* To avoid locking in command submit when check status */ 875 876 return 0; 877 878 pm_suspend_put: 879 amdxdna_pm_suspend_put(xdna); 880 free_cus: 881 kfree(hwctx->cus); 882 hwctx->cus = NULL; 883 return ret; 884 } 885 886 static void aie2_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq) 887 { 888 struct dma_fence *out_fence = aie2_cmd_get_out_fence(hwctx, seq); 889 890 if (!out_fence) { 891 XDNA_ERR(hwctx->client->xdna, "Failed to get fence"); 892 return; 893 } 894 895 dma_fence_wait_timeout(out_fence, false, MAX_SCHEDULE_TIMEOUT); 896 dma_fence_put(out_fence); 897 } 898 899 static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx *hwctx, u32 bo_hdl, 900 bool attach) 901 { 902 struct amdxdna_client *client = hwctx->client; 903 struct amdxdna_dev *xdna = client->xdna; 904 struct amdxdna_drv_cmd cmd = { 0 }; 905 struct amdxdna_gem_obj *abo; 906 u64 seq; 907 int ret; 908 909 abo = amdxdna_gem_get_obj(client, bo_hdl, AMDXDNA_BO_DEV); 910 if (!abo) { 911 XDNA_ERR(xdna, "Get bo %d failed", bo_hdl); 912 return -EINVAL; 913 } 914 915 if (attach) { 916 if (abo->assigned_hwctx != AMDXDNA_INVALID_CTX_HANDLE) { 917 ret = -EBUSY; 918 goto put_obj; 919 } 920 cmd.opcode = ATTACH_DEBUG_BO; 921 } else { 922 if (abo->assigned_hwctx != hwctx->id) { 923 ret = -EINVAL; 924 goto put_obj; 925 } 926 cmd.opcode = DETACH_DEBUG_BO; 927 } 928 929 ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE, 930 &bo_hdl, 1, hwctx->id, &seq); 931 if (ret) { 932 XDNA_ERR(xdna, "Submit command failed"); 933 goto put_obj; 934 } 935 936 aie2_cmd_wait(hwctx, seq); 937 if (cmd.result) { 938 XDNA_ERR(xdna, "Response failure 0x%x", cmd.result); 939 ret = -EINVAL; 940 goto put_obj; 941 } 942 943 if (attach) 944 abo->assigned_hwctx = hwctx->id; 945 else 946 abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE; 947 948 XDNA_DBG(xdna, "Config debug BO %d to %s", bo_hdl, hwctx->name); 949 950 put_obj: 951 amdxdna_gem_put_obj(abo); 952 return ret; 953 } 954 955 int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size) 956 { 957 struct amdxdna_dev *xdna = hwctx->client->xdna; 958 959 drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); 960 switch (type) { 961 case DRM_AMDXDNA_HWCTX_CONFIG_CU: 962 return aie2_hwctx_cu_config(hwctx, buf, size); 963 case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF: 964 return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, true); 965 case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF: 966 return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, false); 967 default: 968 XDNA_DBG(xdna, "Not supported type %d", type); 969 return -EOPNOTSUPP; 970 } 971 } 972 973 int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl) 974 { 975 struct amdxdna_client *client = hwctx->client; 976 struct amdxdna_dev *xdna = client->xdna; 977 struct amdxdna_drv_cmd cmd = { 0 }; 978 u64 seq; 979 int ret; 980 981 cmd.opcode = SYNC_DEBUG_BO; 982 ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE, 983 &debug_bo_hdl, 1, hwctx->id, &seq); 984 if (ret) { 985 XDNA_ERR(xdna, "Submit command failed"); 986 return ret; 987 } 988 989 aie2_cmd_wait(hwctx, seq); 990 if (cmd.result) { 991 XDNA_ERR(xdna, "Response failure 0x%x", cmd.result); 992 return -EINVAL; 993 } 994 995 return 0; 996 } 997 998 static int aie2_populate_range(struct amdxdna_gem_obj *abo) 999 { 1000 struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); 1001 struct amdxdna_umap *mapp; 1002 unsigned long timeout; 1003 struct mm_struct *mm; 1004 bool found; 1005 int ret; 1006 1007 timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 1008 again: 1009 found = false; 1010 down_write(&xdna->notifier_lock); 1011 list_for_each_entry(mapp, &abo->mem.umap_list, node) { 1012 if (mapp->invalid) { 1013 found = true; 1014 break; 1015 } 1016 } 1017 1018 if (!found) { 1019 abo->mem.map_invalid = false; 1020 up_write(&xdna->notifier_lock); 1021 return 0; 1022 } 1023 kref_get(&mapp->refcnt); 1024 up_write(&xdna->notifier_lock); 1025 1026 XDNA_DBG(xdna, "populate memory range %lx %lx", 1027 mapp->vma->vm_start, mapp->vma->vm_end); 1028 mm = mapp->notifier.mm; 1029 if (!mmget_not_zero(mm)) { 1030 amdxdna_umap_put(mapp); 1031 return -EFAULT; 1032 } 1033 1034 mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier); 1035 mmap_read_lock(mm); 1036 ret = hmm_range_fault(&mapp->range); 1037 mmap_read_unlock(mm); 1038 if (ret) { 1039 if (time_after(jiffies, timeout)) { 1040 ret = -ETIME; 1041 goto put_mm; 1042 } 1043 1044 if (ret == -EBUSY) { 1045 amdxdna_umap_put(mapp); 1046 mmput(mm); 1047 goto again; 1048 } 1049 1050 goto put_mm; 1051 } 1052 1053 down_write(&xdna->notifier_lock); 1054 if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) { 1055 up_write(&xdna->notifier_lock); 1056 amdxdna_umap_put(mapp); 1057 mmput(mm); 1058 goto again; 1059 } 1060 mapp->invalid = false; 1061 up_write(&xdna->notifier_lock); 1062 amdxdna_umap_put(mapp); 1063 mmput(mm); 1064 goto again; 1065 1066 put_mm: 1067 amdxdna_umap_put(mapp); 1068 mmput(mm); 1069 return ret; 1070 } 1071 1072 int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq) 1073 { 1074 struct amdxdna_dev *xdna = hwctx->client->xdna; 1075 struct ww_acquire_ctx acquire_ctx; 1076 struct dma_fence_chain *chain; 1077 struct amdxdna_gem_obj *abo; 1078 unsigned long timeout = 0; 1079 int ret, i; 1080 1081 ret = down_interruptible(&hwctx->priv->job_sem); 1082 if (ret) { 1083 XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret); 1084 return ret; 1085 } 1086 1087 chain = dma_fence_chain_alloc(); 1088 if (!chain) { 1089 XDNA_ERR(xdna, "Alloc fence chain failed"); 1090 ret = -ENOMEM; 1091 goto up_sem; 1092 } 1093 1094 ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx, 1095 hwctx->client->filp->client_id); 1096 if (ret) { 1097 XDNA_ERR(xdna, "DRM job init failed, ret %d", ret); 1098 goto free_chain; 1099 } 1100 1101 retry: 1102 ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx); 1103 if (ret) { 1104 XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret); 1105 goto cleanup_job; 1106 } 1107 1108 for (i = 0; i < job->bo_cnt; i++) { 1109 ret = dma_resv_reserve_fences(job->bos[i]->resv, 1); 1110 if (ret) { 1111 XDNA_WARN(xdna, "Failed to reserve fences %d", ret); 1112 drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); 1113 goto cleanup_job; 1114 } 1115 } 1116 1117 down_read(&xdna->notifier_lock); 1118 for (i = 0; i < job->bo_cnt; i++) { 1119 abo = to_xdna_obj(job->bos[i]); 1120 if (abo->mem.map_invalid) { 1121 up_read(&xdna->notifier_lock); 1122 drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); 1123 if (!timeout) { 1124 timeout = jiffies + 1125 msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 1126 } else if (time_after(jiffies, timeout)) { 1127 ret = -ETIME; 1128 goto cleanup_job; 1129 } 1130 1131 ret = aie2_populate_range(abo); 1132 if (ret) 1133 goto cleanup_job; 1134 goto retry; 1135 } 1136 } 1137 1138 mutex_lock(&hwctx->priv->io_lock); 1139 drm_sched_job_arm(&job->base); 1140 job->out_fence = dma_fence_get(&job->base.s_fence->finished); 1141 for (i = 0; i < job->bo_cnt; i++) 1142 dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE); 1143 job->seq = hwctx->priv->seq++; 1144 kref_get(&job->refcnt); 1145 drm_sched_entity_push_job(&job->base); 1146 1147 *seq = job->seq; 1148 drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq); 1149 mutex_unlock(&hwctx->priv->io_lock); 1150 1151 up_read(&xdna->notifier_lock); 1152 drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); 1153 1154 aie2_job_put(job); 1155 atomic64_inc(&hwctx->job_submit_cnt); 1156 1157 return 0; 1158 1159 cleanup_job: 1160 drm_sched_job_cleanup(&job->base); 1161 free_chain: 1162 dma_fence_chain_free(chain); 1163 up_sem: 1164 up(&hwctx->priv->job_sem); 1165 job->job_done = true; 1166 return ret; 1167 } 1168 1169 void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, 1170 unsigned long cur_seq) 1171 { 1172 struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); 1173 struct drm_gem_object *gobj = to_gobj(abo); 1174 long ret; 1175 1176 ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP, 1177 true, MAX_SCHEDULE_TIMEOUT); 1178 if (!ret) 1179 XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret); 1180 else if (ret == -ERESTARTSYS) 1181 XDNA_DBG(xdna, "Wait for bo interrupted by signal"); 1182 } 1183 1184 int aie2_hwctx_heap_expand(struct amdxdna_hwctx *hwctx, 1185 struct amdxdna_gem_obj *heap) 1186 { 1187 struct amdxdna_client *client = hwctx->client; 1188 struct amdxdna_dev *xdna = client->xdna; 1189 u64 addr; 1190 int ret; 1191 1192 ret = amdxdna_pm_resume_get_locked(xdna); 1193 if (ret) 1194 return ret; 1195 1196 addr = amdxdna_obj_dma_addr(heap); 1197 ret = aie2_add_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, 1198 addr, heap->mem.size); 1199 if (ret) { 1200 XDNA_ERR(xdna, "Add heap failed hwctx %s 0x%lx ret %d", 1201 hwctx->name, heap->mem.size, ret); 1202 } 1203 1204 amdxdna_pm_suspend_put(xdna); 1205 1206 return ret; 1207 } 1208