// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020-2025 Intel Corporation
 */

#include <drm/drm_file.h>

#include <linux/bitfield.h>
#include <linux/highmem.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/module.h>
#include <uapi/drm/ivpu_accel.h>

#include "ivpu_drv.h"
#include "ivpu_fw.h"
#include "ivpu_hw.h"
#include "ivpu_ipc.h"
#include "ivpu_job.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_mmu.h"
#include "ivpu_pm.h"
#include "ivpu_trace.h"
#include "vpu_boot_api.h"

#define CMD_BUF_IDX 0
#define JOB_MAX_BUFFER_COUNT 65535

static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
{
	ivpu_hw_db_set(vdev, cmdq->db_id);
}

static int ivpu_preemption_buffers_create(struct ivpu_device *vdev,
					  struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
	if (ivpu_fw_preempt_buf_size(vdev) == 0)
		return 0;

	cmdq->primary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.user,
						   vdev->fw->primary_preempt_buf_size,
						   DRM_IVPU_BO_WC);
	if (!cmdq->primary_preempt_buf) {
		ivpu_err(vdev, "Failed to create primary preemption buffer\n");
		return -ENOMEM;
	}

	cmdq->secondary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.dma,
						     vdev->fw->secondary_preempt_buf_size,
						     DRM_IVPU_BO_WC);
	if (!cmdq->secondary_preempt_buf) {
		ivpu_err(vdev, "Failed to create secondary preemption buffer\n");
		goto err_free_primary;
	}

	return 0;

err_free_primary:
	ivpu_bo_free(cmdq->primary_preempt_buf);
	cmdq->primary_preempt_buf = NULL;
	return -ENOMEM;
}

static void ivpu_preemption_buffers_free(struct ivpu_device *vdev,
					 struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
	if (cmdq->primary_preempt_buf)
		ivpu_bo_free(cmdq->primary_preempt_buf);
	if (cmdq->secondary_preempt_buf)
		ivpu_bo_free(cmdq->secondary_preempt_buf);
}

static int ivpu_preemption_job_init(struct ivpu_device *vdev, struct ivpu_file_priv *file_priv,
				    struct ivpu_cmdq *cmdq, struct ivpu_job *job)
{
	int ret;

	/* Use the preemption buffer provided by user space */
	if (job->primary_preempt_buf)
		return 0;

	if (!cmdq->primary_preempt_buf) {
		/* Allocate per command queue preemption buffers */
		ret = ivpu_preemption_buffers_create(vdev, file_priv, cmdq);
		if (ret)
			return ret;
	}

	/* Use preemption buffers allocated by the kernel */
	job->primary_preempt_buf = cmdq->primary_preempt_buf;
	job->secondary_preempt_buf = cmdq->secondary_preempt_buf;

	return 0;
}

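/*
 * Allocate a command queue and the 4 KB buffer object that backs the job queue
 * shared with the firmware. The queue ID and doorbell are assigned later, by
 * ivpu_cmdq_create() and ivpu_register_db() respectively.
 */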
static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv)
{
	struct ivpu_device *vdev = file_priv->vdev;
	struct ivpu_cmdq *cmdq;

	cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL);
	if (!cmdq)
		return NULL;

	cmdq->mem = ivpu_bo_create_global(vdev, SZ_4K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
	if (!cmdq->mem)
		goto err_free_cmdq;

	return cmdq;

err_free_cmdq:
	kfree(cmdq);
	return NULL;
}

/**
 * ivpu_cmdq_get_entry_count - Calculate the number of entries in the command queue.
 * @cmdq: Pointer to the command queue structure.
 *
 * Returns the number of entries that can fit in the command queue memory.
 */
static inline u32 ivpu_cmdq_get_entry_count(struct ivpu_cmdq *cmdq)
{
	size_t size = ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header);

	return size / sizeof(struct vpu_job_queue_entry);
}

/**
 * ivpu_cmdq_get_flags - Get command queue flags based on input flags and test mode.
 * @vdev: Pointer to the ivpu device structure.
 * @flags: Input flags to determine the command queue flags.
 *
 * Returns the calculated command queue flags, considering both the input flags
 * and the current test mode settings.
 */
static u32 ivpu_cmdq_get_flags(struct ivpu_device *vdev, u32 flags)
{
	u32 cmdq_flags = 0;

	if ((flags & DRM_IVPU_CMDQ_FLAG_TURBO) && (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX))
		cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE;

	/* Test mode can override the TURBO flag coming from the application */
	if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_ENABLE)
		cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
	if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_DISABLE)
		cmdq_flags &= ~VPU_JOB_QUEUE_FLAGS_TURBO_MODE;

	return cmdq_flags;
}

static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
	ivpu_preemption_buffers_free(file_priv->vdev, file_priv, cmdq);
	ivpu_bo_free(cmdq->mem);
	kfree(cmdq);
}

static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 priority, u32 flags)
{
	struct ivpu_device *vdev = file_priv->vdev;
	struct ivpu_cmdq *cmdq = NULL;
	int ret;

	lockdep_assert_held(&file_priv->lock);

	cmdq = ivpu_cmdq_alloc(file_priv);
	if (!cmdq) {
		ivpu_err(vdev, "Failed to allocate command queue\n");
		return NULL;
	}

	ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit,
			      &file_priv->cmdq_id_next, GFP_KERNEL);
	if (ret < 0) {
		ivpu_err(vdev, "Failed to allocate command queue ID: %d\n", ret);
		goto err_free_cmdq;
	}

	cmdq->entry_count = ivpu_cmdq_get_entry_count(cmdq);
	cmdq->priority = priority;

	cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem);
	cmdq->jobq->header.engine_idx = VPU_ENGINE_COMPUTE;
	cmdq->jobq->header.flags = ivpu_cmdq_get_flags(vdev, flags);

	ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d, flags 0x%08x\n",
		 cmdq->id, file_priv->ctx.id, cmdq->jobq->header.flags);
	return cmdq;

err_free_cmdq:
	ivpu_cmdq_free(file_priv, cmdq);
	return NULL;
}

static int ivpu_hws_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u16 engine,
			      u8 priority)
{
	struct ivpu_device *vdev = file_priv->vdev;
	int ret;

	ret = ivpu_jsm_hws_create_cmdq(vdev, file_priv->ctx.id, file_priv->ctx.id, cmdq->id,
				       task_pid_nr(current), engine,
				       cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
	if (ret)
		return ret;

	ret = ivpu_jsm_hws_set_context_sched_properties(vdev, file_priv->ctx.id, cmdq->id,
							priority);
	if (ret)
		return ret;

	return 0;
}

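/*
 * Allocate a doorbell ID and register it with the firmware. With the HW
 * scheduler the doorbell is registered through the HWS interface and bound to
 * the command queue; with the OS scheduler the plain JSM doorbell registration
 * is used. The doorbell ID is released again if the JSM call fails.
 */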
static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
	struct ivpu_device *vdev = file_priv->vdev;
	int ret;

	ret = xa_alloc_cyclic(&vdev->db_xa, &cmdq->db_id, NULL, vdev->db_limit, &vdev->db_next,
			      GFP_KERNEL);
	if (ret < 0) {
		ivpu_err(vdev, "Failed to allocate doorbell ID: %d\n", ret);
		return ret;
	}

	if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
		ret = ivpu_jsm_hws_register_db(vdev, file_priv->ctx.id, cmdq->id, cmdq->db_id,
					       cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
	else
		ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id,
					   cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));

	if (!ret) {
		ivpu_dbg(vdev, JOB, "DB %d registered to cmdq %d ctx %d priority %d\n",
			 cmdq->db_id, cmdq->id, file_priv->ctx.id, cmdq->priority);
	} else {
		xa_erase(&vdev->db_xa, cmdq->db_id);
		cmdq->db_id = 0;
	}

	return ret;
}

static void ivpu_cmdq_jobq_reset(struct ivpu_device *vdev, struct vpu_job_queue *jobq)
{
	jobq->header.head = 0;
	jobq->header.tail = 0;

	wmb(); /* Flush WC buffer for jobq->header */
}

static int ivpu_cmdq_register(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
	struct ivpu_device *vdev = file_priv->vdev;
	int ret;

	lockdep_assert_held(&file_priv->lock);

	if (cmdq->db_id)
		return 0;

	ivpu_cmdq_jobq_reset(vdev, cmdq->jobq);

	if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
		ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, cmdq->priority);
		if (ret)
			return ret;
	}

	ret = ivpu_register_db(file_priv, cmdq);
	if (ret)
		return ret;

	return 0;
}

static int ivpu_cmdq_unregister(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
	struct ivpu_device *vdev = file_priv->vdev;
	int ret;

	lockdep_assert_held(&file_priv->lock);

	if (!cmdq->db_id)
		return 0;

	ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id);
	if (!ret)
		ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id);

	if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
		ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id);
		if (!ret)
			ivpu_dbg(vdev, JOB, "Command queue %d destroyed, ctx %d\n",
				 cmdq->id, file_priv->ctx.id);
	}

	xa_erase(&file_priv->vdev->db_xa, cmdq->db_id);
	cmdq->db_id = 0;

	return 0;
}

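/*
 * Convert a DRM_IVPU_JOB_PRIORITY_* value to a VPU_JOB_SCHEDULING_PRIORITY_BAND_*
 * value. DRM_IVPU_JOB_PRIORITY_DEFAULT maps to the normal band; the remaining
 * DRM values are one greater than the corresponding JSM bands.
 */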
static inline u8 ivpu_job_to_jsm_priority(u8 priority)
{
	if (priority == DRM_IVPU_JOB_PRIORITY_DEFAULT)
		return VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL;

	return priority - 1;
}

static void ivpu_cmdq_destroy(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
	ivpu_cmdq_unregister(file_priv, cmdq);
	xa_erase(&file_priv->cmdq_xa, cmdq->id);
	ivpu_cmdq_free(file_priv, cmdq);
}

static struct ivpu_cmdq *ivpu_cmdq_acquire_legacy(struct ivpu_file_priv *file_priv, u8 priority)
{
	struct ivpu_cmdq *cmdq;
	unsigned long id;

	lockdep_assert_held(&file_priv->lock);

	xa_for_each(&file_priv->cmdq_xa, id, cmdq)
		if (cmdq->is_legacy && cmdq->priority == priority)
			break;

	if (!cmdq) {
		cmdq = ivpu_cmdq_create(file_priv, priority, 0);
		if (!cmdq)
			return NULL;
		cmdq->is_legacy = true;
	}

	return cmdq;
}

static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u32 cmdq_id)
{
	struct ivpu_device *vdev = file_priv->vdev;
	struct ivpu_cmdq *cmdq;

	lockdep_assert_held(&file_priv->lock);

	cmdq = xa_load(&file_priv->cmdq_xa, cmdq_id);
	if (!cmdq) {
		ivpu_dbg(vdev, IOCTL, "Failed to find command queue with ID: %u\n", cmdq_id);
		return NULL;
	}

	return cmdq;
}

void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv)
{
	struct ivpu_cmdq *cmdq;
	unsigned long cmdq_id;

	lockdep_assert_held(&file_priv->lock);

	xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
		ivpu_cmdq_destroy(file_priv, cmdq);
}

/*
 * Mark the doorbells as unregistered.
 * This function needs to be called when the VPU hardware is restarted
 * and the firmware loses the job queue state. The next time a job queue
 * is used it will be registered again.
 */
static void ivpu_cmdq_reset(struct ivpu_file_priv *file_priv)
{
	struct ivpu_cmdq *cmdq;
	unsigned long cmdq_id;

	mutex_lock(&file_priv->lock);

	xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) {
		xa_erase(&file_priv->vdev->db_xa, cmdq->db_id);
		cmdq->db_id = 0;
	}

	mutex_unlock(&file_priv->lock);
}

void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev)
{
	struct ivpu_file_priv *file_priv;
	unsigned long ctx_id;

	mutex_lock(&vdev->context_list_lock);

	xa_for_each(&vdev->context_xa, ctx_id, file_priv)
		ivpu_cmdq_reset(file_priv);

	mutex_unlock(&vdev->context_list_lock);
}

void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv)
{
	struct ivpu_device *vdev = file_priv->vdev;
	struct ivpu_cmdq *cmdq;
	unsigned long cmdq_id;

	lockdep_assert_held(&file_priv->lock);

	ivpu_dbg(vdev, JOB, "Context ID: %u abort\n", file_priv->ctx.id);

	xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
		ivpu_cmdq_unregister(file_priv, cmdq);

	if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS)
		ivpu_jsm_context_release(vdev, file_priv->ctx.id);

	ivpu_mmu_disable_ssid_events(vdev, file_priv->ctx.id);

	file_priv->aborted = true;
}

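/*
 * Append a job to the command queue ring buffer shared with the firmware.
 * Returns -EBUSY if advancing the tail would catch up with the head, i.e. the
 * queue is full. The entry is fully written before the new tail is published
 * and the write-combined buffer is flushed so the firmware sees a consistent
 * entry.
 */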
static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
{
	struct ivpu_device *vdev = job->vdev;
	struct vpu_job_queue_header *header = &cmdq->jobq->header;
	struct vpu_job_queue_entry *entry;
	u32 tail = READ_ONCE(header->tail);
	u32 next_entry = (tail + 1) % cmdq->entry_count;

	/* Check if there is space left in job queue */
	if (next_entry == header->head) {
		ivpu_dbg(vdev, JOB, "Job queue full: ctx %d cmdq %d db %d head %d tail %d\n",
			 job->file_priv->ctx.id, cmdq->id, cmdq->db_id, header->head, tail);
		return -EBUSY;
	}

	entry = &cmdq->jobq->slot[tail].job;
	entry->batch_buf_addr = job->cmd_buf_vpu_addr;
	entry->job_id = job->job_id;
	entry->flags = 0;
	if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION))
		entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK;

	if (job->primary_preempt_buf) {
		entry->primary_preempt_buf_addr = job->primary_preempt_buf->vpu_addr;
		entry->primary_preempt_buf_size = ivpu_bo_size(job->primary_preempt_buf);
	}

	if (job->secondary_preempt_buf) {
		entry->secondary_preempt_buf_addr = job->secondary_preempt_buf->vpu_addr;
		entry->secondary_preempt_buf_size = ivpu_bo_size(job->secondary_preempt_buf);
	}

	wmb(); /* Ensure that tail is updated after filling entry */
	header->tail = next_entry;
	wmb(); /* Flush WC buffer for jobq header */

	return 0;
}

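/*
 * Driver-private dma_fence used as the job "done" fence. It is signaled from
 * ivpu_job_signal_and_destroy() when the firmware reports job completion and
 * is attached to the reservation objects of all BOs referenced by the job.
 */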
struct ivpu_fence {
	struct dma_fence base;
	spinlock_t lock; /* protects base */
	struct ivpu_device *vdev;
};

static inline struct ivpu_fence *to_vpu_fence(struct dma_fence *fence)
{
	return container_of(fence, struct ivpu_fence, base);
}

static const char *ivpu_fence_get_driver_name(struct dma_fence *fence)
{
	return DRIVER_NAME;
}

static const char *ivpu_fence_get_timeline_name(struct dma_fence *fence)
{
	struct ivpu_fence *ivpu_fence = to_vpu_fence(fence);

	return dev_name(ivpu_fence->vdev->drm.dev);
}

static const struct dma_fence_ops ivpu_fence_ops = {
	.get_driver_name = ivpu_fence_get_driver_name,
	.get_timeline_name = ivpu_fence_get_timeline_name,
};

static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev)
{
	struct ivpu_fence *fence;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return NULL;

	fence->vdev = vdev;
	spin_lock_init(&fence->lock);
	dma_fence_init(&fence->base, &ivpu_fence_ops, &fence->lock, dma_fence_context_alloc(1), 1);

	return &fence->base;
}

static void ivpu_job_destroy(struct ivpu_job *job)
{
	struct ivpu_device *vdev = job->vdev;
	u32 i;

	ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d cmdq_id %u engine %d",
		 job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx);

	for (i = 0; i < job->bo_count; i++)
		if (job->bos[i])
			drm_gem_object_put(&job->bos[i]->base.base);

	dma_fence_put(job->done_fence);
	ivpu_file_priv_put(&job->file_priv);
	kfree(job);
}

static struct ivpu_job *
ivpu_job_create(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
{
	struct ivpu_device *vdev = file_priv->vdev;
	struct ivpu_job *job;

	job = kzalloc(struct_size(job, bos, bo_count), GFP_KERNEL);
	if (!job)
		return NULL;

	job->vdev = vdev;
	job->engine_idx = engine_idx;
	job->bo_count = bo_count;
	job->done_fence = ivpu_fence_create(vdev);
	if (!job->done_fence) {
		ivpu_err(vdev, "Failed to create a fence\n");
		goto err_free_job;
	}

	job->file_priv = ivpu_file_priv_get(file_priv);

	trace_job("create", job);
	ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx);
	return job;

err_free_job:
	kfree(job);
	return NULL;
}

static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device *vdev, u32 job_id)
{
	struct ivpu_job *job;

	lockdep_assert_held(&vdev->submitted_jobs_lock);

	job = xa_erase(&vdev->submitted_jobs_xa, job_id);
	if (xa_empty(&vdev->submitted_jobs_xa) && job) {
		vdev->busy_time = ktime_add(ktime_sub(ktime_get(), vdev->busy_start_ts),
					    vdev->busy_time);
	}

	return job;
}

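/*
 * Check whether a job completion status requires an engine reset. If it does,
 * the context is marked as faulty and the job is left in the submitted jobs XA
 * so that the context abort worker can clean it up together with MMU faults.
 * Returns true when the job was deferred to the abort worker, false when the
 * caller should complete the job normally.
 */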
bool ivpu_job_handle_engine_error(struct ivpu_device *vdev, u32 job_id, u32 job_status)
{
	lockdep_assert_held(&vdev->submitted_jobs_lock);

	switch (job_status) {
	case VPU_JSM_STATUS_PROCESSING_ERR:
	case VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MIN ... VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MAX:
	{
		struct ivpu_job *job = xa_load(&vdev->submitted_jobs_xa, job_id);

		if (!job)
			return false;

		/* Trigger an engine reset */
		guard(mutex)(&job->file_priv->lock);

		job->job_status = job_status;

		if (job->file_priv->has_mmu_faults)
			return false;

		/*
		 * Mark the context as faulty and defer destruction of the job to the jobs
		 * abort thread handler to synchronize between both faults and jobs returning
		 * context violation status and ensure both are handled in the same way.
		 */
		job->file_priv->has_mmu_faults = true;
		queue_work(system_percpu_wq, &vdev->context_abort_work);
		return true;
	}
	default:
		/* Complete job with error status, engine reset not required */
		break;
	}

	return false;
}

static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status)
{
	struct ivpu_job *job;

	lockdep_assert_held(&vdev->submitted_jobs_lock);

	job = xa_load(&vdev->submitted_jobs_xa, job_id);
	if (!job)
		return -ENOENT;

	ivpu_job_remove_from_submitted_jobs(vdev, job_id);

	if (job->job_status == VPU_JSM_STATUS_SUCCESS) {
		if (job->file_priv->has_mmu_faults)
			job->job_status = DRM_IVPU_JOB_STATUS_ABORTED;
		else
			job->job_status = job_status;
	}

	job->bos[CMD_BUF_IDX]->job_status = job->job_status;
	dma_fence_signal(job->done_fence);

	trace_job("done", job);
	ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n",
		 job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx,
		 job->job_status);

	ivpu_job_destroy(job);
	ivpu_stop_job_timeout_detection(vdev);

	ivpu_rpm_put(vdev);

	if (!xa_empty(&vdev->submitted_jobs_xa))
		ivpu_start_job_timeout_detection(vdev);

	return 0;
}

void ivpu_jobs_abort_all(struct ivpu_device *vdev)
{
	struct ivpu_job *job;
	unsigned long id;

	mutex_lock(&vdev->submitted_jobs_lock);

	xa_for_each(&vdev->submitted_jobs_xa, id, job)
		ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED);

	mutex_unlock(&vdev->submitted_jobs_lock);
}

void ivpu_cmdq_abort_all_jobs(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id)
{
	struct ivpu_job *job;
	unsigned long id;

	mutex_lock(&vdev->submitted_jobs_lock);

	xa_for_each(&vdev->submitted_jobs_xa, id, job)
		if (job->file_priv->ctx.id == ctx_id && job->cmdq_id == cmdq_id)
			ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED);

	mutex_unlock(&vdev->submitted_jobs_lock);
}

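/*
 * Queue a job on a command queue and ring the doorbell. The legacy path
 * (cmdq_id == 0) picks or creates an implicit queue for the requested priority,
 * while the managed path uses a queue created with the command queue create
 * ioctl. The job is tracked in submitted_jobs_xa until the firmware reports
 * its completion.
 */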
static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id)
{
	struct ivpu_file_priv *file_priv = job->file_priv;
	struct ivpu_device *vdev = job->vdev;
	struct ivpu_cmdq *cmdq;
	bool is_first_job;
	int ret;

	ret = ivpu_rpm_get(vdev);
	if (ret < 0)
		return ret;

	mutex_lock(&vdev->submitted_jobs_lock);
	mutex_lock(&file_priv->lock);

	if (cmdq_id == 0)
		cmdq = ivpu_cmdq_acquire_legacy(file_priv, priority);
	else
		cmdq = ivpu_cmdq_acquire(file_priv, cmdq_id);
	if (!cmdq) {
		ret = -EINVAL;
		goto err_unlock;
	}

	ret = ivpu_cmdq_register(file_priv, cmdq);
	if (ret) {
		ivpu_err(vdev, "Failed to register command queue: %d\n", ret);
		goto err_unlock;
	}

	ret = ivpu_preemption_job_init(vdev, file_priv, cmdq, job);
	if (ret) {
		ivpu_err(vdev, "Failed to initialize preemption buffers for job %d: %d\n",
			 job->job_id, ret);
		goto err_unlock;
	}

	job->cmdq_id = cmdq->id;

	is_first_job = xa_empty(&vdev->submitted_jobs_xa);
	ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit,
			      &file_priv->job_id_next, GFP_KERNEL);
	if (ret < 0) {
		ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n",
			 file_priv->ctx.id);
		ret = -EBUSY;
		goto err_unlock;
	}

	ret = ivpu_cmdq_push_job(cmdq, job);
	if (ret)
		goto err_erase_xa;

	ivpu_start_job_timeout_detection(vdev);

	if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) {
		cmdq->jobq->header.head = cmdq->jobq->header.tail;
		wmb(); /* Flush WC buffer for jobq header */
	} else {
		ivpu_cmdq_ring_db(vdev, cmdq);
		if (is_first_job)
			vdev->busy_start_ts = ktime_get();
	}

	trace_job("submit", job);
	ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d cmdq_id %u engine %d prio %d addr 0x%llx next %d\n",
		 job->job_id, file_priv->ctx.id, cmdq->id, job->engine_idx, cmdq->priority,
		 job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);

	mutex_unlock(&file_priv->lock);

	if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW))
		ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);

	mutex_unlock(&vdev->submitted_jobs_lock);

	return 0;

err_erase_xa:
	xa_erase(&vdev->submitted_jobs_xa, job->job_id);
err_unlock:
	mutex_unlock(&file_priv->lock);
	mutex_unlock(&vdev->submitted_jobs_lock);
	ivpu_rpm_put(vdev);
	return ret;
}

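/*
 * Look up all buffer handles, bind each BO and validate the command buffer
 * offset and the optional user-provided preemption buffer. The job done fence
 * is added to every BO reservation: as a write fence on the command buffer and
 * as a bookkeeping fence on the remaining buffers.
 */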
static int
ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 *buf_handles,
				u32 buf_count, u32 commands_offset, u32 preempt_buffer_index)
{
	struct ivpu_file_priv *file_priv = job->file_priv;
	struct ivpu_device *vdev = file_priv->vdev;
	struct ww_acquire_ctx acquire_ctx;
	enum dma_resv_usage usage;
	struct ivpu_bo *bo;
	int ret;
	u32 i;

	for (i = 0; i < buf_count; i++) {
		struct drm_gem_object *obj = drm_gem_object_lookup(file, buf_handles[i]);

		if (!obj) {
			ivpu_dbg(vdev, IOCTL, "Failed to lookup GEM object with handle %u\n",
				 buf_handles[i]);
			return -ENOENT;
		}

		job->bos[i] = to_ivpu_bo(obj);

		ret = ivpu_bo_bind(job->bos[i]);
		if (ret)
			return ret;
	}

	bo = job->bos[CMD_BUF_IDX];
	if (!dma_resv_test_signaled(bo->base.base.resv, DMA_RESV_USAGE_READ)) {
		ivpu_dbg(vdev, IOCTL, "Buffer is already in use by another job\n");
		return -EBUSY;
	}

	if (commands_offset >= ivpu_bo_size(bo)) {
		ivpu_dbg(vdev, IOCTL, "Invalid commands offset %u for buffer size %zu\n",
			 commands_offset, ivpu_bo_size(bo));
		return -EINVAL;
	}

	job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset;

	if (preempt_buffer_index) {
		struct ivpu_bo *preempt_bo = job->bos[preempt_buffer_index];

		if (ivpu_bo_size(preempt_bo) < ivpu_fw_preempt_buf_size(vdev)) {
			ivpu_dbg(vdev, IOCTL, "Preemption buffer is too small\n");
			return -EINVAL;
		}
		if (ivpu_bo_is_mappable(preempt_bo)) {
			ivpu_dbg(vdev, IOCTL, "Preemption buffer cannot be mappable\n");
			return -EINVAL;
		}
		job->primary_preempt_buf = preempt_bo;
	}

	ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count,
					&acquire_ctx);
	if (ret) {
		ivpu_warn_ratelimited(vdev, "Failed to lock reservations: %d\n", ret);
		return ret;
	}

	for (i = 0; i < buf_count; i++) {
		ret = dma_resv_reserve_fences(job->bos[i]->base.base.resv, 1);
		if (ret) {
			ivpu_warn_ratelimited(vdev, "Failed to reserve fences: %d\n", ret);
			goto unlock_reservations;
		}
	}

	for (i = 0; i < buf_count; i++) {
		usage = (i == CMD_BUF_IDX) ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_BOOKKEEP;
		dma_resv_add_fence(job->bos[i]->base.base.resv, job->done_fence, usage);
	}

unlock_reservations:
	drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx);

	wmb(); /* Flush write combining buffers */

	return ret;
}

static int ivpu_submit(struct drm_file *file, struct ivpu_file_priv *file_priv, u32 cmdq_id,
		       u32 buffer_count, u32 engine, void __user *buffers_ptr, u32 cmds_offset,
		       u32 preempt_buffer_index, u8 priority)
{
	struct ivpu_device *vdev = file_priv->vdev;
	struct ivpu_job *job;
	u32 *buf_handles;
	int idx, ret;

	buf_handles = kcalloc(buffer_count, sizeof(u32), GFP_KERNEL);
	if (!buf_handles)
		return -ENOMEM;

	ret = copy_from_user(buf_handles, buffers_ptr, buffer_count * sizeof(u32));
	if (ret) {
		ret = -EFAULT;
		goto err_free_handles;
	}

	if (!drm_dev_enter(&vdev->drm, &idx)) {
		ret = -ENODEV;
		goto err_free_handles;
	}

	ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u cmdq_id %u buf_count %u\n",
		 file_priv->ctx.id, cmdq_id, buffer_count);

	job = ivpu_job_create(file_priv, engine, buffer_count);
	if (!job) {
		ret = -ENOMEM;
		goto err_exit_dev;
	}

	ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, buffer_count, cmds_offset,
					      preempt_buffer_index);
	if (ret)
		goto err_destroy_job;

	down_read(&vdev->pm->reset_lock);
	ret = ivpu_job_submit(job, priority, cmdq_id);
	up_read(&vdev->pm->reset_lock);
	if (ret)
		goto err_signal_fence;

	drm_dev_exit(idx);
	kfree(buf_handles);
	return ret;

err_signal_fence:
	dma_fence_signal(job->done_fence);
err_destroy_job:
	ivpu_job_destroy(job);
err_exit_dev:
	drm_dev_exit(idx);
err_free_handles:
	kfree(buf_handles);
	return ret;
}

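/*
 * Legacy submit ioctl handler: jobs are queued on an implicitly created
 * per-priority command queue (cmdq_id == 0) and no user-supplied preemption
 * buffer is used.
 */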
int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct ivpu_file_priv *file_priv = file->driver_priv;
	struct ivpu_device *vdev = file_priv->vdev;
	struct drm_ivpu_submit *args = data;
	u8 priority;

	if (args->engine != DRM_IVPU_ENGINE_COMPUTE) {
		ivpu_dbg(vdev, IOCTL, "Invalid engine %d\n", args->engine);
		return -EINVAL;
	}

	if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) {
		ivpu_dbg(vdev, IOCTL, "Invalid priority %d\n", args->priority);
		return -EINVAL;
	}

	if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT) {
		ivpu_dbg(vdev, IOCTL, "Invalid buffer count %u\n", args->buffer_count);
		return -EINVAL;
	}

	if (!IS_ALIGNED(args->commands_offset, 8)) {
		ivpu_dbg(vdev, IOCTL, "Invalid commands offset %u\n", args->commands_offset);
		return -EINVAL;
	}

	if (!file_priv->ctx.id) {
		ivpu_dbg(vdev, IOCTL, "Context not initialized\n");
		return -EINVAL;
	}

	if (file_priv->has_mmu_faults) {
		ivpu_dbg(vdev, IOCTL, "Context %u has MMU faults\n", file_priv->ctx.id);
		return -EBADFD;
	}

	priority = ivpu_job_to_jsm_priority(args->priority);

	return ivpu_submit(file, file_priv, 0, args->buffer_count, args->engine,
			   (void __user *)args->buffers_ptr, args->commands_offset, 0, priority);
}

int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct ivpu_file_priv *file_priv = file->driver_priv;
	struct ivpu_device *vdev = file_priv->vdev;
	struct drm_ivpu_cmdq_submit *args = data;

	if (!ivpu_is_capable(file_priv->vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) {
		ivpu_dbg(vdev, IOCTL, "Command queue management not supported\n");
		return -ENODEV;
	}

	if (args->cmdq_id < IVPU_CMDQ_MIN_ID || args->cmdq_id > IVPU_CMDQ_MAX_ID) {
		ivpu_dbg(vdev, IOCTL, "Invalid command queue ID %u\n", args->cmdq_id);
		return -EINVAL;
	}

	if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT) {
		ivpu_dbg(vdev, IOCTL, "Invalid buffer count %u\n", args->buffer_count);
		return -EINVAL;
	}

	if (args->preempt_buffer_index >= args->buffer_count) {
		ivpu_dbg(vdev, IOCTL, "Invalid preemption buffer index %u\n",
			 args->preempt_buffer_index);
		return -EINVAL;
	}

	if (!IS_ALIGNED(args->commands_offset, 8)) {
		ivpu_dbg(vdev, IOCTL, "Invalid commands offset %u\n", args->commands_offset);
		return -EINVAL;
	}

	if (!file_priv->ctx.id) {
		ivpu_dbg(vdev, IOCTL, "Context not initialized\n");
		return -EINVAL;
	}

	if (file_priv->has_mmu_faults) {
		ivpu_dbg(vdev, IOCTL, "Context %u has MMU faults\n", file_priv->ctx.id);
		return -EBADFD;
	}

	return ivpu_submit(file, file_priv, args->cmdq_id, args->buffer_count, VPU_ENGINE_COMPUTE,
			   (void __user *)args->buffers_ptr, args->commands_offset,
			   args->preempt_buffer_index, 0);
}

int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct ivpu_file_priv *file_priv = file->driver_priv;
	struct ivpu_device *vdev = file_priv->vdev;
	struct drm_ivpu_cmdq_create *args = data;
	struct ivpu_cmdq *cmdq;
	int ret;

	if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) {
		ivpu_dbg(vdev, IOCTL, "Command queue management not supported\n");
		return -ENODEV;
	}

	if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) {
		ivpu_dbg(vdev, IOCTL, "Invalid priority %d\n", args->priority);
		return -EINVAL;
	}

	ret = ivpu_rpm_get(vdev);
	if (ret < 0)
		return ret;

	mutex_lock(&file_priv->lock);

	cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), args->flags);
	if (cmdq)
		args->cmdq_id = cmdq->id;

	mutex_unlock(&file_priv->lock);

	ivpu_rpm_put(vdev);

	return cmdq ? 0 : -ENOMEM;
}

int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct ivpu_file_priv *file_priv = file->driver_priv;
	struct ivpu_device *vdev = file_priv->vdev;
	struct drm_ivpu_cmdq_destroy *args = data;
	struct ivpu_cmdq *cmdq;
	u32 cmdq_id = 0;
	int ret;

	if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) {
		ivpu_dbg(vdev, IOCTL, "Command queue management not supported\n");
		return -ENODEV;
	}

	ret = ivpu_rpm_get(vdev);
	if (ret < 0)
		return ret;

	mutex_lock(&file_priv->lock);

	cmdq = xa_load(&file_priv->cmdq_xa, args->cmdq_id);
	if (!cmdq || cmdq->is_legacy) {
		ret = -ENOENT;
	} else {
		cmdq_id = cmdq->id;
		ivpu_cmdq_destroy(file_priv, cmdq);
		ret = 0;
	}

	mutex_unlock(&file_priv->lock);

	/* Abort any pending jobs only if cmdq was destroyed */
	if (!ret)
		ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id);

	ivpu_rpm_put(vdev);

	return ret;
}

static void
ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
		       struct vpu_jsm_msg *jsm_msg)
{
	struct vpu_ipc_msg_payload_job_done *payload;

	if (!jsm_msg) {
		ivpu_err(vdev, "IPC message has no JSM payload\n");
		return;
	}

	if (jsm_msg->result != VPU_JSM_STATUS_SUCCESS) {
		ivpu_err(vdev, "Invalid JSM message result: %d\n", jsm_msg->result);
		return;
	}

	payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;

	mutex_lock(&vdev->submitted_jobs_lock);
	if (!ivpu_job_handle_engine_error(vdev, payload->job_id, payload->job_status))
		/* No engine error, complete the job normally */
		ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
	mutex_unlock(&vdev->submitted_jobs_lock);
}

void ivpu_job_done_consumer_init(struct ivpu_device *vdev)
{
	ivpu_ipc_consumer_add(vdev, &vdev->job_done_consumer,
			      VPU_IPC_CHAN_JOB_RET, ivpu_job_done_callback);
}

void ivpu_job_done_consumer_fini(struct ivpu_device *vdev)
{
	ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer);
}

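/*
 * Worker that aborts contexts marked as faulty (MMU faults or jobs that
 * requested an engine reset). With the HW scheduler the engine is reset and
 * resumed around the abort, and jobs belonging to aborted contexts are
 * completed here because the firmware will not send further notifications
 * for them.
 */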
void ivpu_context_abort_work_fn(struct work_struct *work)
{
	struct ivpu_device *vdev = container_of(work, struct ivpu_device, context_abort_work);
	struct ivpu_file_priv *file_priv;
	struct ivpu_job *job;
	unsigned long ctx_id;
	unsigned long id;

	if (drm_WARN_ON(&vdev->drm, pm_runtime_get_if_active(vdev->drm.dev) <= 0))
		return;

	if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
		if (ivpu_jsm_reset_engine(vdev, 0))
			goto runtime_put;

	mutex_lock(&vdev->context_list_lock);
	xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
		if (!file_priv->has_mmu_faults || file_priv->aborted)
			continue;

		mutex_lock(&file_priv->lock);
		ivpu_context_abort_locked(file_priv);
		mutex_unlock(&file_priv->lock);
	}
	mutex_unlock(&vdev->context_list_lock);

	/*
	 * We will not receive new MMU event interrupts until existing events are discarded.
	 * However, we want to discard these events only after aborting the faulty context
	 * to avoid generating new faults from that context.
	 */
	ivpu_mmu_discard_events(vdev);

	if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW)
		goto runtime_put;

	if (ivpu_jsm_hws_resume_engine(vdev, 0))
		goto runtime_put;

	/*
	 * In hardware scheduling mode the NPU has already stopped processing jobs
	 * and won't send us any further notifications, thus we have to free job-related
	 * resources and notify userspace.
	 */
	mutex_lock(&vdev->submitted_jobs_lock);
	xa_for_each(&vdev->submitted_jobs_xa, id, job)
		if (job->file_priv->aborted)
			ivpu_job_signal_and_destroy(vdev, job->job_id, DRM_IVPU_JOB_STATUS_ABORTED);
	mutex_unlock(&vdev->submitted_jobs_lock);

runtime_put:
	pm_runtime_put_autosuspend(vdev->drm.dev);
}