// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020-2026 Intel Corporation
 */

#include <drm/drm_file.h>

#include <linux/bitfield.h>
#include <linux/highmem.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/module.h>
#include <uapi/drm/ivpu_accel.h>

#include "ivpu_drv.h"
#include "ivpu_fw.h"
#include "ivpu_hw.h"
#include "ivpu_ipc.h"
#include "ivpu_job.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_mmu.h"
#include "ivpu_pm.h"
#include "ivpu_trace.h"
#include "vpu_boot_api.h"

/* Index of the command buffer BO within a job's BO array */
#define CMD_BUF_IDX 0
/* Upper bound on the number of BO handles accepted per submit ioctl */
#define JOB_MAX_BUFFER_COUNT 65535

/* Ring the hardware doorbell assigned to @cmdq to notify the NPU of new jobs */
static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
{
        ivpu_hw_db_set(vdev, cmdq->db_id);
}

/*
 * Allocate the per-command-queue preemption buffers required by the firmware.
 *
 * The primary buffer is placed in the user address range, the secondary one in
 * the DMA range. Returns 0 when the firmware needs no preemption buffers
 * (ivpu_fw_preempt_buf_size() == 0) or on success, -ENOMEM on allocation
 * failure (no partial allocation is left behind).
 */
static int ivpu_preemption_buffers_create(struct ivpu_device *vdev,
                                          struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
        if (ivpu_fw_preempt_buf_size(vdev) == 0)
                return 0;

        cmdq->primary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.user,
                                                   vdev->fw->primary_preempt_buf_size,
                                                   DRM_IVPU_BO_WC);
        if (!cmdq->primary_preempt_buf) {
                ivpu_err(vdev, "Failed to create primary preemption buffer\n");
                return -ENOMEM;
        }

        cmdq->secondary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.dma,
                                                     vdev->fw->secondary_preempt_buf_size,
                                                     DRM_IVPU_BO_WC);
        if (!cmdq->secondary_preempt_buf) {
                ivpu_err(vdev, "Failed to create secondary preemption buffer\n");
                goto err_free_primary;
        }

        return 0;

err_free_primary:
        ivpu_bo_free(cmdq->primary_preempt_buf);
        cmdq->primary_preempt_buf = NULL; /* don't leave a dangling pointer for the free path */
        return -ENOMEM;
}

/* Free the preemption buffers of @cmdq, if any were allocated */
static void ivpu_preemption_buffers_free(struct ivpu_device *vdev,
                                         struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
        if (cmdq->primary_preempt_buf)
                ivpu_bo_free(cmdq->primary_preempt_buf);
        if (cmdq->secondary_preempt_buf)
                ivpu_bo_free(cmdq->secondary_preempt_buf);
}

/*
 * Attach preemption buffers to @job before submission.
 *
 * If user space already supplied a preemption buffer the job uses it as-is.
 * Otherwise the per-cmdq kernel buffers are lazily allocated on first use and
 * shared by all jobs on that queue. Returns 0 on success or a negative errno
 * from the buffer allocation.
 */
static int ivpu_preemption_job_init(struct ivpu_device *vdev, struct ivpu_file_priv *file_priv,
                                    struct ivpu_cmdq *cmdq, struct ivpu_job *job)
{
        int ret;

        /* Use preemption buffer provided by the user space */
        if (job->primary_preempt_buf)
                return 0;

        if (!cmdq->primary_preempt_buf) {
                /* Allocate per command queue preemption buffers */
                ret = ivpu_preemption_buffers_create(vdev, file_priv, cmdq);
                if (ret)
                        return ret;
        }

        /* Use preemption buffers allocated by the kernel */
        job->primary_preempt_buf = cmdq->primary_preempt_buf;
        job->secondary_preempt_buf = cmdq->secondary_preempt_buf;

        return 0;
}

/*
 * Allocate a command queue object together with its 4 KiB backing buffer.
 * Returns NULL on allocation failure.
 */
static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv)
{
        struct ivpu_device *vdev = file_priv->vdev;
        struct ivpu_cmdq *cmdq;

        cmdq = kzalloc_obj(*cmdq);
        if (!cmdq)
                return NULL;

        cmdq->mem = ivpu_bo_create_global(vdev, SZ_4K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
        if (!cmdq->mem)
                goto err_free_cmdq;

        return cmdq;

err_free_cmdq:
        kfree(cmdq);
        return NULL;
}

/**
 * ivpu_cmdq_get_entry_count - Calculate the number of entries in the command queue.
 * @cmdq: Pointer to the command queue structure.
 *
 * Returns the number of entries that can fit in the command queue memory.
 */
static inline u32 ivpu_cmdq_get_entry_count(struct ivpu_cmdq *cmdq)
{
        size_t size = ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header);

        return size / sizeof(struct vpu_job_queue_entry);
}

/**
 * ivpu_cmdq_get_flags - Get command queue flags based on input flags and test mode.
 * @vdev: Pointer to the ivpu device structure.
 * @flags: Input flags to determine the command queue flags.
 *
 * Returns the calculated command queue flags, considering both the input flags
 * and the current test mode settings.
 */
static u32 ivpu_cmdq_get_flags(struct ivpu_device *vdev, u32 flags)
{
        u32 cmdq_flags = 0;

        /* TURBO is only honored on 40XX and newer IP generations */
        if ((flags & DRM_IVPU_CMDQ_FLAG_TURBO) && (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX))
                cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE;

        /* Test mode can override the TURBO flag coming from the application */
        if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_ENABLE)
                cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
        if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_DISABLE)
                cmdq_flags &= ~VPU_JOB_QUEUE_FLAGS_TURBO_MODE;

        return cmdq_flags;
}

/* Release a command queue: preemption buffers, backing BO, then the object itself */
static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
        ivpu_preemption_buffers_free(file_priv->vdev, file_priv, cmdq);
        ivpu_bo_free(cmdq->mem);
        kfree(cmdq);
}

/*
 * Create a command queue for @file_priv with the given JSM @priority and user
 * @flags, assign it a cyclic ID from the context's cmdq XArray and initialize
 * the in-memory job queue header. Caller must hold file_priv->lock.
 * Returns the new queue or NULL on failure.
 */
static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 priority, u32 flags)
{
        struct ivpu_device *vdev = file_priv->vdev;
        struct ivpu_cmdq *cmdq = NULL;
        int ret;

        lockdep_assert_held(&file_priv->lock);

        cmdq = ivpu_cmdq_alloc(file_priv);
        if (!cmdq) {
                ivpu_err(vdev, "Failed to allocate command queue\n");
                return NULL;
        }
        ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit,
                              &file_priv->cmdq_id_next, GFP_KERNEL);
        if (ret < 0) {
                ivpu_dbg(vdev, IOCTL, "Failed to allocate command queue ID: %d\n", ret);
                goto err_free_cmdq;
        }

        cmdq->entry_count = ivpu_cmdq_get_entry_count(cmdq);
        cmdq->priority = priority;

        cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem);
        cmdq->jobq->header.engine_idx = VPU_ENGINE_COMPUTE;
        cmdq->jobq->header.flags = ivpu_cmdq_get_flags(vdev, flags);

        ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d, flags 0x%08x\n",
                 cmdq->id, file_priv->ctx.id, cmdq->jobq->header.flags);
        return cmdq;

err_free_cmdq:
        ivpu_cmdq_free(file_priv, cmdq);
        return NULL;
}

/*
 * Register @cmdq with the firmware's hardware scheduler (HWS): create the
 * firmware-side queue object, then set its scheduling properties.
 * Returns 0 on success or a negative errno from the JSM calls.
 */
static int
ivpu_hws_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u16 engine,
                   u8 priority)
{
        struct ivpu_device *vdev = file_priv->vdev;
        int ret;

        ret = ivpu_jsm_hws_create_cmdq(vdev, file_priv->ctx.id, file_priv->ctx.id, cmdq->id,
                                       task_pid_nr(current), engine,
                                       cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
        if (ret)
                return ret;

        ret = ivpu_jsm_hws_set_context_sched_properties(vdev, file_priv->ctx.id, cmdq->id,
                                                        priority);
        if (ret)
                return ret;

        return 0;
}

/*
 * Allocate a device doorbell ID for @cmdq, enforce the per-user doorbell limit
 * and register the doorbell with the firmware (HWS or OS scheduler variant).
 * On any failure the doorbell count and ID allocation are rolled back.
 */
static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
        struct ivpu_user_limits *limits = file_priv->user_limits;
        struct ivpu_device *vdev = file_priv->vdev;
        int ret;

        /* Account the doorbell first; undo on every error path below */
        if (atomic_inc_return(&limits->db_count) > limits->max_db_count) {
                ivpu_dbg(vdev, IOCTL, "Maximum number of %u doorbells for uid %u reached\n",
                         limits->max_db_count, limits->uid);
                ret = -EBUSY;
                goto err_dec_db_count;
        }

        ret = xa_alloc_cyclic(&vdev->db_xa, &cmdq->db_id, NULL, vdev->db_limit, &vdev->db_next,
                              GFP_KERNEL);
        if (ret < 0) {
                ivpu_dbg(vdev, IOCTL, "Failed to allocate doorbell ID: %d\n", ret);
                goto err_dec_db_count;
        }

        if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
                ret = ivpu_jsm_hws_register_db(vdev, file_priv->ctx.id, cmdq->id, cmdq->db_id,
                                               cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
        else
                ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id,
                                           cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
        if (ret) {
                xa_erase(&vdev->db_xa, cmdq->db_id);
                cmdq->db_id = 0;
                goto err_dec_db_count;
        }

        ivpu_dbg(vdev, JOB, "DB %d registered to cmdq %d ctx %d priority %d\n",
                 cmdq->db_id, cmdq->id, file_priv->ctx.id, cmdq->priority);
        return 0;

err_dec_db_count:
        atomic_dec(&limits->db_count);
        return ret;
}

/* Reset the job queue ring to empty (head == tail) and flush it to memory */
static void ivpu_cmdq_jobq_reset(struct ivpu_device *vdev, struct vpu_job_queue *jobq)
{
        jobq->header.head = 0;
        jobq->header.tail = 0;

        wmb(); /* Flush WC buffer for jobq->header */
}

/*
 * Make @cmdq usable by the firmware: reset its ring, create the HWS queue if
 * hardware scheduling is active, and register a doorbell. Idempotent — a queue
 * that already has a doorbell is left untouched. Caller holds file_priv->lock.
 */
static int ivpu_cmdq_register(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
        struct ivpu_device *vdev = file_priv->vdev;
        int ret;

        lockdep_assert_held(&file_priv->lock);

        if (cmdq->db_id)
                return 0;

        ivpu_cmdq_jobq_reset(vdev, cmdq->jobq);

        if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
                ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, cmdq->priority);
                if (ret)
                        return ret;
        }

        ret = ivpu_register_db(file_priv, cmdq);
        if (ret)
                return ret;

        return 0;
}

/*
 * Undo ivpu_cmdq_register(): unregister the doorbell and, under hardware
 * scheduling, destroy the firmware-side queue. Local bookkeeping (doorbell
 * XArray entry and per-user count) is always released even if the JSM calls
 * fail, so this function intentionally returns 0. Caller holds file_priv->lock.
 */
static int ivpu_cmdq_unregister(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
        struct ivpu_device *vdev = file_priv->vdev;
        int ret;

        lockdep_assert_held(&file_priv->lock);

        if (!cmdq->db_id)
                return 0;

        ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id);
        if (!ret)
                ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id);

        if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
                ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id);
                if (!ret)
                        ivpu_dbg(vdev, JOB, "Command queue %d destroyed, ctx %d\n",
                                 cmdq->id, file_priv->ctx.id);
        }

        xa_erase(&file_priv->vdev->db_xa, cmdq->db_id);
        atomic_dec(&file_priv->user_limits->db_count);
        cmdq->db_id = 0;

        return 0;
}

/*
 * Map a UAPI job priority to the JSM scheduling band. DEFAULT maps to NORMAL;
 * the remaining UAPI values are offset by one relative to the JSM bands.
 */
static inline u8 ivpu_job_to_jsm_priority(u8 priority)
{
        if (priority == DRM_IVPU_JOB_PRIORITY_DEFAULT)
                return VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL;

        return priority - 1;
}

/* Fully tear down @cmdq: unregister from FW, drop its ID, free its memory */
static void ivpu_cmdq_destroy(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
        lockdep_assert_held(&file_priv->lock);
        ivpu_cmdq_unregister(file_priv, cmdq);
        xa_erase(&file_priv->cmdq_xa, cmdq->id);
        ivpu_cmdq_free(file_priv, cmdq);
}

/*
 * Find (or lazily create) the legacy command queue with the requested priority.
 * Legacy queues back the original submit ioctl, which has no explicit cmdq ID.
 * Caller must hold file_priv->lock. Returns NULL on allocation failure.
 */
static struct ivpu_cmdq *ivpu_cmdq_acquire_legacy(struct ivpu_file_priv *file_priv, u8 priority)
{
        struct ivpu_cmdq *cmdq;
        unsigned long id;

        lockdep_assert_held(&file_priv->lock);

        xa_for_each(&file_priv->cmdq_xa, id, cmdq)
                if (cmdq->is_legacy && cmdq->priority == priority)
                        break;

        if (!cmdq) {
                cmdq = ivpu_cmdq_create(file_priv, priority, 0);
                if (!cmdq)
                        return NULL;
                cmdq->is_legacy = true;
        }

        return cmdq;
}

/* Look up a user-managed command queue by ID. Caller holds file_priv->lock. */
static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u32 cmdq_id)
{
        struct ivpu_device *vdev = file_priv->vdev;
        struct ivpu_cmdq *cmdq;

        lockdep_assert_held(&file_priv->lock);

        cmdq = xa_load(&file_priv->cmdq_xa, cmdq_id);
        if (!cmdq) {
                ivpu_dbg(vdev, IOCTL, "Failed to find command queue with ID: %u\n", cmdq_id);
                return NULL;
        }

        return cmdq;
}

/* Destroy every command queue owned by @file_priv (context teardown path) */
void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv)
{
        struct ivpu_cmdq *cmdq;
        unsigned long cmdq_id;

        lockdep_assert_held(&file_priv->lock);

        xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
                ivpu_cmdq_destroy(file_priv, cmdq);
}

/*
 * Mark the doorbell as unregistered
 * This function needs to be called when the VPU hardware is restarted
 * and FW loses job queue state. The next time job queue is used it
 * will be registered again.
 */
static void ivpu_cmdq_reset(struct ivpu_file_priv *file_priv)
{
        struct ivpu_cmdq *cmdq;
        unsigned long cmdq_id;

        mutex_lock(&file_priv->lock);

        xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) {
                if (cmdq->db_id) {
                        /* Only host-side bookkeeping; FW state is gone after restart */
                        xa_erase(&file_priv->vdev->db_xa, cmdq->db_id);
                        atomic_dec(&file_priv->user_limits->db_count);
                        cmdq->db_id = 0;
                }
        }

        mutex_unlock(&file_priv->lock);
}

/* Apply ivpu_cmdq_reset() to every open context (called after a HW restart) */
void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev)
{
        struct ivpu_file_priv *file_priv;
        unsigned long ctx_id;

        mutex_lock(&vdev->context_list_lock);

        xa_for_each(&vdev->context_xa, ctx_id, file_priv)
                ivpu_cmdq_reset(file_priv);

        mutex_unlock(&vdev->context_list_lock);
}

/*
 * Abort a faulty context: unregister all its queues, release the FW context
 * (OS scheduling mode only), stop further MMU SSID events and mark the context
 * aborted so it is skipped by subsequent abort passes. Caller holds
 * file_priv->lock.
 */
void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv)
{
        struct ivpu_device *vdev = file_priv->vdev;
        struct ivpu_cmdq *cmdq;
        unsigned long cmdq_id;

        lockdep_assert_held(&file_priv->lock);
        ivpu_dbg(vdev, JOB, "Context ID: %u abort\n", file_priv->ctx.id);

        xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
                ivpu_cmdq_unregister(file_priv, cmdq);

        if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS)
                ivpu_jsm_context_release(vdev, file_priv->ctx.id);

        ivpu_mmu_disable_ssid_events(vdev, file_priv->ctx.id);

        file_priv->aborted = true;
}

/*
 * Write @job into the next free slot of @cmdq's ring buffer and publish the
 * new tail. Returns -EBUSY when the ring is full. The two wmb() calls keep the
 * entry contents ordered before the tail update and flush the write-combined
 * mapping so the firmware observes a consistent queue.
 */
static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
{
        struct ivpu_device *vdev = job->vdev;
        struct vpu_job_queue_header *header = &cmdq->jobq->header;
        struct vpu_job_queue_entry *entry;
        u32 tail = READ_ONCE(header->tail);
        u32 next_entry = (tail + 1) % cmdq->entry_count;

        /* Check if there is space left in job queue */
        if (next_entry == header->head) {
                ivpu_dbg(vdev, JOB, "Job queue full: ctx %d cmdq %d db %d head %d tail %d\n",
                         job->file_priv->ctx.id, cmdq->id, cmdq->db_id, header->head, tail);
                return -EBUSY;
        }

        entry = &cmdq->jobq->slot[tail].job;
        entry->batch_buf_addr = job->cmd_buf_vpu_addr;
        entry->job_id = job->job_id;
        entry->flags = 0;
        if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION))
                entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK;

        if (job->primary_preempt_buf) {
                entry->primary_preempt_buf_addr = job->primary_preempt_buf->vpu_addr;
                entry->primary_preempt_buf_size = ivpu_bo_size(job->primary_preempt_buf);
        }

        if (job->secondary_preempt_buf) {
                entry->secondary_preempt_buf_addr = job->secondary_preempt_buf->vpu_addr;
                entry->secondary_preempt_buf_size = ivpu_bo_size(job->secondary_preempt_buf);
        }

        wmb(); /* Ensure that tail is updated after filling entry */
        header->tail = next_entry;
        wmb(); /* Flush WC buffer for jobq header */

        return 0;
}

/* Driver dma_fence wrapper signalled when the corresponding job completes */
struct ivpu_fence {
        struct dma_fence base;
        spinlock_t lock; /* protects base */
        struct ivpu_device *vdev;
};

static inline struct ivpu_fence *to_vpu_fence(struct dma_fence *fence)
{
        return container_of(fence, struct ivpu_fence, base);
}

static const char *ivpu_fence_get_driver_name(struct dma_fence *fence)
{
        return DRIVER_NAME;
}

static const char *ivpu_fence_get_timeline_name(struct dma_fence *fence)
{
        struct ivpu_fence *ivpu_fence = to_vpu_fence(fence);

        return dev_name(ivpu_fence->vdev->drm.dev);
}

static const struct dma_fence_ops ivpu_fence_ops = {
        .get_driver_name = ivpu_fence_get_driver_name,
        .get_timeline_name = ivpu_fence_get_timeline_name,
};

/*
 * Allocate and initialize a job-completion fence on its own fence context.
 * Returns the embedded dma_fence or NULL on allocation failure; released via
 * dma_fence_put() by the last reference holder.
 */
static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev)
{
        struct ivpu_fence *fence;

        fence = kzalloc_obj(*fence);
        if (!fence)
                return NULL;

        fence->vdev = vdev;
        spin_lock_init(&fence->lock);
        dma_fence_init(&fence->base, &ivpu_fence_ops, &fence->lock, dma_fence_context_alloc(1), 1);

        return &fence->base;
}

/*
 * Release all resources held by @job: BO references, the done fence reference
 * and the file_priv reference taken at creation, then the job itself.
 */
static void ivpu_job_destroy(struct ivpu_job *job)
{
        struct ivpu_device *vdev = job->vdev;
        u32 i;

        ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d cmdq_id %u engine %d",
                 job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx);

        for (i = 0; i < job->bo_count; i++)
                if (job->bos[i])
                        drm_gem_object_put(&job->bos[i]->base.base);

        dma_fence_put(job->done_fence);
        ivpu_file_priv_put(&job->file_priv);
        kfree(job);
}

/*
 * Allocate a job with room for @bo_count BO pointers, create its completion
 * fence and take a reference on @file_priv. Returns NULL on failure.
 */
static struct ivpu_job *
ivpu_job_create(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
{
        struct ivpu_device *vdev = file_priv->vdev;
        struct ivpu_job *job;

        job = kzalloc_flex(*job, bos, bo_count);
        if (!job)
                return NULL;

        job->vdev = vdev;
        job->engine_idx = engine_idx;
        job->bo_count = bo_count;
        job->done_fence = ivpu_fence_create(vdev);
        if (!job->done_fence) {
                ivpu_err(vdev, "Failed to create a fence\n");
                goto err_free_job;
        }

        job->file_priv = ivpu_file_priv_get(file_priv);

        trace_job("create", job);
        ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx);
        return job;

err_free_job:
        kfree(job);
        return NULL;
}

/*
 * Remove @job_id from the submitted-jobs XArray. When this was the last
 * in-flight job, fold the elapsed busy interval into vdev->busy_time.
 * Returns the removed job or NULL. Caller holds submitted_jobs_lock.
 */
static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device *vdev, u32 job_id)
{
        struct ivpu_job *job;

        lockdep_assert_held(&vdev->submitted_jobs_lock);

        job = xa_erase(&vdev->submitted_jobs_xa, job_id);
        if (xa_empty(&vdev->submitted_jobs_xa) && job) {
                vdev->busy_time = ktime_add(ktime_sub(ktime_get(), vdev->busy_start_ts),
                                            vdev->busy_time);
        }

        return job;
}

/*
 * Handle job statuses that require an engine reset. Returns true when the
 * error was taken over by the context-abort worker (the job is kept alive and
 * will be completed by the abort path); false when the caller should complete
 * the job normally. Caller holds submitted_jobs_lock.
 */
bool ivpu_job_handle_engine_error(struct ivpu_device *vdev, u32 job_id, u32 job_status)
{
        lockdep_assert_held(&vdev->submitted_jobs_lock);

        switch (job_status) {
        case VPU_JSM_STATUS_PROCESSING_ERR:
        case VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MIN ... VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MAX:
        {
                struct ivpu_job *job = xa_load(&vdev->submitted_jobs_xa, job_id);

                if (!job)
                        return false;

                /* Trigger an engine reset */
                guard(mutex)(&job->file_priv->lock);

                job->job_status = job_status;

                /* Context already marked faulty - abort is in flight */
                if (job->file_priv->has_mmu_faults)
                        return false;

                /*
                 * Mark context as faulty and defer destruction of the job to jobs abort thread
                 * handler to synchronize between both faults and jobs returning context violation
                 * status and ensure both are handled in the same way
                 */
                job->file_priv->has_mmu_faults = true;
                atomic_set(&vdev->faults_detected, 1);
                queue_work(system_percpu_wq, &vdev->context_abort_work);
                return true;
        }
        default:
                /* Complete job with error status, engine reset not required */
                break;
        }

        return false;
}

/*
 * Complete the job identified by @job_id: record its final status, signal the
 * done fence, destroy the job and rebalance the timeout detector and runtime
 * PM reference taken at submit. Caller holds submitted_jobs_lock. Returns
 * -ENOENT if the job is not tracked.
 */
static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status)
{
        struct ivpu_job *job;

        lockdep_assert_held(&vdev->submitted_jobs_lock);

        job = xa_load(&vdev->submitted_jobs_xa, job_id);
        if (!job)
                return -ENOENT;

        ivpu_job_remove_from_submitted_jobs(vdev, job_id);

        /* A status already stored on the job (engine error path) takes precedence */
        if (job->job_status == VPU_JSM_STATUS_SUCCESS) {
                if (job->file_priv->has_mmu_faults)
                        job->job_status = DRM_IVPU_JOB_STATUS_ABORTED;
                else
                        job->job_status = job_status;
        }

        job->bos[CMD_BUF_IDX]->job_status = job->job_status;
        dma_fence_signal(job->done_fence);

        trace_job("done", job);
        ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n",
                 job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx,
                 job->job_status);

        ivpu_job_destroy(job);
        ivpu_stop_job_timeout_detection(vdev);

        ivpu_rpm_put(vdev);

        /* Re-arm the watchdog if other jobs are still in flight */
        if (!xa_empty(&vdev->submitted_jobs_xa))
                ivpu_start_job_timeout_detection(vdev);

        return 0;
}

/* Abort every submitted job on the device with ABORTED status */
void ivpu_jobs_abort_all(struct ivpu_device *vdev)
{
        struct ivpu_job *job;
        unsigned long id;

        mutex_lock(&vdev->submitted_jobs_lock);

        xa_for_each(&vdev->submitted_jobs_xa, id, job)
                ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED);

        mutex_unlock(&vdev->submitted_jobs_lock);
}

/* Abort all submitted jobs belonging to one command queue of one context */
void ivpu_cmdq_abort_all_jobs(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id)
{
        struct ivpu_job *job;
        unsigned long id;

        mutex_lock(&vdev->submitted_jobs_lock);

        xa_for_each(&vdev->submitted_jobs_xa, id, job)
                if (job->file_priv->ctx.id == ctx_id && job->cmdq_id == cmdq_id)
                        ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED);

        mutex_unlock(&vdev->submitted_jobs_lock);
}

/*
 * Submit @job to a command queue (legacy queue when @cmdq_id is 0). Takes a
 * runtime PM reference that is dropped on completion, tracks the job in
 * submitted_jobs_xa, pushes it to the ring and rings the doorbell. Lock order:
 * submitted_jobs_lock, then file_priv->lock. In NULL_HW test mode the job is
 * completed immediately instead of ringing the hardware.
 */
static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id)
{
        struct ivpu_file_priv *file_priv = job->file_priv;
        struct ivpu_device *vdev = job->vdev;
        struct ivpu_cmdq *cmdq;
        bool is_first_job;
        int ret;

        ret = ivpu_rpm_get(vdev);
        if (ret < 0)
                return ret;

        mutex_lock(&vdev->submitted_jobs_lock);
        mutex_lock(&file_priv->lock);

        if (cmdq_id == 0)
                cmdq = ivpu_cmdq_acquire_legacy(file_priv, priority);
        else
                cmdq = ivpu_cmdq_acquire(file_priv, cmdq_id);
        if (!cmdq) {
                ret = -EINVAL;
                goto err_unlock;
        }

        ret = ivpu_cmdq_register(file_priv, cmdq);
        if (ret) {
                ivpu_err(vdev, "Failed to register command queue: %d\n", ret);
                goto err_unlock;
        }

        ret = ivpu_preemption_job_init(vdev, file_priv, cmdq, job);
        if (ret) {
                ivpu_err(vdev, "Failed to initialize preemption buffers for job %d: %d\n",
                         job->job_id, ret);
                goto err_unlock;
        }

        job->cmdq_id = cmdq->id;

        is_first_job = xa_empty(&vdev->submitted_jobs_xa);
        ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit,
                              &file_priv->job_id_next, GFP_KERNEL);
        if (ret < 0) {
                ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n",
                         file_priv->ctx.id);
                ret = -EBUSY;
                goto err_unlock;
        }

        ret = ivpu_cmdq_push_job(cmdq, job);
        if (ret)
                goto err_erase_xa;

        ivpu_start_job_timeout_detection(vdev);

        if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) {
                /* Pretend the FW consumed the job without touching the HW */
                cmdq->jobq->header.head = cmdq->jobq->header.tail;
                wmb(); /* Flush WC buffer for jobq header */
        } else {
                ivpu_cmdq_ring_db(vdev, cmdq);
                if (is_first_job)
                        vdev->busy_start_ts = ktime_get();
        }

        trace_job("submit", job);
        ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d cmdq_id %u engine %d prio %d addr 0x%llx next %d\n",
                 job->job_id, file_priv->ctx.id, cmdq->id, job->engine_idx, cmdq->priority,
                 job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);

        mutex_unlock(&file_priv->lock);

        if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) {
                ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
        }

        mutex_unlock(&vdev->submitted_jobs_lock);

        return 0;

err_erase_xa:
        xa_erase(&vdev->submitted_jobs_xa, job->job_id);
err_unlock:
        mutex_unlock(&file_priv->lock);
        mutex_unlock(&vdev->submitted_jobs_lock);
        ivpu_rpm_put(vdev);
        return ret;
}

/*
 * Resolve the user-supplied BO handles into job->bos, validate the command
 * buffer and optional user preemption buffer, and attach the job's done fence
 * to every BO's reservation object (WRITE usage for the command buffer,
 * BOOKKEEP for the rest). On lookup/validation errors the references already
 * stored in job->bos are released later by ivpu_job_destroy().
 */
static int
ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 *buf_handles,
                                u32 buf_count, u32 commands_offset, u32 preempt_buffer_index)
{
        struct ivpu_file_priv *file_priv = job->file_priv;
        struct ivpu_device *vdev = file_priv->vdev;
        struct ww_acquire_ctx acquire_ctx;
        enum dma_resv_usage usage;
        struct ivpu_bo *bo;
        int ret;
        u32 i;

        for (i = 0; i < buf_count; i++) {
                struct drm_gem_object *obj = drm_gem_object_lookup(file, buf_handles[i]);

                if (!obj) {
                        ivpu_dbg(vdev, IOCTL, "Failed to lookup GEM object with handle %u\n",
                                 buf_handles[i]);
                        return -ENOENT;
                }

                job->bos[i] = to_ivpu_bo(obj);

                ret = ivpu_bo_bind(job->bos[i]);
                if (ret)
                        return ret;
        }

        bo = job->bos[CMD_BUF_IDX];
        if (!dma_resv_test_signaled(bo->base.base.resv, DMA_RESV_USAGE_READ)) {
                ivpu_dbg(vdev, IOCTL, "Buffer is already in use by another job\n");
                return -EBUSY;
        }

        if (commands_offset >= ivpu_bo_size(bo)) {
                ivpu_dbg(vdev, IOCTL, "Invalid commands offset %u for buffer size %zu\n",
                         commands_offset, ivpu_bo_size(bo));
                return -EINVAL;
        }

        job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset;

        if (preempt_buffer_index) {
                struct ivpu_bo *preempt_bo = job->bos[preempt_buffer_index];

                if (ivpu_bo_size(preempt_bo) < ivpu_fw_preempt_buf_size(vdev)) {
                        ivpu_dbg(vdev, IOCTL, "Preemption buffer is too small\n");
                        return -EINVAL;
                }
                if (ivpu_bo_is_mappable(preempt_bo)) {
                        ivpu_dbg(vdev, IOCTL, "Preemption buffer cannot be mappable\n");
                        return -EINVAL;
                }
                job->primary_preempt_buf = preempt_bo;
        }

        ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count,
                                        &acquire_ctx);
        if (ret) {
                ivpu_warn_ratelimited(vdev, "Failed to lock reservations: %d\n", ret);
                return ret;
        }

        for (i = 0; i < buf_count; i++) {
                ret = dma_resv_reserve_fences(job->bos[i]->base.base.resv, 1);
                if (ret) {
                        ivpu_warn_ratelimited(vdev, "Failed to reserve fences: %d\n", ret);
                        goto unlock_reservations;
                }
        }

        for (i = 0; i < buf_count; i++) {
                usage = (i == CMD_BUF_IDX) ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_BOOKKEEP;
                dma_resv_add_fence(job->bos[i]->base.base.resv, job->done_fence, usage);
        }

unlock_reservations:
        drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx);

        wmb(); /* Flush write combining buffers */

        return ret;
}

/*
 * Common submission path shared by the submit and cmdq-submit ioctls: copy the
 * BO handle array from user space, build the job, prepare its BOs and submit
 * it under the PM reset read-lock. On submit failure the already-exported done
 * fence must be signalled before the job is destroyed.
 */
static int ivpu_submit(struct drm_file *file, struct ivpu_file_priv *file_priv, u32 cmdq_id,
                       u32 buffer_count, u32 engine, void __user *buffers_ptr, u32 cmds_offset,
                       u32 preempt_buffer_index, u8 priority)
{
        struct ivpu_device *vdev = file_priv->vdev;
        struct ivpu_job *job;
        u32 *buf_handles;
        int idx, ret;

        buf_handles = kcalloc(buffer_count, sizeof(u32), GFP_KERNEL);
        if (!buf_handles)
                return -ENOMEM;

        ret = copy_from_user(buf_handles, buffers_ptr, buffer_count * sizeof(u32));
        if (ret) {
                ret = -EFAULT;
                goto err_free_handles;
        }

        if (!drm_dev_enter(&vdev->drm, &idx)) {
                ret = -ENODEV;
                goto err_free_handles;
        }

        ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u cmdq_id %u buf_count %u\n",
                 file_priv->ctx.id, cmdq_id, buffer_count);

        job = ivpu_job_create(file_priv, engine, buffer_count);
        if (!job) {
                ret = -ENOMEM;
                goto err_exit_dev;
        }

        ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, buffer_count, cmds_offset,
                                              preempt_buffer_index);
        if (ret)
                goto err_destroy_job;

        down_read(&vdev->pm->reset_lock);
        ret = ivpu_job_submit(job, priority, cmdq_id);
        up_read(&vdev->pm->reset_lock);
        if (ret)
                goto err_signal_fence;

        drm_dev_exit(idx);
        kfree(buf_handles);
        return ret;

err_signal_fence:
        dma_fence_signal(job->done_fence);
err_destroy_job:
        ivpu_job_destroy(job);
err_exit_dev:
        drm_dev_exit(idx);
err_free_handles:
        kfree(buf_handles);
        return ret;
}

/*
 * DRM_IOCTL_IVPU_SUBMIT handler: validate the UAPI arguments and submit on a
 * legacy (implicitly-managed) command queue with the requested priority.
 */
int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
        struct ivpu_file_priv *file_priv = file->driver_priv;
        struct ivpu_device *vdev = file_priv->vdev;
        struct drm_ivpu_submit *args = data;
        u8 priority;

        if (args->engine != DRM_IVPU_ENGINE_COMPUTE) {
                ivpu_dbg(vdev, IOCTL, "Invalid engine %d\n", args->engine);
                return -EINVAL;
        }

        if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) {
                ivpu_dbg(vdev, IOCTL, "Invalid priority %d\n", args->priority);
                return -EINVAL;
        }

        if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT) {
                ivpu_dbg(vdev, IOCTL, "Invalid buffer count %u\n", args->buffer_count);
                return -EINVAL;
        }

        if (!IS_ALIGNED(args->commands_offset, 8)) {
                ivpu_dbg(vdev, IOCTL, "Invalid commands offset %u\n", args->commands_offset);
                return -EINVAL;
        }

        if (!file_priv->ctx.id) {
                ivpu_dbg(vdev, IOCTL, "Context not initialized\n");
                return -EINVAL;
        }

        if (file_priv->has_mmu_faults) {
                ivpu_dbg(vdev, IOCTL, "Context %u has MMU faults\n", file_priv->ctx.id);
                return -EBADFD;
        }

        priority = ivpu_job_to_jsm_priority(args->priority);

        return ivpu_submit(file, file_priv, 0, args->buffer_count, args->engine,
                           (void __user *)args->buffers_ptr, args->commands_offset, 0, priority);
}

/*
 * DRM_IOCTL_IVPU_CMDQ_SUBMIT handler: validate the UAPI arguments and submit
 * on an explicitly user-managed command queue (requires MANAGE_CMDQ capability).
 */
int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
        struct ivpu_file_priv *file_priv = file->driver_priv;
        struct ivpu_device *vdev = file_priv->vdev;
        struct drm_ivpu_cmdq_submit *args = data;

        if (!ivpu_is_capable(file_priv->vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) {
                ivpu_dbg(vdev, IOCTL, "Command queue management not supported\n");
                return -ENODEV;
        }

        if (args->cmdq_id < IVPU_CMDQ_MIN_ID || args->cmdq_id > IVPU_CMDQ_MAX_ID) {
                ivpu_dbg(vdev, IOCTL, "Invalid command queue ID %u\n", args->cmdq_id);
                return -EINVAL;
        }

        if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT) {
                ivpu_dbg(vdev, IOCTL, "Invalid buffer count %u\n", args->buffer_count);
                return -EINVAL;
        }

        if (args->preempt_buffer_index >= args->buffer_count) {
                ivpu_dbg(vdev, IOCTL, "Invalid preemption buffer index %u\n",
                         args->preempt_buffer_index);
                return -EINVAL;
        }

        if (!IS_ALIGNED(args->commands_offset, 8)) {
                ivpu_dbg(vdev, IOCTL, "Invalid commands offset %u\n", args->commands_offset);
                return -EINVAL;
        }

        if (!file_priv->ctx.id) {
                ivpu_dbg(vdev, IOCTL, "Context not initialized\n");
                return -EINVAL;
        }

        if (file_priv->has_mmu_faults) {
                ivpu_dbg(vdev, IOCTL, "Context %u has MMU faults\n", file_priv->ctx.id);
                return -EBADFD;
        }

        return ivpu_submit(file, file_priv, args->cmdq_id, args->buffer_count, VPU_ENGINE_COMPUTE,
                           (void __user *)args->buffers_ptr, args->commands_offset,
                           args->preempt_buffer_index, 0);
}

/*
 * DRM_IOCTL_IVPU_CMDQ_CREATE handler: create a user-managed command queue with
 * the requested priority and flags, returning its ID to user space.
 */
int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
        struct ivpu_file_priv *file_priv = file->driver_priv;
        struct ivpu_device *vdev = file_priv->vdev;
        struct drm_ivpu_cmdq_create *args = data;
        struct ivpu_cmdq *cmdq;
        int ret;

        if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) {
                ivpu_dbg(vdev, IOCTL, "Command queue management not supported\n");
                return -ENODEV;
        }

        if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) {
                ivpu_dbg(vdev, IOCTL, "Invalid priority %d\n", args->priority);
                return -EINVAL;
        }

        ret = ivpu_rpm_get(vdev);
        if (ret < 0)
                return ret;

        mutex_lock(&file_priv->lock);

        cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), args->flags);
        if (cmdq)
                args->cmdq_id = cmdq->id;

        mutex_unlock(&file_priv->lock);

        ivpu_rpm_put(vdev);

        return cmdq ? 0 : -ENOMEM;
}

/*
 * DRM_IOCTL_IVPU_CMDQ_DESTROY handler: destroy a user-managed command queue
 * (legacy queues are refused) and abort any of its jobs still in flight.
 */
int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
        struct ivpu_file_priv *file_priv = file->driver_priv;
        struct ivpu_device *vdev = file_priv->vdev;
        struct drm_ivpu_cmdq_destroy *args = data;
        struct ivpu_cmdq *cmdq;
        u32 cmdq_id = 0;
        int ret;

        if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) {
                ivpu_dbg(vdev, IOCTL, "Command queue management not supported\n");
                return -ENODEV;
        }

        ret = ivpu_rpm_get(vdev);
        if (ret < 0)
                return ret;

        mutex_lock(&file_priv->lock);

        cmdq = xa_load(&file_priv->cmdq_xa, args->cmdq_id);
        if (!cmdq || cmdq->is_legacy) {
                ret = -ENOENT;
        } else {
                cmdq_id = cmdq->id;
                ivpu_cmdq_destroy(file_priv, cmdq);
                ret = 0;
        }

        mutex_unlock(&file_priv->lock);

        /* Abort any pending jobs only if cmdq was destroyed */
        if (!ret)
                ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id);

        ivpu_rpm_put(vdev);

        return ret;
}

/*
 * IPC consumer callback for JOB_RET messages: route the completion either to
 * the engine-error path or to normal job completion.
 */
static void
ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
                       struct vpu_jsm_msg *jsm_msg)
{
        struct vpu_ipc_msg_payload_job_done *payload;

        if (!jsm_msg) {
                ivpu_err(vdev, "IPC message has no JSM payload\n");
                return;
        }

        if (jsm_msg->result != VPU_JSM_STATUS_SUCCESS) {
                ivpu_err(vdev, "Invalid JSM message result: %d\n", jsm_msg->result);
                return;
        }

        payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;

        mutex_lock(&vdev->submitted_jobs_lock);
        if (!ivpu_job_handle_engine_error(vdev, payload->job_id, payload->job_status))
                /* No engine error, complete the job normally */
                ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
        mutex_unlock(&vdev->submitted_jobs_lock);
}

/* Register the job-done IPC consumer on the JOB_RET channel */
void ivpu_job_done_consumer_init(struct ivpu_device *vdev)
{
        ivpu_ipc_consumer_add(vdev, &vdev->job_done_consumer,
                              VPU_IPC_CHAN_JOB_RET, ivpu_job_done_callback);
}

/* Unregister the job-done IPC consumer */
void ivpu_job_done_consumer_fini(struct ivpu_device *vdev)
{
        ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer);
}

/*
 * Issue a JSM engine reset and, when no MMU faults pre-marked the guilty
 * contexts, mark every context reported as impacted by the reset as faulty.
 * Triggers full device recovery when the reported context count is not
 * plausible. Returns 0 on success or a negative errno.
 */
static int reset_engine_and_mark_faulty_contexts(struct ivpu_device *vdev)
{
        u32 num_impacted_contexts;
        struct vpu_jsm_msg resp;
        int ret;
        u32 i;

        ret = ivpu_jsm_reset_engine(vdev, 0, &resp);
        if (ret)
                return ret;

        /*
         * If faults are detected, ignore guilty contexts from engine reset as NPU may not be stuck
         * and could return currently running good context and faulty contexts are already marked
         */
        if (atomic_cmpxchg(&vdev->faults_detected, 1, 0) == 1)
                return 0;

        num_impacted_contexts = resp.payload.engine_reset_done.num_impacted_contexts;

        ivpu_warn_ratelimited(vdev, "Engine reset performed, impacted contexts: %u\n",
                              num_impacted_contexts);

        if (!in_range(num_impacted_contexts, 1, VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS - 1)) {
                ivpu_pm_trigger_recovery(vdev, "Cannot determine guilty contexts");
                return -EIO;
        }

        /* No faults detected, NPU likely got stuck. Mark returned contexts as guilty */
        guard(mutex)(&vdev->context_list_lock);

        for (i = 0; i < num_impacted_contexts; i++) {
                u32 ssid = resp.payload.engine_reset_done.impacted_contexts[i].host_ssid;
                struct ivpu_file_priv *file_priv = xa_load(&vdev->context_xa, ssid);

                if (file_priv) {
                        mutex_lock(&file_priv->lock);
                        file_priv->has_mmu_faults = true;
                        mutex_unlock(&file_priv->lock);
                }
        }

        return 0;
}

void ivpu_context_abort_work_fn(struct work_struct *work)
{
        struct ivpu_device *vdev = container_of(work, struct ivpu_device, context_abort_work);
        struct ivpu_file_priv *file_priv;
        struct ivpu_job *job;
        unsigned long ctx_id;
        unsigned long id;

        if (drm_WARN_ON(&vdev->drm, pm_runtime_get_if_active(vdev->drm.dev) <= 0))
                return;

        if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
                if (reset_engine_and_mark_faulty_contexts(vdev))
                        goto runtime_put;

        mutex_lock(&vdev->context_list_lock);
        xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
                if (!file_priv->has_mmu_faults || file_priv->aborted)
                        continue;

                mutex_lock(&file_priv->lock);
                ivpu_context_abort_locked(file_priv);
                mutex_unlock(&file_priv->lock);
        }
        mutex_unlock(&vdev->context_list_lock);

        /*
         * We will not receive new MMU event interrupts until existing events are discarded
         * however, we want to discard these events only after aborting the faulty context
         * to avoid generating new faults from that context
         */
        ivpu_mmu_discard_events(vdev);

        if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW)
                goto runtime_put;

        if (ivpu_jsm_hws_resume_engine(vdev, 0))
                goto runtime_put;
        /*
         * In hardware scheduling mode NPU already has stopped processing jobs
         * and won't send us any further notifications, thus we have to free job related resources
         * and notify userspace
*/ 1207 mutex_lock(&vdev->submitted_jobs_lock); 1208 xa_for_each(&vdev->submitted_jobs_xa, id, job) 1209 if (job->file_priv->aborted) 1210 ivpu_job_signal_and_destroy(vdev, job->job_id, DRM_IVPU_JOB_STATUS_ABORTED); 1211 mutex_unlock(&vdev->submitted_jobs_lock); 1212 1213 runtime_put: 1214 pm_runtime_put_autosuspend(vdev->drm.dev); 1215 } 1216