// SPDX-License-Identifier: GPL-2.0-only
/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
/* Copyright 2019 Collabora ltd. */
/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */

#include <drm/drm_print.h>
#include <drm/drm_file.h>
#include <drm/drm_gem.h>
#include <drm/rocket_accel.h>
#include <linux/interrupt.h>
#include <linux/iommu.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

#include "rocket_core.h"
#include "rocket_device.h"
#include "rocket_drv.h"
#include "rocket_job.h"
#include "rocket_registers.h"

#define JOB_TIMEOUT_MS 500

static struct rocket_job *
to_rocket_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct rocket_job, base);
}

static const char *rocket_fence_get_driver_name(struct dma_fence *fence)
{
	return "rocket";
}

static const char *rocket_fence_get_timeline_name(struct dma_fence *fence)
{
	return "rockchip-npu";
}

static const struct dma_fence_ops rocket_fence_ops = {
	.get_driver_name = rocket_fence_get_driver_name,
	.get_timeline_name = rocket_fence_get_timeline_name,
};

static struct dma_fence *rocket_fence_create(struct rocket_core *core)
{
	struct dma_fence *fence;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return ERR_PTR(-ENOMEM);

	dma_fence_init(fence, &rocket_fence_ops, &core->fence_lock,
		       core->fence_context, ++core->emit_seqno);

	return fence;
}

static int
rocket_copy_tasks(struct drm_device *dev,
		  struct drm_file *file_priv,
		  struct drm_rocket_job *job,
		  struct rocket_job *rjob)
{
	int ret = 0;

	if (job->task_struct_size < sizeof(struct drm_rocket_task))
		return -EINVAL;

	rjob->task_count = job->task_count;

	if (!rjob->task_count)
		return 0;

	rjob->tasks = kvmalloc_array(job->task_count, sizeof(*rjob->tasks), GFP_KERNEL);
	if (!rjob->tasks) {
		drm_dbg(dev, "Failed to allocate task array\n");
		return -ENOMEM;
	}

	for (int i = 0; i < rjob->task_count; i++) {
		struct drm_rocket_task task = {0};

		if (copy_from_user(&task,
				   u64_to_user_ptr(job->tasks) + i * job->task_struct_size,
				   sizeof(task))) {
			drm_dbg(dev, "Failed to copy incoming tasks\n");
			ret = -EFAULT;
			goto fail;
		}

		if (task.regcmd_count == 0) {
			drm_dbg(dev, "regcmd_count field in drm_rocket_task should be > 0.\n");
			ret = -EINVAL;
			goto fail;
		}

		rjob->tasks[i].regcmd = task.regcmd;
		rjob->tasks[i].regcmd_count = task.regcmd_count;
	}

	return 0;

fail:
	kvfree(rjob->tasks);
	return ret;
}

static void rocket_job_hw_submit(struct rocket_core *core, struct rocket_job *job)
{
	struct rocket_task *task;
	unsigned int extra_bit;

	/* Don't queue the job if a reset is in progress */
	if (atomic_read(&core->reset.pending))
		return;

	/* GO ! */

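	/*
	 * Program the next task: enable the ping-pong register banks in the
	 * CNA and CORE blocks, point the PC block at the task's register
	 * command buffer, enable the DPU interrupts and kick execution via
	 * OPERATION_ENABLE.
	 */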
	task = &job->tasks[job->next_task_idx];
	job->next_task_idx++;

	rocket_pc_writel(core, BASE_ADDRESS, 0x1);

	/* From rknpu, in the TRM this bit is marked as reserved */
	extra_bit = 0x10000000 * core->index;
	rocket_cna_writel(core, S_POINTER, CNA_S_POINTER_POINTER_PP_EN(1) |
					   CNA_S_POINTER_EXECUTER_PP_EN(1) |
					   CNA_S_POINTER_POINTER_PP_MODE(1) |
					   extra_bit);

	rocket_core_writel(core, S_POINTER, CORE_S_POINTER_POINTER_PP_EN(1) |
					    CORE_S_POINTER_EXECUTER_PP_EN(1) |
					    CORE_S_POINTER_POINTER_PP_MODE(1) |
					    extra_bit);

	rocket_pc_writel(core, BASE_ADDRESS, task->regcmd);
	rocket_pc_writel(core, REGISTER_AMOUNTS,
			 PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT((task->regcmd_count + 1) / 2 - 1));

	rocket_pc_writel(core, INTERRUPT_MASK, PC_INTERRUPT_MASK_DPU_0 | PC_INTERRUPT_MASK_DPU_1);
	rocket_pc_writel(core, INTERRUPT_CLEAR, PC_INTERRUPT_CLEAR_DPU_0 | PC_INTERRUPT_CLEAR_DPU_1);

	rocket_pc_writel(core, TASK_CON, PC_TASK_CON_RESERVED_0(1) |
					 PC_TASK_CON_TASK_COUNT_CLEAR(1) |
					 PC_TASK_CON_TASK_NUMBER(1) |
					 PC_TASK_CON_TASK_PP_EN(1));

	rocket_pc_writel(core, TASK_DMA_BASE_ADDR, PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR(0x0));

	rocket_pc_writel(core, OPERATION_ENABLE, PC_OPERATION_ENABLE_OP_EN(1));

	dev_dbg(core->dev, "Submitted regcmd at 0x%llx to core %d", task->regcmd, core->index);
}

static int rocket_acquire_object_fences(struct drm_gem_object **bos,
					int bo_count,
					struct drm_sched_job *job,
					bool is_write)
{
	int i, ret;

	for (i = 0; i < bo_count; i++) {
		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
		if (ret)
			return ret;

		ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
							      is_write);
		if (ret)
			return ret;
	}

	return 0;
}

static void rocket_attach_object_fences(struct drm_gem_object **bos,
					int bo_count,
					struct dma_fence *fence)
{
	int i;

	for (i = 0; i < bo_count; i++)
		dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
}

static int rocket_job_push(struct rocket_job *job)
{
	struct rocket_device *rdev = job->rdev;
	struct drm_gem_object **bos;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	bos = kvmalloc_array(job->in_bo_count + job->out_bo_count, sizeof(void *),
			     GFP_KERNEL);
	if (!bos)
		return -ENOMEM;

	memcpy(bos, job->in_bos, job->in_bo_count * sizeof(void *));
	memcpy(&bos[job->in_bo_count], job->out_bos, job->out_bo_count * sizeof(void *));

	ret = drm_gem_lock_reservations(bos, job->in_bo_count + job->out_bo_count, &acquire_ctx);
	if (ret)
		goto err;

	scoped_guard(mutex, &rdev->sched_lock) {
		drm_sched_job_arm(&job->base);

		job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished);

		ret = rocket_acquire_object_fences(job->in_bos, job->in_bo_count, &job->base, false);
		if (ret)
			goto err_unlock;

		ret = rocket_acquire_object_fences(job->out_bos, job->out_bo_count, &job->base, true);
		if (ret)
			goto err_unlock;

		kref_get(&job->refcount); /* put by scheduler job completion */

		drm_sched_entity_push_job(&job->base);
	}

	rocket_attach_object_fences(job->out_bos, job->out_bo_count, job->inference_done_fence);

err_unlock:
	drm_gem_unlock_reservations(bos, job->in_bo_count + job->out_bo_count, &acquire_ctx);
err:
	kvfree(bos);

	return ret;
}

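/*
 * Called when the last reference to the job is dropped: releases the IOMMU
 * domain, the fences, the GEM objects looked up at submit time and the
 * copied task array.
 */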
static void rocket_job_cleanup(struct kref *ref)
{
	struct rocket_job *job = container_of(ref, struct rocket_job,
					      refcount);
	unsigned int i;

	rocket_iommu_domain_put(job->domain);

	dma_fence_put(job->done_fence);
	dma_fence_put(job->inference_done_fence);

	if (job->in_bos) {
		for (i = 0; i < job->in_bo_count; i++)
			drm_gem_object_put(job->in_bos[i]);

		kvfree(job->in_bos);
	}

	if (job->out_bos) {
		for (i = 0; i < job->out_bo_count; i++)
			drm_gem_object_put(job->out_bos[i]);

		kvfree(job->out_bos);
	}

	kvfree(job->tasks);

	kfree(job);
}

static void rocket_job_put(struct rocket_job *job)
{
	kref_put(&job->refcount, rocket_job_cleanup);
}

static void rocket_job_free(struct drm_sched_job *sched_job)
{
	struct rocket_job *job = to_rocket_job(sched_job);

	drm_sched_job_cleanup(sched_job);

	rocket_job_put(job);
}

static struct rocket_core *sched_to_core(struct rocket_device *rdev,
					 struct drm_gpu_scheduler *sched)
{
	unsigned int core;

	for (core = 0; core < rdev->num_cores; core++) {
		if (&rdev->cores[core].sched == sched)
			return &rdev->cores[core];
	}

	return NULL;
}

static struct dma_fence *rocket_job_run(struct drm_sched_job *sched_job)
{
	struct rocket_job *job = to_rocket_job(sched_job);
	struct rocket_device *rdev = job->rdev;
	struct rocket_core *core = sched_to_core(rdev, sched_job->sched);
	struct dma_fence *fence = NULL;
	int ret;

	if (unlikely(job->base.s_fence->finished.error))
		return NULL;

	/*
	 * Nothing to execute: can happen if the job has finished while
	 * we were resetting the NPU.
	 */
	if (job->next_task_idx == job->task_count)
		return NULL;

	fence = rocket_fence_create(core);
	if (IS_ERR(fence))
		return fence;

	if (job->done_fence)
		dma_fence_put(job->done_fence);
	job->done_fence = dma_fence_get(fence);

	ret = pm_runtime_get_sync(core->dev);
	if (ret < 0)
		return fence;

	ret = iommu_attach_group(job->domain->domain, core->iommu_group);
	if (ret < 0)
		return fence;

	scoped_guard(mutex, &core->job_lock) {
		core->in_flight_job = job;
		rocket_job_hw_submit(core, job);
	}

	return fence;
}

static void rocket_job_handle_irq(struct rocket_core *core)
{
	pm_runtime_mark_last_busy(core->dev);

	rocket_pc_writel(core, OPERATION_ENABLE, 0x0);
	rocket_pc_writel(core, INTERRUPT_CLEAR, 0x1ffff);

	scoped_guard(mutex, &core->job_lock)
		if (core->in_flight_job) {
			if (core->in_flight_job->next_task_idx < core->in_flight_job->task_count) {
				rocket_job_hw_submit(core, core->in_flight_job);
				return;
			}

			iommu_detach_group(NULL, core->iommu_group);
			dma_fence_signal(core->in_flight_job->done_fence);
			pm_runtime_put_autosuspend(core->dev);
			core->in_flight_job = NULL;
		}
}

/*
 * Common reset path for the timeout handler and the reset worker: stop the
 * scheduler, balance the runtime-PM reference held by a stuck in-flight job,
 * detach the IOMMU group, reset the core and restart the scheduler.
 */
static void
rocket_reset(struct rocket_core *core, struct drm_sched_job *bad)
{
	if (!atomic_read(&core->reset.pending))
		return;

	drm_sched_stop(&core->sched, bad);

	/*
	 * Remaining interrupts have been handled, but we might still have
	 * stuck jobs. Let's make sure the PM counters stay balanced by
	 * manually calling pm_runtime_put_noidle().
	 */
	scoped_guard(mutex, &core->job_lock) {
		if (core->in_flight_job)
			pm_runtime_put_noidle(core->dev);

		iommu_detach_group(NULL, core->iommu_group);

		core->in_flight_job = NULL;
	}

	/* Proceed with reset now. */
	rocket_core_reset(core);

	/* NPU has been reset, we can clear the reset pending bit. */
	atomic_set(&core->reset.pending, 0);

	/* Restart the scheduler */
	drm_sched_start(&core->sched, 0);
}

static enum drm_gpu_sched_stat rocket_job_timedout(struct drm_sched_job *sched_job)
{
	struct rocket_job *job = to_rocket_job(sched_job);
	struct rocket_device *rdev = job->rdev;
	struct rocket_core *core = sched_to_core(rdev, sched_job->sched);

	dev_err(core->dev, "NPU job timed out");

	atomic_set(&core->reset.pending, 1);
	rocket_reset(core, sched_job);

	return DRM_GPU_SCHED_STAT_RESET;
}

static void rocket_reset_work(struct work_struct *work)
{
	struct rocket_core *core;

	core = container_of(work, struct rocket_core, reset.work);
	rocket_reset(core, NULL);
}

static const struct drm_sched_backend_ops rocket_sched_ops = {
	.run_job = rocket_job_run,
	.timedout_job = rocket_job_timedout,
	.free_job = rocket_job_free
};

static irqreturn_t rocket_job_irq_handler_thread(int irq, void *data)
{
	struct rocket_core *core = data;

	rocket_job_handle_irq(core);

	return IRQ_HANDLED;
}

static irqreturn_t rocket_job_irq_handler(int irq, void *data)
{
	struct rocket_core *core = data;
	u32 raw_status = rocket_pc_readl(core, INTERRUPT_RAW_STATUS);

	WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_READ_ERROR);
	WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_WRITE_ERROR);

	if (!(raw_status & PC_INTERRUPT_RAW_STATUS_DPU_0 ||
	      raw_status & PC_INTERRUPT_RAW_STATUS_DPU_1))
		return IRQ_NONE;

	rocket_pc_writel(core, INTERRUPT_MASK, 0x0);

	return IRQ_WAKE_THREAD;
}

int rocket_job_init(struct rocket_core *core)
{
	struct drm_sched_init_args args = {
		.ops = &rocket_sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = 1,
		.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
		.name = dev_name(core->dev),
		.dev = core->dev,
	};
	int ret;

	INIT_WORK(&core->reset.work, rocket_reset_work);
	spin_lock_init(&core->fence_lock);
	mutex_init(&core->job_lock);

	core->irq = platform_get_irq(to_platform_device(core->dev), 0);
	if (core->irq < 0)
		return core->irq;

	ret = devm_request_threaded_irq(core->dev, core->irq,
					rocket_job_irq_handler,
					rocket_job_irq_handler_thread,
					IRQF_SHARED, dev_name(core->dev),
					core);
	if (ret) {
		dev_err(core->dev, "failed to request job irq");
		return ret;
	}

	core->reset.wq = alloc_ordered_workqueue("rocket-reset-%d", 0, core->index);
	if (!core->reset.wq)
		return -ENOMEM;

	core->fence_context = dma_fence_context_alloc(1);

	args.timeout_wq = core->reset.wq;
	ret = drm_sched_init(&core->sched, &args);
	if (ret) {
		dev_err(core->dev, "Failed to create scheduler: %d.", ret);
		goto err_sched;
	}

	return 0;

err_sched:
	drm_sched_fini(&core->sched);

	destroy_workqueue(core->reset.wq);
	return ret;
}

/*
 * Tear-down counterpart of rocket_job_init(): finish the scheduler first so
 * no new jobs can run, then cancel any pending reset work before destroying
 * its workqueue.
 */
void rocket_job_fini(struct rocket_core *core)
{
	drm_sched_fini(&core->sched);

	cancel_work_sync(&core->reset.work);
	destroy_workqueue(core->reset.wq);
}

int rocket_job_open(struct rocket_file_priv *rocket_priv)
{
	struct rocket_device *rdev = rocket_priv->rdev;
	struct drm_gpu_scheduler **scheds = kmalloc_array(rdev->num_cores,
							  sizeof(*scheds),
							  GFP_KERNEL);
	unsigned int core;
	int ret;

	if (!scheds)
		return -ENOMEM;

	for (core = 0; core < rdev->num_cores; core++)
		scheds[core] = &rdev->cores[core].sched;

	ret = drm_sched_entity_init(&rocket_priv->sched_entity,
				    DRM_SCHED_PRIORITY_NORMAL,
				    scheds,
				    rdev->num_cores, NULL);
	if (WARN_ON(ret))
		return ret;

	return 0;
}

void rocket_job_close(struct rocket_file_priv *rocket_priv)
{
	struct drm_sched_entity *entity = &rocket_priv->sched_entity;

	kfree(entity->sched_list);
	drm_sched_entity_destroy(entity);
}

int rocket_job_is_idle(struct rocket_core *core)
{
	/* If there are any jobs in this HW queue, we're not idle */
	if (atomic_read(&core->sched.credit_count))
		return false;

	return true;
}

static int rocket_ioctl_submit_job(struct drm_device *dev, struct drm_file *file,
				   struct drm_rocket_job *job)
{
	struct rocket_device *rdev = to_rocket_device(dev);
	struct rocket_file_priv *file_priv = file->driver_priv;
	struct rocket_job *rjob = NULL;
	int ret = 0;

	if (job->task_count == 0)
		return -EINVAL;

	rjob = kzalloc(sizeof(*rjob), GFP_KERNEL);
	if (!rjob)
		return -ENOMEM;

	kref_init(&rjob->refcount);

	rjob->rdev = rdev;

	ret = drm_sched_job_init(&rjob->base,
				 &file_priv->sched_entity,
				 1, NULL, file->client_id);
	if (ret)
		goto out_put_job;

	ret = rocket_copy_tasks(dev, file, job, rjob);
	if (ret)
		goto out_cleanup_job;

	ret = drm_gem_objects_lookup(file, u64_to_user_ptr(job->in_bo_handles),
				     job->in_bo_handle_count, &rjob->in_bos);
	if (ret)
		goto out_cleanup_job;

	rjob->in_bo_count = job->in_bo_handle_count;

	ret = drm_gem_objects_lookup(file, u64_to_user_ptr(job->out_bo_handles),
				     job->out_bo_handle_count, &rjob->out_bos);
	if (ret)
		goto out_cleanup_job;

	rjob->out_bo_count = job->out_bo_handle_count;

	rjob->domain = rocket_iommu_domain_get(file_priv);

	ret = rocket_job_push(rjob);
	if (ret)
		goto out_cleanup_job;

out_cleanup_job:
	if (ret)
		drm_sched_job_cleanup(&rjob->base);
out_put_job:
	rocket_job_put(rjob);

	return ret;
}

int rocket_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_rocket_submit *args = data;
	struct drm_rocket_job *jobs;
	int ret = 0;
	unsigned int i = 0;

	if (args->job_count == 0)
		return 0;

	if (args->job_struct_size < sizeof(struct drm_rocket_job)) {
		drm_dbg(dev, "job_struct_size field in drm_rocket_submit struct is too small.\n");
		return -EINVAL;
	}

	if (args->reserved != 0) {
		drm_dbg(dev, "Reserved field in drm_rocket_submit struct should be 0.\n");
		return -EINVAL;
	}

	jobs = kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL);
	if (!jobs) {
		drm_dbg(dev, "Failed to allocate incoming job array\n");
		return -ENOMEM;
	}

	for (i = 0; i < args->job_count; i++) {
		if (copy_from_user(&jobs[i],
				   u64_to_user_ptr(args->jobs) + i * args->job_struct_size,
				   sizeof(*jobs))) {
			ret = -EFAULT;
			drm_dbg(dev, "Failed to copy incoming job array\n");
			goto exit;
		}
	}

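	/* Push the jobs in order, stopping at the first failure. */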
	for (i = 0; i < args->job_count; i++) {
		ret = rocket_ioctl_submit_job(dev, file, &jobs[i]);
		if (ret)
			goto exit;
	}

exit:
	kvfree(jobs);

	return ret;
}