// SPDX-License-Identifier: GPL-2.0-only
/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
/* Copyright 2019 Collabora ltd. */
/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
5
6 #include <drm/drm_print.h>
7 #include <drm/drm_file.h>
8 #include <drm/drm_gem.h>
9 #include <drm/rocket_accel.h>
10 #include <linux/interrupt.h>
11 #include <linux/iommu.h>
12 #include <linux/platform_device.h>
13 #include <linux/pm_runtime.h>
14
15 #include "rocket_core.h"
16 #include "rocket_device.h"
17 #include "rocket_drv.h"
18 #include "rocket_job.h"
19 #include "rocket_registers.h"
20
/* Per-job scheduler timeout, in milliseconds. */
#define JOB_TIMEOUT_MS 500
22
23 static struct rocket_job *
to_rocket_job(struct drm_sched_job * sched_job)24 to_rocket_job(struct drm_sched_job *sched_job)
25 {
26 return container_of(sched_job, struct rocket_job, base);
27 }
28
/* dma_fence_ops.get_driver_name: constant driver name, @fence is not used. */
static const char *rocket_fence_get_driver_name(struct dma_fence *fence)
{
	static const char driver_name[] = "rocket";

	return driver_name;
}
33
/* dma_fence_ops.get_timeline_name: constant timeline name, @fence is not used. */
static const char *rocket_fence_get_timeline_name(struct dma_fence *fence)
{
	static const char timeline_name[] = "rockchip-npu";

	return timeline_name;
}
38
39 static const struct dma_fence_ops rocket_fence_ops = {
40 .get_driver_name = rocket_fence_get_driver_name,
41 .get_timeline_name = rocket_fence_get_timeline_name,
42 };
43
rocket_fence_create(struct rocket_core * core)44 static struct dma_fence *rocket_fence_create(struct rocket_core *core)
45 {
46 struct dma_fence *fence;
47
48 fence = kzalloc_obj(*fence);
49 if (!fence)
50 return ERR_PTR(-ENOMEM);
51
52 dma_fence_init(fence, &rocket_fence_ops, &core->fence_lock,
53 core->fence_context, ++core->emit_seqno);
54
55 return fence;
56 }
57
58 static int
rocket_copy_tasks(struct drm_device * dev,struct drm_file * file_priv,struct drm_rocket_job * job,struct rocket_job * rjob)59 rocket_copy_tasks(struct drm_device *dev,
60 struct drm_file *file_priv,
61 struct drm_rocket_job *job,
62 struct rocket_job *rjob)
63 {
64 int ret = 0;
65
66 if (job->task_struct_size < sizeof(struct drm_rocket_task))
67 return -EINVAL;
68
69 rjob->task_count = job->task_count;
70
71 if (!rjob->task_count)
72 return 0;
73
74 rjob->tasks = kvmalloc_objs(*rjob->tasks, job->task_count);
75 if (!rjob->tasks) {
76 drm_dbg(dev, "Failed to allocate task array\n");
77 return -ENOMEM;
78 }
79
80 for (int i = 0; i < rjob->task_count; i++) {
81 struct drm_rocket_task task = {0};
82
83 if (copy_from_user(&task,
84 u64_to_user_ptr(job->tasks) + i * job->task_struct_size,
85 sizeof(task))) {
86 drm_dbg(dev, "Failed to copy incoming tasks\n");
87 ret = -EFAULT;
88 goto fail;
89 }
90
91 if (task.regcmd_count == 0) {
92 drm_dbg(dev, "regcmd_count field in drm_rocket_task should be > 0.\n");
93 ret = -EINVAL;
94 goto fail;
95 }
96
97 rjob->tasks[i].regcmd = task.regcmd;
98 rjob->tasks[i].regcmd_count = task.regcmd_count;
99 }
100
101 return 0;
102
103 fail:
104 kvfree(rjob->tasks);
105 return ret;
106 }
107
rocket_job_hw_submit(struct rocket_core * core,struct rocket_job * job)108 static void rocket_job_hw_submit(struct rocket_core *core, struct rocket_job *job)
109 {
110 struct rocket_task *task;
111 unsigned int extra_bit;
112
113 /* Don't queue the job if a reset is in progress */
114 if (atomic_read(&core->reset.pending))
115 return;
116
117 /* GO ! */
118
119 task = &job->tasks[job->next_task_idx];
120 job->next_task_idx++;
121
122 rocket_pc_writel(core, BASE_ADDRESS, 0x1);
123
124 /* From rknpu, in the TRM this bit is marked as reserved */
125 extra_bit = 0x10000000 * core->index;
126 rocket_cna_writel(core, S_POINTER, CNA_S_POINTER_POINTER_PP_EN(1) |
127 CNA_S_POINTER_EXECUTER_PP_EN(1) |
128 CNA_S_POINTER_POINTER_PP_MODE(1) |
129 extra_bit);
130
131 rocket_core_writel(core, S_POINTER, CORE_S_POINTER_POINTER_PP_EN(1) |
132 CORE_S_POINTER_EXECUTER_PP_EN(1) |
133 CORE_S_POINTER_POINTER_PP_MODE(1) |
134 extra_bit);
135
136 rocket_pc_writel(core, BASE_ADDRESS, task->regcmd);
137 rocket_pc_writel(core, REGISTER_AMOUNTS,
138 PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT((task->regcmd_count + 1) / 2 - 1));
139
140 rocket_pc_writel(core, INTERRUPT_MASK, PC_INTERRUPT_MASK_DPU_0 | PC_INTERRUPT_MASK_DPU_1);
141 rocket_pc_writel(core, INTERRUPT_CLEAR, PC_INTERRUPT_CLEAR_DPU_0 | PC_INTERRUPT_CLEAR_DPU_1);
142
143 rocket_pc_writel(core, TASK_CON, PC_TASK_CON_RESERVED_0(1) |
144 PC_TASK_CON_TASK_COUNT_CLEAR(1) |
145 PC_TASK_CON_TASK_NUMBER(1) |
146 PC_TASK_CON_TASK_PP_EN(1));
147
148 rocket_pc_writel(core, TASK_DMA_BASE_ADDR, PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR(0x0));
149
150 rocket_pc_writel(core, OPERATION_ENABLE, PC_OPERATION_ENABLE_OP_EN(1));
151
152 dev_dbg(core->dev, "Submitted regcmd at 0x%llx to core %d", task->regcmd, core->index);
153 }
154
/*
 * For each of the @bo_count objects in @bos, reserve one fence slot in its
 * reservation object and add its implicit dependencies to @job.
 *
 * @is_write is passed through to drm_sched_job_add_implicit_dependencies().
 *
 * Returns 0 on success, or the first error reported by either call; objects
 * after the failing one are not processed.
 */
static int rocket_acquire_object_fences(struct drm_gem_object **bos,
					int bo_count,
					struct drm_sched_job *job,
					bool is_write)
{
	for (int idx = 0; idx < bo_count; idx++) {
		struct drm_gem_object *bo = bos[idx];
		int err;

		err = dma_resv_reserve_fences(bo->resv, 1);
		if (!err)
			err = drm_sched_job_add_implicit_dependencies(job, bo,
								      is_write);
		if (err)
			return err;
	}

	return 0;
}
175
rocket_attach_object_fences(struct drm_gem_object ** bos,int bo_count,struct dma_fence * fence)176 static void rocket_attach_object_fences(struct drm_gem_object **bos,
177 int bo_count,
178 struct dma_fence *fence)
179 {
180 int i;
181
182 for (i = 0; i < bo_count; i++)
183 dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
184 }
185
rocket_job_push(struct rocket_job * job)186 static int rocket_job_push(struct rocket_job *job)
187 {
188 struct rocket_device *rdev = job->rdev;
189 struct drm_gem_object **bos;
190 struct ww_acquire_ctx acquire_ctx;
191 int ret = 0;
192
193 bos = kvmalloc_array(job->in_bo_count + job->out_bo_count, sizeof(void *),
194 GFP_KERNEL);
195 memcpy(bos, job->in_bos, job->in_bo_count * sizeof(void *));
196 memcpy(&bos[job->in_bo_count], job->out_bos, job->out_bo_count * sizeof(void *));
197
198 ret = drm_gem_lock_reservations(bos, job->in_bo_count + job->out_bo_count, &acquire_ctx);
199 if (ret)
200 goto err;
201
202 scoped_guard(mutex, &rdev->sched_lock) {
203 drm_sched_job_arm(&job->base);
204
205 job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished);
206
207 ret = rocket_acquire_object_fences(job->in_bos, job->in_bo_count, &job->base, false);
208 if (ret)
209 goto err_unlock;
210
211 ret = rocket_acquire_object_fences(job->out_bos, job->out_bo_count, &job->base, true);
212 if (ret)
213 goto err_unlock;
214
215 kref_get(&job->refcount); /* put by scheduler job completion */
216
217 drm_sched_entity_push_job(&job->base);
218 }
219
220 rocket_attach_object_fences(job->out_bos, job->out_bo_count, job->inference_done_fence);
221
222 err_unlock:
223 drm_gem_unlock_reservations(bos, job->in_bo_count + job->out_bo_count, &acquire_ctx);
224 err:
225 kvfree(bos);
226
227 return ret;
228 }
229
rocket_job_cleanup(struct kref * ref)230 static void rocket_job_cleanup(struct kref *ref)
231 {
232 struct rocket_job *job = container_of(ref, struct rocket_job,
233 refcount);
234 unsigned int i;
235
236 rocket_iommu_domain_put(job->domain);
237
238 dma_fence_put(job->done_fence);
239 dma_fence_put(job->inference_done_fence);
240
241 if (job->in_bos) {
242 for (i = 0; i < job->in_bo_count; i++)
243 drm_gem_object_put(job->in_bos[i]);
244
245 kvfree(job->in_bos);
246 }
247
248 if (job->out_bos) {
249 for (i = 0; i < job->out_bo_count; i++)
250 drm_gem_object_put(job->out_bos[i]);
251
252 kvfree(job->out_bos);
253 }
254
255 kvfree(job->tasks);
256
257 kfree(job);
258 }
259
rocket_job_put(struct rocket_job * job)260 static void rocket_job_put(struct rocket_job *job)
261 {
262 kref_put(&job->refcount, rocket_job_cleanup);
263 }
264
/*
 * Scheduler free_job callback: clean up the scheduler side of the job, then
 * drop a reference on the enclosing rocket_job.
 */
static void rocket_job_free(struct drm_sched_job *sched_job)
{
	drm_sched_job_cleanup(sched_job);
	rocket_job_put(to_rocket_job(sched_job));
}
273
sched_to_core(struct rocket_device * rdev,struct drm_gpu_scheduler * sched)274 static struct rocket_core *sched_to_core(struct rocket_device *rdev,
275 struct drm_gpu_scheduler *sched)
276 {
277 unsigned int core;
278
279 for (core = 0; core < rdev->num_cores; core++) {
280 if (&rdev->cores[core].sched == sched)
281 return &rdev->cores[core];
282 }
283
284 return NULL;
285 }
286
rocket_job_run(struct drm_sched_job * sched_job)287 static struct dma_fence *rocket_job_run(struct drm_sched_job *sched_job)
288 {
289 struct rocket_job *job = to_rocket_job(sched_job);
290 struct rocket_device *rdev = job->rdev;
291 struct rocket_core *core = sched_to_core(rdev, sched_job->sched);
292 struct dma_fence *fence = NULL;
293 int ret;
294
295 if (unlikely(job->base.s_fence->finished.error))
296 return NULL;
297
298 /*
299 * Nothing to execute: can happen if the job has finished while
300 * we were resetting the NPU.
301 */
302 if (job->next_task_idx == job->task_count)
303 return NULL;
304
305 fence = rocket_fence_create(core);
306 if (IS_ERR(fence))
307 return fence;
308
309 if (job->done_fence)
310 dma_fence_put(job->done_fence);
311 job->done_fence = dma_fence_get(fence);
312
313 ret = pm_runtime_get_sync(core->dev);
314 if (ret < 0)
315 return fence;
316
317 ret = iommu_attach_group(job->domain->domain, core->iommu_group);
318 if (ret < 0)
319 return fence;
320
321 scoped_guard(mutex, &core->job_lock) {
322 core->in_flight_job = job;
323 rocket_job_hw_submit(core, job);
324 }
325
326 return fence;
327 }
328
rocket_job_handle_irq(struct rocket_core * core)329 static void rocket_job_handle_irq(struct rocket_core *core)
330 {
331 pm_runtime_mark_last_busy(core->dev);
332
333 rocket_pc_writel(core, OPERATION_ENABLE, 0x0);
334 rocket_pc_writel(core, INTERRUPT_CLEAR, 0x1ffff);
335
336 scoped_guard(mutex, &core->job_lock)
337 if (core->in_flight_job) {
338 if (core->in_flight_job->next_task_idx < core->in_flight_job->task_count) {
339 rocket_job_hw_submit(core, core->in_flight_job);
340 return;
341 }
342
343 iommu_detach_group(NULL, iommu_group_get(core->dev));
344 dma_fence_signal(core->in_flight_job->done_fence);
345 pm_runtime_put_autosuspend(core->dev);
346 core->in_flight_job = NULL;
347 }
348 }
349
350 static void
rocket_reset(struct rocket_core * core,struct drm_sched_job * bad)351 rocket_reset(struct rocket_core *core, struct drm_sched_job *bad)
352 {
353 if (!atomic_read(&core->reset.pending))
354 return;
355
356 drm_sched_stop(&core->sched, bad);
357
358 /*
359 * Remaining interrupts have been handled, but we might still have
360 * stuck jobs. Let's make sure the PM counters stay balanced by
361 * manually calling pm_runtime_put_noidle().
362 */
363 scoped_guard(mutex, &core->job_lock) {
364 if (core->in_flight_job)
365 pm_runtime_put_noidle(core->dev);
366
367 iommu_detach_group(NULL, core->iommu_group);
368
369 core->in_flight_job = NULL;
370 }
371
372 /* Proceed with reset now. */
373 rocket_core_reset(core);
374
375 /* NPU has been reset, we can clear the reset pending bit. */
376 atomic_set(&core->reset.pending, 0);
377
378 /* Restart the scheduler */
379 drm_sched_start(&core->sched, 0);
380 }
381
rocket_job_timedout(struct drm_sched_job * sched_job)382 static enum drm_gpu_sched_stat rocket_job_timedout(struct drm_sched_job *sched_job)
383 {
384 struct rocket_job *job = to_rocket_job(sched_job);
385 struct rocket_device *rdev = job->rdev;
386 struct rocket_core *core = sched_to_core(rdev, sched_job->sched);
387
388 dev_err(core->dev, "NPU job timed out");
389
390 atomic_set(&core->reset.pending, 1);
391 rocket_reset(core, sched_job);
392
393 return DRM_GPU_SCHED_STAT_RESET;
394 }
395
rocket_reset_work(struct work_struct * work)396 static void rocket_reset_work(struct work_struct *work)
397 {
398 struct rocket_core *core;
399
400 core = container_of(work, struct rocket_core, reset.work);
401 rocket_reset(core, NULL);
402 }
403
404 static const struct drm_sched_backend_ops rocket_sched_ops = {
405 .run_job = rocket_job_run,
406 .timedout_job = rocket_job_timedout,
407 .free_job = rocket_job_free
408 };
409
rocket_job_irq_handler_thread(int irq,void * data)410 static irqreturn_t rocket_job_irq_handler_thread(int irq, void *data)
411 {
412 struct rocket_core *core = data;
413
414 rocket_job_handle_irq(core);
415
416 return IRQ_HANDLED;
417 }
418
rocket_job_irq_handler(int irq,void * data)419 static irqreturn_t rocket_job_irq_handler(int irq, void *data)
420 {
421 struct rocket_core *core = data;
422 u32 raw_status = rocket_pc_readl(core, INTERRUPT_RAW_STATUS);
423
424 WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_READ_ERROR);
425 WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_WRITE_ERROR);
426
427 if (!(raw_status & PC_INTERRUPT_RAW_STATUS_DPU_0 ||
428 raw_status & PC_INTERRUPT_RAW_STATUS_DPU_1))
429 return IRQ_NONE;
430
431 rocket_pc_writel(core, INTERRUPT_MASK, 0x0);
432
433 return IRQ_WAKE_THREAD;
434 }
435
rocket_job_init(struct rocket_core * core)436 int rocket_job_init(struct rocket_core *core)
437 {
438 struct drm_sched_init_args args = {
439 .ops = &rocket_sched_ops,
440 .num_rqs = DRM_SCHED_PRIORITY_COUNT,
441 .credit_limit = 1,
442 .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
443 .name = dev_name(core->dev),
444 .dev = core->dev,
445 };
446 int ret;
447
448 INIT_WORK(&core->reset.work, rocket_reset_work);
449 spin_lock_init(&core->fence_lock);
450 mutex_init(&core->job_lock);
451
452 core->irq = platform_get_irq(to_platform_device(core->dev), 0);
453 if (core->irq < 0)
454 return core->irq;
455
456 ret = devm_request_threaded_irq(core->dev, core->irq,
457 rocket_job_irq_handler,
458 rocket_job_irq_handler_thread,
459 IRQF_SHARED, dev_name(core->dev),
460 core);
461 if (ret) {
462 dev_err(core->dev, "failed to request job irq");
463 return ret;
464 }
465
466 core->reset.wq = alloc_ordered_workqueue("rocket-reset-%d", 0, core->index);
467 if (!core->reset.wq)
468 return -ENOMEM;
469
470 core->fence_context = dma_fence_context_alloc(1);
471
472 args.timeout_wq = core->reset.wq;
473 ret = drm_sched_init(&core->sched, &args);
474 if (ret) {
475 dev_err(core->dev, "Failed to create scheduler: %d.", ret);
476 goto err_sched;
477 }
478
479 return 0;
480
481 err_sched:
482 drm_sched_fini(&core->sched);
483
484 destroy_workqueue(core->reset.wq);
485 return ret;
486 }
487
rocket_job_fini(struct rocket_core * core)488 void rocket_job_fini(struct rocket_core *core)
489 {
490 drm_sched_fini(&core->sched);
491
492 cancel_work_sync(&core->reset.work);
493 destroy_workqueue(core->reset.wq);
494 }
495
rocket_job_open(struct rocket_file_priv * rocket_priv)496 int rocket_job_open(struct rocket_file_priv *rocket_priv)
497 {
498 struct rocket_device *rdev = rocket_priv->rdev;
499 struct drm_gpu_scheduler **scheds = kmalloc_objs(*scheds,
500 rdev->num_cores);
501 unsigned int core;
502 int ret;
503
504 for (core = 0; core < rdev->num_cores; core++)
505 scheds[core] = &rdev->cores[core].sched;
506
507 ret = drm_sched_entity_init(&rocket_priv->sched_entity,
508 DRM_SCHED_PRIORITY_NORMAL,
509 scheds,
510 rdev->num_cores, NULL);
511 if (WARN_ON(ret))
512 return ret;
513
514 return 0;
515 }
516
rocket_job_close(struct rocket_file_priv * rocket_priv)517 void rocket_job_close(struct rocket_file_priv *rocket_priv)
518 {
519 struct drm_sched_entity *entity = &rocket_priv->sched_entity;
520
521 kfree(entity->sched_list);
522 drm_sched_entity_destroy(entity);
523 }
524
rocket_job_is_idle(struct rocket_core * core)525 int rocket_job_is_idle(struct rocket_core *core)
526 {
527 /* If there are any jobs in this HW queue, we're not idle */
528 if (atomic_read(&core->sched.credit_count))
529 return false;
530
531 return true;
532 }
533
rocket_ioctl_submit_job(struct drm_device * dev,struct drm_file * file,struct drm_rocket_job * job)534 static int rocket_ioctl_submit_job(struct drm_device *dev, struct drm_file *file,
535 struct drm_rocket_job *job)
536 {
537 struct rocket_device *rdev = to_rocket_device(dev);
538 struct rocket_file_priv *file_priv = file->driver_priv;
539 struct rocket_job *rjob = NULL;
540 int ret = 0;
541
542 if (job->task_count == 0)
543 return -EINVAL;
544
545 rjob = kzalloc_obj(*rjob);
546 if (!rjob)
547 return -ENOMEM;
548
549 kref_init(&rjob->refcount);
550
551 rjob->rdev = rdev;
552
553 ret = drm_sched_job_init(&rjob->base,
554 &file_priv->sched_entity,
555 1, NULL, file->client_id);
556 if (ret)
557 goto out_put_job;
558
559 ret = rocket_copy_tasks(dev, file, job, rjob);
560 if (ret)
561 goto out_cleanup_job;
562
563 ret = drm_gem_objects_lookup(file, u64_to_user_ptr(job->in_bo_handles),
564 job->in_bo_handle_count, &rjob->in_bos);
565 if (ret)
566 goto out_cleanup_job;
567
568 rjob->in_bo_count = job->in_bo_handle_count;
569
570 ret = drm_gem_objects_lookup(file, u64_to_user_ptr(job->out_bo_handles),
571 job->out_bo_handle_count, &rjob->out_bos);
572 if (ret)
573 goto out_cleanup_job;
574
575 rjob->out_bo_count = job->out_bo_handle_count;
576
577 rjob->domain = rocket_iommu_domain_get(file_priv);
578
579 ret = rocket_job_push(rjob);
580 if (ret)
581 goto out_cleanup_job;
582
583 out_cleanup_job:
584 if (ret)
585 drm_sched_job_cleanup(&rjob->base);
586 out_put_job:
587 rocket_job_put(rjob);
588
589 return ret;
590 }
591
rocket_ioctl_submit(struct drm_device * dev,void * data,struct drm_file * file)592 int rocket_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file)
593 {
594 struct drm_rocket_submit *args = data;
595 struct drm_rocket_job *jobs;
596 int ret = 0;
597 unsigned int i = 0;
598
599 if (args->job_count == 0)
600 return 0;
601
602 if (args->job_struct_size < sizeof(struct drm_rocket_job)) {
603 drm_dbg(dev, "job_struct_size field in drm_rocket_submit struct is too small.\n");
604 return -EINVAL;
605 }
606
607 if (args->reserved != 0) {
608 drm_dbg(dev, "Reserved field in drm_rocket_submit struct should be 0.\n");
609 return -EINVAL;
610 }
611
612 jobs = kvmalloc_objs(*jobs, args->job_count);
613 if (!jobs) {
614 drm_dbg(dev, "Failed to allocate incoming job array\n");
615 return -ENOMEM;
616 }
617
618 for (i = 0; i < args->job_count; i++) {
619 if (copy_from_user(&jobs[i],
620 u64_to_user_ptr(args->jobs) + i * args->job_struct_size,
621 sizeof(*jobs))) {
622 ret = -EFAULT;
623 drm_dbg(dev, "Failed to copy incoming job array\n");
624 goto exit;
625 }
626 }
627
628
629 for (i = 0; i < args->job_count; i++)
630 rocket_ioctl_submit_job(dev, file, &jobs[i]);
631
632 exit:
633 kvfree(jobs);
634
635 return ret;
636 }
637