/* xref: /linux/drivers/accel/rocket/rocket_job.c (revision 07fdad3a93756b872da7b53647715c48d0f4a2d0) */
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
/* Copyright 2019 Collabora ltd. */
/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */

#include <drm/drm_print.h>
#include <drm/drm_file.h>
#include <drm/drm_gem.h>
#include <drm/rocket_accel.h>
#include <linux/interrupt.h>
#include <linux/iommu.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

#include "rocket_core.h"
#include "rocket_device.h"
#include "rocket_drv.h"
#include "rocket_job.h"
#include "rocket_registers.h"

#define JOB_TIMEOUT_MS 500

static struct rocket_job *
to_rocket_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct rocket_job, base);
}

static const char *rocket_fence_get_driver_name(struct dma_fence *fence)
{
	return "rocket";
}

static const char *rocket_fence_get_timeline_name(struct dma_fence *fence)
{
	return "rockchip-npu";
}

static const struct dma_fence_ops rocket_fence_ops = {
	.get_driver_name = rocket_fence_get_driver_name,
	.get_timeline_name = rocket_fence_get_timeline_name,
};

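/*
 * Allocate and initialize a hardware "done" fence on this core's fence
 * context, protected by the core's fence spinlock and tagged with a
 * monotonically increasing sequence number.
 */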
static struct dma_fence *rocket_fence_create(struct rocket_core *core)
{
	struct dma_fence *fence;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return ERR_PTR(-ENOMEM);

	dma_fence_init(fence, &rocket_fence_ops, &core->fence_lock,
		       core->fence_context, ++core->emit_seqno);

	return fence;
}

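/*
 * Copy the job's task descriptors from user space into the kernel.
 * task_struct_size may be larger than the kernel's struct drm_rocket_task
 * (newer user space on an older kernel); only the part the kernel knows
 * about is copied for each entry.
 */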
static int
rocket_copy_tasks(struct drm_device *dev,
		  struct drm_file *file_priv,
		  struct drm_rocket_job *job,
		  struct rocket_job *rjob)
{
	int ret = 0;

	if (job->task_struct_size < sizeof(struct drm_rocket_task))
		return -EINVAL;

	rjob->task_count = job->task_count;

	if (!rjob->task_count)
		return 0;

	rjob->tasks = kvmalloc_array(job->task_count, sizeof(*rjob->tasks), GFP_KERNEL);
	if (!rjob->tasks) {
		drm_dbg(dev, "Failed to allocate task array\n");
		return -ENOMEM;
	}

	for (int i = 0; i < rjob->task_count; i++) {
		struct drm_rocket_task task = {0};

		if (copy_from_user(&task,
				   u64_to_user_ptr(job->tasks) + i * job->task_struct_size,
				   sizeof(task))) {
			drm_dbg(dev, "Failed to copy incoming tasks\n");
			ret = -EFAULT;
			goto fail;
		}

		if (task.regcmd_count == 0) {
			drm_dbg(dev, "regcmd_count field in drm_rocket_task should be > 0.\n");
			ret = -EINVAL;
			goto fail;
		}

		rjob->tasks[i].regcmd = task.regcmd;
		rjob->tasks[i].regcmd_count = task.regcmd_count;
	}

	return 0;

fail:
	kvfree(rjob->tasks);
	rjob->tasks = NULL;	/* avoid a second kvfree() in rocket_job_cleanup() */
	return ret;
}

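/*
 * Program one task of a job into the NPU and kick off execution: set up the
 * CNA/CORE ping-pong register pointers, point the PC unit at the task's
 * register command buffer, unmask the DPU completion interrupts and set the
 * operation-enable bit. Called with core->job_lock held.
 */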
static void rocket_job_hw_submit(struct rocket_core *core, struct rocket_job *job)
{
	struct rocket_task *task;
	unsigned int extra_bit;

	/* Don't queue the job if a reset is in progress */
	if (atomic_read(&core->reset.pending))
		return;

	/* GO ! */

	task = &job->tasks[job->next_task_idx];
	job->next_task_idx++;

	rocket_pc_writel(core, BASE_ADDRESS, 0x1);

	/* From rknpu; in the TRM this bit is marked as reserved */
	extra_bit = 0x10000000 * core->index;
	rocket_cna_writel(core, S_POINTER, CNA_S_POINTER_POINTER_PP_EN(1) |
					   CNA_S_POINTER_EXECUTER_PP_EN(1) |
					   CNA_S_POINTER_POINTER_PP_MODE(1) |
					   extra_bit);

	rocket_core_writel(core, S_POINTER, CORE_S_POINTER_POINTER_PP_EN(1) |
					    CORE_S_POINTER_EXECUTER_PP_EN(1) |
					    CORE_S_POINTER_POINTER_PP_MODE(1) |
					    extra_bit);

	rocket_pc_writel(core, BASE_ADDRESS, task->regcmd);
	rocket_pc_writel(core, REGISTER_AMOUNTS,
			 PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT((task->regcmd_count + 1) / 2 - 1));

	rocket_pc_writel(core, INTERRUPT_MASK, PC_INTERRUPT_MASK_DPU_0 | PC_INTERRUPT_MASK_DPU_1);
	rocket_pc_writel(core, INTERRUPT_CLEAR, PC_INTERRUPT_CLEAR_DPU_0 | PC_INTERRUPT_CLEAR_DPU_1);

	rocket_pc_writel(core, TASK_CON, PC_TASK_CON_RESERVED_0(1) |
					 PC_TASK_CON_TASK_COUNT_CLEAR(1) |
					 PC_TASK_CON_TASK_NUMBER(1) |
					 PC_TASK_CON_TASK_PP_EN(1));

	rocket_pc_writel(core, TASK_DMA_BASE_ADDR, PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR(0x0));

	rocket_pc_writel(core, OPERATION_ENABLE, PC_OPERATION_ENABLE_OP_EN(1));

	dev_dbg(core->dev, "Submitted regcmd at 0x%llx to core %d\n", task->regcmd, core->index);
}

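/*
 * Reserve one fence slot on each BO and add the BOs' current fences as
 * scheduler dependencies, so the job only runs once earlier users of the
 * buffers have finished.
 */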
static int rocket_acquire_object_fences(struct drm_gem_object **bos,
					int bo_count,
					struct drm_sched_job *job,
					bool is_write)
{
	int i, ret;

	for (i = 0; i < bo_count; i++) {
		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
		if (ret)
			return ret;

		ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
							      is_write);
		if (ret)
			return ret;
	}

	return 0;
}

static void rocket_attach_object_fences(struct drm_gem_object **bos,
					int bo_count,
					struct dma_fence *fence)
{
	int i;

	for (i = 0; i < bo_count; i++)
		dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
}

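/*
 * Queue a validated job on the DRM scheduler: lock the reservations of all
 * input and output BOs, arm the job, record its implicit dependencies, push
 * it to the scheduler entity and attach the "inference done" fence to the
 * output BOs so other users wait for the results.
 */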
static int rocket_job_push(struct rocket_job *job)
{
	struct rocket_device *rdev = job->rdev;
	struct drm_gem_object **bos;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	bos = kvmalloc_array(job->in_bo_count + job->out_bo_count, sizeof(void *),
			     GFP_KERNEL);
	if (!bos)
		return -ENOMEM;

	memcpy(bos, job->in_bos, job->in_bo_count * sizeof(void *));
	memcpy(&bos[job->in_bo_count], job->out_bos, job->out_bo_count * sizeof(void *));

	ret = drm_gem_lock_reservations(bos, job->in_bo_count + job->out_bo_count, &acquire_ctx);
	if (ret)
		goto err;

	scoped_guard(mutex, &rdev->sched_lock) {
		drm_sched_job_arm(&job->base);

		job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished);

		ret = rocket_acquire_object_fences(job->in_bos, job->in_bo_count, &job->base, false);
		if (ret)
			goto err_unlock;

		ret = rocket_acquire_object_fences(job->out_bos, job->out_bo_count, &job->base, true);
		if (ret)
			goto err_unlock;

		kref_get(&job->refcount); /* put by scheduler job completion */

		drm_sched_entity_push_job(&job->base);
	}

	rocket_attach_object_fences(job->out_bos, job->out_bo_count, job->inference_done_fence);

err_unlock:
	drm_gem_unlock_reservations(bos, job->in_bo_count + job->out_bo_count, &acquire_ctx);
err:
	kvfree(bos);

	return ret;
}

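/*
 * Final kref release: drop the IOMMU domain and fence references, release
 * every input and output BO, and free the task array and the job itself.
 */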
static void rocket_job_cleanup(struct kref *ref)
{
	struct rocket_job *job = container_of(ref, struct rocket_job,
					      refcount);
	unsigned int i;

	rocket_iommu_domain_put(job->domain);

	dma_fence_put(job->done_fence);
	dma_fence_put(job->inference_done_fence);

	if (job->in_bos) {
		for (i = 0; i < job->in_bo_count; i++)
			drm_gem_object_put(job->in_bos[i]);

		kvfree(job->in_bos);
	}

	if (job->out_bos) {
		for (i = 0; i < job->out_bo_count; i++)
			drm_gem_object_put(job->out_bos[i]);

		kvfree(job->out_bos);
	}

	kvfree(job->tasks);

	kfree(job);
}

static void rocket_job_put(struct rocket_job *job)
{
	kref_put(&job->refcount, rocket_job_cleanup);
}

static void rocket_job_free(struct drm_sched_job *sched_job)
{
	struct rocket_job *job = to_rocket_job(sched_job);

	drm_sched_job_cleanup(sched_job);

	rocket_job_put(job);
}

static struct rocket_core *sched_to_core(struct rocket_device *rdev,
					 struct drm_gpu_scheduler *sched)
{
	unsigned int core;

	for (core = 0; core < rdev->num_cores; core++) {
		if (&rdev->cores[core].sched == sched)
			return &rdev->cores[core];
	}

	return NULL;
}

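/*
 * drm_sched run_job callback: create the hardware fence, take a runtime PM
 * reference, attach the client's IOMMU domain to the core and submit the
 * first pending task. Returns the fence that the IRQ handler signals once
 * the job's last task completes.
 */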
static struct dma_fence *rocket_job_run(struct drm_sched_job *sched_job)
{
	struct rocket_job *job = to_rocket_job(sched_job);
	struct rocket_device *rdev = job->rdev;
	struct rocket_core *core = sched_to_core(rdev, sched_job->sched);
	struct dma_fence *fence = NULL;
	int ret;

	if (unlikely(job->base.s_fence->finished.error))
		return NULL;

	/*
	 * Nothing to execute: can happen if the job has finished while
	 * we were resetting the NPU.
	 */
	if (job->next_task_idx == job->task_count)
		return NULL;

	fence = rocket_fence_create(core);
	if (IS_ERR(fence))
		return fence;

	if (job->done_fence)
		dma_fence_put(job->done_fence);
	job->done_fence = dma_fence_get(fence);

	ret = pm_runtime_get_sync(core->dev);
	if (ret < 0)
		return fence;

	ret = iommu_attach_group(job->domain->domain, core->iommu_group);
	if (ret < 0)
		return fence;

	scoped_guard(mutex, &core->job_lock) {
		core->in_flight_job = job;
		rocket_job_hw_submit(core, job);
	}

	return fence;
}

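/*
 * Bottom half of the job interrupt: acknowledge the hardware, then either
 * submit the next task of the in-flight job or, if it was the last one,
 * detach the IOMMU domain, signal the done fence and drop the runtime PM
 * reference.
 */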
static void rocket_job_handle_irq(struct rocket_core *core)
{
	pm_runtime_mark_last_busy(core->dev);

	rocket_pc_writel(core, OPERATION_ENABLE, 0x0);
	rocket_pc_writel(core, INTERRUPT_CLEAR, 0x1ffff);

	scoped_guard(mutex, &core->job_lock)
		if (core->in_flight_job) {
			if (core->in_flight_job->next_task_idx < core->in_flight_job->task_count) {
				rocket_job_hw_submit(core, core->in_flight_job);
				return;
			}

			iommu_detach_group(NULL, iommu_group_get(core->dev));
			dma_fence_signal(core->in_flight_job->done_fence);
			pm_runtime_put_autosuspend(core->dev);
			core->in_flight_job = NULL;
		}
}

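/*
 * Stop the scheduler, balance the runtime PM count for a stuck in-flight
 * job, detach the IOMMU group, reset the NPU core and restart the
 * scheduler. Only runs if a reset was flagged as pending.
 */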
static void
rocket_reset(struct rocket_core *core, struct drm_sched_job *bad)
{
	if (!atomic_read(&core->reset.pending))
		return;

	drm_sched_stop(&core->sched, bad);

	/*
	 * Remaining interrupts have been handled, but we might still have
	 * stuck jobs. Let's make sure the PM counters stay balanced by
	 * manually calling pm_runtime_put_noidle().
	 */
	scoped_guard(mutex, &core->job_lock) {
		if (core->in_flight_job)
			pm_runtime_put_noidle(core->dev);

		iommu_detach_group(NULL, core->iommu_group);

		core->in_flight_job = NULL;
	}

	/* Proceed with reset now. */
	rocket_core_reset(core);

	/* NPU has been reset, we can clear the reset pending bit. */
	atomic_set(&core->reset.pending, 0);

	/* Restart the scheduler */
	drm_sched_start(&core->sched, 0);
}

static enum drm_gpu_sched_stat rocket_job_timedout(struct drm_sched_job *sched_job)
{
	struct rocket_job *job = to_rocket_job(sched_job);
	struct rocket_device *rdev = job->rdev;
	struct rocket_core *core = sched_to_core(rdev, sched_job->sched);

	dev_err(core->dev, "NPU job timed out\n");

	atomic_set(&core->reset.pending, 1);
	rocket_reset(core, sched_job);

	return DRM_GPU_SCHED_STAT_RESET;
}

static void rocket_reset_work(struct work_struct *work)
{
	struct rocket_core *core;

	core = container_of(work, struct rocket_core, reset.work);
	rocket_reset(core, NULL);
}

static const struct drm_sched_backend_ops rocket_sched_ops = {
	.run_job = rocket_job_run,
	.timedout_job = rocket_job_timedout,
	.free_job = rocket_job_free,
};

static irqreturn_t rocket_job_irq_handler_thread(int irq, void *data)
{
	struct rocket_core *core = data;

	rocket_job_handle_irq(core);

	return IRQ_HANDLED;
}

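/*
 * Hard IRQ handler: check the raw status register, warn on DMA errors, and
 * if one of the DPU interrupts fired, mask further job interrupts and wake
 * the threaded handler.
 */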
static irqreturn_t rocket_job_irq_handler(int irq, void *data)
{
	struct rocket_core *core = data;
	u32 raw_status = rocket_pc_readl(core, INTERRUPT_RAW_STATUS);

	WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_READ_ERROR);
	WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_WRITE_ERROR);

	if (!(raw_status & PC_INTERRUPT_RAW_STATUS_DPU_0 ||
	      raw_status & PC_INTERRUPT_RAW_STATUS_DPU_1))
		return IRQ_NONE;

	rocket_pc_writel(core, INTERRUPT_MASK, 0x0);

	return IRQ_WAKE_THREAD;
}

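/*
 * Per-core job infrastructure setup: reset work and ordered reset
 * workqueue, fence context, threaded job IRQ and the DRM GPU scheduler
 * (one job in flight at a time, JOB_TIMEOUT_MS timeout).
 */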
int rocket_job_init(struct rocket_core *core)
{
	struct drm_sched_init_args args = {
		.ops = &rocket_sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = 1,
		.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
		.name = dev_name(core->dev),
		.dev = core->dev,
	};
	int ret;

	INIT_WORK(&core->reset.work, rocket_reset_work);
	spin_lock_init(&core->fence_lock);
	mutex_init(&core->job_lock);

	core->irq = platform_get_irq(to_platform_device(core->dev), 0);
	if (core->irq < 0)
		return core->irq;

	ret = devm_request_threaded_irq(core->dev, core->irq,
					rocket_job_irq_handler,
					rocket_job_irq_handler_thread,
					IRQF_SHARED, dev_name(core->dev),
					core);
	if (ret) {
		dev_err(core->dev, "failed to request job irq\n");
		return ret;
	}

	core->reset.wq = alloc_ordered_workqueue("rocket-reset-%d", 0, core->index);
	if (!core->reset.wq)
		return -ENOMEM;

	core->fence_context = dma_fence_context_alloc(1);

	args.timeout_wq = core->reset.wq;
	ret = drm_sched_init(&core->sched, &args);
	if (ret) {
		dev_err(core->dev, "Failed to create scheduler: %d.\n", ret);
		goto err_sched;
	}

	return 0;

err_sched:
	drm_sched_fini(&core->sched);

	destroy_workqueue(core->reset.wq);
	return ret;
}

void rocket_job_fini(struct rocket_core *core)
{
	drm_sched_fini(&core->sched);

	cancel_work_sync(&core->reset.work);
	destroy_workqueue(core->reset.wq);
}

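/*
 * Per-file-descriptor setup: create a scheduler entity that can run on any
 * core's scheduler, so jobs from this client are spread across the
 * available NPU cores.
 */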
int rocket_job_open(struct rocket_file_priv *rocket_priv)
{
	struct rocket_device *rdev = rocket_priv->rdev;
	struct drm_gpu_scheduler **scheds = kmalloc_array(rdev->num_cores,
							  sizeof(*scheds),
							  GFP_KERNEL);
	unsigned int core;
	int ret;

	if (!scheds)
		return -ENOMEM;

	for (core = 0; core < rdev->num_cores; core++)
		scheds[core] = &rdev->cores[core].sched;

	ret = drm_sched_entity_init(&rocket_priv->sched_entity,
				    DRM_SCHED_PRIORITY_NORMAL,
				    scheds,
				    rdev->num_cores, NULL);
	if (WARN_ON(ret))
		return ret;

	return 0;
}

void rocket_job_close(struct rocket_file_priv *rocket_priv)
{
	struct drm_sched_entity *entity = &rocket_priv->sched_entity;

	kfree(entity->sched_list);
	drm_sched_entity_destroy(entity);
}

int rocket_job_is_idle(struct rocket_core *core)
{
	/* If there are any jobs in this HW queue, we're not idle */
	if (atomic_read(&core->sched.credit_count))
		return false;

	return true;
}

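/*
 * Build a rocket_job from one drm_rocket_job descriptor: initialize the
 * scheduler job, copy the task array, look up the input and output BO
 * handles, take a reference on the client's IOMMU domain and push the job
 * to the scheduler.
 */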
static int rocket_ioctl_submit_job(struct drm_device *dev, struct drm_file *file,
				   struct drm_rocket_job *job)
{
	struct rocket_device *rdev = to_rocket_device(dev);
	struct rocket_file_priv *file_priv = file->driver_priv;
	struct rocket_job *rjob = NULL;
	int ret = 0;

	if (job->task_count == 0)
		return -EINVAL;

	rjob = kzalloc(sizeof(*rjob), GFP_KERNEL);
	if (!rjob)
		return -ENOMEM;

	kref_init(&rjob->refcount);

	rjob->rdev = rdev;

	ret = drm_sched_job_init(&rjob->base,
				 &file_priv->sched_entity,
				 1, NULL, file->client_id);
	if (ret)
		goto out_put_job;

	ret = rocket_copy_tasks(dev, file, job, rjob);
	if (ret)
		goto out_cleanup_job;

	ret = drm_gem_objects_lookup(file, u64_to_user_ptr(job->in_bo_handles),
				     job->in_bo_handle_count, &rjob->in_bos);
	if (ret)
		goto out_cleanup_job;

	rjob->in_bo_count = job->in_bo_handle_count;

	ret = drm_gem_objects_lookup(file, u64_to_user_ptr(job->out_bo_handles),
				     job->out_bo_handle_count, &rjob->out_bos);
	if (ret)
		goto out_cleanup_job;

	rjob->out_bo_count = job->out_bo_handle_count;

	rjob->domain = rocket_iommu_domain_get(file_priv);

	ret = rocket_job_push(rjob);
	if (ret)
		goto out_cleanup_job;

out_cleanup_job:
	if (ret)
		drm_sched_job_cleanup(&rjob->base);
out_put_job:
	rocket_job_put(rjob);

	return ret;
}

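/*
 * DRM_IOCTL_ROCKET_SUBMIT handler: validate the submit arguments, copy the
 * array of job descriptors from user space (job_struct_size allows the
 * user-space struct to grow over time) and submit each job in turn.
 */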
int rocket_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_rocket_submit *args = data;
	struct drm_rocket_job *jobs;
	int ret = 0;
	unsigned int i = 0;

	if (args->job_count == 0)
		return 0;

	if (args->job_struct_size < sizeof(struct drm_rocket_job)) {
		drm_dbg(dev, "job_struct_size field in drm_rocket_submit struct is too small.\n");
		return -EINVAL;
	}

	if (args->reserved != 0) {
		drm_dbg(dev, "Reserved field in drm_rocket_submit struct should be 0.\n");
		return -EINVAL;
	}

	jobs = kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL);
	if (!jobs) {
		drm_dbg(dev, "Failed to allocate incoming job array\n");
		return -ENOMEM;
	}

	for (i = 0; i < args->job_count; i++) {
		if (copy_from_user(&jobs[i],
				   u64_to_user_ptr(args->jobs) + i * args->job_struct_size,
				   sizeof(*jobs))) {
			ret = -EFAULT;
			drm_dbg(dev, "Failed to copy incoming job array\n");
			goto exit;
		}
	}

	for (i = 0; i < args->job_count; i++)
		rocket_ioctl_submit_job(dev, file, &jobs[i]);

exit:
	kvfree(jobs);

	return ret;
}