xref: /linux/drivers/accel/ethosu/ethosu_job.c (revision 9cff90774872ed6613b7571ce018b5b455d86890)
1 // SPDX-License-Identifier: GPL-2.0-only OR MIT
2 /* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
3 /* Copyright 2025 Arm, Ltd. */
4 
5 #include <linux/bitfield.h>
6 #include <linux/genalloc.h>
7 #include <linux/interrupt.h>
8 #include <linux/iopoll.h>
9 #include <linux/platform_device.h>
10 #include <linux/pm_runtime.h>
11 
12 #include <drm/drm_file.h>
13 #include <drm/drm_gem.h>
14 #include <drm/drm_gem_dma_helper.h>
15 #include <drm/drm_print.h>
16 #include <drm/ethosu_accel.h>
17 
18 #include "ethosu_device.h"
19 #include "ethosu_drv.h"
20 #include "ethosu_gem.h"
21 #include "ethosu_job.h"
22 
23 #define JOB_TIMEOUT_MS 500
24 
25 static struct ethosu_job *to_ethosu_job(struct drm_sched_job *sched_job)
26 {
27 	return container_of(sched_job, struct ethosu_job, base);
28 }
29 
/* dma_fence_ops.get_driver_name callback: the name is the same for every fence. */
static const char *ethosu_fence_get_driver_name(struct dma_fence *fence)
{
	static const char driver_name[] = "ethosu";

	return driver_name;
}
34 
/* dma_fence_ops.get_timeline_name callback: the name is the same for every fence. */
static const char *ethosu_fence_get_timeline_name(struct dma_fence *fence)
{
	static const char timeline_name[] = "ethosu-npu";

	return timeline_name;
}
39 
40 static const struct dma_fence_ops ethosu_fence_ops = {
41 	.get_driver_name = ethosu_fence_get_driver_name,
42 	.get_timeline_name = ethosu_fence_get_timeline_name,
43 };
44 
45 static void ethosu_job_hw_submit(struct ethosu_device *dev, struct ethosu_job *job)
46 {
47 	struct drm_gem_dma_object *cmd_bo = to_drm_gem_dma_obj(job->cmd_bo);
48 	struct ethosu_validated_cmdstream_info *cmd_info = to_ethosu_bo(job->cmd_bo)->info;
49 
50 	for (int i = 0; i < job->region_cnt; i++) {
51 		struct drm_gem_dma_object *bo;
52 		int region = job->region_bo_num[i];
53 
54 		bo = to_drm_gem_dma_obj(job->region_bo[i]);
55 		writel_relaxed(lower_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP(region));
56 		writel_relaxed(upper_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP_HI(region));
57 		dev_dbg(dev->base.dev, "Region %d base addr = %pad\n", region, &bo->dma_addr);
58 	}
59 
60 	if (job->sram_size) {
61 		writel_relaxed(lower_32_bits(dev->sramphys),
62 			       dev->regs + NPU_REG_BASEP(ETHOSU_SRAM_REGION));
63 		writel_relaxed(upper_32_bits(dev->sramphys),
64 			       dev->regs + NPU_REG_BASEP_HI(ETHOSU_SRAM_REGION));
65 		dev_dbg(dev->base.dev, "Region %d base addr = %pad (SRAM)\n",
66 			ETHOSU_SRAM_REGION, &dev->sramphys);
67 	}
68 
69 	writel_relaxed(lower_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE);
70 	writel_relaxed(upper_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE_HI);
71 	writel_relaxed(cmd_info->cmd_size, dev->regs + NPU_REG_QSIZE);
72 
73 	writel(CMD_TRANSITION_TO_RUN, dev->regs + NPU_REG_CMD);
74 
75 	dev_dbg(dev->base.dev,
76 		"Submitted cmd at %pad to core\n", &cmd_bo->dma_addr);
77 }
78 
79 static int ethosu_acquire_object_fences(struct ethosu_job *job)
80 {
81 	int i, ret;
82 	struct drm_gem_object **bos = job->region_bo;
83 	struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;
84 
85 	for (i = 0; i < job->region_cnt; i++) {
86 		bool is_write;
87 
88 		if (!bos[i])
89 			break;
90 
91 		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
92 		if (ret)
93 			return ret;
94 
95 		is_write = info->output_region[job->region_bo_num[i]];
96 		ret = drm_sched_job_add_implicit_dependencies(&job->base, bos[i],
97 							      is_write);
98 		if (ret)
99 			return ret;
100 	}
101 
102 	return 0;
103 }
104 
105 static void ethosu_attach_object_fences(struct ethosu_job *job)
106 {
107 	int i;
108 	struct dma_fence *fence = job->inference_done_fence;
109 	struct drm_gem_object **bos = job->region_bo;
110 	struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;
111 
112 	for (i = 0; i < job->region_cnt; i++)
113 		if (info->output_region[job->region_bo_num[i]])
114 			dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
115 }
116 
117 static int ethosu_job_push(struct ethosu_job *job)
118 {
119 	struct ww_acquire_ctx acquire_ctx;
120 	int ret;
121 
122 	ret = drm_gem_lock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
123 	if (ret)
124 		return ret;
125 
126 	ret = ethosu_acquire_object_fences(job);
127 	if (ret)
128 		goto out;
129 
130 	ret = pm_runtime_resume_and_get(job->dev->base.dev);
131 	if (!ret) {
132 		guard(mutex)(&job->dev->sched_lock);
133 
134 		drm_sched_job_arm(&job->base);
135 		job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished);
136 		kref_get(&job->refcount); /* put by scheduler job completion */
137 		drm_sched_entity_push_job(&job->base);
138 		ethosu_attach_object_fences(job);
139 	}
140 
141 out:
142 	drm_gem_unlock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
143 	return ret;
144 }
145 
146 static void ethosu_job_err_cleanup(struct ethosu_job *job)
147 {
148 	unsigned int i;
149 
150 	for (i = 0; i < job->region_cnt; i++)
151 		drm_gem_object_put(job->region_bo[i]);
152 
153 	drm_gem_object_put(job->cmd_bo);
154 
155 	kfree(job);
156 }
157 
158 static void ethosu_job_cleanup(struct kref *ref)
159 {
160 	struct ethosu_job *job = container_of(ref, struct ethosu_job,
161 						refcount);
162 
163 	pm_runtime_put_autosuspend(job->dev->base.dev);
164 
165 	dma_fence_put(job->done_fence);
166 	dma_fence_put(job->inference_done_fence);
167 
168 	ethosu_job_err_cleanup(job);
169 }
170 
171 static void ethosu_job_put(struct ethosu_job *job)
172 {
173 	kref_put(&job->refcount, ethosu_job_cleanup);
174 }
175 
/*
 * Scheduler free_job callback: clean up the scheduler side of the job, then
 * drop one job reference.
 */
static void ethosu_job_free(struct drm_sched_job *sched_job)
{
	drm_sched_job_cleanup(sched_job);
	ethosu_job_put(to_ethosu_job(sched_job));
}
183 
184 static struct dma_fence *ethosu_job_run(struct drm_sched_job *sched_job)
185 {
186 	struct ethosu_job *job = to_ethosu_job(sched_job);
187 	struct ethosu_device *dev = job->dev;
188 	struct dma_fence *fence = job->done_fence;
189 
190 	if (unlikely(job->base.s_fence->finished.error))
191 		return NULL;
192 
193 	dma_fence_init(fence, &ethosu_fence_ops, &dev->fence_lock,
194 		       dev->fence_context, ++dev->emit_seqno);
195 	dma_fence_get(fence);
196 
197 	scoped_guard(mutex, &dev->job_lock) {
198 		dev->in_flight_job = job;
199 		ethosu_job_hw_submit(dev, job);
200 	}
201 
202 	return fence;
203 }
204 
205 static void ethosu_job_handle_irq(struct ethosu_device *dev)
206 {
207 	u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);
208 
209 	if (status & (STATUS_BUS_STATUS | STATUS_CMD_PARSE_ERR)) {
210 		dev_err(dev->base.dev, "Error IRQ - %x\n", status);
211 		drm_sched_fault(&dev->sched);
212 		return;
213 	}
214 
215 	scoped_guard(mutex, &dev->job_lock) {
216 		if (dev->in_flight_job) {
217 			dma_fence_signal(dev->in_flight_job->done_fence);
218 			dev->in_flight_job = NULL;
219 		}
220 	}
221 }
222 
223 static irqreturn_t ethosu_job_irq_handler_thread(int irq, void *data)
224 {
225 	struct ethosu_device *dev = data;
226 
227 	ethosu_job_handle_irq(dev);
228 
229 	return IRQ_HANDLED;
230 }
231 
232 static irqreturn_t ethosu_job_irq_handler(int irq, void *data)
233 {
234 	struct ethosu_device *dev = data;
235 	u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);
236 
237 	if (!(status & STATUS_IRQ_RAISED))
238 		return IRQ_NONE;
239 
240 	writel_relaxed(CMD_CLEAR_IRQ, dev->regs + NPU_REG_CMD);
241 	return IRQ_WAKE_THREAD;
242 }
243 
244 static enum drm_gpu_sched_stat ethosu_job_timedout(struct drm_sched_job *bad)
245 {
246 	struct ethosu_job *job = to_ethosu_job(bad);
247 	struct ethosu_device *dev = job->dev;
248 	bool running;
249 	u32 *bocmds = to_drm_gem_dma_obj(job->cmd_bo)->vaddr;
250 	u32 cmdaddr;
251 
252 	cmdaddr = readl_relaxed(dev->regs + NPU_REG_QREAD);
253 	running = FIELD_GET(STATUS_STATE_RUNNING, readl_relaxed(dev->regs + NPU_REG_STATUS));
254 
255 	if (running) {
256 		int ret;
257 		u32 reg;
258 
259 		ret = readl_relaxed_poll_timeout(dev->regs + NPU_REG_QREAD,
260 						 reg,
261 						 reg != cmdaddr,
262 						 USEC_PER_MSEC, 100 * USEC_PER_MSEC);
263 
264 		/* If still running and progress is being made, just return */
265 		if (!ret)
266 			return DRM_GPU_SCHED_STAT_NO_HANG;
267 	}
268 
269 	dev_err(dev->base.dev, "NPU sched timed out: NPU %s, cmdstream offset 0x%x: 0x%x\n",
270 		running ? "running" : "stopped",
271 		cmdaddr, bocmds[cmdaddr / 4]);
272 
273 	drm_sched_stop(&dev->sched, bad);
274 
275 	scoped_guard(mutex, &dev->job_lock)
276 		dev->in_flight_job = NULL;
277 
278 	/* Proceed with reset now. */
279 	pm_runtime_force_suspend(dev->base.dev);
280 	pm_runtime_force_resume(dev->base.dev);
281 
282 	/* Restart the scheduler */
283 	drm_sched_start(&dev->sched, 0);
284 
285 	return DRM_GPU_SCHED_STAT_RESET;
286 }
287 
288 static const struct drm_sched_backend_ops ethosu_sched_ops = {
289 	.run_job = ethosu_job_run,
290 	.timedout_job = ethosu_job_timedout,
291 	.free_job = ethosu_job_free
292 };
293 
294 int ethosu_job_init(struct ethosu_device *edev)
295 {
296 	struct device *dev = edev->base.dev;
297 	struct drm_sched_init_args args = {
298 		.ops = &ethosu_sched_ops,
299 		.credit_limit = 1,
300 		.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
301 		.name = dev_name(dev),
302 		.dev = dev,
303 	};
304 	int ret;
305 
306 	spin_lock_init(&edev->fence_lock);
307 	ret = devm_mutex_init(dev, &edev->job_lock);
308 	if (ret)
309 		return ret;
310 	ret = devm_mutex_init(dev, &edev->sched_lock);
311 	if (ret)
312 		return ret;
313 
314 	edev->irq = platform_get_irq(to_platform_device(dev), 0);
315 	if (edev->irq < 0)
316 		return edev->irq;
317 
318 	ret = devm_request_threaded_irq(dev, edev->irq,
319 					ethosu_job_irq_handler,
320 					ethosu_job_irq_handler_thread,
321 					IRQF_SHARED, KBUILD_MODNAME,
322 					edev);
323 	if (ret) {
324 		dev_err(dev, "failed to request irq\n");
325 		return ret;
326 	}
327 
328 	edev->fence_context = dma_fence_context_alloc(1);
329 
330 	ret = drm_sched_init(&edev->sched, &args);
331 	if (ret) {
332 		dev_err(dev, "Failed to create scheduler: %d\n", ret);
333 		goto err_sched;
334 	}
335 
336 	return 0;
337 
338 err_sched:
339 	drm_sched_fini(&edev->sched);
340 	return ret;
341 }
342 
343 void ethosu_job_fini(struct ethosu_device *dev)
344 {
345 	drm_sched_fini(&dev->sched);
346 }
347 
348 int ethosu_job_open(struct ethosu_file_priv *ethosu_priv)
349 {
350 	struct ethosu_device *dev = ethosu_priv->edev;
351 	struct drm_gpu_scheduler *sched = &dev->sched;
352 	int ret;
353 
354 	ret = drm_sched_entity_init(&ethosu_priv->sched_entity,
355 				    DRM_SCHED_PRIORITY_NORMAL,
356 				    &sched, 1, NULL);
357 	return WARN_ON(ret);
358 }
359 
360 void ethosu_job_close(struct ethosu_file_priv *ethosu_priv)
361 {
362 	struct drm_sched_entity *entity = &ethosu_priv->sched_entity;
363 
364 	drm_sched_entity_destroy(entity);
365 }
366 
367 static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file,
368 				   struct drm_ethosu_job *job)
369 {
370 	struct ethosu_device *edev = to_ethosu_device(dev);
371 	struct ethosu_file_priv *file_priv = file->driver_priv;
372 	struct ethosu_job *ejob = NULL;
373 	struct ethosu_validated_cmdstream_info *cmd_info;
374 	int ret = 0;
375 
376 	/* BO region 2 is reserved if SRAM is used */
377 	if (job->region_bo_handles[ETHOSU_SRAM_REGION] && job->sram_size)
378 		return -EINVAL;
379 
380 	if (edev->npu_info.sram_size < job->sram_size)
381 		return -EINVAL;
382 
383 	ejob = kzalloc_obj(*ejob);
384 	if (!ejob)
385 		return -ENOMEM;
386 
387 	kref_init(&ejob->refcount);
388 
389 	ejob->dev = edev;
390 	ejob->sram_size = job->sram_size;
391 
392 	ejob->done_fence = kzalloc_obj(*ejob->done_fence);
393 	if (!ejob->done_fence) {
394 		ret = -ENOMEM;
395 		goto out_cleanup_job;
396 	}
397 
398 	ret = drm_sched_job_init(&ejob->base,
399 				 &file_priv->sched_entity,
400 				 1, NULL, file->client_id);
401 	if (ret)
402 		goto out_put_job;
403 
404 	ejob->cmd_bo = drm_gem_object_lookup(file, job->cmd_bo);
405 	if (!ejob->cmd_bo) {
406 		ret = -ENOENT;
407 		goto out_cleanup_job;
408 	}
409 	cmd_info = to_ethosu_bo(ejob->cmd_bo)->info;
410 	if (!cmd_info) {
411 		ret = -EINVAL;
412 		goto out_cleanup_job;
413 	}
414 
415 	for (int i = 0; i < NPU_BASEP_REGION_MAX; i++) {
416 		struct drm_gem_object *gem;
417 
418 		/* Can only omit a BO handle if the region is not used or used for SRAM */
419 		if (!job->region_bo_handles[i]) {
420 			if (!cmd_info->region_size[i])
421 				continue;
422 			if (i == ETHOSU_SRAM_REGION) {
423 				if (cmd_info->region_size[i] <= edev->npu_info.sram_size)
424 					continue;
425 
426 				dev_err(dev->dev,
427 					"cmd stream region %d size greater than SRAM size (%llu > %u)\n",
428 					i, cmd_info->region_size[i],
429 					edev->npu_info.sram_size);
430 				ret = -EINVAL;
431 				goto out_cleanup_job;
432 			}
433 		}
434 
435 		if (job->region_bo_handles[i] && !cmd_info->region_size[i]) {
436 			dev_err(dev->dev,
437 				"Cmdstream BO handle %d set for unused region %d\n",
438 				job->region_bo_handles[i], i);
439 			ret = -EINVAL;
440 			goto out_cleanup_job;
441 		}
442 
443 		gem = drm_gem_object_lookup(file, job->region_bo_handles[i]);
444 		if (!gem) {
445 			dev_err(dev->dev,
446 				"Invalid BO handle %d for region %d\n",
447 				job->region_bo_handles[i], i);
448 			ret = -ENOENT;
449 			goto out_cleanup_job;
450 		}
451 
452 		ejob->region_bo[ejob->region_cnt] = gem;
453 		ejob->region_bo_num[ejob->region_cnt] = i;
454 		ejob->region_cnt++;
455 
456 		if (to_ethosu_bo(gem)->info) {
457 			dev_err(dev->dev,
458 				"Cmdstream BO handle %d used for region %d\n",
459 				job->region_bo_handles[i], i);
460 			ret = -EINVAL;
461 			goto out_cleanup_job;
462 		}
463 
464 		/* Verify the command stream doesn't have accesses outside the BO */
465 		if (cmd_info->region_size[i] > gem->size) {
466 			dev_err(dev->dev,
467 				"cmd stream region %d size greater than BO size (%llu > %zu)\n",
468 				i, cmd_info->region_size[i], gem->size);
469 			ret = -EOVERFLOW;
470 			goto out_cleanup_job;
471 		}
472 	}
473 	ret = ethosu_job_push(ejob);
474 	if (!ret) {
475 		ethosu_job_put(ejob);
476 		return 0;
477 	}
478 
479 out_cleanup_job:
480 	if (ret)
481 		drm_sched_job_cleanup(&ejob->base);
482 out_put_job:
483 	ethosu_job_err_cleanup(ejob);
484 
485 	return ret;
486 }
487 
488 int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file)
489 {
490 	struct drm_ethosu_submit *args = data;
491 	int ret = 0;
492 	unsigned int i = 0;
493 
494 	if (args->pad) {
495 		drm_dbg(dev, "Reserved field in drm_ethosu_submit struct should be 0.\n");
496 		return -EINVAL;
497 	}
498 
499 	struct drm_ethosu_job __free(kvfree) *jobs =
500 		kvmalloc_objs(*jobs, args->job_count);
501 	if (!jobs)
502 		return -ENOMEM;
503 
504 	if (copy_from_user(jobs,
505 			   (void __user *)(uintptr_t)args->jobs,
506 			   args->job_count * sizeof(*jobs))) {
507 		drm_dbg(dev, "Failed to copy incoming job array\n");
508 		return -EFAULT;
509 	}
510 
511 	for (i = 0; i < args->job_count; i++) {
512 		ret = ethosu_ioctl_submit_job(dev, file, &jobs[i]);
513 		if (ret)
514 			return ret;
515 	}
516 
517 	return 0;
518 }
519