// SPDX-License-Identifier: GPL-2.0
/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
/* Copyright 2019 Collabora ltd. */
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/dma-resv.h>
#include <drm/gpu_scheduler.h>
#include <drm/panfrost_drm.h>

#include "panfrost_device.h"
#include "panfrost_devfreq.h"
#include "panfrost_job.h"
#include "panfrost_features.h"
#include "panfrost_issues.h"
#include "panfrost_gem.h"
#include "panfrost_regs.h"
#include "panfrost_gpu.h"
#include "panfrost_mmu.h"
#include "panfrost_dump.h"

#define MAX_JM_CTX_PER_FILE 64
#define JOB_TIMEOUT_MS 500

#define job_write(dev, reg, data) writel(data, dev->iomem + (reg))
#define job_read(dev, reg) readl(dev->iomem + (reg))

const char * const panfrost_engine_names[] = {
	"fragment", "vertex-tiler", "compute-only"
};

struct panfrost_queue_state {
	struct drm_gpu_scheduler sched;
	u64 fence_context;
	u64 emit_seqno;
};

struct panfrost_job_slot {
	struct panfrost_queue_state queue[NUM_JOB_SLOTS];
	spinlock_t job_lock;
	int irq;
};

static struct panfrost_job *
to_panfrost_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct panfrost_job, base);
}

struct panfrost_fence {
	struct dma_fence base;
	struct drm_device *dev;
	/* panfrost seqno for signaled() test */
	u64 seqno;
	int queue;
};

static inline struct panfrost_fence *
to_panfrost_fence(struct dma_fence *fence)
{
	return (struct panfrost_fence *)fence;
}

static const char *panfrost_fence_get_driver_name(struct dma_fence *fence)
{
	return "panfrost";
}

static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence)
{
	struct panfrost_fence *f = to_panfrost_fence(fence);

	switch (f->queue) {
	case 0:
		return "panfrost-js-0";
	case 1:
		return "panfrost-js-1";
	case 2:
		return "panfrost-js-2";
	default:
		return NULL;
	}
}

static const struct dma_fence_ops panfrost_fence_ops = {
	.get_driver_name = panfrost_fence_get_driver_name,
	.get_timeline_name = panfrost_fence_get_timeline_name,
};

static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num)
{
	struct panfrost_fence *fence;
	struct panfrost_job_slot *js = pfdev->js;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return ERR_PTR(-ENOMEM);

	fence->dev = &pfdev->base;
	fence->queue = js_num;
	fence->seqno = ++js->queue[js_num].emit_seqno;
	dma_fence_init(&fence->base, &panfrost_fence_ops, &js->job_lock,
		       js->queue[js_num].fence_context, fence->seqno);

	return &fence->base;
}
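
/*
 * Hedged usage sketch, not part of the driver proper: a fence returned by
 * panfrost_fence_create() is consumed through the generic dma_fence API.
 * The helper name below is illustrative only.
 */
static inline long panfrost_fence_wait_example(struct dma_fence *fence)
{
	/* Interruptible wait, bounded by the same budget as the job timeout. */
	return dma_fence_wait_timeout(fence, true,
				      msecs_to_jiffies(JOB_TIMEOUT_MS));
}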

int panfrost_job_get_slot(struct panfrost_job *job)
{
	/* JS0: fragment jobs.
	 * JS1: vertex/tiler jobs
	 * JS2: compute jobs
	 */
	if (job->requirements & PANFROST_JD_REQ_FS)
		return 0;

/* Not exposed to userspace yet */
#if 0
	if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) {
		if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) &&
		    (job->pfdev->features.nr_core_groups == 2))
			return 2;
		if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987))
			return 2;
	}
#endif
	return 1;
}
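
/*
 * Example of the routing above (illustrative only): a job submitted with
 * job->requirements == PANFROST_JD_REQ_FS lands on JS0, while anything
 * else, including vertex/tiler work, currently lands on JS1. The JS2 path
 * stays compiled out until compute-only requirements are exposed to
 * userspace.
 */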

static void panfrost_job_write_affinity(struct panfrost_device *pfdev,
					u32 requirements,
					int js)
{
	u64 affinity;

	/*
	 * Use all cores for now.
	 * Eventually we may need to support tiler-only jobs and h/w with
	 * multiple (2) coherent core groups.
	 */
	affinity = pfdev->features.shader_present;

	job_write(pfdev, JS_AFFINITY_NEXT_LO(js), lower_32_bits(affinity));
	job_write(pfdev, JS_AFFINITY_NEXT_HI(js), upper_32_bits(affinity));
}

static u32
panfrost_get_job_chain_flag(const struct panfrost_job *job)
{
	struct panfrost_fence *f = to_panfrost_fence(job->done_fence);

	if (!panfrost_has_hw_feature(job->pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
		return 0;

	return (f->seqno & 1) ? JS_CONFIG_JOB_CHAIN_FLAG : 0;
}
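
/*
 * Since seqnos increase by one per fence on a slot, two back-to-back jobs
 * always carry opposite chain flags. That is what lets the close path pick
 * JS_COMMAND_HARD_STOP_0 vs JS_COMMAND_HARD_STOP_1 to stop one job chain
 * without disturbing the other (see panfrost_jm_ctx_destroy()).
 */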

static struct panfrost_job *
panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
{
	struct panfrost_job *job = pfdev->jobs[slot][0];

	WARN_ON(!job);

	if (job->is_profiled && job->engine_usage) {
		job->engine_usage->elapsed_ns[slot] +=
			ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
		job->engine_usage->cycles[slot] +=
			panfrost_cycle_counter_read(pfdev) - job->start_cycles;
	}

	if (job->requirements & PANFROST_JD_REQ_CYCLE_COUNT || job->is_profiled)
		panfrost_cycle_counter_put(pfdev);

	pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
	pfdev->jobs[slot][1] = NULL;

	return job;
}

static unsigned int
panfrost_enqueue_job(struct panfrost_device *pfdev, int slot,
		     struct panfrost_job *job)
{
	if (WARN_ON(!job))
		return 0;

	if (!pfdev->jobs[slot][0]) {
		pfdev->jobs[slot][0] = job;
		return 0;
	}

	WARN_ON(pfdev->jobs[slot][1]);
	pfdev->jobs[slot][1] = job;
	WARN_ON(panfrost_get_job_chain_flag(job) ==
		panfrost_get_job_chain_flag(pfdev->jobs[slot][0]));
	return 1;
}
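
/*
 * Hedged sketch of the invariant maintained above, assuming pfdev->jobs[]
 * mirrors the HW CURRENT/NEXT register pair per slot; the helper is
 * illustrative and not part of the driver.
 */
static inline bool panfrost_slot_is_full(struct panfrost_device *pfdev,
					 int slot)
{
	lockdep_assert_held(&pfdev->js->job_lock);
	return pfdev->jobs[slot][0] && pfdev->jobs[slot][1];
}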

static int panfrost_job_hw_submit(struct panfrost_job *job, int js)
{
	struct panfrost_device *pfdev = job->pfdev;
	unsigned int subslot;
	u32 cfg;
	u64 jc_head = job->jc;
	int ret;

	ret = pm_runtime_get_sync(pfdev->base.dev);
	if (ret < 0)
		goto err_hwsubmit;

	if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) {
		ret = -EINVAL;
		goto err_hwsubmit;
	}

	ret = panfrost_mmu_as_get(pfdev, job->mmu);
	if (ret < 0)
		goto err_hwsubmit;

	cfg = ret;

	panfrost_devfreq_record_busy(&pfdev->pfdevfreq);

	job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc_head));
	job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc_head));

	panfrost_job_write_affinity(pfdev, job->requirements, js);

	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
	 * start */
	cfg |= JS_CONFIG_THREAD_PRI(8) |
		JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
		JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
		panfrost_get_job_chain_flag(job);

	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;

	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649))
		cfg |= JS_CONFIG_START_MMU;

	job_write(pfdev, JS_CONFIG_NEXT(js), cfg);

	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
		job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id);

	/* GO ! */

	spin_lock(&pfdev->js->job_lock);
	subslot = panfrost_enqueue_job(pfdev, js, job);
	/* Don't queue the job if a reset is in progress */
	if (!atomic_read(&pfdev->reset.pending)) {
		job->is_profiled = pfdev->profile_mode;

		if (job->requirements & PANFROST_JD_REQ_CYCLE_COUNT ||
		    job->is_profiled)
			panfrost_cycle_counter_get(pfdev);

		if (job->is_profiled) {
			job->start_time = ktime_get();
			job->start_cycles = panfrost_cycle_counter_read(pfdev);
		}

		job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
		dev_dbg(pfdev->base.dev,
			"JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d",
			job, js, subslot, jc_head, cfg & 0xf);
	}
	spin_unlock(&pfdev->js->job_lock);

	return 0;

err_hwsubmit:
	pm_runtime_put_autosuspend(pfdev->base.dev);
	return ret;
}
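
/*
 * Note the ordering in panfrost_job_hw_submit(): all *_NEXT registers
 * (head, affinity, config, flush ID) are programmed before JS_COMMAND_NEXT
 * is written with JS_COMMAND_START, and the final kick happens under
 * js->job_lock so the software queue in pfdev->jobs[] cannot get out of
 * sync with what the hardware is about to execute.
 */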

static int panfrost_acquire_object_fences(struct drm_gem_object **bos,
					  int bo_count,
					  struct drm_sched_job *job)
{
	int i, ret;

	for (i = 0; i < bo_count; i++) {
		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
		if (ret)
			return ret;

		/* panfrost always uses write mode in its current uapi */
		ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
							      true);
		if (ret)
			return ret;
	}

	return 0;
}

static void panfrost_attach_object_fences(struct drm_gem_object **bos,
					  int bo_count,
					  struct dma_fence *fence)
{
	int i;

	for (i = 0; i < bo_count; i++)
		dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
}

int panfrost_job_push(struct panfrost_job *job)
{
	struct panfrost_device *pfdev = job->pfdev;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
					    &acquire_ctx);
	if (ret)
		return ret;

	mutex_lock(&pfdev->sched_lock);
	drm_sched_job_arm(&job->base);

	job->render_done_fence = dma_fence_get(&job->base.s_fence->finished);

	ret = panfrost_acquire_object_fences(job->bos, job->bo_count,
					     &job->base);
	if (ret) {
		mutex_unlock(&pfdev->sched_lock);
		goto unlock;
	}

	kref_get(&job->refcount); /* put by scheduler job completion */

	drm_sched_entity_push_job(&job->base);

	mutex_unlock(&pfdev->sched_lock);

	panfrost_attach_object_fences(job->bos, job->bo_count,
				      job->render_done_fence);

unlock:
	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);

	return ret;
}
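
/*
 * The submission sequence above follows the standard drm_sched flow: lock
 * all BO reservations, arm the job (allocating its finished fence), record
 * implicit-sync dependencies, push to the entity, then attach the finished
 * fence back to the BOs as a write fence. pfdev->sched_lock serializes the
 * arm+push pair so scheduler fence seqnos stay ordered.
 */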

static void panfrost_job_cleanup(struct kref *ref)
{
	struct panfrost_job *job = container_of(ref, struct panfrost_job,
						refcount);
	unsigned int i;

	dma_fence_put(job->done_fence);
	dma_fence_put(job->render_done_fence);

	if (job->mappings) {
		for (i = 0; i < job->bo_count; i++) {
			if (!job->mappings[i])
				break;

			atomic_dec(&job->mappings[i]->obj->gpu_usecount);
			panfrost_gem_mapping_put(job->mappings[i]);
		}
		kvfree(job->mappings);
	}

	if (job->bos) {
		for (i = 0; i < job->bo_count; i++)
			drm_gem_object_put(job->bos[i]);

		kvfree(job->bos);
	}

	panfrost_jm_ctx_put(job->ctx);
	kfree(job);
}

void panfrost_job_put(struct panfrost_job *job)
{
	kref_put(&job->refcount, panfrost_job_cleanup);
}

static void panfrost_job_free(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);

	drm_sched_job_cleanup(sched_job);

	panfrost_job_put(job);
}

static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
	struct panfrost_device *pfdev = job->pfdev;
	int slot = panfrost_job_get_slot(job);
	struct dma_fence *fence = NULL;
	int ret;

	if (job->ctx->destroyed)
		return ERR_PTR(-ECANCELED);

	if (unlikely(job->base.s_fence->finished.error))
		return NULL;

	/* Nothing to execute: can happen if the job has finished while
	 * we were resetting the GPU.
	 */
	if (!job->jc)
		return NULL;

	fence = panfrost_fence_create(pfdev, slot);
	if (IS_ERR(fence))
		return fence;

	if (job->done_fence)
		dma_fence_put(job->done_fence);
	job->done_fence = dma_fence_get(fence);

	ret = panfrost_job_hw_submit(job, slot);
	if (ret) {
		dma_fence_put(fence);
		return ERR_PTR(ret);
	}

	return fence;
}

void panfrost_jm_reset_interrupts(struct panfrost_device *pfdev)
{
	job_write(pfdev, JOB_INT_CLEAR, ALL_JS_INT_MASK);
}

void panfrost_jm_enable_interrupts(struct panfrost_device *pfdev)
{
	clear_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended);
	job_write(pfdev, JOB_INT_MASK, ALL_JS_INT_MASK);
}

void panfrost_jm_suspend_irq(struct panfrost_device *pfdev)
{
	set_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended);

	job_write(pfdev, JOB_INT_MASK, 0);
	synchronize_irq(pfdev->js->irq);
}

static void panfrost_job_handle_err(struct panfrost_device *pfdev,
				    struct panfrost_job *job,
				    unsigned int js)
{
	u32 js_status = job_read(pfdev, JS_STATUS(js));
	const char *exception_name = panfrost_exception_name(js_status);
	bool signal_fence = true;

	if (!panfrost_exception_is_fault(js_status)) {
		dev_dbg(pfdev->base.dev, "js event, js=%d, status=%s, head=0x%x, tail=0x%x",
			js, exception_name,
			job_read(pfdev, JS_HEAD_LO(js)),
			job_read(pfdev, JS_TAIL_LO(js)));
	} else {
		dev_err(pfdev->base.dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x",
			js, exception_name,
			job_read(pfdev, JS_HEAD_LO(js)),
			job_read(pfdev, JS_TAIL_LO(js)));
	}

	if (js_status == DRM_PANFROST_EXCEPTION_STOPPED) {
		/* Update the job head so we can resume */
		job->jc = job_read(pfdev, JS_TAIL_LO(js)) |
			  ((u64)job_read(pfdev, JS_TAIL_HI(js)) << 32);

		/* The job will be resumed, don't signal the fence */
		signal_fence = false;
	} else if (js_status == DRM_PANFROST_EXCEPTION_TERMINATED) {
		/* Job has been hard-stopped, flag it as canceled */
		dma_fence_set_error(job->done_fence, -ECANCELED);
		job->jc = 0;
	} else if (panfrost_exception_is_fault(js_status)) {
		/* We might want to provide a finer-grained error code based on
		 * the exception type, but unconditionally setting it to EINVAL
		 * is good enough for now.
		 */
		dma_fence_set_error(job->done_fence, -EINVAL);
		job->jc = 0;
	}

	panfrost_mmu_as_put(pfdev, job->mmu);
	panfrost_devfreq_record_idle(&pfdev->pfdevfreq);

	if (signal_fence)
		dma_fence_signal_locked(job->done_fence);

	pm_runtime_put_autosuspend(pfdev->base.dev);

	if (panfrost_exception_needs_reset(pfdev, js_status)) {
		atomic_set(&pfdev->reset.pending, 1);
		drm_sched_fault(&pfdev->js->queue[js].sched);
	}
}

static void panfrost_jm_handle_done(struct panfrost_device *pfdev,
				    struct panfrost_job *job)
{
	/* Set ->jc to 0 to avoid re-submitting an already finished job (can
	 * happen when we receive the DONE interrupt while doing a GPU reset).
	 */
	job->jc = 0;
	panfrost_mmu_as_put(pfdev, job->mmu);
	panfrost_devfreq_record_idle(&pfdev->pfdevfreq);

	dma_fence_signal_locked(job->done_fence);
	pm_runtime_put_autosuspend(pfdev->base.dev);
}

static void panfrost_jm_handle_irq(struct panfrost_device *pfdev, u32 status)
{
	struct panfrost_job *done[NUM_JOB_SLOTS][2] = {};
	struct panfrost_job *failed[NUM_JOB_SLOTS] = {};
	u32 js_state = 0, js_events = 0;
	unsigned int i, j;

	/* First we collect all failed/done jobs. */
	while (status) {
		u32 js_state_mask = 0;

		for (j = 0; j < NUM_JOB_SLOTS; j++) {
			if (status & MK_JS_MASK(j))
				js_state_mask |= MK_JS_MASK(j);

			if (status & JOB_INT_MASK_DONE(j)) {
				if (done[j][0])
					done[j][1] = panfrost_dequeue_job(pfdev, j);
				else
					done[j][0] = panfrost_dequeue_job(pfdev, j);
			}

			if (status & JOB_INT_MASK_ERR(j)) {
				/* Cancel the next submission. Will be submitted
				 * after we're done handling this failure if
				 * there's no reset pending.
				 */
				job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);
				failed[j] = panfrost_dequeue_job(pfdev, j);
			}
		}

		/* JS_STATE is sampled when JOB_INT_CLEAR is written.
		 * For each BIT(slot) or BIT(slot + 16) bit written to
		 * JOB_INT_CLEAR, the corresponding bits in JS_STATE
		 * (BIT(slot) and BIT(slot + 16)) are updated, but this
		 * is racy. If only one job is done when we read
		 * JOB_INT_RAWSTAT but the second job fails before we
		 * clear the status, we end up with a status containing
		 * only the DONE bit and consider both jobs as DONE since
		 * JS_STATE reports both NEXT and CURRENT as inactive.
		 * To prevent that, let's repeat these clear+read steps
		 * until status is 0.
		 */
		job_write(pfdev, JOB_INT_CLEAR, status);
		js_state &= ~js_state_mask;
		js_state |= job_read(pfdev, JOB_INT_JS_STATE) & js_state_mask;
		js_events |= status;
		status = job_read(pfdev, JOB_INT_RAWSTAT);
	}

	/* Then we handle the dequeued jobs. */
	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		if (!(js_events & MK_JS_MASK(j)))
			continue;

		if (failed[j]) {
			panfrost_job_handle_err(pfdev, failed[j], j);
		} else if (pfdev->jobs[j][0] && !(js_state & MK_JS_MASK(j))) {
			/* When the current job doesn't fail, the JM dequeues
			 * the next job without waiting for an ACK; this means
			 * we can have 2 jobs dequeued and only catch the
			 * interrupt when the second one is done. If both slots
			 * are inactive, but one job remains in pfdev->jobs[j],
			 * consider it done. Of course that doesn't apply if a
			 * failure happened since we cancelled execution of the
			 * job in _NEXT (see above).
			 */
			if (WARN_ON(!done[j][0]))
				done[j][0] = panfrost_dequeue_job(pfdev, j);
			else
				done[j][1] = panfrost_dequeue_job(pfdev, j);
		}

		for (i = 0; i < ARRAY_SIZE(done[0]) && done[j][i]; i++)
			panfrost_jm_handle_done(pfdev, done[j][i]);
	}

	/* And finally we requeue jobs that were waiting in the second slot
	 * and have been stopped if we detected a failure on the first slot.
	 */
	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		if (!(js_events & MK_JS_MASK(j)))
			continue;

		if (!failed[j] || !pfdev->jobs[j][0])
			continue;

		if (pfdev->jobs[j][0]->jc == 0) {
			/* The job was cancelled, signal the fence now */
			struct panfrost_job *canceled = panfrost_dequeue_job(pfdev, j);

			dma_fence_set_error(canceled->done_fence, -ECANCELED);
			panfrost_jm_handle_done(pfdev, canceled);
		} else if (!atomic_read(&pfdev->reset.pending)) {
			/* Requeue the job we removed if no reset is pending */
			job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_START);
		}
	}
}
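
/*
 * Summary of the three passes above: (1) drain JOB_INT_RAWSTAT, dequeuing
 * every done/failed job while re-sampling JOB_INT_JS_STATE, (2) signal the
 * fences of completed jobs, and (3) for slots whose first job failed,
 * either cancel the second job (jc == 0) or restart it if no reset is
 * pending.
 */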

static void panfrost_jm_handle_irqs(struct panfrost_device *pfdev)
{
	u32 status = job_read(pfdev, JOB_INT_RAWSTAT);

	while (status) {
		pm_runtime_mark_last_busy(pfdev->base.dev);

		spin_lock(&pfdev->js->job_lock);
		panfrost_jm_handle_irq(pfdev, status);
		spin_unlock(&pfdev->js->job_lock);
		status = job_read(pfdev, JOB_INT_RAWSTAT);
	}
}

static u32 panfrost_active_slots(struct panfrost_device *pfdev,
				 u32 *js_state_mask, u32 js_state)
{
	u32 rawstat;

	if (!(js_state & *js_state_mask))
		return 0;

	rawstat = job_read(pfdev, JOB_INT_RAWSTAT);
	if (rawstat) {
		unsigned int i;

		for (i = 0; i < NUM_JOB_SLOTS; i++) {
			if (rawstat & MK_JS_MASK(i))
				*js_state_mask &= ~MK_JS_MASK(i);
		}
	}

	return js_state & *js_state_mask;
}
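
/*
 * Sketch of the bit layout this helper relies on, assuming MK_JS_MASK(js)
 * expands to the pair of JS_STATE bits for one slot:
 *
 *	BIT(js)      - a job is active in the slot's CURRENT registers
 *	BIT(js + 16) - a job is queued in the slot's _NEXT registers
 *
 * Slots that raise a new interrupt while we poll are dropped from the mask
 * so the soft-stop wait doesn't spin on jobs that already signalled.
 */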

static void
panfrost_reset(struct panfrost_device *pfdev,
	       struct drm_sched_job *bad)
{
	u32 js_state, js_state_mask = 0xffffffff;
	unsigned int i, j;
	bool cookie;
	int ret;

	if (!atomic_read(&pfdev->reset.pending))
		return;

	/* Stop the schedulers.
	 *
	 * FIXME: We temporarily get out of the dma_fence_signalling section
	 * because the cleanup path generates lockdep splats when taking locks
	 * to release job resources. We should rework the code to follow this
	 * pattern:
	 *
	 *	try_lock
	 *	if (locked)
	 *		release
	 *	else
	 *		schedule_work_to_release_later
	 */
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_stop(&pfdev->js->queue[i].sched, bad);

	cookie = dma_fence_begin_signalling();

	if (bad)
		drm_sched_increase_karma(bad);

	/* Mask job interrupts and synchronize to make sure we won't be
	 * interrupted during our reset.
	 */
	job_write(pfdev, JOB_INT_MASK, 0);
	synchronize_irq(pfdev->js->irq);

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		/* Cancel the next job and soft-stop the running job. */
		job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
		job_write(pfdev, JS_COMMAND(i), JS_COMMAND_SOFT_STOP);
	}

	/* Wait at most 10ms for soft-stops to complete */
	ret = readl_poll_timeout(pfdev->iomem + JOB_INT_JS_STATE, js_state,
				 !panfrost_active_slots(pfdev, &js_state_mask, js_state),
				 10, 10000);

	if (ret)
		dev_err(pfdev->base.dev, "Soft-stop failed\n");

	/* Handle the remaining interrupts before we reset. */
	panfrost_jm_handle_irqs(pfdev);

	/* Remaining interrupts have been handled, but we might still have
	 * stuck jobs. Let's make sure the PM counters stay balanced by
	 * manually calling pm_runtime_put_noidle() and
	 * panfrost_devfreq_record_idle() for each stuck job.
	 * Let's also make sure the cycle counter's refcount is kept balanced
	 * to prevent it from running forever.
	 */
	spin_lock(&pfdev->js->job_lock);
	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) {
			if (pfdev->jobs[i][j]->requirements & PANFROST_JD_REQ_CYCLE_COUNT ||
			    pfdev->jobs[i][j]->is_profiled)
				panfrost_cycle_counter_put(pfdev->jobs[i][j]->pfdev);
			pm_runtime_put_noidle(pfdev->base.dev);
			panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
		}
	}
	memset(pfdev->jobs, 0, sizeof(pfdev->jobs));
	spin_unlock(&pfdev->js->job_lock);

	/* Proceed with reset now. */
	panfrost_device_reset(pfdev, false);

	/* GPU has been reset, we can clear the reset pending bit. */
	atomic_set(&pfdev->reset.pending, 0);

	/* Now resubmit jobs that were previously queued but didn't have a
	 * chance to finish.
	 * FIXME: We temporarily get out of the DMA fence signalling section
	 * while resubmitting jobs because the job submission logic will
	 * allocate memory with the GFP_KERNEL flag which can trigger memory
	 * reclaim and exposes a lock ordering issue.
	 */
	dma_fence_end_signalling(cookie);
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched);
	cookie = dma_fence_begin_signalling();

	/* Restart the schedulers */
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_start(&pfdev->js->queue[i].sched, 0);

	/* Re-enable job interrupts now that everything has been restarted. */
	panfrost_jm_enable_interrupts(pfdev);

	dma_fence_end_signalling(cookie);
}
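
/*
 * The reset sequence above, in order: stop the schedulers, mask and drain
 * the job IRQ, soft-stop all slots and poll JS_STATE for up to 10ms, handle
 * leftover interrupts, rebalance PM/devfreq/cycle-counter refcounts for
 * stuck jobs, reset the GPU, resubmit surviving jobs outside the
 * fence-signalling section, then restart the schedulers and re-enable
 * interrupts.
 */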

static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job
						     *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
	struct panfrost_device *pfdev = job->pfdev;
	int js = panfrost_job_get_slot(job);

	/*
	 * If the GPU managed to complete this job's fence, the timeout fired
	 * before the free-job worker ran. The timeout is spurious, so bail out.
	 */
	if (dma_fence_is_signaled(job->done_fence))
		return DRM_GPU_SCHED_STAT_NO_HANG;

	/*
	 * Panfrost IRQ handler may take a long time to process an interrupt
	 * if there is another IRQ handler hogging the processing.
	 * For example, the HDMI encoder driver might be stuck in the IRQ
	 * handler for a significant time in a case of bad cable connection.
	 * In order to catch such cases and not report spurious Panfrost
	 * job timeouts, synchronize the IRQ handler and re-check the fence
	 * status.
	 */
	synchronize_irq(pfdev->js->irq);

	if (dma_fence_is_signaled(job->done_fence)) {
		dev_warn(pfdev->base.dev, "unexpectedly high interrupt latency\n");
		return DRM_GPU_SCHED_STAT_NO_HANG;
	}

	dev_err(pfdev->base.dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
		js,
		job_read(pfdev, JS_CONFIG(js)),
		job_read(pfdev, JS_STATUS(js)),
		job_read(pfdev, JS_HEAD_LO(js)),
		job_read(pfdev, JS_TAIL_LO(js)),
		sched_job);

	panfrost_core_dump(job);

	atomic_set(&pfdev->reset.pending, 1);
	panfrost_reset(pfdev, sched_job);

	return DRM_GPU_SCHED_STAT_RESET;
}

static void panfrost_reset_work(struct work_struct *work)
{
	struct panfrost_device *pfdev;

	pfdev = container_of(work, struct panfrost_device, reset.work);
	panfrost_reset(pfdev, NULL);
}

static const struct drm_sched_backend_ops panfrost_sched_ops = {
	.run_job = panfrost_job_run,
	.timedout_job = panfrost_job_timedout,
	.free_job = panfrost_job_free
};

static irqreturn_t panfrost_jm_irq_handler_thread(int irq, void *data)
{
	struct panfrost_device *pfdev = data;

	panfrost_jm_handle_irqs(pfdev);

	/* Enable interrupts only if we're not about to get suspended */
	if (!test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended))
		job_write(pfdev, JOB_INT_MASK, ALL_JS_INT_MASK);

	return IRQ_HANDLED;
}

static irqreturn_t panfrost_jm_irq_handler(int irq, void *data)
{
	struct panfrost_device *pfdev = data;
	u32 status;

	if (test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended))
		return IRQ_NONE;

	status = job_read(pfdev, JOB_INT_STAT);
	if (!status)
		return IRQ_NONE;

	job_write(pfdev, JOB_INT_MASK, 0);
	return IRQ_WAKE_THREAD;
}
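
/*
 * Classic split-IRQ pattern: the hard handler above only masks JOB_INT_MASK
 * and wakes the thread, while panfrost_jm_irq_handler_thread() does the
 * real work and unmasks again unless a suspend raced in. Masking (rather
 * than clearing) leaves JOB_INT_RAWSTAT intact for the thread to consume.
 */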

int panfrost_jm_init(struct panfrost_device *pfdev)
{
	struct drm_sched_init_args args = {
		.ops = &panfrost_sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = 2,
		.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
		.dev = pfdev->base.dev,
	};
	struct panfrost_job_slot *js;
	int ret, j;

	BUILD_BUG_ON(ARRAY_SIZE(panfrost_engine_names) != NUM_JOB_SLOTS);

	/* All GPUs have two entries per queue, but without jobchain
	 * disambiguation, stopping the right job in the close path is tricky,
	 * so let's just advertise one entry in that case.
	 */
	if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
		args.credit_limit = 1;

	js = devm_kzalloc(pfdev->base.dev, sizeof(*js), GFP_KERNEL);
	if (!js)
		return -ENOMEM;
	pfdev->js = js;

	INIT_WORK(&pfdev->reset.work, panfrost_reset_work);
	spin_lock_init(&js->job_lock);

	js->irq = platform_get_irq_byname(to_platform_device(pfdev->base.dev), "job");
	if (js->irq < 0)
		return js->irq;

	ret = devm_request_threaded_irq(pfdev->base.dev, js->irq,
					panfrost_jm_irq_handler,
					panfrost_jm_irq_handler_thread,
					IRQF_SHARED, KBUILD_MODNAME "-job",
					pfdev);
	if (ret) {
		dev_err(pfdev->base.dev, "failed to request job irq");
		return ret;
	}

	pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0);
	if (!pfdev->reset.wq)
		return -ENOMEM;
	args.timeout_wq = pfdev->reset.wq;

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		js->queue[j].fence_context = dma_fence_context_alloc(1);
		args.name = panfrost_engine_names[j];

		ret = drm_sched_init(&js->queue[j].sched, &args);
		if (ret) {
			dev_err(pfdev->base.dev, "Failed to create scheduler: %d.", ret);
			goto err_sched;
		}
	}

	panfrost_jm_reset_interrupts(pfdev);
	panfrost_jm_enable_interrupts(pfdev);

	return 0;

err_sched:
	for (j--; j >= 0; j--)
		drm_sched_fini(&js->queue[j].sched);

	destroy_workqueue(pfdev->reset.wq);
	return ret;
}

void panfrost_jm_fini(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js = pfdev->js;
	int j;

	job_write(pfdev, JOB_INT_MASK, 0);

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		drm_sched_fini(&js->queue[j].sched);
	}

	cancel_work_sync(&pfdev->reset.work);
	destroy_workqueue(pfdev->reset.wq);
}

int panfrost_jm_open(struct drm_file *file)
{
	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
	int ret;

	struct drm_panfrost_jm_ctx_create default_jm_ctx = {
		.priority = PANFROST_JM_CTX_PRIORITY_MEDIUM,
	};

	xa_init_flags(&panfrost_priv->jm_ctxs, XA_FLAGS_ALLOC);

	ret = panfrost_jm_ctx_create(file, &default_jm_ctx);
	if (ret)
		return ret;

	/* We expect the default context to be assigned handle 0. */
	if (WARN_ON(default_jm_ctx.handle))
		return -EINVAL;

	return 0;
}

void panfrost_jm_close(struct drm_file *file)
{
	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
	struct panfrost_jm_ctx *jm_ctx;
	unsigned long i;

	xa_for_each(&panfrost_priv->jm_ctxs, i, jm_ctx)
		panfrost_jm_ctx_destroy(file, i);

	xa_destroy(&panfrost_priv->jm_ctxs);
}

int panfrost_jm_is_idle(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js = pfdev->js;
	int i;

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		/* If there are any jobs in the HW queue, we're not idle */
		if (atomic_read(&js->queue[i].sched.credit_count))
			return false;
	}

	return true;
}

static void panfrost_jm_ctx_release(struct kref *kref)
{
	struct panfrost_jm_ctx *jm_ctx = container_of(kref, struct panfrost_jm_ctx, refcnt);

	WARN_ON(!jm_ctx->destroyed);

	for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slot_entity); i++)
		drm_sched_entity_destroy(&jm_ctx->slot_entity[i]);

	kfree(jm_ctx);
}

void
panfrost_jm_ctx_put(struct panfrost_jm_ctx *jm_ctx)
{
	if (jm_ctx)
		kref_put(&jm_ctx->refcnt, panfrost_jm_ctx_release);
}

struct panfrost_jm_ctx *
panfrost_jm_ctx_get(struct panfrost_jm_ctx *jm_ctx)
{
	if (jm_ctx)
		kref_get(&jm_ctx->refcnt);

	return jm_ctx;
}

struct panfrost_jm_ctx *
panfrost_jm_ctx_from_handle(struct drm_file *file, u32 handle)
{
	struct panfrost_file_priv *priv = file->driver_priv;
	struct panfrost_jm_ctx *jm_ctx;

	xa_lock(&priv->jm_ctxs);
	jm_ctx = panfrost_jm_ctx_get(xa_load(&priv->jm_ctxs, handle));
	xa_unlock(&priv->jm_ctxs);

	return jm_ctx;
}

static int jm_ctx_prio_to_drm_sched_prio(struct drm_file *file,
					 enum drm_panfrost_jm_ctx_priority in,
					 enum drm_sched_priority *out)
{
	switch (in) {
	case PANFROST_JM_CTX_PRIORITY_LOW:
		*out = DRM_SCHED_PRIORITY_LOW;
		return 0;
	case PANFROST_JM_CTX_PRIORITY_MEDIUM:
		*out = DRM_SCHED_PRIORITY_NORMAL;
		return 0;
	case PANFROST_JM_CTX_PRIORITY_HIGH:
		if (!panfrost_high_prio_allowed(file))
			return -EACCES;

		*out = DRM_SCHED_PRIORITY_HIGH;
		return 0;
	default:
		return -EINVAL;
	}
}
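
/*
 * The resulting mapping, for reference:
 *
 *	PANFROST_JM_CTX_PRIORITY_LOW    -> DRM_SCHED_PRIORITY_LOW
 *	PANFROST_JM_CTX_PRIORITY_MEDIUM -> DRM_SCHED_PRIORITY_NORMAL
 *	PANFROST_JM_CTX_PRIORITY_HIGH   -> DRM_SCHED_PRIORITY_HIGH
 *	                                   (gated by panfrost_high_prio_allowed())
 */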

int panfrost_jm_ctx_create(struct drm_file *file,
			   struct drm_panfrost_jm_ctx_create *args)
{
	struct panfrost_file_priv *priv = file->driver_priv;
	struct panfrost_device *pfdev = priv->pfdev;
	enum drm_sched_priority sched_prio;
	struct panfrost_jm_ctx *jm_ctx;
	int ret;

	jm_ctx = kzalloc(sizeof(*jm_ctx), GFP_KERNEL);
	if (!jm_ctx)
		return -ENOMEM;

	kref_init(&jm_ctx->refcnt);

	ret = jm_ctx_prio_to_drm_sched_prio(file, args->priority, &sched_prio);
	if (ret)
		goto err_put_jm_ctx;

	for (u32 i = 0; i < NUM_JOB_SLOTS; i++) {
		struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;

		ret = drm_sched_entity_init(&jm_ctx->slot_entity[i], sched_prio,
					    &sched, 1, NULL);
		if (ret)
			goto err_put_jm_ctx;
	}

	ret = xa_alloc(&priv->jm_ctxs, &args->handle, jm_ctx,
		       XA_LIMIT(0, MAX_JM_CTX_PER_FILE), GFP_KERNEL);
	if (ret)
		goto err_put_jm_ctx;

	return 0;

err_put_jm_ctx:
	jm_ctx->destroyed = true;
	panfrost_jm_ctx_put(jm_ctx);
	return ret;
}

int panfrost_jm_ctx_destroy(struct drm_file *file, u32 handle)
{
	struct panfrost_file_priv *priv = file->driver_priv;
	struct panfrost_device *pfdev = priv->pfdev;
	struct panfrost_jm_ctx *jm_ctx;

	jm_ctx = xa_erase(&priv->jm_ctxs, handle);
	if (!jm_ctx)
		return -EINVAL;

	jm_ctx->destroyed = true;

	/* Kill in-flight jobs */
	spin_lock(&pfdev->js->job_lock);
	for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slot_entity); i++) {
		struct drm_sched_entity *entity = &jm_ctx->slot_entity[i];

		for (int j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
			struct panfrost_job *job = pfdev->jobs[i][j];
			u32 cmd;

			if (!job || job->base.entity != entity)
				continue;

			if (j == 1) {
				/* Try to cancel the job before it starts */
				job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
				/* Reset the job head so it doesn't get restarted if
				 * the job in the first slot failed.
				 */
				job->jc = 0;
			}

			if (panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
				cmd = panfrost_get_job_chain_flag(job) ?
				      JS_COMMAND_HARD_STOP_1 :
				      JS_COMMAND_HARD_STOP_0;
			} else {
				cmd = JS_COMMAND_HARD_STOP;
			}

			job_write(pfdev, JS_COMMAND(i), cmd);

			/* Jobs can outlive their file context */
			job->engine_usage = NULL;
		}
	}
	spin_unlock(&pfdev->js->job_lock);

	panfrost_jm_ctx_put(jm_ctx);
	return 0;
}