xref: /linux/drivers/gpu/drm/panfrost/panfrost_job.c (revision 3fd6c59042dbba50391e30862beac979491145fe)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
3 /* Copyright 2019 Collabora ltd. */
4 #include <linux/delay.h>
5 #include <linux/interrupt.h>
6 #include <linux/io.h>
7 #include <linux/iopoll.h>
8 #include <linux/platform_device.h>
9 #include <linux/pm_runtime.h>
10 #include <linux/dma-resv.h>
11 #include <drm/gpu_scheduler.h>
12 #include <drm/panfrost_drm.h>
13 
14 #include "panfrost_device.h"
15 #include "panfrost_devfreq.h"
16 #include "panfrost_job.h"
17 #include "panfrost_features.h"
18 #include "panfrost_issues.h"
19 #include "panfrost_gem.h"
20 #include "panfrost_regs.h"
21 #include "panfrost_gpu.h"
22 #include "panfrost_mmu.h"
23 #include "panfrost_dump.h"
24 
/* Per-slot scheduler timeout, in milliseconds (converted with msecs_to_jiffies()). */
#define JOB_TIMEOUT_MS 500

/*
 * MMIO accessors for job-manager registers, addressed relative to
 * dev->iomem. Every macro argument is parenthesized so that callers can
 * pass arbitrary expressions (e.g. "&obj" or "a | b") without precedence
 * surprises.
 */
#define job_write(dev, reg, data) writel((data), (dev)->iomem + (reg))
#define job_read(dev, reg) readl((dev)->iomem + (reg))
29 
/* Per-job-slot state: one scheduler instance plus fence bookkeeping. */
struct panfrost_queue_state {
	/* DRM GPU scheduler instance set up for this slot by drm_sched_init() */
	struct drm_gpu_scheduler sched;
	/* dma_fence context given to every fence created for this slot */
	u64 fence_context;
	/* Last sequence number handed out; pre-incremented for each new fence */
	u64 emit_seqno;
};
35 
/* Job-manager state hanging off the device (pfdev->js). */
struct panfrost_job_slot {
	/* One queue per hardware job slot */
	struct panfrost_queue_state queue[NUM_JOB_SLOTS];
	/*
	 * Taken around updates of pfdev->jobs[][]; also passed to
	 * dma_fence_init() as the lock of every fence created here.
	 */
	spinlock_t job_lock;
	/* Interrupt number looked up under the name "job" */
	int irq;
};
41 
42 static struct panfrost_job *
to_panfrost_job(struct drm_sched_job * sched_job)43 to_panfrost_job(struct drm_sched_job *sched_job)
44 {
45 	return container_of(sched_job, struct panfrost_job, base);
46 }
47 
/*
 * Driver-side fence. ->base has to remain the first member because
 * to_panfrost_fence() converts a dma_fence pointer with a plain cast.
 */
struct panfrost_fence {
	struct dma_fence base;
	/* DRM device the fence was created for (pfdev->ddev) */
	struct drm_device *dev;
	/* panfrost seqno for signaled() test */
	u64 seqno;
	/* Index of the job slot the fence was created on */
	int queue;
};
55 
56 static inline struct panfrost_fence *
to_panfrost_fence(struct dma_fence * fence)57 to_panfrost_fence(struct dma_fence *fence)
58 {
59 	return (struct panfrost_fence *)fence;
60 }
61 
/* dma_fence_ops.get_driver_name: the name is the same for every fence. */
static const char *panfrost_fence_get_driver_name(struct dma_fence *fence)
{
	static const char driver_name[] = "panfrost";

	return driver_name;
}
66 
panfrost_fence_get_timeline_name(struct dma_fence * fence)67 static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence)
68 {
69 	struct panfrost_fence *f = to_panfrost_fence(fence);
70 
71 	switch (f->queue) {
72 	case 0:
73 		return "panfrost-js-0";
74 	case 1:
75 		return "panfrost-js-1";
76 	case 2:
77 		return "panfrost-js-2";
78 	default:
79 		return NULL;
80 	}
81 }
82 
/*
 * Fence callbacks handed to dma_fence_init() in panfrost_fence_create().
 * Only the two naming hooks are provided.
 */
static const struct dma_fence_ops panfrost_fence_ops = {
	.get_driver_name = panfrost_fence_get_driver_name,
	.get_timeline_name = panfrost_fence_get_timeline_name,
};
87 
panfrost_fence_create(struct panfrost_device * pfdev,int js_num)88 static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num)
89 {
90 	struct panfrost_fence *fence;
91 	struct panfrost_job_slot *js = pfdev->js;
92 
93 	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
94 	if (!fence)
95 		return ERR_PTR(-ENOMEM);
96 
97 	fence->dev = pfdev->ddev;
98 	fence->queue = js_num;
99 	fence->seqno = ++js->queue[js_num].emit_seqno;
100 	dma_fence_init(&fence->base, &panfrost_fence_ops, &js->job_lock,
101 		       js->queue[js_num].fence_context, fence->seqno);
102 
103 	return &fence->base;
104 }
105 
panfrost_job_get_slot(struct panfrost_job * job)106 int panfrost_job_get_slot(struct panfrost_job *job)
107 {
108 	/* JS0: fragment jobs.
109 	 * JS1: vertex/tiler jobs
110 	 * JS2: compute jobs
111 	 */
112 	if (job->requirements & PANFROST_JD_REQ_FS)
113 		return 0;
114 
115 /* Not exposed to userspace yet */
116 #if 0
117 	if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) {
118 		if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) &&
119 		    (job->pfdev->features.nr_core_groups == 2))
120 			return 2;
121 		if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987))
122 			return 2;
123 	}
124 #endif
125 	return 1;
126 }
127 
/*
 * Write the core affinity for the next job on slot @js.
 *
 * All present shader cores are used for now; @requirements is not
 * consulted. Tiler-only jobs and hardware with two coherent core groups
 * may need a finer selection eventually.
 */
static void panfrost_job_write_affinity(struct panfrost_device *pfdev,
					u32 requirements,
					int js)
{
	const u64 core_mask = pfdev->features.shader_present;
	const u32 mask_lo = lower_32_bits(core_mask);
	const u32 mask_hi = upper_32_bits(core_mask);

	job_write(pfdev, JS_AFFINITY_NEXT_LO(js), mask_lo);
	job_write(pfdev, JS_AFFINITY_NEXT_HI(js), mask_hi);
}
144 
145 static u32
panfrost_get_job_chain_flag(const struct panfrost_job * job)146 panfrost_get_job_chain_flag(const struct panfrost_job *job)
147 {
148 	struct panfrost_fence *f = to_panfrost_fence(job->done_fence);
149 
150 	if (!panfrost_has_hw_feature(job->pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
151 		return 0;
152 
153 	return (f->seqno & 1) ? JS_CONFIG_JOB_CHAIN_FLAG : 0;
154 }
155 
156 static struct panfrost_job *
panfrost_dequeue_job(struct panfrost_device * pfdev,int slot)157 panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
158 {
159 	struct panfrost_job *job = pfdev->jobs[slot][0];
160 
161 	WARN_ON(!job);
162 
163 	if (job->is_profiled && job->engine_usage) {
164 		job->engine_usage->elapsed_ns[slot] +=
165 			ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
166 		job->engine_usage->cycles[slot] +=
167 			panfrost_cycle_counter_read(pfdev) - job->start_cycles;
168 	}
169 
170 	if (job->requirements & PANFROST_JD_REQ_CYCLE_COUNT || job->is_profiled)
171 		panfrost_cycle_counter_put(pfdev);
172 
173 	pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
174 	pfdev->jobs[slot][1] = NULL;
175 
176 	return job;
177 }
178 
179 static unsigned int
panfrost_enqueue_job(struct panfrost_device * pfdev,int slot,struct panfrost_job * job)180 panfrost_enqueue_job(struct panfrost_device *pfdev, int slot,
181 		     struct panfrost_job *job)
182 {
183 	if (WARN_ON(!job))
184 		return 0;
185 
186 	if (!pfdev->jobs[slot][0]) {
187 		pfdev->jobs[slot][0] = job;
188 		return 0;
189 	}
190 
191 	WARN_ON(pfdev->jobs[slot][1]);
192 	pfdev->jobs[slot][1] = job;
193 	WARN_ON(panfrost_get_job_chain_flag(job) ==
194 		panfrost_get_job_chain_flag(pfdev->jobs[slot][0]));
195 	return 1;
196 }
197 
panfrost_job_hw_submit(struct panfrost_job * job,int js)198 static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
199 {
200 	struct panfrost_device *pfdev = job->pfdev;
201 	unsigned int subslot;
202 	u32 cfg;
203 	u64 jc_head = job->jc;
204 	int ret;
205 
206 	panfrost_devfreq_record_busy(&pfdev->pfdevfreq);
207 
208 	ret = pm_runtime_get_sync(pfdev->dev);
209 	if (ret < 0)
210 		return;
211 
212 	if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) {
213 		return;
214 	}
215 
216 	cfg = panfrost_mmu_as_get(pfdev, job->mmu);
217 
218 	job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc_head));
219 	job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc_head));
220 
221 	panfrost_job_write_affinity(pfdev, job->requirements, js);
222 
223 	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
224 	 * start */
225 	cfg |= JS_CONFIG_THREAD_PRI(8) |
226 		JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
227 		JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
228 		panfrost_get_job_chain_flag(job);
229 
230 	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
231 		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
232 
233 	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649))
234 		cfg |= JS_CONFIG_START_MMU;
235 
236 	job_write(pfdev, JS_CONFIG_NEXT(js), cfg);
237 
238 	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
239 		job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id);
240 
241 	/* GO ! */
242 
243 	spin_lock(&pfdev->js->job_lock);
244 	subslot = panfrost_enqueue_job(pfdev, js, job);
245 	/* Don't queue the job if a reset is in progress */
246 	if (!atomic_read(&pfdev->reset.pending)) {
247 		job->is_profiled = pfdev->profile_mode;
248 
249 		if (job->requirements & PANFROST_JD_REQ_CYCLE_COUNT ||
250 		    job->is_profiled)
251 			panfrost_cycle_counter_get(pfdev);
252 
253 		if (job->is_profiled) {
254 			job->start_time = ktime_get();
255 			job->start_cycles = panfrost_cycle_counter_read(pfdev);
256 		}
257 
258 		job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
259 		dev_dbg(pfdev->dev,
260 			"JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d",
261 			job, js, subslot, jc_head, cfg & 0xf);
262 	}
263 	spin_unlock(&pfdev->js->job_lock);
264 }
265 
panfrost_acquire_object_fences(struct drm_gem_object ** bos,int bo_count,struct drm_sched_job * job)266 static int panfrost_acquire_object_fences(struct drm_gem_object **bos,
267 					  int bo_count,
268 					  struct drm_sched_job *job)
269 {
270 	int i, ret;
271 
272 	for (i = 0; i < bo_count; i++) {
273 		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
274 		if (ret)
275 			return ret;
276 
277 		/* panfrost always uses write mode in its current uapi */
278 		ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
279 							      true);
280 		if (ret)
281 			return ret;
282 	}
283 
284 	return 0;
285 }
286 
panfrost_attach_object_fences(struct drm_gem_object ** bos,int bo_count,struct dma_fence * fence)287 static void panfrost_attach_object_fences(struct drm_gem_object **bos,
288 					  int bo_count,
289 					  struct dma_fence *fence)
290 {
291 	int i;
292 
293 	for (i = 0; i < bo_count; i++)
294 		dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
295 }
296 
panfrost_job_push(struct panfrost_job * job)297 int panfrost_job_push(struct panfrost_job *job)
298 {
299 	struct panfrost_device *pfdev = job->pfdev;
300 	struct ww_acquire_ctx acquire_ctx;
301 	int ret = 0;
302 
303 	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
304 					    &acquire_ctx);
305 	if (ret)
306 		return ret;
307 
308 	mutex_lock(&pfdev->sched_lock);
309 	drm_sched_job_arm(&job->base);
310 
311 	job->render_done_fence = dma_fence_get(&job->base.s_fence->finished);
312 
313 	ret = panfrost_acquire_object_fences(job->bos, job->bo_count,
314 					     &job->base);
315 	if (ret) {
316 		mutex_unlock(&pfdev->sched_lock);
317 		goto unlock;
318 	}
319 
320 	kref_get(&job->refcount); /* put by scheduler job completion */
321 
322 	drm_sched_entity_push_job(&job->base);
323 
324 	mutex_unlock(&pfdev->sched_lock);
325 
326 	panfrost_attach_object_fences(job->bos, job->bo_count,
327 				      job->render_done_fence);
328 
329 unlock:
330 	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);
331 
332 	return ret;
333 }
334 
panfrost_job_cleanup(struct kref * ref)335 static void panfrost_job_cleanup(struct kref *ref)
336 {
337 	struct panfrost_job *job = container_of(ref, struct panfrost_job,
338 						refcount);
339 	unsigned int i;
340 
341 	dma_fence_put(job->done_fence);
342 	dma_fence_put(job->render_done_fence);
343 
344 	if (job->mappings) {
345 		for (i = 0; i < job->bo_count; i++) {
346 			if (!job->mappings[i])
347 				break;
348 
349 			atomic_dec(&job->mappings[i]->obj->gpu_usecount);
350 			panfrost_gem_mapping_put(job->mappings[i]);
351 		}
352 		kvfree(job->mappings);
353 	}
354 
355 	if (job->bos) {
356 		for (i = 0; i < job->bo_count; i++)
357 			drm_gem_object_put(job->bos[i]);
358 
359 		kvfree(job->bos);
360 	}
361 
362 	kfree(job);
363 }
364 
panfrost_job_put(struct panfrost_job * job)365 void panfrost_job_put(struct panfrost_job *job)
366 {
367 	kref_put(&job->refcount, panfrost_job_cleanup);
368 }
369 
/*
 * Scheduler free_job callback: clean up the scheduler part of the job,
 * then drop one reference on the enclosing panfrost_job.
 */
static void panfrost_job_free(struct drm_sched_job *sched_job)
{
	drm_sched_job_cleanup(sched_job);

	panfrost_job_put(to_panfrost_job(sched_job));
}
378 
panfrost_job_run(struct drm_sched_job * sched_job)379 static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job)
380 {
381 	struct panfrost_job *job = to_panfrost_job(sched_job);
382 	struct panfrost_device *pfdev = job->pfdev;
383 	int slot = panfrost_job_get_slot(job);
384 	struct dma_fence *fence = NULL;
385 
386 	if (unlikely(job->base.s_fence->finished.error))
387 		return NULL;
388 
389 	/* Nothing to execute: can happen if the job has finished while
390 	 * we were resetting the GPU.
391 	 */
392 	if (!job->jc)
393 		return NULL;
394 
395 	fence = panfrost_fence_create(pfdev, slot);
396 	if (IS_ERR(fence))
397 		return fence;
398 
399 	if (job->done_fence)
400 		dma_fence_put(job->done_fence);
401 	job->done_fence = dma_fence_get(fence);
402 
403 	panfrost_job_hw_submit(job, slot);
404 
405 	return fence;
406 }
407 
panfrost_job_enable_interrupts(struct panfrost_device * pfdev)408 void panfrost_job_enable_interrupts(struct panfrost_device *pfdev)
409 {
410 	int j;
411 	u32 irq_mask = 0;
412 
413 	clear_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended);
414 
415 	for (j = 0; j < NUM_JOB_SLOTS; j++) {
416 		irq_mask |= MK_JS_MASK(j);
417 	}
418 
419 	job_write(pfdev, JOB_INT_CLEAR, irq_mask);
420 	job_write(pfdev, JOB_INT_MASK, irq_mask);
421 }
422 
panfrost_job_suspend_irq(struct panfrost_device * pfdev)423 void panfrost_job_suspend_irq(struct panfrost_device *pfdev)
424 {
425 	set_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended);
426 
427 	job_write(pfdev, JOB_INT_MASK, 0);
428 	synchronize_irq(pfdev->js->irq);
429 }
430 
panfrost_job_handle_err(struct panfrost_device * pfdev,struct panfrost_job * job,unsigned int js)431 static void panfrost_job_handle_err(struct panfrost_device *pfdev,
432 				    struct panfrost_job *job,
433 				    unsigned int js)
434 {
435 	u32 js_status = job_read(pfdev, JS_STATUS(js));
436 	const char *exception_name = panfrost_exception_name(js_status);
437 	bool signal_fence = true;
438 
439 	if (!panfrost_exception_is_fault(js_status)) {
440 		dev_dbg(pfdev->dev, "js event, js=%d, status=%s, head=0x%x, tail=0x%x",
441 			js, exception_name,
442 			job_read(pfdev, JS_HEAD_LO(js)),
443 			job_read(pfdev, JS_TAIL_LO(js)));
444 	} else {
445 		dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x",
446 			js, exception_name,
447 			job_read(pfdev, JS_HEAD_LO(js)),
448 			job_read(pfdev, JS_TAIL_LO(js)));
449 	}
450 
451 	if (js_status == DRM_PANFROST_EXCEPTION_STOPPED) {
452 		/* Update the job head so we can resume */
453 		job->jc = job_read(pfdev, JS_TAIL_LO(js)) |
454 			  ((u64)job_read(pfdev, JS_TAIL_HI(js)) << 32);
455 
456 		/* The job will be resumed, don't signal the fence */
457 		signal_fence = false;
458 	} else if (js_status == DRM_PANFROST_EXCEPTION_TERMINATED) {
459 		/* Job has been hard-stopped, flag it as canceled */
460 		dma_fence_set_error(job->done_fence, -ECANCELED);
461 		job->jc = 0;
462 	} else if (panfrost_exception_is_fault(js_status)) {
463 		/* We might want to provide finer-grained error code based on
464 		 * the exception type, but unconditionally setting to EINVAL
465 		 * is good enough for now.
466 		 */
467 		dma_fence_set_error(job->done_fence, -EINVAL);
468 		job->jc = 0;
469 	}
470 
471 	panfrost_mmu_as_put(pfdev, job->mmu);
472 	panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
473 
474 	if (signal_fence)
475 		dma_fence_signal_locked(job->done_fence);
476 
477 	pm_runtime_put_autosuspend(pfdev->dev);
478 
479 	if (panfrost_exception_needs_reset(pfdev, js_status)) {
480 		atomic_set(&pfdev->reset.pending, 1);
481 		drm_sched_fault(&pfdev->js->queue[js].sched);
482 	}
483 }
484 
panfrost_job_handle_done(struct panfrost_device * pfdev,struct panfrost_job * job)485 static void panfrost_job_handle_done(struct panfrost_device *pfdev,
486 				     struct panfrost_job *job)
487 {
488 	/* Set ->jc to 0 to avoid re-submitting an already finished job (can
489 	 * happen when we receive the DONE interrupt while doing a GPU reset).
490 	 */
491 	job->jc = 0;
492 	panfrost_mmu_as_put(pfdev, job->mmu);
493 	panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
494 
495 	dma_fence_signal_locked(job->done_fence);
496 	pm_runtime_put_autosuspend(pfdev->dev);
497 }
498 
/*
 * Process job interrupts described by @status (a JOB_INT_RAWSTAT value).
 *
 * Works in three passes:
 *  1. Dequeue every job reported as done or failed, acknowledging the
 *     interrupts and re-reading RAWSTAT until no event is left.
 *  2. Run the error/done handlers on the dequeued jobs.
 *  3. For slots that saw a failure, deal with the job that was waiting
 *     behind the failed one: cancel it or restart it.
 *
 * Both callers in this file hold pfdev->js->job_lock around the call.
 */
static void panfrost_job_handle_irq(struct panfrost_device *pfdev, u32 status)
{
	/* Up to two completed jobs and one failed job are collected per slot. */
	struct panfrost_job *done[NUM_JOB_SLOTS][2] = {};
	struct panfrost_job *failed[NUM_JOB_SLOTS] = {};
	/* js_state: last JS_STATE sample per slot; js_events: all events seen. */
	u32 js_state = 0, js_events = 0;
	unsigned int i, j;

	/* First we collect all failed/done jobs. */
	while (status) {
		u32 js_state_mask = 0;

		for (j = 0; j < NUM_JOB_SLOTS; j++) {
			if (status & MK_JS_MASK(j))
				js_state_mask |= MK_JS_MASK(j);

			if (status & JOB_INT_MASK_DONE(j)) {
				if (done[j][0])
					done[j][1] = panfrost_dequeue_job(pfdev, j);
				else
					done[j][0] = panfrost_dequeue_job(pfdev, j);
			}

			if (status & JOB_INT_MASK_ERR(j)) {
				/* Cancel the next submission. Will be submitted
				 * after we're done handling this failure if
				 * there's no reset pending.
				 */
				job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);
				failed[j] = panfrost_dequeue_job(pfdev, j);
			}
		}

		/* JS_STATE is sampled when JOB_INT_CLEAR is written.
		 * For each BIT(slot) or BIT(slot + 16) bit written to
		 * JOB_INT_CLEAR, the corresponding bits in JS_STATE
		 * (BIT(slot) and BIT(slot + 16)) are updated, but this
		 * is racy. If we only have one job done at the time we
		 * read JOB_INT_RAWSTAT but the second job fails before we
		 * clear the status, we end up with a status containing
		 * only the DONE bit and consider both jobs as DONE since
		 * JS_STATE reports both NEXT and CURRENT as inactive.
		 * To prevent that, let's repeat this clear+read steps
		 * until status is 0.
		 */
		job_write(pfdev, JOB_INT_CLEAR, status);
		/* Refresh the state bits of the slots acknowledged just above. */
		js_state &= ~js_state_mask;
		js_state |= job_read(pfdev, JOB_INT_JS_STATE) & js_state_mask;
		js_events |= status;
		status = job_read(pfdev, JOB_INT_RAWSTAT);
	}

	/* Then we handle the dequeued jobs. */
	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		if (!(js_events & MK_JS_MASK(j)))
			continue;

		if (failed[j]) {
			panfrost_job_handle_err(pfdev, failed[j], j);
		} else if (pfdev->jobs[j][0] && !(js_state & MK_JS_MASK(j))) {
			/* When the current job doesn't fail, the JM dequeues
			 * the next job without waiting for an ACK, this means
			 * we can have 2 jobs dequeued and only catch the
			 * interrupt when the second one is done. If both slots
			 * are inactive, but one job remains in pfdev->jobs[j],
			 * consider it done. Of course that doesn't apply if a
			 * failure happened since we cancelled execution of the
			 * job in _NEXT (see above).
			 */
			if (WARN_ON(!done[j][0]))
				done[j][0] = panfrost_dequeue_job(pfdev, j);
			else
				done[j][1] = panfrost_dequeue_job(pfdev, j);
		}

		/* Complete collected jobs in order, stopping at the first empty entry. */
		for (i = 0; i < ARRAY_SIZE(done[0]) && done[j][i]; i++)
			panfrost_job_handle_done(pfdev, done[j][i]);
	}

	/* And finally we requeue jobs that were waiting in the second slot
	 * and have been stopped if we detected a failure on the first slot.
	 */
	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		if (!(js_events & MK_JS_MASK(j)))
			continue;

		if (!failed[j] || !pfdev->jobs[j][0])
			continue;

		if (pfdev->jobs[j][0]->jc == 0) {
			/* The job was cancelled, signal the fence now */
			struct panfrost_job *canceled = panfrost_dequeue_job(pfdev, j);

			dma_fence_set_error(canceled->done_fence, -ECANCELED);
			panfrost_job_handle_done(pfdev, canceled);
		} else if (!atomic_read(&pfdev->reset.pending)) {
			/* Requeue the job we removed if no reset is pending */
			job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_START);
		}
	}
}
599 
panfrost_job_handle_irqs(struct panfrost_device * pfdev)600 static void panfrost_job_handle_irqs(struct panfrost_device *pfdev)
601 {
602 	u32 status = job_read(pfdev, JOB_INT_RAWSTAT);
603 
604 	while (status) {
605 		pm_runtime_mark_last_busy(pfdev->dev);
606 
607 		spin_lock(&pfdev->js->job_lock);
608 		panfrost_job_handle_irq(pfdev, status);
609 		spin_unlock(&pfdev->js->job_lock);
610 		status = job_read(pfdev, JOB_INT_RAWSTAT);
611 	}
612 }
613 
/*
 * Poll condition helper for panfrost_reset().
 *
 * Returns the bits of @js_state that are still of interest, i.e. still
 * selected by *@js_state_mask. Slots with a pending raw interrupt are
 * removed from *@js_state_mask first. RAWSTAT is not read at all when
 * @js_state has no bit left inside the mask.
 */
static u32 panfrost_active_slots(struct panfrost_device *pfdev,
				 u32 *js_state_mask, u32 js_state)
{
	unsigned int slot;
	u32 raw;

	if ((js_state & *js_state_mask) == 0)
		return 0;

	raw = job_read(pfdev, JOB_INT_RAWSTAT);

	/* With raw == 0 no bit matches, so the mask is left untouched. */
	for (slot = 0; slot < NUM_JOB_SLOTS; slot++) {
		if (!(raw & MK_JS_MASK(slot)))
			continue;

		*js_state_mask &= ~MK_JS_MASK(slot);
	}

	return js_state & *js_state_mask;
}
634 
/*
 * Reset the GPU and restart job scheduling.
 *
 * Does nothing unless pfdev->reset.pending is set. @bad is the scheduler
 * job blamed for the reset, or NULL when called from the reset work item.
 *
 * Sequence: stop all schedulers, mask and synchronize the job IRQ,
 * soft-stop every slot and wait for it, drain remaining interrupts,
 * rebalance PM/devfreq/cycle-counter references for jobs still tracked,
 * reset the device, resubmit pending jobs, restart the schedulers and
 * finally unmask the job interrupts.
 */
static void
panfrost_reset(struct panfrost_device *pfdev,
	       struct drm_sched_job *bad)
{
	u32 js_state, js_state_mask = 0xffffffff;
	unsigned int i, j;
	bool cookie;
	int ret;

	if (!atomic_read(&pfdev->reset.pending))
		return;

	/* Stop the schedulers.
	 *
	 * FIXME: We temporarily get out of the dma_fence_signalling section
	 * because the cleanup path generate lockdep splats when taking locks
	 * to release job resources. We should rework the code to follow this
	 * pattern:
	 *
	 *	try_lock
	 *	if (locked)
	 *		release
	 *	else
	 *		schedule_work_to_release_later
	 */
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_stop(&pfdev->js->queue[i].sched, bad);

	cookie = dma_fence_begin_signalling();

	if (bad)
		drm_sched_increase_karma(bad);

	/* Mask job interrupts and synchronize to make sure we won't be
	 * interrupted during our reset.
	 */
	job_write(pfdev, JOB_INT_MASK, 0);
	synchronize_irq(pfdev->js->irq);

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		/* Cancel the next job and soft-stop the running job. */
		job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
		job_write(pfdev, JS_COMMAND(i), JS_COMMAND_SOFT_STOP);
	}

	/* Wait at most 10ms for soft-stops to complete */
	ret = readl_poll_timeout(pfdev->iomem + JOB_INT_JS_STATE, js_state,
				 !panfrost_active_slots(pfdev, &js_state_mask, js_state),
				 10, 10000);

	if (ret)
		dev_err(pfdev->dev, "Soft-stop failed\n");

	/* Handle the remaining interrupts before we reset. */
	panfrost_job_handle_irqs(pfdev);

	/* Remaining interrupts have been handled, but we might still have
	 * stuck jobs. Let's make sure the PM counters stay balanced by
	 * manually calling pm_runtime_put_noidle() and
	 * panfrost_devfreq_record_idle() for each stuck job.
	 * Let's also make sure the cycle counting register's refcnt is
	 * kept balanced to prevent it from running forever
	 */
	spin_lock(&pfdev->js->job_lock);
	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) {
			if (pfdev->jobs[i][j]->requirements & PANFROST_JD_REQ_CYCLE_COUNT ||
			    pfdev->jobs[i][j]->is_profiled)
				panfrost_cycle_counter_put(pfdev->jobs[i][j]->pfdev);
			pm_runtime_put_noidle(pfdev->dev);
			panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
		}
	}
	/* Forget every tracked job; the software view of all slots is now empty. */
	memset(pfdev->jobs, 0, sizeof(pfdev->jobs));
	spin_unlock(&pfdev->js->job_lock);

	/* Proceed with reset now. */
	panfrost_device_reset(pfdev);

	/* panfrost_device_reset() unmasks job interrupts, but we want to
	 * keep them masked a bit longer.
	 */
	job_write(pfdev, JOB_INT_MASK, 0);

	/* GPU has been reset, we can clear the reset pending bit. */
	atomic_set(&pfdev->reset.pending, 0);

	/* Now resubmit jobs that were previously queued but didn't have a
	 * chance to finish.
	 * FIXME: We temporarily get out of the DMA fence signalling section
	 * while resubmitting jobs because the job submission logic will
	 * allocate memory with the GFP_KERNEL flag which can trigger memory
	 * reclaim and exposes a lock ordering issue.
	 */
	dma_fence_end_signalling(cookie);
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched);
	cookie = dma_fence_begin_signalling();

	/* Restart the schedulers */
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_start(&pfdev->js->queue[i].sched, 0);

	/* Re-enable job interrupts now that everything has been restarted. */
	job_write(pfdev, JOB_INT_MASK,
		  GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
		  GENMASK(NUM_JOB_SLOTS - 1, 0));

	dma_fence_end_signalling(cookie);
}
745 
panfrost_job_timedout(struct drm_sched_job * sched_job)746 static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job
747 						     *sched_job)
748 {
749 	struct panfrost_job *job = to_panfrost_job(sched_job);
750 	struct panfrost_device *pfdev = job->pfdev;
751 	int js = panfrost_job_get_slot(job);
752 
753 	/*
754 	 * If the GPU managed to complete this jobs fence, the timeout is
755 	 * spurious. Bail out.
756 	 */
757 	if (dma_fence_is_signaled(job->done_fence))
758 		return DRM_GPU_SCHED_STAT_NOMINAL;
759 
760 	/*
761 	 * Panfrost IRQ handler may take a long time to process an interrupt
762 	 * if there is another IRQ handler hogging the processing.
763 	 * For example, the HDMI encoder driver might be stuck in the IRQ
764 	 * handler for a significant time in a case of bad cable connection.
765 	 * In order to catch such cases and not report spurious Panfrost
766 	 * job timeouts, synchronize the IRQ handler and re-check the fence
767 	 * status.
768 	 */
769 	synchronize_irq(pfdev->js->irq);
770 
771 	if (dma_fence_is_signaled(job->done_fence)) {
772 		dev_warn(pfdev->dev, "unexpectedly high interrupt latency\n");
773 		return DRM_GPU_SCHED_STAT_NOMINAL;
774 	}
775 
776 	dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
777 		js,
778 		job_read(pfdev, JS_CONFIG(js)),
779 		job_read(pfdev, JS_STATUS(js)),
780 		job_read(pfdev, JS_HEAD_LO(js)),
781 		job_read(pfdev, JS_TAIL_LO(js)),
782 		sched_job);
783 
784 	panfrost_core_dump(job);
785 
786 	atomic_set(&pfdev->reset.pending, 1);
787 	panfrost_reset(pfdev, sched_job);
788 
789 	return DRM_GPU_SCHED_STAT_NOMINAL;
790 }
791 
panfrost_reset_work(struct work_struct * work)792 static void panfrost_reset_work(struct work_struct *work)
793 {
794 	struct panfrost_device *pfdev;
795 
796 	pfdev = container_of(work, struct panfrost_device, reset.work);
797 	panfrost_reset(pfdev, NULL);
798 }
799 
/* Scheduler backend callbacks, passed to drm_sched_init() for every job slot. */
static const struct drm_sched_backend_ops panfrost_sched_ops = {
	.run_job = panfrost_job_run,
	.timedout_job = panfrost_job_timedout,
	.free_job = panfrost_job_free
};
805 
panfrost_job_irq_handler_thread(int irq,void * data)806 static irqreturn_t panfrost_job_irq_handler_thread(int irq, void *data)
807 {
808 	struct panfrost_device *pfdev = data;
809 
810 	panfrost_job_handle_irqs(pfdev);
811 
812 	/* Enable interrupts only if we're not about to get suspended */
813 	if (!test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended))
814 		job_write(pfdev, JOB_INT_MASK,
815 			  GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
816 			  GENMASK(NUM_JOB_SLOTS - 1, 0));
817 
818 	return IRQ_HANDLED;
819 }
820 
panfrost_job_irq_handler(int irq,void * data)821 static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
822 {
823 	struct panfrost_device *pfdev = data;
824 	u32 status;
825 
826 	if (test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended))
827 		return IRQ_NONE;
828 
829 	status = job_read(pfdev, JOB_INT_STAT);
830 	if (!status)
831 		return IRQ_NONE;
832 
833 	job_write(pfdev, JOB_INT_MASK, 0);
834 	return IRQ_WAKE_THREAD;
835 }
836 
panfrost_job_init(struct panfrost_device * pfdev)837 int panfrost_job_init(struct panfrost_device *pfdev)
838 {
839 	struct panfrost_job_slot *js;
840 	unsigned int nentries = 2;
841 	int ret, j;
842 
843 	/* All GPUs have two entries per queue, but without jobchain
844 	 * disambiguation stopping the right job in the close path is tricky,
845 	 * so let's just advertise one entry in that case.
846 	 */
847 	if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
848 		nentries = 1;
849 
850 	pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL);
851 	if (!js)
852 		return -ENOMEM;
853 
854 	INIT_WORK(&pfdev->reset.work, panfrost_reset_work);
855 	spin_lock_init(&js->job_lock);
856 
857 	js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job");
858 	if (js->irq < 0)
859 		return js->irq;
860 
861 	ret = devm_request_threaded_irq(pfdev->dev, js->irq,
862 					panfrost_job_irq_handler,
863 					panfrost_job_irq_handler_thread,
864 					IRQF_SHARED, KBUILD_MODNAME "-job",
865 					pfdev);
866 	if (ret) {
867 		dev_err(pfdev->dev, "failed to request job irq");
868 		return ret;
869 	}
870 
871 	pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0);
872 	if (!pfdev->reset.wq)
873 		return -ENOMEM;
874 
875 	for (j = 0; j < NUM_JOB_SLOTS; j++) {
876 		js->queue[j].fence_context = dma_fence_context_alloc(1);
877 
878 		ret = drm_sched_init(&js->queue[j].sched,
879 				     &panfrost_sched_ops, NULL,
880 				     DRM_SCHED_PRIORITY_COUNT,
881 				     nentries, 0,
882 				     msecs_to_jiffies(JOB_TIMEOUT_MS),
883 				     pfdev->reset.wq,
884 				     NULL, "pan_js", pfdev->dev);
885 		if (ret) {
886 			dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
887 			goto err_sched;
888 		}
889 	}
890 
891 	panfrost_job_enable_interrupts(pfdev);
892 
893 	return 0;
894 
895 err_sched:
896 	for (j--; j >= 0; j--)
897 		drm_sched_fini(&js->queue[j].sched);
898 
899 	destroy_workqueue(pfdev->reset.wq);
900 	return ret;
901 }
902 
panfrost_job_fini(struct panfrost_device * pfdev)903 void panfrost_job_fini(struct panfrost_device *pfdev)
904 {
905 	struct panfrost_job_slot *js = pfdev->js;
906 	int j;
907 
908 	job_write(pfdev, JOB_INT_MASK, 0);
909 
910 	for (j = 0; j < NUM_JOB_SLOTS; j++) {
911 		drm_sched_fini(&js->queue[j].sched);
912 	}
913 
914 	cancel_work_sync(&pfdev->reset.work);
915 	destroy_workqueue(pfdev->reset.wq);
916 }
917 
panfrost_job_open(struct panfrost_file_priv * panfrost_priv)918 int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
919 {
920 	struct panfrost_device *pfdev = panfrost_priv->pfdev;
921 	struct panfrost_job_slot *js = pfdev->js;
922 	struct drm_gpu_scheduler *sched;
923 	int ret, i;
924 
925 	for (i = 0; i < NUM_JOB_SLOTS; i++) {
926 		sched = &js->queue[i].sched;
927 		ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i],
928 					    DRM_SCHED_PRIORITY_NORMAL, &sched,
929 					    1, NULL);
930 		if (WARN_ON(ret))
931 			return ret;
932 	}
933 	return 0;
934 }
935 
panfrost_job_close(struct panfrost_file_priv * panfrost_priv)936 void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
937 {
938 	struct panfrost_device *pfdev = panfrost_priv->pfdev;
939 	int i;
940 
941 	for (i = 0; i < NUM_JOB_SLOTS; i++)
942 		drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]);
943 
944 	/* Kill in-flight jobs */
945 	spin_lock(&pfdev->js->job_lock);
946 	for (i = 0; i < NUM_JOB_SLOTS; i++) {
947 		struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i];
948 		int j;
949 
950 		for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
951 			struct panfrost_job *job = pfdev->jobs[i][j];
952 			u32 cmd;
953 
954 			if (!job || job->base.entity != entity)
955 				continue;
956 
957 			if (j == 1) {
958 				/* Try to cancel the job before it starts */
959 				job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
960 				/* Reset the job head so it doesn't get restarted if
961 				 * the job in the first slot failed.
962 				 */
963 				job->jc = 0;
964 			}
965 
966 			if (panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
967 				cmd = panfrost_get_job_chain_flag(job) ?
968 				      JS_COMMAND_HARD_STOP_1 :
969 				      JS_COMMAND_HARD_STOP_0;
970 			} else {
971 				cmd = JS_COMMAND_HARD_STOP;
972 			}
973 
974 			job_write(pfdev, JS_COMMAND(i), cmd);
975 
976 			/* Jobs can outlive their file context */
977 			job->engine_usage = NULL;
978 		}
979 	}
980 	spin_unlock(&pfdev->js->job_lock);
981 }
982 
panfrost_job_is_idle(struct panfrost_device * pfdev)983 int panfrost_job_is_idle(struct panfrost_device *pfdev)
984 {
985 	struct panfrost_job_slot *js = pfdev->js;
986 	int i;
987 
988 	for (i = 0; i < NUM_JOB_SLOTS; i++) {
989 		/* If there are any jobs in the HW queue, we're not idle */
990 		if (atomic_read(&js->queue[i].sched.credit_count))
991 			return false;
992 	}
993 
994 	return true;
995 }
996