xref: /linux/drivers/gpu/drm/imagination/pvr_queue.c (revision c06b6cde2a1c3bcbb561bd57bb6f34eae9030921)
1 // SPDX-License-Identifier: GPL-2.0-only OR MIT
2 /* Copyright (c) 2023 Imagination Technologies Ltd. */
3 
4 #include <drm/drm_managed.h>
5 #include <drm/gpu_scheduler.h>
6 
7 #include "pvr_cccb.h"
8 #include "pvr_context.h"
9 #include "pvr_device.h"
10 #include "pvr_drv.h"
11 #include "pvr_job.h"
12 #include "pvr_queue.h"
13 #include "pvr_vm.h"
14 
15 #include "pvr_rogue_fwif_client.h"
16 
/*
 * Deadline upper bound, in milliseconds.
 * NOTE(review): not referenced in the visible part of this file - confirm usage.
 */
#define MAX_DEADLINE_MS			30000

/*
 * log2 of the client CCB (CCCB) size, one constant per queue type.
 * NOTE(review): presumably expressed in bytes - confirm against the CCCB init code.
 */
#define CTX_COMPUTE_CCCB_SIZE_LOG2	15
#define CTX_FRAG_CCCB_SIZE_LOG2		15
#define CTX_GEOM_CCCB_SIZE_LOG2		15
#define CTX_TRANSFER_CCCB_SIZE_LOG2	15
23 
24 static int get_xfer_ctx_state_size(struct pvr_device *pvr_dev)
25 {
26 	u32 num_isp_store_registers;
27 
28 	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
29 		num_isp_store_registers = 1;
30 	} else {
31 		int err;
32 
33 		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
34 		if (WARN_ON(err))
35 			return err;
36 	}
37 
38 	return sizeof(struct rogue_fwif_frag_ctx_state) +
39 	       (num_isp_store_registers *
40 		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
41 }
42 
43 static int get_frag_ctx_state_size(struct pvr_device *pvr_dev)
44 {
45 	u32 num_isp_store_registers;
46 	int err;
47 
48 	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
49 		err = PVR_FEATURE_VALUE(pvr_dev, num_raster_pipes, &num_isp_store_registers);
50 		if (WARN_ON(err))
51 			return err;
52 
53 		if (PVR_HAS_FEATURE(pvr_dev, gpu_multicore_support)) {
54 			u32 xpu_max_slaves;
55 
56 			err = PVR_FEATURE_VALUE(pvr_dev, xpu_max_slaves, &xpu_max_slaves);
57 			if (WARN_ON(err))
58 				return err;
59 
60 			num_isp_store_registers *= (1 + xpu_max_slaves);
61 		}
62 	} else {
63 		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
64 		if (WARN_ON(err))
65 			return err;
66 	}
67 
68 	return sizeof(struct rogue_fwif_frag_ctx_state) +
69 	       (num_isp_store_registers *
70 		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
71 }
72 
73 static int get_ctx_state_size(struct pvr_device *pvr_dev, enum drm_pvr_job_type type)
74 {
75 	switch (type) {
76 	case DRM_PVR_JOB_TYPE_GEOMETRY:
77 		return sizeof(struct rogue_fwif_geom_ctx_state);
78 	case DRM_PVR_JOB_TYPE_FRAGMENT:
79 		return get_frag_ctx_state_size(pvr_dev);
80 	case DRM_PVR_JOB_TYPE_COMPUTE:
81 		return sizeof(struct rogue_fwif_compute_ctx_state);
82 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
83 		return get_xfer_ctx_state_size(pvr_dev);
84 	}
85 
86 	WARN(1, "Invalid queue type");
87 	return -EINVAL;
88 }
89 
90 static u32 get_ctx_offset(enum drm_pvr_job_type type)
91 {
92 	switch (type) {
93 	case DRM_PVR_JOB_TYPE_GEOMETRY:
94 		return offsetof(struct rogue_fwif_fwrendercontext, geom_context);
95 	case DRM_PVR_JOB_TYPE_FRAGMENT:
96 		return offsetof(struct rogue_fwif_fwrendercontext, frag_context);
97 	case DRM_PVR_JOB_TYPE_COMPUTE:
98 		return offsetof(struct rogue_fwif_fwcomputecontext, cdm_context);
99 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
100 		return offsetof(struct rogue_fwif_fwtransfercontext, tq_context);
101 	}
102 
103 	return 0;
104 }
105 
106 static const char *
107 pvr_queue_fence_get_driver_name(struct dma_fence *f)
108 {
109 	return PVR_DRIVER_NAME;
110 }
111 
112 static void pvr_queue_fence_release_work(struct work_struct *w)
113 {
114 	struct pvr_queue_fence *fence = container_of(w, struct pvr_queue_fence, release_work);
115 
116 	pvr_context_put(fence->queue->ctx);
117 	dma_fence_free(&fence->base);
118 }
119 
120 static void pvr_queue_fence_release(struct dma_fence *f)
121 {
122 	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
123 	struct pvr_device *pvr_dev = fence->queue->ctx->pvr_dev;
124 
125 	queue_work(pvr_dev->sched_wq, &fence->release_work);
126 }
127 
128 static const char *
129 pvr_queue_job_fence_get_timeline_name(struct dma_fence *f)
130 {
131 	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
132 
133 	switch (fence->queue->type) {
134 	case DRM_PVR_JOB_TYPE_GEOMETRY:
135 		return "geometry";
136 
137 	case DRM_PVR_JOB_TYPE_FRAGMENT:
138 		return "fragment";
139 
140 	case DRM_PVR_JOB_TYPE_COMPUTE:
141 		return "compute";
142 
143 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
144 		return "transfer";
145 	}
146 
147 	WARN(1, "Invalid queue type");
148 	return "invalid";
149 }
150 
151 static const char *
152 pvr_queue_cccb_fence_get_timeline_name(struct dma_fence *f)
153 {
154 	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
155 
156 	switch (fence->queue->type) {
157 	case DRM_PVR_JOB_TYPE_GEOMETRY:
158 		return "geometry-cccb";
159 
160 	case DRM_PVR_JOB_TYPE_FRAGMENT:
161 		return "fragment-cccb";
162 
163 	case DRM_PVR_JOB_TYPE_COMPUTE:
164 		return "compute-cccb";
165 
166 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
167 		return "transfer-cccb";
168 	}
169 
170 	WARN(1, "Invalid queue type");
171 	return "invalid";
172 }
173 
174 static const struct dma_fence_ops pvr_queue_job_fence_ops = {
175 	.get_driver_name = pvr_queue_fence_get_driver_name,
176 	.get_timeline_name = pvr_queue_job_fence_get_timeline_name,
177 	.release = pvr_queue_fence_release,
178 };
179 
180 /**
181  * pvr_queue_fence_is_ufo_backed() - Check if a dma_fence is backed by a UFO.
182  * @f: The dma_fence to check.
183  *
184  * Return:
185  * * true if the dma_fence is backed by a UFO, or
186  * * false otherwise.
187  */
188 static inline bool
189 pvr_queue_fence_is_ufo_backed(struct dma_fence *f)
190 {
191 	/*
192 	 * Currently the only dma_fence backed by a UFO object is the job fence,
193 	 * e.g. pvr_job::done_fence, wrapped by a pvr_queue_fence object.
194 	 */
195 	return f && f->ops == &pvr_queue_job_fence_ops;
196 }
197 
198 /**
199  * to_pvr_queue_job_fence() - Return a pvr_queue_fence object if the fence is
200  * already backed by a UFO.
201  * @f: The dma_fence to turn into a pvr_queue_fence.
202  *
203  * This could be called on:
204  * - a job fence directly, in which case it simply returns the containing pvr_queue_fence;
205  * - a drm_sched_fence's scheduled or finished fence, in which case it will first try to follow
206  *   the parent pointer to find the job fence (note that the parent pointer is initialized
207  *   only after the run_job() callback is called on the drm_sched_fence's owning job);
208  * - any other dma_fence, in which case it will return NULL.
209  *
210  * Return:
211  *  * A non-NULL pvr_queue_fence object if the dma_fence is backed by a UFO, or
212  *  * NULL otherwise.
213  */
214 static struct pvr_queue_fence *
215 to_pvr_queue_job_fence(struct dma_fence *f)
216 {
217 	struct drm_sched_fence *sched_fence = to_drm_sched_fence(f);
218 
219 	if (sched_fence)
220 		f = sched_fence->parent;
221 
222 	if (pvr_queue_fence_is_ufo_backed(f))
223 		return container_of(f, struct pvr_queue_fence, base);
224 
225 	return NULL;
226 }
227 
228 static const struct dma_fence_ops pvr_queue_cccb_fence_ops = {
229 	.get_driver_name = pvr_queue_fence_get_driver_name,
230 	.get_timeline_name = pvr_queue_cccb_fence_get_timeline_name,
231 	.release = pvr_queue_fence_release,
232 };
233 
234 /**
235  * pvr_queue_fence_put() - Put wrapper for pvr_queue_fence objects.
236  * @f: The dma_fence object to put.
237  *
238  * If the pvr_queue_fence has been initialized, we call dma_fence_put(),
239  * otherwise we free the object with dma_fence_free(). This allows us
240  * to do the right thing before and after pvr_queue_fence_init() had been
241  * called.
242  */
243 static void pvr_queue_fence_put(struct dma_fence *f)
244 {
245 	if (!f)
246 		return;
247 
248 	if (WARN_ON(f->ops &&
249 		    f->ops != &pvr_queue_cccb_fence_ops &&
250 		    f->ops != &pvr_queue_job_fence_ops))
251 		return;
252 
253 	/* If the fence hasn't been initialized yet, free the object directly. */
254 	if (f->ops)
255 		dma_fence_put(f);
256 	else
257 		dma_fence_free(f);
258 }
259 
260 /**
261  * pvr_queue_fence_alloc() - Allocate a pvr_queue_fence fence object
262  *
263  * Call this function to allocate job CCCB and done fences. This only
264  * allocates the objects. Initialization happens when the underlying
265  * dma_fence object is to be returned to drm_sched (in prepare_job() or
266  * run_job()).
267  *
268  * Return:
269  *  * A valid pointer if the allocation succeeds, or
270  *  * NULL if the allocation fails.
271  */
272 static struct dma_fence *
273 pvr_queue_fence_alloc(void)
274 {
275 	struct pvr_queue_fence *fence;
276 
277 	fence = kzalloc_obj(*fence);
278 	if (!fence)
279 		return NULL;
280 
281 	return &fence->base;
282 }
283 
284 /**
285  * pvr_queue_fence_init() - Initializes a pvr_queue_fence object.
286  * @f: The fence to initialize
287  * @queue: The queue this fence belongs to.
288  * @fence_ops: The fence operations.
289  * @fence_ctx: The fence context.
290  *
291  * Wrapper around dma_fence_init() that takes care of initializing the
292  * pvr_queue_fence::queue field too.
293  */
294 static void
295 pvr_queue_fence_init(struct dma_fence *f,
296 		     struct pvr_queue *queue,
297 		     const struct dma_fence_ops *fence_ops,
298 		     struct pvr_queue_fence_ctx *fence_ctx)
299 {
300 	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
301 
302 	pvr_context_get(queue->ctx);
303 	fence->queue = queue;
304 	INIT_WORK(&fence->release_work, pvr_queue_fence_release_work);
305 	dma_fence_init(&fence->base, fence_ops,
306 		       &fence_ctx->lock, fence_ctx->id,
307 		       atomic_inc_return(&fence_ctx->seqno));
308 }
309 
310 /**
311  * pvr_queue_cccb_fence_init() - Initializes a CCCB fence object.
312  * @fence: The fence to initialize.
313  * @queue: The queue this fence belongs to.
314  *
315  * Initializes a fence that can be used to wait for CCCB space.
316  *
317  * Should be called in the ::prepare_job() path, so the fence returned to
318  * drm_sched is valid.
319  */
320 static void
321 pvr_queue_cccb_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
322 {
323 	pvr_queue_fence_init(fence, queue, &pvr_queue_cccb_fence_ops,
324 			     &queue->cccb_fence_ctx.base);
325 }
326 
327 /**
328  * pvr_queue_job_fence_init() - Initializes a job done fence object.
329  * @fence: The fence to initialize.
330  * @queue: The queue this fence belongs to.
331  *
332  * Initializes a fence that will be signaled when the GPU is done executing
333  * a job.
334  *
335  * Should be called *before* the ::run_job() path, so the fence is initialised
336  * before being placed in the pending_list.
337  */
338 static void
339 pvr_queue_job_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
340 {
341 	if (!fence->ops)
342 		pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops,
343 				     &queue->job_fence_ctx);
344 }
345 
346 /**
347  * pvr_queue_fence_ctx_init() - Queue fence context initialization.
348  * @fence_ctx: The context to initialize
349  */
350 static void
351 pvr_queue_fence_ctx_init(struct pvr_queue_fence_ctx *fence_ctx)
352 {
353 	spin_lock_init(&fence_ctx->lock);
354 	fence_ctx->id = dma_fence_context_alloc(1);
355 	atomic_set(&fence_ctx->seqno, 0);
356 }
357 
358 static u32 ufo_cmds_size(u32 elem_count)
359 {
360 	/* We can pass at most ROGUE_FWIF_CCB_CMD_MAX_UFOS per UFO-related command. */
361 	u32 full_cmd_count = elem_count / ROGUE_FWIF_CCB_CMD_MAX_UFOS;
362 	u32 remaining_elems = elem_count % ROGUE_FWIF_CCB_CMD_MAX_UFOS;
363 	u32 size = full_cmd_count *
364 		   pvr_cccb_get_size_of_cmd_with_hdr(ROGUE_FWIF_CCB_CMD_MAX_UFOS *
365 						     sizeof(struct rogue_fwif_ufo));
366 
367 	if (remaining_elems) {
368 		size += pvr_cccb_get_size_of_cmd_with_hdr(remaining_elems *
369 							  sizeof(struct rogue_fwif_ufo));
370 	}
371 
372 	return size;
373 }
374 
375 static u32 job_cmds_size(struct pvr_job *job, u32 ufo_wait_count)
376 {
377 	/*
378 	 * One UFO command per native fence this job will be waiting on (unless any are
379 	 * signaled by the time the job is submitted), plus a command for the job itself,
380 	 * plus one UFO command for the fence signaling.
381 	 */
382 	return ufo_cmds_size(ufo_wait_count) +
383 	       pvr_cccb_get_size_of_cmd_with_hdr(job->cmd_len) +
384 	       ufo_cmds_size(1);
385 }
386 
387 static bool
388 is_paired_job_fence(struct dma_fence *fence, struct pvr_job *job)
389 {
390 	/* This assumes "fence" is one of "job"'s drm_sched_job::dependencies */
391 	return job->type == DRM_PVR_JOB_TYPE_FRAGMENT &&
392 	       job->paired_job &&
393 	       &job->paired_job->base.s_fence->scheduled == fence;
394 }
395 
396 /**
397  * job_count_remaining_native_deps() - Count the number of non-signaled native dependencies.
398  * @job: Job to operate on.
399  *
400  * Returns: Number of non-signaled native deps remaining.
401  */
402 static unsigned long job_count_remaining_native_deps(struct pvr_job *job)
403 {
404 	unsigned long remaining_count = 0;
405 	struct dma_fence *fence = NULL;
406 	unsigned long index;
407 
408 	xa_for_each(&job->base.dependencies, index, fence) {
409 		struct pvr_queue_fence *jfence;
410 
411 		if (is_paired_job_fence(fence, job)) {
412 			/*
413 			 * A fence between paired jobs won't resolve to a pvr_queue_fence (i.e.
414 			 * be backed by a UFO) until the jobs have been submitted, together.
415 			 * The submitting code will insert a partial render fence command for this.
416 			 */
417 			WARN_ON(dma_fence_is_signaled(fence));
418 			remaining_count++;
419 			continue;
420 		}
421 
422 		jfence = to_pvr_queue_job_fence(fence);
423 		if (!jfence)
424 			continue;
425 
426 		if (!dma_fence_is_signaled(&jfence->base))
427 			remaining_count++;
428 	}
429 
430 	return remaining_count;
431 }
432 
433 /**
434  * pvr_queue_get_job_cccb_fence() - Get the CCCB fence attached to a job.
435  * @queue: The queue this job will be submitted to.
436  * @job: The job to get the CCCB fence on.
437  *
438  * The CCCB fence is a synchronization primitive allowing us to delay job
439  * submission until there's enough space in the CCCB to submit the job.
440  *
441  * Return:
442  *  * NULL if there's enough space in the CCCB to submit this job, or
443  *  * A valid dma_fence object otherwise.
444  */
445 static struct dma_fence *
446 pvr_queue_get_job_cccb_fence(struct pvr_queue *queue, struct pvr_job *job)
447 {
448 	struct pvr_queue_fence *cccb_fence;
449 	unsigned int native_deps_remaining;
450 
451 	/* If the fence is NULL, that means we already checked that we had
452 	 * enough space in the cccb for our job.
453 	 */
454 	if (!job->cccb_fence)
455 		return NULL;
456 
457 	mutex_lock(&queue->cccb_fence_ctx.job_lock);
458 
459 	/* Count remaining native dependencies and check if the job fits in the CCCB. */
460 	native_deps_remaining = job_count_remaining_native_deps(job);
461 	if (pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
462 		pvr_queue_fence_put(job->cccb_fence);
463 		job->cccb_fence = NULL;
464 		goto out_unlock;
465 	}
466 
467 	/* There should be no job attached to the CCCB fence context:
468 	 * drm_sched_entity guarantees that jobs are submitted one at a time.
469 	 */
470 	if (WARN_ON(queue->cccb_fence_ctx.job))
471 		pvr_job_put(queue->cccb_fence_ctx.job);
472 
473 	queue->cccb_fence_ctx.job = pvr_job_get(job);
474 
475 	/* Initialize the fence before returning it. */
476 	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
477 	if (!WARN_ON(cccb_fence->queue))
478 		pvr_queue_cccb_fence_init(job->cccb_fence, queue);
479 
480 out_unlock:
481 	mutex_unlock(&queue->cccb_fence_ctx.job_lock);
482 
483 	return dma_fence_get(job->cccb_fence);
484 }
485 
486 /**
487  * pvr_queue_get_job_kccb_fence() - Get the KCCB fence attached to a job.
488  * @queue: The queue this job will be submitted to.
489  * @job: The job to get the KCCB fence on.
490  *
491  * The KCCB fence is a synchronization primitive allowing us to delay job
492  * submission until there's enough space in the KCCB to submit the job.
493  *
494  * Return:
495  *  * NULL if there's enough space in the KCCB to submit this job, or
496  *  * A valid dma_fence object otherwise.
497  */
498 static struct dma_fence *
499 pvr_queue_get_job_kccb_fence(struct pvr_queue *queue, struct pvr_job *job)
500 {
501 	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
502 	struct dma_fence *kccb_fence = NULL;
503 
504 	/* If the fence is NULL, that means we already checked that we had
505 	 * enough space in the KCCB for our job.
506 	 */
507 	if (!job->kccb_fence)
508 		return NULL;
509 
510 	if (!WARN_ON(job->kccb_fence->ops)) {
511 		kccb_fence = pvr_kccb_reserve_slot(pvr_dev, job->kccb_fence);
512 		job->kccb_fence = NULL;
513 	}
514 
515 	return kccb_fence;
516 }
517 
518 static struct dma_fence *
519 pvr_queue_get_paired_frag_job_dep(struct pvr_job *job)
520 {
521 	struct pvr_job *frag_job = job->type == DRM_PVR_JOB_TYPE_GEOMETRY ?
522 				   job->paired_job : NULL;
523 	struct pvr_queue *frag_queue = frag_job ? frag_job->ctx->queues.fragment : NULL;
524 	struct dma_fence *f;
525 	unsigned long index;
526 
527 	if (!frag_job)
528 		return NULL;
529 
530 	/* Have the geometry job wait on the paired fragment job's dependencies as well. */
531 	xa_for_each(&frag_job->base.dependencies, index, f) {
532 		/* Skip already signaled fences. */
533 		if (dma_fence_is_signaled(f))
534 			continue;
535 
536 		/*
537 		 * The paired job fence won't be signaled until both jobs have
538 		 * been submitted, so we can't wait on it to schedule them.
539 		 */
540 		if (f == &job->base.s_fence->scheduled)
541 			continue;
542 
543 		return dma_fence_get(f);
544 	}
545 
546 	/* Initialize the paired fragment job's done_fence, so we can signal it. */
547 	pvr_queue_job_fence_init(frag_job->done_fence, frag_queue);
548 
549 	return pvr_queue_get_job_cccb_fence(frag_queue, frag_job);
550 }
551 
552 /**
553  * pvr_queue_prepare_job() - Return the next internal dependencies expressed as a dma_fence.
554  * @sched_job: The job to query the next internal dependency on
555  * @s_entity: The entity this job is queue on.
556  *
557  * After iterating over drm_sched_job::dependencies, drm_sched let the driver return
558  * its own internal dependencies. We use this function to return our internal dependencies.
559  */
560 static struct dma_fence *
561 pvr_queue_prepare_job(struct drm_sched_job *sched_job,
562 		      struct drm_sched_entity *s_entity)
563 {
564 	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
565 	struct pvr_queue *queue = container_of(s_entity, struct pvr_queue, entity);
566 	struct dma_fence *internal_dep = NULL;
567 
568 	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
569 		/*
570 		 * This will be called on a paired fragment job after being submitted
571 		 * to the firmware as part of the paired geometry job's submission.
572 		 * We can tell if this is the case and bail early from whether run_job()
573 		 * has been called on the geometry job, which would issue a pm ref on
574 		 * this job as well.
575 		 */
576 		if (job->has_pm_ref)
577 			return NULL;
578 	}
579 
580 	/*
581 	 * Initialize the done_fence, so we can signal it. This must be done
582 	 * here because otherwise by the time of run_job() the job will end up
583 	 * in the pending list without a valid fence.
584 	 */
585 	pvr_queue_job_fence_init(job->done_fence, queue);
586 
587 	/* CCCB fence is used to make sure we have enough space in the CCCB to
588 	 * submit our commands.
589 	 */
590 	internal_dep = pvr_queue_get_job_cccb_fence(queue, job);
591 
592 	/* KCCB fence is used to make sure we have a KCCB slot to queue our
593 	 * CMD_KICK.
594 	 */
595 	if (!internal_dep)
596 		internal_dep = pvr_queue_get_job_kccb_fence(queue, job);
597 
598 	/* Any extra internal dependency should be added here, using the following
599 	 * pattern:
600 	 *
601 	 *	if (!internal_dep)
602 	 *		internal_dep = pvr_queue_get_job_xxxx_fence(queue, job);
603 	 */
604 
605 	/* The paired job fence should come last, when everything else is ready. */
606 	if (!internal_dep)
607 		internal_dep = pvr_queue_get_paired_frag_job_dep(job);
608 
609 	return internal_dep;
610 }
611 
612 /**
613  * pvr_queue_update_active_state_locked() - Update the queue active state.
614  * @queue: Queue to update the state on.
615  *
616  * Locked version of pvr_queue_update_active_state(). Must be called with
617  * pvr_device::queue::lock held.
618  */
619 static void pvr_queue_update_active_state_locked(struct pvr_queue *queue)
620 {
621 	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
622 
623 	lockdep_assert_held(&pvr_dev->queues.lock);
624 
625 	/* The queue is temporary out of any list when it's being reset,
626 	 * we don't want a call to pvr_queue_update_active_state_locked()
627 	 * to re-insert it behind our back.
628 	 */
629 	if (list_empty(&queue->node))
630 		return;
631 
632 	if (!atomic_read(&queue->in_flight_job_count))
633 		list_move_tail(&queue->node, &pvr_dev->queues.idle);
634 	else
635 		list_move_tail(&queue->node, &pvr_dev->queues.active);
636 }
637 
638 /**
639  * pvr_queue_update_active_state() - Update the queue active state.
640  * @queue: Queue to update the state on.
641  *
642  * Active state is based on the in_flight_job_count value.
643  *
644  * Updating the active state implies moving the queue in or out of the
645  * active queue list, which also defines whether the queue is checked
646  * or not when a FW event is received.
647  *
648  * This function should be called any time a job is submitted or it done
649  * fence is signaled.
650  */
651 static void pvr_queue_update_active_state(struct pvr_queue *queue)
652 {
653 	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
654 
655 	mutex_lock(&pvr_dev->queues.lock);
656 	pvr_queue_update_active_state_locked(queue);
657 	mutex_unlock(&pvr_dev->queues.lock);
658 }
659 
660 static void pvr_queue_submit_job_to_cccb(struct pvr_job *job)
661 {
662 	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
663 	struct rogue_fwif_ufo ufos[ROGUE_FWIF_CCB_CMD_MAX_UFOS];
664 	struct pvr_cccb *cccb = &queue->cccb;
665 	struct pvr_queue_fence *jfence;
666 	struct dma_fence *fence;
667 	unsigned long index;
668 	u32 ufo_count = 0;
669 
670 	/* We need to add the queue to the active list before updating the CCCB,
671 	 * otherwise we might miss the FW event informing us that something
672 	 * happened on this queue.
673 	 */
674 	atomic_inc(&queue->in_flight_job_count);
675 	pvr_queue_update_active_state(queue);
676 
677 	xa_for_each(&job->base.dependencies, index, fence) {
678 		jfence = to_pvr_queue_job_fence(fence);
679 		if (!jfence)
680 			continue;
681 
682 		/* Some dependencies might have been signaled since prepare_job() */
683 		if (dma_fence_is_signaled(&jfence->base))
684 			continue;
685 
686 		pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
687 					  &ufos[ufo_count].addr);
688 		ufos[ufo_count++].value = jfence->base.seqno;
689 
690 		if (ufo_count == ARRAY_SIZE(ufos)) {
691 			pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
692 							   sizeof(ufos), ufos, 0, 0);
693 			ufo_count = 0;
694 		}
695 	}
696 
697 	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
698 		/*
699 		 * The loop above will only process dependencies backed by a UFO i.e. with
700 		 * a valid parent fence assigned, but the paired job dependency won't have
701 		 * one until both jobs have been submitted. Access the parent fence directly
702 		 * here instead, submitting it last as partial render fence.
703 		 */
704 		jfence = to_pvr_queue_job_fence(job->paired_job->done_fence);
705 		if (!WARN_ON(!jfence)) {
706 			pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
707 						  &ufos[ufo_count].addr);
708 			ufos[ufo_count++].value = job->paired_job->done_fence->seqno;
709 		}
710 	}
711 
712 	if (ufo_count) {
713 		pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
714 						   sizeof(ufos[0]) * ufo_count, ufos, 0, 0);
715 	}
716 
717 	if (job->type == DRM_PVR_JOB_TYPE_GEOMETRY && job->paired_job) {
718 		struct rogue_fwif_cmd_geom *cmd = job->cmd;
719 
720 		/* Reference value for the partial render test is the current queue fence
721 		 * seqno minus one.
722 		 */
723 		pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj,
724 					  &cmd->partial_render_geom_frag_fence.addr);
725 		cmd->partial_render_geom_frag_fence.value = job->done_fence->seqno - 1;
726 	}
727 
728 	/* Submit job to FW */
729 	pvr_cccb_write_command_with_header(cccb, job->fw_ccb_cmd_type, job->cmd_len, job->cmd,
730 					   job->id, job->id);
731 
732 	/* Update command to signal the job fence. */
733 	pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj, &ufos[0].addr);
734 	ufos[0].value = job->done_fence->seqno;
735 	pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_UPDATE,
736 					   sizeof(ufos[0]), ufos, 0, 0);
737 }
738 
739 /**
740  * pvr_queue_run_job() - Submit a job to the FW.
741  * @sched_job: The job to submit.
742  *
743  * This function is called when all non-native dependencies have been met and
744  * when the commands resulting from this job are guaranteed to fit in the CCCB.
745  */
746 static struct dma_fence *pvr_queue_run_job(struct drm_sched_job *sched_job)
747 {
748 	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
749 	struct pvr_device *pvr_dev = job->pvr_dev;
750 	int err;
751 
752 	/* The fragment job is issued along the geometry job when we use combined
753 	 * geom+frag kicks. When we get there, we should simply return the
754 	 * done_fence that's been initialized earlier.
755 	 */
756 	if (job->paired_job && job->type == DRM_PVR_JOB_TYPE_FRAGMENT &&
757 	    job->done_fence->ops) {
758 		return dma_fence_get(job->done_fence);
759 	}
760 
761 	/* The only kind of jobs that can be paired are geometry and fragment, and
762 	 * we bail out early if we see a fragment job that's paired with a geometry job.
763 	 * Paired jobs must also target the same context and point to the same HWRT.
764 	 */
765 	if (WARN_ON(job->paired_job &&
766 		    (job->type != DRM_PVR_JOB_TYPE_GEOMETRY ||
767 		     job->paired_job->type != DRM_PVR_JOB_TYPE_FRAGMENT ||
768 		     job->hwrt != job->paired_job->hwrt ||
769 		     job->ctx != job->paired_job->ctx)))
770 		return ERR_PTR(-EINVAL);
771 
772 	err = pvr_job_get_pm_ref(job);
773 	if (WARN_ON(err))
774 		return ERR_PTR(err);
775 
776 	if (job->paired_job) {
777 		err = pvr_job_get_pm_ref(job->paired_job);
778 		if (WARN_ON(err))
779 			return ERR_PTR(err);
780 	}
781 
782 	/* Submit our job to the CCCB */
783 	pvr_queue_submit_job_to_cccb(job);
784 
785 	if (job->paired_job) {
786 		struct pvr_job *geom_job = job;
787 		struct pvr_job *frag_job = job->paired_job;
788 		struct pvr_queue *geom_queue = job->ctx->queues.geometry;
789 		struct pvr_queue *frag_queue = job->ctx->queues.fragment;
790 
791 		/* Submit the fragment job along the geometry job and send a combined kick. */
792 		pvr_queue_submit_job_to_cccb(frag_job);
793 		pvr_cccb_send_kccb_combined_kick(pvr_dev,
794 						 &geom_queue->cccb, &frag_queue->cccb,
795 						 pvr_context_get_fw_addr(geom_job->ctx) +
796 						 geom_queue->ctx_offset,
797 						 pvr_context_get_fw_addr(frag_job->ctx) +
798 						 frag_queue->ctx_offset,
799 						 job->hwrt,
800 						 frag_job->fw_ccb_cmd_type ==
801 						 ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR);
802 	} else {
803 		struct pvr_queue *queue = container_of(job->base.sched,
804 						       struct pvr_queue, scheduler);
805 
806 		pvr_cccb_send_kccb_kick(pvr_dev, &queue->cccb,
807 					pvr_context_get_fw_addr(job->ctx) + queue->ctx_offset,
808 					job->hwrt);
809 	}
810 
811 	return dma_fence_get(job->done_fence);
812 }
813 
814 static void pvr_queue_stop(struct pvr_queue *queue, struct pvr_job *bad_job)
815 {
816 	drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
817 }
818 
819 static void pvr_queue_start(struct pvr_queue *queue)
820 {
821 	struct pvr_job *job;
822 
823 	/* Make sure we CPU-signal the UFO object, so other queues don't get
824 	 * blocked waiting on it.
825 	 */
826 	*queue->timeline_ufo.value = atomic_read(&queue->job_fence_ctx.seqno);
827 
828 	list_for_each_entry(job, &queue->scheduler.pending_list, base.list) {
829 		if (dma_fence_is_signaled(job->done_fence)) {
830 			/* Jobs might have completed after drm_sched_stop() was called.
831 			 * In that case, re-assign the parent field to the done_fence.
832 			 */
833 			WARN_ON(job->base.s_fence->parent);
834 			job->base.s_fence->parent = dma_fence_get(job->done_fence);
835 		} else {
836 			/* If we had unfinished jobs, flag the entity as guilty so no
837 			 * new job can be submitted.
838 			 */
839 			atomic_set(&queue->ctx->faulty, 1);
840 		}
841 	}
842 
843 	drm_sched_start(&queue->scheduler, 0);
844 }
845 
846 /**
847  * pvr_queue_timedout_job() - Handle a job timeout event.
848  * @s_job: The job this timeout occurred on.
849  *
850  * FIXME: We don't do anything here to unblock the situation, we just stop+start
851  * the scheduler, and re-assign parent fences in the middle.
852  *
853  * Return:
854  *  * DRM_GPU_SCHED_STAT_RESET.
855  */
856 static enum drm_gpu_sched_stat
857 pvr_queue_timedout_job(struct drm_sched_job *s_job)
858 {
859 	struct drm_gpu_scheduler *sched = s_job->sched;
860 	struct pvr_queue *queue = container_of(sched, struct pvr_queue, scheduler);
861 	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
862 	struct pvr_job *job;
863 	u32 job_count = 0;
864 
865 	dev_err(sched->dev, "Job timeout\n");
866 
867 	/* Before we stop the scheduler, make sure the queue is out of any list, so
868 	 * any call to pvr_queue_update_active_state_locked() that might happen
869 	 * until the scheduler is really stopped doesn't end up re-inserting the
870 	 * queue in the active list. This would cause
871 	 * pvr_queue_signal_done_fences() and drm_sched_stop() to race with each
872 	 * other when accessing the pending_list, since drm_sched_stop() doesn't
873 	 * grab the job_list_lock when modifying the list (it's assuming the
874 	 * only other accessor is the scheduler, and it's safe to not grab the
875 	 * lock since it's stopped).
876 	 */
877 	mutex_lock(&pvr_dev->queues.lock);
878 	list_del_init(&queue->node);
879 	mutex_unlock(&pvr_dev->queues.lock);
880 
881 	drm_sched_stop(sched, s_job);
882 
883 	/* Re-assign job parent fences. */
884 	list_for_each_entry(job, &sched->pending_list, base.list) {
885 		job->base.s_fence->parent = dma_fence_get(job->done_fence);
886 		job_count++;
887 	}
888 	WARN_ON(atomic_read(&queue->in_flight_job_count) != job_count);
889 
890 	/* Re-insert the queue in the proper list, and kick a queue processing
891 	 * operation if there were jobs pending.
892 	 */
893 	mutex_lock(&pvr_dev->queues.lock);
894 	if (!job_count) {
895 		list_move_tail(&queue->node, &pvr_dev->queues.idle);
896 	} else {
897 		atomic_set(&queue->in_flight_job_count, job_count);
898 		list_move_tail(&queue->node, &pvr_dev->queues.active);
899 		pvr_queue_process(queue);
900 	}
901 	mutex_unlock(&pvr_dev->queues.lock);
902 
903 	drm_sched_start(sched, 0);
904 
905 	return DRM_GPU_SCHED_STAT_RESET;
906 }
907 
908 /**
909  * pvr_queue_free_job() - Release the reference the scheduler had on a job object.
910  * @sched_job: Job object to free.
911  */
912 static void pvr_queue_free_job(struct drm_sched_job *sched_job)
913 {
914 	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
915 
916 	drm_sched_job_cleanup(sched_job);
917 
918 	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job)
919 		pvr_job_put(job->paired_job);
920 
921 	job->paired_job = NULL;
922 	pvr_job_put(job);
923 }
924 
925 static const struct drm_sched_backend_ops pvr_queue_sched_ops = {
926 	.prepare_job = pvr_queue_prepare_job,
927 	.run_job = pvr_queue_run_job,
928 	.timedout_job = pvr_queue_timedout_job,
929 	.free_job = pvr_queue_free_job,
930 };
931 
932 /**
933  * pvr_queue_fence_is_native() - Check if a dma_fence is native to this driver.
934  * @f: Fence to test.
935  *
936  * Check if the fence we're being passed is a drm_sched_fence that is coming from this driver.
937  *
938  * It may be a UFO-backed fence i.e. a fence that can be signaled or waited upon FW-side,
939  * such as pvr_job::done_fence objects that are backed by the timeline UFO attached to the queue
940  * they are pushed to.
941  */
942 bool pvr_queue_fence_is_native(struct dma_fence *f)
943 {
944 	struct drm_sched_fence *sched_fence = f ? to_drm_sched_fence(f) : NULL;
945 
946 	if (sched_fence &&
947 	    sched_fence->sched->ops == &pvr_queue_sched_ops)
948 		return true;
949 
950 	return pvr_queue_fence_is_ufo_backed(f);
951 }
952 
953 /**
954  * pvr_queue_signal_done_fences() - Signal done fences.
955  * @queue: Queue to check.
956  *
957  * Signal done fences of jobs whose seqno is less than the current value of
958  * the UFO object attached to the queue.
959  */
960 static void
961 pvr_queue_signal_done_fences(struct pvr_queue *queue)
962 {
963 	struct pvr_job *job, *tmp_job;
964 	u32 cur_seqno;
965 
966 	spin_lock(&queue->scheduler.job_list_lock);
967 	cur_seqno = *queue->timeline_ufo.value;
968 	list_for_each_entry_safe(job, tmp_job, &queue->scheduler.pending_list, base.list) {
969 		if ((int)(cur_seqno - lower_32_bits(job->done_fence->seqno)) < 0)
970 			break;
971 
972 		if (!dma_fence_is_signaled(job->done_fence)) {
973 			dma_fence_signal(job->done_fence);
974 			pvr_job_release_pm_ref(job);
975 			atomic_dec(&queue->in_flight_job_count);
976 		}
977 	}
978 	spin_unlock(&queue->scheduler.job_list_lock);
979 }
980 
981 /**
982  * pvr_queue_check_job_waiting_for_cccb_space() - Check if a job waiting for CCCB space
983  * can be unblocked and pushed to the CCCB.
984  * @queue: Queue to check
985  *
986  * If we have a job waiting for CCCB, and this job now fits in the CCCB, we signal
987  * its CCCB fence, which should kick drm_sched.
988  */
989 static void
990 pvr_queue_check_job_waiting_for_cccb_space(struct pvr_queue *queue)
991 {
992 	struct pvr_queue_fence *cccb_fence;
993 	u32 native_deps_remaining;
994 	struct pvr_job *job;
995 
996 	mutex_lock(&queue->cccb_fence_ctx.job_lock);
997 	job = queue->cccb_fence_ctx.job;
998 	if (!job)
999 		goto out_unlock;
1000 
1001 	/* If we have a job attached to the CCCB fence context, its CCCB fence
1002 	 * shouldn't be NULL.
1003 	 */
1004 	if (WARN_ON(!job->cccb_fence)) {
1005 		job = NULL;
1006 		goto out_unlock;
1007 	}
1008 
1009 	/* If we get there, CCCB fence has to be initialized. */
1010 	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
1011 	if (WARN_ON(!cccb_fence->queue)) {
1012 		job = NULL;
1013 		goto out_unlock;
1014 	}
1015 
1016 	/* Evict signaled dependencies before checking for CCCB space.
1017 	 * If the job fits, signal the CCCB fence, this should unblock
1018 	 * the drm_sched_entity.
1019 	 */
1020 	native_deps_remaining = job_count_remaining_native_deps(job);
1021 	if (!pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
1022 		job = NULL;
1023 		goto out_unlock;
1024 	}
1025 
1026 	dma_fence_signal(job->cccb_fence);
1027 	pvr_queue_fence_put(job->cccb_fence);
1028 	job->cccb_fence = NULL;
1029 	queue->cccb_fence_ctx.job = NULL;
1030 
1031 out_unlock:
1032 	mutex_unlock(&queue->cccb_fence_ctx.job_lock);
1033 
1034 	pvr_job_put(job);
1035 }
1036 
1037 /**
1038  * pvr_queue_process() - Process events that happened on a queue.
1039  * @queue: Queue to check
1040  *
1041  * Signal job fences and check if jobs waiting for CCCB space can be unblocked.
1042  */
1043 void pvr_queue_process(struct pvr_queue *queue)
1044 {
1045 	lockdep_assert_held(&queue->ctx->pvr_dev->queues.lock);
1046 
1047 	pvr_queue_check_job_waiting_for_cccb_space(queue);
1048 	pvr_queue_signal_done_fences(queue);
1049 	pvr_queue_update_active_state_locked(queue);
1050 }
1051 
1052 static u32 get_dm_type(struct pvr_queue *queue)
1053 {
1054 	switch (queue->type) {
1055 	case DRM_PVR_JOB_TYPE_GEOMETRY:
1056 		return PVR_FWIF_DM_GEOM;
1057 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
1058 	case DRM_PVR_JOB_TYPE_FRAGMENT:
1059 		return PVR_FWIF_DM_FRAG;
1060 	case DRM_PVR_JOB_TYPE_COMPUTE:
1061 		return PVR_FWIF_DM_CDM;
1062 	}
1063 
1064 	return ~0;
1065 }
1066 
1067 /**
1068  * init_fw_context() - Initializes the queue part of a FW context.
1069  * @queue: Queue object to initialize the FW context for.
1070  * @fw_ctx_map: The FW context CPU mapping.
1071  *
1072  * FW contexts are containing various states, one of them being a per-queue state
1073  * that needs to be initialized for each queue being exposed by a context. This
1074  * function takes care of that.
1075  */
1076 static void init_fw_context(struct pvr_queue *queue, void *fw_ctx_map)
1077 {
1078 	struct pvr_context *ctx = queue->ctx;
1079 	struct pvr_fw_object *fw_mem_ctx_obj = pvr_vm_get_fw_mem_context(ctx->vm_ctx);
1080 	struct rogue_fwif_fwcommoncontext *cctx_fw;
1081 	struct pvr_cccb *cccb = &queue->cccb;
1082 
1083 	cctx_fw = fw_ctx_map + queue->ctx_offset;
1084 	cctx_fw->ccbctl_fw_addr = cccb->ctrl_fw_addr;
1085 	cctx_fw->ccb_fw_addr = cccb->cccb_fw_addr;
1086 
1087 	cctx_fw->dm = get_dm_type(queue);
1088 	cctx_fw->priority = ctx->priority;
1089 	cctx_fw->priority_seq_num = 0;
1090 	cctx_fw->max_deadline_ms = MAX_DEADLINE_MS;
1091 	cctx_fw->pid = task_tgid_nr(current);
1092 	cctx_fw->server_common_context_id = ctx->ctx_id;
1093 
1094 	pvr_fw_object_get_fw_addr(fw_mem_ctx_obj, &cctx_fw->fw_mem_context_fw_addr);
1095 
1096 	pvr_fw_object_get_fw_addr(queue->reg_state_obj, &cctx_fw->context_state_addr);
1097 }
1098 
1099 /**
1100  * pvr_queue_cleanup_fw_context() - Wait for the FW context to be idle and clean it up.
1101  * @queue: Queue on FW context to clean up.
1102  *
1103  * Return:
1104  *  * 0 on success,
1105  *  * Any error returned by pvr_fw_structure_cleanup() otherwise.
1106  */
1107 static int pvr_queue_cleanup_fw_context(struct pvr_queue *queue)
1108 {
1109 	if (!queue->ctx->fw_obj)
1110 		return 0;
1111 
1112 	return pvr_fw_structure_cleanup(queue->ctx->pvr_dev,
1113 					ROGUE_FWIF_CLEANUP_FWCOMMONCONTEXT,
1114 					queue->ctx->fw_obj, queue->ctx_offset);
1115 }
1116 
1117 /**
1118  * pvr_queue_job_init() - Initialize queue related fields in a pvr_job object.
1119  * @job: The job to initialize.
1120  * @drm_client_id: drm_file.client_id submitting the job
1121  *
1122  * Bind the job to a queue and allocate memory to guarantee pvr_queue_job_arm()
1123  * and pvr_queue_job_push() can't fail. We also make sure the context type is
1124  * valid and the job can fit in the CCCB.
1125  *
1126  * Return:
1127  *  * 0 on success, or
1128  *  * An error code if something failed.
1129  */
1130 int pvr_queue_job_init(struct pvr_job *job, u64 drm_client_id)
1131 {
1132 	/* Fragment jobs need at least one native fence wait on the geometry job fence. */
1133 	u32 min_native_dep_count = job->type == DRM_PVR_JOB_TYPE_FRAGMENT ? 1 : 0;
1134 	struct pvr_queue *queue;
1135 	int err;
1136 
1137 	if (atomic_read(&job->ctx->faulty))
1138 		return -EIO;
1139 
1140 	queue = pvr_context_get_queue_for_job(job->ctx, job->type);
1141 	if (!queue)
1142 		return -EINVAL;
1143 
1144 	if (!pvr_cccb_cmdseq_can_fit(&queue->cccb, job_cmds_size(job, min_native_dep_count)))
1145 		return -E2BIG;
1146 
1147 	err = drm_sched_job_init(&job->base, &queue->entity, 1, THIS_MODULE, drm_client_id);
1148 	if (err)
1149 		return err;
1150 
1151 	job->cccb_fence = pvr_queue_fence_alloc();
1152 	job->kccb_fence = pvr_kccb_fence_alloc();
1153 	job->done_fence = pvr_queue_fence_alloc();
1154 	if (!job->cccb_fence || !job->kccb_fence || !job->done_fence)
1155 		return -ENOMEM;
1156 
1157 	return 0;
1158 }
1159 
1160 /**
1161  * pvr_queue_job_arm() - Arm a job object.
1162  * @job: The job to arm.
1163  *
1164  * Initializes fences and return the drm_sched finished fence so it can
1165  * be exposed to the outside world. Once this function is called, you should
1166  * make sure the job is pushed using pvr_queue_job_push(), or guarantee that
1167  * no one grabbed a reference to the returned fence. The latter can happen if
1168  * we do multi-job submission, and something failed when creating/initializing
1169  * a job. In that case, we know the fence didn't leave the driver, and we
1170  * can thus guarantee nobody will wait on an dead fence object.
1171  *
1172  * Return:
1173  *  * A dma_fence object.
1174  */
1175 struct dma_fence *pvr_queue_job_arm(struct pvr_job *job)
1176 {
1177 	drm_sched_job_arm(&job->base);
1178 
1179 	return &job->base.s_fence->finished;
1180 }
1181 
1182 /**
1183  * pvr_queue_job_cleanup() - Cleanup fence/scheduler related fields in the job object.
1184  * @job: The job to cleanup.
1185  *
1186  * Should be called in the job release path.
1187  */
1188 void pvr_queue_job_cleanup(struct pvr_job *job)
1189 {
1190 	pvr_queue_fence_put(job->done_fence);
1191 	pvr_queue_fence_put(job->cccb_fence);
1192 	pvr_kccb_fence_put(job->kccb_fence);
1193 
1194 	if (job->base.s_fence)
1195 		drm_sched_job_cleanup(&job->base);
1196 }
1197 
1198 /**
1199  * pvr_queue_job_push() - Push a job to its queue.
1200  * @job: The job to push.
1201  *
1202  * Must be called after pvr_queue_job_init() and after all dependencies
1203  * have been added to the job. This will effectively queue the job to
1204  * the drm_sched_entity attached to the queue. We grab a reference on
1205  * the job object, so the caller is free to drop its reference when it's
1206  * done accessing the job object.
1207  */
1208 void pvr_queue_job_push(struct pvr_job *job)
1209 {
1210 	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
1211 
1212 	/* Keep track of the last queued job scheduled fence for combined submit. */
1213 	dma_fence_put(queue->last_queued_job_scheduled_fence);
1214 	queue->last_queued_job_scheduled_fence = dma_fence_get(&job->base.s_fence->scheduled);
1215 
1216 	pvr_job_get(job);
1217 	drm_sched_entity_push_job(&job->base);
1218 }
1219 
1220 static void reg_state_init(void *cpu_ptr, void *priv)
1221 {
1222 	struct pvr_queue *queue = priv;
1223 
1224 	if (queue->type == DRM_PVR_JOB_TYPE_GEOMETRY) {
1225 		struct rogue_fwif_geom_ctx_state *geom_ctx_state_fw = cpu_ptr;
1226 
1227 		geom_ctx_state_fw->geom_core[0].geom_reg_vdm_call_stack_pointer_init =
1228 			queue->callstack_addr;
1229 	}
1230 }
1231 
1232 /**
1233  * pvr_queue_create() - Create a queue object.
1234  * @ctx: The context this queue will be attached to.
1235  * @type: The type of jobs being pushed to this queue.
1236  * @args: The arguments passed to the context creation function.
1237  * @fw_ctx_map: CPU mapping of the FW context object.
1238  *
1239  * Create a queue object that will be used to queue and track jobs.
1240  *
1241  * Return:
1242  *  * A valid pointer to a pvr_queue object, or
1243  *  * An error pointer if the creation/initialization failed.
1244  */
1245 struct pvr_queue *pvr_queue_create(struct pvr_context *ctx,
1246 				   enum drm_pvr_job_type type,
1247 				   struct drm_pvr_ioctl_create_context_args *args,
1248 				   void *fw_ctx_map)
1249 {
1250 	static const struct {
1251 		u32 cccb_size;
1252 		const char *name;
1253 	} props[] = {
1254 		[DRM_PVR_JOB_TYPE_GEOMETRY] = {
1255 			.cccb_size = CTX_GEOM_CCCB_SIZE_LOG2,
1256 			.name = "geometry",
1257 		},
1258 		[DRM_PVR_JOB_TYPE_FRAGMENT] = {
1259 			.cccb_size = CTX_FRAG_CCCB_SIZE_LOG2,
1260 			.name = "fragment"
1261 		},
1262 		[DRM_PVR_JOB_TYPE_COMPUTE] = {
1263 			.cccb_size = CTX_COMPUTE_CCCB_SIZE_LOG2,
1264 			.name = "compute"
1265 		},
1266 		[DRM_PVR_JOB_TYPE_TRANSFER_FRAG] = {
1267 			.cccb_size = CTX_TRANSFER_CCCB_SIZE_LOG2,
1268 			.name = "transfer_frag"
1269 		},
1270 	};
1271 	struct pvr_device *pvr_dev = ctx->pvr_dev;
1272 	const struct drm_sched_init_args sched_args = {
1273 		.ops = &pvr_queue_sched_ops,
1274 		.submit_wq = pvr_dev->sched_wq,
1275 		.credit_limit = 64 * 1024,
1276 		.hang_limit = 1,
1277 		.timeout = msecs_to_jiffies(500),
1278 		.timeout_wq = pvr_dev->sched_wq,
1279 		.name = "pvr-queue",
1280 		.dev = pvr_dev->base.dev,
1281 	};
1282 	struct drm_gpu_scheduler *sched;
1283 	struct pvr_queue *queue;
1284 	int ctx_state_size, err;
1285 	void *cpu_map;
1286 
1287 	if (WARN_ON(type >= sizeof(props)))
1288 		return ERR_PTR(-EINVAL);
1289 
1290 	switch (ctx->type) {
1291 	case DRM_PVR_CTX_TYPE_RENDER:
1292 		if (type != DRM_PVR_JOB_TYPE_GEOMETRY &&
1293 		    type != DRM_PVR_JOB_TYPE_FRAGMENT)
1294 			return ERR_PTR(-EINVAL);
1295 		break;
1296 	case DRM_PVR_CTX_TYPE_COMPUTE:
1297 		if (type != DRM_PVR_JOB_TYPE_COMPUTE)
1298 			return ERR_PTR(-EINVAL);
1299 		break;
1300 	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
1301 		if (type != DRM_PVR_JOB_TYPE_TRANSFER_FRAG)
1302 			return ERR_PTR(-EINVAL);
1303 		break;
1304 	default:
1305 		return ERR_PTR(-EINVAL);
1306 	}
1307 
1308 	ctx_state_size = get_ctx_state_size(pvr_dev, type);
1309 	if (ctx_state_size < 0)
1310 		return ERR_PTR(ctx_state_size);
1311 
1312 	queue = kzalloc_obj(*queue);
1313 	if (!queue)
1314 		return ERR_PTR(-ENOMEM);
1315 
1316 	queue->type = type;
1317 	queue->ctx_offset = get_ctx_offset(type);
1318 	queue->ctx = ctx;
1319 	queue->callstack_addr = args->callstack_addr;
1320 	sched = &queue->scheduler;
1321 	INIT_LIST_HEAD(&queue->node);
1322 	mutex_init(&queue->cccb_fence_ctx.job_lock);
1323 	pvr_queue_fence_ctx_init(&queue->cccb_fence_ctx.base);
1324 	pvr_queue_fence_ctx_init(&queue->job_fence_ctx);
1325 
1326 	err = pvr_cccb_init(pvr_dev, &queue->cccb, props[type].cccb_size, props[type].name);
1327 	if (err)
1328 		goto err_free_queue;
1329 
1330 	err = pvr_fw_object_create(pvr_dev, ctx_state_size,
1331 				   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
1332 				   reg_state_init, queue, &queue->reg_state_obj);
1333 	if (err)
1334 		goto err_cccb_fini;
1335 
1336 	init_fw_context(queue, fw_ctx_map);
1337 
1338 	if (type != DRM_PVR_JOB_TYPE_GEOMETRY && type != DRM_PVR_JOB_TYPE_FRAGMENT &&
1339 	    args->callstack_addr) {
1340 		err = -EINVAL;
1341 		goto err_release_reg_state;
1342 	}
1343 
1344 	cpu_map = pvr_fw_object_create_and_map(pvr_dev, sizeof(*queue->timeline_ufo.value),
1345 					       PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
1346 					       NULL, NULL, &queue->timeline_ufo.fw_obj);
1347 	if (IS_ERR(cpu_map)) {
1348 		err = PTR_ERR(cpu_map);
1349 		goto err_release_reg_state;
1350 	}
1351 
1352 	queue->timeline_ufo.value = cpu_map;
1353 
1354 	err = drm_sched_init(&queue->scheduler, &sched_args);
1355 	if (err)
1356 		goto err_release_ufo;
1357 
1358 	err = drm_sched_entity_init(&queue->entity,
1359 				    DRM_SCHED_PRIORITY_KERNEL,
1360 				    &sched, 1, &ctx->faulty);
1361 	if (err)
1362 		goto err_sched_fini;
1363 
1364 	mutex_lock(&pvr_dev->queues.lock);
1365 	list_add_tail(&queue->node, &pvr_dev->queues.idle);
1366 	mutex_unlock(&pvr_dev->queues.lock);
1367 
1368 	return queue;
1369 
1370 err_sched_fini:
1371 	drm_sched_fini(&queue->scheduler);
1372 
1373 err_release_ufo:
1374 	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
1375 
1376 err_release_reg_state:
1377 	pvr_fw_object_destroy(queue->reg_state_obj);
1378 
1379 err_cccb_fini:
1380 	pvr_cccb_fini(&queue->cccb);
1381 
1382 err_free_queue:
1383 	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
1384 	kfree(queue);
1385 
1386 	return ERR_PTR(err);
1387 }
1388 
1389 void pvr_queue_device_pre_reset(struct pvr_device *pvr_dev)
1390 {
1391 	struct pvr_queue *queue;
1392 
1393 	mutex_lock(&pvr_dev->queues.lock);
1394 	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
1395 		pvr_queue_stop(queue, NULL);
1396 	list_for_each_entry(queue, &pvr_dev->queues.active, node)
1397 		pvr_queue_stop(queue, NULL);
1398 	mutex_unlock(&pvr_dev->queues.lock);
1399 }
1400 
1401 void pvr_queue_device_post_reset(struct pvr_device *pvr_dev)
1402 {
1403 	struct pvr_queue *queue;
1404 
1405 	mutex_lock(&pvr_dev->queues.lock);
1406 	list_for_each_entry(queue, &pvr_dev->queues.active, node)
1407 		pvr_queue_start(queue);
1408 	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
1409 		pvr_queue_start(queue);
1410 	mutex_unlock(&pvr_dev->queues.lock);
1411 }
1412 
1413 /**
1414  * pvr_queue_kill() - Kill a queue.
1415  * @queue: The queue to kill.
1416  *
1417  * Kill the queue so no new jobs can be pushed. Should be called when the
1418  * context handle is destroyed. The queue object might last longer if jobs
1419  * are still in flight and holding a reference to the context this queue
1420  * belongs to.
1421  */
1422 void pvr_queue_kill(struct pvr_queue *queue)
1423 {
1424 	drm_sched_entity_destroy(&queue->entity);
1425 	dma_fence_put(queue->last_queued_job_scheduled_fence);
1426 	queue->last_queued_job_scheduled_fence = NULL;
1427 }
1428 
1429 /**
1430  * pvr_queue_destroy() - Destroy a queue.
1431  * @queue: The queue to destroy.
1432  *
1433  * Cleanup the queue and free the resources attached to it. Should be
1434  * called from the context release function.
1435  */
1436 void pvr_queue_destroy(struct pvr_queue *queue)
1437 {
1438 	if (!queue)
1439 		return;
1440 
1441 	mutex_lock(&queue->ctx->pvr_dev->queues.lock);
1442 	list_del_init(&queue->node);
1443 	mutex_unlock(&queue->ctx->pvr_dev->queues.lock);
1444 
1445 	drm_sched_fini(&queue->scheduler);
1446 	drm_sched_entity_fini(&queue->entity);
1447 
1448 	if (WARN_ON(queue->last_queued_job_scheduled_fence))
1449 		dma_fence_put(queue->last_queued_job_scheduled_fence);
1450 
1451 	pvr_queue_cleanup_fw_context(queue);
1452 
1453 	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
1454 	pvr_fw_object_destroy(queue->reg_state_obj);
1455 	pvr_cccb_fini(&queue->cccb);
1456 	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
1457 	kfree(queue);
1458 }
1459 
1460 /**
1461  * pvr_queue_device_init() - Device-level initialization of queue related fields.
1462  * @pvr_dev: The device to initialize.
1463  *
1464  * Initializes all fields related to queue management in pvr_device.
1465  *
1466  * Return:
1467  *  * 0 on success, or
1468  *  * An error code on failure.
1469  */
1470 int pvr_queue_device_init(struct pvr_device *pvr_dev)
1471 {
1472 	int err;
1473 
1474 	INIT_LIST_HEAD(&pvr_dev->queues.active);
1475 	INIT_LIST_HEAD(&pvr_dev->queues.idle);
1476 	err = drmm_mutex_init(from_pvr_device(pvr_dev), &pvr_dev->queues.lock);
1477 	if (err)
1478 		return err;
1479 
1480 	pvr_dev->sched_wq = alloc_workqueue("powervr-sched", WQ_UNBOUND, 0);
1481 	if (!pvr_dev->sched_wq)
1482 		return -ENOMEM;
1483 
1484 	return 0;
1485 }
1486 
1487 /**
1488  * pvr_queue_device_fini() - Device-level cleanup of queue related fields.
1489  * @pvr_dev: The device to cleanup.
1490  *
1491  * Cleanup/free all queue-related resources attached to a pvr_device object.
1492  */
1493 void pvr_queue_device_fini(struct pvr_device *pvr_dev)
1494 {
1495 	destroy_workqueue(pvr_dev->sched_wq);
1496 }
1497