xref: /linux/drivers/gpu/drm/imagination/pvr_queue.c (revision c1079aebb4de218caa86c44f9a53700d1a582683)
1 // SPDX-License-Identifier: GPL-2.0-only OR MIT
2 /* Copyright (c) 2023 Imagination Technologies Ltd. */
3 
4 #include <drm/drm_managed.h>
5 #include <drm/gpu_scheduler.h>
6 
7 #include "pvr_cccb.h"
8 #include "pvr_context.h"
9 #include "pvr_device.h"
10 #include "pvr_drv.h"
11 #include "pvr_job.h"
12 #include "pvr_queue.h"
13 #include "pvr_trace.h"
14 #include "pvr_vm.h"
15 
16 #include "pvr_rogue_fwif_client.h"
17 
/*
 * NOTE(review): presumably an upper bound on job deadlines, in milliseconds;
 * no user is visible in this part of the file -- confirm.
 */
#define MAX_DEADLINE_MS 30000

/*
 * NOTE(review): presumably log2 of the client CCB (CCCB) size used for each
 * queue type; the consumer of these values is not visible in this part of
 * the file -- confirm against the queue creation code.
 */
#define CTX_COMPUTE_CCCB_SIZE_LOG2 15
#define CTX_FRAG_CCCB_SIZE_LOG2 15
#define CTX_GEOM_CCCB_SIZE_LOG2 15
#define CTX_TRANSFER_CCCB_SIZE_LOG2 15
24 
25 static int get_xfer_ctx_state_size(struct pvr_device *pvr_dev)
26 {
27 	u32 num_isp_store_registers;
28 
29 	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
30 		num_isp_store_registers = 1;
31 	} else {
32 		int err;
33 
34 		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
35 		if (WARN_ON(err))
36 			return err;
37 	}
38 
39 	return sizeof(struct rogue_fwif_frag_ctx_state) +
40 	       (num_isp_store_registers *
41 		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
42 }
43 
44 static int get_frag_ctx_state_size(struct pvr_device *pvr_dev)
45 {
46 	u32 num_isp_store_registers;
47 	int err;
48 
49 	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
50 		err = PVR_FEATURE_VALUE(pvr_dev, num_raster_pipes, &num_isp_store_registers);
51 		if (WARN_ON(err))
52 			return err;
53 
54 		if (PVR_HAS_FEATURE(pvr_dev, gpu_multicore_support)) {
55 			u32 xpu_max_slaves;
56 
57 			err = PVR_FEATURE_VALUE(pvr_dev, xpu_max_slaves, &xpu_max_slaves);
58 			if (WARN_ON(err))
59 				return err;
60 
61 			num_isp_store_registers *= (1 + xpu_max_slaves);
62 		}
63 	} else {
64 		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
65 		if (WARN_ON(err))
66 			return err;
67 	}
68 
69 	return sizeof(struct rogue_fwif_frag_ctx_state) +
70 	       (num_isp_store_registers *
71 		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
72 }
73 
74 static int get_ctx_state_size(struct pvr_device *pvr_dev, enum drm_pvr_job_type type)
75 {
76 	switch (type) {
77 	case DRM_PVR_JOB_TYPE_GEOMETRY:
78 		return sizeof(struct rogue_fwif_geom_ctx_state);
79 	case DRM_PVR_JOB_TYPE_FRAGMENT:
80 		return get_frag_ctx_state_size(pvr_dev);
81 	case DRM_PVR_JOB_TYPE_COMPUTE:
82 		return sizeof(struct rogue_fwif_compute_ctx_state);
83 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
84 		return get_xfer_ctx_state_size(pvr_dev);
85 	}
86 
87 	WARN(1, "Invalid queue type");
88 	return -EINVAL;
89 }
90 
91 static u32 get_ctx_offset(enum drm_pvr_job_type type)
92 {
93 	switch (type) {
94 	case DRM_PVR_JOB_TYPE_GEOMETRY:
95 		return offsetof(struct rogue_fwif_fwrendercontext, geom_context);
96 	case DRM_PVR_JOB_TYPE_FRAGMENT:
97 		return offsetof(struct rogue_fwif_fwrendercontext, frag_context);
98 	case DRM_PVR_JOB_TYPE_COMPUTE:
99 		return offsetof(struct rogue_fwif_fwcomputecontext, cdm_context);
100 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
101 		return offsetof(struct rogue_fwif_fwtransfercontext, tq_context);
102 	}
103 
104 	return 0;
105 }
106 
107 static const char *
108 pvr_queue_fence_get_driver_name(struct dma_fence *f)
109 {
110 	return PVR_DRIVER_NAME;
111 }
112 
113 static void pvr_queue_fence_release_work(struct work_struct *w)
114 {
115 	struct pvr_queue_fence *fence = container_of(w, struct pvr_queue_fence, release_work);
116 
117 	pvr_context_put(fence->queue->ctx);
118 	dma_fence_free(&fence->base);
119 }
120 
121 static void pvr_queue_fence_release(struct dma_fence *f)
122 {
123 	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
124 	struct pvr_device *pvr_dev = fence->queue->ctx->pvr_dev;
125 
126 	queue_work(pvr_dev->sched_wq, &fence->release_work);
127 }
128 
129 static const char *
130 pvr_queue_job_fence_get_timeline_name(struct dma_fence *f)
131 {
132 	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
133 
134 	switch (fence->queue->type) {
135 	case DRM_PVR_JOB_TYPE_GEOMETRY:
136 		return "geometry";
137 
138 	case DRM_PVR_JOB_TYPE_FRAGMENT:
139 		return "fragment";
140 
141 	case DRM_PVR_JOB_TYPE_COMPUTE:
142 		return "compute";
143 
144 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
145 		return "transfer";
146 	}
147 
148 	WARN(1, "Invalid queue type");
149 	return "invalid";
150 }
151 
152 static const char *
153 pvr_queue_cccb_fence_get_timeline_name(struct dma_fence *f)
154 {
155 	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
156 
157 	switch (fence->queue->type) {
158 	case DRM_PVR_JOB_TYPE_GEOMETRY:
159 		return "geometry-cccb";
160 
161 	case DRM_PVR_JOB_TYPE_FRAGMENT:
162 		return "fragment-cccb";
163 
164 	case DRM_PVR_JOB_TYPE_COMPUTE:
165 		return "compute-cccb";
166 
167 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
168 		return "transfer-cccb";
169 	}
170 
171 	WARN(1, "Invalid queue type");
172 	return "invalid";
173 }
174 
175 static const struct dma_fence_ops pvr_queue_job_fence_ops = {
176 	.get_driver_name = pvr_queue_fence_get_driver_name,
177 	.get_timeline_name = pvr_queue_job_fence_get_timeline_name,
178 	.release = pvr_queue_fence_release,
179 };
180 
181 /**
182  * pvr_queue_fence_is_ufo_backed() - Check if a dma_fence is backed by a UFO.
183  * @f: The dma_fence to check.
184  *
185  * Return:
186  * * true if the dma_fence is backed by a UFO, or
187  * * false otherwise.
188  */
189 static inline bool
190 pvr_queue_fence_is_ufo_backed(struct dma_fence *f)
191 {
192 	/*
193 	 * Currently the only dma_fence backed by a UFO object is the job fence,
194 	 * e.g. pvr_job::done_fence, wrapped by a pvr_queue_fence object.
195 	 */
196 	return f && f->ops == &pvr_queue_job_fence_ops;
197 }
198 
199 /**
200  * to_pvr_queue_job_fence() - Return a pvr_queue_fence object if the fence is
201  * already backed by a UFO.
202  * @f: The dma_fence to turn into a pvr_queue_fence.
203  *
204  * This could be called on:
205  * - a job fence directly, in which case it simply returns the containing pvr_queue_fence;
206  * - a drm_sched_fence's scheduled or finished fence, in which case it will first try to follow
207  *   the parent pointer to find the job fence (note that the parent pointer is initialized
208  *   only after the run_job() callback is called on the drm_sched_fence's owning job);
209  * - any other dma_fence, in which case it will return NULL.
210  *
211  * Return:
212  *  * A non-NULL pvr_queue_fence object if the dma_fence is backed by a UFO, or
213  *  * NULL otherwise.
214  */
215 static struct pvr_queue_fence *
216 to_pvr_queue_job_fence(struct dma_fence *f)
217 {
218 	struct drm_sched_fence *sched_fence = to_drm_sched_fence(f);
219 
220 	if (sched_fence)
221 		f = sched_fence->parent;
222 
223 	if (pvr_queue_fence_is_ufo_backed(f))
224 		return container_of(f, struct pvr_queue_fence, base);
225 
226 	return NULL;
227 }
228 
229 static const struct dma_fence_ops pvr_queue_cccb_fence_ops = {
230 	.get_driver_name = pvr_queue_fence_get_driver_name,
231 	.get_timeline_name = pvr_queue_cccb_fence_get_timeline_name,
232 	.release = pvr_queue_fence_release,
233 };
234 
235 /**
236  * pvr_queue_fence_put() - Put wrapper for pvr_queue_fence objects.
237  * @f: The dma_fence object to put.
238  *
239  * If the pvr_queue_fence has been initialized, we call dma_fence_put(),
240  * otherwise we free the object with dma_fence_free(). This allows us
241  * to do the right thing before and after pvr_queue_fence_init() had been
242  * called.
243  */
244 static void pvr_queue_fence_put(struct dma_fence *f)
245 {
246 	if (!f)
247 		return;
248 
249 	if (WARN_ON(f->ops &&
250 		    f->ops != &pvr_queue_cccb_fence_ops &&
251 		    f->ops != &pvr_queue_job_fence_ops))
252 		return;
253 
254 	/* If the fence hasn't been initialized yet, free the object directly. */
255 	if (f->ops)
256 		dma_fence_put(f);
257 	else
258 		dma_fence_free(f);
259 }
260 
261 /**
262  * pvr_queue_fence_alloc() - Allocate a pvr_queue_fence fence object
263  *
264  * Call this function to allocate job CCCB and done fences. This only
265  * allocates the objects. Initialization happens when the underlying
266  * dma_fence object is to be returned to drm_sched (in prepare_job() or
267  * run_job()).
268  *
269  * Return:
270  *  * A valid pointer if the allocation succeeds, or
271  *  * NULL if the allocation fails.
272  */
273 static struct dma_fence *
274 pvr_queue_fence_alloc(void)
275 {
276 	struct pvr_queue_fence *fence;
277 
278 	fence = kzalloc_obj(*fence);
279 	if (!fence)
280 		return NULL;
281 
282 	return &fence->base;
283 }
284 
285 /**
286  * pvr_queue_fence_init() - Initializes a pvr_queue_fence object.
287  * @f: The fence to initialize
288  * @queue: The queue this fence belongs to.
289  * @fence_ops: The fence operations.
290  * @fence_ctx: The fence context.
291  *
292  * Wrapper around dma_fence_init() that takes care of initializing the
293  * pvr_queue_fence::queue field too.
294  */
295 static void
296 pvr_queue_fence_init(struct dma_fence *f,
297 		     struct pvr_queue *queue,
298 		     const struct dma_fence_ops *fence_ops,
299 		     struct pvr_queue_fence_ctx *fence_ctx)
300 {
301 	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
302 
303 	pvr_context_get(queue->ctx);
304 	fence->queue = queue;
305 	INIT_WORK(&fence->release_work, pvr_queue_fence_release_work);
306 	dma_fence_init(&fence->base, fence_ops,
307 		       &fence_ctx->lock, fence_ctx->id,
308 		       atomic_inc_return(&fence_ctx->seqno));
309 }
310 
311 /**
312  * pvr_queue_cccb_fence_init() - Initializes a CCCB fence object.
313  * @fence: The fence to initialize.
314  * @queue: The queue this fence belongs to.
315  *
316  * Initializes a fence that can be used to wait for CCCB space.
317  *
318  * Should be called in the ::prepare_job() path, so the fence returned to
319  * drm_sched is valid.
320  */
321 static void
322 pvr_queue_cccb_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
323 {
324 	pvr_queue_fence_init(fence, queue, &pvr_queue_cccb_fence_ops,
325 			     &queue->cccb_fence_ctx.base);
326 }
327 
328 /**
329  * pvr_queue_job_fence_init() - Initializes a job done fence object.
330  * @fence: The fence to initialize.
331  * @queue: The queue this fence belongs to.
332  *
333  * Initializes a fence that will be signaled when the GPU is done executing
334  * a job.
335  *
336  * Should be called *before* the ::run_job() path, so the fence is initialised
337  * before being placed in the pending_list.
338  */
339 static void
340 pvr_queue_job_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
341 {
342 	if (!fence->ops)
343 		pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops,
344 				     &queue->job_fence_ctx);
345 }
346 
347 /**
348  * pvr_queue_fence_ctx_init() - Queue fence context initialization.
349  * @fence_ctx: The context to initialize
350  */
351 static void
352 pvr_queue_fence_ctx_init(struct pvr_queue_fence_ctx *fence_ctx)
353 {
354 	spin_lock_init(&fence_ctx->lock);
355 	fence_ctx->id = dma_fence_context_alloc(1);
356 	atomic_set(&fence_ctx->seqno, 0);
357 }
358 
359 static u32 ufo_cmds_size(u32 elem_count)
360 {
361 	/* We can pass at most ROGUE_FWIF_CCB_CMD_MAX_UFOS per UFO-related command. */
362 	u32 full_cmd_count = elem_count / ROGUE_FWIF_CCB_CMD_MAX_UFOS;
363 	u32 remaining_elems = elem_count % ROGUE_FWIF_CCB_CMD_MAX_UFOS;
364 	u32 size = full_cmd_count *
365 		   pvr_cccb_get_size_of_cmd_with_hdr(ROGUE_FWIF_CCB_CMD_MAX_UFOS *
366 						     sizeof(struct rogue_fwif_ufo));
367 
368 	if (remaining_elems) {
369 		size += pvr_cccb_get_size_of_cmd_with_hdr(remaining_elems *
370 							  sizeof(struct rogue_fwif_ufo));
371 	}
372 
373 	return size;
374 }
375 
376 static u32 job_cmds_size(struct pvr_job *job, u32 ufo_wait_count)
377 {
378 	/*
379 	 * One UFO command per native fence this job will be waiting on (unless any are
380 	 * signaled by the time the job is submitted), plus a command for the job itself,
381 	 * plus one UFO command for the fence signaling.
382 	 */
383 	return ufo_cmds_size(ufo_wait_count) +
384 	       pvr_cccb_get_size_of_cmd_with_hdr(job->cmd_len) +
385 	       ufo_cmds_size(1);
386 }
387 
388 static bool
389 is_paired_job_fence(struct dma_fence *fence, struct pvr_job *job)
390 {
391 	/* This assumes "fence" is one of "job"'s drm_sched_job::dependencies */
392 	return job->type == DRM_PVR_JOB_TYPE_FRAGMENT &&
393 	       job->paired_job &&
394 	       &job->paired_job->base.s_fence->scheduled == fence;
395 }
396 
397 /**
398  * job_count_remaining_native_deps() - Count the number of non-signaled native dependencies.
399  * @job: Job to operate on.
400  *
401  * Returns: Number of non-signaled native deps remaining.
402  */
403 static unsigned long job_count_remaining_native_deps(struct pvr_job *job)
404 {
405 	unsigned long remaining_count = 0;
406 	struct dma_fence *fence = NULL;
407 	unsigned long index;
408 
409 	xa_for_each(&job->base.dependencies, index, fence) {
410 		struct pvr_queue_fence *jfence;
411 
412 		if (is_paired_job_fence(fence, job)) {
413 			/*
414 			 * A fence between paired jobs won't resolve to a pvr_queue_fence (i.e.
415 			 * be backed by a UFO) until the jobs have been submitted, together.
416 			 * The submitting code will insert a partial render fence command for this.
417 			 */
418 			WARN_ON(dma_fence_is_signaled(fence));
419 			remaining_count++;
420 			continue;
421 		}
422 
423 		jfence = to_pvr_queue_job_fence(fence);
424 		if (!jfence)
425 			continue;
426 
427 		if (!dma_fence_is_signaled(&jfence->base))
428 			remaining_count++;
429 	}
430 
431 	return remaining_count;
432 }
433 
434 /**
435  * pvr_queue_get_job_cccb_fence() - Get the CCCB fence attached to a job.
436  * @queue: The queue this job will be submitted to.
437  * @job: The job to get the CCCB fence on.
438  *
439  * The CCCB fence is a synchronization primitive allowing us to delay job
440  * submission until there's enough space in the CCCB to submit the job.
441  *
442  * Return:
443  *  * NULL if there's enough space in the CCCB to submit this job, or
444  *  * A valid dma_fence object otherwise.
445  */
446 static struct dma_fence *
447 pvr_queue_get_job_cccb_fence(struct pvr_queue *queue, struct pvr_job *job)
448 {
449 	struct pvr_queue_fence *cccb_fence;
450 	unsigned int native_deps_remaining;
451 
452 	/* If the fence is NULL, that means we already checked that we had
453 	 * enough space in the cccb for our job.
454 	 */
455 	if (!job->cccb_fence)
456 		return NULL;
457 
458 	mutex_lock(&queue->cccb_fence_ctx.job_lock);
459 
460 	/* Count remaining native dependencies and check if the job fits in the CCCB. */
461 	native_deps_remaining = job_count_remaining_native_deps(job);
462 	if (pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
463 		pvr_queue_fence_put(job->cccb_fence);
464 		job->cccb_fence = NULL;
465 		goto out_unlock;
466 	}
467 
468 	/* There should be no job attached to the CCCB fence context:
469 	 * drm_sched_entity guarantees that jobs are submitted one at a time.
470 	 */
471 	if (WARN_ON(queue->cccb_fence_ctx.job))
472 		pvr_job_put(queue->cccb_fence_ctx.job);
473 
474 	queue->cccb_fence_ctx.job = pvr_job_get(job);
475 
476 	/* Initialize the fence before returning it. */
477 	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
478 	if (!WARN_ON(cccb_fence->queue))
479 		pvr_queue_cccb_fence_init(job->cccb_fence, queue);
480 
481 out_unlock:
482 	mutex_unlock(&queue->cccb_fence_ctx.job_lock);
483 
484 	return dma_fence_get(job->cccb_fence);
485 }
486 
487 /**
488  * pvr_queue_get_job_kccb_fence() - Get the KCCB fence attached to a job.
489  * @queue: The queue this job will be submitted to.
490  * @job: The job to get the KCCB fence on.
491  *
492  * The KCCB fence is a synchronization primitive allowing us to delay job
493  * submission until there's enough space in the KCCB to submit the job.
494  *
495  * Return:
496  *  * NULL if there's enough space in the KCCB to submit this job, or
497  *  * A valid dma_fence object otherwise.
498  */
499 static struct dma_fence *
500 pvr_queue_get_job_kccb_fence(struct pvr_queue *queue, struct pvr_job *job)
501 {
502 	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
503 	struct dma_fence *kccb_fence = NULL;
504 
505 	/* If the fence is NULL, that means we already checked that we had
506 	 * enough space in the KCCB for our job.
507 	 */
508 	if (!job->kccb_fence)
509 		return NULL;
510 
511 	if (!WARN_ON(job->kccb_fence->ops)) {
512 		kccb_fence = pvr_kccb_reserve_slot(pvr_dev, job->kccb_fence);
513 		job->kccb_fence = NULL;
514 	}
515 
516 	return kccb_fence;
517 }
518 
519 static struct dma_fence *
520 pvr_queue_get_paired_frag_job_dep(struct pvr_job *job)
521 {
522 	struct pvr_job *frag_job = job->type == DRM_PVR_JOB_TYPE_GEOMETRY ?
523 				   job->paired_job : NULL;
524 	struct pvr_queue *frag_queue = frag_job ? frag_job->ctx->queues.fragment : NULL;
525 	struct dma_fence *f;
526 	unsigned long index;
527 
528 	if (!frag_job)
529 		return NULL;
530 
531 	/* Have the geometry job wait on the paired fragment job's dependencies as well. */
532 	xa_for_each(&frag_job->base.dependencies, index, f) {
533 		/* Skip already signaled fences. */
534 		if (dma_fence_is_signaled(f))
535 			continue;
536 
537 		/*
538 		 * The paired job fence won't be signaled until both jobs have
539 		 * been submitted, so we can't wait on it to schedule them.
540 		 */
541 		if (f == &job->base.s_fence->scheduled)
542 			continue;
543 
544 		return dma_fence_get(f);
545 	}
546 
547 	/* Initialize the paired fragment job's done_fence, so we can signal it. */
548 	pvr_queue_job_fence_init(frag_job->done_fence, frag_queue);
549 
550 	return pvr_queue_get_job_cccb_fence(frag_queue, frag_job);
551 }
552 
553 /**
554  * pvr_queue_prepare_job() - Return the next internal dependencies expressed as a dma_fence.
555  * @sched_job: The job to query the next internal dependency on
556  * @s_entity: The entity this job is queue on.
557  *
558  * After iterating over drm_sched_job::dependencies, drm_sched let the driver return
559  * its own internal dependencies. We use this function to return our internal dependencies.
560  */
561 static struct dma_fence *
562 pvr_queue_prepare_job(struct drm_sched_job *sched_job,
563 		      struct drm_sched_entity *s_entity)
564 {
565 	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
566 	struct pvr_queue *queue = container_of(s_entity, struct pvr_queue, entity);
567 	struct dma_fence *internal_dep = NULL;
568 
569 	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
570 		/*
571 		 * This will be called on a paired fragment job after being submitted
572 		 * to the firmware as part of the paired geometry job's submission.
573 		 * We can tell if this is the case and bail early from whether run_job()
574 		 * has been called on the geometry job, which would issue a pm ref on
575 		 * this job as well.
576 		 */
577 		if (job->has_pm_ref)
578 			return NULL;
579 	}
580 
581 	/*
582 	 * Initialize the done_fence, so we can signal it. This must be done
583 	 * here because otherwise by the time of run_job() the job will end up
584 	 * in the pending list without a valid fence.
585 	 */
586 	pvr_queue_job_fence_init(job->done_fence, queue);
587 
588 	/* CCCB fence is used to make sure we have enough space in the CCCB to
589 	 * submit our commands.
590 	 */
591 	internal_dep = pvr_queue_get_job_cccb_fence(queue, job);
592 
593 	/* KCCB fence is used to make sure we have a KCCB slot to queue our
594 	 * CMD_KICK.
595 	 */
596 	if (!internal_dep)
597 		internal_dep = pvr_queue_get_job_kccb_fence(queue, job);
598 
599 	/* Any extra internal dependency should be added here, using the following
600 	 * pattern:
601 	 *
602 	 *	if (!internal_dep)
603 	 *		internal_dep = pvr_queue_get_job_xxxx_fence(queue, job);
604 	 */
605 
606 	/* The paired job fence should come last, when everything else is ready. */
607 	if (!internal_dep)
608 		internal_dep = pvr_queue_get_paired_frag_job_dep(job);
609 
610 	return internal_dep;
611 }
612 
613 /**
614  * pvr_queue_update_active_state_locked() - Update the queue active state.
615  * @queue: Queue to update the state on.
616  *
617  * Locked version of pvr_queue_update_active_state(). Must be called with
618  * pvr_device::queue::lock held.
619  */
620 static void pvr_queue_update_active_state_locked(struct pvr_queue *queue)
621 {
622 	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
623 
624 	lockdep_assert_held(&pvr_dev->queues.lock);
625 
626 	/* The queue is temporary out of any list when it's being reset,
627 	 * we don't want a call to pvr_queue_update_active_state_locked()
628 	 * to re-insert it behind our back.
629 	 */
630 	if (list_empty(&queue->node))
631 		return;
632 
633 	if (!atomic_read(&queue->in_flight_job_count))
634 		list_move_tail(&queue->node, &pvr_dev->queues.idle);
635 	else
636 		list_move_tail(&queue->node, &pvr_dev->queues.active);
637 }
638 
639 /**
640  * pvr_queue_update_active_state() - Update the queue active state.
641  * @queue: Queue to update the state on.
642  *
643  * Active state is based on the in_flight_job_count value.
644  *
645  * Updating the active state implies moving the queue in or out of the
646  * active queue list, which also defines whether the queue is checked
647  * or not when a FW event is received.
648  *
649  * This function should be called any time a job is submitted or it done
650  * fence is signaled.
651  */
652 static void pvr_queue_update_active_state(struct pvr_queue *queue)
653 {
654 	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
655 
656 	mutex_lock(&pvr_dev->queues.lock);
657 	pvr_queue_update_active_state_locked(queue);
658 	mutex_unlock(&pvr_dev->queues.lock);
659 }
660 
/**
 * pvr_queue_submit_job_to_cccb() - Write a job and its fence commands to the CCCB.
 * @job: The job to write.
 *
 * Writes, in order, to the CCCB of the queue owning @job's scheduler:
 *  - FENCE_PR commands for every unsignaled UFO-backed dependency, in batches
 *    of at most ROGUE_FWIF_CCB_CMD_MAX_UFOS entries (for a paired fragment
 *    job, the paired job's done_fence is added as the last entry);
 *  - the job command itself;
 *  - an UPDATE command setting the queue timeline UFO to the job's
 *    done_fence seqno.
 *
 * The queue's in-flight job count is incremented and its active state is
 * updated before anything is written. No kick is sent from here; see
 * pvr_queue_run_job() for that.
 */
static void pvr_queue_submit_job_to_cccb(struct pvr_job *job)
{
	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
	struct rogue_fwif_ufo ufos[ROGUE_FWIF_CCB_CMD_MAX_UFOS];
	struct pvr_cccb *cccb = &queue->cccb;
	struct pvr_queue_fence *jfence;
	struct dma_fence *fence;
	unsigned long index;
	u32 ufo_count = 0;

	/* We need to add the queue to the active list before updating the CCCB,
	 * otherwise we might miss the FW event informing us that something
	 * happened on this queue.
	 */
	atomic_inc(&queue->in_flight_job_count);
	pvr_queue_update_active_state(queue);

	xa_for_each(&job->base.dependencies, index, fence) {
		/* Only UFO-backed fences can be waited on through the CCCB. */
		jfence = to_pvr_queue_job_fence(fence);
		if (!jfence)
			continue;

		/* Some dependencies might have been signaled since prepare_job() */
		if (dma_fence_is_signaled(&jfence->base))
			continue;

		/* Wait for the dependency's queue timeline UFO to reach its seqno. */
		pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
					  &ufos[ufo_count].addr);
		ufos[ufo_count++].value = jfence->base.seqno;

		/* The batch is full: flush it and start a new one. */
		if (ufo_count == ARRAY_SIZE(ufos)) {
			pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
							   sizeof(ufos), ufos, 0, 0);
			ufo_count = 0;
		}
	}

	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
		/*
		 * The loop above will only process dependencies backed by a UFO i.e. with
		 * a valid parent fence assigned, but the paired job dependency won't have
		 * one until both jobs have been submitted. Access the parent fence directly
		 * here instead, submitting it last as partial render fence.
		 */
		jfence = to_pvr_queue_job_fence(job->paired_job->done_fence);
		if (!WARN_ON(!jfence)) {
			pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
						  &ufos[ufo_count].addr);
			ufos[ufo_count++].value = job->paired_job->done_fence->seqno;
		}
	}

	/* Flush the last, possibly partial, batch of fence waits. */
	if (ufo_count) {
		pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
						   sizeof(ufos[0]) * ufo_count, ufos, 0, 0);
	}

	if (job->type == DRM_PVR_JOB_TYPE_GEOMETRY && job->paired_job) {
		struct rogue_fwif_cmd_geom *cmd = job->cmd;

		/* Reference value for the partial render test is the current queue fence
		 * seqno minus one.
		 */
		pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj,
					  &cmd->partial_render_geom_frag_fence.addr);
		cmd->partial_render_geom_frag_fence.value = job->done_fence->seqno - 1;
	}

	trace_pvr_job_submit_fw(job);

	/* Submit job to FW */
	pvr_cccb_write_command_with_header(cccb, job->fw_ccb_cmd_type, job->cmd_len, job->cmd,
					   job->id, job->id);

	/* Update command to signal the job fence. */
	pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj, &ufos[0].addr);
	ufos[0].value = job->done_fence->seqno;
	pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_UPDATE,
					   sizeof(ufos[0]), ufos, 0, 0);
}
741 
/**
 * pvr_queue_run_job() - Submit a job to the FW.
 * @sched_job: The job to submit.
 *
 * This function is called when all non-native dependencies have been met and
 * when the commands resulting from this job are guaranteed to fit in the CCCB.
 *
 * A geometry job with a paired fragment job submits both jobs and sends a
 * single combined kick. When run_job() is later called on that fragment job,
 * its done_fence is already initialized and is simply returned.
 *
 * Return:
 *  * A reference to the job's done_fence on success,
 *  * ERR_PTR(-EINVAL) if the job pairing is invalid, or
 *  * ERR_PTR() of the pvr_job_get_pm_ref() error otherwise.
 */
static struct dma_fence *pvr_queue_run_job(struct drm_sched_job *sched_job)
{
	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
	struct pvr_device *pvr_dev = job->pvr_dev;
	int err;

	/* The fragment job is issued along the geometry job when we use combined
	 * geom+frag kicks. When we get there, we should simply return the
	 * done_fence that's been initialized earlier.
	 */
	if (job->paired_job && job->type == DRM_PVR_JOB_TYPE_FRAGMENT &&
	    job->done_fence->ops) {
		return dma_fence_get(job->done_fence);
	}

	/* The only kind of jobs that can be paired are geometry and fragment, and
	 * we bail out early if we see a fragment job that's paired with a geometry job.
	 * Paired jobs must also target the same context and point to the same HWRT.
	 */
	if (WARN_ON(job->paired_job &&
		    (job->type != DRM_PVR_JOB_TYPE_GEOMETRY ||
		     job->paired_job->type != DRM_PVR_JOB_TYPE_FRAGMENT ||
		     job->hwrt != job->paired_job->hwrt ||
		     job->ctx != job->paired_job->ctx)))
		return ERR_PTR(-EINVAL);

	/* Take a PM reference for the job, and for its paired job if any. */
	err = pvr_job_get_pm_ref(job);
	if (WARN_ON(err))
		return ERR_PTR(err);

	if (job->paired_job) {
		err = pvr_job_get_pm_ref(job->paired_job);
		if (WARN_ON(err))
			return ERR_PTR(err);
	}

	/* Submit our job to the CCCB */
	pvr_queue_submit_job_to_cccb(job);

	if (job->paired_job) {
		struct pvr_job *geom_job = job;
		struct pvr_job *frag_job = job->paired_job;
		struct pvr_queue *geom_queue = job->ctx->queues.geometry;
		struct pvr_queue *frag_queue = job->ctx->queues.fragment;

		/* Submit the fragment job along the geometry job and send a combined kick. */
		pvr_queue_submit_job_to_cccb(frag_job);
		pvr_cccb_send_kccb_combined_kick(pvr_dev,
						 &geom_queue->cccb, &frag_queue->cccb,
						 pvr_context_get_fw_addr(geom_job->ctx) +
						 geom_queue->ctx_offset,
						 pvr_context_get_fw_addr(frag_job->ctx) +
						 frag_queue->ctx_offset,
						 job->hwrt,
						 frag_job->fw_ccb_cmd_type ==
						 ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR);
	} else {
		/* Unpaired job: kick the queue owning this job's scheduler. */
		struct pvr_queue *queue = container_of(job->base.sched,
						       struct pvr_queue, scheduler);

		pvr_cccb_send_kccb_kick(pvr_dev, &queue->cccb,
					pvr_context_get_fw_addr(job->ctx) + queue->ctx_offset,
					job->hwrt);
	}

	return dma_fence_get(job->done_fence);
}
816 
817 static void pvr_queue_stop(struct pvr_queue *queue, struct pvr_job *bad_job)
818 {
819 	drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
820 }
821 
822 static void pvr_queue_start(struct pvr_queue *queue)
823 {
824 	struct pvr_job *job;
825 
826 	/* Make sure we CPU-signal the UFO object, so other queues don't get
827 	 * blocked waiting on it.
828 	 */
829 	*queue->timeline_ufo.value = atomic_read(&queue->job_fence_ctx.seqno);
830 
831 	list_for_each_entry(job, &queue->scheduler.pending_list, base.list) {
832 		if (dma_fence_is_signaled(job->done_fence)) {
833 			/* Jobs might have completed after drm_sched_stop() was called.
834 			 * In that case, re-assign the parent field to the done_fence.
835 			 */
836 			WARN_ON(job->base.s_fence->parent);
837 			job->base.s_fence->parent = dma_fence_get(job->done_fence);
838 		} else {
839 			/* If we had unfinished jobs, flag the entity as guilty so no
840 			 * new job can be submitted.
841 			 */
842 			atomic_set(&queue->ctx->faulty, 1);
843 		}
844 	}
845 
846 	drm_sched_start(&queue->scheduler, 0);
847 }
848 
/**
 * pvr_queue_timedout_job() - Handle a job timeout event.
 * @s_job: The job this timeout occurred on.
 *
 * FIXME: We don't do anything here to unblock the situation, we just stop+start
 * the scheduler, and re-assign parent fences in the middle.
 *
 * The queue is taken out of the device queue lists while the scheduler is
 * stopped, then put back on the idle or active list depending on how many
 * jobs are still pending.
 *
 * Return:
 *  *%DRM_GPU_SCHED_STAT_NO_HANG if the job fence has already been
 *   signaled, or
 *  *%DRM_GPU_SCHED_STAT_RESET otherwise.
 */
static enum drm_gpu_sched_stat
pvr_queue_timedout_job(struct drm_sched_job *s_job)
{
	struct drm_gpu_scheduler *sched = s_job->sched;
	struct pvr_queue *queue = container_of(sched, struct pvr_queue, scheduler);
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
	struct pvr_job *job;
	u32 job_count = 0;

	/* The job completed in the meantime: nothing to recover from. */
	if (dma_fence_is_signaled(s_job->s_fence->parent))
		return DRM_GPU_SCHED_STAT_NO_HANG;

	dev_err(sched->dev, "Job timeout\n");

	/* Before we stop the scheduler, make sure the queue is out of any list, so
	 * any call to pvr_queue_update_active_state_locked() that might happen
	 * until the scheduler is really stopped doesn't end up re-inserting the
	 * queue in the active list. This would cause
	 * pvr_queue_signal_done_fences() and drm_sched_stop() to race with each
	 * other when accessing the pending_list, since drm_sched_stop() doesn't
	 * grab the job_list_lock when modifying the list (it's assuming the
	 * only other accessor is the scheduler, and it's safe to not grab the
	 * lock since it's stopped).
	 */
	mutex_lock(&pvr_dev->queues.lock);
	list_del_init(&queue->node);
	mutex_unlock(&pvr_dev->queues.lock);

	drm_sched_stop(sched, s_job);

	/* Re-assign job parent fences. */
	list_for_each_entry(job, &sched->pending_list, base.list) {
		job->base.s_fence->parent = dma_fence_get(job->done_fence);
		job_count++;
	}
	/* Every job still pending is expected to be accounted as in flight. */
	WARN_ON(atomic_read(&queue->in_flight_job_count) != job_count);

	/* Re-insert the queue in the proper list, and kick a queue processing
	 * operation if there were jobs pending.
	 */
	mutex_lock(&pvr_dev->queues.lock);
	if (!job_count) {
		list_move_tail(&queue->node, &pvr_dev->queues.idle);
	} else {
		/* Resync the in-flight count with what is actually pending. */
		atomic_set(&queue->in_flight_job_count, job_count);
		list_move_tail(&queue->node, &pvr_dev->queues.active);
		pvr_queue_process(queue);
	}
	mutex_unlock(&pvr_dev->queues.lock);

	drm_sched_start(sched, 0);

	return DRM_GPU_SCHED_STAT_RESET;
}
915 
916 /**
917  * pvr_queue_free_job() - Release the reference the scheduler had on a job object.
918  * @sched_job: Job object to free.
919  */
920 static void pvr_queue_free_job(struct drm_sched_job *sched_job)
921 {
922 	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
923 
924 	drm_sched_job_cleanup(sched_job);
925 
926 	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job)
927 		pvr_job_put(job->paired_job);
928 
929 	job->paired_job = NULL;
930 	pvr_job_put(job);
931 }
932 
933 static const struct drm_sched_backend_ops pvr_queue_sched_ops = {
934 	.prepare_job = pvr_queue_prepare_job,
935 	.run_job = pvr_queue_run_job,
936 	.timedout_job = pvr_queue_timedout_job,
937 	.free_job = pvr_queue_free_job,
938 };
939 
940 /**
941  * pvr_queue_fence_is_native() - Check if a dma_fence is native to this driver.
942  * @f: Fence to test.
943  *
944  * Check if the fence we're being passed is a drm_sched_fence that is coming from this driver.
945  *
946  * It may be a UFO-backed fence i.e. a fence that can be signaled or waited upon FW-side,
947  * such as pvr_job::done_fence objects that are backed by the timeline UFO attached to the queue
948  * they are pushed to.
949  */
950 bool pvr_queue_fence_is_native(struct dma_fence *f)
951 {
952 	struct drm_sched_fence *sched_fence = f ? to_drm_sched_fence(f) : NULL;
953 
954 	if (sched_fence &&
955 	    sched_fence->sched->ops == &pvr_queue_sched_ops)
956 		return true;
957 
958 	return pvr_queue_fence_is_ufo_backed(f);
959 }
960 
961 /**
962  * pvr_queue_signal_done_fences() - Signal done fences.
963  * @queue: Queue to check.
964  *
965  * Signal done fences of jobs whose seqno is less than the current value of
966  * the UFO object attached to the queue.
967  */
968 static void
969 pvr_queue_signal_done_fences(struct pvr_queue *queue)
970 {
971 	struct pvr_job *job, *tmp_job;
972 	u32 cur_seqno;
973 
974 	spin_lock(&queue->scheduler.job_list_lock);
975 	cur_seqno = *queue->timeline_ufo.value;
976 	list_for_each_entry_safe(job, tmp_job, &queue->scheduler.pending_list, base.list) {
977 		if ((int)(cur_seqno - lower_32_bits(job->done_fence->seqno)) < 0)
978 			break;
979 
980 		if (!dma_fence_is_signaled(job->done_fence)) {
981 			dma_fence_signal(job->done_fence);
982 			pvr_job_release_pm_ref(job);
983 			atomic_dec(&queue->in_flight_job_count);
984 		}
985 	}
986 	spin_unlock(&queue->scheduler.job_list_lock);
987 }
988 
989 /**
990  * pvr_queue_check_job_waiting_for_cccb_space() - Check if a job waiting for CCCB space
991  * can be unblocked and pushed to the CCCB.
992  * @queue: Queue to check
993  *
994  * If we have a job waiting for CCCB, and this job now fits in the CCCB, we signal
995  * its CCCB fence, which should kick drm_sched.
996  */
997 static void
998 pvr_queue_check_job_waiting_for_cccb_space(struct pvr_queue *queue)
999 {
1000 	struct pvr_queue_fence *cccb_fence;
1001 	u32 native_deps_remaining;
1002 	struct pvr_job *job;
1003 
1004 	mutex_lock(&queue->cccb_fence_ctx.job_lock);
1005 	job = queue->cccb_fence_ctx.job;
1006 	if (!job)
1007 		goto out_unlock;
1008 
1009 	/* If we have a job attached to the CCCB fence context, its CCCB fence
1010 	 * shouldn't be NULL.
1011 	 */
1012 	if (WARN_ON(!job->cccb_fence)) {
1013 		job = NULL;
1014 		goto out_unlock;
1015 	}
1016 
1017 	/* If we get there, CCCB fence has to be initialized. */
1018 	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
1019 	if (WARN_ON(!cccb_fence->queue)) {
1020 		job = NULL;
1021 		goto out_unlock;
1022 	}
1023 
1024 	/* Evict signaled dependencies before checking for CCCB space.
1025 	 * If the job fits, signal the CCCB fence, this should unblock
1026 	 * the drm_sched_entity.
1027 	 */
1028 	native_deps_remaining = job_count_remaining_native_deps(job);
1029 	if (!pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
1030 		job = NULL;
1031 		goto out_unlock;
1032 	}
1033 
1034 	dma_fence_signal(job->cccb_fence);
1035 	pvr_queue_fence_put(job->cccb_fence);
1036 	job->cccb_fence = NULL;
1037 	queue->cccb_fence_ctx.job = NULL;
1038 
1039 out_unlock:
1040 	mutex_unlock(&queue->cccb_fence_ctx.job_lock);
1041 
1042 	pvr_job_put(job);
1043 }
1044 
1045 /**
1046  * pvr_queue_process() - Process events that happened on a queue.
1047  * @queue: Queue to check
1048  *
1049  * Signal job fences and check if jobs waiting for CCCB space can be unblocked.
1050  */
1051 void pvr_queue_process(struct pvr_queue *queue)
1052 {
1053 	lockdep_assert_held(&queue->ctx->pvr_dev->queues.lock);
1054 
1055 	pvr_queue_check_job_waiting_for_cccb_space(queue);
1056 	pvr_queue_signal_done_fences(queue);
1057 	pvr_queue_update_active_state_locked(queue);
1058 }
1059 
1060 static u32 get_dm_type(struct pvr_queue *queue)
1061 {
1062 	switch (queue->type) {
1063 	case DRM_PVR_JOB_TYPE_GEOMETRY:
1064 		return PVR_FWIF_DM_GEOM;
1065 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
1066 	case DRM_PVR_JOB_TYPE_FRAGMENT:
1067 		return PVR_FWIF_DM_FRAG;
1068 	case DRM_PVR_JOB_TYPE_COMPUTE:
1069 		return PVR_FWIF_DM_CDM;
1070 	}
1071 
1072 	return ~0;
1073 }
1074 
1075 /**
1076  * init_fw_context() - Initializes the queue part of a FW context.
1077  * @queue: Queue object to initialize the FW context for.
1078  * @fw_ctx_map: The FW context CPU mapping.
1079  *
1080  * FW contexts are containing various states, one of them being a per-queue state
1081  * that needs to be initialized for each queue being exposed by a context. This
1082  * function takes care of that.
1083  */
1084 static void init_fw_context(struct pvr_queue *queue, void *fw_ctx_map)
1085 {
1086 	struct pvr_context *ctx = queue->ctx;
1087 	struct pvr_fw_object *fw_mem_ctx_obj = pvr_vm_get_fw_mem_context(ctx->vm_ctx);
1088 	struct rogue_fwif_fwcommoncontext *cctx_fw;
1089 	struct pvr_cccb *cccb = &queue->cccb;
1090 
1091 	cctx_fw = fw_ctx_map + queue->ctx_offset;
1092 	cctx_fw->ccbctl_fw_addr = cccb->ctrl_fw_addr;
1093 	cctx_fw->ccb_fw_addr = cccb->cccb_fw_addr;
1094 
1095 	cctx_fw->dm = get_dm_type(queue);
1096 	cctx_fw->priority = ctx->priority;
1097 	cctx_fw->priority_seq_num = 0;
1098 	cctx_fw->max_deadline_ms = MAX_DEADLINE_MS;
1099 	cctx_fw->pid = task_tgid_nr(current);
1100 	cctx_fw->server_common_context_id = ctx->ctx_id;
1101 
1102 	pvr_fw_object_get_fw_addr(fw_mem_ctx_obj, &cctx_fw->fw_mem_context_fw_addr);
1103 
1104 	pvr_fw_object_get_fw_addr(queue->reg_state_obj, &cctx_fw->context_state_addr);
1105 }
1106 
1107 /**
1108  * pvr_queue_cleanup_fw_context() - Wait for the FW context to be idle and clean it up.
1109  * @queue: Queue on FW context to clean up.
1110  *
1111  * Return:
1112  *  * 0 on success,
1113  *  * Any error returned by pvr_fw_structure_cleanup() otherwise.
1114  */
1115 static int pvr_queue_cleanup_fw_context(struct pvr_queue *queue)
1116 {
1117 	if (!queue->ctx->fw_obj)
1118 		return 0;
1119 
1120 	return pvr_fw_structure_cleanup(queue->ctx->pvr_dev,
1121 					ROGUE_FWIF_CLEANUP_FWCOMMONCONTEXT,
1122 					queue->ctx->fw_obj, queue->ctx_offset);
1123 }
1124 
1125 /**
1126  * pvr_queue_job_init() - Initialize queue related fields in a pvr_job object.
1127  * @job: The job to initialize.
1128  * @drm_client_id: drm_file.client_id submitting the job
1129  *
1130  * Bind the job to a queue and allocate memory to guarantee pvr_queue_job_arm()
1131  * and pvr_queue_job_push() can't fail. We also make sure the context type is
1132  * valid and the job can fit in the CCCB.
1133  *
1134  * Return:
1135  *  * 0 on success, or
1136  *  * An error code if something failed.
1137  */
1138 int pvr_queue_job_init(struct pvr_job *job, u64 drm_client_id)
1139 {
1140 	/* Fragment jobs need at least one native fence wait on the geometry job fence. */
1141 	u32 min_native_dep_count = job->type == DRM_PVR_JOB_TYPE_FRAGMENT ? 1 : 0;
1142 	struct pvr_queue *queue;
1143 	int err;
1144 
1145 	if (atomic_read(&job->ctx->faulty))
1146 		return -EIO;
1147 
1148 	queue = pvr_context_get_queue_for_job(job->ctx, job->type);
1149 	if (!queue)
1150 		return -EINVAL;
1151 
1152 	if (!pvr_cccb_cmdseq_can_fit(&queue->cccb, job_cmds_size(job, min_native_dep_count)))
1153 		return -E2BIG;
1154 
1155 	err = drm_sched_job_init(&job->base, &queue->entity, 1, THIS_MODULE, drm_client_id);
1156 	if (err)
1157 		return err;
1158 
1159 	job->cccb_fence = pvr_queue_fence_alloc();
1160 	job->kccb_fence = pvr_kccb_fence_alloc();
1161 	job->done_fence = pvr_queue_fence_alloc();
1162 	if (!job->cccb_fence || !job->kccb_fence || !job->done_fence)
1163 		return -ENOMEM;
1164 
1165 	return 0;
1166 }
1167 
1168 /**
1169  * pvr_queue_job_arm() - Arm a job object.
1170  * @job: The job to arm.
1171  *
1172  * Initializes fences and return the drm_sched finished fence so it can
1173  * be exposed to the outside world. Once this function is called, you should
1174  * make sure the job is pushed using pvr_queue_job_push(), or guarantee that
1175  * no one grabbed a reference to the returned fence. The latter can happen if
1176  * we do multi-job submission, and something failed when creating/initializing
1177  * a job. In that case, we know the fence didn't leave the driver, and we
1178  * can thus guarantee nobody will wait on an dead fence object.
1179  *
1180  * Return:
1181  *  * A dma_fence object.
1182  */
1183 struct dma_fence *pvr_queue_job_arm(struct pvr_job *job)
1184 {
1185 	drm_sched_job_arm(&job->base);
1186 
1187 	return &job->base.s_fence->finished;
1188 }
1189 
1190 /**
1191  * pvr_queue_job_cleanup() - Cleanup fence/scheduler related fields in the job object.
1192  * @job: The job to cleanup.
1193  *
1194  * Should be called in the job release path.
1195  */
1196 void pvr_queue_job_cleanup(struct pvr_job *job)
1197 {
1198 	pvr_queue_fence_put(job->done_fence);
1199 	pvr_queue_fence_put(job->cccb_fence);
1200 	pvr_kccb_fence_put(job->kccb_fence);
1201 
1202 	if (job->base.s_fence)
1203 		drm_sched_job_cleanup(&job->base);
1204 
1205 	trace_pvr_job_done(job);
1206 }
1207 
1208 /**
1209  * pvr_queue_job_push() - Push a job to its queue.
1210  * @job: The job to push.
1211  *
1212  * Must be called after pvr_queue_job_init() and after all dependencies
1213  * have been added to the job. This will effectively queue the job to
1214  * the drm_sched_entity attached to the queue. We grab a reference on
1215  * the job object, so the caller is free to drop its reference when it's
1216  * done accessing the job object.
1217  */
1218 void pvr_queue_job_push(struct pvr_job *job)
1219 {
1220 	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
1221 
1222 	/* Keep track of the last queued job scheduled fence for combined submit. */
1223 	dma_fence_put(queue->last_queued_job_scheduled_fence);
1224 	queue->last_queued_job_scheduled_fence = dma_fence_get(&job->base.s_fence->scheduled);
1225 
1226 	pvr_job_get(job);
1227 	drm_sched_entity_push_job(&job->base);
1228 }
1229 
1230 static void reg_state_init(void *cpu_ptr, void *priv)
1231 {
1232 	struct pvr_queue *queue = priv;
1233 
1234 	if (queue->type == DRM_PVR_JOB_TYPE_GEOMETRY) {
1235 		struct rogue_fwif_geom_ctx_state *geom_ctx_state_fw = cpu_ptr;
1236 
1237 		geom_ctx_state_fw->geom_core[0].geom_reg_vdm_call_stack_pointer_init =
1238 			queue->callstack_addr;
1239 	}
1240 }
1241 
1242 /**
1243  * pvr_queue_create() - Create a queue object.
1244  * @ctx: The context this queue will be attached to.
1245  * @type: The type of jobs being pushed to this queue.
1246  * @args: The arguments passed to the context creation function.
1247  * @fw_ctx_map: CPU mapping of the FW context object.
1248  *
1249  * Create a queue object that will be used to queue and track jobs.
1250  *
1251  * Return:
1252  *  * A valid pointer to a pvr_queue object, or
1253  *  * An error pointer if the creation/initialization failed.
1254  */
1255 struct pvr_queue *pvr_queue_create(struct pvr_context *ctx,
1256 				   enum drm_pvr_job_type type,
1257 				   struct drm_pvr_ioctl_create_context_args *args,
1258 				   void *fw_ctx_map)
1259 {
1260 	static const struct {
1261 		u32 cccb_size;
1262 		const char *name;
1263 	} props[] = {
1264 		[DRM_PVR_JOB_TYPE_GEOMETRY] = {
1265 			.cccb_size = CTX_GEOM_CCCB_SIZE_LOG2,
1266 			.name = "geometry",
1267 		},
1268 		[DRM_PVR_JOB_TYPE_FRAGMENT] = {
1269 			.cccb_size = CTX_FRAG_CCCB_SIZE_LOG2,
1270 			.name = "fragment"
1271 		},
1272 		[DRM_PVR_JOB_TYPE_COMPUTE] = {
1273 			.cccb_size = CTX_COMPUTE_CCCB_SIZE_LOG2,
1274 			.name = "compute"
1275 		},
1276 		[DRM_PVR_JOB_TYPE_TRANSFER_FRAG] = {
1277 			.cccb_size = CTX_TRANSFER_CCCB_SIZE_LOG2,
1278 			.name = "transfer_frag"
1279 		},
1280 	};
1281 	struct pvr_device *pvr_dev = ctx->pvr_dev;
1282 	const struct drm_sched_init_args sched_args = {
1283 		.ops = &pvr_queue_sched_ops,
1284 		.submit_wq = pvr_dev->sched_wq,
1285 		.credit_limit = 64 * 1024,
1286 		.hang_limit = 1,
1287 		.timeout = msecs_to_jiffies(500),
1288 		.timeout_wq = pvr_dev->sched_wq,
1289 		.name = "pvr-queue",
1290 		.dev = pvr_dev->base.dev,
1291 	};
1292 	struct drm_gpu_scheduler *sched;
1293 	struct pvr_queue *queue;
1294 	int ctx_state_size, err;
1295 	void *cpu_map;
1296 
1297 	if (WARN_ON(type >= sizeof(props)))
1298 		return ERR_PTR(-EINVAL);
1299 
1300 	switch (ctx->type) {
1301 	case DRM_PVR_CTX_TYPE_RENDER:
1302 		if (type != DRM_PVR_JOB_TYPE_GEOMETRY &&
1303 		    type != DRM_PVR_JOB_TYPE_FRAGMENT)
1304 			return ERR_PTR(-EINVAL);
1305 		break;
1306 	case DRM_PVR_CTX_TYPE_COMPUTE:
1307 		if (type != DRM_PVR_JOB_TYPE_COMPUTE)
1308 			return ERR_PTR(-EINVAL);
1309 		break;
1310 	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
1311 		if (type != DRM_PVR_JOB_TYPE_TRANSFER_FRAG)
1312 			return ERR_PTR(-EINVAL);
1313 		break;
1314 	default:
1315 		return ERR_PTR(-EINVAL);
1316 	}
1317 
1318 	ctx_state_size = get_ctx_state_size(pvr_dev, type);
1319 	if (ctx_state_size < 0)
1320 		return ERR_PTR(ctx_state_size);
1321 
1322 	queue = kzalloc_obj(*queue);
1323 	if (!queue)
1324 		return ERR_PTR(-ENOMEM);
1325 
1326 	queue->type = type;
1327 	queue->ctx_offset = get_ctx_offset(type);
1328 	queue->ctx = ctx;
1329 	queue->callstack_addr = args->callstack_addr;
1330 	sched = &queue->scheduler;
1331 	INIT_LIST_HEAD(&queue->node);
1332 	mutex_init(&queue->cccb_fence_ctx.job_lock);
1333 	pvr_queue_fence_ctx_init(&queue->cccb_fence_ctx.base);
1334 	pvr_queue_fence_ctx_init(&queue->job_fence_ctx);
1335 
1336 	err = pvr_cccb_init(pvr_dev, &queue->cccb, props[type].cccb_size, props[type].name);
1337 	if (err)
1338 		goto err_free_queue;
1339 
1340 	err = pvr_fw_object_create(pvr_dev, ctx_state_size,
1341 				   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
1342 				   reg_state_init, queue, &queue->reg_state_obj);
1343 	if (err)
1344 		goto err_cccb_fini;
1345 
1346 	init_fw_context(queue, fw_ctx_map);
1347 
1348 	if (type != DRM_PVR_JOB_TYPE_GEOMETRY && type != DRM_PVR_JOB_TYPE_FRAGMENT &&
1349 	    args->callstack_addr) {
1350 		err = -EINVAL;
1351 		goto err_release_reg_state;
1352 	}
1353 
1354 	cpu_map = pvr_fw_object_create_and_map(pvr_dev, sizeof(*queue->timeline_ufo.value),
1355 					       PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
1356 					       NULL, NULL, &queue->timeline_ufo.fw_obj);
1357 	if (IS_ERR(cpu_map)) {
1358 		err = PTR_ERR(cpu_map);
1359 		goto err_release_reg_state;
1360 	}
1361 
1362 	queue->timeline_ufo.value = cpu_map;
1363 
1364 	err = drm_sched_init(&queue->scheduler, &sched_args);
1365 	if (err)
1366 		goto err_release_ufo;
1367 
1368 	err = drm_sched_entity_init(&queue->entity,
1369 				    DRM_SCHED_PRIORITY_KERNEL,
1370 				    &sched, 1, &ctx->faulty);
1371 	if (err)
1372 		goto err_sched_fini;
1373 
1374 	mutex_lock(&pvr_dev->queues.lock);
1375 	list_add_tail(&queue->node, &pvr_dev->queues.idle);
1376 	mutex_unlock(&pvr_dev->queues.lock);
1377 
1378 	return queue;
1379 
1380 err_sched_fini:
1381 	drm_sched_fini(&queue->scheduler);
1382 
1383 err_release_ufo:
1384 	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
1385 
1386 err_release_reg_state:
1387 	pvr_fw_object_destroy(queue->reg_state_obj);
1388 
1389 err_cccb_fini:
1390 	pvr_cccb_fini(&queue->cccb);
1391 
1392 err_free_queue:
1393 	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
1394 	kfree(queue);
1395 
1396 	return ERR_PTR(err);
1397 }
1398 
1399 void pvr_queue_device_pre_reset(struct pvr_device *pvr_dev)
1400 {
1401 	struct pvr_queue *queue;
1402 
1403 	mutex_lock(&pvr_dev->queues.lock);
1404 	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
1405 		pvr_queue_stop(queue, NULL);
1406 	list_for_each_entry(queue, &pvr_dev->queues.active, node)
1407 		pvr_queue_stop(queue, NULL);
1408 	mutex_unlock(&pvr_dev->queues.lock);
1409 }
1410 
1411 void pvr_queue_device_post_reset(struct pvr_device *pvr_dev)
1412 {
1413 	struct pvr_queue *queue;
1414 
1415 	mutex_lock(&pvr_dev->queues.lock);
1416 	list_for_each_entry(queue, &pvr_dev->queues.active, node)
1417 		pvr_queue_start(queue);
1418 	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
1419 		pvr_queue_start(queue);
1420 	mutex_unlock(&pvr_dev->queues.lock);
1421 }
1422 
1423 /**
1424  * pvr_queue_kill() - Kill a queue.
1425  * @queue: The queue to kill.
1426  *
1427  * Kill the queue so no new jobs can be pushed. Should be called when the
1428  * context handle is destroyed. The queue object might last longer if jobs
1429  * are still in flight and holding a reference to the context this queue
1430  * belongs to.
1431  */
1432 void pvr_queue_kill(struct pvr_queue *queue)
1433 {
1434 	drm_sched_entity_destroy(&queue->entity);
1435 	dma_fence_put(queue->last_queued_job_scheduled_fence);
1436 	queue->last_queued_job_scheduled_fence = NULL;
1437 }
1438 
1439 /**
1440  * pvr_queue_destroy() - Destroy a queue.
1441  * @queue: The queue to destroy.
1442  *
1443  * Cleanup the queue and free the resources attached to it. Should be
1444  * called from the context release function.
1445  */
1446 void pvr_queue_destroy(struct pvr_queue *queue)
1447 {
1448 	if (!queue)
1449 		return;
1450 
1451 	mutex_lock(&queue->ctx->pvr_dev->queues.lock);
1452 	list_del_init(&queue->node);
1453 	mutex_unlock(&queue->ctx->pvr_dev->queues.lock);
1454 
1455 	drm_sched_fini(&queue->scheduler);
1456 	drm_sched_entity_fini(&queue->entity);
1457 
1458 	if (WARN_ON(queue->last_queued_job_scheduled_fence))
1459 		dma_fence_put(queue->last_queued_job_scheduled_fence);
1460 
1461 	pvr_queue_cleanup_fw_context(queue);
1462 
1463 	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
1464 	pvr_fw_object_destroy(queue->reg_state_obj);
1465 	pvr_cccb_fini(&queue->cccb);
1466 	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
1467 	kfree(queue);
1468 }
1469 
1470 /**
1471  * pvr_queue_device_init() - Device-level initialization of queue related fields.
1472  * @pvr_dev: The device to initialize.
1473  *
1474  * Initializes all fields related to queue management in pvr_device.
1475  *
1476  * Return:
1477  *  * 0 on success, or
1478  *  * An error code on failure.
1479  */
1480 int pvr_queue_device_init(struct pvr_device *pvr_dev)
1481 {
1482 	int err;
1483 
1484 	INIT_LIST_HEAD(&pvr_dev->queues.active);
1485 	INIT_LIST_HEAD(&pvr_dev->queues.idle);
1486 	err = drmm_mutex_init(from_pvr_device(pvr_dev), &pvr_dev->queues.lock);
1487 	if (err)
1488 		return err;
1489 
1490 	pvr_dev->sched_wq = alloc_workqueue("powervr-sched", WQ_UNBOUND, 0);
1491 	if (!pvr_dev->sched_wq)
1492 		return -ENOMEM;
1493 
1494 	return 0;
1495 }
1496 
1497 /**
1498  * pvr_queue_device_fini() - Device-level cleanup of queue related fields.
1499  * @pvr_dev: The device to cleanup.
1500  *
1501  * Cleanup/free all queue-related resources attached to a pvr_device object.
1502  */
1503 void pvr_queue_device_fini(struct pvr_device *pvr_dev)
1504 {
1505 	destroy_workqueue(pvr_dev->sched_wq);
1506 }
1507