xref: /linux/drivers/gpu/drm/imagination/pvr_queue.c (revision 3a39d672e7f48b8d6b91a09afa4b55352773b4b5)
1 // SPDX-License-Identifier: GPL-2.0-only OR MIT
2 /* Copyright (c) 2023 Imagination Technologies Ltd. */
3 
4 #include <drm/drm_managed.h>
5 #include <drm/gpu_scheduler.h>
6 
7 #include "pvr_cccb.h"
8 #include "pvr_context.h"
9 #include "pvr_device.h"
10 #include "pvr_drv.h"
11 #include "pvr_job.h"
12 #include "pvr_queue.h"
13 #include "pvr_vm.h"
14 
15 #include "pvr_rogue_fwif_client.h"
16 
17 #define MAX_DEADLINE_MS 30000
18 
19 #define CTX_COMPUTE_CCCB_SIZE_LOG2 15
20 #define CTX_FRAG_CCCB_SIZE_LOG2 15
21 #define CTX_GEOM_CCCB_SIZE_LOG2 15
22 #define CTX_TRANSFER_CCCB_SIZE_LOG2 15
23 
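/* Size of the FW context state for a transfer-fragment queue: the base
 * rogue_fwif_frag_ctx_state structure plus one ISP store register slot per
 * ISP IPP pipe (a single slot on parts with the Xe memory hierarchy feature).
 */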
24 static int get_xfer_ctx_state_size(struct pvr_device *pvr_dev)
25 {
26 	u32 num_isp_store_registers;
27 
28 	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
29 		num_isp_store_registers = 1;
30 	} else {
31 		int err;
32 
33 		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
34 		if (WARN_ON(err))
35 			return err;
36 	}
37 
38 	return sizeof(struct rogue_fwif_frag_ctx_state) +
39 	       (num_isp_store_registers *
40 		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
41 }
42 
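/* Size of the FW context state for a fragment queue. The number of ISP store
 * register slots is derived from the raster pipe count (scaled by the XPU
 * slave count on multicore parts), or from the ISP IPP pipe count on parts
 * without the Xe memory hierarchy feature.
 */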
43 static int get_frag_ctx_state_size(struct pvr_device *pvr_dev)
44 {
45 	u32 num_isp_store_registers;
46 	int err;
47 
48 	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
49 		err = PVR_FEATURE_VALUE(pvr_dev, num_raster_pipes, &num_isp_store_registers);
50 		if (WARN_ON(err))
51 			return err;
52 
53 		if (PVR_HAS_FEATURE(pvr_dev, gpu_multicore_support)) {
54 			u32 xpu_max_slaves;
55 
56 			err = PVR_FEATURE_VALUE(pvr_dev, xpu_max_slaves, &xpu_max_slaves);
57 			if (WARN_ON(err))
58 				return err;
59 
60 			num_isp_store_registers *= (1 + xpu_max_slaves);
61 		}
62 	} else {
63 		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
64 		if (WARN_ON(err))
65 			return err;
66 	}
67 
68 	return sizeof(struct rogue_fwif_frag_ctx_state) +
69 	       (num_isp_store_registers *
70 		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
71 }
72 
73 static int get_ctx_state_size(struct pvr_device *pvr_dev, enum drm_pvr_job_type type)
74 {
75 	switch (type) {
76 	case DRM_PVR_JOB_TYPE_GEOMETRY:
77 		return sizeof(struct rogue_fwif_geom_ctx_state);
78 	case DRM_PVR_JOB_TYPE_FRAGMENT:
79 		return get_frag_ctx_state_size(pvr_dev);
80 	case DRM_PVR_JOB_TYPE_COMPUTE:
81 		return sizeof(struct rogue_fwif_compute_ctx_state);
82 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
83 		return get_xfer_ctx_state_size(pvr_dev);
84 	}
85 
86 	WARN(1, "Invalid queue type");
87 	return -EINVAL;
88 }
89 
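/* Byte offset of the per-queue FW common context within the FW context object
 * backing this context type (render, compute or transfer). Returns 0 for an
 * unknown job type.
 */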
90 static u32 get_ctx_offset(enum drm_pvr_job_type type)
91 {
92 	switch (type) {
93 	case DRM_PVR_JOB_TYPE_GEOMETRY:
94 		return offsetof(struct rogue_fwif_fwrendercontext, geom_context);
95 	case DRM_PVR_JOB_TYPE_FRAGMENT:
96 		return offsetof(struct rogue_fwif_fwrendercontext, frag_context);
97 	case DRM_PVR_JOB_TYPE_COMPUTE:
98 		return offsetof(struct rogue_fwif_fwcomputecontext, cdm_context);
99 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
100 		return offsetof(struct rogue_fwif_fwtransfercontext, tq_context);
101 	}
102 
103 	return 0;
104 }
105 
106 static const char *
107 pvr_queue_fence_get_driver_name(struct dma_fence *f)
108 {
109 	return PVR_DRIVER_NAME;
110 }
111 
112 static void pvr_queue_fence_release(struct dma_fence *f)
113 {
114 	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
115 
116 	pvr_context_put(fence->queue->ctx);
117 	dma_fence_free(f);
118 }
119 
120 static const char *
121 pvr_queue_job_fence_get_timeline_name(struct dma_fence *f)
122 {
123 	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
124 
125 	switch (fence->queue->type) {
126 	case DRM_PVR_JOB_TYPE_GEOMETRY:
127 		return "geometry";
128 
129 	case DRM_PVR_JOB_TYPE_FRAGMENT:
130 		return "fragment";
131 
132 	case DRM_PVR_JOB_TYPE_COMPUTE:
133 		return "compute";
134 
135 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
136 		return "transfer";
137 	}
138 
139 	WARN(1, "Invalid queue type");
140 	return "invalid";
141 }
142 
143 static const char *
144 pvr_queue_cccb_fence_get_timeline_name(struct dma_fence *f)
145 {
146 	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
147 
148 	switch (fence->queue->type) {
149 	case DRM_PVR_JOB_TYPE_GEOMETRY:
150 		return "geometry-cccb";
151 
152 	case DRM_PVR_JOB_TYPE_FRAGMENT:
153 		return "fragment-cccb";
154 
155 	case DRM_PVR_JOB_TYPE_COMPUTE:
156 		return "compute-cccb";
157 
158 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
159 		return "transfer-cccb";
160 	}
161 
162 	WARN(1, "Invalid queue type");
163 	return "invalid";
164 }
165 
166 static const struct dma_fence_ops pvr_queue_job_fence_ops = {
167 	.get_driver_name = pvr_queue_fence_get_driver_name,
168 	.get_timeline_name = pvr_queue_job_fence_get_timeline_name,
169 	.release = pvr_queue_fence_release,
170 };
171 
172 /**
173  * to_pvr_queue_job_fence() - Return a pvr_queue_fence object if the fence is
174  * backed by a UFO.
175  * @f: The dma_fence to turn into a pvr_queue_fence.
176  *
177  * Return:
178  *  * A non-NULL pvr_queue_fence object if the dma_fence is backed by a UFO, or
179  *  * NULL otherwise.
180  */
181 static struct pvr_queue_fence *
182 to_pvr_queue_job_fence(struct dma_fence *f)
183 {
184 	struct drm_sched_fence *sched_fence = to_drm_sched_fence(f);
185 
186 	if (sched_fence)
187 		f = sched_fence->parent;
188 
189 	if (f && f->ops == &pvr_queue_job_fence_ops)
190 		return container_of(f, struct pvr_queue_fence, base);
191 
192 	return NULL;
193 }
194 
195 static const struct dma_fence_ops pvr_queue_cccb_fence_ops = {
196 	.get_driver_name = pvr_queue_fence_get_driver_name,
197 	.get_timeline_name = pvr_queue_cccb_fence_get_timeline_name,
198 	.release = pvr_queue_fence_release,
199 };
200 
201 /**
202  * pvr_queue_fence_put() - Put wrapper for pvr_queue_fence objects.
203  * @f: The dma_fence object to put.
204  *
205  * If the pvr_queue_fence has been initialized, we call dma_fence_put(),
206  * otherwise we free the object with dma_fence_free(). This allows us
207  * to do the right thing before and after pvr_queue_fence_init() has been
208  * called.
209  */
210 static void pvr_queue_fence_put(struct dma_fence *f)
211 {
212 	if (!f)
213 		return;
214 
215 	if (WARN_ON(f->ops &&
216 		    f->ops != &pvr_queue_cccb_fence_ops &&
217 		    f->ops != &pvr_queue_job_fence_ops))
218 		return;
219 
220 	/* If the fence hasn't been initialized yet, free the object directly. */
221 	if (f->ops)
222 		dma_fence_put(f);
223 	else
224 		dma_fence_free(f);
225 }
226 
227 /**
228  * pvr_queue_fence_alloc() - Allocate a pvr_queue_fence fence object
229  *
230  * Call this function to allocate job CCCB and done fences. This only
231  * allocates the objects. Initialization happens when the underlying
232  * dma_fence object is to be returned to drm_sched (in prepare_job() or
233  * run_job()).
234  *
235  * Return:
236  *  * A valid pointer if the allocation succeeds, or
237  *  * NULL if the allocation fails.
238  */
239 static struct dma_fence *
240 pvr_queue_fence_alloc(void)
241 {
242 	struct pvr_queue_fence *fence;
243 
244 	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
245 	if (!fence)
246 		return NULL;
247 
248 	return &fence->base;
249 }
250 
251 /**
252  * pvr_queue_fence_init() - Initializes a pvr_queue_fence object.
253  * @f: The fence to initialize
254  * @queue: The queue this fence belongs to.
255  * @fence_ops: The fence operations.
256  * @fence_ctx: The fence context.
257  *
258  * Wrapper around dma_fence_init() that takes care of initializing the
259  * pvr_queue_fence::queue field too.
260  */
261 static void
262 pvr_queue_fence_init(struct dma_fence *f,
263 		     struct pvr_queue *queue,
264 		     const struct dma_fence_ops *fence_ops,
265 		     struct pvr_queue_fence_ctx *fence_ctx)
266 {
267 	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
268 
269 	pvr_context_get(queue->ctx);
270 	fence->queue = queue;
271 	dma_fence_init(&fence->base, fence_ops,
272 		       &fence_ctx->lock, fence_ctx->id,
273 		       atomic_inc_return(&fence_ctx->seqno));
274 }
275 
276 /**
277  * pvr_queue_cccb_fence_init() - Initializes a CCCB fence object.
278  * @fence: The fence to initialize.
279  * @queue: The queue this fence belongs to.
280  *
281  * Initializes a fence that can be used to wait for CCCB space.
282  *
283  * Should be called in the ::prepare_job() path, so the fence returned to
284  * drm_sched is valid.
285  */
286 static void
287 pvr_queue_cccb_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
288 {
289 	pvr_queue_fence_init(fence, queue, &pvr_queue_cccb_fence_ops,
290 			     &queue->cccb_fence_ctx.base);
291 }
292 
293 /**
294  * pvr_queue_job_fence_init() - Initializes a job done fence object.
295  * @fence: The fence to initialize.
296  * @queue: The queue this fence belongs to.
297  *
298  * Initializes a fence that will be signaled when the GPU is done executing
299  * a job.
300  *
301  * Should be called *before* the ::run_job() path, so the fence is initialised
302  * before being placed in the pending_list.
303  */
304 static void
305 pvr_queue_job_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
306 {
307 	pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops,
308 			     &queue->job_fence_ctx);
309 }
310 
311 /**
312  * pvr_queue_fence_ctx_init() - Queue fence context initialization.
313  * @fence_ctx: The context to initialize
314  */
315 static void
316 pvr_queue_fence_ctx_init(struct pvr_queue_fence_ctx *fence_ctx)
317 {
318 	spin_lock_init(&fence_ctx->lock);
319 	fence_ctx->id = dma_fence_context_alloc(1);
320 	atomic_set(&fence_ctx->seqno, 0);
321 }
322 
323 static u32 ufo_cmds_size(u32 elem_count)
324 {
325 	/* We can pass at most ROGUE_FWIF_CCB_CMD_MAX_UFOS per UFO-related command. */
326 	u32 full_cmd_count = elem_count / ROGUE_FWIF_CCB_CMD_MAX_UFOS;
327 	u32 remaining_elems = elem_count % ROGUE_FWIF_CCB_CMD_MAX_UFOS;
328 	u32 size = full_cmd_count *
329 		   pvr_cccb_get_size_of_cmd_with_hdr(ROGUE_FWIF_CCB_CMD_MAX_UFOS *
330 						     sizeof(struct rogue_fwif_ufo));
331 
332 	if (remaining_elems) {
333 		size += pvr_cccb_get_size_of_cmd_with_hdr(remaining_elems *
334 							  sizeof(struct rogue_fwif_ufo));
335 	}
336 
337 	return size;
338 }
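
/* Illustrative example (hypothetical value): if ROGUE_FWIF_CCB_CMD_MAX_UFOS
 * were 16, ufo_cmds_size(20) would account for one full 16-UFO command plus
 * one 4-UFO command, each sized with its own command header.
 */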
339 
340 static u32 job_cmds_size(struct pvr_job *job, u32 ufo_wait_count)
341 {
342 	/* One UFO cmd for the fence signaling, one UFO cmd per native fence wait,
343 	 * and a command for the job itself.
344 	 */
345 	return ufo_cmds_size(1) + ufo_cmds_size(ufo_wait_count) +
346 	       pvr_cccb_get_size_of_cmd_with_hdr(job->cmd_len);
347 }
348 
349 /**
350  * job_count_remaining_native_deps() - Count the number of non-signaled native dependencies.
351  * @job: Job to operate on.
352  *
353  * Returns: Number of non-signaled native deps remaining.
354  */
355 static unsigned long job_count_remaining_native_deps(struct pvr_job *job)
356 {
357 	unsigned long remaining_count = 0;
358 	struct dma_fence *fence = NULL;
359 	unsigned long index;
360 
361 	xa_for_each(&job->base.dependencies, index, fence) {
362 		struct pvr_queue_fence *jfence;
363 
364 		jfence = to_pvr_queue_job_fence(fence);
365 		if (!jfence)
366 			continue;
367 
368 		if (!dma_fence_is_signaled(&jfence->base))
369 			remaining_count++;
370 	}
371 
372 	return remaining_count;
373 }
374 
375 /**
376  * pvr_queue_get_job_cccb_fence() - Get the CCCB fence attached to a job.
377  * @queue: The queue this job will be submitted to.
378  * @job: The job to get the CCCB fence on.
379  *
380  * The CCCB fence is a synchronization primitive allowing us to delay job
381  * submission until there's enough space in the CCCB to submit the job.
382  *
383  * Return:
384  *  * NULL if there's enough space in the CCCB to submit this job, or
385  *  * A valid dma_fence object otherwise.
386  */
387 static struct dma_fence *
388 pvr_queue_get_job_cccb_fence(struct pvr_queue *queue, struct pvr_job *job)
389 {
390 	struct pvr_queue_fence *cccb_fence;
391 	unsigned int native_deps_remaining;
392 
393 	/* If the fence is NULL, that means we already checked that we had
394 	 * enough space in the cccb for our job.
395 	 */
396 	if (!job->cccb_fence)
397 		return NULL;
398 
399 	mutex_lock(&queue->cccb_fence_ctx.job_lock);
400 
401 	/* Count remaining native dependencies and check if the job fits in the CCCB. */
402 	native_deps_remaining = job_count_remaining_native_deps(job);
403 	if (pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
404 		pvr_queue_fence_put(job->cccb_fence);
405 		job->cccb_fence = NULL;
406 		goto out_unlock;
407 	}
408 
409 	/* There should be no job attached to the CCCB fence context:
410 	 * drm_sched_entity guarantees that jobs are submitted one at a time.
411 	 */
412 	if (WARN_ON(queue->cccb_fence_ctx.job))
413 		pvr_job_put(queue->cccb_fence_ctx.job);
414 
415 	queue->cccb_fence_ctx.job = pvr_job_get(job);
416 
417 	/* Initialize the fence before returning it. */
418 	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
419 	if (!WARN_ON(cccb_fence->queue))
420 		pvr_queue_cccb_fence_init(job->cccb_fence, queue);
421 
422 out_unlock:
423 	mutex_unlock(&queue->cccb_fence_ctx.job_lock);
424 
425 	return dma_fence_get(job->cccb_fence);
426 }
427 
428 /**
429  * pvr_queue_get_job_kccb_fence() - Get the KCCB fence attached to a job.
430  * @queue: The queue this job will be submitted to.
431  * @job: The job to get the KCCB fence on.
432  *
433  * The KCCB fence is a synchronization primitive allowing us to delay job
434  * submission until there's enough space in the KCCB to submit the job.
435  *
436  * Return:
437  *  * NULL if there's enough space in the KCCB to submit this job, or
438  *  * A valid dma_fence object otherwise.
439  */
440 static struct dma_fence *
441 pvr_queue_get_job_kccb_fence(struct pvr_queue *queue, struct pvr_job *job)
442 {
443 	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
444 	struct dma_fence *kccb_fence = NULL;
445 
446 	/* If the fence is NULL, that means we already checked that we had
447 	 * enough space in the KCCB for our job.
448 	 */
449 	if (!job->kccb_fence)
450 		return NULL;
451 
452 	if (!WARN_ON(job->kccb_fence->ops)) {
453 		kccb_fence = pvr_kccb_reserve_slot(pvr_dev, job->kccb_fence);
454 		job->kccb_fence = NULL;
455 	}
456 
457 	return kccb_fence;
458 }
459 
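/* For a geometry job paired with a fragment job, return the next unsignaled
 * dependency of the fragment job (skipping the geometry job's own scheduled
 * fence), so the combined geom+frag kick is only issued once the fragment job
 * is ready too. Once all external dependencies are met, fall back to the
 * fragment job's prepare_job() internal dependencies.
 */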
460 static struct dma_fence *
461 pvr_queue_get_paired_frag_job_dep(struct pvr_queue *queue, struct pvr_job *job)
462 {
463 	struct pvr_job *frag_job = job->type == DRM_PVR_JOB_TYPE_GEOMETRY ?
464 				   job->paired_job : NULL;
465 	struct dma_fence *f;
466 	unsigned long index;
467 
468 	if (!frag_job)
469 		return NULL;
470 
471 	xa_for_each(&frag_job->base.dependencies, index, f) {
472 		/* Skip already signaled fences. */
473 		if (dma_fence_is_signaled(f))
474 			continue;
475 
476 		/* Skip our own fence. */
477 		if (f == &job->base.s_fence->scheduled)
478 			continue;
479 
480 		return dma_fence_get(f);
481 	}
482 
483 	return frag_job->base.sched->ops->prepare_job(&frag_job->base, &queue->entity);
484 }
485 
486 /**
487  * pvr_queue_prepare_job() - Return the next internal dependency expressed as a dma_fence.
488  * @sched_job: The job to query the next internal dependency on
489  * @s_entity: The entity this job is queued on.
490  *
491  * After iterating over drm_sched_job::dependencies, drm_sched lets the driver return
492  * its own internal dependencies. We use this function to return our internal dependencies.
493  */
494 static struct dma_fence *
495 pvr_queue_prepare_job(struct drm_sched_job *sched_job,
496 		      struct drm_sched_entity *s_entity)
497 {
498 	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
499 	struct pvr_queue *queue = container_of(s_entity, struct pvr_queue, entity);
500 	struct dma_fence *internal_dep = NULL;
501 
502 	/*
503 	 * Initialize the done_fence, so we can signal it. This must be done
504 	 * here because otherwise by the time of run_job() the job will end up
505 	 * in the pending list without a valid fence.
506 	 */
507 	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
508 		/*
509 		 * This will be called on a paired fragment job after being
510 		 * submitted to firmware. We can tell if this is the case and
511 		 * bail early from whether run_job() has been called on the
512 		 * geometry job, which would issue a pm ref.
513 		 */
514 		if (job->paired_job->has_pm_ref)
515 			return NULL;
516 
517 		/*
518 		 * In this case we need to use the job's own ctx to initialise
519 		 * the done_fence.  The other steps are done in the ctx of the
520 		 * paired geometry job.
521 		 */
522 		pvr_queue_job_fence_init(job->done_fence,
523 					 job->ctx->queues.fragment);
524 	} else {
525 		pvr_queue_job_fence_init(job->done_fence, queue);
526 	}
527 
528 	/* CCCB fence is used to make sure we have enough space in the CCCB to
529 	 * submit our commands.
530 	 */
531 	internal_dep = pvr_queue_get_job_cccb_fence(queue, job);
532 
533 	/* KCCB fence is used to make sure we have a KCCB slot to queue our
534 	 * CMD_KICK.
535 	 */
536 	if (!internal_dep)
537 		internal_dep = pvr_queue_get_job_kccb_fence(queue, job);
538 
539 	/* Any extra internal dependency should be added here, using the following
540 	 * pattern:
541 	 *
542 	 *	if (!internal_dep)
543 	 *		internal_dep = pvr_queue_get_job_xxxx_fence(queue, job);
544 	 */
545 
546 	/* The paired job fence should come last, when everything else is ready. */
547 	if (!internal_dep)
548 		internal_dep = pvr_queue_get_paired_frag_job_dep(queue, job);
549 
550 	return internal_dep;
551 }
552 
553 /**
554  * pvr_queue_update_active_state_locked() - Update the queue active state.
555  * @queue: Queue to update the state on.
556  *
557  * Locked version of pvr_queue_update_active_state(). Must be called with
558  * pvr_device::queue::lock held.
559  */
560 static void pvr_queue_update_active_state_locked(struct pvr_queue *queue)
561 {
562 	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
563 
564 	lockdep_assert_held(&pvr_dev->queues.lock);
565 
566 	/* The queue is temporarily out of any list when it's being reset,
567 	 * we don't want a call to pvr_queue_update_active_state_locked()
568 	 * to re-insert it behind our back.
569 	 */
570 	if (list_empty(&queue->node))
571 		return;
572 
573 	if (!atomic_read(&queue->in_flight_job_count))
574 		list_move_tail(&queue->node, &pvr_dev->queues.idle);
575 	else
576 		list_move_tail(&queue->node, &pvr_dev->queues.active);
577 }
578 
579 /**
580  * pvr_queue_update_active_state() - Update the queue active state.
581  * @queue: Queue to update the state on.
582  *
583  * Active state is based on the in_flight_job_count value.
584  *
585  * Updating the active state implies moving the queue in or out of the
586  * active queue list, which also defines whether the queue is checked
587  * or not when a FW event is received.
588  *
589  * This function should be called any time a job is submitted or its done
590  * fence is signaled.
591  */
592 static void pvr_queue_update_active_state(struct pvr_queue *queue)
593 {
594 	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
595 
596 	mutex_lock(&pvr_dev->queues.lock);
597 	pvr_queue_update_active_state_locked(queue);
598 	mutex_unlock(&pvr_dev->queues.lock);
599 }
600 
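/* Write a job to its client CCB: FENCE_PR commands for every unsignaled
 * native dependency (batched by ROGUE_FWIF_CCB_CMD_MAX_UFOS), the partial
 * render fence for paired fragment jobs, the job command itself, and finally
 * an UPDATE command signaling the queue timeline UFO with the job's
 * done_fence seqno.
 */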
601 static void pvr_queue_submit_job_to_cccb(struct pvr_job *job)
602 {
603 	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
604 	struct rogue_fwif_ufo ufos[ROGUE_FWIF_CCB_CMD_MAX_UFOS];
605 	struct pvr_cccb *cccb = &queue->cccb;
606 	struct pvr_queue_fence *jfence;
607 	struct dma_fence *fence;
608 	unsigned long index;
609 	u32 ufo_count = 0;
610 
611 	/* We need to add the queue to the active list before updating the CCCB,
612 	 * otherwise we might miss the FW event informing us that something
613 	 * happened on this queue.
614 	 */
615 	atomic_inc(&queue->in_flight_job_count);
616 	pvr_queue_update_active_state(queue);
617 
618 	xa_for_each(&job->base.dependencies, index, fence) {
619 		jfence = to_pvr_queue_job_fence(fence);
620 		if (!jfence)
621 			continue;
622 
623 		/* Skip the partial render fence, we will place it at the end. */
624 		if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job &&
625 		    &job->paired_job->base.s_fence->scheduled == fence)
626 			continue;
627 
628 		if (dma_fence_is_signaled(&jfence->base))
629 			continue;
630 
631 		pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
632 					  &ufos[ufo_count].addr);
633 		ufos[ufo_count++].value = jfence->base.seqno;
634 
635 		if (ufo_count == ARRAY_SIZE(ufos)) {
636 			pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
637 							   sizeof(ufos), ufos, 0, 0);
638 			ufo_count = 0;
639 		}
640 	}
641 
642 	/* Partial render fence goes last. */
643 	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
644 		jfence = to_pvr_queue_job_fence(job->paired_job->done_fence);
645 		if (!WARN_ON(!jfence)) {
646 			pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
647 						  &ufos[ufo_count].addr);
648 			ufos[ufo_count++].value = job->paired_job->done_fence->seqno;
649 		}
650 	}
651 
652 	if (ufo_count) {
653 		pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
654 						   sizeof(ufos[0]) * ufo_count, ufos, 0, 0);
655 	}
656 
657 	if (job->type == DRM_PVR_JOB_TYPE_GEOMETRY && job->paired_job) {
658 		struct rogue_fwif_cmd_geom *cmd = job->cmd;
659 
660 		/* Reference value for the partial render test is the current queue fence
661 		 * seqno minus one.
662 		 */
663 		pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj,
664 					  &cmd->partial_render_geom_frag_fence.addr);
665 		cmd->partial_render_geom_frag_fence.value = job->done_fence->seqno - 1;
666 	}
667 
668 	/* Submit job to FW */
669 	pvr_cccb_write_command_with_header(cccb, job->fw_ccb_cmd_type, job->cmd_len, job->cmd,
670 					   job->id, job->id);
671 
672 	/* Signal the job fence. */
673 	pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj, &ufos[0].addr);
674 	ufos[0].value = job->done_fence->seqno;
675 	pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_UPDATE,
676 					   sizeof(ufos[0]), ufos, 0, 0);
677 }
678 
679 /**
680  * pvr_queue_run_job() - Submit a job to the FW.
681  * @sched_job: The job to submit.
682  *
683  * This function is called when all non-native dependencies have been met and
684  * when the commands resulting from this job are guaranteed to fit in the CCCB.
685  */
686 static struct dma_fence *pvr_queue_run_job(struct drm_sched_job *sched_job)
687 {
688 	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
689 	struct pvr_device *pvr_dev = job->pvr_dev;
690 	int err;
691 
692 	/* The fragment job is issued along with the geometry job when we use combined
693 	 * geom+frag kicks. When we get there, we should simply return the
694 	 * done_fence that's been initialized earlier.
695 	 */
696 	if (job->paired_job && job->type == DRM_PVR_JOB_TYPE_FRAGMENT &&
697 	    job->done_fence->ops) {
698 		return dma_fence_get(job->done_fence);
699 	}
700 
701 	/* The only kind of jobs that can be paired are geometry and fragment, and
702 	 * we bail out early if we see a fragment job that's paired with a geometry
703 	 * job.
704 	 * Paired jobs must also target the same context and point to the same
705 	 * HWRT.
706 	 */
707 	if (WARN_ON(job->paired_job &&
708 		    (job->type != DRM_PVR_JOB_TYPE_GEOMETRY ||
709 		     job->paired_job->type != DRM_PVR_JOB_TYPE_FRAGMENT ||
710 		     job->hwrt != job->paired_job->hwrt ||
711 		     job->ctx != job->paired_job->ctx)))
712 		return ERR_PTR(-EINVAL);
713 
714 	err = pvr_job_get_pm_ref(job);
715 	if (WARN_ON(err))
716 		return ERR_PTR(err);
717 
718 	if (job->paired_job) {
719 		err = pvr_job_get_pm_ref(job->paired_job);
720 		if (WARN_ON(err))
721 			return ERR_PTR(err);
722 	}
723 
724 	/* Submit our job to the CCCB */
725 	pvr_queue_submit_job_to_cccb(job);
726 
727 	if (job->paired_job) {
728 		struct pvr_job *geom_job = job;
729 		struct pvr_job *frag_job = job->paired_job;
730 		struct pvr_queue *geom_queue = job->ctx->queues.geometry;
731 		struct pvr_queue *frag_queue = job->ctx->queues.fragment;
732 
733 		/* Submit the fragment job along with the geometry job and send a combined kick. */
734 		pvr_queue_submit_job_to_cccb(frag_job);
735 		pvr_cccb_send_kccb_combined_kick(pvr_dev,
736 						 &geom_queue->cccb, &frag_queue->cccb,
737 						 pvr_context_get_fw_addr(geom_job->ctx) +
738 						 geom_queue->ctx_offset,
739 						 pvr_context_get_fw_addr(frag_job->ctx) +
740 						 frag_queue->ctx_offset,
741 						 job->hwrt,
742 						 frag_job->fw_ccb_cmd_type ==
743 						 ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR);
744 	} else {
745 		struct pvr_queue *queue = container_of(job->base.sched,
746 						       struct pvr_queue, scheduler);
747 
748 		pvr_cccb_send_kccb_kick(pvr_dev, &queue->cccb,
749 					pvr_context_get_fw_addr(job->ctx) + queue->ctx_offset,
750 					job->hwrt);
751 	}
752 
753 	return dma_fence_get(job->done_fence);
754 }
755 
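/* Stop the DRM scheduler backing this queue, optionally flagging @bad_job as
 * the offending job. Called from the device pre-reset path.
 */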
756 static void pvr_queue_stop(struct pvr_queue *queue, struct pvr_job *bad_job)
757 {
758 	drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
759 }
760 
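/* Counterpart of pvr_queue_stop(): CPU-signal the timeline UFO, re-attach
 * parent fences for jobs that completed while the scheduler was stopped, mark
 * the context faulty if unfinished jobs remain, then restart the scheduler.
 */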
761 static void pvr_queue_start(struct pvr_queue *queue)
762 {
763 	struct pvr_job *job;
764 
765 	/* Make sure we CPU-signal the UFO object, so other queues don't get
766 	 * blocked waiting on it.
767 	 */
768 	*queue->timeline_ufo.value = atomic_read(&queue->job_fence_ctx.seqno);
769 
770 	list_for_each_entry(job, &queue->scheduler.pending_list, base.list) {
771 		if (dma_fence_is_signaled(job->done_fence)) {
772 			/* Jobs might have completed after drm_sched_stop() was called.
773 			 * In that case, re-assign the parent field to the done_fence.
774 			 */
775 			WARN_ON(job->base.s_fence->parent);
776 			job->base.s_fence->parent = dma_fence_get(job->done_fence);
777 		} else {
778 			/* If we had unfinished jobs, flag the entity as guilty so no
779 			 * new job can be submitted.
780 			 */
781 			atomic_set(&queue->ctx->faulty, 1);
782 		}
783 	}
784 
785 	drm_sched_start(&queue->scheduler);
786 }
787 
788 /**
789  * pvr_queue_timedout_job() - Handle a job timeout event.
790  * @s_job: The job this timeout occurred on.
791  *
792  * FIXME: We don't do anything here to unblock the situation, we just stop+start
793  * the scheduler, and re-assign parent fences in the middle.
794  *
795  * Return:
796  *  * DRM_GPU_SCHED_STAT_NOMINAL.
797  */
798 static enum drm_gpu_sched_stat
799 pvr_queue_timedout_job(struct drm_sched_job *s_job)
800 {
801 	struct drm_gpu_scheduler *sched = s_job->sched;
802 	struct pvr_queue *queue = container_of(sched, struct pvr_queue, scheduler);
803 	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
804 	struct pvr_job *job;
805 	u32 job_count = 0;
806 
807 	dev_err(sched->dev, "Job timeout\n");
808 
809 	/* Before we stop the scheduler, make sure the queue is out of any list, so
810 	 * any call to pvr_queue_update_active_state_locked() that might happen
811 	 * until the scheduler is really stopped doesn't end up re-inserting the
812 	 * queue in the active list. This would cause
813 	 * pvr_queue_signal_done_fences() and drm_sched_stop() to race with each
814 	 * other when accessing the pending_list, since drm_sched_stop() doesn't
815 	 * grab the job_list_lock when modifying the list (it's assuming the
816 	 * only other accessor is the scheduler, and it's safe to not grab the
817 	 * lock since it's stopped).
818 	 */
819 	mutex_lock(&pvr_dev->queues.lock);
820 	list_del_init(&queue->node);
821 	mutex_unlock(&pvr_dev->queues.lock);
822 
823 	drm_sched_stop(sched, s_job);
824 
825 	/* Re-assign job parent fences. */
826 	list_for_each_entry(job, &sched->pending_list, base.list) {
827 		job->base.s_fence->parent = dma_fence_get(job->done_fence);
828 		job_count++;
829 	}
830 	WARN_ON(atomic_read(&queue->in_flight_job_count) != job_count);
831 
832 	/* Re-insert the queue in the proper list, and kick a queue processing
833 	 * operation if there were jobs pending.
834 	 */
835 	mutex_lock(&pvr_dev->queues.lock);
836 	if (!job_count) {
837 		list_move_tail(&queue->node, &pvr_dev->queues.idle);
838 	} else {
839 		atomic_set(&queue->in_flight_job_count, job_count);
840 		list_move_tail(&queue->node, &pvr_dev->queues.active);
841 		pvr_queue_process(queue);
842 	}
843 	mutex_unlock(&pvr_dev->queues.lock);
844 
845 	drm_sched_start(sched);
846 
847 	return DRM_GPU_SCHED_STAT_NOMINAL;
848 }
849 
850 /**
851  * pvr_queue_free_job() - Release the reference the scheduler had on a job object.
852  * @sched_job: Job object to free.
853  */
854 static void pvr_queue_free_job(struct drm_sched_job *sched_job)
855 {
856 	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
857 
858 	drm_sched_job_cleanup(sched_job);
859 	job->paired_job = NULL;
860 	pvr_job_put(job);
861 }
862 
863 static const struct drm_sched_backend_ops pvr_queue_sched_ops = {
864 	.prepare_job = pvr_queue_prepare_job,
865 	.run_job = pvr_queue_run_job,
866 	.timedout_job = pvr_queue_timedout_job,
867 	.free_job = pvr_queue_free_job,
868 };
869 
870 /**
871  * pvr_queue_fence_is_ufo_backed() - Check if a dma_fence is backed by a UFO object
872  * @f: Fence to test.
873  *
874  * A UFO-backed fence is a fence that can be signaled or waited upon FW-side.
875  * pvr_job::done_fence objects are backed by the timeline UFO attached to the queue
876  * they are pushed to, but those fences are not directly exposed to the outside
877  * world, so we also need to check if the fence we're being passed is a
878  * drm_sched_fence that came from our driver.
879  */
880 bool pvr_queue_fence_is_ufo_backed(struct dma_fence *f)
881 {
882 	struct drm_sched_fence *sched_fence = f ? to_drm_sched_fence(f) : NULL;
883 
884 	if (sched_fence &&
885 	    sched_fence->sched->ops == &pvr_queue_sched_ops)
886 		return true;
887 
888 	if (f && f->ops == &pvr_queue_job_fence_ops)
889 		return true;
890 
891 	return false;
892 }
893 
894 /**
895  * pvr_queue_signal_done_fences() - Signal done fences.
896  * @queue: Queue to check.
897  *
898  * Signal done fences of jobs whose seqno is less than or equal to the
899  * current value of the UFO object attached to the queue.
900  */
901 static void
902 pvr_queue_signal_done_fences(struct pvr_queue *queue)
903 {
904 	struct pvr_job *job, *tmp_job;
905 	u32 cur_seqno;
906 
907 	spin_lock(&queue->scheduler.job_list_lock);
908 	cur_seqno = *queue->timeline_ufo.value;
909 	list_for_each_entry_safe(job, tmp_job, &queue->scheduler.pending_list, base.list) {
910 		if ((int)(cur_seqno - lower_32_bits(job->done_fence->seqno)) < 0)
911 			break;
912 
913 		if (!dma_fence_is_signaled(job->done_fence)) {
914 			dma_fence_signal(job->done_fence);
915 			pvr_job_release_pm_ref(job);
916 			atomic_dec(&queue->in_flight_job_count);
917 		}
918 	}
919 	spin_unlock(&queue->scheduler.job_list_lock);
920 }
921 
922 /**
923  * pvr_queue_check_job_waiting_for_cccb_space() - Check if the job waiting for CCCB space
924  * can be unblocked and
925  * pushed to the CCCB
926  * @queue: Queue to check
927  *
928  * If we have a job waiting for CCCB, and this job now fits in the CCCB, we signal
929  * its CCCB fence, which should kick drm_sched.
930  */
931 static void
932 pvr_queue_check_job_waiting_for_cccb_space(struct pvr_queue *queue)
933 {
934 	struct pvr_queue_fence *cccb_fence;
935 	u32 native_deps_remaining;
936 	struct pvr_job *job;
937 
938 	mutex_lock(&queue->cccb_fence_ctx.job_lock);
939 	job = queue->cccb_fence_ctx.job;
940 	if (!job)
941 		goto out_unlock;
942 
943 	/* If we have a job attached to the CCCB fence context, its CCCB fence
944 	 * shouldn't be NULL.
945 	 */
946 	if (WARN_ON(!job->cccb_fence)) {
947 		job = NULL;
948 		goto out_unlock;
949 	}
950 
951 	/* If we get there, CCCB fence has to be initialized. */
952 	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
953 	if (WARN_ON(!cccb_fence->queue)) {
954 		job = NULL;
955 		goto out_unlock;
956 	}
957 
958 	/* Evict signaled dependencies before checking for CCCB space.
959 	 * If the job fits, signal the CCCB fence, this should unblock
960 	 * the drm_sched_entity.
961 	 */
962 	native_deps_remaining = job_count_remaining_native_deps(job);
963 	if (!pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
964 		job = NULL;
965 		goto out_unlock;
966 	}
967 
968 	dma_fence_signal(job->cccb_fence);
969 	pvr_queue_fence_put(job->cccb_fence);
970 	job->cccb_fence = NULL;
971 	queue->cccb_fence_ctx.job = NULL;
972 
973 out_unlock:
974 	mutex_unlock(&queue->cccb_fence_ctx.job_lock);
975 
976 	pvr_job_put(job);
977 }
978 
979 /**
980  * pvr_queue_process() - Process events that happened on a queue.
981  * @queue: Queue to check
982  *
983  * Signal job fences and check if jobs waiting for CCCB space can be unblocked.
984  */
985 void pvr_queue_process(struct pvr_queue *queue)
986 {
987 	lockdep_assert_held(&queue->ctx->pvr_dev->queues.lock);
988 
989 	pvr_queue_check_job_waiting_for_cccb_space(queue);
990 	pvr_queue_signal_done_fences(queue);
991 	pvr_queue_update_active_state_locked(queue);
992 }
993 
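/* Map a queue type to the FW data master (DM) its jobs are executed on. */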
994 static u32 get_dm_type(struct pvr_queue *queue)
995 {
996 	switch (queue->type) {
997 	case DRM_PVR_JOB_TYPE_GEOMETRY:
998 		return PVR_FWIF_DM_GEOM;
999 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
1000 	case DRM_PVR_JOB_TYPE_FRAGMENT:
1001 		return PVR_FWIF_DM_FRAG;
1002 	case DRM_PVR_JOB_TYPE_COMPUTE:
1003 		return PVR_FWIF_DM_CDM;
1004 	}
1005 
1006 	return ~0;
1007 }
1008 
1009 /**
1010  * init_fw_context() - Initializes the queue part of a FW context.
1011  * @queue: Queue object to initialize the FW context for.
1012  * @fw_ctx_map: The FW context CPU mapping.
1013  *
1014  * FW contexts contain various states, one of them being a per-queue state
1015  * that needs to be initialized for each queue being exposed by a context. This
1016  * function takes care of that.
1017  */
1018 static void init_fw_context(struct pvr_queue *queue, void *fw_ctx_map)
1019 {
1020 	struct pvr_context *ctx = queue->ctx;
1021 	struct pvr_fw_object *fw_mem_ctx_obj = pvr_vm_get_fw_mem_context(ctx->vm_ctx);
1022 	struct rogue_fwif_fwcommoncontext *cctx_fw;
1023 	struct pvr_cccb *cccb = &queue->cccb;
1024 
1025 	cctx_fw = fw_ctx_map + queue->ctx_offset;
1026 	cctx_fw->ccbctl_fw_addr = cccb->ctrl_fw_addr;
1027 	cctx_fw->ccb_fw_addr = cccb->cccb_fw_addr;
1028 
1029 	cctx_fw->dm = get_dm_type(queue);
1030 	cctx_fw->priority = ctx->priority;
1031 	cctx_fw->priority_seq_num = 0;
1032 	cctx_fw->max_deadline_ms = MAX_DEADLINE_MS;
1033 	cctx_fw->pid = task_tgid_nr(current);
1034 	cctx_fw->server_common_context_id = ctx->ctx_id;
1035 
1036 	pvr_fw_object_get_fw_addr(fw_mem_ctx_obj, &cctx_fw->fw_mem_context_fw_addr);
1037 
1038 	pvr_fw_object_get_fw_addr(queue->reg_state_obj, &cctx_fw->context_state_addr);
1039 }
1040 
1041 /**
1042  * pvr_queue_cleanup_fw_context() - Wait for the FW context to be idle and clean it up.
1043  * @queue: Queue on FW context to clean up.
1044  *
1045  * Return:
1046  *  * 0 on success,
1047  *  * Any error returned by pvr_fw_structure_cleanup() otherwise.
1048  */
1049 static int pvr_queue_cleanup_fw_context(struct pvr_queue *queue)
1050 {
1051 	if (!queue->ctx->fw_obj)
1052 		return 0;
1053 
1054 	return pvr_fw_structure_cleanup(queue->ctx->pvr_dev,
1055 					ROGUE_FWIF_CLEANUP_FWCOMMONCONTEXT,
1056 					queue->ctx->fw_obj, queue->ctx_offset);
1057 }
1058 
1059 /**
1060  * pvr_queue_job_init() - Initialize queue related fields in a pvr_job object.
1061  * @job: The job to initialize.
1062  *
1063  * Bind the job to a queue and allocate memory to guarantee pvr_queue_job_arm()
1064  * and pvr_queue_job_push() can't fail. We also make sure the context type is
1065  * valid and the job can fit in the CCCB.
1066  *
1067  * Return:
1068  *  * 0 on success, or
1069  *  * An error code if something failed.
1070  */
1071 int pvr_queue_job_init(struct pvr_job *job)
1072 {
1073 	/* Fragment jobs need at least one native fence wait on the geometry job fence. */
1074 	u32 min_native_dep_count = job->type == DRM_PVR_JOB_TYPE_FRAGMENT ? 1 : 0;
1075 	struct pvr_queue *queue;
1076 	int err;
1077 
1078 	if (atomic_read(&job->ctx->faulty))
1079 		return -EIO;
1080 
1081 	queue = pvr_context_get_queue_for_job(job->ctx, job->type);
1082 	if (!queue)
1083 		return -EINVAL;
1084 
1085 	if (!pvr_cccb_cmdseq_can_fit(&queue->cccb, job_cmds_size(job, min_native_dep_count)))
1086 		return -E2BIG;
1087 
1088 	err = drm_sched_job_init(&job->base, &queue->entity, 1, THIS_MODULE);
1089 	if (err)
1090 		return err;
1091 
1092 	job->cccb_fence = pvr_queue_fence_alloc();
1093 	job->kccb_fence = pvr_kccb_fence_alloc();
1094 	job->done_fence = pvr_queue_fence_alloc();
1095 	if (!job->cccb_fence || !job->kccb_fence || !job->done_fence)
1096 		return -ENOMEM;
1097 
1098 	return 0;
1099 }
1100 
1101 /**
1102  * pvr_queue_job_arm() - Arm a job object.
1103  * @job: The job to arm.
1104  *
1105  * Initializes fences and returns the drm_sched finished fence so it can
1106  * be exposed to the outside world. Once this function is called, you should
1107  * make sure the job is pushed using pvr_queue_job_push(), or guarantee that
1108  * no one grabbed a reference to the returned fence. The latter can happen if
1109  * we do multi-job submission, and something failed when creating/initializing
1110  * a job. In that case, we know the fence didn't leave the driver, and we
1111  * can thus guarantee nobody will wait on a dead fence object.
1112  *
1113  * Return:
1114  *  * A dma_fence object.
1115  */
1116 struct dma_fence *pvr_queue_job_arm(struct pvr_job *job)
1117 {
1118 	drm_sched_job_arm(&job->base);
1119 
1120 	return &job->base.s_fence->finished;
1121 }
1122 
1123 /**
1124  * pvr_queue_job_cleanup() - Cleanup fence/scheduler related fields in the job object.
1125  * @job: The job to cleanup.
1126  *
1127  * Should be called in the job release path.
1128  */
1129 void pvr_queue_job_cleanup(struct pvr_job *job)
1130 {
1131 	pvr_queue_fence_put(job->done_fence);
1132 	pvr_queue_fence_put(job->cccb_fence);
1133 	pvr_kccb_fence_put(job->kccb_fence);
1134 
1135 	if (job->base.s_fence)
1136 		drm_sched_job_cleanup(&job->base);
1137 }
1138 
1139 /**
1140  * pvr_queue_job_push() - Push a job to its queue.
1141  * @job: The job to push.
1142  *
1143  * Must be called after pvr_queue_job_init() and after all dependencies
1144  * have been added to the job. This will effectively queue the job to
1145  * the drm_sched_entity attached to the queue. We grab a reference on
1146  * the job object, so the caller is free to drop its reference when it's
1147  * done accessing the job object.
1148  */
1149 void pvr_queue_job_push(struct pvr_job *job)
1150 {
1151 	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
1152 
1153 	/* Keep track of the last queued job scheduled fence for combined submit. */
1154 	dma_fence_put(queue->last_queued_job_scheduled_fence);
1155 	queue->last_queued_job_scheduled_fence = dma_fence_get(&job->base.s_fence->scheduled);
1156 
1157 	pvr_job_get(job);
1158 	drm_sched_entity_push_job(&job->base);
1159 }
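
/* Typical submission flow, as a simplified sketch (the real sequence lives in
 * the job submission path and also handles dependencies and error unwinding):
 *
 *	err = pvr_queue_job_init(job);
 *	if (err)
 *		return err;
 *
 *	... add dependencies to job->base ...
 *
 *	out_fence = pvr_queue_job_arm(job);
 *	pvr_queue_job_push(job);
 */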
1160 
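/* pvr_fw_object_create() init callback for the register state object: for
 * geometry queues, seed the VDM call stack pointer with the context's
 * callstack address.
 */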
1161 static void reg_state_init(void *cpu_ptr, void *priv)
1162 {
1163 	struct pvr_queue *queue = priv;
1164 
1165 	if (queue->type == DRM_PVR_JOB_TYPE_GEOMETRY) {
1166 		struct rogue_fwif_geom_ctx_state *geom_ctx_state_fw = cpu_ptr;
1167 
1168 		geom_ctx_state_fw->geom_core[0].geom_reg_vdm_call_stack_pointer_init =
1169 			queue->callstack_addr;
1170 	}
1171 }
1172 
1173 /**
1174  * pvr_queue_create() - Create a queue object.
1175  * @ctx: The context this queue will be attached to.
1176  * @type: The type of jobs being pushed to this queue.
1177  * @args: The arguments passed to the context creation function.
1178  * @fw_ctx_map: CPU mapping of the FW context object.
1179  *
1180  * Create a queue object that will be used to queue and track jobs.
1181  *
1182  * Return:
1183  *  * A valid pointer to a pvr_queue object, or
1184  *  * An error pointer if the creation/initialization failed.
1185  */
1186 struct pvr_queue *pvr_queue_create(struct pvr_context *ctx,
1187 				   enum drm_pvr_job_type type,
1188 				   struct drm_pvr_ioctl_create_context_args *args,
1189 				   void *fw_ctx_map)
1190 {
1191 	static const struct {
1192 		u32 cccb_size;
1193 		const char *name;
1194 	} props[] = {
1195 		[DRM_PVR_JOB_TYPE_GEOMETRY] = {
1196 			.cccb_size = CTX_GEOM_CCCB_SIZE_LOG2,
1197 			.name = "geometry",
1198 		},
1199 		[DRM_PVR_JOB_TYPE_FRAGMENT] = {
1200 			.cccb_size = CTX_FRAG_CCCB_SIZE_LOG2,
1201 			.name = "fragment"
1202 		},
1203 		[DRM_PVR_JOB_TYPE_COMPUTE] = {
1204 			.cccb_size = CTX_COMPUTE_CCCB_SIZE_LOG2,
1205 			.name = "compute"
1206 		},
1207 		[DRM_PVR_JOB_TYPE_TRANSFER_FRAG] = {
1208 			.cccb_size = CTX_TRANSFER_CCCB_SIZE_LOG2,
1209 			.name = "transfer_frag"
1210 		},
1211 	};
1212 	struct pvr_device *pvr_dev = ctx->pvr_dev;
1213 	struct drm_gpu_scheduler *sched;
1214 	struct pvr_queue *queue;
1215 	int ctx_state_size, err;
1216 	void *cpu_map;
1217 
1218 	if (WARN_ON(type >= ARRAY_SIZE(props)))
1219 		return ERR_PTR(-EINVAL);
1220 
1221 	switch (ctx->type) {
1222 	case DRM_PVR_CTX_TYPE_RENDER:
1223 		if (type != DRM_PVR_JOB_TYPE_GEOMETRY &&
1224 		    type != DRM_PVR_JOB_TYPE_FRAGMENT)
1225 			return ERR_PTR(-EINVAL);
1226 		break;
1227 	case DRM_PVR_CTX_TYPE_COMPUTE:
1228 		if (type != DRM_PVR_JOB_TYPE_COMPUTE)
1229 			return ERR_PTR(-EINVAL);
1230 		break;
1231 	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
1232 		if (type != DRM_PVR_JOB_TYPE_TRANSFER_FRAG)
1233 			return ERR_PTR(-EINVAL);
1234 		break;
1235 	default:
1236 		return ERR_PTR(-EINVAL);
1237 	}
1238 
1239 	ctx_state_size = get_ctx_state_size(pvr_dev, type);
1240 	if (ctx_state_size < 0)
1241 		return ERR_PTR(ctx_state_size);
1242 
1243 	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
1244 	if (!queue)
1245 		return ERR_PTR(-ENOMEM);
1246 
1247 	queue->type = type;
1248 	queue->ctx_offset = get_ctx_offset(type);
1249 	queue->ctx = ctx;
1250 	queue->callstack_addr = args->callstack_addr;
1251 	sched = &queue->scheduler;
1252 	INIT_LIST_HEAD(&queue->node);
1253 	mutex_init(&queue->cccb_fence_ctx.job_lock);
1254 	pvr_queue_fence_ctx_init(&queue->cccb_fence_ctx.base);
1255 	pvr_queue_fence_ctx_init(&queue->job_fence_ctx);
1256 
1257 	err = pvr_cccb_init(pvr_dev, &queue->cccb, props[type].cccb_size, props[type].name);
1258 	if (err)
1259 		goto err_free_queue;
1260 
1261 	err = pvr_fw_object_create(pvr_dev, ctx_state_size,
1262 				   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
1263 				   reg_state_init, queue, &queue->reg_state_obj);
1264 	if (err)
1265 		goto err_cccb_fini;
1266 
1267 	init_fw_context(queue, fw_ctx_map);
1268 
1269 	if (type != DRM_PVR_JOB_TYPE_GEOMETRY && type != DRM_PVR_JOB_TYPE_FRAGMENT &&
1270 	    args->callstack_addr) {
1271 		err = -EINVAL;
1272 		goto err_release_reg_state;
1273 	}
1274 
1275 	cpu_map = pvr_fw_object_create_and_map(pvr_dev, sizeof(*queue->timeline_ufo.value),
1276 					       PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
1277 					       NULL, NULL, &queue->timeline_ufo.fw_obj);
1278 	if (IS_ERR(cpu_map)) {
1279 		err = PTR_ERR(cpu_map);
1280 		goto err_release_reg_state;
1281 	}
1282 
1283 	queue->timeline_ufo.value = cpu_map;
1284 
1285 	err = drm_sched_init(&queue->scheduler,
1286 			     &pvr_queue_sched_ops,
1287 			     pvr_dev->sched_wq, 1, 64 * 1024, 1,
1288 			     msecs_to_jiffies(500),
1289 			     pvr_dev->sched_wq, NULL, "pvr-queue",
1290 			     pvr_dev->base.dev);
1291 	if (err)
1292 		goto err_release_ufo;
1293 
1294 	err = drm_sched_entity_init(&queue->entity,
1295 				    DRM_SCHED_PRIORITY_KERNEL,
1296 				    &sched, 1, &ctx->faulty);
1297 	if (err)
1298 		goto err_sched_fini;
1299 
1300 	mutex_lock(&pvr_dev->queues.lock);
1301 	list_add_tail(&queue->node, &pvr_dev->queues.idle);
1302 	mutex_unlock(&pvr_dev->queues.lock);
1303 
1304 	return queue;
1305 
1306 err_sched_fini:
1307 	drm_sched_fini(&queue->scheduler);
1308 
1309 err_release_ufo:
1310 	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
1311 
1312 err_release_reg_state:
1313 	pvr_fw_object_destroy(queue->reg_state_obj);
1314 
1315 err_cccb_fini:
1316 	pvr_cccb_fini(&queue->cccb);
1317 
1318 err_free_queue:
1319 	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
1320 	kfree(queue);
1321 
1322 	return ERR_PTR(err);
1323 }
1324 
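/* Stop all queue schedulers before a device reset so no new kicks reach the
 * FW while the reset is in progress.
 */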
1325 void pvr_queue_device_pre_reset(struct pvr_device *pvr_dev)
1326 {
1327 	struct pvr_queue *queue;
1328 
1329 	mutex_lock(&pvr_dev->queues.lock);
1330 	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
1331 		pvr_queue_stop(queue, NULL);
1332 	list_for_each_entry(queue, &pvr_dev->queues.active, node)
1333 		pvr_queue_stop(queue, NULL);
1334 	mutex_unlock(&pvr_dev->queues.lock);
1335 }
1336 
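/* Restart all queue schedulers once the device reset has completed. */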
1337 void pvr_queue_device_post_reset(struct pvr_device *pvr_dev)
1338 {
1339 	struct pvr_queue *queue;
1340 
1341 	mutex_lock(&pvr_dev->queues.lock);
1342 	list_for_each_entry(queue, &pvr_dev->queues.active, node)
1343 		pvr_queue_start(queue);
1344 	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
1345 		pvr_queue_start(queue);
1346 	mutex_unlock(&pvr_dev->queues.lock);
1347 }
1348 
1349 /**
1350  * pvr_queue_kill() - Kill a queue.
1351  * @queue: The queue to kill.
1352  *
1353  * Kill the queue so no new jobs can be pushed. Should be called when the
1354  * context handle is destroyed. The queue object might last longer if jobs
1355  * are still in flight and holding a reference to the context this queue
1356  * belongs to.
1357  */
1358 void pvr_queue_kill(struct pvr_queue *queue)
1359 {
1360 	drm_sched_entity_destroy(&queue->entity);
1361 	dma_fence_put(queue->last_queued_job_scheduled_fence);
1362 	queue->last_queued_job_scheduled_fence = NULL;
1363 }
1364 
1365 /**
1366  * pvr_queue_destroy() - Destroy a queue.
1367  * @queue: The queue to destroy.
1368  *
1369  * Cleanup the queue and free the resources attached to it. Should be
1370  * called from the context release function.
1371  */
1372 void pvr_queue_destroy(struct pvr_queue *queue)
1373 {
1374 	if (!queue)
1375 		return;
1376 
1377 	mutex_lock(&queue->ctx->pvr_dev->queues.lock);
1378 	list_del_init(&queue->node);
1379 	mutex_unlock(&queue->ctx->pvr_dev->queues.lock);
1380 
1381 	drm_sched_fini(&queue->scheduler);
1382 	drm_sched_entity_fini(&queue->entity);
1383 
1384 	if (WARN_ON(queue->last_queued_job_scheduled_fence))
1385 		dma_fence_put(queue->last_queued_job_scheduled_fence);
1386 
1387 	pvr_queue_cleanup_fw_context(queue);
1388 
1389 	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
1390 	pvr_fw_object_destroy(queue->reg_state_obj);
1391 	pvr_cccb_fini(&queue->cccb);
1392 	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
1393 	kfree(queue);
1394 }
1395 
1396 /**
1397  * pvr_queue_device_init() - Device-level initialization of queue related fields.
1398  * @pvr_dev: The device to initialize.
1399  *
1400  * Initializes all fields related to queue management in pvr_device.
1401  *
1402  * Return:
1403  *  * 0 on success, or
1404  *  * An error code on failure.
1405  */
1406 int pvr_queue_device_init(struct pvr_device *pvr_dev)
1407 {
1408 	int err;
1409 
1410 	INIT_LIST_HEAD(&pvr_dev->queues.active);
1411 	INIT_LIST_HEAD(&pvr_dev->queues.idle);
1412 	err = drmm_mutex_init(from_pvr_device(pvr_dev), &pvr_dev->queues.lock);
1413 	if (err)
1414 		return err;
1415 
1416 	pvr_dev->sched_wq = alloc_workqueue("powervr-sched", WQ_UNBOUND, 0);
1417 	if (!pvr_dev->sched_wq)
1418 		return -ENOMEM;
1419 
1420 	return 0;
1421 }
1422 
1423 /**
1424  * pvr_queue_device_fini() - Device-level cleanup of queue related fields.
1425  * @pvr_dev: The device to cleanup.
1426  *
1427  * Cleanup/free all queue-related resources attached to a pvr_device object.
1428  */
1429 void pvr_queue_device_fini(struct pvr_device *pvr_dev)
1430 {
1431 	destroy_workqueue(pvr_dev->sched_wq);
1432 }
1433