1 // SPDX-License-Identifier: GPL-2.0-only OR MIT
2 /* Copyright (c) 2023 Imagination Technologies Ltd. */
3 
4 #include <drm/drm_managed.h>
5 #include <drm/gpu_scheduler.h>
6 
7 #include "pvr_cccb.h"
8 #include "pvr_context.h"
9 #include "pvr_device.h"
10 #include "pvr_drv.h"
11 #include "pvr_job.h"
12 #include "pvr_queue.h"
13 #include "pvr_vm.h"
14 
15 #include "pvr_rogue_fwif_client.h"
16 
17 #define MAX_DEADLINE_MS 30000
18 
19 #define CTX_COMPUTE_CCCB_SIZE_LOG2 15
20 #define CTX_FRAG_CCCB_SIZE_LOG2 15
21 #define CTX_GEOM_CCCB_SIZE_LOG2 15
22 #define CTX_TRANSFER_CCCB_SIZE_LOG2 15
23 
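/*
 * Size of the FW-side context state for a transfer queue: the base
 * rogue_fwif_frag_ctx_state structure plus one ISP store register slot per
 * ISP IPP pipe (a single slot on cores with the xe_memory_hierarchy feature).
 */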
24 static int get_xfer_ctx_state_size(struct pvr_device *pvr_dev)
25 {
26 	u32 num_isp_store_registers;
27 
28 	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
29 		num_isp_store_registers = 1;
30 	} else {
31 		int err;
32 
33 		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
34 		if (WARN_ON(err))
35 			return err;
36 	}
37 
38 	return sizeof(struct rogue_fwif_frag_ctx_state) +
39 	       (num_isp_store_registers *
40 		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
41 }
42 
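/*
 * Size of the FW-side context state for a fragment queue. The number of ISP
 * store register slots depends on the core: num_raster_pipes (scaled by the
 * number of XPU slaves on multicore parts) with the xe_memory_hierarchy
 * feature, num_isp_ipp_pipes otherwise.
 */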
43 static int get_frag_ctx_state_size(struct pvr_device *pvr_dev)
44 {
45 	u32 num_isp_store_registers;
46 	int err;
47 
48 	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
49 		err = PVR_FEATURE_VALUE(pvr_dev, num_raster_pipes, &num_isp_store_registers);
50 		if (WARN_ON(err))
51 			return err;
52 
53 		if (PVR_HAS_FEATURE(pvr_dev, gpu_multicore_support)) {
54 			u32 xpu_max_slaves;
55 
56 			err = PVR_FEATURE_VALUE(pvr_dev, xpu_max_slaves, &xpu_max_slaves);
57 			if (WARN_ON(err))
58 				return err;
59 
60 			num_isp_store_registers *= (1 + xpu_max_slaves);
61 		}
62 	} else {
63 		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
64 		if (WARN_ON(err))
65 			return err;
66 	}
67 
68 	return sizeof(struct rogue_fwif_frag_ctx_state) +
69 	       (num_isp_store_registers *
70 		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
71 }
72 
73 static int get_ctx_state_size(struct pvr_device *pvr_dev, enum drm_pvr_job_type type)
74 {
75 	switch (type) {
76 	case DRM_PVR_JOB_TYPE_GEOMETRY:
77 		return sizeof(struct rogue_fwif_geom_ctx_state);
78 	case DRM_PVR_JOB_TYPE_FRAGMENT:
79 		return get_frag_ctx_state_size(pvr_dev);
80 	case DRM_PVR_JOB_TYPE_COMPUTE:
81 		return sizeof(struct rogue_fwif_compute_ctx_state);
82 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
83 		return get_xfer_ctx_state_size(pvr_dev);
84 	}
85 
86 	WARN(1, "Invalid queue type");
87 	return -EINVAL;
88 }
89 
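/*
 * Offset of the per-queue FW context within the context's FW object, selected
 * by the type of jobs this queue handles.
 */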
90 static u32 get_ctx_offset(enum drm_pvr_job_type type)
91 {
92 	switch (type) {
93 	case DRM_PVR_JOB_TYPE_GEOMETRY:
94 		return offsetof(struct rogue_fwif_fwrendercontext, geom_context);
95 	case DRM_PVR_JOB_TYPE_FRAGMENT:
96 		return offsetof(struct rogue_fwif_fwrendercontext, frag_context);
97 	case DRM_PVR_JOB_TYPE_COMPUTE:
98 		return offsetof(struct rogue_fwif_fwcomputecontext, cdm_context);
99 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
100 		return offsetof(struct rogue_fwif_fwtransfercontext, tq_context);
101 	}
102 
103 	return 0;
104 }
105 
106 static const char *
107 pvr_queue_fence_get_driver_name(struct dma_fence *f)
108 {
109 	return PVR_DRIVER_NAME;
110 }
111 
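/*
 * Deferred fence release: the work item drops the queue's context reference
 * and frees the fence object. pvr_queue_fence_release() queues this work on
 * the device's scheduler workqueue instead of doing the cleanup inline.
 */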
112 static void pvr_queue_fence_release_work(struct work_struct *w)
113 {
114 	struct pvr_queue_fence *fence = container_of(w, struct pvr_queue_fence, release_work);
115 
116 	pvr_context_put(fence->queue->ctx);
117 	dma_fence_free(&fence->base);
118 }
119 
120 static void pvr_queue_fence_release(struct dma_fence *f)
121 {
122 	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
123 	struct pvr_device *pvr_dev = fence->queue->ctx->pvr_dev;
124 
125 	queue_work(pvr_dev->sched_wq, &fence->release_work);
126 }
127 
128 static const char *
129 pvr_queue_job_fence_get_timeline_name(struct dma_fence *f)
130 {
131 	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
132 
133 	switch (fence->queue->type) {
134 	case DRM_PVR_JOB_TYPE_GEOMETRY:
135 		return "geometry";
136 
137 	case DRM_PVR_JOB_TYPE_FRAGMENT:
138 		return "fragment";
139 
140 	case DRM_PVR_JOB_TYPE_COMPUTE:
141 		return "compute";
142 
143 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
144 		return "transfer";
145 	}
146 
147 	WARN(1, "Invalid queue type");
148 	return "invalid";
149 }
150 
151 static const char *
152 pvr_queue_cccb_fence_get_timeline_name(struct dma_fence *f)
153 {
154 	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
155 
156 	switch (fence->queue->type) {
157 	case DRM_PVR_JOB_TYPE_GEOMETRY:
158 		return "geometry-cccb";
159 
160 	case DRM_PVR_JOB_TYPE_FRAGMENT:
161 		return "fragment-cccb";
162 
163 	case DRM_PVR_JOB_TYPE_COMPUTE:
164 		return "compute-cccb";
165 
166 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
167 		return "transfer-cccb";
168 	}
169 
170 	WARN(1, "Invalid queue type");
171 	return "invalid";
172 }
173 
174 static const struct dma_fence_ops pvr_queue_job_fence_ops = {
175 	.get_driver_name = pvr_queue_fence_get_driver_name,
176 	.get_timeline_name = pvr_queue_job_fence_get_timeline_name,
177 	.release = pvr_queue_fence_release,
178 };
179 
180 /**
181  * to_pvr_queue_job_fence() - Return a pvr_queue_fence object if the fence is
182  * backed by a UFO.
183  * @f: The dma_fence to turn into a pvr_queue_fence.
184  *
185  * Return:
186  *  * A non-NULL pvr_queue_fence object if the dma_fence is backed by a UFO, or
187  *  * NULL otherwise.
188  */
189 static struct pvr_queue_fence *
190 to_pvr_queue_job_fence(struct dma_fence *f)
191 {
192 	struct drm_sched_fence *sched_fence = to_drm_sched_fence(f);
193 
194 	if (sched_fence)
195 		f = sched_fence->parent;
196 
197 	if (f && f->ops == &pvr_queue_job_fence_ops)
198 		return container_of(f, struct pvr_queue_fence, base);
199 
200 	return NULL;
201 }
202 
203 static const struct dma_fence_ops pvr_queue_cccb_fence_ops = {
204 	.get_driver_name = pvr_queue_fence_get_driver_name,
205 	.get_timeline_name = pvr_queue_cccb_fence_get_timeline_name,
206 	.release = pvr_queue_fence_release,
207 };
208 
209 /**
210  * pvr_queue_fence_put() - Put wrapper for pvr_queue_fence objects.
211  * @f: The dma_fence object to put.
212  *
213  * If the pvr_queue_fence has been initialized, we call dma_fence_put(),
214  * otherwise we free the object with dma_fence_free(). This allows us
215  * to do the right thing before and after pvr_queue_fence_init() has been
216  * called.
217  */
218 static void pvr_queue_fence_put(struct dma_fence *f)
219 {
220 	if (!f)
221 		return;
222 
223 	if (WARN_ON(f->ops &&
224 		    f->ops != &pvr_queue_cccb_fence_ops &&
225 		    f->ops != &pvr_queue_job_fence_ops))
226 		return;
227 
228 	/* If the fence hasn't been initialized yet, free the object directly. */
229 	if (f->ops)
230 		dma_fence_put(f);
231 	else
232 		dma_fence_free(f);
233 }
234 
235 /**
236  * pvr_queue_fence_alloc() - Allocate a pvr_queue_fence fence object
237  *
238  * Call this function to allocate job CCCB and done fences. This only
239  * allocates the objects. Initialization happens when the underlying
240  * dma_fence object is to be returned to drm_sched (in prepare_job() or
241  * run_job()).
242  *
243  * Return:
244  *  * A valid pointer if the allocation succeeds, or
245  *  * NULL if the allocation fails.
246  */
247 static struct dma_fence *
248 pvr_queue_fence_alloc(void)
249 {
250 	struct pvr_queue_fence *fence;
251 
252 	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
253 	if (!fence)
254 		return NULL;
255 
256 	return &fence->base;
257 }
258 
259 /**
260  * pvr_queue_fence_init() - Initializes a pvr_queue_fence object.
261  * @f: The fence to initialize
262  * @queue: The queue this fence belongs to.
263  * @fence_ops: The fence operations.
264  * @fence_ctx: The fence context.
265  *
266  * Wrapper around dma_fence_init() that takes care of initializing the
267  * pvr_queue_fence::queue field too.
268  */
269 static void
270 pvr_queue_fence_init(struct dma_fence *f,
271 		     struct pvr_queue *queue,
272 		     const struct dma_fence_ops *fence_ops,
273 		     struct pvr_queue_fence_ctx *fence_ctx)
274 {
275 	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
276 
277 	pvr_context_get(queue->ctx);
278 	fence->queue = queue;
279 	INIT_WORK(&fence->release_work, pvr_queue_fence_release_work);
280 	dma_fence_init(&fence->base, fence_ops,
281 		       &fence_ctx->lock, fence_ctx->id,
282 		       atomic_inc_return(&fence_ctx->seqno));
283 }
284 
285 /**
286  * pvr_queue_cccb_fence_init() - Initializes a CCCB fence object.
287  * @fence: The fence to initialize.
288  * @queue: The queue this fence belongs to.
289  *
290  * Initializes a fence that can be used to wait for CCCB space.
291  *
292  * Should be called in the ::prepare_job() path, so the fence returned to
293  * drm_sched is valid.
294  */
295 static void
296 pvr_queue_cccb_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
297 {
298 	pvr_queue_fence_init(fence, queue, &pvr_queue_cccb_fence_ops,
299 			     &queue->cccb_fence_ctx.base);
300 }
301 
302 /**
303  * pvr_queue_job_fence_init() - Initializes a job done fence object.
304  * @fence: The fence to initialize.
305  * @queue: The queue this fence belongs to.
306  *
307  * Initializes a fence that will be signaled when the GPU is done executing
308  * a job.
309  *
310  * Should be called *before* the ::run_job() path, so the fence is initialised
311  * before being placed in the pending_list.
312  */
313 static void
314 pvr_queue_job_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
315 {
316 	if (!fence->ops)
317 		pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops,
318 				     &queue->job_fence_ctx);
319 }
320 
321 /**
322  * pvr_queue_fence_ctx_init() - Queue fence context initialization.
323  * @fence_ctx: The context to initialize
324  */
325 static void
326 pvr_queue_fence_ctx_init(struct pvr_queue_fence_ctx *fence_ctx)
327 {
328 	spin_lock_init(&fence_ctx->lock);
329 	fence_ctx->id = dma_fence_context_alloc(1);
330 	atomic_set(&fence_ctx->seqno, 0);
331 }
332 
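/*
 * CCCB space (headers included) needed to emit UFO commands for @elem_count
 * UFOs, split into chunks of at most ROGUE_FWIF_CCB_CMD_MAX_UFOS UFOs per
 * command.
 */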
333 static u32 ufo_cmds_size(u32 elem_count)
334 {
335 	/* We can pass at most ROGUE_FWIF_CCB_CMD_MAX_UFOS per UFO-related command. */
336 	u32 full_cmd_count = elem_count / ROGUE_FWIF_CCB_CMD_MAX_UFOS;
337 	u32 remaining_elems = elem_count % ROGUE_FWIF_CCB_CMD_MAX_UFOS;
338 	u32 size = full_cmd_count *
339 		   pvr_cccb_get_size_of_cmd_with_hdr(ROGUE_FWIF_CCB_CMD_MAX_UFOS *
340 						     sizeof(struct rogue_fwif_ufo));
341 
342 	if (remaining_elems) {
343 		size += pvr_cccb_get_size_of_cmd_with_hdr(remaining_elems *
344 							  sizeof(struct rogue_fwif_ufo));
345 	}
346 
347 	return size;
348 }
349 
350 static u32 job_cmds_size(struct pvr_job *job, u32 ufo_wait_count)
351 {
352 	/* One UFO cmd for the fence signaling, one UFO cmd per native fence wait,
353 	 * and a command for the job itself.
354 	 */
355 	return ufo_cmds_size(1) + ufo_cmds_size(ufo_wait_count) +
356 	       pvr_cccb_get_size_of_cmd_with_hdr(job->cmd_len);
357 }
358 
359 /**
360  * job_count_remaining_native_deps() - Count the number of non-signaled native dependencies.
361  * @job: Job to operate on.
362  *
363  * Returns: Number of non-signaled native deps remaining.
364  */
365 static unsigned long job_count_remaining_native_deps(struct pvr_job *job)
366 {
367 	unsigned long remaining_count = 0;
368 	struct dma_fence *fence = NULL;
369 	unsigned long index;
370 
371 	xa_for_each(&job->base.dependencies, index, fence) {
372 		struct pvr_queue_fence *jfence;
373 
374 		jfence = to_pvr_queue_job_fence(fence);
375 		if (!jfence)
376 			continue;
377 
378 		if (!dma_fence_is_signaled(&jfence->base))
379 			remaining_count++;
380 	}
381 
382 	return remaining_count;
383 }
384 
385 /**
386  * pvr_queue_get_job_cccb_fence() - Get the CCCB fence attached to a job.
387  * @queue: The queue this job will be submitted to.
388  * @job: The job to get the CCCB fence on.
389  *
390  * The CCCB fence is a synchronization primitive allowing us to delay job
391  * submission until there's enough space in the CCCB to submit the job.
392  *
393  * Return:
394  *  * NULL if there's enough space in the CCCB to submit this job, or
395  *  * A valid dma_fence object otherwise.
396  */
397 static struct dma_fence *
398 pvr_queue_get_job_cccb_fence(struct pvr_queue *queue, struct pvr_job *job)
399 {
400 	struct pvr_queue_fence *cccb_fence;
401 	unsigned int native_deps_remaining;
402 
403 	/* If the fence is NULL, that means we already checked that we had
404 	 * enough space in the cccb for our job.
405 	 */
406 	if (!job->cccb_fence)
407 		return NULL;
408 
409 	mutex_lock(&queue->cccb_fence_ctx.job_lock);
410 
411 	/* Count remaining native dependencies and check if the job fits in the CCCB. */
412 	native_deps_remaining = job_count_remaining_native_deps(job);
413 	if (pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
414 		pvr_queue_fence_put(job->cccb_fence);
415 		job->cccb_fence = NULL;
416 		goto out_unlock;
417 	}
418 
419 	/* There should be no job attached to the CCCB fence context:
420 	 * drm_sched_entity guarantees that jobs are submitted one at a time.
421 	 */
422 	if (WARN_ON(queue->cccb_fence_ctx.job))
423 		pvr_job_put(queue->cccb_fence_ctx.job);
424 
425 	queue->cccb_fence_ctx.job = pvr_job_get(job);
426 
427 	/* Initialize the fence before returning it. */
428 	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
429 	if (!WARN_ON(cccb_fence->queue))
430 		pvr_queue_cccb_fence_init(job->cccb_fence, queue);
431 
432 out_unlock:
433 	mutex_unlock(&queue->cccb_fence_ctx.job_lock);
434 
435 	return dma_fence_get(job->cccb_fence);
436 }
437 
438 /**
439  * pvr_queue_get_job_kccb_fence() - Get the KCCB fence attached to a job.
440  * @queue: The queue this job will be submitted to.
441  * @job: The job to get the KCCB fence on.
442  *
443  * The KCCB fence is a synchronization primitive allowing us to delay job
444  * submission until there's enough space in the KCCB to submit the job.
445  *
446  * Return:
447  *  * NULL if there's enough space in the KCCB to submit this job, or
448  *  * A valid dma_fence object otherwise.
449  */
450 static struct dma_fence *
451 pvr_queue_get_job_kccb_fence(struct pvr_queue *queue, struct pvr_job *job)
452 {
453 	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
454 	struct dma_fence *kccb_fence = NULL;
455 
456 	/* If the fence is NULL, that means we already checked that we had
457 	 * enough space in the KCCB for our job.
458 	 */
459 	if (!job->kccb_fence)
460 		return NULL;
461 
462 	if (!WARN_ON(job->kccb_fence->ops)) {
463 		kccb_fence = pvr_kccb_reserve_slot(pvr_dev, job->kccb_fence);
464 		job->kccb_fence = NULL;
465 	}
466 
467 	return kccb_fence;
468 }
469 
470 static struct dma_fence *
471 pvr_queue_get_paired_frag_job_dep(struct pvr_queue *queue, struct pvr_job *job)
472 {
473 	struct pvr_job *frag_job = job->type == DRM_PVR_JOB_TYPE_GEOMETRY ?
474 				   job->paired_job : NULL;
475 	struct dma_fence *f;
476 	unsigned long index;
477 
478 	if (!frag_job)
479 		return NULL;
480 
481 	xa_for_each(&frag_job->base.dependencies, index, f) {
482 		/* Skip already signaled fences. */
483 		if (dma_fence_is_signaled(f))
484 			continue;
485 
486 		/* Skip our own fence. */
487 		if (f == &job->base.s_fence->scheduled)
488 			continue;
489 
490 		return dma_fence_get(f);
491 	}
492 
493 	return frag_job->base.sched->ops->prepare_job(&frag_job->base, &queue->entity);
494 }
495 
496 /**
497  * pvr_queue_prepare_job() - Return the next internal dependency expressed as a dma_fence.
498  * @sched_job: The job to query the next internal dependency on
499  * @s_entity: The entity this job is queued on.
500  *
501  * After iterating over drm_sched_job::dependencies, drm_sched lets the driver return
502  * its own internal dependencies. We use this function to return our internal dependencies.
503  */
504 static struct dma_fence *
505 pvr_queue_prepare_job(struct drm_sched_job *sched_job,
506 		      struct drm_sched_entity *s_entity)
507 {
508 	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
509 	struct pvr_queue *queue = container_of(s_entity, struct pvr_queue, entity);
510 	struct dma_fence *internal_dep = NULL;
511 
512 	/*
513 	 * Initialize the done_fence, so we can signal it. This must be done
514 	 * here because otherwise by the time of run_job() the job will end up
515 	 * in the pending list without a valid fence.
516 	 */
517 	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
518 		/*
519 		 * This will be called on a paired fragment job after it has been
520 		 * submitted to the firmware. We can tell this is the case, and bail
521 		 * out early, by checking whether run_job() has been called on the
522 		 * geometry job, since that is what takes the PM reference.
523 		 */
524 		if (job->paired_job->has_pm_ref)
525 			return NULL;
526 
527 		/*
528 		 * In this case we need to use the job's own ctx to initialise
529 		 * the done_fence.  The other steps are done in the ctx of the
530 		 * paired geometry job.
531 		 */
532 		pvr_queue_job_fence_init(job->done_fence,
533 					 job->ctx->queues.fragment);
534 	} else {
535 		pvr_queue_job_fence_init(job->done_fence, queue);
536 	}
537 
538 	/* CCCB fence is used to make sure we have enough space in the CCCB to
539 	 * submit our commands.
540 	 */
541 	internal_dep = pvr_queue_get_job_cccb_fence(queue, job);
542 
543 	/* KCCB fence is used to make sure we have a KCCB slot to queue our
544 	 * CMD_KICK.
545 	 */
546 	if (!internal_dep)
547 		internal_dep = pvr_queue_get_job_kccb_fence(queue, job);
548 
549 	/* Any extra internal dependency should be added here, using the following
550 	 * pattern:
551 	 *
552 	 *	if (!internal_dep)
553 	 *		internal_dep = pvr_queue_get_job_xxxx_fence(queue, job);
554 	 */
555 
556 	/* The paired job fence should come last, when everything else is ready. */
557 	if (!internal_dep)
558 		internal_dep = pvr_queue_get_paired_frag_job_dep(queue, job);
559 
560 	return internal_dep;
561 }
562 
563 /**
564  * pvr_queue_update_active_state_locked() - Update the queue active state.
565  * @queue: Queue to update the state on.
566  *
567  * Locked version of pvr_queue_update_active_state(). Must be called with
568  * pvr_device::queue::lock held.
569  */
570 static void pvr_queue_update_active_state_locked(struct pvr_queue *queue)
571 {
572 	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
573 
574 	lockdep_assert_held(&pvr_dev->queues.lock);
575 
576 	/* The queue is temporarily out of any list when it's being reset,
577 	 * we don't want a call to pvr_queue_update_active_state_locked()
578 	 * to re-insert it behind our back.
579 	 */
580 	if (list_empty(&queue->node))
581 		return;
582 
583 	if (!atomic_read(&queue->in_flight_job_count))
584 		list_move_tail(&queue->node, &pvr_dev->queues.idle);
585 	else
586 		list_move_tail(&queue->node, &pvr_dev->queues.active);
587 }
588 
589 /**
590  * pvr_queue_update_active_state() - Update the queue active state.
591  * @queue: Queue to update the state on.
592  *
593  * Active state is based on the in_flight_job_count value.
594  *
595  * Updating the active state implies moving the queue in or out of the
596  * active queue list, which also defines whether the queue is checked
597  * or not when a FW event is received.
598  *
599  * This function should be called any time a job is submitted or its done
600  * fence is signaled.
601  */
602 static void pvr_queue_update_active_state(struct pvr_queue *queue)
603 {
604 	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
605 
606 	mutex_lock(&pvr_dev->queues.lock);
607 	pvr_queue_update_active_state_locked(queue);
608 	mutex_unlock(&pvr_dev->queues.lock);
609 }
610 
611 static void pvr_queue_submit_job_to_cccb(struct pvr_job *job)
612 {
613 	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
614 	struct rogue_fwif_ufo ufos[ROGUE_FWIF_CCB_CMD_MAX_UFOS];
615 	struct pvr_cccb *cccb = &queue->cccb;
616 	struct pvr_queue_fence *jfence;
617 	struct dma_fence *fence;
618 	unsigned long index;
619 	u32 ufo_count = 0;
620 
621 	/* We need to add the queue to the active list before updating the CCCB,
622 	 * otherwise we might miss the FW event informing us that something
623 	 * happened on this queue.
624 	 */
625 	atomic_inc(&queue->in_flight_job_count);
626 	pvr_queue_update_active_state(queue);
627 
628 	xa_for_each(&job->base.dependencies, index, fence) {
629 		jfence = to_pvr_queue_job_fence(fence);
630 		if (!jfence)
631 			continue;
632 
633 		/* Skip the partial render fence, we will place it at the end. */
634 		if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job &&
635 		    &job->paired_job->base.s_fence->scheduled == fence)
636 			continue;
637 
638 		if (dma_fence_is_signaled(&jfence->base))
639 			continue;
640 
641 		pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
642 					  &ufos[ufo_count].addr);
643 		ufos[ufo_count++].value = jfence->base.seqno;
644 
645 		if (ufo_count == ARRAY_SIZE(ufos)) {
646 			pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
647 							   sizeof(ufos), ufos, 0, 0);
648 			ufo_count = 0;
649 		}
650 	}
651 
652 	/* Partial render fence goes last. */
653 	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
654 		jfence = to_pvr_queue_job_fence(job->paired_job->done_fence);
655 		if (!WARN_ON(!jfence)) {
656 			pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
657 						  &ufos[ufo_count].addr);
658 			ufos[ufo_count++].value = job->paired_job->done_fence->seqno;
659 		}
660 	}
661 
662 	if (ufo_count) {
663 		pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
664 						   sizeof(ufos[0]) * ufo_count, ufos, 0, 0);
665 	}
666 
667 	if (job->type == DRM_PVR_JOB_TYPE_GEOMETRY && job->paired_job) {
668 		struct rogue_fwif_cmd_geom *cmd = job->cmd;
669 
670 		/* Reference value for the partial render test is the current queue fence
671 		 * seqno minus one.
672 		 */
673 		pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj,
674 					  &cmd->partial_render_geom_frag_fence.addr);
675 		cmd->partial_render_geom_frag_fence.value = job->done_fence->seqno - 1;
676 	}
677 
678 	/* Submit job to FW */
679 	pvr_cccb_write_command_with_header(cccb, job->fw_ccb_cmd_type, job->cmd_len, job->cmd,
680 					   job->id, job->id);
681 
682 	/* Signal the job fence. */
683 	pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj, &ufos[0].addr);
684 	ufos[0].value = job->done_fence->seqno;
685 	pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_UPDATE,
686 					   sizeof(ufos[0]), ufos, 0, 0);
687 }
688 
689 /**
690  * pvr_queue_run_job() - Submit a job to the FW.
691  * @sched_job: The job to submit.
692  *
693  * This function is called when all non-native dependencies have been met and
694  * when the commands resulting from this job are guaranteed to fit in the CCCB.
695  */
696 static struct dma_fence *pvr_queue_run_job(struct drm_sched_job *sched_job)
697 {
698 	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
699 	struct pvr_device *pvr_dev = job->pvr_dev;
700 	int err;
701 
702 	/* The fragment job is issued along with the geometry job when we use
703 	 * combined geom+frag kicks. When we get here, we should simply return the
704 	 * done_fence that's been initialized earlier.
705 	 */
706 	if (job->paired_job && job->type == DRM_PVR_JOB_TYPE_FRAGMENT &&
707 	    job->done_fence->ops) {
708 		return dma_fence_get(job->done_fence);
709 	}
710 
711 	/* The only kinds of jobs that can be paired are geometry and fragment, and
712 	 * we bail out early if we see a fragment job that's paired with a geometry
713 	 * job.
714 	 * Paired jobs must also target the same context and point to the same
715 	 * HWRT.
716 	 */
717 	if (WARN_ON(job->paired_job &&
718 		    (job->type != DRM_PVR_JOB_TYPE_GEOMETRY ||
719 		     job->paired_job->type != DRM_PVR_JOB_TYPE_FRAGMENT ||
720 		     job->hwrt != job->paired_job->hwrt ||
721 		     job->ctx != job->paired_job->ctx)))
722 		return ERR_PTR(-EINVAL);
723 
724 	err = pvr_job_get_pm_ref(job);
725 	if (WARN_ON(err))
726 		return ERR_PTR(err);
727 
728 	if (job->paired_job) {
729 		err = pvr_job_get_pm_ref(job->paired_job);
730 		if (WARN_ON(err))
731 			return ERR_PTR(err);
732 	}
733 
734 	/* Submit our job to the CCCB */
735 	pvr_queue_submit_job_to_cccb(job);
736 
737 	if (job->paired_job) {
738 		struct pvr_job *geom_job = job;
739 		struct pvr_job *frag_job = job->paired_job;
740 		struct pvr_queue *geom_queue = job->ctx->queues.geometry;
741 		struct pvr_queue *frag_queue = job->ctx->queues.fragment;
742 
743 		/* Submit the fragment job along with the geometry job and send a combined kick. */
744 		pvr_queue_submit_job_to_cccb(frag_job);
745 		pvr_cccb_send_kccb_combined_kick(pvr_dev,
746 						 &geom_queue->cccb, &frag_queue->cccb,
747 						 pvr_context_get_fw_addr(geom_job->ctx) +
748 						 geom_queue->ctx_offset,
749 						 pvr_context_get_fw_addr(frag_job->ctx) +
750 						 frag_queue->ctx_offset,
751 						 job->hwrt,
752 						 frag_job->fw_ccb_cmd_type ==
753 						 ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR);
754 	} else {
755 		struct pvr_queue *queue = container_of(job->base.sched,
756 						       struct pvr_queue, scheduler);
757 
758 		pvr_cccb_send_kccb_kick(pvr_dev, &queue->cccb,
759 					pvr_context_get_fw_addr(job->ctx) + queue->ctx_offset,
760 					job->hwrt);
761 	}
762 
763 	return dma_fence_get(job->done_fence);
764 }
765 
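/* Stop the queue's scheduler, optionally flagging @bad_job as the offender. */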
766 static void pvr_queue_stop(struct pvr_queue *queue, struct pvr_job *bad_job)
767 {
768 	drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
769 }
770 
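/*
 * Restart a queue after a reset: CPU-signal the timeline UFO so other queues
 * aren't left waiting on it, re-attach done fences as parent fences for jobs
 * that completed while the scheduler was stopped, flag the context as faulty
 * if some jobs didn't complete, then restart the scheduler.
 */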
771 static void pvr_queue_start(struct pvr_queue *queue)
772 {
773 	struct pvr_job *job;
774 
775 	/* Make sure we CPU-signal the UFO object, so other queues don't get
776 	 * blocked waiting on it.
777 	 */
778 	*queue->timeline_ufo.value = atomic_read(&queue->job_fence_ctx.seqno);
779 
780 	list_for_each_entry(job, &queue->scheduler.pending_list, base.list) {
781 		if (dma_fence_is_signaled(job->done_fence)) {
782 			/* Jobs might have completed after drm_sched_stop() was called.
783 			 * In that case, re-assign the parent field to the done_fence.
784 			 */
785 			WARN_ON(job->base.s_fence->parent);
786 			job->base.s_fence->parent = dma_fence_get(job->done_fence);
787 		} else {
788 			/* If we had unfinished jobs, flag the entity as guilty so no
789 			 * new job can be submitted.
790 			 */
791 			atomic_set(&queue->ctx->faulty, 1);
792 		}
793 	}
794 
795 	drm_sched_start(&queue->scheduler, 0);
796 }
797 
798 /**
799  * pvr_queue_timedout_job() - Handle a job timeout event.
800  * @s_job: The job this timeout occurred on.
801  *
802  * FIXME: We don't do anything here to unblock the situation, we just stop+start
803  * the scheduler, and re-assign parent fences in the middle.
804  *
805  * Return:
806  *  * DRM_GPU_SCHED_STAT_NOMINAL.
807  */
808 static enum drm_gpu_sched_stat
809 pvr_queue_timedout_job(struct drm_sched_job *s_job)
810 {
811 	struct drm_gpu_scheduler *sched = s_job->sched;
812 	struct pvr_queue *queue = container_of(sched, struct pvr_queue, scheduler);
813 	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
814 	struct pvr_job *job;
815 	u32 job_count = 0;
816 
817 	dev_err(sched->dev, "Job timeout\n");
818 
819 	/* Before we stop the scheduler, make sure the queue is out of any list, so
820 	 * any call to pvr_queue_update_active_state_locked() that might happen
821 	 * until the scheduler is really stopped doesn't end up re-inserting the
822 	 * queue in the active list. This would cause
823 	 * pvr_queue_signal_done_fences() and drm_sched_stop() to race with each
824 	 * other when accessing the pending_list, since drm_sched_stop() doesn't
825 	 * grab the job_list_lock when modifying the list (it's assuming the
826 	 * only other accessor is the scheduler, and it's safe to not grab the
827 	 * lock since it's stopped).
828 	 */
829 	mutex_lock(&pvr_dev->queues.lock);
830 	list_del_init(&queue->node);
831 	mutex_unlock(&pvr_dev->queues.lock);
832 
833 	drm_sched_stop(sched, s_job);
834 
835 	/* Re-assign job parent fences. */
836 	list_for_each_entry(job, &sched->pending_list, base.list) {
837 		job->base.s_fence->parent = dma_fence_get(job->done_fence);
838 		job_count++;
839 	}
840 	WARN_ON(atomic_read(&queue->in_flight_job_count) != job_count);
841 
842 	/* Re-insert the queue in the proper list, and kick a queue processing
843 	 * operation if there were jobs pending.
844 	 */
845 	mutex_lock(&pvr_dev->queues.lock);
846 	if (!job_count) {
847 		list_move_tail(&queue->node, &pvr_dev->queues.idle);
848 	} else {
849 		atomic_set(&queue->in_flight_job_count, job_count);
850 		list_move_tail(&queue->node, &pvr_dev->queues.active);
851 		pvr_queue_process(queue);
852 	}
853 	mutex_unlock(&pvr_dev->queues.lock);
854 
855 	drm_sched_start(sched, 0);
856 
857 	return DRM_GPU_SCHED_STAT_NOMINAL;
858 }
859 
860 /**
861  * pvr_queue_free_job() - Release the reference the scheduler had on a job object.
862  * @sched_job: Job object to free.
863  */
864 static void pvr_queue_free_job(struct drm_sched_job *sched_job)
865 {
866 	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
867 
868 	drm_sched_job_cleanup(sched_job);
869 	job->paired_job = NULL;
870 	pvr_job_put(job);
871 }
872 
873 static const struct drm_sched_backend_ops pvr_queue_sched_ops = {
874 	.prepare_job = pvr_queue_prepare_job,
875 	.run_job = pvr_queue_run_job,
876 	.timedout_job = pvr_queue_timedout_job,
877 	.free_job = pvr_queue_free_job,
878 };
879 
880 /**
881  * pvr_queue_fence_is_ufo_backed() - Check if a dma_fence is backed by a UFO object
882  * @f: Fence to test.
883  *
884  * A UFO-backed fence is a fence that can be signaled or waited upon FW-side.
885  * pvr_job::done_fence objects are backed by the timeline UFO attached to the queue
886  * they are pushed to, but those fences are not directly exposed to the outside
887  * world, so we also need to check if the fence we're being passed is a
888  * drm_sched_fence that was coming from our driver.
889  */
890 bool pvr_queue_fence_is_ufo_backed(struct dma_fence *f)
891 {
892 	struct drm_sched_fence *sched_fence = f ? to_drm_sched_fence(f) : NULL;
893 
894 	if (sched_fence &&
895 	    sched_fence->sched->ops == &pvr_queue_sched_ops)
896 		return true;
897 
898 	if (f && f->ops == &pvr_queue_job_fence_ops)
899 		return true;
900 
901 	return false;
902 }
903 
904 /**
905  * pvr_queue_signal_done_fences() - Signal done fences.
906  * @queue: Queue to check.
907  *
908  * Signal done fences of jobs whose seqno is less than the current value of
909  * the UFO object attached to the queue.
910  */
911 static void
912 pvr_queue_signal_done_fences(struct pvr_queue *queue)
913 {
914 	struct pvr_job *job, *tmp_job;
915 	u32 cur_seqno;
916 
917 	spin_lock(&queue->scheduler.job_list_lock);
918 	cur_seqno = *queue->timeline_ufo.value;
919 	list_for_each_entry_safe(job, tmp_job, &queue->scheduler.pending_list, base.list) {
920 		if ((int)(cur_seqno - lower_32_bits(job->done_fence->seqno)) < 0)
921 			break;
922 
923 		if (!dma_fence_is_signaled(job->done_fence)) {
924 			dma_fence_signal(job->done_fence);
925 			pvr_job_release_pm_ref(job);
926 			atomic_dec(&queue->in_flight_job_count);
927 		}
928 	}
929 	spin_unlock(&queue->scheduler.job_list_lock);
930 }
931 
932 /**
933  * pvr_queue_check_job_waiting_for_cccb_space() - Check if the job waiting for CCCB space
934  * can be unblocked and pushed to the CCCB
936  * @queue: Queue to check
937  *
938  * If we have a job waiting for CCCB, and this job now fits in the CCCB, we signal
939  * its CCCB fence, which should kick drm_sched.
940  */
941 static void
942 pvr_queue_check_job_waiting_for_cccb_space(struct pvr_queue *queue)
943 {
944 	struct pvr_queue_fence *cccb_fence;
945 	u32 native_deps_remaining;
946 	struct pvr_job *job;
947 
948 	mutex_lock(&queue->cccb_fence_ctx.job_lock);
949 	job = queue->cccb_fence_ctx.job;
950 	if (!job)
951 		goto out_unlock;
952 
953 	/* If we have a job attached to the CCCB fence context, its CCCB fence
954 	 * shouldn't be NULL.
955 	 */
956 	if (WARN_ON(!job->cccb_fence)) {
957 		job = NULL;
958 		goto out_unlock;
959 	}
960 
961 	/* If we get here, the CCCB fence has to be initialized. */
962 	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
963 	if (WARN_ON(!cccb_fence->queue)) {
964 		job = NULL;
965 		goto out_unlock;
966 	}
967 
968 	/* Evict signaled dependencies before checking for CCCB space.
969 	 * If the job fits, signal the CCCB fence, this should unblock
970 	 * the drm_sched_entity.
971 	 */
972 	native_deps_remaining = job_count_remaining_native_deps(job);
973 	if (!pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
974 		job = NULL;
975 		goto out_unlock;
976 	}
977 
978 	dma_fence_signal(job->cccb_fence);
979 	pvr_queue_fence_put(job->cccb_fence);
980 	job->cccb_fence = NULL;
981 	queue->cccb_fence_ctx.job = NULL;
982 
983 out_unlock:
984 	mutex_unlock(&queue->cccb_fence_ctx.job_lock);
985 
986 	pvr_job_put(job);
987 }
988 
989 /**
990  * pvr_queue_process() - Process events that happened on a queue.
991  * @queue: Queue to check
992  *
993  * Signal job fences and check if jobs waiting for CCCB space can be unblocked.
994  */
995 void pvr_queue_process(struct pvr_queue *queue)
996 {
997 	lockdep_assert_held(&queue->ctx->pvr_dev->queues.lock);
998 
999 	pvr_queue_check_job_waiting_for_cccb_space(queue);
1000 	pvr_queue_signal_done_fences(queue);
1001 	pvr_queue_update_active_state_locked(queue);
1002 }
1003 
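/* Map a queue type to the FW data master its jobs are sent to. */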
1004 static u32 get_dm_type(struct pvr_queue *queue)
1005 {
1006 	switch (queue->type) {
1007 	case DRM_PVR_JOB_TYPE_GEOMETRY:
1008 		return PVR_FWIF_DM_GEOM;
1009 	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
1010 	case DRM_PVR_JOB_TYPE_FRAGMENT:
1011 		return PVR_FWIF_DM_FRAG;
1012 	case DRM_PVR_JOB_TYPE_COMPUTE:
1013 		return PVR_FWIF_DM_CDM;
1014 	}
1015 
1016 	return ~0;
1017 }
1018 
1019 /**
1020  * init_fw_context() - Initializes the queue part of a FW context.
1021  * @queue: Queue object to initialize the FW context for.
1022  * @fw_ctx_map: The FW context CPU mapping.
1023  *
1024  * FW contexts contain various states, one of them being a per-queue state
1025  * that needs to be initialized for each queue being exposed by a context. This
1026  * function takes care of that.
1027  */
1028 static void init_fw_context(struct pvr_queue *queue, void *fw_ctx_map)
1029 {
1030 	struct pvr_context *ctx = queue->ctx;
1031 	struct pvr_fw_object *fw_mem_ctx_obj = pvr_vm_get_fw_mem_context(ctx->vm_ctx);
1032 	struct rogue_fwif_fwcommoncontext *cctx_fw;
1033 	struct pvr_cccb *cccb = &queue->cccb;
1034 
1035 	cctx_fw = fw_ctx_map + queue->ctx_offset;
1036 	cctx_fw->ccbctl_fw_addr = cccb->ctrl_fw_addr;
1037 	cctx_fw->ccb_fw_addr = cccb->cccb_fw_addr;
1038 
1039 	cctx_fw->dm = get_dm_type(queue);
1040 	cctx_fw->priority = ctx->priority;
1041 	cctx_fw->priority_seq_num = 0;
1042 	cctx_fw->max_deadline_ms = MAX_DEADLINE_MS;
1043 	cctx_fw->pid = task_tgid_nr(current);
1044 	cctx_fw->server_common_context_id = ctx->ctx_id;
1045 
1046 	pvr_fw_object_get_fw_addr(fw_mem_ctx_obj, &cctx_fw->fw_mem_context_fw_addr);
1047 
1048 	pvr_fw_object_get_fw_addr(queue->reg_state_obj, &cctx_fw->context_state_addr);
1049 }
1050 
1051 /**
1052  * pvr_queue_cleanup_fw_context() - Wait for the FW context to be idle and clean it up.
1053  * @queue: Queue on FW context to clean up.
1054  *
1055  * Return:
1056  *  * 0 on success,
1057  *  * Any error returned by pvr_fw_structure_cleanup() otherwise.
1058  */
1059 static int pvr_queue_cleanup_fw_context(struct pvr_queue *queue)
1060 {
1061 	if (!queue->ctx->fw_obj)
1062 		return 0;
1063 
1064 	return pvr_fw_structure_cleanup(queue->ctx->pvr_dev,
1065 					ROGUE_FWIF_CLEANUP_FWCOMMONCONTEXT,
1066 					queue->ctx->fw_obj, queue->ctx_offset);
1067 }
1068 
1069 /**
1070  * pvr_queue_job_init() - Initialize queue related fields in a pvr_job object.
1071  * @job: The job to initialize.
1072  *
1073  * Bind the job to a queue and allocate memory to guarantee pvr_queue_job_arm()
1074  * and pvr_queue_job_push() can't fail. We also make sure the context type is
1075  * valid and the job can fit in the CCCB.
1076  *
1077  * Return:
1078  *  * 0 on success, or
1079  *  * An error code if something failed.
1080  */
1081 int pvr_queue_job_init(struct pvr_job *job)
1082 {
1083 	/* Fragment jobs need at least one native fence wait on the geometry job fence. */
1084 	u32 min_native_dep_count = job->type == DRM_PVR_JOB_TYPE_FRAGMENT ? 1 : 0;
1085 	struct pvr_queue *queue;
1086 	int err;
1087 
1088 	if (atomic_read(&job->ctx->faulty))
1089 		return -EIO;
1090 
1091 	queue = pvr_context_get_queue_for_job(job->ctx, job->type);
1092 	if (!queue)
1093 		return -EINVAL;
1094 
1095 	if (!pvr_cccb_cmdseq_can_fit(&queue->cccb, job_cmds_size(job, min_native_dep_count)))
1096 		return -E2BIG;
1097 
1098 	err = drm_sched_job_init(&job->base, &queue->entity, 1, THIS_MODULE);
1099 	if (err)
1100 		return err;
1101 
1102 	job->cccb_fence = pvr_queue_fence_alloc();
1103 	job->kccb_fence = pvr_kccb_fence_alloc();
1104 	job->done_fence = pvr_queue_fence_alloc();
1105 	if (!job->cccb_fence || !job->kccb_fence || !job->done_fence)
1106 		return -ENOMEM;
1107 
1108 	return 0;
1109 }
1110 
1111 /**
1112  * pvr_queue_job_arm() - Arm a job object.
1113  * @job: The job to arm.
1114  *
1115  * Initializes fences and returns the drm_sched finished fence so it can
1116  * be exposed to the outside world. Once this function is called, you should
1117  * make sure the job is pushed using pvr_queue_job_push(), or guarantee that
1118  * no one grabbed a reference to the returned fence. The latter can happen if
1119  * we do multi-job submission, and something failed when creating/initializing
1120  * a job. In that case, we know the fence didn't leave the driver, and we
1121  * can thus guarantee nobody will wait on a dead fence object.
1122  *
1123  * Return:
1124  *  * A dma_fence object.
1125  */
1126 struct dma_fence *pvr_queue_job_arm(struct pvr_job *job)
1127 {
1128 	drm_sched_job_arm(&job->base);
1129 
1130 	return &job->base.s_fence->finished;
1131 }
1132 
1133 /**
1134  * pvr_queue_job_cleanup() - Cleanup fence/scheduler related fields in the job object.
1135  * @job: The job to cleanup.
1136  *
1137  * Should be called in the job release path.
1138  */
1139 void pvr_queue_job_cleanup(struct pvr_job *job)
1140 {
1141 	pvr_queue_fence_put(job->done_fence);
1142 	pvr_queue_fence_put(job->cccb_fence);
1143 	pvr_kccb_fence_put(job->kccb_fence);
1144 
1145 	if (job->base.s_fence)
1146 		drm_sched_job_cleanup(&job->base);
1147 }
1148 
1149 /**
1150  * pvr_queue_job_push() - Push a job to its queue.
1151  * @job: The job to push.
1152  *
1153  * Must be called after pvr_queue_job_init() and after all dependencies
1154  * have been added to the job. This will effectively queue the job to
1155  * the drm_sched_entity attached to the queue. We grab a reference on
1156  * the job object, so the caller is free to drop its reference when it's
1157  * done accessing the job object.
1158  */
1159 void pvr_queue_job_push(struct pvr_job *job)
1160 {
1161 	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
1162 
1163 	/* Keep track of the last queued job scheduled fence for combined submit. */
1164 	dma_fence_put(queue->last_queued_job_scheduled_fence);
1165 	queue->last_queued_job_scheduled_fence = dma_fence_get(&job->base.s_fence->scheduled);
1166 
1167 	pvr_job_get(job);
1168 	drm_sched_entity_push_job(&job->base);
1169 }
1170 
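/*
 * Init callback passed to pvr_fw_object_create() for the register state
 * object: geometry queues get their VDM call stack pointer seeded with the
 * callstack address provided at context creation.
 */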
1171 static void reg_state_init(void *cpu_ptr, void *priv)
1172 {
1173 	struct pvr_queue *queue = priv;
1174 
1175 	if (queue->type == DRM_PVR_JOB_TYPE_GEOMETRY) {
1176 		struct rogue_fwif_geom_ctx_state *geom_ctx_state_fw = cpu_ptr;
1177 
1178 		geom_ctx_state_fw->geom_core[0].geom_reg_vdm_call_stack_pointer_init =
1179 			queue->callstack_addr;
1180 	}
1181 }
1182 
1183 /**
1184  * pvr_queue_create() - Create a queue object.
1185  * @ctx: The context this queue will be attached to.
1186  * @type: The type of jobs being pushed to this queue.
1187  * @args: The arguments passed to the context creation function.
1188  * @fw_ctx_map: CPU mapping of the FW context object.
1189  *
1190  * Create a queue object that will be used to queue and track jobs.
1191  *
1192  * Return:
1193  *  * A valid pointer to a pvr_queue object, or
1194  *  * An error pointer if the creation/initialization failed.
1195  */
1196 struct pvr_queue *pvr_queue_create(struct pvr_context *ctx,
1197 				   enum drm_pvr_job_type type,
1198 				   struct drm_pvr_ioctl_create_context_args *args,
1199 				   void *fw_ctx_map)
1200 {
1201 	static const struct {
1202 		u32 cccb_size;
1203 		const char *name;
1204 	} props[] = {
1205 		[DRM_PVR_JOB_TYPE_GEOMETRY] = {
1206 			.cccb_size = CTX_GEOM_CCCB_SIZE_LOG2,
1207 			.name = "geometry",
1208 		},
1209 		[DRM_PVR_JOB_TYPE_FRAGMENT] = {
1210 			.cccb_size = CTX_FRAG_CCCB_SIZE_LOG2,
1211 			.name = "fragment"
1212 		},
1213 		[DRM_PVR_JOB_TYPE_COMPUTE] = {
1214 			.cccb_size = CTX_COMPUTE_CCCB_SIZE_LOG2,
1215 			.name = "compute"
1216 		},
1217 		[DRM_PVR_JOB_TYPE_TRANSFER_FRAG] = {
1218 			.cccb_size = CTX_TRANSFER_CCCB_SIZE_LOG2,
1219 			.name = "transfer_frag"
1220 		},
1221 	};
1222 	struct pvr_device *pvr_dev = ctx->pvr_dev;
1223 	struct drm_gpu_scheduler *sched;
1224 	struct pvr_queue *queue;
1225 	int ctx_state_size, err;
1226 	void *cpu_map;
1227 
1228 	if (WARN_ON(type >= sizeof(props)))
1229 		return ERR_PTR(-EINVAL);
1230 
1231 	switch (ctx->type) {
1232 	case DRM_PVR_CTX_TYPE_RENDER:
1233 		if (type != DRM_PVR_JOB_TYPE_GEOMETRY &&
1234 		    type != DRM_PVR_JOB_TYPE_FRAGMENT)
1235 			return ERR_PTR(-EINVAL);
1236 		break;
1237 	case DRM_PVR_CTX_TYPE_COMPUTE:
1238 		if (type != DRM_PVR_JOB_TYPE_COMPUTE)
1239 			return ERR_PTR(-EINVAL);
1240 		break;
1241 	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
1242 		if (type != DRM_PVR_JOB_TYPE_TRANSFER_FRAG)
1243 			return ERR_PTR(-EINVAL);
1244 		break;
1245 	default:
1246 		return ERR_PTR(-EINVAL);
1247 	}
1248 
1249 	ctx_state_size = get_ctx_state_size(pvr_dev, type);
1250 	if (ctx_state_size < 0)
1251 		return ERR_PTR(ctx_state_size);
1252 
1253 	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
1254 	if (!queue)
1255 		return ERR_PTR(-ENOMEM);
1256 
1257 	queue->type = type;
1258 	queue->ctx_offset = get_ctx_offset(type);
1259 	queue->ctx = ctx;
1260 	queue->callstack_addr = args->callstack_addr;
1261 	sched = &queue->scheduler;
1262 	INIT_LIST_HEAD(&queue->node);
1263 	mutex_init(&queue->cccb_fence_ctx.job_lock);
1264 	pvr_queue_fence_ctx_init(&queue->cccb_fence_ctx.base);
1265 	pvr_queue_fence_ctx_init(&queue->job_fence_ctx);
1266 
1267 	err = pvr_cccb_init(pvr_dev, &queue->cccb, props[type].cccb_size, props[type].name);
1268 	if (err)
1269 		goto err_free_queue;
1270 
1271 	err = pvr_fw_object_create(pvr_dev, ctx_state_size,
1272 				   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
1273 				   reg_state_init, queue, &queue->reg_state_obj);
1274 	if (err)
1275 		goto err_cccb_fini;
1276 
1277 	init_fw_context(queue, fw_ctx_map);
1278 
1279 	if (type != DRM_PVR_JOB_TYPE_GEOMETRY && type != DRM_PVR_JOB_TYPE_FRAGMENT &&
1280 	    args->callstack_addr) {
1281 		err = -EINVAL;
1282 		goto err_release_reg_state;
1283 	}
1284 
1285 	cpu_map = pvr_fw_object_create_and_map(pvr_dev, sizeof(*queue->timeline_ufo.value),
1286 					       PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
1287 					       NULL, NULL, &queue->timeline_ufo.fw_obj);
1288 	if (IS_ERR(cpu_map)) {
1289 		err = PTR_ERR(cpu_map);
1290 		goto err_release_reg_state;
1291 	}
1292 
1293 	queue->timeline_ufo.value = cpu_map;
1294 
1295 	err = drm_sched_init(&queue->scheduler,
1296 			     &pvr_queue_sched_ops,
1297 			     pvr_dev->sched_wq, 1, 64 * 1024, 1,
1298 			     msecs_to_jiffies(500),
1299 			     pvr_dev->sched_wq, NULL, "pvr-queue",
1300 			     pvr_dev->base.dev);
1301 	if (err)
1302 		goto err_release_ufo;
1303 
1304 	err = drm_sched_entity_init(&queue->entity,
1305 				    DRM_SCHED_PRIORITY_KERNEL,
1306 				    &sched, 1, &ctx->faulty);
1307 	if (err)
1308 		goto err_sched_fini;
1309 
1310 	mutex_lock(&pvr_dev->queues.lock);
1311 	list_add_tail(&queue->node, &pvr_dev->queues.idle);
1312 	mutex_unlock(&pvr_dev->queues.lock);
1313 
1314 	return queue;
1315 
1316 err_sched_fini:
1317 	drm_sched_fini(&queue->scheduler);
1318 
1319 err_release_ufo:
1320 	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
1321 
1322 err_release_reg_state:
1323 	pvr_fw_object_destroy(queue->reg_state_obj);
1324 
1325 err_cccb_fini:
1326 	pvr_cccb_fini(&queue->cccb);
1327 
1328 err_free_queue:
1329 	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
1330 	kfree(queue);
1331 
1332 	return ERR_PTR(err);
1333 }
1334 
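/* Stop all queue schedulers before a device reset. */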
1335 void pvr_queue_device_pre_reset(struct pvr_device *pvr_dev)
1336 {
1337 	struct pvr_queue *queue;
1338 
1339 	mutex_lock(&pvr_dev->queues.lock);
1340 	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
1341 		pvr_queue_stop(queue, NULL);
1342 	list_for_each_entry(queue, &pvr_dev->queues.active, node)
1343 		pvr_queue_stop(queue, NULL);
1344 	mutex_unlock(&pvr_dev->queues.lock);
1345 }
1346 
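/* Restart all queue schedulers after a device reset. */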
1347 void pvr_queue_device_post_reset(struct pvr_device *pvr_dev)
1348 {
1349 	struct pvr_queue *queue;
1350 
1351 	mutex_lock(&pvr_dev->queues.lock);
1352 	list_for_each_entry(queue, &pvr_dev->queues.active, node)
1353 		pvr_queue_start(queue);
1354 	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
1355 		pvr_queue_start(queue);
1356 	mutex_unlock(&pvr_dev->queues.lock);
1357 }
1358 
1359 /**
1360  * pvr_queue_kill() - Kill a queue.
1361  * @queue: The queue to kill.
1362  *
1363  * Kill the queue so no new jobs can be pushed. Should be called when the
1364  * context handle is destroyed. The queue object might last longer if jobs
1365  * are still in flight and holding a reference to the context this queue
1366  * belongs to.
1367  */
1368 void pvr_queue_kill(struct pvr_queue *queue)
1369 {
1370 	drm_sched_entity_destroy(&queue->entity);
1371 	dma_fence_put(queue->last_queued_job_scheduled_fence);
1372 	queue->last_queued_job_scheduled_fence = NULL;
1373 }
1374 
1375 /**
1376  * pvr_queue_destroy() - Destroy a queue.
1377  * @queue: The queue to destroy.
1378  *
1379  * Cleanup the queue and free the resources attached to it. Should be
1380  * called from the context release function.
1381  */
1382 void pvr_queue_destroy(struct pvr_queue *queue)
1383 {
1384 	if (!queue)
1385 		return;
1386 
1387 	mutex_lock(&queue->ctx->pvr_dev->queues.lock);
1388 	list_del_init(&queue->node);
1389 	mutex_unlock(&queue->ctx->pvr_dev->queues.lock);
1390 
1391 	drm_sched_fini(&queue->scheduler);
1392 	drm_sched_entity_fini(&queue->entity);
1393 
1394 	if (WARN_ON(queue->last_queued_job_scheduled_fence))
1395 		dma_fence_put(queue->last_queued_job_scheduled_fence);
1396 
1397 	pvr_queue_cleanup_fw_context(queue);
1398 
1399 	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
1400 	pvr_fw_object_destroy(queue->reg_state_obj);
1401 	pvr_cccb_fini(&queue->cccb);
1402 	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
1403 	kfree(queue);
1404 }
1405 
1406 /**
1407  * pvr_queue_device_init() - Device-level initialization of queue related fields.
1408  * @pvr_dev: The device to initialize.
1409  *
1410  * Initializes all fields related to queue management in pvr_device.
1411  *
1412  * Return:
1413  *  * 0 on success, or
1414  *  * An error code on failure.
1415  */
1416 int pvr_queue_device_init(struct pvr_device *pvr_dev)
1417 {
1418 	int err;
1419 
1420 	INIT_LIST_HEAD(&pvr_dev->queues.active);
1421 	INIT_LIST_HEAD(&pvr_dev->queues.idle);
1422 	err = drmm_mutex_init(from_pvr_device(pvr_dev), &pvr_dev->queues.lock);
1423 	if (err)
1424 		return err;
1425 
1426 	pvr_dev->sched_wq = alloc_workqueue("powervr-sched", WQ_UNBOUND, 0);
1427 	if (!pvr_dev->sched_wq)
1428 		return -ENOMEM;
1429 
1430 	return 0;
1431 }
1432 
1433 /**
1434  * pvr_queue_device_fini() - Device-level cleanup of queue related fields.
1435  * @pvr_dev: The device to cleanup.
1436  *
1437  * Cleanup/free all queue-related resources attached to a pvr_device object.
1438  */
1439 void pvr_queue_device_fini(struct pvr_device *pvr_dev)
1440 {
1441 	destroy_workqueue(pvr_dev->sched_wq);
1442 }
1443