// SPDX-License-Identifier: GPL-2.0-only OR MIT
/* Copyright (c) 2023 Imagination Technologies Ltd. */

#include <drm/drm_managed.h>
#include <drm/gpu_scheduler.h>

#include "pvr_cccb.h"
#include "pvr_context.h"
#include "pvr_device.h"
#include "pvr_drv.h"
#include "pvr_job.h"
#include "pvr_queue.h"
#include "pvr_vm.h"

#include "pvr_rogue_fwif_client.h"

#define MAX_DEADLINE_MS 30000

#define CTX_COMPUTE_CCCB_SIZE_LOG2 15
#define CTX_FRAG_CCCB_SIZE_LOG2 15
#define CTX_GEOM_CCCB_SIZE_LOG2 15
#define CTX_TRANSFER_CCCB_SIZE_LOG2 15

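/* Size of the FW context state for a transfer-fragment queue: the base
 * rogue_fwif_frag_ctx_state structure plus one ISP store register entry per
 * ISP pipe (a single entry on XE-memory-hierarchy cores).
 */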
static int get_xfer_ctx_state_size(struct pvr_device *pvr_dev)
{
	u32 num_isp_store_registers;

	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
		num_isp_store_registers = 1;
	} else {
		int err;

		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
		if (WARN_ON(err))
			return err;
	}

	return sizeof(struct rogue_fwif_frag_ctx_state) +
	       (num_isp_store_registers *
		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
}

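/* Size of the FW context state for a fragment queue: the base
 * rogue_fwif_frag_ctx_state structure plus one ISP store register entry per
 * raster pipe (scaled by 1 + xpu_max_slaves on multicore GPUs), or per ISP
 * pipe on cores without the XE memory hierarchy.
 */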
static int get_frag_ctx_state_size(struct pvr_device *pvr_dev)
{
	u32 num_isp_store_registers;
	int err;

	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
		err = PVR_FEATURE_VALUE(pvr_dev, num_raster_pipes, &num_isp_store_registers);
		if (WARN_ON(err))
			return err;

		if (PVR_HAS_FEATURE(pvr_dev, gpu_multicore_support)) {
			u32 xpu_max_slaves;

			err = PVR_FEATURE_VALUE(pvr_dev, xpu_max_slaves, &xpu_max_slaves);
			if (WARN_ON(err))
				return err;

			num_isp_store_registers *= (1 + xpu_max_slaves);
		}
	} else {
		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
		if (WARN_ON(err))
			return err;
	}

	return sizeof(struct rogue_fwif_frag_ctx_state) +
	       (num_isp_store_registers *
		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
}

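/* Size of the per-queue FW context state (register state object) for the
 * given job type, or a negative error code.
 */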
static int get_ctx_state_size(struct pvr_device *pvr_dev, enum drm_pvr_job_type type)
{
	switch (type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return sizeof(struct rogue_fwif_geom_ctx_state);
	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return get_frag_ctx_state_size(pvr_dev);
	case DRM_PVR_JOB_TYPE_COMPUTE:
		return sizeof(struct rogue_fwif_compute_ctx_state);
	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return get_xfer_ctx_state_size(pvr_dev);
	}

	WARN(1, "Invalid queue type");
	return -EINVAL;
}

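/* Offset of the queue's common context within the FW context structure
 * matching the job type (render, compute or transfer context).
 */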
static u32 get_ctx_offset(enum drm_pvr_job_type type)
{
	switch (type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return offsetof(struct rogue_fwif_fwrendercontext, geom_context);
	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return offsetof(struct rogue_fwif_fwrendercontext, frag_context);
	case DRM_PVR_JOB_TYPE_COMPUTE:
		return offsetof(struct rogue_fwif_fwcomputecontext, cdm_context);
	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return offsetof(struct rogue_fwif_fwtransfercontext, tq_context);
	}

	return 0;
}

static const char *
pvr_queue_fence_get_driver_name(struct dma_fence *f)
{
	return PVR_DRIVER_NAME;
}

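/* pvr_queue_fence_release() defers the final teardown to this worker, which
 * runs on the device's scheduler workqueue: it drops the context reference
 * taken in pvr_queue_fence_init() and then frees the fence object.
 */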
static void pvr_queue_fence_release_work(struct work_struct *w)
{
	struct pvr_queue_fence *fence = container_of(w, struct pvr_queue_fence, release_work);

	pvr_context_put(fence->queue->ctx);
	dma_fence_free(&fence->base);
}

static void pvr_queue_fence_release(struct dma_fence *f)
{
	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
	struct pvr_device *pvr_dev = fence->queue->ctx->pvr_dev;

	queue_work(pvr_dev->sched_wq, &fence->release_work);
}

static const char *
pvr_queue_job_fence_get_timeline_name(struct dma_fence *f)
{
	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);

	switch (fence->queue->type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return "geometry";

	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return "fragment";

	case DRM_PVR_JOB_TYPE_COMPUTE:
		return "compute";

	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return "transfer";
	}

	WARN(1, "Invalid queue type");
	return "invalid";
}

static const char *
pvr_queue_cccb_fence_get_timeline_name(struct dma_fence *f)
{
	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);

	switch (fence->queue->type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return "geometry-cccb";

	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return "fragment-cccb";

	case DRM_PVR_JOB_TYPE_COMPUTE:
		return "compute-cccb";

	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return "transfer-cccb";
	}

	WARN(1, "Invalid queue type");
	return "invalid";
}

static const struct dma_fence_ops pvr_queue_job_fence_ops = {
	.get_driver_name = pvr_queue_fence_get_driver_name,
	.get_timeline_name = pvr_queue_job_fence_get_timeline_name,
	.release = pvr_queue_fence_release,
};

/**
 * to_pvr_queue_job_fence() - Return a pvr_queue_fence object if the fence is
 * backed by a UFO.
 * @f: The dma_fence to turn into a pvr_queue_fence.
 *
 * Return:
 *  * A non-NULL pvr_queue_fence object if the dma_fence is backed by a UFO, or
 *  * NULL otherwise.
 */
static struct pvr_queue_fence *
to_pvr_queue_job_fence(struct dma_fence *f)
{
	struct drm_sched_fence *sched_fence = to_drm_sched_fence(f);

	if (sched_fence)
		f = sched_fence->parent;

	if (f && f->ops == &pvr_queue_job_fence_ops)
		return container_of(f, struct pvr_queue_fence, base);

	return NULL;
}

static const struct dma_fence_ops pvr_queue_cccb_fence_ops = {
	.get_driver_name = pvr_queue_fence_get_driver_name,
	.get_timeline_name = pvr_queue_cccb_fence_get_timeline_name,
	.release = pvr_queue_fence_release,
};

/**
 * pvr_queue_fence_put() - Put wrapper for pvr_queue_fence objects.
 * @f: The dma_fence object to put.
 *
 * If the pvr_queue_fence has been initialized, we call dma_fence_put(),
 * otherwise we free the object with dma_fence_free(). This allows us
 * to do the right thing both before and after pvr_queue_fence_init() has
 * been called.
 */
static void pvr_queue_fence_put(struct dma_fence *f)
{
	if (!f)
		return;

	if (WARN_ON(f->ops &&
		    f->ops != &pvr_queue_cccb_fence_ops &&
		    f->ops != &pvr_queue_job_fence_ops))
		return;

	/* If the fence hasn't been initialized yet, free the object directly. */
	if (f->ops)
		dma_fence_put(f);
	else
		dma_fence_free(f);
}

/**
 * pvr_queue_fence_alloc() - Allocate a pvr_queue_fence fence object
 *
 * Call this function to allocate job CCCB and done fences. This only
 * allocates the objects. Initialization happens when the underlying
 * dma_fence object is to be returned to drm_sched (in prepare_job() or
 * run_job()).
 *
 * Return:
 *  * A valid pointer if the allocation succeeds, or
 *  * NULL if the allocation fails.
 */
static struct dma_fence *
pvr_queue_fence_alloc(void)
{
	struct pvr_queue_fence *fence;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return NULL;

	return &fence->base;
}

/**
 * pvr_queue_fence_init() - Initializes a pvr_queue_fence object.
 * @f: The fence to initialize
 * @queue: The queue this fence belongs to.
 * @fence_ops: The fence operations.
 * @fence_ctx: The fence context.
 *
 * Wrapper around dma_fence_init() that takes care of initializing the
 * pvr_queue_fence::queue field too.
 */
static void
pvr_queue_fence_init(struct dma_fence *f,
		     struct pvr_queue *queue,
		     const struct dma_fence_ops *fence_ops,
		     struct pvr_queue_fence_ctx *fence_ctx)
{
	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);

	pvr_context_get(queue->ctx);
	fence->queue = queue;
	INIT_WORK(&fence->release_work, pvr_queue_fence_release_work);
	dma_fence_init(&fence->base, fence_ops,
		       &fence_ctx->lock, fence_ctx->id,
		       atomic_inc_return(&fence_ctx->seqno));
}

/**
 * pvr_queue_cccb_fence_init() - Initializes a CCCB fence object.
 * @fence: The fence to initialize.
 * @queue: The queue this fence belongs to.
 *
 * Initializes a fence that can be used to wait for CCCB space.
 *
 * Should be called in the ::prepare_job() path, so the fence returned to
 * drm_sched is valid.
 */
static void
pvr_queue_cccb_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
{
	pvr_queue_fence_init(fence, queue, &pvr_queue_cccb_fence_ops,
			     &queue->cccb_fence_ctx.base);
}

/**
 * pvr_queue_job_fence_init() - Initializes a job done fence object.
 * @fence: The fence to initialize.
 * @queue: The queue this fence belongs to.
 *
 * Initializes a fence that will be signaled when the GPU is done executing
 * a job.
 *
 * Should be called *before* the ::run_job() path, so the fence is initialised
 * before being placed in the pending_list.
 */
static void
pvr_queue_job_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
{
	if (!fence->ops)
		pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops,
				     &queue->job_fence_ctx);
}

/**
 * pvr_queue_fence_ctx_init() - Queue fence context initialization.
 * @fence_ctx: The context to initialize
 */
static void
pvr_queue_fence_ctx_init(struct pvr_queue_fence_ctx *fence_ctx)
{
	spin_lock_init(&fence_ctx->lock);
	fence_ctx->id = dma_fence_context_alloc(1);
	atomic_set(&fence_ctx->seqno, 0);
}

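/* Size, in bytes, of the CCCB command sequence (including headers) needed to
 * emit @elem_count UFOs, split into chunks of ROGUE_FWIF_CCB_CMD_MAX_UFOS.
 */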
static u32 ufo_cmds_size(u32 elem_count)
{
	/* We can pass at most ROGUE_FWIF_CCB_CMD_MAX_UFOS per UFO-related command. */
	u32 full_cmd_count = elem_count / ROGUE_FWIF_CCB_CMD_MAX_UFOS;
	u32 remaining_elems = elem_count % ROGUE_FWIF_CCB_CMD_MAX_UFOS;
	u32 size = full_cmd_count *
		   pvr_cccb_get_size_of_cmd_with_hdr(ROGUE_FWIF_CCB_CMD_MAX_UFOS *
						     sizeof(struct rogue_fwif_ufo));

	if (remaining_elems) {
		size += pvr_cccb_get_size_of_cmd_with_hdr(remaining_elems *
							  sizeof(struct rogue_fwif_ufo));
	}

	return size;
}

static u32 job_cmds_size(struct pvr_job *job, u32 ufo_wait_count)
{
	/* One UFO cmd for the fence signaling, one UFO cmd per native fence wait,
	 * and a command for the job itself.
	 */
	return ufo_cmds_size(1) + ufo_cmds_size(ufo_wait_count) +
	       pvr_cccb_get_size_of_cmd_with_hdr(job->cmd_len);
}

/**
 * job_count_remaining_native_deps() - Count the number of non-signaled native dependencies.
 * @job: Job to operate on.
 *
 * Returns: Number of non-signaled native deps remaining.
 */
static unsigned long job_count_remaining_native_deps(struct pvr_job *job)
{
	unsigned long remaining_count = 0;
	struct dma_fence *fence = NULL;
	unsigned long index;

	xa_for_each(&job->base.dependencies, index, fence) {
		struct pvr_queue_fence *jfence;

		jfence = to_pvr_queue_job_fence(fence);
		if (!jfence)
			continue;

		if (!dma_fence_is_signaled(&jfence->base))
			remaining_count++;
	}

	return remaining_count;
}

/**
 * pvr_queue_get_job_cccb_fence() - Get the CCCB fence attached to a job.
 * @queue: The queue this job will be submitted to.
 * @job: The job to get the CCCB fence on.
 *
 * The CCCB fence is a synchronization primitive allowing us to delay job
 * submission until there's enough space in the CCCB to submit the job.
 *
 * Return:
 *  * NULL if there's enough space in the CCCB to submit this job, or
 *  * A valid dma_fence object otherwise.
 */
static struct dma_fence *
pvr_queue_get_job_cccb_fence(struct pvr_queue *queue, struct pvr_job *job)
{
	struct pvr_queue_fence *cccb_fence;
	unsigned int native_deps_remaining;

	/* If the fence is NULL, that means we already checked that we had
	 * enough space in the cccb for our job.
	 */
	if (!job->cccb_fence)
		return NULL;

	mutex_lock(&queue->cccb_fence_ctx.job_lock);

	/* Count remaining native dependencies and check if the job fits in the CCCB. */
	native_deps_remaining = job_count_remaining_native_deps(job);
	if (pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
		pvr_queue_fence_put(job->cccb_fence);
		job->cccb_fence = NULL;
		goto out_unlock;
	}

	/* There should be no job attached to the CCCB fence context:
	 * drm_sched_entity guarantees that jobs are submitted one at a time.
	 */
	if (WARN_ON(queue->cccb_fence_ctx.job))
		pvr_job_put(queue->cccb_fence_ctx.job);

	queue->cccb_fence_ctx.job = pvr_job_get(job);

	/* Initialize the fence before returning it. */
	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
	if (!WARN_ON(cccb_fence->queue))
		pvr_queue_cccb_fence_init(job->cccb_fence, queue);

out_unlock:
	mutex_unlock(&queue->cccb_fence_ctx.job_lock);

	return dma_fence_get(job->cccb_fence);
}

/**
 * pvr_queue_get_job_kccb_fence() - Get the KCCB fence attached to a job.
 * @queue: The queue this job will be submitted to.
 * @job: The job to get the KCCB fence on.
 *
 * The KCCB fence is a synchronization primitive allowing us to delay job
 * submission until there's enough space in the KCCB to submit the job.
 *
 * Return:
 *  * NULL if there's enough space in the KCCB to submit this job, or
 *  * A valid dma_fence object otherwise.
 */
static struct dma_fence *
pvr_queue_get_job_kccb_fence(struct pvr_queue *queue, struct pvr_job *job)
{
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
	struct dma_fence *kccb_fence = NULL;

	/* If the fence is NULL, that means we already checked that we had
	 * enough space in the KCCB for our job.
	 */
	if (!job->kccb_fence)
		return NULL;

	if (!WARN_ON(job->kccb_fence->ops)) {
		kccb_fence = pvr_kccb_reserve_slot(pvr_dev, job->kccb_fence);
		job->kccb_fence = NULL;
	}

	return kccb_fence;
}

static struct dma_fence *
pvr_queue_get_paired_frag_job_dep(struct pvr_queue *queue, struct pvr_job *job)
{
	struct pvr_job *frag_job = job->type == DRM_PVR_JOB_TYPE_GEOMETRY ?
				   job->paired_job : NULL;
	struct dma_fence *f;
	unsigned long index;

	if (!frag_job)
		return NULL;

	xa_for_each(&frag_job->base.dependencies, index, f) {
		/* Skip already signaled fences. */
		if (dma_fence_is_signaled(f))
			continue;

		/* Skip our own fence. */
		if (f == &job->base.s_fence->scheduled)
			continue;

		return dma_fence_get(f);
	}

	return frag_job->base.sched->ops->prepare_job(&frag_job->base, &queue->entity);
}

/**
 * pvr_queue_prepare_job() - Return the next internal dependency expressed as a dma_fence.
 * @sched_job: The job to query the next internal dependency on
 * @s_entity: The entity this job is queued on.
 *
 * After iterating over drm_sched_job::dependencies, drm_sched lets the driver return
 * its own internal dependencies. We use this function to return our internal dependencies.
 */
static struct dma_fence *
pvr_queue_prepare_job(struct drm_sched_job *sched_job,
		      struct drm_sched_entity *s_entity)
{
	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
	struct pvr_queue *queue = container_of(s_entity, struct pvr_queue, entity);
	struct dma_fence *internal_dep = NULL;

	/*
	 * Initialize the done_fence, so we can signal it. This must be done
	 * here because otherwise by the time of run_job() the job will end up
	 * in the pending list without a valid fence.
	 */
	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
		/*
		 * This will be called on a paired fragment job after it has
		 * been submitted to firmware. We can tell this is the case,
		 * and bail out early, by checking whether run_job() has been
		 * called on the geometry job, which would have issued a pm
		 * ref.
		 */
		if (job->paired_job->has_pm_ref)
			return NULL;

		/*
		 * In this case we need to use the job's own ctx to initialise
		 * the done_fence. The other steps are done in the ctx of the
		 * paired geometry job.
		 */
		pvr_queue_job_fence_init(job->done_fence,
					 job->ctx->queues.fragment);
	} else {
		pvr_queue_job_fence_init(job->done_fence, queue);
	}

	/* CCCB fence is used to make sure we have enough space in the CCCB to
	 * submit our commands.
	 */
	internal_dep = pvr_queue_get_job_cccb_fence(queue, job);

	/* KCCB fence is used to make sure we have a KCCB slot to queue our
	 * CMD_KICK.
	 */
	if (!internal_dep)
		internal_dep = pvr_queue_get_job_kccb_fence(queue, job);

	/* Any extra internal dependency should be added here, using the following
	 * pattern:
	 *
	 *	if (!internal_dep)
	 *		internal_dep = pvr_queue_get_job_xxxx_fence(queue, job);
	 */

	/* The paired job fence should come last, when everything else is ready. */
	if (!internal_dep)
		internal_dep = pvr_queue_get_paired_frag_job_dep(queue, job);

	return internal_dep;
}

/**
 * pvr_queue_update_active_state_locked() - Update the queue active state.
 * @queue: Queue to update the state on.
 *
 * Locked version of pvr_queue_update_active_state(). Must be called with
 * pvr_device::queues::lock held.
 */
static void pvr_queue_update_active_state_locked(struct pvr_queue *queue)
{
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;

	lockdep_assert_held(&pvr_dev->queues.lock);

	/* The queue is temporarily out of any list while it's being reset;
	 * we don't want a call to pvr_queue_update_active_state_locked()
	 * to re-insert it behind our back.
	 */
	if (list_empty(&queue->node))
		return;

	if (!atomic_read(&queue->in_flight_job_count))
		list_move_tail(&queue->node, &pvr_dev->queues.idle);
	else
		list_move_tail(&queue->node, &pvr_dev->queues.active);
}

/**
 * pvr_queue_update_active_state() - Update the queue active state.
 * @queue: Queue to update the state on.
 *
 * Active state is based on the in_flight_job_count value.
 *
 * Updating the active state implies moving the queue in or out of the
 * active queue list, which also defines whether the queue is checked
 * or not when a FW event is received.
 *
 * This function should be called any time a job is submitted or its done
 * fence is signaled.
 */
static void pvr_queue_update_active_state(struct pvr_queue *queue)
{
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;

	mutex_lock(&pvr_dev->queues.lock);
	pvr_queue_update_active_state_locked(queue);
	mutex_unlock(&pvr_dev->queues.lock);
}

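/* Write the job to its queue's CCCB: a FENCE_PR command per batch of
 * unsignaled native waits, the job command itself, and finally an UPDATE
 * command signaling the queue timeline UFO with the job's done_fence seqno.
 */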
static void pvr_queue_submit_job_to_cccb(struct pvr_job *job)
{
	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
	struct rogue_fwif_ufo ufos[ROGUE_FWIF_CCB_CMD_MAX_UFOS];
	struct pvr_cccb *cccb = &queue->cccb;
	struct pvr_queue_fence *jfence;
	struct dma_fence *fence;
	unsigned long index;
	u32 ufo_count = 0;

	/* We need to add the queue to the active list before updating the CCCB,
	 * otherwise we might miss the FW event informing us that something
	 * happened on this queue.
	 */
	atomic_inc(&queue->in_flight_job_count);
	pvr_queue_update_active_state(queue);

	xa_for_each(&job->base.dependencies, index, fence) {
		jfence = to_pvr_queue_job_fence(fence);
		if (!jfence)
			continue;

		/* Skip the partial render fence, we will place it at the end. */
		if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job &&
		    &job->paired_job->base.s_fence->scheduled == fence)
			continue;

		if (dma_fence_is_signaled(&jfence->base))
			continue;

		pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
					  &ufos[ufo_count].addr);
		ufos[ufo_count++].value = jfence->base.seqno;

		if (ufo_count == ARRAY_SIZE(ufos)) {
			pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
							   sizeof(ufos), ufos, 0, 0);
			ufo_count = 0;
		}
	}

	/* Partial render fence goes last. */
	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
		jfence = to_pvr_queue_job_fence(job->paired_job->done_fence);
		if (!WARN_ON(!jfence)) {
			pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
						  &ufos[ufo_count].addr);
			ufos[ufo_count++].value = job->paired_job->done_fence->seqno;
		}
	}

	if (ufo_count) {
		pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
						   sizeof(ufos[0]) * ufo_count, ufos, 0, 0);
	}

	if (job->type == DRM_PVR_JOB_TYPE_GEOMETRY && job->paired_job) {
		struct rogue_fwif_cmd_geom *cmd = job->cmd;

		/* Reference value for the partial render test is the current queue fence
		 * seqno minus one.
		 */
		pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj,
					  &cmd->partial_render_geom_frag_fence.addr);
		cmd->partial_render_geom_frag_fence.value = job->done_fence->seqno - 1;
	}

	/* Submit job to FW */
	pvr_cccb_write_command_with_header(cccb, job->fw_ccb_cmd_type, job->cmd_len, job->cmd,
					   job->id, job->id);

	/* Signal the job fence. */
	pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj, &ufos[0].addr);
	ufos[0].value = job->done_fence->seqno;
	pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_UPDATE,
					   sizeof(ufos[0]), ufos, 0, 0);
}

/**
 * pvr_queue_run_job() - Submit a job to the FW.
 * @sched_job: The job to submit.
 *
 * This function is called when all non-native dependencies have been met and
 * when the commands resulting from this job are guaranteed to fit in the CCCB.
 */
static struct dma_fence *pvr_queue_run_job(struct drm_sched_job *sched_job)
{
	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
	struct pvr_device *pvr_dev = job->pvr_dev;
	int err;

	/* The fragment job is issued along with the geometry job when we use combined
	 * geom+frag kicks. When we get there, we should simply return the
	 * done_fence that's been initialized earlier.
	 */
	if (job->paired_job && job->type == DRM_PVR_JOB_TYPE_FRAGMENT &&
	    job->done_fence->ops) {
		return dma_fence_get(job->done_fence);
	}

	/* The only kind of jobs that can be paired are geometry and fragment, and
	 * we bail out early if we see a fragment job that's paired with a geometry
	 * job.
	 * Paired jobs must also target the same context and point to the same
	 * HWRT.
	 */
	if (WARN_ON(job->paired_job &&
		    (job->type != DRM_PVR_JOB_TYPE_GEOMETRY ||
		     job->paired_job->type != DRM_PVR_JOB_TYPE_FRAGMENT ||
		     job->hwrt != job->paired_job->hwrt ||
		     job->ctx != job->paired_job->ctx)))
		return ERR_PTR(-EINVAL);

	err = pvr_job_get_pm_ref(job);
	if (WARN_ON(err))
		return ERR_PTR(err);

	if (job->paired_job) {
		err = pvr_job_get_pm_ref(job->paired_job);
		if (WARN_ON(err))
			return ERR_PTR(err);
	}

	/* Submit our job to the CCCB */
	pvr_queue_submit_job_to_cccb(job);

	if (job->paired_job) {
		struct pvr_job *geom_job = job;
		struct pvr_job *frag_job = job->paired_job;
		struct pvr_queue *geom_queue = job->ctx->queues.geometry;
		struct pvr_queue *frag_queue = job->ctx->queues.fragment;

		/* Submit the fragment job along with the geometry job and send a combined kick. */
		pvr_queue_submit_job_to_cccb(frag_job);
		pvr_cccb_send_kccb_combined_kick(pvr_dev,
						 &geom_queue->cccb, &frag_queue->cccb,
						 pvr_context_get_fw_addr(geom_job->ctx) +
						 geom_queue->ctx_offset,
						 pvr_context_get_fw_addr(frag_job->ctx) +
						 frag_queue->ctx_offset,
						 job->hwrt,
						 frag_job->fw_ccb_cmd_type ==
						 ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR);
	} else {
		struct pvr_queue *queue = container_of(job->base.sched,
						       struct pvr_queue, scheduler);

		pvr_cccb_send_kccb_kick(pvr_dev, &queue->cccb,
					pvr_context_get_fw_addr(job->ctx) + queue->ctx_offset,
					job->hwrt);
	}

	return dma_fence_get(job->done_fence);
}

static void pvr_queue_stop(struct pvr_queue *queue, struct pvr_job *bad_job)
{
	drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
}

static void pvr_queue_start(struct pvr_queue *queue)
{
	struct pvr_job *job;

	/* Make sure we CPU-signal the UFO object, so other queues don't get
	 * blocked waiting on it.
	 */
	*queue->timeline_ufo.value = atomic_read(&queue->job_fence_ctx.seqno);

	list_for_each_entry(job, &queue->scheduler.pending_list, base.list) {
		if (dma_fence_is_signaled(job->done_fence)) {
			/* Jobs might have completed after drm_sched_stop() was called.
			 * In that case, re-assign the parent field to the done_fence.
			 */
			WARN_ON(job->base.s_fence->parent);
			job->base.s_fence->parent = dma_fence_get(job->done_fence);
		} else {
			/* If we had unfinished jobs, flag the entity as guilty so no
			 * new job can be submitted.
			 */
			atomic_set(&queue->ctx->faulty, 1);
		}
	}

	drm_sched_start(&queue->scheduler, 0);
}

/**
 * pvr_queue_timedout_job() - Handle a job timeout event.
 * @s_job: The job this timeout occurred on.
 *
 * FIXME: We don't do anything here to unblock the situation, we just stop+start
 * the scheduler, and re-assign parent fences in the middle.
 *
 * Return:
 *  * DRM_GPU_SCHED_STAT_RESET.
 */
static enum drm_gpu_sched_stat
pvr_queue_timedout_job(struct drm_sched_job *s_job)
{
	struct drm_gpu_scheduler *sched = s_job->sched;
	struct pvr_queue *queue = container_of(sched, struct pvr_queue, scheduler);
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
	struct pvr_job *job;
	u32 job_count = 0;

	dev_err(sched->dev, "Job timeout\n");

	/* Before we stop the scheduler, make sure the queue is out of any list, so
	 * any call to pvr_queue_update_active_state_locked() that might happen
	 * until the scheduler is really stopped doesn't end up re-inserting the
	 * queue in the active list. This would cause
	 * pvr_queue_signal_done_fences() and drm_sched_stop() to race with each
	 * other when accessing the pending_list, since drm_sched_stop() doesn't
	 * grab the job_list_lock when modifying the list (it's assuming the
	 * only other accessor is the scheduler, and it's safe to not grab the
	 * lock since it's stopped).
	 */
	mutex_lock(&pvr_dev->queues.lock);
	list_del_init(&queue->node);
	mutex_unlock(&pvr_dev->queues.lock);

	drm_sched_stop(sched, s_job);

	/* Re-assign job parent fences. */
	list_for_each_entry(job, &sched->pending_list, base.list) {
		job->base.s_fence->parent = dma_fence_get(job->done_fence);
		job_count++;
	}
	WARN_ON(atomic_read(&queue->in_flight_job_count) != job_count);

	/* Re-insert the queue in the proper list, and kick a queue processing
	 * operation if there were jobs pending.
	 */
	mutex_lock(&pvr_dev->queues.lock);
	if (!job_count) {
		list_move_tail(&queue->node, &pvr_dev->queues.idle);
	} else {
		atomic_set(&queue->in_flight_job_count, job_count);
		list_move_tail(&queue->node, &pvr_dev->queues.active);
		pvr_queue_process(queue);
	}
	mutex_unlock(&pvr_dev->queues.lock);

	drm_sched_start(sched, 0);

	return DRM_GPU_SCHED_STAT_RESET;
}

/**
 * pvr_queue_free_job() - Release the reference the scheduler had on a job object.
 * @sched_job: Job object to free.
 */
static void pvr_queue_free_job(struct drm_sched_job *sched_job)
{
	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);

	drm_sched_job_cleanup(sched_job);

	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job)
		pvr_job_put(job->paired_job);

	job->paired_job = NULL;
	pvr_job_put(job);
}

static const struct drm_sched_backend_ops pvr_queue_sched_ops = {
	.prepare_job = pvr_queue_prepare_job,
	.run_job = pvr_queue_run_job,
	.timedout_job = pvr_queue_timedout_job,
	.free_job = pvr_queue_free_job,
};

/**
 * pvr_queue_fence_is_ufo_backed() - Check if a dma_fence is backed by a UFO object
 * @f: Fence to test.
 *
 * A UFO-backed fence is a fence that can be signaled or waited upon FW-side.
 * pvr_job::done_fence objects are backed by the timeline UFO attached to the queue
 * they are pushed to, but those fences are not directly exposed to the outside
 * world, so we also need to check if the fence we're being passed is a
 * drm_sched_fence that was coming from our driver.
 */
bool pvr_queue_fence_is_ufo_backed(struct dma_fence *f)
{
	struct drm_sched_fence *sched_fence = f ? to_drm_sched_fence(f) : NULL;

	if (sched_fence &&
	    sched_fence->sched->ops == &pvr_queue_sched_ops)
		return true;

	if (f && f->ops == &pvr_queue_job_fence_ops)
		return true;

	return false;
}

/**
 * pvr_queue_signal_done_fences() - Signal done fences.
 * @queue: Queue to check.
 *
 * Signal done fences of jobs whose seqno is less than or equal to the current
 * value of the UFO object attached to the queue.
 */
static void
pvr_queue_signal_done_fences(struct pvr_queue *queue)
{
	struct pvr_job *job, *tmp_job;
	u32 cur_seqno;

	spin_lock(&queue->scheduler.job_list_lock);
	cur_seqno = *queue->timeline_ufo.value;
	list_for_each_entry_safe(job, tmp_job, &queue->scheduler.pending_list, base.list) {
		if ((int)(cur_seqno - lower_32_bits(job->done_fence->seqno)) < 0)
			break;

		if (!dma_fence_is_signaled(job->done_fence)) {
			dma_fence_signal(job->done_fence);
			pvr_job_release_pm_ref(job);
			atomic_dec(&queue->in_flight_job_count);
		}
	}
	spin_unlock(&queue->scheduler.job_list_lock);
}

/**
 * pvr_queue_check_job_waiting_for_cccb_space() - Check if a job waiting for
 * CCCB space can be unblocked and pushed to the CCCB.
 * @queue: Queue to check
 *
 * If we have a job waiting for CCCB space, and this job now fits in the CCCB,
 * we signal its CCCB fence, which should kick drm_sched.
 */
static void
pvr_queue_check_job_waiting_for_cccb_space(struct pvr_queue *queue)
{
	struct pvr_queue_fence *cccb_fence;
	u32 native_deps_remaining;
	struct pvr_job *job;

	mutex_lock(&queue->cccb_fence_ctx.job_lock);
	job = queue->cccb_fence_ctx.job;
	if (!job)
		goto out_unlock;

	/* If we have a job attached to the CCCB fence context, its CCCB fence
	 * shouldn't be NULL.
	 */
	if (WARN_ON(!job->cccb_fence)) {
		job = NULL;
		goto out_unlock;
	}

	/* If we get here, the CCCB fence has to be initialized. */
	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
	if (WARN_ON(!cccb_fence->queue)) {
		job = NULL;
		goto out_unlock;
	}

	/* Evict signaled dependencies before checking for CCCB space.
	 * If the job fits, signal the CCCB fence; this should unblock
	 * the drm_sched_entity.
	 */
	native_deps_remaining = job_count_remaining_native_deps(job);
	if (!pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
		job = NULL;
		goto out_unlock;
	}

	dma_fence_signal(job->cccb_fence);
	pvr_queue_fence_put(job->cccb_fence);
	job->cccb_fence = NULL;
	queue->cccb_fence_ctx.job = NULL;

out_unlock:
	mutex_unlock(&queue->cccb_fence_ctx.job_lock);

	pvr_job_put(job);
}

/**
 * pvr_queue_process() - Process events that happened on a queue.
 * @queue: Queue to check
 *
 * Signal job fences and check if jobs waiting for CCCB space can be unblocked.
 */
void pvr_queue_process(struct pvr_queue *queue)
{
	lockdep_assert_held(&queue->ctx->pvr_dev->queues.lock);

	pvr_queue_check_job_waiting_for_cccb_space(queue);
	pvr_queue_signal_done_fences(queue);
	pvr_queue_update_active_state_locked(queue);
}

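/* Map a queue type to the FW data master handling its jobs. */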
static u32 get_dm_type(struct pvr_queue *queue)
{
	switch (queue->type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return PVR_FWIF_DM_GEOM;
	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return PVR_FWIF_DM_FRAG;
	case DRM_PVR_JOB_TYPE_COMPUTE:
		return PVR_FWIF_DM_CDM;
	}

	return ~0;
}

/**
 * init_fw_context() - Initializes the queue part of a FW context.
 * @queue: Queue object to initialize the FW context for.
 * @fw_ctx_map: The FW context CPU mapping.
 *
 * FW contexts contain various states, one of them being a per-queue state
 * that needs to be initialized for each queue exposed by a context. This
 * function takes care of that.
 */
static void init_fw_context(struct pvr_queue *queue, void *fw_ctx_map)
{
	struct pvr_context *ctx = queue->ctx;
	struct pvr_fw_object *fw_mem_ctx_obj = pvr_vm_get_fw_mem_context(ctx->vm_ctx);
	struct rogue_fwif_fwcommoncontext *cctx_fw;
	struct pvr_cccb *cccb = &queue->cccb;

	cctx_fw = fw_ctx_map + queue->ctx_offset;
	cctx_fw->ccbctl_fw_addr = cccb->ctrl_fw_addr;
	cctx_fw->ccb_fw_addr = cccb->cccb_fw_addr;

	cctx_fw->dm = get_dm_type(queue);
	cctx_fw->priority = ctx->priority;
	cctx_fw->priority_seq_num = 0;
	cctx_fw->max_deadline_ms = MAX_DEADLINE_MS;
	cctx_fw->pid = task_tgid_nr(current);
	cctx_fw->server_common_context_id = ctx->ctx_id;

	pvr_fw_object_get_fw_addr(fw_mem_ctx_obj, &cctx_fw->fw_mem_context_fw_addr);

	pvr_fw_object_get_fw_addr(queue->reg_state_obj, &cctx_fw->context_state_addr);
}

/**
 * pvr_queue_cleanup_fw_context() - Wait for the FW context to be idle and clean it up.
 * @queue: Queue whose FW context should be cleaned up.
 *
 * Return:
 *  * 0 on success,
 *  * Any error returned by pvr_fw_structure_cleanup() otherwise.
 */
static int pvr_queue_cleanup_fw_context(struct pvr_queue *queue)
{
	if (!queue->ctx->fw_obj)
		return 0;

	return pvr_fw_structure_cleanup(queue->ctx->pvr_dev,
					ROGUE_FWIF_CLEANUP_FWCOMMONCONTEXT,
					queue->ctx->fw_obj, queue->ctx_offset);
}

/**
 * pvr_queue_job_init() - Initialize queue related fields in a pvr_job object.
 * @job: The job to initialize.
 * @drm_client_id: drm_file.client_id submitting the job
 *
 * Bind the job to a queue and allocate memory to guarantee pvr_queue_job_arm()
 * and pvr_queue_job_push() can't fail. We also make sure the context type is
 * valid and the job can fit in the CCCB.
 *
 * Return:
 *  * 0 on success, or
 *  * An error code if something failed.
 */
int pvr_queue_job_init(struct pvr_job *job, u64 drm_client_id)
{
	/* Fragment jobs need at least one native fence wait on the geometry job fence. */
	u32 min_native_dep_count = job->type == DRM_PVR_JOB_TYPE_FRAGMENT ? 1 : 0;
	struct pvr_queue *queue;
	int err;

	if (atomic_read(&job->ctx->faulty))
		return -EIO;

	queue = pvr_context_get_queue_for_job(job->ctx, job->type);
	if (!queue)
		return -EINVAL;

	if (!pvr_cccb_cmdseq_can_fit(&queue->cccb, job_cmds_size(job, min_native_dep_count)))
		return -E2BIG;

	err = drm_sched_job_init(&job->base, &queue->entity, 1, THIS_MODULE, drm_client_id);
	if (err)
		return err;

	job->cccb_fence = pvr_queue_fence_alloc();
	job->kccb_fence = pvr_kccb_fence_alloc();
	job->done_fence = pvr_queue_fence_alloc();
	if (!job->cccb_fence || !job->kccb_fence || !job->done_fence)
		return -ENOMEM;

	return 0;
}

/**
 * pvr_queue_job_arm() - Arm a job object.
 * @job: The job to arm.
 *
 * Initializes fences and returns the drm_sched finished fence so it can
 * be exposed to the outside world. Once this function is called, you should
 * make sure the job is pushed using pvr_queue_job_push(), or guarantee that
 * no one grabbed a reference to the returned fence. The latter can happen if
 * we do multi-job submission, and something failed when creating/initializing
 * a job. In that case, we know the fence didn't leave the driver, and we
 * can thus guarantee nobody will wait on a dead fence object.
 *
 * Return:
 *  * A dma_fence object.
 */
struct dma_fence *pvr_queue_job_arm(struct pvr_job *job)
{
	drm_sched_job_arm(&job->base);

	return &job->base.s_fence->finished;
}

/**
 * pvr_queue_job_cleanup() - Cleanup fence/scheduler related fields in the job object.
 * @job: The job to cleanup.
 *
 * Should be called in the job release path.
 */
void pvr_queue_job_cleanup(struct pvr_job *job)
{
	pvr_queue_fence_put(job->done_fence);
	pvr_queue_fence_put(job->cccb_fence);
	pvr_kccb_fence_put(job->kccb_fence);

	if (job->base.s_fence)
		drm_sched_job_cleanup(&job->base);
}

/**
 * pvr_queue_job_push() - Push a job to its queue.
 * @job: The job to push.
 *
 * Must be called after pvr_queue_job_init() and after all dependencies
 * have been added to the job. This will effectively queue the job to
 * the drm_sched_entity attached to the queue. We grab a reference on
 * the job object, so the caller is free to drop its reference when it's
 * done accessing the job object.
 */
void pvr_queue_job_push(struct pvr_job *job)
{
	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);

	/* Keep track of the last queued job scheduled fence for combined submit. */
	dma_fence_put(queue->last_queued_job_scheduled_fence);
	queue->last_queued_job_scheduled_fence = dma_fence_get(&job->base.s_fence->scheduled);

	pvr_job_get(job);
	drm_sched_entity_push_job(&job->base);
}

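/* pvr_fw_object_create() init callback: seed the geometry context register
 * state with the VDM call stack pointer provided at context creation.
 */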
static void reg_state_init(void *cpu_ptr, void *priv)
{
	struct pvr_queue *queue = priv;

	if (queue->type == DRM_PVR_JOB_TYPE_GEOMETRY) {
		struct rogue_fwif_geom_ctx_state *geom_ctx_state_fw = cpu_ptr;

		geom_ctx_state_fw->geom_core[0].geom_reg_vdm_call_stack_pointer_init =
			queue->callstack_addr;
	}
}

/**
 * pvr_queue_create() - Create a queue object.
 * @ctx: The context this queue will be attached to.
 * @type: The type of jobs being pushed to this queue.
 * @args: The arguments passed to the context creation function.
 * @fw_ctx_map: CPU mapping of the FW context object.
 *
 * Create a queue object that will be used to queue and track jobs.
 *
 * Return:
 *  * A valid pointer to a pvr_queue object, or
 *  * An error pointer if the creation/initialization failed.
 */
struct pvr_queue *pvr_queue_create(struct pvr_context *ctx,
				   enum drm_pvr_job_type type,
				   struct drm_pvr_ioctl_create_context_args *args,
				   void *fw_ctx_map)
{
	static const struct {
		u32 cccb_size;
		const char *name;
	} props[] = {
		[DRM_PVR_JOB_TYPE_GEOMETRY] = {
			.cccb_size = CTX_GEOM_CCCB_SIZE_LOG2,
			.name = "geometry",
		},
		[DRM_PVR_JOB_TYPE_FRAGMENT] = {
			.cccb_size = CTX_FRAG_CCCB_SIZE_LOG2,
			.name = "fragment"
		},
		[DRM_PVR_JOB_TYPE_COMPUTE] = {
			.cccb_size = CTX_COMPUTE_CCCB_SIZE_LOG2,
			.name = "compute"
		},
		[DRM_PVR_JOB_TYPE_TRANSFER_FRAG] = {
			.cccb_size = CTX_TRANSFER_CCCB_SIZE_LOG2,
			.name = "transfer_frag"
		},
	};
	struct pvr_device *pvr_dev = ctx->pvr_dev;
	const struct drm_sched_init_args sched_args = {
		.ops = &pvr_queue_sched_ops,
		.submit_wq = pvr_dev->sched_wq,
		.num_rqs = 1,
		.credit_limit = 64 * 1024,
		.hang_limit = 1,
		.timeout = msecs_to_jiffies(500),
		.timeout_wq = pvr_dev->sched_wq,
		.name = "pvr-queue",
		.dev = pvr_dev->base.dev,
	};
	struct drm_gpu_scheduler *sched;
	struct pvr_queue *queue;
	int ctx_state_size, err;
	void *cpu_map;

	if (WARN_ON(type >= ARRAY_SIZE(props)))
		return ERR_PTR(-EINVAL);

	switch (ctx->type) {
	case DRM_PVR_CTX_TYPE_RENDER:
		if (type != DRM_PVR_JOB_TYPE_GEOMETRY &&
		    type != DRM_PVR_JOB_TYPE_FRAGMENT)
			return ERR_PTR(-EINVAL);
		break;
	case DRM_PVR_CTX_TYPE_COMPUTE:
		if (type != DRM_PVR_JOB_TYPE_COMPUTE)
			return ERR_PTR(-EINVAL);
		break;
	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
		if (type != DRM_PVR_JOB_TYPE_TRANSFER_FRAG)
			return ERR_PTR(-EINVAL);
		break;
	default:
		return ERR_PTR(-EINVAL);
	}

	ctx_state_size = get_ctx_state_size(pvr_dev, type);
	if (ctx_state_size < 0)
		return ERR_PTR(ctx_state_size);

	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return ERR_PTR(-ENOMEM);

	queue->type = type;
	queue->ctx_offset = get_ctx_offset(type);
	queue->ctx = ctx;
	queue->callstack_addr = args->callstack_addr;
	sched = &queue->scheduler;
	INIT_LIST_HEAD(&queue->node);
	mutex_init(&queue->cccb_fence_ctx.job_lock);
	pvr_queue_fence_ctx_init(&queue->cccb_fence_ctx.base);
	pvr_queue_fence_ctx_init(&queue->job_fence_ctx);

	err = pvr_cccb_init(pvr_dev, &queue->cccb, props[type].cccb_size, props[type].name);
	if (err)
		goto err_free_queue;

	err = pvr_fw_object_create(pvr_dev, ctx_state_size,
				   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
				   reg_state_init, queue, &queue->reg_state_obj);
	if (err)
		goto err_cccb_fini;

	init_fw_context(queue, fw_ctx_map);

	if (type != DRM_PVR_JOB_TYPE_GEOMETRY && type != DRM_PVR_JOB_TYPE_FRAGMENT &&
	    args->callstack_addr) {
		err = -EINVAL;
		goto err_release_reg_state;
	}

	cpu_map = pvr_fw_object_create_and_map(pvr_dev, sizeof(*queue->timeline_ufo.value),
					       PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
					       NULL, NULL, &queue->timeline_ufo.fw_obj);
	if (IS_ERR(cpu_map)) {
		err = PTR_ERR(cpu_map);
		goto err_release_reg_state;
	}

	queue->timeline_ufo.value = cpu_map;

	err = drm_sched_init(&queue->scheduler, &sched_args);
	if (err)
		goto err_release_ufo;

	err = drm_sched_entity_init(&queue->entity,
				    DRM_SCHED_PRIORITY_KERNEL,
				    &sched, 1, &ctx->faulty);
	if (err)
		goto err_sched_fini;

	mutex_lock(&pvr_dev->queues.lock);
	list_add_tail(&queue->node, &pvr_dev->queues.idle);
	mutex_unlock(&pvr_dev->queues.lock);

	return queue;

err_sched_fini:
	drm_sched_fini(&queue->scheduler);

err_release_ufo:
	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);

err_release_reg_state:
	pvr_fw_object_destroy(queue->reg_state_obj);

err_cccb_fini:
	pvr_cccb_fini(&queue->cccb);

err_free_queue:
	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
	kfree(queue);

	return ERR_PTR(err);
}

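/* Called before a device reset: stop the DRM scheduler of every idle and
 * active queue.
 */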
void pvr_queue_device_pre_reset(struct pvr_device *pvr_dev)
{
	struct pvr_queue *queue;

	mutex_lock(&pvr_dev->queues.lock);
	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
		pvr_queue_stop(queue, NULL);
	list_for_each_entry(queue, &pvr_dev->queues.active, node)
		pvr_queue_stop(queue, NULL);
	mutex_unlock(&pvr_dev->queues.lock);
}

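/* Called after a device reset: restart every queue stopped by
 * pvr_queue_device_pre_reset().
 */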
void pvr_queue_device_post_reset(struct pvr_device *pvr_dev)
{
	struct pvr_queue *queue;

	mutex_lock(&pvr_dev->queues.lock);
	list_for_each_entry(queue, &pvr_dev->queues.active, node)
		pvr_queue_start(queue);
	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
		pvr_queue_start(queue);
	mutex_unlock(&pvr_dev->queues.lock);
}

/**
 * pvr_queue_kill() - Kill a queue.
 * @queue: The queue to kill.
 *
 * Kill the queue so no new jobs can be pushed. Should be called when the
 * context handle is destroyed. The queue object might last longer if jobs
 * are still in flight and holding a reference to the context this queue
 * belongs to.
 */
void pvr_queue_kill(struct pvr_queue *queue)
{
	drm_sched_entity_destroy(&queue->entity);
	dma_fence_put(queue->last_queued_job_scheduled_fence);
	queue->last_queued_job_scheduled_fence = NULL;
}

/**
 * pvr_queue_destroy() - Destroy a queue.
 * @queue: The queue to destroy.
 *
 * Cleanup the queue and free the resources attached to it. Should be
 * called from the context release function.
 */
void pvr_queue_destroy(struct pvr_queue *queue)
{
	if (!queue)
		return;

	mutex_lock(&queue->ctx->pvr_dev->queues.lock);
	list_del_init(&queue->node);
	mutex_unlock(&queue->ctx->pvr_dev->queues.lock);

	drm_sched_fini(&queue->scheduler);
	drm_sched_entity_fini(&queue->entity);

	if (WARN_ON(queue->last_queued_job_scheduled_fence))
		dma_fence_put(queue->last_queued_job_scheduled_fence);

	pvr_queue_cleanup_fw_context(queue);

	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
	pvr_fw_object_destroy(queue->reg_state_obj);
	pvr_cccb_fini(&queue->cccb);
	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
	kfree(queue);
}

/**
 * pvr_queue_device_init() - Device-level initialization of queue related fields.
 * @pvr_dev: The device to initialize.
 *
 * Initializes all fields related to queue management in pvr_device.
 *
 * Return:
 *  * 0 on success, or
 *  * An error code on failure.
 */
int pvr_queue_device_init(struct pvr_device *pvr_dev)
{
	int err;

	INIT_LIST_HEAD(&pvr_dev->queues.active);
	INIT_LIST_HEAD(&pvr_dev->queues.idle);
	err = drmm_mutex_init(from_pvr_device(pvr_dev), &pvr_dev->queues.lock);
	if (err)
		return err;

	pvr_dev->sched_wq = alloc_workqueue("powervr-sched", WQ_UNBOUND, 0);
	if (!pvr_dev->sched_wq)
		return -ENOMEM;

	return 0;
}

/**
 * pvr_queue_device_fini() - Device-level cleanup of queue related fields.
 * @pvr_dev: The device to cleanup.
 *
 * Cleanup/free all queue-related resources attached to a pvr_device object.
 */
void pvr_queue_device_fini(struct pvr_device *pvr_dev)
{
	destroy_workqueue(pvr_dev->sched_wq);
}