1 // SPDX-License-Identifier: GPL-2.0-only OR MIT 2 /* Copyright (c) 2023 Imagination Technologies Ltd. */ 3 4 #include <drm/drm_managed.h> 5 #include <drm/gpu_scheduler.h> 6 7 #include "pvr_cccb.h" 8 #include "pvr_context.h" 9 #include "pvr_device.h" 10 #include "pvr_drv.h" 11 #include "pvr_job.h" 12 #include "pvr_queue.h" 13 #include "pvr_trace.h" 14 #include "pvr_vm.h" 15 16 #include "pvr_rogue_fwif_client.h" 17 18 #define MAX_DEADLINE_MS 30000 19 20 #define CTX_COMPUTE_CCCB_SIZE_LOG2 15 21 #define CTX_FRAG_CCCB_SIZE_LOG2 15 22 #define CTX_GEOM_CCCB_SIZE_LOG2 15 23 #define CTX_TRANSFER_CCCB_SIZE_LOG2 15 24 25 static int get_xfer_ctx_state_size(struct pvr_device *pvr_dev) 26 { 27 u32 num_isp_store_registers; 28 29 if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) { 30 num_isp_store_registers = 1; 31 } else { 32 int err; 33 34 err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers); 35 if (WARN_ON(err)) 36 return err; 37 } 38 39 return sizeof(struct rogue_fwif_frag_ctx_state) + 40 (num_isp_store_registers * 41 sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0])); 42 } 43 44 static int get_frag_ctx_state_size(struct pvr_device *pvr_dev) 45 { 46 u32 num_isp_store_registers; 47 int err; 48 49 if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) { 50 err = PVR_FEATURE_VALUE(pvr_dev, num_raster_pipes, &num_isp_store_registers); 51 if (WARN_ON(err)) 52 return err; 53 54 if (PVR_HAS_FEATURE(pvr_dev, gpu_multicore_support)) { 55 u32 xpu_max_slaves; 56 57 err = PVR_FEATURE_VALUE(pvr_dev, xpu_max_slaves, &xpu_max_slaves); 58 if (WARN_ON(err)) 59 return err; 60 61 num_isp_store_registers *= (1 + xpu_max_slaves); 62 } 63 } else { 64 err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers); 65 if (WARN_ON(err)) 66 return err; 67 } 68 69 return sizeof(struct rogue_fwif_frag_ctx_state) + 70 (num_isp_store_registers * 71 sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0])); 72 } 73 74 static int get_ctx_state_size(struct pvr_device *pvr_dev, enum drm_pvr_job_type type) 75 { 76 switch (type) { 77 case DRM_PVR_JOB_TYPE_GEOMETRY: 78 return sizeof(struct rogue_fwif_geom_ctx_state); 79 case DRM_PVR_JOB_TYPE_FRAGMENT: 80 return get_frag_ctx_state_size(pvr_dev); 81 case DRM_PVR_JOB_TYPE_COMPUTE: 82 return sizeof(struct rogue_fwif_compute_ctx_state); 83 case DRM_PVR_JOB_TYPE_TRANSFER_FRAG: 84 return get_xfer_ctx_state_size(pvr_dev); 85 } 86 87 WARN(1, "Invalid queue type"); 88 return -EINVAL; 89 } 90 91 static u32 get_ctx_offset(enum drm_pvr_job_type type) 92 { 93 switch (type) { 94 case DRM_PVR_JOB_TYPE_GEOMETRY: 95 return offsetof(struct rogue_fwif_fwrendercontext, geom_context); 96 case DRM_PVR_JOB_TYPE_FRAGMENT: 97 return offsetof(struct rogue_fwif_fwrendercontext, frag_context); 98 case DRM_PVR_JOB_TYPE_COMPUTE: 99 return offsetof(struct rogue_fwif_fwcomputecontext, cdm_context); 100 case DRM_PVR_JOB_TYPE_TRANSFER_FRAG: 101 return offsetof(struct rogue_fwif_fwtransfercontext, tq_context); 102 } 103 104 return 0; 105 } 106 107 static const char * 108 pvr_queue_fence_get_driver_name(struct dma_fence *f) 109 { 110 return PVR_DRIVER_NAME; 111 } 112 113 static void pvr_queue_fence_release_work(struct work_struct *w) 114 { 115 struct pvr_queue_fence *fence = container_of(w, struct pvr_queue_fence, release_work); 116 117 pvr_context_put(fence->queue->ctx); 118 dma_fence_free(&fence->base); 119 } 120 121 static void pvr_queue_fence_release(struct dma_fence *f) 122 { 123 struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base); 124 struct pvr_device *pvr_dev = fence->queue->ctx->pvr_dev; 125 126 queue_work(pvr_dev->sched_wq, &fence->release_work); 127 } 128 129 static const char * 130 pvr_queue_job_fence_get_timeline_name(struct dma_fence *f) 131 { 132 struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base); 133 134 switch (fence->queue->type) { 135 case DRM_PVR_JOB_TYPE_GEOMETRY: 136 return "geometry"; 137 138 case DRM_PVR_JOB_TYPE_FRAGMENT: 139 return "fragment"; 140 141 case DRM_PVR_JOB_TYPE_COMPUTE: 142 return "compute"; 143 144 case DRM_PVR_JOB_TYPE_TRANSFER_FRAG: 145 return "transfer"; 146 } 147 148 WARN(1, "Invalid queue type"); 149 return "invalid"; 150 } 151 152 static const char * 153 pvr_queue_cccb_fence_get_timeline_name(struct dma_fence *f) 154 { 155 struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base); 156 157 switch (fence->queue->type) { 158 case DRM_PVR_JOB_TYPE_GEOMETRY: 159 return "geometry-cccb"; 160 161 case DRM_PVR_JOB_TYPE_FRAGMENT: 162 return "fragment-cccb"; 163 164 case DRM_PVR_JOB_TYPE_COMPUTE: 165 return "compute-cccb"; 166 167 case DRM_PVR_JOB_TYPE_TRANSFER_FRAG: 168 return "transfer-cccb"; 169 } 170 171 WARN(1, "Invalid queue type"); 172 return "invalid"; 173 } 174 175 static const struct dma_fence_ops pvr_queue_job_fence_ops = { 176 .get_driver_name = pvr_queue_fence_get_driver_name, 177 .get_timeline_name = pvr_queue_job_fence_get_timeline_name, 178 .release = pvr_queue_fence_release, 179 }; 180 181 /** 182 * pvr_queue_fence_is_ufo_backed() - Check if a dma_fence is backed by a UFO. 183 * @f: The dma_fence to check. 184 * 185 * Return: 186 * * true if the dma_fence is backed by a UFO, or 187 * * false otherwise. 188 */ 189 static inline bool 190 pvr_queue_fence_is_ufo_backed(struct dma_fence *f) 191 { 192 /* 193 * Currently the only dma_fence backed by a UFO object is the job fence, 194 * e.g. pvr_job::done_fence, wrapped by a pvr_queue_fence object. 195 */ 196 return f && f->ops == &pvr_queue_job_fence_ops; 197 } 198 199 /** 200 * to_pvr_queue_job_fence() - Return a pvr_queue_fence object if the fence is 201 * already backed by a UFO. 202 * @f: The dma_fence to turn into a pvr_queue_fence. 203 * 204 * This could be called on: 205 * - a job fence directly, in which case it simply returns the containing pvr_queue_fence; 206 * - a drm_sched_fence's scheduled or finished fence, in which case it will first try to follow 207 * the parent pointer to find the job fence (note that the parent pointer is initialized 208 * only after the run_job() callback is called on the drm_sched_fence's owning job); 209 * - any other dma_fence, in which case it will return NULL. 210 * 211 * Return: 212 * * A non-NULL pvr_queue_fence object if the dma_fence is backed by a UFO, or 213 * * NULL otherwise. 214 */ 215 static struct pvr_queue_fence * 216 to_pvr_queue_job_fence(struct dma_fence *f) 217 { 218 struct drm_sched_fence *sched_fence = to_drm_sched_fence(f); 219 220 if (sched_fence) 221 f = sched_fence->parent; 222 223 if (pvr_queue_fence_is_ufo_backed(f)) 224 return container_of(f, struct pvr_queue_fence, base); 225 226 return NULL; 227 } 228 229 static const struct dma_fence_ops pvr_queue_cccb_fence_ops = { 230 .get_driver_name = pvr_queue_fence_get_driver_name, 231 .get_timeline_name = pvr_queue_cccb_fence_get_timeline_name, 232 .release = pvr_queue_fence_release, 233 }; 234 235 /** 236 * pvr_queue_fence_put() - Put wrapper for pvr_queue_fence objects. 237 * @f: The dma_fence object to put. 238 * 239 * If the pvr_queue_fence has been initialized, we call dma_fence_put(), 240 * otherwise we free the object with dma_fence_free(). This allows us 241 * to do the right thing before and after pvr_queue_fence_init() had been 242 * called. 243 */ 244 static void pvr_queue_fence_put(struct dma_fence *f) 245 { 246 if (!f) 247 return; 248 249 if (WARN_ON(f->ops && 250 f->ops != &pvr_queue_cccb_fence_ops && 251 f->ops != &pvr_queue_job_fence_ops)) 252 return; 253 254 /* If the fence hasn't been initialized yet, free the object directly. */ 255 if (f->ops) 256 dma_fence_put(f); 257 else 258 dma_fence_free(f); 259 } 260 261 /** 262 * pvr_queue_fence_alloc() - Allocate a pvr_queue_fence fence object 263 * 264 * Call this function to allocate job CCCB and done fences. This only 265 * allocates the objects. Initialization happens when the underlying 266 * dma_fence object is to be returned to drm_sched (in prepare_job() or 267 * run_job()). 268 * 269 * Return: 270 * * A valid pointer if the allocation succeeds, or 271 * * NULL if the allocation fails. 272 */ 273 static struct dma_fence * 274 pvr_queue_fence_alloc(void) 275 { 276 struct pvr_queue_fence *fence; 277 278 fence = kzalloc_obj(*fence); 279 if (!fence) 280 return NULL; 281 282 return &fence->base; 283 } 284 285 /** 286 * pvr_queue_fence_init() - Initializes a pvr_queue_fence object. 287 * @f: The fence to initialize 288 * @queue: The queue this fence belongs to. 289 * @fence_ops: The fence operations. 290 * @fence_ctx: The fence context. 291 * 292 * Wrapper around dma_fence_init() that takes care of initializing the 293 * pvr_queue_fence::queue field too. 294 */ 295 static void 296 pvr_queue_fence_init(struct dma_fence *f, 297 struct pvr_queue *queue, 298 const struct dma_fence_ops *fence_ops, 299 struct pvr_queue_fence_ctx *fence_ctx) 300 { 301 struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base); 302 303 pvr_context_get(queue->ctx); 304 fence->queue = queue; 305 INIT_WORK(&fence->release_work, pvr_queue_fence_release_work); 306 dma_fence_init(&fence->base, fence_ops, 307 &fence_ctx->lock, fence_ctx->id, 308 atomic_inc_return(&fence_ctx->seqno)); 309 } 310 311 /** 312 * pvr_queue_cccb_fence_init() - Initializes a CCCB fence object. 313 * @fence: The fence to initialize. 314 * @queue: The queue this fence belongs to. 315 * 316 * Initializes a fence that can be used to wait for CCCB space. 317 * 318 * Should be called in the ::prepare_job() path, so the fence returned to 319 * drm_sched is valid. 320 */ 321 static void 322 pvr_queue_cccb_fence_init(struct dma_fence *fence, struct pvr_queue *queue) 323 { 324 pvr_queue_fence_init(fence, queue, &pvr_queue_cccb_fence_ops, 325 &queue->cccb_fence_ctx.base); 326 } 327 328 /** 329 * pvr_queue_job_fence_init() - Initializes a job done fence object. 330 * @fence: The fence to initialize. 331 * @queue: The queue this fence belongs to. 332 * 333 * Initializes a fence that will be signaled when the GPU is done executing 334 * a job. 335 * 336 * Should be called *before* the ::run_job() path, so the fence is initialised 337 * before being placed in the pending_list. 338 */ 339 static void 340 pvr_queue_job_fence_init(struct dma_fence *fence, struct pvr_queue *queue) 341 { 342 if (!fence->ops) 343 pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops, 344 &queue->job_fence_ctx); 345 } 346 347 /** 348 * pvr_queue_fence_ctx_init() - Queue fence context initialization. 349 * @fence_ctx: The context to initialize 350 */ 351 static void 352 pvr_queue_fence_ctx_init(struct pvr_queue_fence_ctx *fence_ctx) 353 { 354 spin_lock_init(&fence_ctx->lock); 355 fence_ctx->id = dma_fence_context_alloc(1); 356 atomic_set(&fence_ctx->seqno, 0); 357 } 358 359 static u32 ufo_cmds_size(u32 elem_count) 360 { 361 /* We can pass at most ROGUE_FWIF_CCB_CMD_MAX_UFOS per UFO-related command. */ 362 u32 full_cmd_count = elem_count / ROGUE_FWIF_CCB_CMD_MAX_UFOS; 363 u32 remaining_elems = elem_count % ROGUE_FWIF_CCB_CMD_MAX_UFOS; 364 u32 size = full_cmd_count * 365 pvr_cccb_get_size_of_cmd_with_hdr(ROGUE_FWIF_CCB_CMD_MAX_UFOS * 366 sizeof(struct rogue_fwif_ufo)); 367 368 if (remaining_elems) { 369 size += pvr_cccb_get_size_of_cmd_with_hdr(remaining_elems * 370 sizeof(struct rogue_fwif_ufo)); 371 } 372 373 return size; 374 } 375 376 static u32 job_cmds_size(struct pvr_job *job, u32 ufo_wait_count) 377 { 378 /* 379 * One UFO command per native fence this job will be waiting on (unless any are 380 * signaled by the time the job is submitted), plus a command for the job itself, 381 * plus one UFO command for the fence signaling. 382 */ 383 return ufo_cmds_size(ufo_wait_count) + 384 pvr_cccb_get_size_of_cmd_with_hdr(job->cmd_len) + 385 ufo_cmds_size(1); 386 } 387 388 static bool 389 is_paired_job_fence(struct dma_fence *fence, struct pvr_job *job) 390 { 391 /* This assumes "fence" is one of "job"'s drm_sched_job::dependencies */ 392 return job->type == DRM_PVR_JOB_TYPE_FRAGMENT && 393 job->paired_job && 394 &job->paired_job->base.s_fence->scheduled == fence; 395 } 396 397 /** 398 * job_count_remaining_native_deps() - Count the number of non-signaled native dependencies. 399 * @job: Job to operate on. 400 * 401 * Returns: Number of non-signaled native deps remaining. 402 */ 403 static unsigned long job_count_remaining_native_deps(struct pvr_job *job) 404 { 405 unsigned long remaining_count = 0; 406 struct dma_fence *fence = NULL; 407 unsigned long index; 408 409 xa_for_each(&job->base.dependencies, index, fence) { 410 struct pvr_queue_fence *jfence; 411 412 if (is_paired_job_fence(fence, job)) { 413 /* 414 * A fence between paired jobs won't resolve to a pvr_queue_fence (i.e. 415 * be backed by a UFO) until the jobs have been submitted, together. 416 * The submitting code will insert a partial render fence command for this. 417 */ 418 WARN_ON(dma_fence_is_signaled(fence)); 419 remaining_count++; 420 continue; 421 } 422 423 jfence = to_pvr_queue_job_fence(fence); 424 if (!jfence) 425 continue; 426 427 if (!dma_fence_is_signaled(&jfence->base)) 428 remaining_count++; 429 } 430 431 return remaining_count; 432 } 433 434 /** 435 * pvr_queue_get_job_cccb_fence() - Get the CCCB fence attached to a job. 436 * @queue: The queue this job will be submitted to. 437 * @job: The job to get the CCCB fence on. 438 * 439 * The CCCB fence is a synchronization primitive allowing us to delay job 440 * submission until there's enough space in the CCCB to submit the job. 441 * 442 * Return: 443 * * NULL if there's enough space in the CCCB to submit this job, or 444 * * A valid dma_fence object otherwise. 445 */ 446 static struct dma_fence * 447 pvr_queue_get_job_cccb_fence(struct pvr_queue *queue, struct pvr_job *job) 448 { 449 struct pvr_queue_fence *cccb_fence; 450 unsigned int native_deps_remaining; 451 452 /* If the fence is NULL, that means we already checked that we had 453 * enough space in the cccb for our job. 454 */ 455 if (!job->cccb_fence) 456 return NULL; 457 458 mutex_lock(&queue->cccb_fence_ctx.job_lock); 459 460 /* Count remaining native dependencies and check if the job fits in the CCCB. */ 461 native_deps_remaining = job_count_remaining_native_deps(job); 462 if (pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) { 463 pvr_queue_fence_put(job->cccb_fence); 464 job->cccb_fence = NULL; 465 goto out_unlock; 466 } 467 468 /* There should be no job attached to the CCCB fence context: 469 * drm_sched_entity guarantees that jobs are submitted one at a time. 470 */ 471 if (WARN_ON(queue->cccb_fence_ctx.job)) 472 pvr_job_put(queue->cccb_fence_ctx.job); 473 474 queue->cccb_fence_ctx.job = pvr_job_get(job); 475 476 /* Initialize the fence before returning it. */ 477 cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base); 478 if (!WARN_ON(cccb_fence->queue)) 479 pvr_queue_cccb_fence_init(job->cccb_fence, queue); 480 481 out_unlock: 482 mutex_unlock(&queue->cccb_fence_ctx.job_lock); 483 484 return dma_fence_get(job->cccb_fence); 485 } 486 487 /** 488 * pvr_queue_get_job_kccb_fence() - Get the KCCB fence attached to a job. 489 * @queue: The queue this job will be submitted to. 490 * @job: The job to get the KCCB fence on. 491 * 492 * The KCCB fence is a synchronization primitive allowing us to delay job 493 * submission until there's enough space in the KCCB to submit the job. 494 * 495 * Return: 496 * * NULL if there's enough space in the KCCB to submit this job, or 497 * * A valid dma_fence object otherwise. 498 */ 499 static struct dma_fence * 500 pvr_queue_get_job_kccb_fence(struct pvr_queue *queue, struct pvr_job *job) 501 { 502 struct pvr_device *pvr_dev = queue->ctx->pvr_dev; 503 struct dma_fence *kccb_fence = NULL; 504 505 /* If the fence is NULL, that means we already checked that we had 506 * enough space in the KCCB for our job. 507 */ 508 if (!job->kccb_fence) 509 return NULL; 510 511 if (!WARN_ON(job->kccb_fence->ops)) { 512 kccb_fence = pvr_kccb_reserve_slot(pvr_dev, job->kccb_fence); 513 job->kccb_fence = NULL; 514 } 515 516 return kccb_fence; 517 } 518 519 static struct dma_fence * 520 pvr_queue_get_paired_frag_job_dep(struct pvr_job *job) 521 { 522 struct pvr_job *frag_job = job->type == DRM_PVR_JOB_TYPE_GEOMETRY ? 523 job->paired_job : NULL; 524 struct pvr_queue *frag_queue = frag_job ? frag_job->ctx->queues.fragment : NULL; 525 struct dma_fence *f; 526 unsigned long index; 527 528 if (!frag_job) 529 return NULL; 530 531 /* Have the geometry job wait on the paired fragment job's dependencies as well. */ 532 xa_for_each(&frag_job->base.dependencies, index, f) { 533 /* Skip already signaled fences. */ 534 if (dma_fence_is_signaled(f)) 535 continue; 536 537 /* 538 * The paired job fence won't be signaled until both jobs have 539 * been submitted, so we can't wait on it to schedule them. 540 */ 541 if (f == &job->base.s_fence->scheduled) 542 continue; 543 544 return dma_fence_get(f); 545 } 546 547 /* Initialize the paired fragment job's done_fence, so we can signal it. */ 548 pvr_queue_job_fence_init(frag_job->done_fence, frag_queue); 549 550 return pvr_queue_get_job_cccb_fence(frag_queue, frag_job); 551 } 552 553 /** 554 * pvr_queue_prepare_job() - Return the next internal dependencies expressed as a dma_fence. 555 * @sched_job: The job to query the next internal dependency on 556 * @s_entity: The entity this job is queue on. 557 * 558 * After iterating over drm_sched_job::dependencies, drm_sched let the driver return 559 * its own internal dependencies. We use this function to return our internal dependencies. 560 */ 561 static struct dma_fence * 562 pvr_queue_prepare_job(struct drm_sched_job *sched_job, 563 struct drm_sched_entity *s_entity) 564 { 565 struct pvr_job *job = container_of(sched_job, struct pvr_job, base); 566 struct pvr_queue *queue = container_of(s_entity, struct pvr_queue, entity); 567 struct dma_fence *internal_dep = NULL; 568 569 if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) { 570 /* 571 * This will be called on a paired fragment job after being submitted 572 * to the firmware as part of the paired geometry job's submission. 573 * We can tell if this is the case and bail early from whether run_job() 574 * has been called on the geometry job, which would issue a pm ref on 575 * this job as well. 576 */ 577 if (job->has_pm_ref) 578 return NULL; 579 } 580 581 /* 582 * Initialize the done_fence, so we can signal it. This must be done 583 * here because otherwise by the time of run_job() the job will end up 584 * in the pending list without a valid fence. 585 */ 586 pvr_queue_job_fence_init(job->done_fence, queue); 587 588 /* CCCB fence is used to make sure we have enough space in the CCCB to 589 * submit our commands. 590 */ 591 internal_dep = pvr_queue_get_job_cccb_fence(queue, job); 592 593 /* KCCB fence is used to make sure we have a KCCB slot to queue our 594 * CMD_KICK. 595 */ 596 if (!internal_dep) 597 internal_dep = pvr_queue_get_job_kccb_fence(queue, job); 598 599 /* Any extra internal dependency should be added here, using the following 600 * pattern: 601 * 602 * if (!internal_dep) 603 * internal_dep = pvr_queue_get_job_xxxx_fence(queue, job); 604 */ 605 606 /* The paired job fence should come last, when everything else is ready. */ 607 if (!internal_dep) 608 internal_dep = pvr_queue_get_paired_frag_job_dep(job); 609 610 return internal_dep; 611 } 612 613 /** 614 * pvr_queue_update_active_state_locked() - Update the queue active state. 615 * @queue: Queue to update the state on. 616 * 617 * Locked version of pvr_queue_update_active_state(). Must be called with 618 * pvr_device::queue::lock held. 619 */ 620 static void pvr_queue_update_active_state_locked(struct pvr_queue *queue) 621 { 622 struct pvr_device *pvr_dev = queue->ctx->pvr_dev; 623 624 lockdep_assert_held(&pvr_dev->queues.lock); 625 626 /* The queue is temporary out of any list when it's being reset, 627 * we don't want a call to pvr_queue_update_active_state_locked() 628 * to re-insert it behind our back. 629 */ 630 if (list_empty(&queue->node)) 631 return; 632 633 if (!atomic_read(&queue->in_flight_job_count)) 634 list_move_tail(&queue->node, &pvr_dev->queues.idle); 635 else 636 list_move_tail(&queue->node, &pvr_dev->queues.active); 637 } 638 639 /** 640 * pvr_queue_update_active_state() - Update the queue active state. 641 * @queue: Queue to update the state on. 642 * 643 * Active state is based on the in_flight_job_count value. 644 * 645 * Updating the active state implies moving the queue in or out of the 646 * active queue list, which also defines whether the queue is checked 647 * or not when a FW event is received. 648 * 649 * This function should be called any time a job is submitted or it done 650 * fence is signaled. 651 */ 652 static void pvr_queue_update_active_state(struct pvr_queue *queue) 653 { 654 struct pvr_device *pvr_dev = queue->ctx->pvr_dev; 655 656 mutex_lock(&pvr_dev->queues.lock); 657 pvr_queue_update_active_state_locked(queue); 658 mutex_unlock(&pvr_dev->queues.lock); 659 } 660 661 static void pvr_queue_submit_job_to_cccb(struct pvr_job *job) 662 { 663 struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler); 664 struct rogue_fwif_ufo ufos[ROGUE_FWIF_CCB_CMD_MAX_UFOS]; 665 struct pvr_cccb *cccb = &queue->cccb; 666 struct pvr_queue_fence *jfence; 667 struct dma_fence *fence; 668 unsigned long index; 669 u32 ufo_count = 0; 670 671 /* We need to add the queue to the active list before updating the CCCB, 672 * otherwise we might miss the FW event informing us that something 673 * happened on this queue. 674 */ 675 atomic_inc(&queue->in_flight_job_count); 676 pvr_queue_update_active_state(queue); 677 678 xa_for_each(&job->base.dependencies, index, fence) { 679 jfence = to_pvr_queue_job_fence(fence); 680 if (!jfence) 681 continue; 682 683 /* Some dependencies might have been signaled since prepare_job() */ 684 if (dma_fence_is_signaled(&jfence->base)) 685 continue; 686 687 pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj, 688 &ufos[ufo_count].addr); 689 ufos[ufo_count++].value = jfence->base.seqno; 690 691 if (ufo_count == ARRAY_SIZE(ufos)) { 692 pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR, 693 sizeof(ufos), ufos, 0, 0); 694 ufo_count = 0; 695 } 696 } 697 698 if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) { 699 /* 700 * The loop above will only process dependencies backed by a UFO i.e. with 701 * a valid parent fence assigned, but the paired job dependency won't have 702 * one until both jobs have been submitted. Access the parent fence directly 703 * here instead, submitting it last as partial render fence. 704 */ 705 jfence = to_pvr_queue_job_fence(job->paired_job->done_fence); 706 if (!WARN_ON(!jfence)) { 707 pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj, 708 &ufos[ufo_count].addr); 709 ufos[ufo_count++].value = job->paired_job->done_fence->seqno; 710 } 711 } 712 713 if (ufo_count) { 714 pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR, 715 sizeof(ufos[0]) * ufo_count, ufos, 0, 0); 716 } 717 718 if (job->type == DRM_PVR_JOB_TYPE_GEOMETRY && job->paired_job) { 719 struct rogue_fwif_cmd_geom *cmd = job->cmd; 720 721 /* Reference value for the partial render test is the current queue fence 722 * seqno minus one. 723 */ 724 pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj, 725 &cmd->partial_render_geom_frag_fence.addr); 726 cmd->partial_render_geom_frag_fence.value = job->done_fence->seqno - 1; 727 } 728 729 trace_pvr_job_submit_fw(job); 730 731 /* Submit job to FW */ 732 pvr_cccb_write_command_with_header(cccb, job->fw_ccb_cmd_type, job->cmd_len, job->cmd, 733 job->id, job->id); 734 735 /* Update command to signal the job fence. */ 736 pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj, &ufos[0].addr); 737 ufos[0].value = job->done_fence->seqno; 738 pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_UPDATE, 739 sizeof(ufos[0]), ufos, 0, 0); 740 } 741 742 /** 743 * pvr_queue_run_job() - Submit a job to the FW. 744 * @sched_job: The job to submit. 745 * 746 * This function is called when all non-native dependencies have been met and 747 * when the commands resulting from this job are guaranteed to fit in the CCCB. 748 */ 749 static struct dma_fence *pvr_queue_run_job(struct drm_sched_job *sched_job) 750 { 751 struct pvr_job *job = container_of(sched_job, struct pvr_job, base); 752 struct pvr_device *pvr_dev = job->pvr_dev; 753 int err; 754 755 /* The fragment job is issued along the geometry job when we use combined 756 * geom+frag kicks. When we get there, we should simply return the 757 * done_fence that's been initialized earlier. 758 */ 759 if (job->paired_job && job->type == DRM_PVR_JOB_TYPE_FRAGMENT && 760 job->done_fence->ops) { 761 return dma_fence_get(job->done_fence); 762 } 763 764 /* The only kind of jobs that can be paired are geometry and fragment, and 765 * we bail out early if we see a fragment job that's paired with a geometry job. 766 * Paired jobs must also target the same context and point to the same HWRT. 767 */ 768 if (WARN_ON(job->paired_job && 769 (job->type != DRM_PVR_JOB_TYPE_GEOMETRY || 770 job->paired_job->type != DRM_PVR_JOB_TYPE_FRAGMENT || 771 job->hwrt != job->paired_job->hwrt || 772 job->ctx != job->paired_job->ctx))) 773 return ERR_PTR(-EINVAL); 774 775 err = pvr_job_get_pm_ref(job); 776 if (WARN_ON(err)) 777 return ERR_PTR(err); 778 779 if (job->paired_job) { 780 err = pvr_job_get_pm_ref(job->paired_job); 781 if (WARN_ON(err)) 782 return ERR_PTR(err); 783 } 784 785 /* Submit our job to the CCCB */ 786 pvr_queue_submit_job_to_cccb(job); 787 788 if (job->paired_job) { 789 struct pvr_job *geom_job = job; 790 struct pvr_job *frag_job = job->paired_job; 791 struct pvr_queue *geom_queue = job->ctx->queues.geometry; 792 struct pvr_queue *frag_queue = job->ctx->queues.fragment; 793 794 /* Submit the fragment job along the geometry job and send a combined kick. */ 795 pvr_queue_submit_job_to_cccb(frag_job); 796 pvr_cccb_send_kccb_combined_kick(pvr_dev, 797 &geom_queue->cccb, &frag_queue->cccb, 798 pvr_context_get_fw_addr(geom_job->ctx) + 799 geom_queue->ctx_offset, 800 pvr_context_get_fw_addr(frag_job->ctx) + 801 frag_queue->ctx_offset, 802 job->hwrt, 803 frag_job->fw_ccb_cmd_type == 804 ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR); 805 } else { 806 struct pvr_queue *queue = container_of(job->base.sched, 807 struct pvr_queue, scheduler); 808 809 pvr_cccb_send_kccb_kick(pvr_dev, &queue->cccb, 810 pvr_context_get_fw_addr(job->ctx) + queue->ctx_offset, 811 job->hwrt); 812 } 813 814 return dma_fence_get(job->done_fence); 815 } 816 817 static void pvr_queue_stop(struct pvr_queue *queue, struct pvr_job *bad_job) 818 { 819 drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL); 820 } 821 822 static void pvr_queue_start(struct pvr_queue *queue) 823 { 824 struct pvr_job *job; 825 826 /* Make sure we CPU-signal the UFO object, so other queues don't get 827 * blocked waiting on it. 828 */ 829 *queue->timeline_ufo.value = atomic_read(&queue->job_fence_ctx.seqno); 830 831 list_for_each_entry(job, &queue->scheduler.pending_list, base.list) { 832 if (dma_fence_is_signaled(job->done_fence)) { 833 /* Jobs might have completed after drm_sched_stop() was called. 834 * In that case, re-assign the parent field to the done_fence. 835 */ 836 WARN_ON(job->base.s_fence->parent); 837 job->base.s_fence->parent = dma_fence_get(job->done_fence); 838 } else { 839 /* If we had unfinished jobs, flag the entity as guilty so no 840 * new job can be submitted. 841 */ 842 atomic_set(&queue->ctx->faulty, 1); 843 } 844 } 845 846 drm_sched_start(&queue->scheduler, 0); 847 } 848 849 /** 850 * pvr_queue_timedout_job() - Handle a job timeout event. 851 * @s_job: The job this timeout occurred on. 852 * 853 * FIXME: We don't do anything here to unblock the situation, we just stop+start 854 * the scheduler, and re-assign parent fences in the middle. 855 * 856 * Return: 857 * *%DRM_GPU_SCHED_STAT_NO_HANG if the job fence has already been 858 * signaled, or 859 * *%DRM_GPU_SCHED_STAT_RESET otherwise. 860 */ 861 static enum drm_gpu_sched_stat 862 pvr_queue_timedout_job(struct drm_sched_job *s_job) 863 { 864 struct drm_gpu_scheduler *sched = s_job->sched; 865 struct pvr_queue *queue = container_of(sched, struct pvr_queue, scheduler); 866 struct pvr_device *pvr_dev = queue->ctx->pvr_dev; 867 struct pvr_job *job; 868 u32 job_count = 0; 869 870 if (dma_fence_is_signaled(s_job->s_fence->parent)) 871 return DRM_GPU_SCHED_STAT_NO_HANG; 872 873 dev_err(sched->dev, "Job timeout\n"); 874 875 /* Before we stop the scheduler, make sure the queue is out of any list, so 876 * any call to pvr_queue_update_active_state_locked() that might happen 877 * until the scheduler is really stopped doesn't end up re-inserting the 878 * queue in the active list. This would cause 879 * pvr_queue_signal_done_fences() and drm_sched_stop() to race with each 880 * other when accessing the pending_list, since drm_sched_stop() doesn't 881 * grab the job_list_lock when modifying the list (it's assuming the 882 * only other accessor is the scheduler, and it's safe to not grab the 883 * lock since it's stopped). 884 */ 885 mutex_lock(&pvr_dev->queues.lock); 886 list_del_init(&queue->node); 887 mutex_unlock(&pvr_dev->queues.lock); 888 889 drm_sched_stop(sched, s_job); 890 891 /* Re-assign job parent fences. */ 892 list_for_each_entry(job, &sched->pending_list, base.list) { 893 job->base.s_fence->parent = dma_fence_get(job->done_fence); 894 job_count++; 895 } 896 WARN_ON(atomic_read(&queue->in_flight_job_count) != job_count); 897 898 /* Re-insert the queue in the proper list, and kick a queue processing 899 * operation if there were jobs pending. 900 */ 901 mutex_lock(&pvr_dev->queues.lock); 902 if (!job_count) { 903 list_move_tail(&queue->node, &pvr_dev->queues.idle); 904 } else { 905 atomic_set(&queue->in_flight_job_count, job_count); 906 list_move_tail(&queue->node, &pvr_dev->queues.active); 907 pvr_queue_process(queue); 908 } 909 mutex_unlock(&pvr_dev->queues.lock); 910 911 drm_sched_start(sched, 0); 912 913 return DRM_GPU_SCHED_STAT_RESET; 914 } 915 916 /** 917 * pvr_queue_free_job() - Release the reference the scheduler had on a job object. 918 * @sched_job: Job object to free. 919 */ 920 static void pvr_queue_free_job(struct drm_sched_job *sched_job) 921 { 922 struct pvr_job *job = container_of(sched_job, struct pvr_job, base); 923 924 drm_sched_job_cleanup(sched_job); 925 926 if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) 927 pvr_job_put(job->paired_job); 928 929 job->paired_job = NULL; 930 pvr_job_put(job); 931 } 932 933 static const struct drm_sched_backend_ops pvr_queue_sched_ops = { 934 .prepare_job = pvr_queue_prepare_job, 935 .run_job = pvr_queue_run_job, 936 .timedout_job = pvr_queue_timedout_job, 937 .free_job = pvr_queue_free_job, 938 }; 939 940 /** 941 * pvr_queue_fence_is_native() - Check if a dma_fence is native to this driver. 942 * @f: Fence to test. 943 * 944 * Check if the fence we're being passed is a drm_sched_fence that is coming from this driver. 945 * 946 * It may be a UFO-backed fence i.e. a fence that can be signaled or waited upon FW-side, 947 * such as pvr_job::done_fence objects that are backed by the timeline UFO attached to the queue 948 * they are pushed to. 949 */ 950 bool pvr_queue_fence_is_native(struct dma_fence *f) 951 { 952 struct drm_sched_fence *sched_fence = f ? to_drm_sched_fence(f) : NULL; 953 954 if (sched_fence && 955 sched_fence->sched->ops == &pvr_queue_sched_ops) 956 return true; 957 958 return pvr_queue_fence_is_ufo_backed(f); 959 } 960 961 /** 962 * pvr_queue_signal_done_fences() - Signal done fences. 963 * @queue: Queue to check. 964 * 965 * Signal done fences of jobs whose seqno is less than the current value of 966 * the UFO object attached to the queue. 967 */ 968 static void 969 pvr_queue_signal_done_fences(struct pvr_queue *queue) 970 { 971 struct pvr_job *job, *tmp_job; 972 u32 cur_seqno; 973 974 spin_lock(&queue->scheduler.job_list_lock); 975 cur_seqno = *queue->timeline_ufo.value; 976 list_for_each_entry_safe(job, tmp_job, &queue->scheduler.pending_list, base.list) { 977 if ((int)(cur_seqno - lower_32_bits(job->done_fence->seqno)) < 0) 978 break; 979 980 if (!dma_fence_is_signaled(job->done_fence)) { 981 dma_fence_signal(job->done_fence); 982 pvr_job_release_pm_ref(job); 983 atomic_dec(&queue->in_flight_job_count); 984 } 985 } 986 spin_unlock(&queue->scheduler.job_list_lock); 987 } 988 989 /** 990 * pvr_queue_check_job_waiting_for_cccb_space() - Check if a job waiting for CCCB space 991 * can be unblocked and pushed to the CCCB. 992 * @queue: Queue to check 993 * 994 * If we have a job waiting for CCCB, and this job now fits in the CCCB, we signal 995 * its CCCB fence, which should kick drm_sched. 996 */ 997 static void 998 pvr_queue_check_job_waiting_for_cccb_space(struct pvr_queue *queue) 999 { 1000 struct pvr_queue_fence *cccb_fence; 1001 u32 native_deps_remaining; 1002 struct pvr_job *job; 1003 1004 mutex_lock(&queue->cccb_fence_ctx.job_lock); 1005 job = queue->cccb_fence_ctx.job; 1006 if (!job) 1007 goto out_unlock; 1008 1009 /* If we have a job attached to the CCCB fence context, its CCCB fence 1010 * shouldn't be NULL. 1011 */ 1012 if (WARN_ON(!job->cccb_fence)) { 1013 job = NULL; 1014 goto out_unlock; 1015 } 1016 1017 /* If we get there, CCCB fence has to be initialized. */ 1018 cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base); 1019 if (WARN_ON(!cccb_fence->queue)) { 1020 job = NULL; 1021 goto out_unlock; 1022 } 1023 1024 /* Evict signaled dependencies before checking for CCCB space. 1025 * If the job fits, signal the CCCB fence, this should unblock 1026 * the drm_sched_entity. 1027 */ 1028 native_deps_remaining = job_count_remaining_native_deps(job); 1029 if (!pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) { 1030 job = NULL; 1031 goto out_unlock; 1032 } 1033 1034 dma_fence_signal(job->cccb_fence); 1035 pvr_queue_fence_put(job->cccb_fence); 1036 job->cccb_fence = NULL; 1037 queue->cccb_fence_ctx.job = NULL; 1038 1039 out_unlock: 1040 mutex_unlock(&queue->cccb_fence_ctx.job_lock); 1041 1042 pvr_job_put(job); 1043 } 1044 1045 /** 1046 * pvr_queue_process() - Process events that happened on a queue. 1047 * @queue: Queue to check 1048 * 1049 * Signal job fences and check if jobs waiting for CCCB space can be unblocked. 1050 */ 1051 void pvr_queue_process(struct pvr_queue *queue) 1052 { 1053 lockdep_assert_held(&queue->ctx->pvr_dev->queues.lock); 1054 1055 pvr_queue_check_job_waiting_for_cccb_space(queue); 1056 pvr_queue_signal_done_fences(queue); 1057 pvr_queue_update_active_state_locked(queue); 1058 } 1059 1060 static u32 get_dm_type(struct pvr_queue *queue) 1061 { 1062 switch (queue->type) { 1063 case DRM_PVR_JOB_TYPE_GEOMETRY: 1064 return PVR_FWIF_DM_GEOM; 1065 case DRM_PVR_JOB_TYPE_TRANSFER_FRAG: 1066 case DRM_PVR_JOB_TYPE_FRAGMENT: 1067 return PVR_FWIF_DM_FRAG; 1068 case DRM_PVR_JOB_TYPE_COMPUTE: 1069 return PVR_FWIF_DM_CDM; 1070 } 1071 1072 return ~0; 1073 } 1074 1075 /** 1076 * init_fw_context() - Initializes the queue part of a FW context. 1077 * @queue: Queue object to initialize the FW context for. 1078 * @fw_ctx_map: The FW context CPU mapping. 1079 * 1080 * FW contexts are containing various states, one of them being a per-queue state 1081 * that needs to be initialized for each queue being exposed by a context. This 1082 * function takes care of that. 1083 */ 1084 static void init_fw_context(struct pvr_queue *queue, void *fw_ctx_map) 1085 { 1086 struct pvr_context *ctx = queue->ctx; 1087 struct pvr_fw_object *fw_mem_ctx_obj = pvr_vm_get_fw_mem_context(ctx->vm_ctx); 1088 struct rogue_fwif_fwcommoncontext *cctx_fw; 1089 struct pvr_cccb *cccb = &queue->cccb; 1090 1091 cctx_fw = fw_ctx_map + queue->ctx_offset; 1092 cctx_fw->ccbctl_fw_addr = cccb->ctrl_fw_addr; 1093 cctx_fw->ccb_fw_addr = cccb->cccb_fw_addr; 1094 1095 cctx_fw->dm = get_dm_type(queue); 1096 cctx_fw->priority = ctx->priority; 1097 cctx_fw->priority_seq_num = 0; 1098 cctx_fw->max_deadline_ms = MAX_DEADLINE_MS; 1099 cctx_fw->pid = task_tgid_nr(current); 1100 cctx_fw->server_common_context_id = ctx->ctx_id; 1101 1102 pvr_fw_object_get_fw_addr(fw_mem_ctx_obj, &cctx_fw->fw_mem_context_fw_addr); 1103 1104 pvr_fw_object_get_fw_addr(queue->reg_state_obj, &cctx_fw->context_state_addr); 1105 } 1106 1107 /** 1108 * pvr_queue_cleanup_fw_context() - Wait for the FW context to be idle and clean it up. 1109 * @queue: Queue on FW context to clean up. 1110 * 1111 * Return: 1112 * * 0 on success, 1113 * * Any error returned by pvr_fw_structure_cleanup() otherwise. 1114 */ 1115 static int pvr_queue_cleanup_fw_context(struct pvr_queue *queue) 1116 { 1117 if (!queue->ctx->fw_obj) 1118 return 0; 1119 1120 return pvr_fw_structure_cleanup(queue->ctx->pvr_dev, 1121 ROGUE_FWIF_CLEANUP_FWCOMMONCONTEXT, 1122 queue->ctx->fw_obj, queue->ctx_offset); 1123 } 1124 1125 /** 1126 * pvr_queue_job_init() - Initialize queue related fields in a pvr_job object. 1127 * @job: The job to initialize. 1128 * @drm_client_id: drm_file.client_id submitting the job 1129 * 1130 * Bind the job to a queue and allocate memory to guarantee pvr_queue_job_arm() 1131 * and pvr_queue_job_push() can't fail. We also make sure the context type is 1132 * valid and the job can fit in the CCCB. 1133 * 1134 * Return: 1135 * * 0 on success, or 1136 * * An error code if something failed. 1137 */ 1138 int pvr_queue_job_init(struct pvr_job *job, u64 drm_client_id) 1139 { 1140 /* Fragment jobs need at least one native fence wait on the geometry job fence. */ 1141 u32 min_native_dep_count = job->type == DRM_PVR_JOB_TYPE_FRAGMENT ? 1 : 0; 1142 struct pvr_queue *queue; 1143 int err; 1144 1145 if (atomic_read(&job->ctx->faulty)) 1146 return -EIO; 1147 1148 queue = pvr_context_get_queue_for_job(job->ctx, job->type); 1149 if (!queue) 1150 return -EINVAL; 1151 1152 if (!pvr_cccb_cmdseq_can_fit(&queue->cccb, job_cmds_size(job, min_native_dep_count))) 1153 return -E2BIG; 1154 1155 err = drm_sched_job_init(&job->base, &queue->entity, 1, THIS_MODULE, drm_client_id); 1156 if (err) 1157 return err; 1158 1159 job->cccb_fence = pvr_queue_fence_alloc(); 1160 job->kccb_fence = pvr_kccb_fence_alloc(); 1161 job->done_fence = pvr_queue_fence_alloc(); 1162 if (!job->cccb_fence || !job->kccb_fence || !job->done_fence) 1163 return -ENOMEM; 1164 1165 return 0; 1166 } 1167 1168 /** 1169 * pvr_queue_job_arm() - Arm a job object. 1170 * @job: The job to arm. 1171 * 1172 * Initializes fences and return the drm_sched finished fence so it can 1173 * be exposed to the outside world. Once this function is called, you should 1174 * make sure the job is pushed using pvr_queue_job_push(), or guarantee that 1175 * no one grabbed a reference to the returned fence. The latter can happen if 1176 * we do multi-job submission, and something failed when creating/initializing 1177 * a job. In that case, we know the fence didn't leave the driver, and we 1178 * can thus guarantee nobody will wait on an dead fence object. 1179 * 1180 * Return: 1181 * * A dma_fence object. 1182 */ 1183 struct dma_fence *pvr_queue_job_arm(struct pvr_job *job) 1184 { 1185 drm_sched_job_arm(&job->base); 1186 1187 return &job->base.s_fence->finished; 1188 } 1189 1190 /** 1191 * pvr_queue_job_cleanup() - Cleanup fence/scheduler related fields in the job object. 1192 * @job: The job to cleanup. 1193 * 1194 * Should be called in the job release path. 1195 */ 1196 void pvr_queue_job_cleanup(struct pvr_job *job) 1197 { 1198 pvr_queue_fence_put(job->done_fence); 1199 pvr_queue_fence_put(job->cccb_fence); 1200 pvr_kccb_fence_put(job->kccb_fence); 1201 1202 if (job->base.s_fence) 1203 drm_sched_job_cleanup(&job->base); 1204 1205 trace_pvr_job_done(job); 1206 } 1207 1208 /** 1209 * pvr_queue_job_push() - Push a job to its queue. 1210 * @job: The job to push. 1211 * 1212 * Must be called after pvr_queue_job_init() and after all dependencies 1213 * have been added to the job. This will effectively queue the job to 1214 * the drm_sched_entity attached to the queue. We grab a reference on 1215 * the job object, so the caller is free to drop its reference when it's 1216 * done accessing the job object. 1217 */ 1218 void pvr_queue_job_push(struct pvr_job *job) 1219 { 1220 struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler); 1221 1222 /* Keep track of the last queued job scheduled fence for combined submit. */ 1223 dma_fence_put(queue->last_queued_job_scheduled_fence); 1224 queue->last_queued_job_scheduled_fence = dma_fence_get(&job->base.s_fence->scheduled); 1225 1226 pvr_job_get(job); 1227 drm_sched_entity_push_job(&job->base); 1228 } 1229 1230 static void reg_state_init(void *cpu_ptr, void *priv) 1231 { 1232 struct pvr_queue *queue = priv; 1233 1234 if (queue->type == DRM_PVR_JOB_TYPE_GEOMETRY) { 1235 struct rogue_fwif_geom_ctx_state *geom_ctx_state_fw = cpu_ptr; 1236 1237 geom_ctx_state_fw->geom_core[0].geom_reg_vdm_call_stack_pointer_init = 1238 queue->callstack_addr; 1239 } 1240 } 1241 1242 /** 1243 * pvr_queue_create() - Create a queue object. 1244 * @ctx: The context this queue will be attached to. 1245 * @type: The type of jobs being pushed to this queue. 1246 * @args: The arguments passed to the context creation function. 1247 * @fw_ctx_map: CPU mapping of the FW context object. 1248 * 1249 * Create a queue object that will be used to queue and track jobs. 1250 * 1251 * Return: 1252 * * A valid pointer to a pvr_queue object, or 1253 * * An error pointer if the creation/initialization failed. 1254 */ 1255 struct pvr_queue *pvr_queue_create(struct pvr_context *ctx, 1256 enum drm_pvr_job_type type, 1257 struct drm_pvr_ioctl_create_context_args *args, 1258 void *fw_ctx_map) 1259 { 1260 static const struct { 1261 u32 cccb_size; 1262 const char *name; 1263 } props[] = { 1264 [DRM_PVR_JOB_TYPE_GEOMETRY] = { 1265 .cccb_size = CTX_GEOM_CCCB_SIZE_LOG2, 1266 .name = "geometry", 1267 }, 1268 [DRM_PVR_JOB_TYPE_FRAGMENT] = { 1269 .cccb_size = CTX_FRAG_CCCB_SIZE_LOG2, 1270 .name = "fragment" 1271 }, 1272 [DRM_PVR_JOB_TYPE_COMPUTE] = { 1273 .cccb_size = CTX_COMPUTE_CCCB_SIZE_LOG2, 1274 .name = "compute" 1275 }, 1276 [DRM_PVR_JOB_TYPE_TRANSFER_FRAG] = { 1277 .cccb_size = CTX_TRANSFER_CCCB_SIZE_LOG2, 1278 .name = "transfer_frag" 1279 }, 1280 }; 1281 struct pvr_device *pvr_dev = ctx->pvr_dev; 1282 const struct drm_sched_init_args sched_args = { 1283 .ops = &pvr_queue_sched_ops, 1284 .submit_wq = pvr_dev->sched_wq, 1285 .credit_limit = 64 * 1024, 1286 .hang_limit = 1, 1287 .timeout = msecs_to_jiffies(500), 1288 .timeout_wq = pvr_dev->sched_wq, 1289 .name = "pvr-queue", 1290 .dev = pvr_dev->base.dev, 1291 }; 1292 struct drm_gpu_scheduler *sched; 1293 struct pvr_queue *queue; 1294 int ctx_state_size, err; 1295 void *cpu_map; 1296 1297 if (WARN_ON(type >= sizeof(props))) 1298 return ERR_PTR(-EINVAL); 1299 1300 switch (ctx->type) { 1301 case DRM_PVR_CTX_TYPE_RENDER: 1302 if (type != DRM_PVR_JOB_TYPE_GEOMETRY && 1303 type != DRM_PVR_JOB_TYPE_FRAGMENT) 1304 return ERR_PTR(-EINVAL); 1305 break; 1306 case DRM_PVR_CTX_TYPE_COMPUTE: 1307 if (type != DRM_PVR_JOB_TYPE_COMPUTE) 1308 return ERR_PTR(-EINVAL); 1309 break; 1310 case DRM_PVR_CTX_TYPE_TRANSFER_FRAG: 1311 if (type != DRM_PVR_JOB_TYPE_TRANSFER_FRAG) 1312 return ERR_PTR(-EINVAL); 1313 break; 1314 default: 1315 return ERR_PTR(-EINVAL); 1316 } 1317 1318 ctx_state_size = get_ctx_state_size(pvr_dev, type); 1319 if (ctx_state_size < 0) 1320 return ERR_PTR(ctx_state_size); 1321 1322 queue = kzalloc_obj(*queue); 1323 if (!queue) 1324 return ERR_PTR(-ENOMEM); 1325 1326 queue->type = type; 1327 queue->ctx_offset = get_ctx_offset(type); 1328 queue->ctx = ctx; 1329 queue->callstack_addr = args->callstack_addr; 1330 sched = &queue->scheduler; 1331 INIT_LIST_HEAD(&queue->node); 1332 mutex_init(&queue->cccb_fence_ctx.job_lock); 1333 pvr_queue_fence_ctx_init(&queue->cccb_fence_ctx.base); 1334 pvr_queue_fence_ctx_init(&queue->job_fence_ctx); 1335 1336 err = pvr_cccb_init(pvr_dev, &queue->cccb, props[type].cccb_size, props[type].name); 1337 if (err) 1338 goto err_free_queue; 1339 1340 err = pvr_fw_object_create(pvr_dev, ctx_state_size, 1341 PVR_BO_FW_FLAGS_DEVICE_UNCACHED, 1342 reg_state_init, queue, &queue->reg_state_obj); 1343 if (err) 1344 goto err_cccb_fini; 1345 1346 init_fw_context(queue, fw_ctx_map); 1347 1348 if (type != DRM_PVR_JOB_TYPE_GEOMETRY && type != DRM_PVR_JOB_TYPE_FRAGMENT && 1349 args->callstack_addr) { 1350 err = -EINVAL; 1351 goto err_release_reg_state; 1352 } 1353 1354 cpu_map = pvr_fw_object_create_and_map(pvr_dev, sizeof(*queue->timeline_ufo.value), 1355 PVR_BO_FW_FLAGS_DEVICE_UNCACHED, 1356 NULL, NULL, &queue->timeline_ufo.fw_obj); 1357 if (IS_ERR(cpu_map)) { 1358 err = PTR_ERR(cpu_map); 1359 goto err_release_reg_state; 1360 } 1361 1362 queue->timeline_ufo.value = cpu_map; 1363 1364 err = drm_sched_init(&queue->scheduler, &sched_args); 1365 if (err) 1366 goto err_release_ufo; 1367 1368 err = drm_sched_entity_init(&queue->entity, 1369 DRM_SCHED_PRIORITY_KERNEL, 1370 &sched, 1, &ctx->faulty); 1371 if (err) 1372 goto err_sched_fini; 1373 1374 mutex_lock(&pvr_dev->queues.lock); 1375 list_add_tail(&queue->node, &pvr_dev->queues.idle); 1376 mutex_unlock(&pvr_dev->queues.lock); 1377 1378 return queue; 1379 1380 err_sched_fini: 1381 drm_sched_fini(&queue->scheduler); 1382 1383 err_release_ufo: 1384 pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj); 1385 1386 err_release_reg_state: 1387 pvr_fw_object_destroy(queue->reg_state_obj); 1388 1389 err_cccb_fini: 1390 pvr_cccb_fini(&queue->cccb); 1391 1392 err_free_queue: 1393 mutex_destroy(&queue->cccb_fence_ctx.job_lock); 1394 kfree(queue); 1395 1396 return ERR_PTR(err); 1397 } 1398 1399 void pvr_queue_device_pre_reset(struct pvr_device *pvr_dev) 1400 { 1401 struct pvr_queue *queue; 1402 1403 mutex_lock(&pvr_dev->queues.lock); 1404 list_for_each_entry(queue, &pvr_dev->queues.idle, node) 1405 pvr_queue_stop(queue, NULL); 1406 list_for_each_entry(queue, &pvr_dev->queues.active, node) 1407 pvr_queue_stop(queue, NULL); 1408 mutex_unlock(&pvr_dev->queues.lock); 1409 } 1410 1411 void pvr_queue_device_post_reset(struct pvr_device *pvr_dev) 1412 { 1413 struct pvr_queue *queue; 1414 1415 mutex_lock(&pvr_dev->queues.lock); 1416 list_for_each_entry(queue, &pvr_dev->queues.active, node) 1417 pvr_queue_start(queue); 1418 list_for_each_entry(queue, &pvr_dev->queues.idle, node) 1419 pvr_queue_start(queue); 1420 mutex_unlock(&pvr_dev->queues.lock); 1421 } 1422 1423 /** 1424 * pvr_queue_kill() - Kill a queue. 1425 * @queue: The queue to kill. 1426 * 1427 * Kill the queue so no new jobs can be pushed. Should be called when the 1428 * context handle is destroyed. The queue object might last longer if jobs 1429 * are still in flight and holding a reference to the context this queue 1430 * belongs to. 1431 */ 1432 void pvr_queue_kill(struct pvr_queue *queue) 1433 { 1434 drm_sched_entity_destroy(&queue->entity); 1435 dma_fence_put(queue->last_queued_job_scheduled_fence); 1436 queue->last_queued_job_scheduled_fence = NULL; 1437 } 1438 1439 /** 1440 * pvr_queue_destroy() - Destroy a queue. 1441 * @queue: The queue to destroy. 1442 * 1443 * Cleanup the queue and free the resources attached to it. Should be 1444 * called from the context release function. 1445 */ 1446 void pvr_queue_destroy(struct pvr_queue *queue) 1447 { 1448 if (!queue) 1449 return; 1450 1451 mutex_lock(&queue->ctx->pvr_dev->queues.lock); 1452 list_del_init(&queue->node); 1453 mutex_unlock(&queue->ctx->pvr_dev->queues.lock); 1454 1455 drm_sched_fini(&queue->scheduler); 1456 drm_sched_entity_fini(&queue->entity); 1457 1458 if (WARN_ON(queue->last_queued_job_scheduled_fence)) 1459 dma_fence_put(queue->last_queued_job_scheduled_fence); 1460 1461 pvr_queue_cleanup_fw_context(queue); 1462 1463 pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj); 1464 pvr_fw_object_destroy(queue->reg_state_obj); 1465 pvr_cccb_fini(&queue->cccb); 1466 mutex_destroy(&queue->cccb_fence_ctx.job_lock); 1467 kfree(queue); 1468 } 1469 1470 /** 1471 * pvr_queue_device_init() - Device-level initialization of queue related fields. 1472 * @pvr_dev: The device to initialize. 1473 * 1474 * Initializes all fields related to queue management in pvr_device. 1475 * 1476 * Return: 1477 * * 0 on success, or 1478 * * An error code on failure. 1479 */ 1480 int pvr_queue_device_init(struct pvr_device *pvr_dev) 1481 { 1482 int err; 1483 1484 INIT_LIST_HEAD(&pvr_dev->queues.active); 1485 INIT_LIST_HEAD(&pvr_dev->queues.idle); 1486 err = drmm_mutex_init(from_pvr_device(pvr_dev), &pvr_dev->queues.lock); 1487 if (err) 1488 return err; 1489 1490 pvr_dev->sched_wq = alloc_workqueue("powervr-sched", WQ_UNBOUND, 0); 1491 if (!pvr_dev->sched_wq) 1492 return -ENOMEM; 1493 1494 return 0; 1495 } 1496 1497 /** 1498 * pvr_queue_device_fini() - Device-level cleanup of queue related fields. 1499 * @pvr_dev: The device to cleanup. 1500 * 1501 * Cleanup/free all queue-related resources attached to a pvr_device object. 1502 */ 1503 void pvr_queue_device_fini(struct pvr_device *pvr_dev) 1504 { 1505 destroy_workqueue(pvr_dev->sched_wq); 1506 } 1507