// SPDX-License-Identifier: GPL-2.0-only OR MIT
/* Copyright (c) 2023 Imagination Technologies Ltd. */

#include <drm/drm_managed.h>
#include <drm/gpu_scheduler.h>

#include "pvr_cccb.h"
#include "pvr_context.h"
#include "pvr_device.h"
#include "pvr_drv.h"
#include "pvr_job.h"
#include "pvr_queue.h"
#include "pvr_vm.h"

#include "pvr_rogue_fwif_client.h"

/* Value written to rogue_fwif_fwcommoncontext::max_deadline_ms by init_fw_context(). */
#define MAX_DEADLINE_MS 30000

/*
 * log2 of the CCCB size for each queue type.
 * NOTE(review): not used in this part of the file; presumably the unit is
 * bytes - confirm against the CCCB initialization code.
 */
#define CTX_COMPUTE_CCCB_SIZE_LOG2 15
#define CTX_FRAG_CCCB_SIZE_LOG2 15
#define CTX_GEOM_CCCB_SIZE_LOG2 15
#define CTX_TRANSFER_CCCB_SIZE_LOG2 15

/**
 * get_xfer_ctx_state_size() - Get the size of the transfer context state.
 * @pvr_dev: Device to query the features on.
 *
 * The size is sizeof(struct rogue_fwif_frag_ctx_state) plus one
 * frag_reg_isp_store element per ISP store register. There is a single ISP
 * store register when the xe_memory_hierarchy feature is present, and
 * num_isp_ipp_pipes of them otherwise.
 *
 * Return:
 *  * The context state size on success, or
 *  * The error returned by PVR_FEATURE_VALUE() if the feature value can't be
 *    read.
 */
static int get_xfer_ctx_state_size(struct pvr_device *pvr_dev)
{
	u32 num_isp_store_registers;

	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
		num_isp_store_registers = 1;
	} else {
		int err;

		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
		if (WARN_ON(err))
			return err;
	}

	return sizeof(struct rogue_fwif_frag_ctx_state) +
	       (num_isp_store_registers *
		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
}

/**
 * get_frag_ctx_state_size() - Get the size of the fragment context state.
 * @pvr_dev: Device to query the features on.
 *
 * The size is sizeof(struct rogue_fwif_frag_ctx_state) plus one
 * frag_reg_isp_store element per ISP store register. The number of ISP store
 * registers is:
 *  * num_raster_pipes when the xe_memory_hierarchy feature is present,
 *    multiplied by (1 + xpu_max_slaves) if gpu_multicore_support is present
 *    too, or
 *  * num_isp_ipp_pipes otherwise.
 *
 * Return:
 *  * The context state size on success, or
 *  * The error returned by PVR_FEATURE_VALUE() if a feature value can't be
 *    read.
 */
static int get_frag_ctx_state_size(struct pvr_device *pvr_dev)
{
	u32 num_isp_store_registers;
	int err;

	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
		err = PVR_FEATURE_VALUE(pvr_dev, num_raster_pipes, &num_isp_store_registers);
		if (WARN_ON(err))
			return err;

		if (PVR_HAS_FEATURE(pvr_dev, gpu_multicore_support)) {
			u32 xpu_max_slaves;

			err = PVR_FEATURE_VALUE(pvr_dev, xpu_max_slaves, &xpu_max_slaves);
			if (WARN_ON(err))
				return err;

			num_isp_store_registers *= (1 + xpu_max_slaves);
		}
	} else {
		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
		if (WARN_ON(err))
			return err;
	}

	return sizeof(struct rogue_fwif_frag_ctx_state) +
	       (num_isp_store_registers *
		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
}

/**
 * get_ctx_state_size() - Get the size of the context state for a queue type.
 * @pvr_dev: Device to query the features on.
 * @type: Type of the queue.
 *
 * Return:
 *  * The context state size for @type on success,
 *  * -EINVAL (after a WARN) if @type is not a known job type, or
 *  * Any error returned by get_frag_ctx_state_size() or
 *    get_xfer_ctx_state_size().
 */
static int get_ctx_state_size(struct pvr_device *pvr_dev, enum drm_pvr_job_type type)
{
	switch (type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return sizeof(struct rogue_fwif_geom_ctx_state);
	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return get_frag_ctx_state_size(pvr_dev);
	case DRM_PVR_JOB_TYPE_COMPUTE:
		return sizeof(struct rogue_fwif_compute_ctx_state);
	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return get_xfer_ctx_state_size(pvr_dev);
	}

	WARN(1, "Invalid queue type");
	return -EINVAL;
}

/**
 * get_ctx_offset() - Get the offset of the per-queue member in the FW context.
 * @type: Type of the queue.
 *
 * Return:
 *  * The offset of the geom_context, frag_context, cdm_context or tq_context
 *    member within its enclosing FW context structure, depending on @type, or
 *  * 0 if @type is not a known job type.
 */
static u32 get_ctx_offset(enum drm_pvr_job_type type)
{
	switch (type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return offsetof(struct rogue_fwif_fwrendercontext, geom_context);
	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return offsetof(struct rogue_fwif_fwrendercontext, frag_context);
	case DRM_PVR_JOB_TYPE_COMPUTE:
		return offsetof(struct rogue_fwif_fwcomputecontext, cdm_context);
	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return offsetof(struct rogue_fwif_fwtransfercontext, tq_context);
	}

	return 0;
}

/* dma_fence_ops::get_driver_name() implementation shared by all queue fences. */
static const char *
pvr_queue_fence_get_driver_name(struct dma_fence *f)
{
	return PVR_DRIVER_NAME;
}

/*
 * Deferred part of the fence release: drop the context reference taken in
 * pvr_queue_fence_init() and free the fence object.
 */
static void pvr_queue_fence_release_work(struct work_struct *w)
{
	struct pvr_queue_fence *fence = container_of(w, struct pvr_queue_fence, release_work);

	pvr_context_put(fence->queue->ctx);
	dma_fence_free(&fence->base);
}

/*
 * dma_fence_ops::release() implementation shared by all queue fences.
 *
 * The release is not done inline: it is deferred to
 * pvr_queue_fence_release_work(), queued on the device's sched_wq.
 */
static void pvr_queue_fence_release(struct dma_fence *f)
{
	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
	struct pvr_device *pvr_dev = fence->queue->ctx->pvr_dev;

	queue_work(pvr_dev->sched_wq, &fence->release_work);
}

/*
 * dma_fence_ops::get_timeline_name() implementation for job done fences.
 * The name is derived from the type of the queue the fence belongs to.
 */
static const char *
pvr_queue_job_fence_get_timeline_name(struct dma_fence *f)
{
	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);

	switch (fence->queue->type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return "geometry";

	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return "fragment";

	case DRM_PVR_JOB_TYPE_COMPUTE:
		return "compute";

	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return "transfer";
	}

	WARN(1, "Invalid queue type");
	return "invalid";
}

/*
 * dma_fence_ops::get_timeline_name() implementation for CCCB fences.
 * The name is derived from the type of the queue the fence belongs to.
 */
static const char *
pvr_queue_cccb_fence_get_timeline_name(struct dma_fence *f)
{
	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);

	switch (fence->queue->type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return "geometry-cccb";

	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return "fragment-cccb";

	case DRM_PVR_JOB_TYPE_COMPUTE:
		return "compute-cccb";

	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return "transfer-cccb";
	}

	WARN(1, "Invalid queue type");
	return "invalid";
}

/*
 * Fence operations for job done fences. The address of this object is also
 * used to identify UFO-backed fences, see pvr_queue_fence_is_ufo_backed().
 */
static const struct dma_fence_ops pvr_queue_job_fence_ops = {
	.get_driver_name = pvr_queue_fence_get_driver_name,
	.get_timeline_name = pvr_queue_job_fence_get_timeline_name,
	.release = pvr_queue_fence_release,
};

/**
 * pvr_queue_fence_is_ufo_backed() - Check if a dma_fence is backed by a UFO.
 * @f: The dma_fence to check.
 *
 * Return:
 *  * true if the dma_fence is backed by a UFO, or
 *  * false otherwise.
 */
static inline bool
pvr_queue_fence_is_ufo_backed(struct dma_fence *f)
{
	/*
	 * Currently the only dma_fence backed by a UFO object is the job fence,
	 * e.g. pvr_job::done_fence, wrapped by a pvr_queue_fence object.
	 */
	return f && f->ops == &pvr_queue_job_fence_ops;
}

/**
 * to_pvr_queue_job_fence() - Return a pvr_queue_fence object if the fence is
 * already backed by a UFO.
 * @f: The dma_fence to turn into a pvr_queue_fence.
 *
 * This could be called on:
 * - a job fence directly, in which case it simply returns the containing pvr_queue_fence;
 * - a drm_sched_fence's scheduled or finished fence, in which case it will first try to follow
 *   the parent pointer to find the job fence (note that the parent pointer is initialized
 *   only after the run_job() callback is called on the drm_sched_fence's owning job);
 * - any other dma_fence, in which case it will return NULL.
 *
 * Return:
 *  * A non-NULL pvr_queue_fence object if the dma_fence is backed by a UFO, or
 *  * NULL otherwise.
 */
static struct pvr_queue_fence *
to_pvr_queue_job_fence(struct dma_fence *f)
{
	struct drm_sched_fence *sched_fence = to_drm_sched_fence(f);

	if (sched_fence)
		f = sched_fence->parent;

	if (pvr_queue_fence_is_ufo_backed(f))
		return container_of(f, struct pvr_queue_fence, base);

	return NULL;
}

/* Fence operations for CCCB fences. */
static const struct dma_fence_ops pvr_queue_cccb_fence_ops = {
	.get_driver_name = pvr_queue_fence_get_driver_name,
	.get_timeline_name = pvr_queue_cccb_fence_get_timeline_name,
	.release = pvr_queue_fence_release,
};

/**
 * pvr_queue_fence_put() - Put wrapper for pvr_queue_fence objects.
 * @f: The dma_fence object to put.
 *
 * If the pvr_queue_fence has been initialized, we call dma_fence_put(),
 * otherwise we free the object with dma_fence_free(). This allows us
 * to do the right thing before and after pvr_queue_fence_init() has been
 * called.
 *
 * Passing NULL is allowed and does nothing. Passing an initialized fence
 * that is neither a CCCB fence nor a job fence triggers a WARN and the fence
 * is left untouched.
 */
static void pvr_queue_fence_put(struct dma_fence *f)
{
	if (!f)
		return;

	if (WARN_ON(f->ops &&
		    f->ops != &pvr_queue_cccb_fence_ops &&
		    f->ops != &pvr_queue_job_fence_ops))
		return;

	/* If the fence hasn't been initialized yet, free the object directly. */
	if (f->ops)
		dma_fence_put(f);
	else
		dma_fence_free(f);
}

/**
 * pvr_queue_fence_alloc() - Allocate a pvr_queue_fence fence object
 *
 * Call this function to allocate job CCCB and done fences. This only
 * allocates the objects. Initialization happens when the underlying
 * dma_fence object is to be returned to drm_sched (in prepare_job() or
 * run_job()).
 *
 * Return:
 *  * A valid pointer if the allocation succeeds, or
 *  * NULL if the allocation fails.
 */
static struct dma_fence *
pvr_queue_fence_alloc(void)
{
	struct pvr_queue_fence *fence;

	/*
	 * The object is zeroed, so dma_fence::ops and pvr_queue_fence::queue
	 * are NULL until pvr_queue_fence_init() is called. Other functions in
	 * this file rely on that to detect non-initialized fences.
	 */
	fence = kzalloc_obj(*fence);
	if (!fence)
		return NULL;

	return &fence->base;
}

/**
 * pvr_queue_fence_init() - Initializes a pvr_queue_fence object.
 * @f: The fence to initialize
 * @queue: The queue this fence belongs to.
 * @fence_ops: The fence operations.
 * @fence_ctx: The fence context.
 *
 * Wrapper around dma_fence_init() that takes care of initializing the
 * pvr_queue_fence::queue field too.
 *
 * A reference is taken on the context owning @queue. It is dropped when the
 * fence is released, see pvr_queue_fence_release_work().
 */
static void
pvr_queue_fence_init(struct dma_fence *f,
		     struct pvr_queue *queue,
		     const struct dma_fence_ops *fence_ops,
		     struct pvr_queue_fence_ctx *fence_ctx)
{
	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);

	pvr_context_get(queue->ctx);
	fence->queue = queue;
	INIT_WORK(&fence->release_work, pvr_queue_fence_release_work);
	dma_fence_init(&fence->base, fence_ops,
		       &fence_ctx->lock, fence_ctx->id,
		       atomic_inc_return(&fence_ctx->seqno));
}

/**
 * pvr_queue_cccb_fence_init() - Initializes a CCCB fence object.
 * @fence: The fence to initialize.
 * @queue: The queue this fence belongs to.
 *
 * Initializes a fence that can be used to wait for CCCB space.
 *
 * Should be called in the ::prepare_job() path, so the fence returned to
 * drm_sched is valid.
 */
static void
pvr_queue_cccb_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
{
	pvr_queue_fence_init(fence, queue, &pvr_queue_cccb_fence_ops,
			     &queue->cccb_fence_ctx.base);
}

/**
 * pvr_queue_job_fence_init() - Initializes a job done fence object.
 * @fence: The fence to initialize.
 * @queue: The queue this fence belongs to.
 *
 * Initializes a fence that will be signaled when the GPU is done executing
 * a job.
 *
 * Should be called *before* the ::run_job() path, so the fence is initialised
 * before being placed in the pending_list.
 *
 * Calling this function on an already initialized fence is a NOP.
 */
static void
pvr_queue_job_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
{
	if (!fence->ops)
		pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops,
				     &queue->job_fence_ctx);
}

/**
 * pvr_queue_fence_ctx_init() - Queue fence context initialization.
 * @fence_ctx: The context to initialize
 */
static void
pvr_queue_fence_ctx_init(struct pvr_queue_fence_ctx *fence_ctx)
{
	spin_lock_init(&fence_ctx->lock);
	fence_ctx->id = dma_fence_context_alloc(1);
	atomic_set(&fence_ctx->seqno, 0);
}

/**
 * ufo_cmds_size() - Get the size of the commands needed to pass UFOs.
 * @elem_count: Number of UFOs to pass.
 *
 * Return: The cumulated size, headers included, of the commands needed to
 * pass @elem_count UFOs.
 */
static u32 ufo_cmds_size(u32 elem_count)
{
	/* We can pass at most ROGUE_FWIF_CCB_CMD_MAX_UFOS per UFO-related command. */
	u32 full_cmd_count = elem_count / ROGUE_FWIF_CCB_CMD_MAX_UFOS;
	u32 remaining_elems = elem_count % ROGUE_FWIF_CCB_CMD_MAX_UFOS;
	u32 size = full_cmd_count *
		   pvr_cccb_get_size_of_cmd_with_hdr(ROGUE_FWIF_CCB_CMD_MAX_UFOS *
						     sizeof(struct rogue_fwif_ufo));

	/* One extra, partially filled, command for the UFOs that are left. */
	if (remaining_elems) {
		size += pvr_cccb_get_size_of_cmd_with_hdr(remaining_elems *
							  sizeof(struct rogue_fwif_ufo));
	}

	return size;
}

/**
 * job_cmds_size() - Get the size of all the commands needed to submit a job.
 * @job: The job to get the size for.
 * @ufo_wait_count: Number of UFOs the job has to wait on.
 *
 * Return: The cumulated size, headers included, of the commands needed to
 * submit @job.
 */
static u32 job_cmds_size(struct pvr_job *job, u32 ufo_wait_count)
{
	/*
	 * One UFO command per native fence this job will be waiting on (unless any are
	 * signaled by the time the job is submitted), plus a command for the job itself,
	 * plus one UFO command for the fence signaling.
	 */
	return ufo_cmds_size(ufo_wait_count) +
	       pvr_cccb_get_size_of_cmd_with_hdr(job->cmd_len) +
	       ufo_cmds_size(1);
}

/**
 * is_paired_job_fence() - Check if a fence is the dependency between paired jobs.
 * @fence: The fence to check.
 * @job: The job @fence is a dependency of.
 *
 * Return:
 *  * true if @job is a fragment job with a paired job and @fence is the
 *    scheduled fence of that paired job, or
 *  * false otherwise.
 */
static bool
is_paired_job_fence(struct dma_fence *fence, struct pvr_job *job)
{
	/* This assumes "fence" is one of "job"'s drm_sched_job::dependencies */
	return job->type == DRM_PVR_JOB_TYPE_FRAGMENT &&
	       job->paired_job &&
	       &job->paired_job->base.s_fence->scheduled == fence;
}

/**
 * job_count_remaining_native_deps() - Count the number of non-signaled native dependencies.
 * @job: Job to operate on.
 *
 * Returns: Number of non-signaled native deps remaining.
 */
static unsigned long job_count_remaining_native_deps(struct pvr_job *job)
{
	unsigned long remaining_count = 0;
	struct dma_fence *fence = NULL;
	unsigned long index;

	xa_for_each(&job->base.dependencies, index, fence) {
		struct pvr_queue_fence *jfence;

		if (is_paired_job_fence(fence, job)) {
			/*
			 * A fence between paired jobs won't resolve to a pvr_queue_fence (i.e.
			 * be backed by a UFO) until the jobs have been submitted, together.
			 * The submitting code will insert a partial render fence command for this.
			 */
			WARN_ON(dma_fence_is_signaled(fence));
			remaining_count++;
			continue;
		}

		/* Dependencies that are not backed by a UFO are not counted. */
		jfence = to_pvr_queue_job_fence(fence);
		if (!jfence)
			continue;

		if (!dma_fence_is_signaled(&jfence->base))
			remaining_count++;
	}

	return remaining_count;
}

/**
 * pvr_queue_get_job_cccb_fence() - Get the CCCB fence attached to a job.
 * @queue: The queue this job will be submitted to.
 * @job: The job to get the CCCB fence on.
 *
 * The CCCB fence is a synchronization primitive allowing us to delay job
 * submission until there's enough space in the CCCB to submit the job.
 *
 * Return:
 *  * NULL if there's enough space in the CCCB to submit this job, or
 *  * A valid dma_fence object otherwise.
 */
static struct dma_fence *
pvr_queue_get_job_cccb_fence(struct pvr_queue *queue, struct pvr_job *job)
{
	struct pvr_queue_fence *cccb_fence;
	unsigned int native_deps_remaining;

	/* If the fence is NULL, that means we already checked that we had
	 * enough space in the cccb for our job.
	 */
	if (!job->cccb_fence)
		return NULL;

	mutex_lock(&queue->cccb_fence_ctx.job_lock);

	/* Count remaining native dependencies and check if the job fits in the CCCB. */
	native_deps_remaining = job_count_remaining_native_deps(job);
	if (pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
		pvr_queue_fence_put(job->cccb_fence);
		job->cccb_fence = NULL;
		goto out_unlock;
	}

	/* There should be no job attached to the CCCB fence context:
	 * drm_sched_entity guarantees that jobs are submitted one at a time.
	 */
	if (WARN_ON(queue->cccb_fence_ctx.job))
		pvr_job_put(queue->cccb_fence_ctx.job);

	queue->cccb_fence_ctx.job = pvr_job_get(job);

	/* Initialize the fence before returning it. */
	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
	if (!WARN_ON(cccb_fence->queue))
		pvr_queue_cccb_fence_init(job->cccb_fence, queue);

out_unlock:
	mutex_unlock(&queue->cccb_fence_ctx.job_lock);

	/* job->cccb_fence is NULL at this point if the job fits in the CCCB. */
	return dma_fence_get(job->cccb_fence);
}

/**
 * pvr_queue_get_job_kccb_fence() - Get the KCCB fence attached to a job.
 * @queue: The queue this job will be submitted to.
 * @job: The job to get the KCCB fence on.
 *
 * The KCCB fence is a synchronization primitive allowing us to delay job
 * submission until there's enough space in the KCCB to submit the job.
 *
 * Return:
 *  * NULL if there's enough space in the KCCB to submit this job, or
 *  * A valid dma_fence object otherwise.
 */
static struct dma_fence *
pvr_queue_get_job_kccb_fence(struct pvr_queue *queue, struct pvr_job *job)
{
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
	struct dma_fence *kccb_fence = NULL;

	/* If the fence is NULL, that means we already checked that we had
	 * enough space in the KCCB for our job.
	 */
	if (!job->kccb_fence)
		return NULL;

	/* The pre-allocated fence is expected to be non-initialized here. */
	if (!WARN_ON(job->kccb_fence->ops)) {
		kccb_fence = pvr_kccb_reserve_slot(pvr_dev, job->kccb_fence);
		job->kccb_fence = NULL;
	}

	return kccb_fence;
}

/**
 * pvr_queue_get_paired_frag_job_dep() - Get the next dependency inherited from
 * the paired fragment job.
 * @job: The job to get the dependency on.
 *
 * Only does something when @job is a geometry job with a paired job.
 *
 * Return:
 *  * NULL if @job is not a geometry job or has no paired job,
 *  * A reference to the first non-signaled dependency of the paired fragment
 *    job, the scheduled fence of @job excluded, if there's one, or
 *  * The result of pvr_queue_get_job_cccb_fence() on the paired fragment job
 *    otherwise.
 */
static struct dma_fence *
pvr_queue_get_paired_frag_job_dep(struct pvr_job *job)
{
	struct pvr_job *frag_job = job->type == DRM_PVR_JOB_TYPE_GEOMETRY ?
				   job->paired_job : NULL;
	struct pvr_queue *frag_queue = frag_job ? frag_job->ctx->queues.fragment : NULL;
	struct dma_fence *f;
	unsigned long index;

	if (!frag_job)
		return NULL;

	/* Have the geometry job wait on the paired fragment job's dependencies as well. */
	xa_for_each(&frag_job->base.dependencies, index, f) {
		/* Skip already signaled fences. */
		if (dma_fence_is_signaled(f))
			continue;

		/*
		 * The paired job fence won't be signaled until both jobs have
		 * been submitted, so we can't wait on it to schedule them.
		 */
		if (f == &job->base.s_fence->scheduled)
			continue;

		return dma_fence_get(f);
	}

	/* Initialize the paired fragment job's done_fence, so we can signal it. */
	pvr_queue_job_fence_init(frag_job->done_fence, frag_queue);

	return pvr_queue_get_job_cccb_fence(frag_queue, frag_job);
}

/**
 * pvr_queue_prepare_job() - Return the next internal dependencies expressed as a dma_fence.
 * @sched_job: The job to query the next internal dependency on
 * @s_entity: The entity this job is queued on.
 *
 * After iterating over drm_sched_job::dependencies, drm_sched lets the driver return
 * its own internal dependencies. We use this function to return our internal dependencies.
 *
 * Return:
 *  * The next internal dependency to wait on, or
 *  * NULL if there's none left.
 */
static struct dma_fence *
pvr_queue_prepare_job(struct drm_sched_job *sched_job,
		      struct drm_sched_entity *s_entity)
{
	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
	struct pvr_queue *queue = container_of(s_entity, struct pvr_queue, entity);
	struct dma_fence *internal_dep = NULL;

	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
		/*
		 * This will be called on a paired fragment job after being submitted
		 * to the firmware as part of the paired geometry job's submission.
		 * We can tell if this is the case and bail early from whether run_job()
		 * has been called on the geometry job, which would issue a pm ref on
		 * this job as well.
		 */
		if (job->has_pm_ref)
			return NULL;
	}

	/*
	 * Initialize the done_fence, so we can signal it. This must be done
	 * here because otherwise by the time of run_job() the job will end up
	 * in the pending list without a valid fence.
	 */
	pvr_queue_job_fence_init(job->done_fence, queue);

	/* CCCB fence is used to make sure we have enough space in the CCCB to
	 * submit our commands.
	 */
	internal_dep = pvr_queue_get_job_cccb_fence(queue, job);

	/* KCCB fence is used to make sure we have a KCCB slot to queue our
	 * CMD_KICK.
	 */
	if (!internal_dep)
		internal_dep = pvr_queue_get_job_kccb_fence(queue, job);

	/* Any extra internal dependency should be added here, using the following
	 * pattern:
	 *
	 *	if (!internal_dep)
	 *		internal_dep = pvr_queue_get_job_xxxx_fence(queue, job);
	 */

	/* The paired job fence should come last, when everything else is ready. */
	if (!internal_dep)
		internal_dep = pvr_queue_get_paired_frag_job_dep(job);

	return internal_dep;
}

/**
 * pvr_queue_update_active_state_locked() - Update the queue active state.
 * @queue: Queue to update the state on.
 *
 * Locked version of pvr_queue_update_active_state(). Must be called with
 * pvr_device::queue::lock held.
 */
static void pvr_queue_update_active_state_locked(struct pvr_queue *queue)
{
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;

	lockdep_assert_held(&pvr_dev->queues.lock);

	/* The queue is temporarily out of any list when it's being reset,
	 * we don't want a call to pvr_queue_update_active_state_locked()
	 * to re-insert it behind our back.
	 */
	if (list_empty(&queue->node))
		return;

	if (!atomic_read(&queue->in_flight_job_count))
		list_move_tail(&queue->node, &pvr_dev->queues.idle);
	else
		list_move_tail(&queue->node, &pvr_dev->queues.active);
}

/**
 * pvr_queue_update_active_state() - Update the queue active state.
 * @queue: Queue to update the state on.
 *
 * Active state is based on the in_flight_job_count value.
 *
 * Updating the active state implies moving the queue in or out of the
 * active queue list, which also defines whether the queue is checked
 * or not when a FW event is received.
 *
 * This function should be called any time a job is submitted or its done
 * fence is signaled.
 */
static void pvr_queue_update_active_state(struct pvr_queue *queue)
{
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;

	mutex_lock(&pvr_dev->queues.lock);
	pvr_queue_update_active_state_locked(queue);
	mutex_unlock(&pvr_dev->queues.lock);
}

/**
 * pvr_queue_submit_job_to_cccb() - Write the commands of a job to the CCCB.
 * @job: The job to submit.
 *
 * Writes, in this order, to the CCCB of the queue @job is bound to:
 *  * the fence commands to wait on the non-signaled UFO-backed dependencies,
 *  * the job command itself, and
 *  * the update command signaling the job done fence.
 *
 * The in-flight job count of the queue is incremented, and its active state
 * updated, before anything is written.
 */
static void pvr_queue_submit_job_to_cccb(struct pvr_job *job)
{
	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
	struct rogue_fwif_ufo ufos[ROGUE_FWIF_CCB_CMD_MAX_UFOS];
	struct pvr_cccb *cccb = &queue->cccb;
	struct pvr_queue_fence *jfence;
	struct dma_fence *fence;
	unsigned long index;
	u32 ufo_count = 0;

	/* We need to add the queue to the active list before updating the CCCB,
	 * otherwise we might miss the FW event informing us that something
	 * happened on this queue.
	 */
	atomic_inc(&queue->in_flight_job_count);
	pvr_queue_update_active_state(queue);

	xa_for_each(&job->base.dependencies, index, fence) {
		jfence = to_pvr_queue_job_fence(fence);
		if (!jfence)
			continue;

		/* Some dependencies might have been signaled since prepare_job() */
		if (dma_fence_is_signaled(&jfence->base))
			continue;

		pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
					  &ufos[ufo_count].addr);
		ufos[ufo_count++].value = jfence->base.seqno;

		/* The array is full: flush it, so there's always a free slot left. */
		if (ufo_count == ARRAY_SIZE(ufos)) {
			pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
							   sizeof(ufos), ufos, 0, 0);
			ufo_count = 0;
		}
	}

	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
		/*
		 * The loop above will only process dependencies backed by a UFO i.e. with
		 * a valid parent fence assigned, but the paired job dependency won't have
		 * one until both jobs have been submitted. Access the parent fence directly
		 * here instead, submitting it last as partial render fence.
		 */
		jfence = to_pvr_queue_job_fence(job->paired_job->done_fence);
		if (!WARN_ON(!jfence)) {
			pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
						  &ufos[ufo_count].addr);
			ufos[ufo_count++].value = job->paired_job->done_fence->seqno;
		}
	}

	if (ufo_count) {
		pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
						   sizeof(ufos[0]) * ufo_count, ufos, 0, 0);
	}

	if (job->type == DRM_PVR_JOB_TYPE_GEOMETRY && job->paired_job) {
		struct rogue_fwif_cmd_geom *cmd = job->cmd;

		/* Reference value for the partial render test is the current queue fence
		 * seqno minus one.
		 */
		pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj,
					  &cmd->partial_render_geom_frag_fence.addr);
		cmd->partial_render_geom_frag_fence.value = job->done_fence->seqno - 1;
	}

	/* Submit job to FW */
	pvr_cccb_write_command_with_header(cccb, job->fw_ccb_cmd_type, job->cmd_len, job->cmd,
					   job->id, job->id);

	/* Update command to signal the job fence. */
	pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj, &ufos[0].addr);
	ufos[0].value = job->done_fence->seqno;
	pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_UPDATE,
					   sizeof(ufos[0]), ufos, 0, 0);
}

/**
 * pvr_queue_run_job() - Submit a job to the FW.
 * @sched_job: The job to submit.
 *
 * This function is called when all non-native dependencies have been met and
 * when the commands resulting from this job are guaranteed to fit in the CCCB.
 *
 * Return:
 *  * A reference to the job done fence on success,
 *  * ERR_PTR(-EINVAL) if the job pairing is invalid, or
 *  * An ERR_PTR() wrapping the error returned by pvr_job_get_pm_ref().
 */
static struct dma_fence *pvr_queue_run_job(struct drm_sched_job *sched_job)
{
	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
	struct pvr_device *pvr_dev = job->pvr_dev;
	int err;

	/* The fragment job is issued along the geometry job when we use combined
	 * geom+frag kicks. When we get there, we should simply return the
	 * done_fence that's been initialized earlier.
	 */
	if (job->paired_job && job->type == DRM_PVR_JOB_TYPE_FRAGMENT &&
	    job->done_fence->ops) {
		return dma_fence_get(job->done_fence);
	}

	/* The only kind of jobs that can be paired are geometry and fragment, and
	 * we bail out early if we see a fragment job that's paired with a geometry job.
	 * Paired jobs must also target the same context and point to the same HWRT.
	 */
	if (WARN_ON(job->paired_job &&
		    (job->type != DRM_PVR_JOB_TYPE_GEOMETRY ||
		     job->paired_job->type != DRM_PVR_JOB_TYPE_FRAGMENT ||
		     job->hwrt != job->paired_job->hwrt ||
		     job->ctx != job->paired_job->ctx)))
		return ERR_PTR(-EINVAL);

	err = pvr_job_get_pm_ref(job);
	if (WARN_ON(err))
		return ERR_PTR(err);

	if (job->paired_job) {
		err = pvr_job_get_pm_ref(job->paired_job);
		if (WARN_ON(err))
			return ERR_PTR(err);
	}

	/* Submit our job to the CCCB */
	pvr_queue_submit_job_to_cccb(job);

	if (job->paired_job) {
		struct pvr_job *geom_job = job;
		struct pvr_job *frag_job = job->paired_job;
		struct pvr_queue *geom_queue = job->ctx->queues.geometry;
		struct pvr_queue *frag_queue = job->ctx->queues.fragment;

		/* Submit the fragment job along the geometry job and send a combined kick. */
		pvr_queue_submit_job_to_cccb(frag_job);
		pvr_cccb_send_kccb_combined_kick(pvr_dev,
						 &geom_queue->cccb, &frag_queue->cccb,
						 pvr_context_get_fw_addr(geom_job->ctx) +
						 geom_queue->ctx_offset,
						 pvr_context_get_fw_addr(frag_job->ctx) +
						 frag_queue->ctx_offset,
						 job->hwrt,
						 frag_job->fw_ccb_cmd_type ==
						 ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR);
	} else {
		struct pvr_queue *queue = container_of(job->base.sched,
						       struct pvr_queue, scheduler);

		pvr_cccb_send_kccb_kick(pvr_dev, &queue->cccb,
					pvr_context_get_fw_addr(job->ctx) + queue->ctx_offset,
					job->hwrt);
	}

	return dma_fence_get(job->done_fence);
}

/**
 * pvr_queue_stop() - Stop the scheduler attached to a queue.
 * @queue: The queue to stop the scheduler on.
 * @bad_job: The job to pass as the bad job to drm_sched_stop(). Can be NULL.
 */
static void pvr_queue_stop(struct pvr_queue *queue, struct pvr_job *bad_job)
{
	drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
}

/**
 * pvr_queue_start() - Restart the scheduler attached to a queue.
 * @queue: The queue to restart the scheduler on.
 *
 * The timeline UFO is updated from the CPU to the last allocated job fence
 * seqno. The context is flagged faulty if any job in the pending list has a
 * done fence that is still not signaled.
 */
static void pvr_queue_start(struct pvr_queue *queue)
{
	struct pvr_job *job;

	/* Make sure we CPU-signal the UFO object, so other queues don't get
	 * blocked waiting on it.
	 */
	*queue->timeline_ufo.value = atomic_read(&queue->job_fence_ctx.seqno);

	list_for_each_entry(job, &queue->scheduler.pending_list, base.list) {
		if (dma_fence_is_signaled(job->done_fence)) {
			/* Jobs might have completed after drm_sched_stop() was called.
			 * In that case, re-assign the parent field to the done_fence.
			 */
			WARN_ON(job->base.s_fence->parent);
			job->base.s_fence->parent = dma_fence_get(job->done_fence);
		} else {
			/* If we had unfinished jobs, flag the entity as guilty so no
			 * new job can be submitted.
			 */
			atomic_set(&queue->ctx->faulty, 1);
		}
	}

	drm_sched_start(&queue->scheduler, 0);
}

/**
 * pvr_queue_timedout_job() - Handle a job timeout event.
 * @s_job: The job this timeout occurred on.
 *
 * FIXME: We don't do anything here to unblock the situation, we just stop+start
 * the scheduler, and re-assign parent fences in the middle.
 *
 * Return:
 *  * DRM_GPU_SCHED_STAT_RESET.
 */
static enum drm_gpu_sched_stat
pvr_queue_timedout_job(struct drm_sched_job *s_job)
{
	struct drm_gpu_scheduler *sched = s_job->sched;
	struct pvr_queue *queue = container_of(sched, struct pvr_queue, scheduler);
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
	struct pvr_job *job;
	u32 job_count = 0;

	dev_err(sched->dev, "Job timeout\n");

	/* Before we stop the scheduler, make sure the queue is out of any list, so
	 * any call to pvr_queue_update_active_state_locked() that might happen
	 * until the scheduler is really stopped doesn't end up re-inserting the
	 * queue in the active list. This would cause
	 * pvr_queue_signal_done_fences() and drm_sched_stop() to race with each
	 * other when accessing the pending_list, since drm_sched_stop() doesn't
	 * grab the job_list_lock when modifying the list (it's assuming the
	 * only other accessor is the scheduler, and it's safe to not grab the
	 * lock since it's stopped).
	 */
	mutex_lock(&pvr_dev->queues.lock);
	list_del_init(&queue->node);
	mutex_unlock(&pvr_dev->queues.lock);

	drm_sched_stop(sched, s_job);

	/* Re-assign job parent fences. */
	list_for_each_entry(job, &sched->pending_list, base.list) {
		job->base.s_fence->parent = dma_fence_get(job->done_fence);
		job_count++;
	}
	WARN_ON(atomic_read(&queue->in_flight_job_count) != job_count);

	/* Re-insert the queue in the proper list, and kick a queue processing
	 * operation if there were jobs pending.
	 */
	mutex_lock(&pvr_dev->queues.lock);
	if (!job_count) {
		list_move_tail(&queue->node, &pvr_dev->queues.idle);
	} else {
		atomic_set(&queue->in_flight_job_count, job_count);
		list_move_tail(&queue->node, &pvr_dev->queues.active);
		pvr_queue_process(queue);
	}
	mutex_unlock(&pvr_dev->queues.lock);

	drm_sched_start(sched, 0);

	return DRM_GPU_SCHED_STAT_RESET;
}

/**
 * pvr_queue_free_job() - Release the reference the scheduler had on a job object.
 * @sched_job: Job object to free.
 */
static void pvr_queue_free_job(struct drm_sched_job *sched_job)
{
	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);

	drm_sched_job_cleanup(sched_job);

	/*
	 * Only the fragment job drops a reference on its paired job.
	 * NOTE(review): presumably this matches a reference taken on the
	 * geometry job when the jobs were paired - that code is not visible
	 * here, confirm.
	 */
	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job)
		pvr_job_put(job->paired_job);

	job->paired_job = NULL;
	pvr_job_put(job);
}

/* drm_sched backend operations used by the queue schedulers. */
static const struct drm_sched_backend_ops pvr_queue_sched_ops = {
	.prepare_job = pvr_queue_prepare_job,
	.run_job = pvr_queue_run_job,
	.timedout_job = pvr_queue_timedout_job,
	.free_job = pvr_queue_free_job,
};

/**
 * pvr_queue_fence_is_native() - Check if a dma_fence is native to this driver.
 * @f: Fence to test.
 *
 * Check if the fence we're being passed is a drm_sched_fence that is coming from this driver.
 *
 * It may be a UFO-backed fence i.e. a fence that can be signaled or waited upon FW-side,
 * such as pvr_job::done_fence objects that are backed by the timeline UFO attached to the queue
 * they are pushed to.
 *
 * Return:
 *  * true if @f is a drm_sched_fence attached to a scheduler using
 *    pvr_queue_sched_ops, or if @f is a UFO-backed fence, or
 *  * false otherwise, including when @f is NULL.
 */
bool pvr_queue_fence_is_native(struct dma_fence *f)
{
	struct drm_sched_fence *sched_fence = f ? to_drm_sched_fence(f) : NULL;

	if (sched_fence &&
	    sched_fence->sched->ops == &pvr_queue_sched_ops)
		return true;

	return pvr_queue_fence_is_ufo_backed(f);
}

/**
 * pvr_queue_signal_done_fences() - Signal done fences.
 * @queue: Queue to check.
 *
 * Signal done fences of jobs whose seqno is less than or equal to the current
 * value of the UFO object attached to the queue.
 */
static void
pvr_queue_signal_done_fences(struct pvr_queue *queue)
{
	struct pvr_job *job, *tmp_job;
	u32 cur_seqno;

	spin_lock(&queue->scheduler.job_list_lock);
	cur_seqno = *queue->timeline_ufo.value;
	list_for_each_entry_safe(job, tmp_job, &queue->scheduler.pending_list, base.list) {
		/*
		 * Stop at the first job whose seqno is past the UFO value. The
		 * comparison is done on the signed 32-bit difference, so it
		 * still works when the 32-bit value wraps around.
		 */
		if ((int)(cur_seqno - lower_32_bits(job->done_fence->seqno)) < 0)
			break;

		if (!dma_fence_is_signaled(job->done_fence)) {
			dma_fence_signal(job->done_fence);
			pvr_job_release_pm_ref(job);
			atomic_dec(&queue->in_flight_job_count);
		}
	}
	spin_unlock(&queue->scheduler.job_list_lock);
}

/**
 * pvr_queue_check_job_waiting_for_cccb_space() - Check if a job waiting for CCCB space
 * can be unblocked and pushed to the CCCB.
 * @queue: Queue to check
 *
 * If we have a job waiting for CCCB, and this job now fits in the CCCB, we signal
 * its CCCB fence, which should kick drm_sched.
 */
static void
pvr_queue_check_job_waiting_for_cccb_space(struct pvr_queue *queue)
{
	struct pvr_queue_fence *cccb_fence;
	u32 native_deps_remaining;
	struct pvr_job *job;

	mutex_lock(&queue->cccb_fence_ctx.job_lock);
	job = queue->cccb_fence_ctx.job;
	if (!job)
		goto out_unlock;

	/* If we have a job attached to the CCCB fence context, its CCCB fence
	 * shouldn't be NULL.
	 */
	if (WARN_ON(!job->cccb_fence)) {
		job = NULL;
		goto out_unlock;
	}

	/* If we get there, CCCB fence has to be initialized. */
	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
	if (WARN_ON(!cccb_fence->queue)) {
		job = NULL;
		goto out_unlock;
	}

	/* Evict signaled dependencies before checking for CCCB space.
	 * If the job fits, signal the CCCB fence, this should unblock
	 * the drm_sched_entity.
	 */
	native_deps_remaining = job_count_remaining_native_deps(job);
	if (!pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
		job = NULL;
		goto out_unlock;
	}

	dma_fence_signal(job->cccb_fence);
	pvr_queue_fence_put(job->cccb_fence);
	job->cccb_fence = NULL;
	queue->cccb_fence_ctx.job = NULL;

out_unlock:
	mutex_unlock(&queue->cccb_fence_ctx.job_lock);

	/*
	 * job is only non-NULL here when it's been detached from the CCCB fence
	 * context above: drop the reference the context had on it.
	 */
	pvr_job_put(job);
}

/**
 * pvr_queue_process() - Process events that happened on a queue.
 * @queue: Queue to check
 *
 * Signal job fences and check if jobs waiting for CCCB space can be unblocked.
 */
void pvr_queue_process(struct pvr_queue *queue)
{
	lockdep_assert_held(&queue->ctx->pvr_dev->queues.lock);

	pvr_queue_check_job_waiting_for_cccb_space(queue);
	pvr_queue_signal_done_fences(queue);
	pvr_queue_update_active_state_locked(queue);
}

/**
 * get_dm_type() - Get the PVR_FWIF_DM_* value matching a queue type.
 * @queue: Queue to get the value for.
 *
 * Fragment and transfer queues both map to PVR_FWIF_DM_FRAG.
 *
 * Return:
 *  * The PVR_FWIF_DM_* value for the queue type, or
 *  * ~0 if the queue type is not a known job type.
 */
static u32 get_dm_type(struct pvr_queue *queue)
{
	switch (queue->type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return PVR_FWIF_DM_GEOM;
	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return PVR_FWIF_DM_FRAG;
	case DRM_PVR_JOB_TYPE_COMPUTE:
		return PVR_FWIF_DM_CDM;
	}

	return ~0;
}

/**
 * init_fw_context() - Initializes the queue part of a FW context.
 * @queue: Queue object to initialize the FW context for.
 * @fw_ctx_map: The FW context CPU mapping.
 *
 * FW contexts contain various states, one of them being a per-queue state
 * that needs to be initialized for each queue being exposed by a context. This
 * function takes care of that.
 */
static void init_fw_context(struct pvr_queue *queue, void *fw_ctx_map)
{
	struct pvr_context *ctx = queue->ctx;
	struct pvr_fw_object *fw_mem_ctx_obj = pvr_vm_get_fw_mem_context(ctx->vm_ctx);
	struct rogue_fwif_fwcommoncontext *cctx_fw;
	struct pvr_cccb *cccb = &queue->cccb;

	/* The per-queue common context lives at ctx_offset in the FW context. */
	cctx_fw = fw_ctx_map + queue->ctx_offset;
	cctx_fw->ccbctl_fw_addr = cccb->ctrl_fw_addr;
	cctx_fw->ccb_fw_addr = cccb->cccb_fw_addr;

	cctx_fw->dm = get_dm_type(queue);
	cctx_fw->priority = ctx->priority;
	cctx_fw->priority_seq_num = 0;
	cctx_fw->max_deadline_ms = MAX_DEADLINE_MS;
	cctx_fw->pid = task_tgid_nr(current);
	cctx_fw->server_common_context_id = ctx->ctx_id;

	pvr_fw_object_get_fw_addr(fw_mem_ctx_obj, &cctx_fw->fw_mem_context_fw_addr);

	pvr_fw_object_get_fw_addr(queue->reg_state_obj, &cctx_fw->context_state_addr);
}

/**
 * pvr_queue_cleanup_fw_context() - Wait for the FW context to be idle and clean it up.
 * @queue: Queue on FW context to clean up.
 *
 * Return:
 *  * 0 on success,
 *  * Any error returned by pvr_fw_structure_cleanup() otherwise.
 */
static int pvr_queue_cleanup_fw_context(struct pvr_queue *queue)
{
	/* Nothing to clean up if the context has no FW object. */
	if (!queue->ctx->fw_obj)
		return 0;

	return pvr_fw_structure_cleanup(queue->ctx->pvr_dev,
					ROGUE_FWIF_CLEANUP_FWCOMMONCONTEXT,
					queue->ctx->fw_obj, queue->ctx_offset);
}

/**
 * pvr_queue_job_init() - Initialize queue related fields in a pvr_job object.
 * @job: The job to initialize.
 * @drm_client_id: drm_file.client_id submitting the job
 *
 * Bind the job to a queue and allocate memory to guarantee pvr_queue_job_arm()
 * and pvr_queue_job_push() can't fail. We also make sure the context type is
 * valid and the job can fit in the CCCB.
 *
 * Return:
 *  * 0 on success, or
 *  * An error code if something failed.
1129 */ 1130 int pvr_queue_job_init(struct pvr_job *job, u64 drm_client_id) 1131 { 1132 /* Fragment jobs need at least one native fence wait on the geometry job fence. */ 1133 u32 min_native_dep_count = job->type == DRM_PVR_JOB_TYPE_FRAGMENT ? 1 : 0; 1134 struct pvr_queue *queue; 1135 int err; 1136 1137 if (atomic_read(&job->ctx->faulty)) 1138 return -EIO; 1139 1140 queue = pvr_context_get_queue_for_job(job->ctx, job->type); 1141 if (!queue) 1142 return -EINVAL; 1143 1144 if (!pvr_cccb_cmdseq_can_fit(&queue->cccb, job_cmds_size(job, min_native_dep_count))) 1145 return -E2BIG; 1146 1147 err = drm_sched_job_init(&job->base, &queue->entity, 1, THIS_MODULE, drm_client_id); 1148 if (err) 1149 return err; 1150 1151 job->cccb_fence = pvr_queue_fence_alloc(); 1152 job->kccb_fence = pvr_kccb_fence_alloc(); 1153 job->done_fence = pvr_queue_fence_alloc(); 1154 if (!job->cccb_fence || !job->kccb_fence || !job->done_fence) 1155 return -ENOMEM; 1156 1157 return 0; 1158 } 1159 1160 /** 1161 * pvr_queue_job_arm() - Arm a job object. 1162 * @job: The job to arm. 1163 * 1164 * Initializes fences and return the drm_sched finished fence so it can 1165 * be exposed to the outside world. Once this function is called, you should 1166 * make sure the job is pushed using pvr_queue_job_push(), or guarantee that 1167 * no one grabbed a reference to the returned fence. The latter can happen if 1168 * we do multi-job submission, and something failed when creating/initializing 1169 * a job. In that case, we know the fence didn't leave the driver, and we 1170 * can thus guarantee nobody will wait on an dead fence object. 1171 * 1172 * Return: 1173 * * A dma_fence object. 1174 */ 1175 struct dma_fence *pvr_queue_job_arm(struct pvr_job *job) 1176 { 1177 drm_sched_job_arm(&job->base); 1178 1179 return &job->base.s_fence->finished; 1180 } 1181 1182 /** 1183 * pvr_queue_job_cleanup() - Cleanup fence/scheduler related fields in the job object. 1184 * @job: The job to cleanup. 
1185 * 1186 * Should be called in the job release path. 1187 */ 1188 void pvr_queue_job_cleanup(struct pvr_job *job) 1189 { 1190 pvr_queue_fence_put(job->done_fence); 1191 pvr_queue_fence_put(job->cccb_fence); 1192 pvr_kccb_fence_put(job->kccb_fence); 1193 1194 if (job->base.s_fence) 1195 drm_sched_job_cleanup(&job->base); 1196 } 1197 1198 /** 1199 * pvr_queue_job_push() - Push a job to its queue. 1200 * @job: The job to push. 1201 * 1202 * Must be called after pvr_queue_job_init() and after all dependencies 1203 * have been added to the job. This will effectively queue the job to 1204 * the drm_sched_entity attached to the queue. We grab a reference on 1205 * the job object, so the caller is free to drop its reference when it's 1206 * done accessing the job object. 1207 */ 1208 void pvr_queue_job_push(struct pvr_job *job) 1209 { 1210 struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler); 1211 1212 /* Keep track of the last queued job scheduled fence for combined submit. */ 1213 dma_fence_put(queue->last_queued_job_scheduled_fence); 1214 queue->last_queued_job_scheduled_fence = dma_fence_get(&job->base.s_fence->scheduled); 1215 1216 pvr_job_get(job); 1217 drm_sched_entity_push_job(&job->base); 1218 } 1219 1220 static void reg_state_init(void *cpu_ptr, void *priv) 1221 { 1222 struct pvr_queue *queue = priv; 1223 1224 if (queue->type == DRM_PVR_JOB_TYPE_GEOMETRY) { 1225 struct rogue_fwif_geom_ctx_state *geom_ctx_state_fw = cpu_ptr; 1226 1227 geom_ctx_state_fw->geom_core[0].geom_reg_vdm_call_stack_pointer_init = 1228 queue->callstack_addr; 1229 } 1230 } 1231 1232 /** 1233 * pvr_queue_create() - Create a queue object. 1234 * @ctx: The context this queue will be attached to. 1235 * @type: The type of jobs being pushed to this queue. 1236 * @args: The arguments passed to the context creation function. 1237 * @fw_ctx_map: CPU mapping of the FW context object. 1238 * 1239 * Create a queue object that will be used to queue and track jobs. 
1240 * 1241 * Return: 1242 * * A valid pointer to a pvr_queue object, or 1243 * * An error pointer if the creation/initialization failed. 1244 */ 1245 struct pvr_queue *pvr_queue_create(struct pvr_context *ctx, 1246 enum drm_pvr_job_type type, 1247 struct drm_pvr_ioctl_create_context_args *args, 1248 void *fw_ctx_map) 1249 { 1250 static const struct { 1251 u32 cccb_size; 1252 const char *name; 1253 } props[] = { 1254 [DRM_PVR_JOB_TYPE_GEOMETRY] = { 1255 .cccb_size = CTX_GEOM_CCCB_SIZE_LOG2, 1256 .name = "geometry", 1257 }, 1258 [DRM_PVR_JOB_TYPE_FRAGMENT] = { 1259 .cccb_size = CTX_FRAG_CCCB_SIZE_LOG2, 1260 .name = "fragment" 1261 }, 1262 [DRM_PVR_JOB_TYPE_COMPUTE] = { 1263 .cccb_size = CTX_COMPUTE_CCCB_SIZE_LOG2, 1264 .name = "compute" 1265 }, 1266 [DRM_PVR_JOB_TYPE_TRANSFER_FRAG] = { 1267 .cccb_size = CTX_TRANSFER_CCCB_SIZE_LOG2, 1268 .name = "transfer_frag" 1269 }, 1270 }; 1271 struct pvr_device *pvr_dev = ctx->pvr_dev; 1272 const struct drm_sched_init_args sched_args = { 1273 .ops = &pvr_queue_sched_ops, 1274 .submit_wq = pvr_dev->sched_wq, 1275 .credit_limit = 64 * 1024, 1276 .hang_limit = 1, 1277 .timeout = msecs_to_jiffies(500), 1278 .timeout_wq = pvr_dev->sched_wq, 1279 .name = "pvr-queue", 1280 .dev = pvr_dev->base.dev, 1281 }; 1282 struct drm_gpu_scheduler *sched; 1283 struct pvr_queue *queue; 1284 int ctx_state_size, err; 1285 void *cpu_map; 1286 1287 if (WARN_ON(type >= sizeof(props))) 1288 return ERR_PTR(-EINVAL); 1289 1290 switch (ctx->type) { 1291 case DRM_PVR_CTX_TYPE_RENDER: 1292 if (type != DRM_PVR_JOB_TYPE_GEOMETRY && 1293 type != DRM_PVR_JOB_TYPE_FRAGMENT) 1294 return ERR_PTR(-EINVAL); 1295 break; 1296 case DRM_PVR_CTX_TYPE_COMPUTE: 1297 if (type != DRM_PVR_JOB_TYPE_COMPUTE) 1298 return ERR_PTR(-EINVAL); 1299 break; 1300 case DRM_PVR_CTX_TYPE_TRANSFER_FRAG: 1301 if (type != DRM_PVR_JOB_TYPE_TRANSFER_FRAG) 1302 return ERR_PTR(-EINVAL); 1303 break; 1304 default: 1305 return ERR_PTR(-EINVAL); 1306 } 1307 1308 ctx_state_size = 
get_ctx_state_size(pvr_dev, type); 1309 if (ctx_state_size < 0) 1310 return ERR_PTR(ctx_state_size); 1311 1312 queue = kzalloc_obj(*queue); 1313 if (!queue) 1314 return ERR_PTR(-ENOMEM); 1315 1316 queue->type = type; 1317 queue->ctx_offset = get_ctx_offset(type); 1318 queue->ctx = ctx; 1319 queue->callstack_addr = args->callstack_addr; 1320 sched = &queue->scheduler; 1321 INIT_LIST_HEAD(&queue->node); 1322 mutex_init(&queue->cccb_fence_ctx.job_lock); 1323 pvr_queue_fence_ctx_init(&queue->cccb_fence_ctx.base); 1324 pvr_queue_fence_ctx_init(&queue->job_fence_ctx); 1325 1326 err = pvr_cccb_init(pvr_dev, &queue->cccb, props[type].cccb_size, props[type].name); 1327 if (err) 1328 goto err_free_queue; 1329 1330 err = pvr_fw_object_create(pvr_dev, ctx_state_size, 1331 PVR_BO_FW_FLAGS_DEVICE_UNCACHED, 1332 reg_state_init, queue, &queue->reg_state_obj); 1333 if (err) 1334 goto err_cccb_fini; 1335 1336 init_fw_context(queue, fw_ctx_map); 1337 1338 if (type != DRM_PVR_JOB_TYPE_GEOMETRY && type != DRM_PVR_JOB_TYPE_FRAGMENT && 1339 args->callstack_addr) { 1340 err = -EINVAL; 1341 goto err_release_reg_state; 1342 } 1343 1344 cpu_map = pvr_fw_object_create_and_map(pvr_dev, sizeof(*queue->timeline_ufo.value), 1345 PVR_BO_FW_FLAGS_DEVICE_UNCACHED, 1346 NULL, NULL, &queue->timeline_ufo.fw_obj); 1347 if (IS_ERR(cpu_map)) { 1348 err = PTR_ERR(cpu_map); 1349 goto err_release_reg_state; 1350 } 1351 1352 queue->timeline_ufo.value = cpu_map; 1353 1354 err = drm_sched_init(&queue->scheduler, &sched_args); 1355 if (err) 1356 goto err_release_ufo; 1357 1358 err = drm_sched_entity_init(&queue->entity, 1359 DRM_SCHED_PRIORITY_KERNEL, 1360 &sched, 1, &ctx->faulty); 1361 if (err) 1362 goto err_sched_fini; 1363 1364 mutex_lock(&pvr_dev->queues.lock); 1365 list_add_tail(&queue->node, &pvr_dev->queues.idle); 1366 mutex_unlock(&pvr_dev->queues.lock); 1367 1368 return queue; 1369 1370 err_sched_fini: 1371 drm_sched_fini(&queue->scheduler); 1372 1373 err_release_ufo: 1374 
pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj); 1375 1376 err_release_reg_state: 1377 pvr_fw_object_destroy(queue->reg_state_obj); 1378 1379 err_cccb_fini: 1380 pvr_cccb_fini(&queue->cccb); 1381 1382 err_free_queue: 1383 mutex_destroy(&queue->cccb_fence_ctx.job_lock); 1384 kfree(queue); 1385 1386 return ERR_PTR(err); 1387 } 1388 1389 void pvr_queue_device_pre_reset(struct pvr_device *pvr_dev) 1390 { 1391 struct pvr_queue *queue; 1392 1393 mutex_lock(&pvr_dev->queues.lock); 1394 list_for_each_entry(queue, &pvr_dev->queues.idle, node) 1395 pvr_queue_stop(queue, NULL); 1396 list_for_each_entry(queue, &pvr_dev->queues.active, node) 1397 pvr_queue_stop(queue, NULL); 1398 mutex_unlock(&pvr_dev->queues.lock); 1399 } 1400 1401 void pvr_queue_device_post_reset(struct pvr_device *pvr_dev) 1402 { 1403 struct pvr_queue *queue; 1404 1405 mutex_lock(&pvr_dev->queues.lock); 1406 list_for_each_entry(queue, &pvr_dev->queues.active, node) 1407 pvr_queue_start(queue); 1408 list_for_each_entry(queue, &pvr_dev->queues.idle, node) 1409 pvr_queue_start(queue); 1410 mutex_unlock(&pvr_dev->queues.lock); 1411 } 1412 1413 /** 1414 * pvr_queue_kill() - Kill a queue. 1415 * @queue: The queue to kill. 1416 * 1417 * Kill the queue so no new jobs can be pushed. Should be called when the 1418 * context handle is destroyed. The queue object might last longer if jobs 1419 * are still in flight and holding a reference to the context this queue 1420 * belongs to. 1421 */ 1422 void pvr_queue_kill(struct pvr_queue *queue) 1423 { 1424 drm_sched_entity_destroy(&queue->entity); 1425 dma_fence_put(queue->last_queued_job_scheduled_fence); 1426 queue->last_queued_job_scheduled_fence = NULL; 1427 } 1428 1429 /** 1430 * pvr_queue_destroy() - Destroy a queue. 1431 * @queue: The queue to destroy. 1432 * 1433 * Cleanup the queue and free the resources attached to it. Should be 1434 * called from the context release function. 
1435 */ 1436 void pvr_queue_destroy(struct pvr_queue *queue) 1437 { 1438 if (!queue) 1439 return; 1440 1441 mutex_lock(&queue->ctx->pvr_dev->queues.lock); 1442 list_del_init(&queue->node); 1443 mutex_unlock(&queue->ctx->pvr_dev->queues.lock); 1444 1445 drm_sched_fini(&queue->scheduler); 1446 drm_sched_entity_fini(&queue->entity); 1447 1448 if (WARN_ON(queue->last_queued_job_scheduled_fence)) 1449 dma_fence_put(queue->last_queued_job_scheduled_fence); 1450 1451 pvr_queue_cleanup_fw_context(queue); 1452 1453 pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj); 1454 pvr_fw_object_destroy(queue->reg_state_obj); 1455 pvr_cccb_fini(&queue->cccb); 1456 mutex_destroy(&queue->cccb_fence_ctx.job_lock); 1457 kfree(queue); 1458 } 1459 1460 /** 1461 * pvr_queue_device_init() - Device-level initialization of queue related fields. 1462 * @pvr_dev: The device to initialize. 1463 * 1464 * Initializes all fields related to queue management in pvr_device. 1465 * 1466 * Return: 1467 * * 0 on success, or 1468 * * An error code on failure. 1469 */ 1470 int pvr_queue_device_init(struct pvr_device *pvr_dev) 1471 { 1472 int err; 1473 1474 INIT_LIST_HEAD(&pvr_dev->queues.active); 1475 INIT_LIST_HEAD(&pvr_dev->queues.idle); 1476 err = drmm_mutex_init(from_pvr_device(pvr_dev), &pvr_dev->queues.lock); 1477 if (err) 1478 return err; 1479 1480 pvr_dev->sched_wq = alloc_workqueue("powervr-sched", WQ_UNBOUND, 0); 1481 if (!pvr_dev->sched_wq) 1482 return -ENOMEM; 1483 1484 return 0; 1485 } 1486 1487 /** 1488 * pvr_queue_device_fini() - Device-level cleanup of queue related fields. 1489 * @pvr_dev: The device to cleanup. 1490 * 1491 * Cleanup/free all queue-related resources attached to a pvr_device object. 1492 */ 1493 void pvr_queue_device_fini(struct pvr_device *pvr_dev) 1494 { 1495 destroy_workqueue(pvr_dev->sched_wq); 1496 } 1497