// SPDX-License-Identifier: GPL-2.0-only OR MIT
/* Copyright (c) 2023 Imagination Technologies Ltd. */

#include <drm/drm_managed.h>
#include <drm/gpu_scheduler.h>

#include "pvr_cccb.h"
#include "pvr_context.h"
#include "pvr_device.h"
#include "pvr_drv.h"
#include "pvr_job.h"
#include "pvr_queue.h"
#include "pvr_vm.h"

#include "pvr_rogue_fwif_client.h"

#define MAX_DEADLINE_MS 30000

#define CTX_COMPUTE_CCCB_SIZE_LOG2 15
#define CTX_FRAG_CCCB_SIZE_LOG2 15
#define CTX_GEOM_CCCB_SIZE_LOG2 15
#define CTX_TRANSFER_CCCB_SIZE_LOG2 15

static int get_xfer_ctx_state_size(struct pvr_device *pvr_dev)
{
	u32 num_isp_store_registers;

	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
		num_isp_store_registers = 1;
	} else {
		int err;

		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
		if (WARN_ON(err))
			return err;
	}

	return sizeof(struct rogue_fwif_frag_ctx_state) +
	       (num_isp_store_registers *
		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
}

static int get_frag_ctx_state_size(struct pvr_device *pvr_dev)
{
	u32 num_isp_store_registers;
	int err;

	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
		err = PVR_FEATURE_VALUE(pvr_dev, num_raster_pipes, &num_isp_store_registers);
		if (WARN_ON(err))
			return err;

		if (PVR_HAS_FEATURE(pvr_dev, gpu_multicore_support)) {
			u32 xpu_max_slaves;

			err = PVR_FEATURE_VALUE(pvr_dev, xpu_max_slaves, &xpu_max_slaves);
			if (WARN_ON(err))
				return err;

			num_isp_store_registers *= (1 + xpu_max_slaves);
		}
	} else {
		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
		if (WARN_ON(err))
			return err;
	}

	return sizeof(struct rogue_fwif_frag_ctx_state) +
	       (num_isp_store_registers *
		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
}

static int get_ctx_state_size(struct pvr_device *pvr_dev, enum drm_pvr_job_type type)
{
	switch (type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return sizeof(struct rogue_fwif_geom_ctx_state);
	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return get_frag_ctx_state_size(pvr_dev);
	case DRM_PVR_JOB_TYPE_COMPUTE:
		return sizeof(struct rogue_fwif_compute_ctx_state);
	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return get_xfer_ctx_state_size(pvr_dev);
	}

	WARN(1, "Invalid queue type");
	return -EINVAL;
}

static u32 get_ctx_offset(enum drm_pvr_job_type type)
{
	switch (type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return offsetof(struct rogue_fwif_fwrendercontext, geom_context);
	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return offsetof(struct rogue_fwif_fwrendercontext, frag_context);
	case DRM_PVR_JOB_TYPE_COMPUTE:
		return offsetof(struct rogue_fwif_fwcomputecontext, cdm_context);
	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return offsetof(struct rogue_fwif_fwtransfercontext, tq_context);
	}

	return 0;
}

static const char *
pvr_queue_fence_get_driver_name(struct dma_fence *f)
{
	return PVR_DRIVER_NAME;
}

static void pvr_queue_fence_release(struct dma_fence *f)
{
	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);

	pvr_context_put(fence->queue->ctx);
	dma_fence_free(f);
}

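/*
 * Timeline names are derived from the queue's job type, so fence debug
 * output (dma_fence tracepoints, sync file info, etc.) identifies which
 * queue a fence belongs to.
 */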
static const char *
pvr_queue_job_fence_get_timeline_name(struct dma_fence *f)
{
	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);

	switch (fence->queue->type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return "geometry";

	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return "fragment";

	case DRM_PVR_JOB_TYPE_COMPUTE:
		return "compute";

	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return "transfer";
	}

	WARN(1, "Invalid queue type");
	return "invalid";
}

static const char *
pvr_queue_cccb_fence_get_timeline_name(struct dma_fence *f)
{
	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);

	switch (fence->queue->type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return "geometry-cccb";

	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return "fragment-cccb";

	case DRM_PVR_JOB_TYPE_COMPUTE:
		return "compute-cccb";

	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return "transfer-cccb";
	}

	WARN(1, "Invalid queue type");
	return "invalid";
}

static const struct dma_fence_ops pvr_queue_job_fence_ops = {
	.get_driver_name = pvr_queue_fence_get_driver_name,
	.get_timeline_name = pvr_queue_job_fence_get_timeline_name,
	.release = pvr_queue_fence_release,
};

/**
 * to_pvr_queue_job_fence() - Return a pvr_queue_fence object if the fence is
 * backed by a UFO.
 * @f: The dma_fence to turn into a pvr_queue_fence.
 *
 * Return:
 * * A non-NULL pvr_queue_fence object if the dma_fence is backed by a UFO, or
 * * NULL otherwise.
 */
static struct pvr_queue_fence *
to_pvr_queue_job_fence(struct dma_fence *f)
{
	struct drm_sched_fence *sched_fence = to_drm_sched_fence(f);

	if (sched_fence)
		f = sched_fence->parent;

	if (f && f->ops == &pvr_queue_job_fence_ops)
		return container_of(f, struct pvr_queue_fence, base);

	return NULL;
}

static const struct dma_fence_ops pvr_queue_cccb_fence_ops = {
	.get_driver_name = pvr_queue_fence_get_driver_name,
	.get_timeline_name = pvr_queue_cccb_fence_get_timeline_name,
	.release = pvr_queue_fence_release,
};

/**
 * pvr_queue_fence_put() - Put wrapper for pvr_queue_fence objects.
 * @f: The dma_fence object to put.
 *
 * If the pvr_queue_fence has been initialized, we call dma_fence_put(),
 * otherwise we free the object with dma_fence_free(). This allows us
 * to do the right thing before and after pvr_queue_fence_init() has been
 * called.
 */
static void pvr_queue_fence_put(struct dma_fence *f)
{
	if (!f)
		return;

	if (WARN_ON(f->ops &&
		    f->ops != &pvr_queue_cccb_fence_ops &&
		    f->ops != &pvr_queue_job_fence_ops))
		return;

	/* If the fence hasn't been initialized yet, free the object directly. */
	if (f->ops)
		dma_fence_put(f);
	else
		dma_fence_free(f);
}

/**
 * pvr_queue_fence_alloc() - Allocate a pvr_queue_fence fence object
 *
 * Call this function to allocate job CCCB and done fences. This only
 * allocates the objects. Initialization happens when the underlying
 * dma_fence object is to be returned to drm_sched (in prepare_job() or
 * run_job()).
 *
 * Return:
 * * A valid pointer if the allocation succeeds, or
 * * NULL if the allocation fails.
 */
static struct dma_fence *
pvr_queue_fence_alloc(void)
{
	struct pvr_queue_fence *fence;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return NULL;

	return &fence->base;
}

/**
 * pvr_queue_fence_init() - Initializes a pvr_queue_fence object.
 * @f: The fence to initialize
 * @queue: The queue this fence belongs to.
 * @fence_ops: The fence operations.
 * @fence_ctx: The fence context.
 *
 * Wrapper around dma_fence_init() that takes care of initializing the
 * pvr_queue_fence::queue field too.
 */
static void
pvr_queue_fence_init(struct dma_fence *f,
		     struct pvr_queue *queue,
		     const struct dma_fence_ops *fence_ops,
		     struct pvr_queue_fence_ctx *fence_ctx)
{
	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);

	pvr_context_get(queue->ctx);
	fence->queue = queue;
	dma_fence_init(&fence->base, fence_ops,
		       &fence_ctx->lock, fence_ctx->id,
		       atomic_inc_return(&fence_ctx->seqno));
}

/**
 * pvr_queue_cccb_fence_init() - Initializes a CCCB fence object.
 * @fence: The fence to initialize.
 * @queue: The queue this fence belongs to.
 *
 * Initializes a fence that can be used to wait for CCCB space.
 *
 * Should be called in the ::prepare_job() path, so the fence returned to
 * drm_sched is valid.
 */
static void
pvr_queue_cccb_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
{
	pvr_queue_fence_init(fence, queue, &pvr_queue_cccb_fence_ops,
			     &queue->cccb_fence_ctx.base);
}

/**
 * pvr_queue_job_fence_init() - Initializes a job done fence object.
 * @fence: The fence to initialize.
 * @queue: The queue this fence belongs to.
 *
 * Initializes a fence that will be signaled when the GPU is done executing
 * a job.
 *
 * Should be called *before* the ::run_job() path, so the fence is initialised
 * before being placed in the pending_list.
 */
static void
pvr_queue_job_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
{
	pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops,
			     &queue->job_fence_ctx);
}

/**
 * pvr_queue_fence_ctx_init() - Queue fence context initialization.
 * @fence_ctx: The context to initialize
 */
static void
pvr_queue_fence_ctx_init(struct pvr_queue_fence_ctx *fence_ctx)
{
	spin_lock_init(&fence_ctx->lock);
	fence_ctx->id = dma_fence_context_alloc(1);
	atomic_set(&fence_ctx->seqno, 0);
}

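/*
 * Compute the CCCB space needed for UFO commands. UFOs are grouped into
 * commands of at most ROGUE_FWIF_CCB_CMD_MAX_UFOS entries, each carrying its
 * own command header, with a final partially-filled command for any
 * remainder.
 */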
static u32 ufo_cmds_size(u32 elem_count)
{
	/* We can pass at most ROGUE_FWIF_CCB_CMD_MAX_UFOS per UFO-related command. */
	u32 full_cmd_count = elem_count / ROGUE_FWIF_CCB_CMD_MAX_UFOS;
	u32 remaining_elems = elem_count % ROGUE_FWIF_CCB_CMD_MAX_UFOS;
	u32 size = full_cmd_count *
		   pvr_cccb_get_size_of_cmd_with_hdr(ROGUE_FWIF_CCB_CMD_MAX_UFOS *
						     sizeof(struct rogue_fwif_ufo));

	if (remaining_elems) {
		size += pvr_cccb_get_size_of_cmd_with_hdr(remaining_elems *
							  sizeof(struct rogue_fwif_ufo));
	}

	return size;
}

static u32 job_cmds_size(struct pvr_job *job, u32 ufo_wait_count)
{
	/* One UFO cmd for the fence signaling, one UFO cmd per native fence waited on,
	 * and a command for the job itself.
	 */
	return ufo_cmds_size(1) + ufo_cmds_size(ufo_wait_count) +
	       pvr_cccb_get_size_of_cmd_with_hdr(job->cmd_len);
}

/**
 * job_count_remaining_native_deps() - Count the number of non-signaled native dependencies.
 * @job: Job to operate on.
 *
 * Returns: Number of non-signaled native deps remaining.
 */
static unsigned long job_count_remaining_native_deps(struct pvr_job *job)
{
	unsigned long remaining_count = 0;
	struct dma_fence *fence = NULL;
	unsigned long index;

	xa_for_each(&job->base.dependencies, index, fence) {
		struct pvr_queue_fence *jfence;

		jfence = to_pvr_queue_job_fence(fence);
		if (!jfence)
			continue;

		if (!dma_fence_is_signaled(&jfence->base))
			remaining_count++;
	}

	return remaining_count;
}

/**
 * pvr_queue_get_job_cccb_fence() - Get the CCCB fence attached to a job.
 * @queue: The queue this job will be submitted to.
 * @job: The job to get the CCCB fence on.
 *
 * The CCCB fence is a synchronization primitive allowing us to delay job
 * submission until there's enough space in the CCCB to submit the job.
 *
 * Return:
 * * NULL if there's enough space in the CCCB to submit this job, or
 * * A valid dma_fence object otherwise.
 */
static struct dma_fence *
pvr_queue_get_job_cccb_fence(struct pvr_queue *queue, struct pvr_job *job)
{
	struct pvr_queue_fence *cccb_fence;
	unsigned int native_deps_remaining;

	/* If the fence is NULL, that means we already checked that we had
	 * enough space in the cccb for our job.
	 */
	if (!job->cccb_fence)
		return NULL;

	mutex_lock(&queue->cccb_fence_ctx.job_lock);

	/* Count remaining native dependencies and check if the job fits in the CCCB. */
	native_deps_remaining = job_count_remaining_native_deps(job);
	if (pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
		pvr_queue_fence_put(job->cccb_fence);
		job->cccb_fence = NULL;
		goto out_unlock;
	}

	/* There should be no job attached to the CCCB fence context:
	 * drm_sched_entity guarantees that jobs are submitted one at a time.
	 */
	if (WARN_ON(queue->cccb_fence_ctx.job))
		pvr_job_put(queue->cccb_fence_ctx.job);

	queue->cccb_fence_ctx.job = pvr_job_get(job);

	/* Initialize the fence before returning it. */
	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
	if (!WARN_ON(cccb_fence->queue))
		pvr_queue_cccb_fence_init(job->cccb_fence, queue);

out_unlock:
	mutex_unlock(&queue->cccb_fence_ctx.job_lock);

	return dma_fence_get(job->cccb_fence);
}

/**
 * pvr_queue_get_job_kccb_fence() - Get the KCCB fence attached to a job.
 * @queue: The queue this job will be submitted to.
 * @job: The job to get the KCCB fence on.
 *
 * The KCCB fence is a synchronization primitive allowing us to delay job
 * submission until there's enough space in the KCCB to submit the job.
 *
 * Return:
 * * NULL if there's enough space in the KCCB to submit this job, or
 * * A valid dma_fence object otherwise.
 */
static struct dma_fence *
pvr_queue_get_job_kccb_fence(struct pvr_queue *queue, struct pvr_job *job)
{
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
	struct dma_fence *kccb_fence = NULL;

	/* If the fence is NULL, that means we already checked that we had
	 * enough space in the KCCB for our job.
	 */
	if (!job->kccb_fence)
		return NULL;

	if (!WARN_ON(job->kccb_fence->ops)) {
		kccb_fence = pvr_kccb_reserve_slot(pvr_dev, job->kccb_fence);
		job->kccb_fence = NULL;
	}

	return kccb_fence;
}

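/*
 * For a geometry job paired with a fragment job, return the next unsignaled
 * dependency of the fragment job (skipping the geometry job's own scheduled
 * fence), falling back to the fragment job's prepare_job() internal
 * dependencies. This ensures the combined geom+frag kick isn't sent before
 * the fragment job is ready to run as well.
 */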
static struct dma_fence *
pvr_queue_get_paired_frag_job_dep(struct pvr_queue *queue, struct pvr_job *job)
{
	struct pvr_job *frag_job = job->type == DRM_PVR_JOB_TYPE_GEOMETRY ?
				   job->paired_job : NULL;
	struct dma_fence *f;
	unsigned long index;

	if (!frag_job)
		return NULL;

	xa_for_each(&frag_job->base.dependencies, index, f) {
		/* Skip already signaled fences. */
		if (dma_fence_is_signaled(f))
			continue;

		/* Skip our own fence. */
		if (f == &job->base.s_fence->scheduled)
			continue;

		return dma_fence_get(f);
	}

	return frag_job->base.sched->ops->prepare_job(&frag_job->base, &queue->entity);
}

/**
 * pvr_queue_prepare_job() - Return the next internal dependency expressed as a dma_fence.
 * @sched_job: The job to query the next internal dependency on
 * @s_entity: The entity this job is queued on.
 *
 * After iterating over drm_sched_job::dependencies, drm_sched lets the driver return
 * its own internal dependencies. We use this function to return our internal dependencies.
 */
static struct dma_fence *
pvr_queue_prepare_job(struct drm_sched_job *sched_job,
		      struct drm_sched_entity *s_entity)
{
	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
	struct pvr_queue *queue = container_of(s_entity, struct pvr_queue, entity);
	struct dma_fence *internal_dep = NULL;

	/*
	 * Initialize the done_fence, so we can signal it. This must be done
	 * here because otherwise by the time of run_job() the job will end up
	 * in the pending list without a valid fence.
	 */
	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
		/*
		 * This will be called on a paired fragment job after it has been
		 * submitted to firmware. We can tell if this is the case, and
		 * bail out early, from whether run_job() has been called on the
		 * geometry job, which would have taken a PM ref.
		 */
		if (job->paired_job->has_pm_ref)
			return NULL;

		/*
		 * In this case we need to use the job's own ctx to initialise
		 * the done_fence. The other steps are done in the ctx of the
		 * paired geometry job.
		 */
		pvr_queue_job_fence_init(job->done_fence,
					 job->ctx->queues.fragment);
	} else {
		pvr_queue_job_fence_init(job->done_fence, queue);
	}

	/* CCCB fence is used to make sure we have enough space in the CCCB to
	 * submit our commands.
	 */
	internal_dep = pvr_queue_get_job_cccb_fence(queue, job);

	/* KCCB fence is used to make sure we have a KCCB slot to queue our
	 * CMD_KICK.
	 */
	if (!internal_dep)
		internal_dep = pvr_queue_get_job_kccb_fence(queue, job);

	/* Any extra internal dependency should be added here, using the following
	 * pattern:
	 *
	 *	if (!internal_dep)
	 *		internal_dep = pvr_queue_get_job_xxxx_fence(queue, job);
	 */

	/* The paired job fence should come last, when everything else is ready. */
	if (!internal_dep)
		internal_dep = pvr_queue_get_paired_frag_job_dep(queue, job);

	return internal_dep;
}

/**
 * pvr_queue_update_active_state_locked() - Update the queue active state.
 * @queue: Queue to update the state on.
 *
 * Locked version of pvr_queue_update_active_state(). Must be called with
 * pvr_device::queue::lock held.
 */
static void pvr_queue_update_active_state_locked(struct pvr_queue *queue)
{
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;

	lockdep_assert_held(&pvr_dev->queues.lock);

	/* The queue is temporarily out of any list when it's being reset,
	 * we don't want a call to pvr_queue_update_active_state_locked()
	 * to re-insert it behind our back.
	 */
	if (list_empty(&queue->node))
		return;

	if (!atomic_read(&queue->in_flight_job_count))
		list_move_tail(&queue->node, &pvr_dev->queues.idle);
	else
		list_move_tail(&queue->node, &pvr_dev->queues.active);
}

/**
 * pvr_queue_update_active_state() - Update the queue active state.
 * @queue: Queue to update the state on.
 *
 * Active state is based on the in_flight_job_count value.
 *
 * Updating the active state implies moving the queue in or out of the
 * active queue list, which also defines whether the queue is checked
 * or not when a FW event is received.
 *
 * This function should be called any time a job is submitted or its done
 * fence is signaled.
 */
static void pvr_queue_update_active_state(struct pvr_queue *queue)
{
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;

	mutex_lock(&pvr_dev->queues.lock);
	pvr_queue_update_active_state_locked(queue);
	mutex_unlock(&pvr_dev->queues.lock);
}

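/**
 * pvr_queue_submit_job_to_cccb() - Write a job's commands to the client CCB.
 * @job: Job to submit.
 *
 * Writes UFO wait commands for the job's remaining unsignaled UFO-backed
 * dependencies, then the job command itself, and finally a UFO update
 * command signaling the queue timeline UFO with the job's done_fence seqno.
 * The queue is moved to the active list first so the FW event reporting
 * progress on this queue isn't missed.
 */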
static void pvr_queue_submit_job_to_cccb(struct pvr_job *job)
{
	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
	struct rogue_fwif_ufo ufos[ROGUE_FWIF_CCB_CMD_MAX_UFOS];
	struct pvr_cccb *cccb = &queue->cccb;
	struct pvr_queue_fence *jfence;
	struct dma_fence *fence;
	unsigned long index;
	u32 ufo_count = 0;

	/* We need to add the queue to the active list before updating the CCCB,
	 * otherwise we might miss the FW event informing us that something
	 * happened on this queue.
	 */
	atomic_inc(&queue->in_flight_job_count);
	pvr_queue_update_active_state(queue);

	xa_for_each(&job->base.dependencies, index, fence) {
		jfence = to_pvr_queue_job_fence(fence);
		if (!jfence)
			continue;

		/* Skip the partial render fence, we will place it at the end. */
		if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job &&
		    &job->paired_job->base.s_fence->scheduled == fence)
			continue;

		if (dma_fence_is_signaled(&jfence->base))
			continue;

		pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
					  &ufos[ufo_count].addr);
		ufos[ufo_count++].value = jfence->base.seqno;

		if (ufo_count == ARRAY_SIZE(ufos)) {
			pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
							   sizeof(ufos), ufos, 0, 0);
			ufo_count = 0;
		}
	}

	/* Partial render fence goes last. */
	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
		jfence = to_pvr_queue_job_fence(job->paired_job->done_fence);
		if (!WARN_ON(!jfence)) {
			pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
						  &ufos[ufo_count].addr);
			ufos[ufo_count++].value = job->paired_job->done_fence->seqno;
		}
	}

	if (ufo_count) {
		pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
						   sizeof(ufos[0]) * ufo_count, ufos, 0, 0);
	}

	if (job->type == DRM_PVR_JOB_TYPE_GEOMETRY && job->paired_job) {
		struct rogue_fwif_cmd_geom *cmd = job->cmd;

		/* Reference value for the partial render test is the current queue fence
		 * seqno minus one.
		 */
		pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj,
					  &cmd->partial_render_geom_frag_fence.addr);
		cmd->partial_render_geom_frag_fence.value = job->done_fence->seqno - 1;
	}

	/* Submit job to FW */
	pvr_cccb_write_command_with_header(cccb, job->fw_ccb_cmd_type, job->cmd_len, job->cmd,
					   job->id, job->id);

	/* Signal the job fence. */
	pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj, &ufos[0].addr);
	ufos[0].value = job->done_fence->seqno;
	pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_UPDATE,
					   sizeof(ufos[0]), ufos, 0, 0);
}

/**
 * pvr_queue_run_job() - Submit a job to the FW.
 * @sched_job: The job to submit.
 *
 * This function is called when all non-native dependencies have been met and
 * when the commands resulting from this job are guaranteed to fit in the CCCB.
 */
static struct dma_fence *pvr_queue_run_job(struct drm_sched_job *sched_job)
{
	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
	struct pvr_device *pvr_dev = job->pvr_dev;
	int err;

	/* The fragment job is issued along with the geometry job when we use combined
	 * geom+frag kicks. When we get here, we should simply return the
	 * done_fence that's been initialized earlier.
	 */
	if (job->paired_job && job->type == DRM_PVR_JOB_TYPE_FRAGMENT &&
	    job->done_fence->ops) {
		return dma_fence_get(job->done_fence);
	}

	/* The only kind of jobs that can be paired are geometry and fragment, and
	 * we bail out early if we see a fragment job that's paired with a geometry
	 * job.
	 * Paired jobs must also target the same context and point to the same
	 * HWRT.
	 */
	if (WARN_ON(job->paired_job &&
		    (job->type != DRM_PVR_JOB_TYPE_GEOMETRY ||
		     job->paired_job->type != DRM_PVR_JOB_TYPE_FRAGMENT ||
		     job->hwrt != job->paired_job->hwrt ||
		     job->ctx != job->paired_job->ctx)))
		return ERR_PTR(-EINVAL);

	err = pvr_job_get_pm_ref(job);
	if (WARN_ON(err))
		return ERR_PTR(err);

	if (job->paired_job) {
		err = pvr_job_get_pm_ref(job->paired_job);
		if (WARN_ON(err))
			return ERR_PTR(err);
	}

	/* Submit our job to the CCCB */
	pvr_queue_submit_job_to_cccb(job);

	if (job->paired_job) {
		struct pvr_job *geom_job = job;
		struct pvr_job *frag_job = job->paired_job;
		struct pvr_queue *geom_queue = job->ctx->queues.geometry;
		struct pvr_queue *frag_queue = job->ctx->queues.fragment;

		/* Submit the fragment job along with the geometry job and send a
		 * combined kick.
		 */
		pvr_queue_submit_job_to_cccb(frag_job);
		pvr_cccb_send_kccb_combined_kick(pvr_dev,
						 &geom_queue->cccb, &frag_queue->cccb,
						 pvr_context_get_fw_addr(geom_job->ctx) +
						 geom_queue->ctx_offset,
						 pvr_context_get_fw_addr(frag_job->ctx) +
						 frag_queue->ctx_offset,
						 job->hwrt,
						 frag_job->fw_ccb_cmd_type ==
						 ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR);
	} else {
		struct pvr_queue *queue = container_of(job->base.sched,
						       struct pvr_queue, scheduler);

		pvr_cccb_send_kccb_kick(pvr_dev, &queue->cccb,
					pvr_context_get_fw_addr(job->ctx) + queue->ctx_offset,
					job->hwrt);
	}

	return dma_fence_get(job->done_fence);
}

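/*
 * pvr_queue_stop() and pvr_queue_start() wrap drm_sched_stop()/drm_sched_start()
 * and are used around device resets: stop parks the scheduler (optionally
 * pointing at a bad job); start CPU-signals the timeline UFO to the latest
 * allocated fence seqno, re-attaches done fences of completed jobs as
 * scheduler parent fences, and flags the context faulty if any job was left
 * unfinished, before letting the scheduler run again.
 */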
static void pvr_queue_stop(struct pvr_queue *queue, struct pvr_job *bad_job)
{
	drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
}

static void pvr_queue_start(struct pvr_queue *queue)
{
	struct pvr_job *job;

	/* Make sure we CPU-signal the UFO object, so other queues don't get
	 * blocked waiting on it.
	 */
	*queue->timeline_ufo.value = atomic_read(&queue->job_fence_ctx.seqno);

	list_for_each_entry(job, &queue->scheduler.pending_list, base.list) {
		if (dma_fence_is_signaled(job->done_fence)) {
			/* Jobs might have completed after drm_sched_stop() was called.
			 * In that case, re-assign the parent field to the done_fence.
			 */
			WARN_ON(job->base.s_fence->parent);
			job->base.s_fence->parent = dma_fence_get(job->done_fence);
		} else {
			/* If we had unfinished jobs, flag the entity as guilty so no
			 * new job can be submitted.
			 */
			atomic_set(&queue->ctx->faulty, 1);
		}
	}

	drm_sched_start(&queue->scheduler, true);
}

/**
 * pvr_queue_timedout_job() - Handle a job timeout event.
 * @s_job: The job this timeout occurred on.
 *
 * FIXME: We don't do anything here to unblock the situation, we just stop+start
 * the scheduler, and re-assign parent fences in the middle.
 *
 * Return:
 * * DRM_GPU_SCHED_STAT_NOMINAL.
 */
static enum drm_gpu_sched_stat
pvr_queue_timedout_job(struct drm_sched_job *s_job)
{
	struct drm_gpu_scheduler *sched = s_job->sched;
	struct pvr_queue *queue = container_of(sched, struct pvr_queue, scheduler);
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
	struct pvr_job *job;
	u32 job_count = 0;

	dev_err(sched->dev, "Job timeout\n");

	/* Before we stop the scheduler, make sure the queue is out of any list, so
	 * any call to pvr_queue_update_active_state_locked() that might happen
	 * until the scheduler is really stopped doesn't end up re-inserting the
	 * queue in the active list. This would cause
	 * pvr_queue_signal_done_fences() and drm_sched_stop() to race with each
	 * other when accessing the pending_list, since drm_sched_stop() doesn't
	 * grab the job_list_lock when modifying the list (it's assuming the
	 * only other accessor is the scheduler, and it's safe to not grab the
	 * lock since it's stopped).
	 */
	mutex_lock(&pvr_dev->queues.lock);
	list_del_init(&queue->node);
	mutex_unlock(&pvr_dev->queues.lock);

	drm_sched_stop(sched, s_job);

	/* Re-assign job parent fences. */
	list_for_each_entry(job, &sched->pending_list, base.list) {
		job->base.s_fence->parent = dma_fence_get(job->done_fence);
		job_count++;
	}
	WARN_ON(atomic_read(&queue->in_flight_job_count) != job_count);

	/* Re-insert the queue in the proper list, and kick a queue processing
	 * operation if there were jobs pending.
	 */
	mutex_lock(&pvr_dev->queues.lock);
	if (!job_count) {
		list_move_tail(&queue->node, &pvr_dev->queues.idle);
	} else {
		atomic_set(&queue->in_flight_job_count, job_count);
		list_move_tail(&queue->node, &pvr_dev->queues.active);
		pvr_queue_process(queue);
	}
	mutex_unlock(&pvr_dev->queues.lock);

	drm_sched_start(sched, true);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

/**
 * pvr_queue_free_job() - Release the reference the scheduler had on a job object.
 * @sched_job: Job object to free.
 */
static void pvr_queue_free_job(struct drm_sched_job *sched_job)
{
	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);

	drm_sched_job_cleanup(sched_job);
	job->paired_job = NULL;
	pvr_job_put(job);
}

static const struct drm_sched_backend_ops pvr_queue_sched_ops = {
	.prepare_job = pvr_queue_prepare_job,
	.run_job = pvr_queue_run_job,
	.timedout_job = pvr_queue_timedout_job,
	.free_job = pvr_queue_free_job,
};

/**
 * pvr_queue_fence_is_ufo_backed() - Check if a dma_fence is backed by a UFO object
 * @f: Fence to test.
 *
 * A UFO-backed fence is a fence that can be signaled or waited upon FW-side.
 * pvr_job::done_fence objects are backed by the timeline UFO attached to the queue
 * they are pushed to, but those fences are not directly exposed to the outside
 * world, so we also need to check if the fence we're being passed is a
 * drm_sched_fence that came from our driver.
 */
bool pvr_queue_fence_is_ufo_backed(struct dma_fence *f)
{
	struct drm_sched_fence *sched_fence = f ? to_drm_sched_fence(f) : NULL;

	if (sched_fence &&
	    sched_fence->sched->ops == &pvr_queue_sched_ops)
		return true;

	if (f && f->ops == &pvr_queue_job_fence_ops)
		return true;

	return false;
}

/**
 * pvr_queue_signal_done_fences() - Signal done fences.
 * @queue: Queue to check.
 *
 * Signal done fences of jobs whose seqno is less than the current value of
 * the UFO object attached to the queue.
 */
static void
pvr_queue_signal_done_fences(struct pvr_queue *queue)
{
	struct pvr_job *job, *tmp_job;
	u32 cur_seqno;

	spin_lock(&queue->scheduler.job_list_lock);
	cur_seqno = *queue->timeline_ufo.value;
	list_for_each_entry_safe(job, tmp_job, &queue->scheduler.pending_list, base.list) {
		if ((int)(cur_seqno - lower_32_bits(job->done_fence->seqno)) < 0)
			break;

		if (!dma_fence_is_signaled(job->done_fence)) {
			dma_fence_signal(job->done_fence);
			pvr_job_release_pm_ref(job);
			atomic_dec(&queue->in_flight_job_count);
		}
	}
	spin_unlock(&queue->scheduler.job_list_lock);
}

/**
 * pvr_queue_check_job_waiting_for_cccb_space() - Check if the job waiting for CCCB
 * space can be unblocked and pushed to the CCCB.
 * @queue: Queue to check
 *
 * If we have a job waiting for CCCB space, and this job now fits in the CCCB, we
 * signal its CCCB fence, which should kick drm_sched.
 */
static void
pvr_queue_check_job_waiting_for_cccb_space(struct pvr_queue *queue)
{
	struct pvr_queue_fence *cccb_fence;
	u32 native_deps_remaining;
	struct pvr_job *job;

	mutex_lock(&queue->cccb_fence_ctx.job_lock);
	job = queue->cccb_fence_ctx.job;
	if (!job)
		goto out_unlock;

	/* If we have a job attached to the CCCB fence context, its CCCB fence
	 * shouldn't be NULL.
	 */
	if (WARN_ON(!job->cccb_fence)) {
		job = NULL;
		goto out_unlock;
	}

	/* If we get here, the CCCB fence has to be initialized. */
	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
	if (WARN_ON(!cccb_fence->queue)) {
		job = NULL;
		goto out_unlock;
	}

	/* Evict signaled dependencies before checking for CCCB space.
	 * If the job fits, signal the CCCB fence, this should unblock
	 * the drm_sched_entity.
	 */
	native_deps_remaining = job_count_remaining_native_deps(job);
	if (!pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
		job = NULL;
		goto out_unlock;
	}

	dma_fence_signal(job->cccb_fence);
	pvr_queue_fence_put(job->cccb_fence);
	job->cccb_fence = NULL;
	queue->cccb_fence_ctx.job = NULL;

out_unlock:
	mutex_unlock(&queue->cccb_fence_ctx.job_lock);

	pvr_job_put(job);
}

/**
 * pvr_queue_process() - Process events that happened on a queue.
 * @queue: Queue to check
 *
 * Signal job fences and check if jobs waiting for CCCB space can be unblocked.
 */
void pvr_queue_process(struct pvr_queue *queue)
{
	lockdep_assert_held(&queue->ctx->pvr_dev->queues.lock);

	pvr_queue_check_job_waiting_for_cccb_space(queue);
	pvr_queue_signal_done_fences(queue);
	pvr_queue_update_active_state_locked(queue);
}

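/*
 * Map a queue's job type to the firmware data master (DM) that will execute
 * its commands. Fragment and transfer-fragment jobs both run on the fragment
 * DM; compute jobs run on the CDM.
 */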
static u32 get_dm_type(struct pvr_queue *queue)
{
	switch (queue->type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return PVR_FWIF_DM_GEOM;
	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return PVR_FWIF_DM_FRAG;
	case DRM_PVR_JOB_TYPE_COMPUTE:
		return PVR_FWIF_DM_CDM;
	}

	return ~0;
}

/**
 * init_fw_context() - Initializes the queue part of a FW context.
 * @queue: Queue object to initialize the FW context for.
 * @fw_ctx_map: The FW context CPU mapping.
 *
 * FW contexts contain various states, one of them being a per-queue state
 * that needs to be initialized for each queue exposed by a context. This
 * function takes care of that.
 */
static void init_fw_context(struct pvr_queue *queue, void *fw_ctx_map)
{
	struct pvr_context *ctx = queue->ctx;
	struct pvr_fw_object *fw_mem_ctx_obj = pvr_vm_get_fw_mem_context(ctx->vm_ctx);
	struct rogue_fwif_fwcommoncontext *cctx_fw;
	struct pvr_cccb *cccb = &queue->cccb;

	cctx_fw = fw_ctx_map + queue->ctx_offset;
	cctx_fw->ccbctl_fw_addr = cccb->ctrl_fw_addr;
	cctx_fw->ccb_fw_addr = cccb->cccb_fw_addr;

	cctx_fw->dm = get_dm_type(queue);
	cctx_fw->priority = ctx->priority;
	cctx_fw->priority_seq_num = 0;
	cctx_fw->max_deadline_ms = MAX_DEADLINE_MS;
	cctx_fw->pid = task_tgid_nr(current);
	cctx_fw->server_common_context_id = ctx->ctx_id;

	pvr_fw_object_get_fw_addr(fw_mem_ctx_obj, &cctx_fw->fw_mem_context_fw_addr);

	pvr_fw_object_get_fw_addr(queue->reg_state_obj, &cctx_fw->context_state_addr);
}

/**
 * pvr_queue_cleanup_fw_context() - Wait for the FW context to be idle and clean it up.
 * @queue: Queue whose FW context should be cleaned up.
 *
 * Return:
 * * 0 on success,
 * * Any error returned by pvr_fw_structure_cleanup() otherwise.
 */
static int pvr_queue_cleanup_fw_context(struct pvr_queue *queue)
{
	if (!queue->ctx->fw_obj)
		return 0;

	return pvr_fw_structure_cleanup(queue->ctx->pvr_dev,
					ROGUE_FWIF_CLEANUP_FWCOMMONCONTEXT,
					queue->ctx->fw_obj, queue->ctx_offset);
}

/**
 * pvr_queue_job_init() - Initialize queue related fields in a pvr_job object.
 * @job: The job to initialize.
 *
 * Bind the job to a queue and allocate memory to guarantee pvr_queue_job_arm()
 * and pvr_queue_job_push() can't fail. We also make sure the context type is
 * valid and the job can fit in the CCCB.
 *
 * Return:
 * * 0 on success, or
 * * An error code if something failed.
 */
int pvr_queue_job_init(struct pvr_job *job)
{
	/* Fragment jobs need at least one native fence wait on the geometry job fence. */
	u32 min_native_dep_count = job->type == DRM_PVR_JOB_TYPE_FRAGMENT ? 1 : 0;
	struct pvr_queue *queue;
	int err;

	if (atomic_read(&job->ctx->faulty))
		return -EIO;

	queue = pvr_context_get_queue_for_job(job->ctx, job->type);
	if (!queue)
		return -EINVAL;

	if (!pvr_cccb_cmdseq_can_fit(&queue->cccb, job_cmds_size(job, min_native_dep_count)))
		return -E2BIG;

	err = drm_sched_job_init(&job->base, &queue->entity, 1, THIS_MODULE);
	if (err)
		return err;

	job->cccb_fence = pvr_queue_fence_alloc();
	job->kccb_fence = pvr_kccb_fence_alloc();
	job->done_fence = pvr_queue_fence_alloc();
	if (!job->cccb_fence || !job->kccb_fence || !job->done_fence)
		return -ENOMEM;

	return 0;
}

/**
 * pvr_queue_job_arm() - Arm a job object.
 * @job: The job to arm.
 *
 * Initializes fences and returns the drm_sched finished fence so it can
 * be exposed to the outside world. Once this function is called, you should
 * make sure the job is pushed using pvr_queue_job_push(), or guarantee that
 * no one grabbed a reference to the returned fence. The latter can happen if
 * we do multi-job submission, and something failed when creating/initializing
 * a job. In that case, we know the fence didn't leave the driver, and we
 * can thus guarantee nobody will wait on a dead fence object.
 *
 * Return:
 * * A dma_fence object.
 */
struct dma_fence *pvr_queue_job_arm(struct pvr_job *job)
{
	drm_sched_job_arm(&job->base);

	return &job->base.s_fence->finished;
}

/**
 * pvr_queue_job_cleanup() - Cleanup fence/scheduler related fields in the job object.
 * @job: The job to cleanup.
 *
 * Should be called in the job release path.
 */
void pvr_queue_job_cleanup(struct pvr_job *job)
{
	pvr_queue_fence_put(job->done_fence);
	pvr_queue_fence_put(job->cccb_fence);
	pvr_kccb_fence_put(job->kccb_fence);

	if (job->base.s_fence)
		drm_sched_job_cleanup(&job->base);
}

/**
 * pvr_queue_job_push() - Push a job to its queue.
 * @job: The job to push.
 *
 * Must be called after pvr_queue_job_init() and after all dependencies
 * have been added to the job. This will effectively queue the job to
 * the drm_sched_entity attached to the queue. We grab a reference on
 * the job object, so the caller is free to drop its reference when it's
 * done accessing the job object.
 */
void pvr_queue_job_push(struct pvr_job *job)
{
	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);

	/* Keep track of the last queued job scheduled fence for combined submit. */
	dma_fence_put(queue->last_queued_job_scheduled_fence);
	queue->last_queued_job_scheduled_fence = dma_fence_get(&job->base.s_fence->scheduled);

	pvr_job_get(job);
	drm_sched_entity_push_job(&job->base);
}

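/*
 * pvr_fw_object_create() init callback for the register state object. For
 * geometry queues, seed the VDM call stack pointer with the callstack address
 * supplied at context creation; other queue types keep the zero-initialized
 * state.
 */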
static void reg_state_init(void *cpu_ptr, void *priv)
{
	struct pvr_queue *queue = priv;

	if (queue->type == DRM_PVR_JOB_TYPE_GEOMETRY) {
		struct rogue_fwif_geom_ctx_state *geom_ctx_state_fw = cpu_ptr;

		geom_ctx_state_fw->geom_core[0].geom_reg_vdm_call_stack_pointer_init =
			queue->callstack_addr;
	}
}

/**
 * pvr_queue_create() - Create a queue object.
 * @ctx: The context this queue will be attached to.
 * @type: The type of jobs being pushed to this queue.
 * @args: The arguments passed to the context creation function.
 * @fw_ctx_map: CPU mapping of the FW context object.
 *
 * Create a queue object that will be used to queue and track jobs.
 *
 * Return:
 * * A valid pointer to a pvr_queue object, or
 * * An error pointer if the creation/initialization failed.
 */
struct pvr_queue *pvr_queue_create(struct pvr_context *ctx,
				   enum drm_pvr_job_type type,
				   struct drm_pvr_ioctl_create_context_args *args,
				   void *fw_ctx_map)
{
	static const struct {
		u32 cccb_size;
		const char *name;
	} props[] = {
		[DRM_PVR_JOB_TYPE_GEOMETRY] = {
			.cccb_size = CTX_GEOM_CCCB_SIZE_LOG2,
			.name = "geometry",
		},
		[DRM_PVR_JOB_TYPE_FRAGMENT] = {
			.cccb_size = CTX_FRAG_CCCB_SIZE_LOG2,
			.name = "fragment"
		},
		[DRM_PVR_JOB_TYPE_COMPUTE] = {
			.cccb_size = CTX_COMPUTE_CCCB_SIZE_LOG2,
			.name = "compute"
		},
		[DRM_PVR_JOB_TYPE_TRANSFER_FRAG] = {
			.cccb_size = CTX_TRANSFER_CCCB_SIZE_LOG2,
			.name = "transfer_frag"
		},
	};
	struct pvr_device *pvr_dev = ctx->pvr_dev;
	struct drm_gpu_scheduler *sched;
	struct pvr_queue *queue;
	int ctx_state_size, err;
	void *cpu_map;

	if (WARN_ON(type >= ARRAY_SIZE(props)))
		return ERR_PTR(-EINVAL);

	switch (ctx->type) {
	case DRM_PVR_CTX_TYPE_RENDER:
		if (type != DRM_PVR_JOB_TYPE_GEOMETRY &&
		    type != DRM_PVR_JOB_TYPE_FRAGMENT)
			return ERR_PTR(-EINVAL);
		break;
	case DRM_PVR_CTX_TYPE_COMPUTE:
		if (type != DRM_PVR_JOB_TYPE_COMPUTE)
			return ERR_PTR(-EINVAL);
		break;
	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
		if (type != DRM_PVR_JOB_TYPE_TRANSFER_FRAG)
			return ERR_PTR(-EINVAL);
		break;
	default:
		return ERR_PTR(-EINVAL);
	}

	ctx_state_size = get_ctx_state_size(pvr_dev, type);
	if (ctx_state_size < 0)
		return ERR_PTR(ctx_state_size);

	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return ERR_PTR(-ENOMEM);

	queue->type = type;
	queue->ctx_offset = get_ctx_offset(type);
	queue->ctx = ctx;
	queue->callstack_addr = args->callstack_addr;
	sched = &queue->scheduler;
	INIT_LIST_HEAD(&queue->node);
	mutex_init(&queue->cccb_fence_ctx.job_lock);
	pvr_queue_fence_ctx_init(&queue->cccb_fence_ctx.base);
	pvr_queue_fence_ctx_init(&queue->job_fence_ctx);

	err = pvr_cccb_init(pvr_dev, &queue->cccb, props[type].cccb_size, props[type].name);
	if (err)
		goto err_free_queue;

	err = pvr_fw_object_create(pvr_dev, ctx_state_size,
				   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
				   reg_state_init, queue, &queue->reg_state_obj);
	if (err)
		goto err_cccb_fini;

	init_fw_context(queue, fw_ctx_map);

	if (type != DRM_PVR_JOB_TYPE_GEOMETRY && type != DRM_PVR_JOB_TYPE_FRAGMENT &&
	    args->callstack_addr) {
		err = -EINVAL;
		goto err_release_reg_state;
	}

	cpu_map = pvr_fw_object_create_and_map(pvr_dev, sizeof(*queue->timeline_ufo.value),
					       PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
					       NULL, NULL, &queue->timeline_ufo.fw_obj);
	if (IS_ERR(cpu_map)) {
		err = PTR_ERR(cpu_map);
		goto err_release_reg_state;
	}

	queue->timeline_ufo.value = cpu_map;

	err = drm_sched_init(&queue->scheduler,
			     &pvr_queue_sched_ops,
			     pvr_dev->sched_wq, 1, 64 * 1024, 1,
			     msecs_to_jiffies(500),
			     pvr_dev->sched_wq, NULL, "pvr-queue",
			     pvr_dev->base.dev);
	if (err)
		goto err_release_ufo;

	err = drm_sched_entity_init(&queue->entity,
				    DRM_SCHED_PRIORITY_KERNEL,
				    &sched, 1, &ctx->faulty);
	if (err)
		goto err_sched_fini;

	mutex_lock(&pvr_dev->queues.lock);
	list_add_tail(&queue->node, &pvr_dev->queues.idle);
	mutex_unlock(&pvr_dev->queues.lock);

	return queue;

err_sched_fini:
	drm_sched_fini(&queue->scheduler);

err_release_ufo:
	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);

err_release_reg_state:
	pvr_fw_object_destroy(queue->reg_state_obj);

err_cccb_fini:
	pvr_cccb_fini(&queue->cccb);

err_free_queue:
	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
	kfree(queue);

	return ERR_PTR(err);
}

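/*
 * Device reset helpers: pvr_queue_device_pre_reset() stops the scheduler of
 * every queue (idle and active) before the GPU is reset, and
 * pvr_queue_device_post_reset() restarts them once the reset is done.
 */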
void pvr_queue_device_pre_reset(struct pvr_device *pvr_dev)
{
	struct pvr_queue *queue;

	mutex_lock(&pvr_dev->queues.lock);
	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
		pvr_queue_stop(queue, NULL);
	list_for_each_entry(queue, &pvr_dev->queues.active, node)
		pvr_queue_stop(queue, NULL);
	mutex_unlock(&pvr_dev->queues.lock);
}

void pvr_queue_device_post_reset(struct pvr_device *pvr_dev)
{
	struct pvr_queue *queue;

	mutex_lock(&pvr_dev->queues.lock);
	list_for_each_entry(queue, &pvr_dev->queues.active, node)
		pvr_queue_start(queue);
	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
		pvr_queue_start(queue);
	mutex_unlock(&pvr_dev->queues.lock);
}

/**
 * pvr_queue_kill() - Kill a queue.
 * @queue: The queue to kill.
 *
 * Kill the queue so no new jobs can be pushed. Should be called when the
 * context handle is destroyed. The queue object might last longer if jobs
 * are still in flight and holding a reference to the context this queue
 * belongs to.
 */
void pvr_queue_kill(struct pvr_queue *queue)
{
	drm_sched_entity_destroy(&queue->entity);
	dma_fence_put(queue->last_queued_job_scheduled_fence);
	queue->last_queued_job_scheduled_fence = NULL;
}

/**
 * pvr_queue_destroy() - Destroy a queue.
 * @queue: The queue to destroy.
 *
 * Cleanup the queue and free the resources attached to it. Should be
 * called from the context release function.
 */
void pvr_queue_destroy(struct pvr_queue *queue)
{
	if (!queue)
		return;

	mutex_lock(&queue->ctx->pvr_dev->queues.lock);
	list_del_init(&queue->node);
	mutex_unlock(&queue->ctx->pvr_dev->queues.lock);

	drm_sched_fini(&queue->scheduler);
	drm_sched_entity_fini(&queue->entity);

	if (WARN_ON(queue->last_queued_job_scheduled_fence))
		dma_fence_put(queue->last_queued_job_scheduled_fence);

	pvr_queue_cleanup_fw_context(queue);

	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
	pvr_fw_object_destroy(queue->reg_state_obj);
	pvr_cccb_fini(&queue->cccb);
	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
	kfree(queue);
}

/**
 * pvr_queue_device_init() - Device-level initialization of queue related fields.
 * @pvr_dev: The device to initialize.
 *
 * Initializes all fields related to queue management in pvr_device.
 *
 * Return:
 * * 0 on success, or
 * * An error code on failure.
 */
int pvr_queue_device_init(struct pvr_device *pvr_dev)
{
	int err;

	INIT_LIST_HEAD(&pvr_dev->queues.active);
	INIT_LIST_HEAD(&pvr_dev->queues.idle);
	err = drmm_mutex_init(from_pvr_device(pvr_dev), &pvr_dev->queues.lock);
	if (err)
		return err;

	pvr_dev->sched_wq = alloc_workqueue("powervr-sched", WQ_UNBOUND, 0);
	if (!pvr_dev->sched_wq)
		return -ENOMEM;

	return 0;
}

/**
 * pvr_queue_device_fini() - Device-level cleanup of queue related fields.
 * @pvr_dev: The device to cleanup.
 *
 * Cleanup/free all queue-related resources attached to a pvr_device object.
 */
void pvr_queue_device_fini(struct pvr_device *pvr_dev)
{
	destroy_workqueue(pvr_dev->sched_wq);
}