// SPDX-License-Identifier: GPL-2.0-only OR MIT
/* Copyright (c) 2023 Imagination Technologies Ltd. */

#include <drm/drm_managed.h>
#include <drm/gpu_scheduler.h>

#include "pvr_cccb.h"
#include "pvr_context.h"
#include "pvr_device.h"
#include "pvr_drv.h"
#include "pvr_job.h"
#include "pvr_queue.h"
#include "pvr_vm.h"

#include "pvr_rogue_fwif_client.h"

#define MAX_DEADLINE_MS 30000

#define CTX_COMPUTE_CCCB_SIZE_LOG2 15
#define CTX_FRAG_CCCB_SIZE_LOG2 15
#define CTX_GEOM_CCCB_SIZE_LOG2 15
#define CTX_TRANSFER_CCCB_SIZE_LOG2 15

static int get_xfer_ctx_state_size(struct pvr_device *pvr_dev)
{
	u32 num_isp_store_registers;

	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
		num_isp_store_registers = 1;
	} else {
		int err;

		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
		if (WARN_ON(err))
			return err;
	}

	return sizeof(struct rogue_fwif_frag_ctx_state) +
	       (num_isp_store_registers *
		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
}

static int get_frag_ctx_state_size(struct pvr_device *pvr_dev)
{
	u32 num_isp_store_registers;
	int err;

	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
		err = PVR_FEATURE_VALUE(pvr_dev, num_raster_pipes, &num_isp_store_registers);
		if (WARN_ON(err))
			return err;

		if (PVR_HAS_FEATURE(pvr_dev, gpu_multicore_support)) {
			u32 xpu_max_slaves;

			err = PVR_FEATURE_VALUE(pvr_dev, xpu_max_slaves, &xpu_max_slaves);
			if (WARN_ON(err))
				return err;

			num_isp_store_registers *= (1 + xpu_max_slaves);
		}
	} else {
		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
		if (WARN_ON(err))
			return err;
	}

	return sizeof(struct rogue_fwif_frag_ctx_state) +
	       (num_isp_store_registers *
		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
}

static int get_ctx_state_size(struct pvr_device *pvr_dev, enum drm_pvr_job_type type)
{
	switch (type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return sizeof(struct rogue_fwif_geom_ctx_state);
	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return get_frag_ctx_state_size(pvr_dev);
	case DRM_PVR_JOB_TYPE_COMPUTE:
		return sizeof(struct rogue_fwif_compute_ctx_state);
	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return get_xfer_ctx_state_size(pvr_dev);
	}

	WARN(1, "Invalid queue type");
	return -EINVAL;
}

static u32 get_ctx_offset(enum drm_pvr_job_type type)
{
	switch (type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return offsetof(struct rogue_fwif_fwrendercontext, geom_context);
	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return offsetof(struct rogue_fwif_fwrendercontext, frag_context);
	case DRM_PVR_JOB_TYPE_COMPUTE:
		return offsetof(struct rogue_fwif_fwcomputecontext, cdm_context);
	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return offsetof(struct rogue_fwif_fwtransfercontext, tq_context);
	}

	return 0;
}

static const char *
pvr_queue_fence_get_driver_name(struct dma_fence *f)
{
	return PVR_DRIVER_NAME;
}

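/*
 * Editorial note (inferred from the code below): fence release is deferred to
 * pvr_device::sched_wq so the context reference is always dropped from process
 * context. ->release() only queues release_work; the actual pvr_context_put()
 * and dma_fence_free() run from the workqueue, which avoids dropping the last
 * context reference from whatever context happens to drop the last fence
 * reference.
 */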
static void pvr_queue_fence_release_work(struct work_struct *w)
{
	struct pvr_queue_fence *fence = container_of(w, struct pvr_queue_fence, release_work);

	pvr_context_put(fence->queue->ctx);
	dma_fence_free(&fence->base);
}

static void pvr_queue_fence_release(struct dma_fence *f)
{
	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
	struct pvr_device *pvr_dev = fence->queue->ctx->pvr_dev;

	queue_work(pvr_dev->sched_wq, &fence->release_work);
}

static const char *
pvr_queue_job_fence_get_timeline_name(struct dma_fence *f)
{
	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);

	switch (fence->queue->type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return "geometry";

	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return "fragment";

	case DRM_PVR_JOB_TYPE_COMPUTE:
		return "compute";

	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return "transfer";
	}

	WARN(1, "Invalid queue type");
	return "invalid";
}

static const char *
pvr_queue_cccb_fence_get_timeline_name(struct dma_fence *f)
{
	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);

	switch (fence->queue->type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return "geometry-cccb";

	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return "fragment-cccb";

	case DRM_PVR_JOB_TYPE_COMPUTE:
		return "compute-cccb";

	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return "transfer-cccb";
	}

	WARN(1, "Invalid queue type");
	return "invalid";
}

static const struct dma_fence_ops pvr_queue_job_fence_ops = {
	.get_driver_name = pvr_queue_fence_get_driver_name,
	.get_timeline_name = pvr_queue_job_fence_get_timeline_name,
	.release = pvr_queue_fence_release,
};

/**
 * to_pvr_queue_job_fence() - Return a pvr_queue_fence object if the fence is
 * backed by a UFO.
 * @f: The dma_fence to turn into a pvr_queue_fence.
 *
 * Return:
 *  * A non-NULL pvr_queue_fence object if the dma_fence is backed by a UFO, or
 *  * NULL otherwise.
 */
static struct pvr_queue_fence *
to_pvr_queue_job_fence(struct dma_fence *f)
{
	struct drm_sched_fence *sched_fence = to_drm_sched_fence(f);

	if (sched_fence)
		f = sched_fence->parent;

	if (f && f->ops == &pvr_queue_job_fence_ops)
		return container_of(f, struct pvr_queue_fence, base);

	return NULL;
}

static const struct dma_fence_ops pvr_queue_cccb_fence_ops = {
	.get_driver_name = pvr_queue_fence_get_driver_name,
	.get_timeline_name = pvr_queue_cccb_fence_get_timeline_name,
	.release = pvr_queue_fence_release,
};

/**
 * pvr_queue_fence_put() - Put wrapper for pvr_queue_fence objects.
 * @f: The dma_fence object to put.
 *
 * If the pvr_queue_fence has been initialized, we call dma_fence_put(),
 * otherwise we free the object with dma_fence_free(). This allows us
 * to do the right thing before and after pvr_queue_fence_init() has been
 * called.
 */
static void pvr_queue_fence_put(struct dma_fence *f)
{
	if (!f)
		return;

	if (WARN_ON(f->ops &&
		    f->ops != &pvr_queue_cccb_fence_ops &&
		    f->ops != &pvr_queue_job_fence_ops))
		return;

	/* If the fence hasn't been initialized yet, free the object directly. */
	if (f->ops)
		dma_fence_put(f);
	else
		dma_fence_free(f);
}

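/*
 * Editorial note: each queue exposes two fence timelines. pvr_queue::cccb_fence_ctx
 * backs the fences used to wait for CCCB space, and pvr_queue::job_fence_ctx backs
 * the job done fences. The two dma_fence_ops above only differ in the timeline
 * name they report.
 */
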
/**
 * pvr_queue_fence_alloc() - Allocate a pvr_queue_fence fence object
 *
 * Call this function to allocate job CCCB and done fences. This only
 * allocates the objects. Initialization happens when the underlying
 * dma_fence object is to be returned to drm_sched (in prepare_job() or
 * run_job()).
 *
 * Return:
 *  * A valid pointer if the allocation succeeds, or
 *  * NULL if the allocation fails.
 */
static struct dma_fence *
pvr_queue_fence_alloc(void)
{
	struct pvr_queue_fence *fence;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return NULL;

	return &fence->base;
}

/**
 * pvr_queue_fence_init() - Initializes a pvr_queue_fence object.
 * @f: The fence to initialize
 * @queue: The queue this fence belongs to.
 * @fence_ops: The fence operations.
 * @fence_ctx: The fence context.
 *
 * Wrapper around dma_fence_init() that takes care of initializing the
 * pvr_queue_fence::queue field too.
 */
static void
pvr_queue_fence_init(struct dma_fence *f,
		     struct pvr_queue *queue,
		     const struct dma_fence_ops *fence_ops,
		     struct pvr_queue_fence_ctx *fence_ctx)
{
	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);

	pvr_context_get(queue->ctx);
	fence->queue = queue;
	INIT_WORK(&fence->release_work, pvr_queue_fence_release_work);
	dma_fence_init(&fence->base, fence_ops,
		       &fence_ctx->lock, fence_ctx->id,
		       atomic_inc_return(&fence_ctx->seqno));
}

/**
 * pvr_queue_cccb_fence_init() - Initializes a CCCB fence object.
 * @fence: The fence to initialize.
 * @queue: The queue this fence belongs to.
 *
 * Initializes a fence that can be used to wait for CCCB space.
 *
 * Should be called in the ::prepare_job() path, so the fence returned to
 * drm_sched is valid.
 */
static void
pvr_queue_cccb_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
{
	pvr_queue_fence_init(fence, queue, &pvr_queue_cccb_fence_ops,
			     &queue->cccb_fence_ctx.base);
}

/**
 * pvr_queue_job_fence_init() - Initializes a job done fence object.
 * @fence: The fence to initialize.
 * @queue: The queue this fence belongs to.
 *
 * Initializes a fence that will be signaled when the GPU is done executing
 * a job.
 *
 * Should be called *before* the ::run_job() path, so the fence is initialised
 * before being placed in the pending_list.
 */
static void
pvr_queue_job_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
{
	if (!fence->ops)
		pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops,
				     &queue->job_fence_ctx);
}

/**
 * pvr_queue_fence_ctx_init() - Queue fence context initialization.
 * @fence_ctx: The context to initialize
 */
static void
pvr_queue_fence_ctx_init(struct pvr_queue_fence_ctx *fence_ctx)
{
	spin_lock_init(&fence_ctx->lock);
	fence_ctx->id = dma_fence_context_alloc(1);
	atomic_set(&fence_ctx->seqno, 0);
}

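/*
 * Editorial note on the size computation below: a single UFO command can carry
 * at most ROGUE_FWIF_CCB_CMD_MAX_UFOS entries, so a wait/signal on N UFOs is
 * emitted as N / ROGUE_FWIF_CCB_CMD_MAX_UFOS full commands plus, when N is not
 * a multiple of that limit, one trailing command holding the remainder. Each
 * command gets its own header, which is what ufo_cmds_size() accounts for.
 */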
static u32 ufo_cmds_size(u32 elem_count)
{
	/* We can pass at most ROGUE_FWIF_CCB_CMD_MAX_UFOS per UFO-related command. */
	u32 full_cmd_count = elem_count / ROGUE_FWIF_CCB_CMD_MAX_UFOS;
	u32 remaining_elems = elem_count % ROGUE_FWIF_CCB_CMD_MAX_UFOS;
	u32 size = full_cmd_count *
		   pvr_cccb_get_size_of_cmd_with_hdr(ROGUE_FWIF_CCB_CMD_MAX_UFOS *
						     sizeof(struct rogue_fwif_ufo));

	if (remaining_elems) {
		size += pvr_cccb_get_size_of_cmd_with_hdr(remaining_elems *
							  sizeof(struct rogue_fwif_ufo));
	}

	return size;
}

static u32 job_cmds_size(struct pvr_job *job, u32 ufo_wait_count)
{
	/* One UFO cmd for the fence signaling, one UFO cmd per native fence wait,
	 * and a command for the job itself.
	 */
	return ufo_cmds_size(1) + ufo_cmds_size(ufo_wait_count) +
	       pvr_cccb_get_size_of_cmd_with_hdr(job->cmd_len);
}

/**
 * job_count_remaining_native_deps() - Count the number of non-signaled native dependencies.
 * @job: Job to operate on.
 *
 * Returns: Number of non-signaled native deps remaining.
 */
static unsigned long job_count_remaining_native_deps(struct pvr_job *job)
{
	unsigned long remaining_count = 0;
	struct dma_fence *fence = NULL;
	unsigned long index;

	xa_for_each(&job->base.dependencies, index, fence) {
		struct pvr_queue_fence *jfence;

		jfence = to_pvr_queue_job_fence(fence);
		if (!jfence)
			continue;

		if (!dma_fence_is_signaled(&jfence->base))
			remaining_count++;
	}

	return remaining_count;
}

/**
 * pvr_queue_get_job_cccb_fence() - Get the CCCB fence attached to a job.
 * @queue: The queue this job will be submitted to.
 * @job: The job to get the CCCB fence on.
 *
 * The CCCB fence is a synchronization primitive allowing us to delay job
 * submission until there's enough space in the CCCB to submit the job.
 *
 * Return:
 *  * NULL if there's enough space in the CCCB to submit this job, or
 *  * A valid dma_fence object otherwise.
 */
static struct dma_fence *
pvr_queue_get_job_cccb_fence(struct pvr_queue *queue, struct pvr_job *job)
{
	struct pvr_queue_fence *cccb_fence;
	unsigned int native_deps_remaining;

	/* If the fence is NULL, that means we already checked that we had
	 * enough space in the cccb for our job.
	 */
	if (!job->cccb_fence)
		return NULL;

	mutex_lock(&queue->cccb_fence_ctx.job_lock);

	/* Count remaining native dependencies and check if the job fits in the CCCB. */
	native_deps_remaining = job_count_remaining_native_deps(job);
	if (pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
		pvr_queue_fence_put(job->cccb_fence);
		job->cccb_fence = NULL;
		goto out_unlock;
	}

	/* There should be no job attached to the CCCB fence context:
	 * drm_sched_entity guarantees that jobs are submitted one at a time.
	 */
	if (WARN_ON(queue->cccb_fence_ctx.job))
		pvr_job_put(queue->cccb_fence_ctx.job);

	queue->cccb_fence_ctx.job = pvr_job_get(job);

	/* Initialize the fence before returning it. */
	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
	if (!WARN_ON(cccb_fence->queue))
		pvr_queue_cccb_fence_init(job->cccb_fence, queue);

out_unlock:
	mutex_unlock(&queue->cccb_fence_ctx.job_lock);

	return dma_fence_get(job->cccb_fence);
}

/**
 * pvr_queue_get_job_kccb_fence() - Get the KCCB fence attached to a job.
 * @queue: The queue this job will be submitted to.
 * @job: The job to get the KCCB fence on.
 *
 * The KCCB fence is a synchronization primitive allowing us to delay job
 * submission until there's enough space in the KCCB to submit the job.
 *
 * Return:
 *  * NULL if there's enough space in the KCCB to submit this job, or
 *  * A valid dma_fence object otherwise.
 */
static struct dma_fence *
pvr_queue_get_job_kccb_fence(struct pvr_queue *queue, struct pvr_job *job)
{
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
	struct dma_fence *kccb_fence = NULL;

	/* If the fence is NULL, that means we already checked that we had
	 * enough space in the KCCB for our job.
	 */
	if (!job->kccb_fence)
		return NULL;

	if (!WARN_ON(job->kccb_fence->ops)) {
		kccb_fence = pvr_kccb_reserve_slot(pvr_dev, job->kccb_fence);
		job->kccb_fence = NULL;
	}

	return kccb_fence;
}

static struct dma_fence *
pvr_queue_get_paired_frag_job_dep(struct pvr_queue *queue, struct pvr_job *job)
{
	struct pvr_job *frag_job = job->type == DRM_PVR_JOB_TYPE_GEOMETRY ?
				   job->paired_job : NULL;
	struct dma_fence *f;
	unsigned long index;

	if (!frag_job)
		return NULL;

	xa_for_each(&frag_job->base.dependencies, index, f) {
		/* Skip already signaled fences. */
		if (dma_fence_is_signaled(f))
			continue;

		/* Skip our own fence. */
		if (f == &job->base.s_fence->scheduled)
			continue;

		return dma_fence_get(f);
	}

	return frag_job->base.sched->ops->prepare_job(&frag_job->base, &queue->entity);
}

/**
 * pvr_queue_prepare_job() - Return the next internal dependency expressed as a dma_fence.
 * @sched_job: The job to query the next internal dependency on
 * @s_entity: The entity this job is queued on.
 *
 * After iterating over drm_sched_job::dependencies, drm_sched lets the driver return
 * its own internal dependencies. We use this function to return our internal dependencies.
 */
static struct dma_fence *
pvr_queue_prepare_job(struct drm_sched_job *sched_job,
		      struct drm_sched_entity *s_entity)
{
	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
	struct pvr_queue *queue = container_of(s_entity, struct pvr_queue, entity);
	struct dma_fence *internal_dep = NULL;

	/*
	 * Initialize the done_fence, so we can signal it. This must be done
	 * here because otherwise by the time of run_job() the job will end up
	 * in the pending list without a valid fence.
	 */
	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
		/*
		 * This will be called on a paired fragment job after being
		 * submitted to firmware. We can tell if this is the case and
		 * bail early from whether run_job() has been called on the
		 * geometry job, which would issue a pm ref.
		 */
		if (job->paired_job->has_pm_ref)
			return NULL;

		/*
		 * In this case we need to use the job's own ctx to initialise
		 * the done_fence. The other steps are done in the ctx of the
		 * paired geometry job.
		 */
		pvr_queue_job_fence_init(job->done_fence,
					 job->ctx->queues.fragment);
	} else {
		pvr_queue_job_fence_init(job->done_fence, queue);
	}

	/* CCCB fence is used to make sure we have enough space in the CCCB to
	 * submit our commands.
	 */
	internal_dep = pvr_queue_get_job_cccb_fence(queue, job);

	/* KCCB fence is used to make sure we have a KCCB slot to queue our
	 * CMD_KICK.
	 */
	if (!internal_dep)
		internal_dep = pvr_queue_get_job_kccb_fence(queue, job);

	/* Any extra internal dependency should be added here, using the following
	 * pattern:
	 *
	 *	if (!internal_dep)
	 *		internal_dep = pvr_queue_get_job_xxxx_fence(queue, job);
	 */

	/* The paired job fence should come last, when everything else is ready. */
	if (!internal_dep)
		internal_dep = pvr_queue_get_paired_frag_job_dep(queue, job);

	return internal_dep;
}

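/*
 * Editorial note on ->prepare_job() semantics: drm_sched calls it again each
 * time the previously returned fence signals, so handing back one dependency
 * per call is enough. The job only proceeds to ->run_job() once this callback
 * returns NULL, i.e. once CCCB space, a KCCB slot and the paired fragment
 * job's dependencies have all been accounted for.
 */
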
/**
 * pvr_queue_update_active_state_locked() - Update the queue active state.
 * @queue: Queue to update the state on.
 *
 * Locked version of pvr_queue_update_active_state(). Must be called with
 * pvr_device::queues::lock held.
 */
static void pvr_queue_update_active_state_locked(struct pvr_queue *queue)
{
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;

	lockdep_assert_held(&pvr_dev->queues.lock);

	/* The queue is temporarily out of any list when it's being reset,
	 * we don't want a call to pvr_queue_update_active_state_locked()
	 * to re-insert it behind our back.
	 */
	if (list_empty(&queue->node))
		return;

	if (!atomic_read(&queue->in_flight_job_count))
		list_move_tail(&queue->node, &pvr_dev->queues.idle);
	else
		list_move_tail(&queue->node, &pvr_dev->queues.active);
}

/**
 * pvr_queue_update_active_state() - Update the queue active state.
 * @queue: Queue to update the state on.
 *
 * Active state is based on the in_flight_job_count value.
 *
 * Updating the active state implies moving the queue in or out of the
 * active queue list, which also defines whether the queue is checked
 * or not when a FW event is received.
 *
 * This function should be called any time a job is submitted or its done
 * fence is signaled.
 */
static void pvr_queue_update_active_state(struct pvr_queue *queue)
{
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;

	mutex_lock(&pvr_dev->queues.lock);
	pvr_queue_update_active_state_locked(queue);
	mutex_unlock(&pvr_dev->queues.lock);
}

static void pvr_queue_submit_job_to_cccb(struct pvr_job *job)
{
	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
	struct rogue_fwif_ufo ufos[ROGUE_FWIF_CCB_CMD_MAX_UFOS];
	struct pvr_cccb *cccb = &queue->cccb;
	struct pvr_queue_fence *jfence;
	struct dma_fence *fence;
	unsigned long index;
	u32 ufo_count = 0;

	/* We need to add the queue to the active list before updating the CCCB,
	 * otherwise we might miss the FW event informing us that something
	 * happened on this queue.
	 */
	atomic_inc(&queue->in_flight_job_count);
	pvr_queue_update_active_state(queue);

	xa_for_each(&job->base.dependencies, index, fence) {
		jfence = to_pvr_queue_job_fence(fence);
		if (!jfence)
			continue;

		/* Skip the partial render fence, we will place it at the end. */
		if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job &&
		    &job->paired_job->base.s_fence->scheduled == fence)
			continue;

		if (dma_fence_is_signaled(&jfence->base))
			continue;

		pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
					  &ufos[ufo_count].addr);
		ufos[ufo_count++].value = jfence->base.seqno;

		if (ufo_count == ARRAY_SIZE(ufos)) {
			pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
							   sizeof(ufos), ufos, 0, 0);
			ufo_count = 0;
		}
	}

	/* Partial render fence goes last. */
	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
		jfence = to_pvr_queue_job_fence(job->paired_job->done_fence);
		if (!WARN_ON(!jfence)) {
			pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
						  &ufos[ufo_count].addr);
			ufos[ufo_count++].value = job->paired_job->done_fence->seqno;
		}
	}

	if (ufo_count) {
		pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
						   sizeof(ufos[0]) * ufo_count, ufos, 0, 0);
	}

	if (job->type == DRM_PVR_JOB_TYPE_GEOMETRY && job->paired_job) {
		struct rogue_fwif_cmd_geom *cmd = job->cmd;

		/* Reference value for the partial render test is the current queue fence
		 * seqno minus one.
		 */
		pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj,
					  &cmd->partial_render_geom_frag_fence.addr);
		cmd->partial_render_geom_frag_fence.value = job->done_fence->seqno - 1;
	}

	/* Submit job to FW */
	pvr_cccb_write_command_with_header(cccb, job->fw_ccb_cmd_type, job->cmd_len, job->cmd,
					   job->id, job->id);

	/* Signal the job fence. */
	pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj, &ufos[0].addr);
	ufos[0].value = job->done_fence->seqno;
	pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_UPDATE,
					   sizeof(ufos[0]), ufos, 0, 0);
}

/**
 * pvr_queue_run_job() - Submit a job to the FW.
 * @sched_job: The job to submit.
 *
 * This function is called when all non-native dependencies have been met and
 * when the commands resulting from this job are guaranteed to fit in the CCCB.
 */
static struct dma_fence *pvr_queue_run_job(struct drm_sched_job *sched_job)
{
	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
	struct pvr_device *pvr_dev = job->pvr_dev;
	int err;

	/* The fragment job is issued along with the geometry job when we use combined
	 * geom+frag kicks. When we get there, we should simply return the
	 * done_fence that's been initialized earlier.
	 */
	if (job->paired_job && job->type == DRM_PVR_JOB_TYPE_FRAGMENT &&
	    job->done_fence->ops) {
		return dma_fence_get(job->done_fence);
	}

	/* The only kind of jobs that can be paired are geometry and fragment, and
	 * we bail out early if we see a fragment job that's paired with a geometry
	 * job.
	 * Paired jobs must also target the same context and point to the same
	 * HWRT.
	 */
	if (WARN_ON(job->paired_job &&
		    (job->type != DRM_PVR_JOB_TYPE_GEOMETRY ||
		     job->paired_job->type != DRM_PVR_JOB_TYPE_FRAGMENT ||
		     job->hwrt != job->paired_job->hwrt ||
		     job->ctx != job->paired_job->ctx)))
		return ERR_PTR(-EINVAL);

	err = pvr_job_get_pm_ref(job);
	if (WARN_ON(err))
		return ERR_PTR(err);

	if (job->paired_job) {
		err = pvr_job_get_pm_ref(job->paired_job);
		if (WARN_ON(err))
			return ERR_PTR(err);
	}

	/* Submit our job to the CCCB */
	pvr_queue_submit_job_to_cccb(job);

	if (job->paired_job) {
		struct pvr_job *geom_job = job;
		struct pvr_job *frag_job = job->paired_job;
		struct pvr_queue *geom_queue = job->ctx->queues.geometry;
		struct pvr_queue *frag_queue = job->ctx->queues.fragment;

		/* Submit the fragment job along with the geometry job and send a combined kick. */
		pvr_queue_submit_job_to_cccb(frag_job);
		pvr_cccb_send_kccb_combined_kick(pvr_dev,
						 &geom_queue->cccb, &frag_queue->cccb,
						 pvr_context_get_fw_addr(geom_job->ctx) +
						 geom_queue->ctx_offset,
						 pvr_context_get_fw_addr(frag_job->ctx) +
						 frag_queue->ctx_offset,
						 job->hwrt,
						 frag_job->fw_ccb_cmd_type ==
						 ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR);
	} else {
		struct pvr_queue *queue = container_of(job->base.sched,
						       struct pvr_queue, scheduler);

		pvr_cccb_send_kccb_kick(pvr_dev, &queue->cccb,
					pvr_context_get_fw_addr(job->ctx) + queue->ctx_offset,
					job->hwrt);
	}

	return dma_fence_get(job->done_fence);
}

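/*
 * Editorial note: for paired jobs, the geometry job's run_job() above submits
 * both the geometry and the fragment commands and issues a single combined
 * KCCB kick. The fragment job's own run_job() then only returns its
 * done_fence, which was already initialized when the pair was submitted (see
 * the early return at the top of pvr_queue_run_job()).
 */
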
static void pvr_queue_stop(struct pvr_queue *queue, struct pvr_job *bad_job)
{
	drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
}

static void pvr_queue_start(struct pvr_queue *queue)
{
	struct pvr_job *job;

	/* Make sure we CPU-signal the UFO object, so other queues don't get
	 * blocked waiting on it.
	 */
	*queue->timeline_ufo.value = atomic_read(&queue->job_fence_ctx.seqno);

	list_for_each_entry(job, &queue->scheduler.pending_list, base.list) {
		if (dma_fence_is_signaled(job->done_fence)) {
			/* Jobs might have completed after drm_sched_stop() was called.
			 * In that case, re-assign the parent field to the done_fence.
			 */
			WARN_ON(job->base.s_fence->parent);
			job->base.s_fence->parent = dma_fence_get(job->done_fence);
		} else {
			/* If we had unfinished jobs, flag the entity as guilty so no
			 * new job can be submitted.
			 */
			atomic_set(&queue->ctx->faulty, 1);
		}
	}

	drm_sched_start(&queue->scheduler, 0);
}

/**
 * pvr_queue_timedout_job() - Handle a job timeout event.
 * @s_job: The job this timeout occurred on.
 *
 * FIXME: We don't do anything here to unblock the situation, we just stop+start
 * the scheduler, and re-assign parent fences in the middle.
 *
 * Return:
 *  * DRM_GPU_SCHED_STAT_NOMINAL.
 */
static enum drm_gpu_sched_stat
pvr_queue_timedout_job(struct drm_sched_job *s_job)
{
	struct drm_gpu_scheduler *sched = s_job->sched;
	struct pvr_queue *queue = container_of(sched, struct pvr_queue, scheduler);
	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
	struct pvr_job *job;
	u32 job_count = 0;

	dev_err(sched->dev, "Job timeout\n");

	/* Before we stop the scheduler, make sure the queue is out of any list, so
	 * any call to pvr_queue_update_active_state_locked() that might happen
	 * until the scheduler is really stopped doesn't end up re-inserting the
	 * queue in the active list. This would cause
	 * pvr_queue_signal_done_fences() and drm_sched_stop() to race with each
	 * other when accessing the pending_list, since drm_sched_stop() doesn't
	 * grab the job_list_lock when modifying the list (it's assuming the
	 * only other accessor is the scheduler, and it's safe to not grab the
	 * lock since it's stopped).
	 */
	mutex_lock(&pvr_dev->queues.lock);
	list_del_init(&queue->node);
	mutex_unlock(&pvr_dev->queues.lock);

	drm_sched_stop(sched, s_job);

	/* Re-assign job parent fences. */
	list_for_each_entry(job, &sched->pending_list, base.list) {
		job->base.s_fence->parent = dma_fence_get(job->done_fence);
		job_count++;
	}
	WARN_ON(atomic_read(&queue->in_flight_job_count) != job_count);

	/* Re-insert the queue in the proper list, and kick a queue processing
	 * operation if there were jobs pending.
	 */
	mutex_lock(&pvr_dev->queues.lock);
	if (!job_count) {
		list_move_tail(&queue->node, &pvr_dev->queues.idle);
	} else {
		atomic_set(&queue->in_flight_job_count, job_count);
		list_move_tail(&queue->node, &pvr_dev->queues.active);
		pvr_queue_process(queue);
	}
	mutex_unlock(&pvr_dev->queues.lock);

	drm_sched_start(sched, 0);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

/**
 * pvr_queue_free_job() - Release the reference the scheduler had on a job object.
 * @sched_job: Job object to free.
 */
static void pvr_queue_free_job(struct drm_sched_job *sched_job)
{
	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);

	drm_sched_job_cleanup(sched_job);

	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job)
		pvr_job_put(job->paired_job);

	job->paired_job = NULL;
	pvr_job_put(job);
}

static const struct drm_sched_backend_ops pvr_queue_sched_ops = {
	.prepare_job = pvr_queue_prepare_job,
	.run_job = pvr_queue_run_job,
	.timedout_job = pvr_queue_timedout_job,
	.free_job = pvr_queue_free_job,
};

/**
 * pvr_queue_fence_is_ufo_backed() - Check if a dma_fence is backed by a UFO object
 * @f: Fence to test.
 *
 * A UFO-backed fence is a fence that can be signaled or waited upon FW-side.
 * pvr_job::done_fence objects are backed by the timeline UFO attached to the queue
 * they are pushed to, but those fences are not directly exposed to the outside
 * world, so we also need to check if the fence we're being passed is a
 * drm_sched_fence that came from our driver.
 */
bool pvr_queue_fence_is_ufo_backed(struct dma_fence *f)
{
	struct drm_sched_fence *sched_fence = f ? to_drm_sched_fence(f) : NULL;

	if (sched_fence &&
	    sched_fence->sched->ops == &pvr_queue_sched_ops)
		return true;

	if (f && f->ops == &pvr_queue_job_fence_ops)
		return true;

	return false;
}

/**
 * pvr_queue_signal_done_fences() - Signal done fences.
 * @queue: Queue to check.
 *
 * Signal done fences of jobs whose seqno is less than or equal to the current
 * value of the UFO object attached to the queue.
 */
static void
pvr_queue_signal_done_fences(struct pvr_queue *queue)
{
	struct pvr_job *job, *tmp_job;
	u32 cur_seqno;

	spin_lock(&queue->scheduler.job_list_lock);
	cur_seqno = *queue->timeline_ufo.value;
	list_for_each_entry_safe(job, tmp_job, &queue->scheduler.pending_list, base.list) {
		if ((int)(cur_seqno - lower_32_bits(job->done_fence->seqno)) < 0)
			break;

		if (!dma_fence_is_signaled(job->done_fence)) {
			dma_fence_signal(job->done_fence);
			pvr_job_release_pm_ref(job);
			atomic_dec(&queue->in_flight_job_count);
		}
	}
	spin_unlock(&queue->scheduler.job_list_lock);
}

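/*
 * Editorial note: the (int)(cur_seqno - seqno) comparison above is
 * wraparound-safe. The 32-bit difference is interpreted as a signed value, so
 * a job is only considered done once the timeline UFO value has caught up
 * with its fence seqno, even when the counter rolls over.
 */
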
/**
 * pvr_queue_check_job_waiting_for_cccb_space() - Check if the job waiting for CCCB space
 * can be unblocked and pushed to the CCCB
 * @queue: Queue to check
 *
 * If we have a job waiting for CCCB space, and this job now fits in the CCCB, we
 * signal its CCCB fence, which should kick drm_sched.
 */
static void
pvr_queue_check_job_waiting_for_cccb_space(struct pvr_queue *queue)
{
	struct pvr_queue_fence *cccb_fence;
	u32 native_deps_remaining;
	struct pvr_job *job;

	mutex_lock(&queue->cccb_fence_ctx.job_lock);
	job = queue->cccb_fence_ctx.job;
	if (!job)
		goto out_unlock;

	/* If we have a job attached to the CCCB fence context, its CCCB fence
	 * shouldn't be NULL.
	 */
	if (WARN_ON(!job->cccb_fence)) {
		job = NULL;
		goto out_unlock;
	}

	/* If we get here, the CCCB fence has to be initialized. */
	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
	if (WARN_ON(!cccb_fence->queue)) {
		job = NULL;
		goto out_unlock;
	}

	/* Count the remaining unsignaled native dependencies before checking for
	 * CCCB space. If the job fits, signal the CCCB fence, which should unblock
	 * the drm_sched_entity.
	 */
	native_deps_remaining = job_count_remaining_native_deps(job);
	if (!pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
		job = NULL;
		goto out_unlock;
	}

	dma_fence_signal(job->cccb_fence);
	pvr_queue_fence_put(job->cccb_fence);
	job->cccb_fence = NULL;
	queue->cccb_fence_ctx.job = NULL;

out_unlock:
	mutex_unlock(&queue->cccb_fence_ctx.job_lock);

	pvr_job_put(job);
}

/**
 * pvr_queue_process() - Process events that happened on a queue.
 * @queue: Queue to check
 *
 * Signal job fences and check if jobs waiting for CCCB space can be unblocked.
 */
void pvr_queue_process(struct pvr_queue *queue)
{
	lockdep_assert_held(&queue->ctx->pvr_dev->queues.lock);

	pvr_queue_check_job_waiting_for_cccb_space(queue);
	pvr_queue_signal_done_fences(queue);
	pvr_queue_update_active_state_locked(queue);
}

static u32 get_dm_type(struct pvr_queue *queue)
{
	switch (queue->type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return PVR_FWIF_DM_GEOM;
	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return PVR_FWIF_DM_FRAG;
	case DRM_PVR_JOB_TYPE_COMPUTE:
		return PVR_FWIF_DM_CDM;
	}

	return ~0;
}

/**
 * init_fw_context() - Initializes the queue part of a FW context.
 * @queue: Queue object to initialize the FW context for.
 * @fw_ctx_map: The FW context CPU mapping.
 *
 * FW contexts contain various states, one of them being a per-queue state
 * that needs to be initialized for each queue exposed by a context. This
 * function takes care of that.
 */
static void init_fw_context(struct pvr_queue *queue, void *fw_ctx_map)
{
	struct pvr_context *ctx = queue->ctx;
	struct pvr_fw_object *fw_mem_ctx_obj = pvr_vm_get_fw_mem_context(ctx->vm_ctx);
	struct rogue_fwif_fwcommoncontext *cctx_fw;
	struct pvr_cccb *cccb = &queue->cccb;

	cctx_fw = fw_ctx_map + queue->ctx_offset;
	cctx_fw->ccbctl_fw_addr = cccb->ctrl_fw_addr;
	cctx_fw->ccb_fw_addr = cccb->cccb_fw_addr;

	cctx_fw->dm = get_dm_type(queue);
	cctx_fw->priority = ctx->priority;
	cctx_fw->priority_seq_num = 0;
	cctx_fw->max_deadline_ms = MAX_DEADLINE_MS;
	cctx_fw->pid = task_tgid_nr(current);
	cctx_fw->server_common_context_id = ctx->ctx_id;

	pvr_fw_object_get_fw_addr(fw_mem_ctx_obj, &cctx_fw->fw_mem_context_fw_addr);

	pvr_fw_object_get_fw_addr(queue->reg_state_obj, &cctx_fw->context_state_addr);
}

/**
 * pvr_queue_cleanup_fw_context() - Wait for the FW context to be idle and clean it up.
 * @queue: Queue whose FW context should be cleaned up.
 *
 * Return:
 *  * 0 on success,
 *  * Any error returned by pvr_fw_structure_cleanup() otherwise.
 */
static int pvr_queue_cleanup_fw_context(struct pvr_queue *queue)
{
	if (!queue->ctx->fw_obj)
		return 0;

	return pvr_fw_structure_cleanup(queue->ctx->pvr_dev,
					ROGUE_FWIF_CLEANUP_FWCOMMONCONTEXT,
					queue->ctx->fw_obj, queue->ctx_offset);
}

/**
 * pvr_queue_job_init() - Initialize queue related fields in a pvr_job object.
 * @job: The job to initialize.
 *
 * Bind the job to a queue and allocate memory to guarantee pvr_queue_job_arm()
 * and pvr_queue_job_push() can't fail. We also make sure the context type is
 * valid and the job can fit in the CCCB.
 *
 * Return:
 *  * 0 on success, or
 *  * An error code if something failed.
 */
int pvr_queue_job_init(struct pvr_job *job)
{
	/* Fragment jobs need at least one native fence wait on the geometry job fence. */
	u32 min_native_dep_count = job->type == DRM_PVR_JOB_TYPE_FRAGMENT ? 1 : 0;
	struct pvr_queue *queue;
	int err;

	if (atomic_read(&job->ctx->faulty))
		return -EIO;

	queue = pvr_context_get_queue_for_job(job->ctx, job->type);
	if (!queue)
		return -EINVAL;

	if (!pvr_cccb_cmdseq_can_fit(&queue->cccb, job_cmds_size(job, min_native_dep_count)))
		return -E2BIG;

	err = drm_sched_job_init(&job->base, &queue->entity, 1, THIS_MODULE);
	if (err)
		return err;

	job->cccb_fence = pvr_queue_fence_alloc();
	job->kccb_fence = pvr_kccb_fence_alloc();
	job->done_fence = pvr_queue_fence_alloc();
	if (!job->cccb_fence || !job->kccb_fence || !job->done_fence)
		return -ENOMEM;

	return 0;
}

/**
 * pvr_queue_job_arm() - Arm a job object.
 * @job: The job to arm.
 *
 * Initializes fences and returns the drm_sched finished fence so it can
 * be exposed to the outside world. Once this function is called, you should
 * make sure the job is pushed using pvr_queue_job_push(), or guarantee that
 * no one grabbed a reference to the returned fence. The latter can happen if
 * we do multi-job submission, and something failed when creating/initializing
 * a job. In that case, we know the fence didn't leave the driver, and we
 * can thus guarantee nobody will wait on a dead fence object.
 *
 * Return:
 *  * A dma_fence object.
 */
struct dma_fence *pvr_queue_job_arm(struct pvr_job *job)
{
	drm_sched_job_arm(&job->base);

	return &job->base.s_fence->finished;
}

/**
 * pvr_queue_job_cleanup() - Cleanup fence/scheduler related fields in the job object.
 * @job: The job to cleanup.
 *
 * Should be called in the job release path.
 */
void pvr_queue_job_cleanup(struct pvr_job *job)
{
	pvr_queue_fence_put(job->done_fence);
	pvr_queue_fence_put(job->cccb_fence);
	pvr_kccb_fence_put(job->kccb_fence);

	if (job->base.s_fence)
		drm_sched_job_cleanup(&job->base);
}

/**
 * pvr_queue_job_push() - Push a job to its queue.
 * @job: The job to push.
 *
 * Must be called after pvr_queue_job_init() and after all dependencies
 * have been added to the job. This will effectively queue the job to
 * the drm_sched_entity attached to the queue. We grab a reference on
 * the job object, so the caller is free to drop its reference when it's
 * done accessing the job object.
 */
void pvr_queue_job_push(struct pvr_job *job)
{
	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);

	/* Keep track of the last queued job scheduled fence for combined submit. */
	dma_fence_put(queue->last_queued_job_scheduled_fence);
	queue->last_queued_job_scheduled_fence = dma_fence_get(&job->base.s_fence->scheduled);

	pvr_job_get(job);
	drm_sched_entity_push_job(&job->base);
}

static void reg_state_init(void *cpu_ptr, void *priv)
{
	struct pvr_queue *queue = priv;

	if (queue->type == DRM_PVR_JOB_TYPE_GEOMETRY) {
		struct rogue_fwif_geom_ctx_state *geom_ctx_state_fw = cpu_ptr;

		geom_ctx_state_fw->geom_core[0].geom_reg_vdm_call_stack_pointer_init =
			queue->callstack_addr;
	}
}

/**
 * pvr_queue_create() - Create a queue object.
 * @ctx: The context this queue will be attached to.
 * @type: The type of jobs being pushed to this queue.
 * @args: The arguments passed to the context creation function.
 * @fw_ctx_map: CPU mapping of the FW context object.
 *
 * Create a queue object that will be used to queue and track jobs.
 *
 * Return:
 *  * A valid pointer to a pvr_queue object, or
 *  * An error pointer if the creation/initialization failed.
 */
struct pvr_queue *pvr_queue_create(struct pvr_context *ctx,
				   enum drm_pvr_job_type type,
				   struct drm_pvr_ioctl_create_context_args *args,
				   void *fw_ctx_map)
{
	static const struct {
		u32 cccb_size;
		const char *name;
	} props[] = {
		[DRM_PVR_JOB_TYPE_GEOMETRY] = {
			.cccb_size = CTX_GEOM_CCCB_SIZE_LOG2,
			.name = "geometry",
		},
		[DRM_PVR_JOB_TYPE_FRAGMENT] = {
			.cccb_size = CTX_FRAG_CCCB_SIZE_LOG2,
			.name = "fragment"
		},
		[DRM_PVR_JOB_TYPE_COMPUTE] = {
			.cccb_size = CTX_COMPUTE_CCCB_SIZE_LOG2,
			.name = "compute"
		},
		[DRM_PVR_JOB_TYPE_TRANSFER_FRAG] = {
			.cccb_size = CTX_TRANSFER_CCCB_SIZE_LOG2,
			.name = "transfer_frag"
		},
	};
	struct pvr_device *pvr_dev = ctx->pvr_dev;
	const struct drm_sched_init_args sched_args = {
		.ops = &pvr_queue_sched_ops,
		.submit_wq = pvr_dev->sched_wq,
		.num_rqs = 1,
		.credit_limit = 64 * 1024,
		.hang_limit = 1,
		.timeout = msecs_to_jiffies(500),
		.timeout_wq = pvr_dev->sched_wq,
		.name = "pvr-queue",
		.dev = pvr_dev->base.dev,
	};
	struct drm_gpu_scheduler *sched;
	struct pvr_queue *queue;
	int ctx_state_size, err;
	void *cpu_map;

	if (WARN_ON(type >= ARRAY_SIZE(props)))
		return ERR_PTR(-EINVAL);

	switch (ctx->type) {
	case DRM_PVR_CTX_TYPE_RENDER:
		if (type != DRM_PVR_JOB_TYPE_GEOMETRY &&
		    type != DRM_PVR_JOB_TYPE_FRAGMENT)
			return ERR_PTR(-EINVAL);
		break;
	case DRM_PVR_CTX_TYPE_COMPUTE:
		if (type != DRM_PVR_JOB_TYPE_COMPUTE)
			return ERR_PTR(-EINVAL);
		break;
	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
		if (type != DRM_PVR_JOB_TYPE_TRANSFER_FRAG)
			return ERR_PTR(-EINVAL);
		break;
	default:
		return ERR_PTR(-EINVAL);
	}

	ctx_state_size = get_ctx_state_size(pvr_dev, type);
	if (ctx_state_size < 0)
		return ERR_PTR(ctx_state_size);

	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return ERR_PTR(-ENOMEM);

	queue->type = type;
	queue->ctx_offset = get_ctx_offset(type);
	queue->ctx = ctx;
	queue->callstack_addr = args->callstack_addr;
	sched = &queue->scheduler;
	INIT_LIST_HEAD(&queue->node);
	mutex_init(&queue->cccb_fence_ctx.job_lock);
	pvr_queue_fence_ctx_init(&queue->cccb_fence_ctx.base);
	pvr_queue_fence_ctx_init(&queue->job_fence_ctx);

	err = pvr_cccb_init(pvr_dev, &queue->cccb, props[type].cccb_size, props[type].name);
	if (err)
		goto err_free_queue;

	err = pvr_fw_object_create(pvr_dev, ctx_state_size,
				   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
				   reg_state_init, queue, &queue->reg_state_obj);
	if (err)
		goto err_cccb_fini;

	init_fw_context(queue, fw_ctx_map);

	if (type != DRM_PVR_JOB_TYPE_GEOMETRY && type != DRM_PVR_JOB_TYPE_FRAGMENT &&
	    args->callstack_addr) {
		err = -EINVAL;
		goto err_release_reg_state;
	}

	cpu_map = pvr_fw_object_create_and_map(pvr_dev, sizeof(*queue->timeline_ufo.value),
					       PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
					       NULL, NULL, &queue->timeline_ufo.fw_obj);
	if (IS_ERR(cpu_map)) {
		err = PTR_ERR(cpu_map);
		goto err_release_reg_state;
	}

	queue->timeline_ufo.value = cpu_map;

	err = drm_sched_init(&queue->scheduler, &sched_args);
	if (err)
		goto err_release_ufo;

	err = drm_sched_entity_init(&queue->entity,
				    DRM_SCHED_PRIORITY_KERNEL,
				    &sched, 1, &ctx->faulty);
	if (err)
		goto err_sched_fini;

	mutex_lock(&pvr_dev->queues.lock);
	list_add_tail(&queue->node, &pvr_dev->queues.idle);
	mutex_unlock(&pvr_dev->queues.lock);

	return queue;

err_sched_fini:
	drm_sched_fini(&queue->scheduler);

err_release_ufo:
	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);

err_release_reg_state:
	pvr_fw_object_destroy(queue->reg_state_obj);

err_cccb_fini:
	pvr_cccb_fini(&queue->cccb);

err_free_queue:
	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
	kfree(queue);

	return ERR_PTR(err);
}

void pvr_queue_device_pre_reset(struct pvr_device *pvr_dev)
{
	struct pvr_queue *queue;

	mutex_lock(&pvr_dev->queues.lock);
	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
		pvr_queue_stop(queue, NULL);
	list_for_each_entry(queue, &pvr_dev->queues.active, node)
		pvr_queue_stop(queue, NULL);
	mutex_unlock(&pvr_dev->queues.lock);
}

void pvr_queue_device_post_reset(struct pvr_device *pvr_dev)
{
	struct pvr_queue *queue;

	mutex_lock(&pvr_dev->queues.lock);
	list_for_each_entry(queue, &pvr_dev->queues.active, node)
		pvr_queue_start(queue);
	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
		pvr_queue_start(queue);
	mutex_unlock(&pvr_dev->queues.lock);
}

/**
 * pvr_queue_kill() - Kill a queue.
 * @queue: The queue to kill.
 *
 * Kill the queue so no new jobs can be pushed. Should be called when the
 * context handle is destroyed. The queue object might last longer if jobs
 * are still in flight and holding a reference to the context this queue
 * belongs to.
 */
void pvr_queue_kill(struct pvr_queue *queue)
{
	drm_sched_entity_destroy(&queue->entity);
	dma_fence_put(queue->last_queued_job_scheduled_fence);
	queue->last_queued_job_scheduled_fence = NULL;
}

/**
 * pvr_queue_destroy() - Destroy a queue.
 * @queue: The queue to destroy.
 *
 * Cleanup the queue and free the resources attached to it. Should be
 * called from the context release function.
 */
void pvr_queue_destroy(struct pvr_queue *queue)
{
	if (!queue)
		return;

	mutex_lock(&queue->ctx->pvr_dev->queues.lock);
	list_del_init(&queue->node);
	mutex_unlock(&queue->ctx->pvr_dev->queues.lock);

	drm_sched_fini(&queue->scheduler);
	drm_sched_entity_fini(&queue->entity);

	if (WARN_ON(queue->last_queued_job_scheduled_fence))
		dma_fence_put(queue->last_queued_job_scheduled_fence);

	pvr_queue_cleanup_fw_context(queue);

	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
	pvr_fw_object_destroy(queue->reg_state_obj);
	pvr_cccb_fini(&queue->cccb);
	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
	kfree(queue);
}

/**
 * pvr_queue_device_init() - Device-level initialization of queue related fields.
 * @pvr_dev: The device to initialize.
 *
 * Initializes all fields related to queue management in pvr_device.
 *
 * Return:
 *  * 0 on success, or
 *  * An error code on failure.
 */
int pvr_queue_device_init(struct pvr_device *pvr_dev)
{
	int err;

	INIT_LIST_HEAD(&pvr_dev->queues.active);
	INIT_LIST_HEAD(&pvr_dev->queues.idle);
	err = drmm_mutex_init(from_pvr_device(pvr_dev), &pvr_dev->queues.lock);
	if (err)
		return err;

	pvr_dev->sched_wq = alloc_workqueue("powervr-sched", WQ_UNBOUND, 0);
	if (!pvr_dev->sched_wq)
		return -ENOMEM;

	return 0;
}

/**
 * pvr_queue_device_fini() - Device-level cleanup of queue related fields.
 * @pvr_dev: The device to cleanup.
 *
 * Cleanup/free all queue-related resources attached to a pvr_device object.
 */
void pvr_queue_device_fini(struct pvr_device *pvr_dev)
{
	destroy_workqueue(pvr_dev->sched_wq);
}