// SPDX-License-Identifier: GPL-2.0
/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
/* Copyright 2019 Collabora ltd. */
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/dma-resv.h>
#include <drm/gpu_scheduler.h>
#include <drm/panfrost_drm.h>

#include "panfrost_device.h"
#include "panfrost_devfreq.h"
#include "panfrost_job.h"
#include "panfrost_features.h"
#include "panfrost_issues.h"
#include "panfrost_gem.h"
#include "panfrost_regs.h"
#include "panfrost_gpu.h"
#include "panfrost_mmu.h"
#include "panfrost_dump.h"

#define MAX_JM_CTX_PER_FILE 64
#define JOB_TIMEOUT_MS 500

#define job_write(dev, reg, data) writel(data, dev->iomem + (reg))
#define job_read(dev, reg) readl(dev->iomem + (reg))

const char * const panfrost_engine_names[] = {
	"fragment", "vertex-tiler", "compute-only"
};

struct panfrost_queue_state {
	struct drm_gpu_scheduler sched;
	u64 fence_context;
	u64 emit_seqno;
};

struct panfrost_job_slot {
	struct panfrost_queue_state queue[NUM_JOB_SLOTS];
	spinlock_t job_lock;
	int irq;
};

static struct panfrost_job *
to_panfrost_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct panfrost_job, base);
}

struct panfrost_fence {
	struct dma_fence base;
	struct drm_device *dev;
	/* panfrost seqno for signaled() test */
	u64 seqno;
	int queue;
};

static inline struct panfrost_fence *
to_panfrost_fence(struct dma_fence *fence)
{
	return (struct panfrost_fence *)fence;
}

static const char *panfrost_fence_get_driver_name(struct dma_fence *fence)
{
	return "panfrost";
}

static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence)
{
	struct panfrost_fence *f = to_panfrost_fence(fence);

	switch (f->queue) {
	case 0:
		return "panfrost-js-0";
	case 1:
		return "panfrost-js-1";
	case 2:
		return "panfrost-js-2";
	default:
		return NULL;
	}
}

static const struct dma_fence_ops panfrost_fence_ops = {
	.get_driver_name = panfrost_fence_get_driver_name,
	.get_timeline_name = panfrost_fence_get_timeline_name,
};

static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num)
{
	struct panfrost_fence *fence;
	struct panfrost_job_slot *js = pfdev->js;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return ERR_PTR(-ENOMEM);

	fence->dev = &pfdev->base;
	fence->queue = js_num;
	fence->seqno = ++js->queue[js_num].emit_seqno;
	dma_fence_init(&fence->base, &panfrost_fence_ops, &js->job_lock,
		       js->queue[js_num].fence_context, fence->seqno);

	return &fence->base;
}

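/*
 * Map a job's requirements to a hardware job slot: fragment work goes to
 * JS0, everything else currently lands on JS1 (routing compute-only jobs
 * to JS2 is not exposed to userspace yet, see below).
 */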
int panfrost_job_get_slot(struct panfrost_job *job)
{
	/* JS0: fragment jobs.
	 * JS1: vertex/tiler jobs
	 * JS2: compute jobs
	 */
	if (job->requirements & PANFROST_JD_REQ_FS)
		return 0;

	/* Not exposed to userspace yet */
#if 0
	if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) {
		if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) &&
		    (job->pfdev->features.nr_core_groups == 2))
			return 2;
		if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987))
			return 2;
	}
#endif
	return 1;
}

static void panfrost_job_write_affinity(struct panfrost_device *pfdev,
					u32 requirements,
					int js)
{
	u64 affinity;

	/*
	 * Use all cores for now.
	 * Eventually we may need to support tiler only jobs and h/w with
	 * multiple (2) coherent core groups
	 */
	affinity = pfdev->features.shader_present;

	job_write(pfdev, JS_AFFINITY_NEXT_LO(js), lower_32_bits(affinity));
	job_write(pfdev, JS_AFFINITY_NEXT_HI(js), upper_32_bits(affinity));
}

static u32
panfrost_get_job_chain_flag(const struct panfrost_job *job)
{
	struct panfrost_fence *f = to_panfrost_fence(job->done_fence);

	if (!panfrost_has_hw_feature(job->pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
		return 0;

	return (f->seqno & 1) ? JS_CONFIG_JOB_CHAIN_FLAG : 0;
}

static struct panfrost_job *
panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
{
	struct panfrost_job *job = pfdev->jobs[slot][0];

	WARN_ON(!job);

	if (job->is_profiled && job->engine_usage) {
		job->engine_usage->elapsed_ns[slot] +=
			ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
		job->engine_usage->cycles[slot] +=
			panfrost_cycle_counter_read(pfdev) - job->start_cycles;
	}

	if (job->requirements & PANFROST_JD_REQ_CYCLE_COUNT || job->is_profiled)
		panfrost_cycle_counter_put(pfdev);

	pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
	pfdev->jobs[slot][1] = NULL;

	return job;
}

static unsigned int
panfrost_enqueue_job(struct panfrost_device *pfdev, int slot,
		     struct panfrost_job *job)
{
	if (WARN_ON(!job))
		return 0;

	if (!pfdev->jobs[slot][0]) {
		pfdev->jobs[slot][0] = job;
		return 0;
	}

	WARN_ON(pfdev->jobs[slot][1]);
	pfdev->jobs[slot][1] = job;
	WARN_ON(panfrost_get_job_chain_flag(job) ==
		panfrost_get_job_chain_flag(pfdev->jobs[slot][0]));
	return 1;
}

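/*
 * Program a job chain into a hardware job slot and kick it off: take a
 * runtime PM reference, bind the job's MMU context to a GPU address space,
 * write the chain head, affinity and config registers, then (under
 * job_lock, and only if no reset is pending) queue the job and issue
 * JS_COMMAND_START.
 */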
static int panfrost_job_hw_submit(struct panfrost_job *job, int js)
{
	struct panfrost_device *pfdev = job->pfdev;
	unsigned int subslot;
	u32 cfg;
	u64 jc_head = job->jc;
	int ret;

	ret = pm_runtime_get_sync(pfdev->base.dev);
	if (ret < 0)
		goto err_hwsubmit;

	if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) {
		ret = -EINVAL;
		goto err_hwsubmit;
	}

	ret = panfrost_mmu_as_get(pfdev, job->mmu);
	if (ret < 0)
		goto err_hwsubmit;

	cfg = ret;

	panfrost_devfreq_record_busy(&pfdev->pfdevfreq);

	job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc_head));
	job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc_head));

	panfrost_job_write_affinity(pfdev, job->requirements, js);

	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
	 * start */
	cfg |= JS_CONFIG_THREAD_PRI(8) |
		JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
		JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
		panfrost_get_job_chain_flag(job);

	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;

	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649))
		cfg |= JS_CONFIG_START_MMU;

	job_write(pfdev, JS_CONFIG_NEXT(js), cfg);

	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
		job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id);

	/* GO ! */

	spin_lock(&pfdev->js->job_lock);
	subslot = panfrost_enqueue_job(pfdev, js, job);
	/* Don't queue the job if a reset is in progress */
	if (!atomic_read(&pfdev->reset.pending)) {
		job->is_profiled = pfdev->profile_mode;

		if (job->requirements & PANFROST_JD_REQ_CYCLE_COUNT ||
		    job->is_profiled)
			panfrost_cycle_counter_get(pfdev);

		if (job->is_profiled) {
			job->start_time = ktime_get();
			job->start_cycles = panfrost_cycle_counter_read(pfdev);
		}

		job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
		dev_dbg(pfdev->base.dev,
			"JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d",
			job, js, subslot, jc_head, cfg & 0xf);
	}
	spin_unlock(&pfdev->js->job_lock);

	return 0;

err_hwsubmit:
	pm_runtime_put_autosuspend(pfdev->base.dev);
	return ret;
}

static int panfrost_acquire_object_fences(struct drm_gem_object **bos,
					  int bo_count,
					  struct drm_sched_job *job)
{
	int i, ret;

	for (i = 0; i < bo_count; i++) {
		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
		if (ret)
			return ret;

		/* panfrost always uses write mode in its current uapi */
		ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
							      true);
		if (ret)
			return ret;
	}

	return 0;
}

static void panfrost_attach_object_fences(struct drm_gem_object **bos,
					  int bo_count,
					  struct dma_fence *fence)
{
	int i;

	for (i = 0; i < bo_count; i++)
		dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
}

int panfrost_job_push(struct panfrost_job *job)
{
	struct panfrost_device *pfdev = job->pfdev;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
					&acquire_ctx);
	if (ret)
		return ret;

	mutex_lock(&pfdev->sched_lock);
	drm_sched_job_arm(&job->base);

	job->render_done_fence = dma_fence_get(&job->base.s_fence->finished);

	ret = panfrost_acquire_object_fences(job->bos, job->bo_count,
					     &job->base);
	if (ret) {
		mutex_unlock(&pfdev->sched_lock);
		goto unlock;
	}

	kref_get(&job->refcount); /* put by scheduler job completion */

	drm_sched_entity_push_job(&job->base);

	mutex_unlock(&pfdev->sched_lock);

	panfrost_attach_object_fences(job->bos, job->bo_count,
				      job->render_done_fence);

unlock:
	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);

	return ret;
}

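/*
 * Final kref release for a job: drop the done/render-done fences, release
 * the BO mappings and GEM object references, put the JM context and free
 * the job itself. Reached through panfrost_job_put() once the last
 * reference is gone.
 */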
static void panfrost_job_cleanup(struct kref *ref)
{
	struct panfrost_job *job = container_of(ref, struct panfrost_job,
						refcount);
	unsigned int i;

	dma_fence_put(job->done_fence);
	dma_fence_put(job->render_done_fence);

	if (job->mappings) {
		for (i = 0; i < job->bo_count; i++) {
			if (!job->mappings[i])
				break;

			atomic_dec(&job->mappings[i]->obj->gpu_usecount);
			panfrost_gem_mapping_put(job->mappings[i]);
		}
		kvfree(job->mappings);
	}

	if (job->bos) {
		for (i = 0; i < job->bo_count; i++)
			drm_gem_object_put(job->bos[i]);

		kvfree(job->bos);
	}

	panfrost_jm_ctx_put(job->ctx);

	kfree(job);
}

void panfrost_job_put(struct panfrost_job *job)
{
	kref_put(&job->refcount, panfrost_job_cleanup);
}

static void panfrost_job_free(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);

	drm_sched_job_cleanup(sched_job);

	panfrost_job_put(job);
}

static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
	struct panfrost_device *pfdev = job->pfdev;
	int slot = panfrost_job_get_slot(job);
	struct dma_fence *fence = NULL;
	int ret;

	if (job->ctx->destroyed)
		return ERR_PTR(-ECANCELED);

	if (unlikely(job->base.s_fence->finished.error))
		return NULL;

	/* Nothing to execute: can happen if the job has finished while
	 * we were resetting the GPU.
	 */
	if (!job->jc)
		return NULL;

	fence = panfrost_fence_create(pfdev, slot);
	if (IS_ERR(fence))
		return fence;

	if (job->done_fence)
		dma_fence_put(job->done_fence);
	job->done_fence = dma_fence_get(fence);

	ret = panfrost_job_hw_submit(job, slot);
	if (ret) {
		dma_fence_put(fence);
		return ERR_PTR(ret);
	}

	return fence;
}

void panfrost_jm_reset_interrupts(struct panfrost_device *pfdev)
{
	job_write(pfdev, JOB_INT_CLEAR, ALL_JS_INT_MASK);
}

void panfrost_jm_enable_interrupts(struct panfrost_device *pfdev)
{
	clear_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended);
	job_write(pfdev, JOB_INT_MASK, ALL_JS_INT_MASK);
}

void panfrost_jm_suspend_irq(struct panfrost_device *pfdev)
{
	set_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended);

	job_write(pfdev, JOB_INT_MASK, 0);
	synchronize_irq(pfdev->js->irq);
}

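/*
 * Handle a job slot error: decode JS_STATUS and either resume a
 * soft-stopped job (updating its chain head from JS_TAIL), mark a
 * hard-stopped job as canceled, or flag a fault on the done fence. Also
 * releases the AS/devfreq/PM references taken at submit time and requests
 * a GPU reset when the exception calls for one.
 */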
static void panfrost_job_handle_err(struct panfrost_device *pfdev,
				    struct panfrost_job *job,
				    unsigned int js)
{
	u32 js_status = job_read(pfdev, JS_STATUS(js));
	const char *exception_name = panfrost_exception_name(js_status);
	bool signal_fence = true;

	if (!panfrost_exception_is_fault(js_status)) {
		dev_dbg(pfdev->base.dev, "js event, js=%d, status=%s, head=0x%x, tail=0x%x",
			js, exception_name,
			job_read(pfdev, JS_HEAD_LO(js)),
			job_read(pfdev, JS_TAIL_LO(js)));
	} else {
		dev_err(pfdev->base.dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x",
			js, exception_name,
			job_read(pfdev, JS_HEAD_LO(js)),
			job_read(pfdev, JS_TAIL_LO(js)));
	}

	if (js_status == DRM_PANFROST_EXCEPTION_STOPPED) {
		/* Update the job head so we can resume */
		job->jc = job_read(pfdev, JS_TAIL_LO(js)) |
			  ((u64)job_read(pfdev, JS_TAIL_HI(js)) << 32);

		/* The job will be resumed, don't signal the fence */
		signal_fence = false;
	} else if (js_status == DRM_PANFROST_EXCEPTION_TERMINATED) {
		/* Job has been hard-stopped, flag it as canceled */
		dma_fence_set_error(job->done_fence, -ECANCELED);
		job->jc = 0;
	} else if (panfrost_exception_is_fault(js_status)) {
		/* We might want to provide finer-grained error code based on
		 * the exception type, but unconditionally setting to EINVAL
		 * is good enough for now.
		 */
		dma_fence_set_error(job->done_fence, -EINVAL);
		job->jc = 0;
	}

	panfrost_mmu_as_put(pfdev, job->mmu);
	panfrost_devfreq_record_idle(&pfdev->pfdevfreq);

	if (signal_fence)
		dma_fence_signal_locked(job->done_fence);

	pm_runtime_put_autosuspend(pfdev->base.dev);

	if (panfrost_exception_needs_reset(pfdev, js_status)) {
		atomic_set(&pfdev->reset.pending, 1);
		drm_sched_fault(&pfdev->js->queue[js].sched);
	}
}

static void panfrost_jm_handle_done(struct panfrost_device *pfdev,
				    struct panfrost_job *job)
{
	/* Set ->jc to 0 to avoid re-submitting an already finished job (can
	 * happen when we receive the DONE interrupt while doing a GPU reset).
	 */
	job->jc = 0;
	panfrost_mmu_as_put(pfdev, job->mmu);
	panfrost_devfreq_record_idle(&pfdev->pfdevfreq);

	dma_fence_signal_locked(job->done_fence);
	pm_runtime_put_autosuspend(pfdev->base.dev);
}

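/*
 * Process one batch of JOB_INT events. Called with job_lock held, in three
 * passes: collect all done/failed jobs while re-reading JOB_INT_RAWSTAT to
 * close the JS_STATE sampling race described below, then complete the
 * dequeued jobs, and finally cancel or restart whatever was left in the
 * _NEXT registers after a failure on the same slot.
 */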
static void panfrost_jm_handle_irq(struct panfrost_device *pfdev, u32 status)
{
	struct panfrost_job *done[NUM_JOB_SLOTS][2] = {};
	struct panfrost_job *failed[NUM_JOB_SLOTS] = {};
	u32 js_state = 0, js_events = 0;
	unsigned int i, j;

	/* First we collect all failed/done jobs. */
	while (status) {
		u32 js_state_mask = 0;

		for (j = 0; j < NUM_JOB_SLOTS; j++) {
			if (status & MK_JS_MASK(j))
				js_state_mask |= MK_JS_MASK(j);

			if (status & JOB_INT_MASK_DONE(j)) {
				if (done[j][0])
					done[j][1] = panfrost_dequeue_job(pfdev, j);
				else
					done[j][0] = panfrost_dequeue_job(pfdev, j);
			}

			if (status & JOB_INT_MASK_ERR(j)) {
				/* Cancel the next submission. Will be submitted
				 * after we're done handling this failure if
				 * there's no reset pending.
				 */
				job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);
				failed[j] = panfrost_dequeue_job(pfdev, j);
			}
		}

		/* JS_STATE is sampled when JOB_INT_CLEAR is written.
		 * For each BIT(slot) or BIT(slot + 16) bit written to
		 * JOB_INT_CLEAR, the corresponding bits in JS_STATE
		 * (BIT(slot) and BIT(slot + 16)) are updated, but this
		 * is racy. If we only have one job done at the time we
		 * read JOB_INT_RAWSTAT but the second job fails before we
		 * clear the status, we end up with a status containing
		 * only the DONE bit and consider both jobs as DONE since
		 * JS_STATE reports both NEXT and CURRENT as inactive.
		 * To prevent that, let's repeat this clear+read steps
		 * until status is 0.
		 */
		job_write(pfdev, JOB_INT_CLEAR, status);
		js_state &= ~js_state_mask;
		js_state |= job_read(pfdev, JOB_INT_JS_STATE) & js_state_mask;
		js_events |= status;
		status = job_read(pfdev, JOB_INT_RAWSTAT);
	}

	/* Then we handle the dequeued jobs. */
	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		if (!(js_events & MK_JS_MASK(j)))
			continue;

		if (failed[j]) {
			panfrost_job_handle_err(pfdev, failed[j], j);
		} else if (pfdev->jobs[j][0] && !(js_state & MK_JS_MASK(j))) {
			/* When the current job doesn't fail, the JM dequeues
			 * the next job without waiting for an ACK, this means
			 * we can have 2 jobs dequeued and only catch the
			 * interrupt when the second one is done. If both slots
			 * are inactive, but one job remains in pfdev->jobs[j],
			 * consider it done. Of course that doesn't apply if a
			 * failure happened since we cancelled execution of the
			 * job in _NEXT (see above).
			 */
			if (WARN_ON(!done[j][0]))
				done[j][0] = panfrost_dequeue_job(pfdev, j);
			else
				done[j][1] = panfrost_dequeue_job(pfdev, j);
		}

		for (i = 0; i < ARRAY_SIZE(done[0]) && done[j][i]; i++)
			panfrost_jm_handle_done(pfdev, done[j][i]);
	}

	/* And finally we requeue jobs that were waiting in the second slot
	 * and have been stopped if we detected a failure on the first slot.
	 */
	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		if (!(js_events & MK_JS_MASK(j)))
			continue;

		if (!failed[j] || !pfdev->jobs[j][0])
			continue;

		if (pfdev->jobs[j][0]->jc == 0) {
			/* The job was cancelled, signal the fence now */
			struct panfrost_job *canceled = panfrost_dequeue_job(pfdev, j);

			dma_fence_set_error(canceled->done_fence, -ECANCELED);
			panfrost_jm_handle_done(pfdev, canceled);
		} else if (!atomic_read(&pfdev->reset.pending)) {
			/* Requeue the job we removed if no reset is pending */
			job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_START);
		}
	}
}

static void panfrost_jm_handle_irqs(struct panfrost_device *pfdev)
{
	u32 status = job_read(pfdev, JOB_INT_RAWSTAT);

	while (status) {
		pm_runtime_mark_last_busy(pfdev->base.dev);

		spin_lock(&pfdev->js->job_lock);
		panfrost_jm_handle_irq(pfdev, status);
		spin_unlock(&pfdev->js->job_lock);
		status = job_read(pfdev, JOB_INT_RAWSTAT);
	}
}

static u32 panfrost_active_slots(struct panfrost_device *pfdev,
				 u32 *js_state_mask, u32 js_state)
{
	u32 rawstat;

	if (!(js_state & *js_state_mask))
		return 0;

	rawstat = job_read(pfdev, JOB_INT_RAWSTAT);
	if (rawstat) {
		unsigned int i;

		for (i = 0; i < NUM_JOB_SLOTS; i++) {
			if (rawstat & MK_JS_MASK(i))
				*js_state_mask &= ~MK_JS_MASK(i);
		}
	}

	return js_state & *js_state_mask;
}

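/*
 * Reset path, run from the scheduler timeout handler or the reset worker
 * once reset.pending is set: stop the schedulers, mask the job IRQ,
 * soft-stop anything still running, drain the remaining interrupts,
 * rebalance the PM/devfreq/cycle-counter references held by stuck jobs,
 * reset the GPU, then resubmit pending jobs and restart the schedulers.
 */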
static void
panfrost_reset(struct panfrost_device *pfdev,
	       struct drm_sched_job *bad)
{
	u32 js_state, js_state_mask = 0xffffffff;
	unsigned int i, j;
	bool cookie;
	int ret;

	if (!atomic_read(&pfdev->reset.pending))
		return;

	/* Stop the schedulers.
	 *
	 * FIXME: We temporarily get out of the dma_fence_signalling section
	 * because the cleanup path generates lockdep splats when taking locks
	 * to release job resources. We should rework the code to follow this
	 * pattern:
	 *
	 *	try_lock
	 *	if (locked)
	 *		release
	 *	else
	 *		schedule_work_to_release_later
	 */
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_stop(&pfdev->js->queue[i].sched, bad);

	cookie = dma_fence_begin_signalling();

	if (bad)
		drm_sched_increase_karma(bad);

	/* Mask job interrupts and synchronize to make sure we won't be
	 * interrupted during our reset.
	 */
	job_write(pfdev, JOB_INT_MASK, 0);
	synchronize_irq(pfdev->js->irq);

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		/* Cancel the next job and soft-stop the running job. */
		job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
		job_write(pfdev, JS_COMMAND(i), JS_COMMAND_SOFT_STOP);
	}

	/* Wait at most 10ms for soft-stops to complete */
	ret = readl_poll_timeout(pfdev->iomem + JOB_INT_JS_STATE, js_state,
				 !panfrost_active_slots(pfdev, &js_state_mask, js_state),
				 10, 10000);

	if (ret)
		dev_err(pfdev->base.dev, "Soft-stop failed\n");

	/* Handle the remaining interrupts before we reset. */
	panfrost_jm_handle_irqs(pfdev);

	/* Remaining interrupts have been handled, but we might still have
	 * stuck jobs. Let's make sure the PM counters stay balanced by
	 * manually calling pm_runtime_put_noidle() and
	 * panfrost_devfreq_record_idle() for each stuck job.
	 * Let's also make sure the cycle counting register's refcnt is
	 * kept balanced to prevent it from running forever.
	 */
	spin_lock(&pfdev->js->job_lock);
	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) {
			if (pfdev->jobs[i][j]->requirements & PANFROST_JD_REQ_CYCLE_COUNT ||
			    pfdev->jobs[i][j]->is_profiled)
				panfrost_cycle_counter_put(pfdev->jobs[i][j]->pfdev);
			pm_runtime_put_noidle(pfdev->base.dev);
			panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
		}
	}
	memset(pfdev->jobs, 0, sizeof(pfdev->jobs));
	spin_unlock(&pfdev->js->job_lock);

	/* Proceed with reset now. */
	panfrost_device_reset(pfdev, false);

	/* GPU has been reset, we can clear the reset pending bit. */
	atomic_set(&pfdev->reset.pending, 0);

	/* Now resubmit jobs that were previously queued but didn't have a
	 * chance to finish.
	 * FIXME: We temporarily get out of the DMA fence signalling section
	 * while resubmitting jobs because the job submission logic will
	 * allocate memory with the GFP_KERNEL flag which can trigger memory
	 * reclaim and exposes a lock ordering issue.
	 */
	dma_fence_end_signalling(cookie);
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched);
	cookie = dma_fence_begin_signalling();

	/* Restart the schedulers */
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_start(&pfdev->js->queue[i].sched, 0);

	/* Re-enable job interrupts now that everything has been restarted. */
	panfrost_jm_enable_interrupts(pfdev);

	dma_fence_end_signalling(cookie);
}

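/*
 * drm_sched timeout callback. A timeout can be spurious (the job completed
 * but its interrupt was serviced late), so the done fence is re-checked
 * before and after synchronizing with the job IRQ; only a genuinely stuck
 * job leads to a core dump and a GPU reset.
 */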
static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job
						     *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
	struct panfrost_device *pfdev = job->pfdev;
	int js = panfrost_job_get_slot(job);

	/*
	 * If the GPU managed to complete this job's fence, the timeout has
	 * fired before the free-job worker. The timeout is spurious, so bail
	 * out.
	 */
	if (dma_fence_is_signaled(job->done_fence))
		return DRM_GPU_SCHED_STAT_NO_HANG;

	/*
	 * The Panfrost IRQ handler may take a long time to process an
	 * interrupt if there is another IRQ handler hogging the processing.
	 * For example, the HDMI encoder driver might be stuck in its IRQ
	 * handler for a significant time in the case of a bad cable
	 * connection. In order to catch such cases and not report spurious
	 * Panfrost job timeouts, synchronize the IRQ handler and re-check
	 * the fence status.
	 */
	synchronize_irq(pfdev->js->irq);

	if (dma_fence_is_signaled(job->done_fence)) {
		dev_warn(pfdev->base.dev, "unexpectedly high interrupt latency\n");
		return DRM_GPU_SCHED_STAT_NO_HANG;
	}

	dev_err(pfdev->base.dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
		js,
		job_read(pfdev, JS_CONFIG(js)),
		job_read(pfdev, JS_STATUS(js)),
		job_read(pfdev, JS_HEAD_LO(js)),
		job_read(pfdev, JS_TAIL_LO(js)),
		sched_job);

	panfrost_core_dump(job);

	atomic_set(&pfdev->reset.pending, 1);
	panfrost_reset(pfdev, sched_job);

	return DRM_GPU_SCHED_STAT_RESET;
}

static void panfrost_reset_work(struct work_struct *work)
{
	struct panfrost_device *pfdev;

	pfdev = container_of(work, struct panfrost_device, reset.work);
	panfrost_reset(pfdev, NULL);
}

static const struct drm_sched_backend_ops panfrost_sched_ops = {
	.run_job = panfrost_job_run,
	.timedout_job = panfrost_job_timedout,
	.free_job = panfrost_job_free
};

static irqreturn_t panfrost_jm_irq_handler_thread(int irq, void *data)
{
	struct panfrost_device *pfdev = data;

	panfrost_jm_handle_irqs(pfdev);

	/* Enable interrupts only if we're not about to get suspended */
	if (!test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended))
		job_write(pfdev, JOB_INT_MASK, ALL_JS_INT_MASK);

	return IRQ_HANDLED;
}

static irqreturn_t panfrost_jm_irq_handler(int irq, void *data)
{
	struct panfrost_device *pfdev = data;
	u32 status;

	if (test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended))
		return IRQ_NONE;

	status = job_read(pfdev, JOB_INT_STAT);
	if (!status)
		return IRQ_NONE;

	job_write(pfdev, JOB_INT_MASK, 0);
	return IRQ_WAKE_THREAD;
}

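/*
 * One-time JM setup: request the "job" IRQ (the hard handler above masks
 * the interrupt and defers to the threaded handler), allocate the ordered
 * reset workqueue, create one drm_gpu_scheduler per job slot, then clear
 * and enable job interrupts.
 */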
int panfrost_jm_init(struct panfrost_device *pfdev)
{
	struct drm_sched_init_args args = {
		.ops = &panfrost_sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = 2,
		.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
		.dev = pfdev->base.dev,
	};
	struct panfrost_job_slot *js;
	int ret, j;

	BUILD_BUG_ON(ARRAY_SIZE(panfrost_engine_names) != NUM_JOB_SLOTS);

	/* All GPUs have two entries per queue, but without jobchain
	 * disambiguation stopping the right job in the close path is tricky,
	 * so let's just advertise one entry in that case.
	 */
	if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
		args.credit_limit = 1;

	js = devm_kzalloc(pfdev->base.dev, sizeof(*js), GFP_KERNEL);
	if (!js)
		return -ENOMEM;
	pfdev->js = js;

	INIT_WORK(&pfdev->reset.work, panfrost_reset_work);
	spin_lock_init(&js->job_lock);

	js->irq = platform_get_irq_byname(to_platform_device(pfdev->base.dev), "job");
	if (js->irq < 0)
		return js->irq;

	ret = devm_request_threaded_irq(pfdev->base.dev, js->irq,
					panfrost_jm_irq_handler,
					panfrost_jm_irq_handler_thread,
					IRQF_SHARED, KBUILD_MODNAME "-job",
					pfdev);
	if (ret) {
		dev_err(pfdev->base.dev, "failed to request job irq");
		return ret;
	}

	pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0);
	if (!pfdev->reset.wq)
		return -ENOMEM;
	args.timeout_wq = pfdev->reset.wq;

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		js->queue[j].fence_context = dma_fence_context_alloc(1);
		args.name = panfrost_engine_names[j];

		ret = drm_sched_init(&js->queue[j].sched, &args);
		if (ret) {
			dev_err(pfdev->base.dev, "Failed to create scheduler: %d.", ret);
			goto err_sched;
		}
	}

	panfrost_jm_reset_interrupts(pfdev);
	panfrost_jm_enable_interrupts(pfdev);

	return 0;

err_sched:
	for (j--; j >= 0; j--)
		drm_sched_fini(&js->queue[j].sched);

	destroy_workqueue(pfdev->reset.wq);
	return ret;
}

void panfrost_jm_fini(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js = pfdev->js;
	int j;

	job_write(pfdev, JOB_INT_MASK, 0);

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		drm_sched_fini(&js->queue[j].sched);
	}

	cancel_work_sync(&pfdev->reset.work);
	destroy_workqueue(pfdev->reset.wq);
}

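/*
 * Per-file JM context handling: each DRM file carries an xarray of JM
 * contexts. Opening a file creates a default context expected at handle 0,
 * and closing it destroys whatever contexts remain.
 */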
int panfrost_jm_open(struct drm_file *file)
{
	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
	int ret;

	struct drm_panfrost_jm_ctx_create default_jm_ctx = {
		.priority = PANFROST_JM_CTX_PRIORITY_MEDIUM,
	};

	xa_init_flags(&panfrost_priv->jm_ctxs, XA_FLAGS_ALLOC);

	ret = panfrost_jm_ctx_create(file, &default_jm_ctx);
	if (ret)
		return ret;

	/* We expect the default context to be assigned handle 0. */
	if (WARN_ON(default_jm_ctx.handle))
		return -EINVAL;

	return 0;
}

void panfrost_jm_close(struct drm_file *file)
{
	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
	struct panfrost_jm_ctx *jm_ctx;
	unsigned long i;

	xa_for_each(&panfrost_priv->jm_ctxs, i, jm_ctx)
		panfrost_jm_ctx_destroy(file, i);

	xa_destroy(&panfrost_priv->jm_ctxs);
}

int panfrost_jm_is_idle(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js = pfdev->js;
	int i;

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		/* If there are any jobs in the HW queue, we're not idle */
		if (atomic_read(&js->queue[i].sched.credit_count))
			return false;
	}

	return true;
}

static void panfrost_jm_ctx_release(struct kref *kref)
{
	struct panfrost_jm_ctx *jm_ctx = container_of(kref, struct panfrost_jm_ctx, refcnt);

	WARN_ON(!jm_ctx->destroyed);

	for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slot_entity); i++)
		drm_sched_entity_destroy(&jm_ctx->slot_entity[i]);

	kfree(jm_ctx);
}

void
panfrost_jm_ctx_put(struct panfrost_jm_ctx *jm_ctx)
{
	if (jm_ctx)
		kref_put(&jm_ctx->refcnt, panfrost_jm_ctx_release);
}

struct panfrost_jm_ctx *
panfrost_jm_ctx_get(struct panfrost_jm_ctx *jm_ctx)
{
	if (jm_ctx)
		kref_get(&jm_ctx->refcnt);

	return jm_ctx;
}

struct panfrost_jm_ctx *
panfrost_jm_ctx_from_handle(struct drm_file *file, u32 handle)
{
	struct panfrost_file_priv *priv = file->driver_priv;
	struct panfrost_jm_ctx *jm_ctx;

	xa_lock(&priv->jm_ctxs);
	jm_ctx = panfrost_jm_ctx_get(xa_load(&priv->jm_ctxs, handle));
	xa_unlock(&priv->jm_ctxs);

	return jm_ctx;
}

static int jm_ctx_prio_to_drm_sched_prio(struct drm_file *file,
					 enum drm_panfrost_jm_ctx_priority in,
					 enum drm_sched_priority *out)
{
	switch (in) {
	case PANFROST_JM_CTX_PRIORITY_LOW:
		*out = DRM_SCHED_PRIORITY_LOW;
		return 0;
	case PANFROST_JM_CTX_PRIORITY_MEDIUM:
		*out = DRM_SCHED_PRIORITY_NORMAL;
		return 0;
	case PANFROST_JM_CTX_PRIORITY_HIGH:
		if (!panfrost_high_prio_allowed(file))
			return -EACCES;

		*out = DRM_SCHED_PRIORITY_HIGH;
		return 0;
	default:
		return -EINVAL;
	}
}

int panfrost_jm_ctx_create(struct drm_file *file,
			   struct drm_panfrost_jm_ctx_create *args)
{
	struct panfrost_file_priv *priv = file->driver_priv;
	struct panfrost_device *pfdev = priv->pfdev;
	enum drm_sched_priority sched_prio;
	struct panfrost_jm_ctx *jm_ctx;
	int ret;

	jm_ctx = kzalloc(sizeof(*jm_ctx), GFP_KERNEL);
	if (!jm_ctx)
		return -ENOMEM;

	kref_init(&jm_ctx->refcnt);

	ret = jm_ctx_prio_to_drm_sched_prio(file, args->priority, &sched_prio);
	if (ret)
		goto err_put_jm_ctx;

	for (u32 i = 0; i < NUM_JOB_SLOTS; i++) {
		struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;

		ret = drm_sched_entity_init(&jm_ctx->slot_entity[i], sched_prio,
					    &sched, 1, NULL);
		if (ret)
			goto err_put_jm_ctx;
	}

	ret = xa_alloc(&priv->jm_ctxs, &args->handle, jm_ctx,
		       XA_LIMIT(0, MAX_JM_CTX_PER_FILE), GFP_KERNEL);
	if (ret)
		goto err_put_jm_ctx;

	return 0;

err_put_jm_ctx:
	jm_ctx->destroyed = true;
	panfrost_jm_ctx_put(jm_ctx);
	return ret;
}

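/*
 * Destroy a JM context: remove it from the file's xarray, mark it
 * destroyed so panfrost_job_run() refuses further submissions, hard-stop
 * any of its jobs currently on the hardware slots (cancelling the _NEXT
 * entry first), detach the engine-usage pointer since jobs can outlive
 * their file, and drop the reference held by the xarray.
 */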
int panfrost_jm_ctx_destroy(struct drm_file *file, u32 handle)
{
	struct panfrost_file_priv *priv = file->driver_priv;
	struct panfrost_device *pfdev = priv->pfdev;
	struct panfrost_jm_ctx *jm_ctx;

	jm_ctx = xa_erase(&priv->jm_ctxs, handle);
	if (!jm_ctx)
		return -EINVAL;

	jm_ctx->destroyed = true;

	/* Kill in-flight jobs */
	spin_lock(&pfdev->js->job_lock);
	for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slot_entity); i++) {
		struct drm_sched_entity *entity = &jm_ctx->slot_entity[i];

		for (int j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
			struct panfrost_job *job = pfdev->jobs[i][j];
			u32 cmd;

			if (!job || job->base.entity != entity)
				continue;

			if (j == 1) {
				/* Try to cancel the job before it starts */
				job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
				/* Reset the job head so it doesn't get restarted if
				 * the job in the first slot failed.
				 */
				job->jc = 0;
			}

			if (panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
				cmd = panfrost_get_job_chain_flag(job) ?
				      JS_COMMAND_HARD_STOP_1 :
				      JS_COMMAND_HARD_STOP_0;
			} else {
				cmd = JS_COMMAND_HARD_STOP;
			}

			job_write(pfdev, JS_COMMAND(i), cmd);

			/* Jobs can outlive their file context */
			job->engine_usage = NULL;
		}
	}
	spin_unlock(&pfdev->js->job_lock);

	panfrost_jm_ctx_put(jm_ctx);
	return 0;
}