1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ 3 /* Copyright 2019 Collabora ltd. */ 4 #include <linux/delay.h> 5 #include <linux/interrupt.h> 6 #include <linux/io.h> 7 #include <linux/iopoll.h> 8 #include <linux/platform_device.h> 9 #include <linux/pm_runtime.h> 10 #include <linux/dma-resv.h> 11 #include <drm/gpu_scheduler.h> 12 #include <drm/panfrost_drm.h> 13 14 #include "panfrost_device.h" 15 #include "panfrost_devfreq.h" 16 #include "panfrost_job.h" 17 #include "panfrost_features.h" 18 #include "panfrost_issues.h" 19 #include "panfrost_gem.h" 20 #include "panfrost_regs.h" 21 #include "panfrost_gpu.h" 22 #include "panfrost_mmu.h" 23 #include "panfrost_dump.h" 24 25 #define JOB_TIMEOUT_MS 500 26 27 #define job_write(dev, reg, data) writel(data, dev->iomem + (reg)) 28 #define job_read(dev, reg) readl(dev->iomem + (reg)) 29 30 struct panfrost_queue_state { 31 struct drm_gpu_scheduler sched; 32 u64 fence_context; 33 u64 emit_seqno; 34 }; 35 36 struct panfrost_job_slot { 37 struct panfrost_queue_state queue[NUM_JOB_SLOTS]; 38 spinlock_t job_lock; 39 int irq; 40 }; 41 42 static struct panfrost_job * 43 to_panfrost_job(struct drm_sched_job *sched_job) 44 { 45 return container_of(sched_job, struct panfrost_job, base); 46 } 47 48 struct panfrost_fence { 49 struct dma_fence base; 50 struct drm_device *dev; 51 /* panfrost seqno for signaled() test */ 52 u64 seqno; 53 int queue; 54 }; 55 56 static inline struct panfrost_fence * 57 to_panfrost_fence(struct dma_fence *fence) 58 { 59 return (struct panfrost_fence *)fence; 60 } 61 62 static const char *panfrost_fence_get_driver_name(struct dma_fence *fence) 63 { 64 return "panfrost"; 65 } 66 67 static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence) 68 { 69 struct panfrost_fence *f = to_panfrost_fence(fence); 70 71 switch (f->queue) { 72 case 0: 73 return "panfrost-js-0"; 74 case 1: 75 return "panfrost-js-1"; 76 case 2: 77 return "panfrost-js-2"; 78 default: 79 return NULL; 80 } 81 } 82 83 static const struct dma_fence_ops panfrost_fence_ops = { 84 .get_driver_name = panfrost_fence_get_driver_name, 85 .get_timeline_name = panfrost_fence_get_timeline_name, 86 }; 87 88 static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num) 89 { 90 struct panfrost_fence *fence; 91 struct panfrost_job_slot *js = pfdev->js; 92 93 fence = kzalloc(sizeof(*fence), GFP_KERNEL); 94 if (!fence) 95 return ERR_PTR(-ENOMEM); 96 97 fence->dev = pfdev->ddev; 98 fence->queue = js_num; 99 fence->seqno = ++js->queue[js_num].emit_seqno; 100 dma_fence_init(&fence->base, &panfrost_fence_ops, &js->job_lock, 101 js->queue[js_num].fence_context, fence->seqno); 102 103 return &fence->base; 104 } 105 106 int panfrost_job_get_slot(struct panfrost_job *job) 107 { 108 /* JS0: fragment jobs. 109 * JS1: vertex/tiler jobs 110 * JS2: compute jobs 111 */ 112 if (job->requirements & PANFROST_JD_REQ_FS) 113 return 0; 114 115 /* Not exposed to userspace yet */ 116 #if 0 117 if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) { 118 if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) && 119 (job->pfdev->features.nr_core_groups == 2)) 120 return 2; 121 if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987)) 122 return 2; 123 } 124 #endif 125 return 1; 126 } 127 128 static void panfrost_job_write_affinity(struct panfrost_device *pfdev, 129 u32 requirements, 130 int js) 131 { 132 u64 affinity; 133 134 /* 135 * Use all cores for now. 136 * Eventually we may need to support tiler only jobs and h/w with 137 * multiple (2) coherent core groups 138 */ 139 affinity = pfdev->features.shader_present; 140 141 job_write(pfdev, JS_AFFINITY_NEXT_LO(js), lower_32_bits(affinity)); 142 job_write(pfdev, JS_AFFINITY_NEXT_HI(js), upper_32_bits(affinity)); 143 } 144 145 static u32 146 panfrost_get_job_chain_flag(const struct panfrost_job *job) 147 { 148 struct panfrost_fence *f = to_panfrost_fence(job->done_fence); 149 150 if (!panfrost_has_hw_feature(job->pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) 151 return 0; 152 153 return (f->seqno & 1) ? JS_CONFIG_JOB_CHAIN_FLAG : 0; 154 } 155 156 static struct panfrost_job * 157 panfrost_dequeue_job(struct panfrost_device *pfdev, int slot) 158 { 159 struct panfrost_job *job = pfdev->jobs[slot][0]; 160 161 WARN_ON(!job); 162 if (job->is_profiled) { 163 if (job->engine_usage) { 164 job->engine_usage->elapsed_ns[slot] += 165 ktime_to_ns(ktime_sub(ktime_get(), job->start_time)); 166 job->engine_usage->cycles[slot] += 167 panfrost_cycle_counter_read(pfdev) - job->start_cycles; 168 } 169 panfrost_cycle_counter_put(job->pfdev); 170 } 171 172 pfdev->jobs[slot][0] = pfdev->jobs[slot][1]; 173 pfdev->jobs[slot][1] = NULL; 174 175 return job; 176 } 177 178 static unsigned int 179 panfrost_enqueue_job(struct panfrost_device *pfdev, int slot, 180 struct panfrost_job *job) 181 { 182 if (WARN_ON(!job)) 183 return 0; 184 185 if (!pfdev->jobs[slot][0]) { 186 pfdev->jobs[slot][0] = job; 187 return 0; 188 } 189 190 WARN_ON(pfdev->jobs[slot][1]); 191 pfdev->jobs[slot][1] = job; 192 WARN_ON(panfrost_get_job_chain_flag(job) == 193 panfrost_get_job_chain_flag(pfdev->jobs[slot][0])); 194 return 1; 195 } 196 197 static void panfrost_job_hw_submit(struct panfrost_job *job, int js) 198 { 199 struct panfrost_device *pfdev = job->pfdev; 200 unsigned int subslot; 201 u32 cfg; 202 u64 jc_head = job->jc; 203 int ret; 204 205 panfrost_devfreq_record_busy(&pfdev->pfdevfreq); 206 207 ret = pm_runtime_get_sync(pfdev->dev); 208 if (ret < 0) 209 return; 210 211 if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) { 212 return; 213 } 214 215 cfg = panfrost_mmu_as_get(pfdev, job->mmu); 216 217 job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc_head)); 218 job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc_head)); 219 220 panfrost_job_write_affinity(pfdev, job->requirements, js); 221 222 /* start MMU, medium priority, cache clean/flush on end, clean/flush on 223 * start */ 224 cfg |= JS_CONFIG_THREAD_PRI(8) | 225 JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE | 226 JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE | 227 panfrost_get_job_chain_flag(job); 228 229 if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION)) 230 cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; 231 232 if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649)) 233 cfg |= JS_CONFIG_START_MMU; 234 235 job_write(pfdev, JS_CONFIG_NEXT(js), cfg); 236 237 if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION)) 238 job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id); 239 240 /* GO ! */ 241 242 spin_lock(&pfdev->js->job_lock); 243 subslot = panfrost_enqueue_job(pfdev, js, job); 244 /* Don't queue the job if a reset is in progress */ 245 if (!atomic_read(&pfdev->reset.pending)) { 246 if (pfdev->profile_mode) { 247 panfrost_cycle_counter_get(pfdev); 248 job->is_profiled = true; 249 job->start_time = ktime_get(); 250 job->start_cycles = panfrost_cycle_counter_read(pfdev); 251 } 252 253 job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START); 254 dev_dbg(pfdev->dev, 255 "JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d", 256 job, js, subslot, jc_head, cfg & 0xf); 257 } 258 spin_unlock(&pfdev->js->job_lock); 259 } 260 261 static int panfrost_acquire_object_fences(struct drm_gem_object **bos, 262 int bo_count, 263 struct drm_sched_job *job) 264 { 265 int i, ret; 266 267 for (i = 0; i < bo_count; i++) { 268 ret = dma_resv_reserve_fences(bos[i]->resv, 1); 269 if (ret) 270 return ret; 271 272 /* panfrost always uses write mode in its current uapi */ 273 ret = drm_sched_job_add_implicit_dependencies(job, bos[i], 274 true); 275 if (ret) 276 return ret; 277 } 278 279 return 0; 280 } 281 282 static void panfrost_attach_object_fences(struct drm_gem_object **bos, 283 int bo_count, 284 struct dma_fence *fence) 285 { 286 int i; 287 288 for (i = 0; i < bo_count; i++) 289 dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE); 290 } 291 292 int panfrost_job_push(struct panfrost_job *job) 293 { 294 struct panfrost_device *pfdev = job->pfdev; 295 struct ww_acquire_ctx acquire_ctx; 296 int ret = 0; 297 298 ret = drm_gem_lock_reservations(job->bos, job->bo_count, 299 &acquire_ctx); 300 if (ret) 301 return ret; 302 303 mutex_lock(&pfdev->sched_lock); 304 drm_sched_job_arm(&job->base); 305 306 job->render_done_fence = dma_fence_get(&job->base.s_fence->finished); 307 308 ret = panfrost_acquire_object_fences(job->bos, job->bo_count, 309 &job->base); 310 if (ret) { 311 mutex_unlock(&pfdev->sched_lock); 312 goto unlock; 313 } 314 315 kref_get(&job->refcount); /* put by scheduler job completion */ 316 317 drm_sched_entity_push_job(&job->base); 318 319 mutex_unlock(&pfdev->sched_lock); 320 321 panfrost_attach_object_fences(job->bos, job->bo_count, 322 job->render_done_fence); 323 324 unlock: 325 drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx); 326 327 return ret; 328 } 329 330 static void panfrost_job_cleanup(struct kref *ref) 331 { 332 struct panfrost_job *job = container_of(ref, struct panfrost_job, 333 refcount); 334 unsigned int i; 335 336 dma_fence_put(job->done_fence); 337 dma_fence_put(job->render_done_fence); 338 339 if (job->mappings) { 340 for (i = 0; i < job->bo_count; i++) { 341 if (!job->mappings[i]) 342 break; 343 344 atomic_dec(&job->mappings[i]->obj->gpu_usecount); 345 panfrost_gem_mapping_put(job->mappings[i]); 346 } 347 kvfree(job->mappings); 348 } 349 350 if (job->bos) { 351 for (i = 0; i < job->bo_count; i++) 352 drm_gem_object_put(job->bos[i]); 353 354 kvfree(job->bos); 355 } 356 357 kfree(job); 358 } 359 360 void panfrost_job_put(struct panfrost_job *job) 361 { 362 kref_put(&job->refcount, panfrost_job_cleanup); 363 } 364 365 static void panfrost_job_free(struct drm_sched_job *sched_job) 366 { 367 struct panfrost_job *job = to_panfrost_job(sched_job); 368 369 drm_sched_job_cleanup(sched_job); 370 371 panfrost_job_put(job); 372 } 373 374 static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job) 375 { 376 struct panfrost_job *job = to_panfrost_job(sched_job); 377 struct panfrost_device *pfdev = job->pfdev; 378 int slot = panfrost_job_get_slot(job); 379 struct dma_fence *fence = NULL; 380 381 if (unlikely(job->base.s_fence->finished.error)) 382 return NULL; 383 384 /* Nothing to execute: can happen if the job has finished while 385 * we were resetting the GPU. 386 */ 387 if (!job->jc) 388 return NULL; 389 390 fence = panfrost_fence_create(pfdev, slot); 391 if (IS_ERR(fence)) 392 return fence; 393 394 if (job->done_fence) 395 dma_fence_put(job->done_fence); 396 job->done_fence = dma_fence_get(fence); 397 398 panfrost_job_hw_submit(job, slot); 399 400 return fence; 401 } 402 403 void panfrost_job_enable_interrupts(struct panfrost_device *pfdev) 404 { 405 int j; 406 u32 irq_mask = 0; 407 408 clear_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended); 409 410 for (j = 0; j < NUM_JOB_SLOTS; j++) { 411 irq_mask |= MK_JS_MASK(j); 412 } 413 414 job_write(pfdev, JOB_INT_CLEAR, irq_mask); 415 job_write(pfdev, JOB_INT_MASK, irq_mask); 416 } 417 418 void panfrost_job_suspend_irq(struct panfrost_device *pfdev) 419 { 420 set_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended); 421 422 job_write(pfdev, JOB_INT_MASK, 0); 423 synchronize_irq(pfdev->js->irq); 424 } 425 426 static void panfrost_job_handle_err(struct panfrost_device *pfdev, 427 struct panfrost_job *job, 428 unsigned int js) 429 { 430 u32 js_status = job_read(pfdev, JS_STATUS(js)); 431 const char *exception_name = panfrost_exception_name(js_status); 432 bool signal_fence = true; 433 434 if (!panfrost_exception_is_fault(js_status)) { 435 dev_dbg(pfdev->dev, "js event, js=%d, status=%s, head=0x%x, tail=0x%x", 436 js, exception_name, 437 job_read(pfdev, JS_HEAD_LO(js)), 438 job_read(pfdev, JS_TAIL_LO(js))); 439 } else { 440 dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x", 441 js, exception_name, 442 job_read(pfdev, JS_HEAD_LO(js)), 443 job_read(pfdev, JS_TAIL_LO(js))); 444 } 445 446 if (js_status == DRM_PANFROST_EXCEPTION_STOPPED) { 447 /* Update the job head so we can resume */ 448 job->jc = job_read(pfdev, JS_TAIL_LO(js)) | 449 ((u64)job_read(pfdev, JS_TAIL_HI(js)) << 32); 450 451 /* The job will be resumed, don't signal the fence */ 452 signal_fence = false; 453 } else if (js_status == DRM_PANFROST_EXCEPTION_TERMINATED) { 454 /* Job has been hard-stopped, flag it as canceled */ 455 dma_fence_set_error(job->done_fence, -ECANCELED); 456 job->jc = 0; 457 } else if (panfrost_exception_is_fault(js_status)) { 458 /* We might want to provide finer-grained error code based on 459 * the exception type, but unconditionally setting to EINVAL 460 * is good enough for now. 461 */ 462 dma_fence_set_error(job->done_fence, -EINVAL); 463 job->jc = 0; 464 } 465 466 panfrost_mmu_as_put(pfdev, job->mmu); 467 panfrost_devfreq_record_idle(&pfdev->pfdevfreq); 468 469 if (signal_fence) 470 dma_fence_signal_locked(job->done_fence); 471 472 pm_runtime_put_autosuspend(pfdev->dev); 473 474 if (panfrost_exception_needs_reset(pfdev, js_status)) { 475 atomic_set(&pfdev->reset.pending, 1); 476 drm_sched_fault(&pfdev->js->queue[js].sched); 477 } 478 } 479 480 static void panfrost_job_handle_done(struct panfrost_device *pfdev, 481 struct panfrost_job *job) 482 { 483 /* Set ->jc to 0 to avoid re-submitting an already finished job (can 484 * happen when we receive the DONE interrupt while doing a GPU reset). 485 */ 486 job->jc = 0; 487 panfrost_mmu_as_put(pfdev, job->mmu); 488 panfrost_devfreq_record_idle(&pfdev->pfdevfreq); 489 490 dma_fence_signal_locked(job->done_fence); 491 pm_runtime_put_autosuspend(pfdev->dev); 492 } 493 494 static void panfrost_job_handle_irq(struct panfrost_device *pfdev, u32 status) 495 { 496 struct panfrost_job *done[NUM_JOB_SLOTS][2] = {}; 497 struct panfrost_job *failed[NUM_JOB_SLOTS] = {}; 498 u32 js_state = 0, js_events = 0; 499 unsigned int i, j; 500 501 /* First we collect all failed/done jobs. */ 502 while (status) { 503 u32 js_state_mask = 0; 504 505 for (j = 0; j < NUM_JOB_SLOTS; j++) { 506 if (status & MK_JS_MASK(j)) 507 js_state_mask |= MK_JS_MASK(j); 508 509 if (status & JOB_INT_MASK_DONE(j)) { 510 if (done[j][0]) 511 done[j][1] = panfrost_dequeue_job(pfdev, j); 512 else 513 done[j][0] = panfrost_dequeue_job(pfdev, j); 514 } 515 516 if (status & JOB_INT_MASK_ERR(j)) { 517 /* Cancel the next submission. Will be submitted 518 * after we're done handling this failure if 519 * there's no reset pending. 520 */ 521 job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP); 522 failed[j] = panfrost_dequeue_job(pfdev, j); 523 } 524 } 525 526 /* JS_STATE is sampled when JOB_INT_CLEAR is written. 527 * For each BIT(slot) or BIT(slot + 16) bit written to 528 * JOB_INT_CLEAR, the corresponding bits in JS_STATE 529 * (BIT(slot) and BIT(slot + 16)) are updated, but this 530 * is racy. If we only have one job done at the time we 531 * read JOB_INT_RAWSTAT but the second job fails before we 532 * clear the status, we end up with a status containing 533 * only the DONE bit and consider both jobs as DONE since 534 * JS_STATE reports both NEXT and CURRENT as inactive. 535 * To prevent that, let's repeat this clear+read steps 536 * until status is 0. 537 */ 538 job_write(pfdev, JOB_INT_CLEAR, status); 539 js_state &= ~js_state_mask; 540 js_state |= job_read(pfdev, JOB_INT_JS_STATE) & js_state_mask; 541 js_events |= status; 542 status = job_read(pfdev, JOB_INT_RAWSTAT); 543 } 544 545 /* Then we handle the dequeued jobs. */ 546 for (j = 0; j < NUM_JOB_SLOTS; j++) { 547 if (!(js_events & MK_JS_MASK(j))) 548 continue; 549 550 if (failed[j]) { 551 panfrost_job_handle_err(pfdev, failed[j], j); 552 } else if (pfdev->jobs[j][0] && !(js_state & MK_JS_MASK(j))) { 553 /* When the current job doesn't fail, the JM dequeues 554 * the next job without waiting for an ACK, this means 555 * we can have 2 jobs dequeued and only catch the 556 * interrupt when the second one is done. If both slots 557 * are inactive, but one job remains in pfdev->jobs[j], 558 * consider it done. Of course that doesn't apply if a 559 * failure happened since we cancelled execution of the 560 * job in _NEXT (see above). 561 */ 562 if (WARN_ON(!done[j][0])) 563 done[j][0] = panfrost_dequeue_job(pfdev, j); 564 else 565 done[j][1] = panfrost_dequeue_job(pfdev, j); 566 } 567 568 for (i = 0; i < ARRAY_SIZE(done[0]) && done[j][i]; i++) 569 panfrost_job_handle_done(pfdev, done[j][i]); 570 } 571 572 /* And finally we requeue jobs that were waiting in the second slot 573 * and have been stopped if we detected a failure on the first slot. 574 */ 575 for (j = 0; j < NUM_JOB_SLOTS; j++) { 576 if (!(js_events & MK_JS_MASK(j))) 577 continue; 578 579 if (!failed[j] || !pfdev->jobs[j][0]) 580 continue; 581 582 if (pfdev->jobs[j][0]->jc == 0) { 583 /* The job was cancelled, signal the fence now */ 584 struct panfrost_job *canceled = panfrost_dequeue_job(pfdev, j); 585 586 dma_fence_set_error(canceled->done_fence, -ECANCELED); 587 panfrost_job_handle_done(pfdev, canceled); 588 } else if (!atomic_read(&pfdev->reset.pending)) { 589 /* Requeue the job we removed if no reset is pending */ 590 job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_START); 591 } 592 } 593 } 594 595 static void panfrost_job_handle_irqs(struct panfrost_device *pfdev) 596 { 597 u32 status = job_read(pfdev, JOB_INT_RAWSTAT); 598 599 while (status) { 600 pm_runtime_mark_last_busy(pfdev->dev); 601 602 spin_lock(&pfdev->js->job_lock); 603 panfrost_job_handle_irq(pfdev, status); 604 spin_unlock(&pfdev->js->job_lock); 605 status = job_read(pfdev, JOB_INT_RAWSTAT); 606 } 607 } 608 609 static u32 panfrost_active_slots(struct panfrost_device *pfdev, 610 u32 *js_state_mask, u32 js_state) 611 { 612 u32 rawstat; 613 614 if (!(js_state & *js_state_mask)) 615 return 0; 616 617 rawstat = job_read(pfdev, JOB_INT_RAWSTAT); 618 if (rawstat) { 619 unsigned int i; 620 621 for (i = 0; i < NUM_JOB_SLOTS; i++) { 622 if (rawstat & MK_JS_MASK(i)) 623 *js_state_mask &= ~MK_JS_MASK(i); 624 } 625 } 626 627 return js_state & *js_state_mask; 628 } 629 630 static void 631 panfrost_reset(struct panfrost_device *pfdev, 632 struct drm_sched_job *bad) 633 { 634 u32 js_state, js_state_mask = 0xffffffff; 635 unsigned int i, j; 636 bool cookie; 637 int ret; 638 639 if (!atomic_read(&pfdev->reset.pending)) 640 return; 641 642 /* Stop the schedulers. 643 * 644 * FIXME: We temporarily get out of the dma_fence_signalling section 645 * because the cleanup path generate lockdep splats when taking locks 646 * to release job resources. We should rework the code to follow this 647 * pattern: 648 * 649 * try_lock 650 * if (locked) 651 * release 652 * else 653 * schedule_work_to_release_later 654 */ 655 for (i = 0; i < NUM_JOB_SLOTS; i++) 656 drm_sched_stop(&pfdev->js->queue[i].sched, bad); 657 658 cookie = dma_fence_begin_signalling(); 659 660 if (bad) 661 drm_sched_increase_karma(bad); 662 663 /* Mask job interrupts and synchronize to make sure we won't be 664 * interrupted during our reset. 665 */ 666 job_write(pfdev, JOB_INT_MASK, 0); 667 synchronize_irq(pfdev->js->irq); 668 669 for (i = 0; i < NUM_JOB_SLOTS; i++) { 670 /* Cancel the next job and soft-stop the running job. */ 671 job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP); 672 job_write(pfdev, JS_COMMAND(i), JS_COMMAND_SOFT_STOP); 673 } 674 675 /* Wait at most 10ms for soft-stops to complete */ 676 ret = readl_poll_timeout(pfdev->iomem + JOB_INT_JS_STATE, js_state, 677 !panfrost_active_slots(pfdev, &js_state_mask, js_state), 678 10, 10000); 679 680 if (ret) 681 dev_err(pfdev->dev, "Soft-stop failed\n"); 682 683 /* Handle the remaining interrupts before we reset. */ 684 panfrost_job_handle_irqs(pfdev); 685 686 /* Remaining interrupts have been handled, but we might still have 687 * stuck jobs. Let's make sure the PM counters stay balanced by 688 * manually calling pm_runtime_put_noidle() and 689 * panfrost_devfreq_record_idle() for each stuck job. 690 * Let's also make sure the cycle counting register's refcnt is 691 * kept balanced to prevent it from running forever 692 */ 693 spin_lock(&pfdev->js->job_lock); 694 for (i = 0; i < NUM_JOB_SLOTS; i++) { 695 for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) { 696 if (pfdev->jobs[i][j]->is_profiled) 697 panfrost_cycle_counter_put(pfdev->jobs[i][j]->pfdev); 698 pm_runtime_put_noidle(pfdev->dev); 699 panfrost_devfreq_record_idle(&pfdev->pfdevfreq); 700 } 701 } 702 memset(pfdev->jobs, 0, sizeof(pfdev->jobs)); 703 spin_unlock(&pfdev->js->job_lock); 704 705 /* Proceed with reset now. */ 706 panfrost_device_reset(pfdev); 707 708 /* panfrost_device_reset() unmasks job interrupts, but we want to 709 * keep them masked a bit longer. 710 */ 711 job_write(pfdev, JOB_INT_MASK, 0); 712 713 /* GPU has been reset, we can clear the reset pending bit. */ 714 atomic_set(&pfdev->reset.pending, 0); 715 716 /* Now resubmit jobs that were previously queued but didn't have a 717 * chance to finish. 718 * FIXME: We temporarily get out of the DMA fence signalling section 719 * while resubmitting jobs because the job submission logic will 720 * allocate memory with the GFP_KERNEL flag which can trigger memory 721 * reclaim and exposes a lock ordering issue. 722 */ 723 dma_fence_end_signalling(cookie); 724 for (i = 0; i < NUM_JOB_SLOTS; i++) 725 drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched); 726 cookie = dma_fence_begin_signalling(); 727 728 /* Restart the schedulers */ 729 for (i = 0; i < NUM_JOB_SLOTS; i++) 730 drm_sched_start(&pfdev->js->queue[i].sched, true); 731 732 /* Re-enable job interrupts now that everything has been restarted. */ 733 job_write(pfdev, JOB_INT_MASK, 734 GENMASK(16 + NUM_JOB_SLOTS - 1, 16) | 735 GENMASK(NUM_JOB_SLOTS - 1, 0)); 736 737 dma_fence_end_signalling(cookie); 738 } 739 740 static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job 741 *sched_job) 742 { 743 struct panfrost_job *job = to_panfrost_job(sched_job); 744 struct panfrost_device *pfdev = job->pfdev; 745 int js = panfrost_job_get_slot(job); 746 747 /* 748 * If the GPU managed to complete this jobs fence, the timeout is 749 * spurious. Bail out. 750 */ 751 if (dma_fence_is_signaled(job->done_fence)) 752 return DRM_GPU_SCHED_STAT_NOMINAL; 753 754 /* 755 * Panfrost IRQ handler may take a long time to process an interrupt 756 * if there is another IRQ handler hogging the processing. 757 * For example, the HDMI encoder driver might be stuck in the IRQ 758 * handler for a significant time in a case of bad cable connection. 759 * In order to catch such cases and not report spurious Panfrost 760 * job timeouts, synchronize the IRQ handler and re-check the fence 761 * status. 762 */ 763 synchronize_irq(pfdev->js->irq); 764 765 if (dma_fence_is_signaled(job->done_fence)) { 766 dev_warn(pfdev->dev, "unexpectedly high interrupt latency\n"); 767 return DRM_GPU_SCHED_STAT_NOMINAL; 768 } 769 770 dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p", 771 js, 772 job_read(pfdev, JS_CONFIG(js)), 773 job_read(pfdev, JS_STATUS(js)), 774 job_read(pfdev, JS_HEAD_LO(js)), 775 job_read(pfdev, JS_TAIL_LO(js)), 776 sched_job); 777 778 panfrost_core_dump(job); 779 780 atomic_set(&pfdev->reset.pending, 1); 781 panfrost_reset(pfdev, sched_job); 782 783 return DRM_GPU_SCHED_STAT_NOMINAL; 784 } 785 786 static void panfrost_reset_work(struct work_struct *work) 787 { 788 struct panfrost_device *pfdev; 789 790 pfdev = container_of(work, struct panfrost_device, reset.work); 791 panfrost_reset(pfdev, NULL); 792 } 793 794 static const struct drm_sched_backend_ops panfrost_sched_ops = { 795 .run_job = panfrost_job_run, 796 .timedout_job = panfrost_job_timedout, 797 .free_job = panfrost_job_free 798 }; 799 800 static irqreturn_t panfrost_job_irq_handler_thread(int irq, void *data) 801 { 802 struct panfrost_device *pfdev = data; 803 804 panfrost_job_handle_irqs(pfdev); 805 806 /* Enable interrupts only if we're not about to get suspended */ 807 if (!test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended)) 808 job_write(pfdev, JOB_INT_MASK, 809 GENMASK(16 + NUM_JOB_SLOTS - 1, 16) | 810 GENMASK(NUM_JOB_SLOTS - 1, 0)); 811 812 return IRQ_HANDLED; 813 } 814 815 static irqreturn_t panfrost_job_irq_handler(int irq, void *data) 816 { 817 struct panfrost_device *pfdev = data; 818 u32 status; 819 820 if (test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended)) 821 return IRQ_NONE; 822 823 status = job_read(pfdev, JOB_INT_STAT); 824 if (!status) 825 return IRQ_NONE; 826 827 job_write(pfdev, JOB_INT_MASK, 0); 828 return IRQ_WAKE_THREAD; 829 } 830 831 int panfrost_job_init(struct panfrost_device *pfdev) 832 { 833 struct panfrost_job_slot *js; 834 unsigned int nentries = 2; 835 int ret, j; 836 837 /* All GPUs have two entries per queue, but without jobchain 838 * disambiguation stopping the right job in the close path is tricky, 839 * so let's just advertise one entry in that case. 840 */ 841 if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) 842 nentries = 1; 843 844 pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL); 845 if (!js) 846 return -ENOMEM; 847 848 INIT_WORK(&pfdev->reset.work, panfrost_reset_work); 849 spin_lock_init(&js->job_lock); 850 851 js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job"); 852 if (js->irq < 0) 853 return js->irq; 854 855 ret = devm_request_threaded_irq(pfdev->dev, js->irq, 856 panfrost_job_irq_handler, 857 panfrost_job_irq_handler_thread, 858 IRQF_SHARED, KBUILD_MODNAME "-job", 859 pfdev); 860 if (ret) { 861 dev_err(pfdev->dev, "failed to request job irq"); 862 return ret; 863 } 864 865 pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0); 866 if (!pfdev->reset.wq) 867 return -ENOMEM; 868 869 for (j = 0; j < NUM_JOB_SLOTS; j++) { 870 js->queue[j].fence_context = dma_fence_context_alloc(1); 871 872 ret = drm_sched_init(&js->queue[j].sched, 873 &panfrost_sched_ops, NULL, 874 DRM_SCHED_PRIORITY_COUNT, 875 nentries, 0, 876 msecs_to_jiffies(JOB_TIMEOUT_MS), 877 pfdev->reset.wq, 878 NULL, "pan_js", pfdev->dev); 879 if (ret) { 880 dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret); 881 goto err_sched; 882 } 883 } 884 885 panfrost_job_enable_interrupts(pfdev); 886 887 return 0; 888 889 err_sched: 890 for (j--; j >= 0; j--) 891 drm_sched_fini(&js->queue[j].sched); 892 893 destroy_workqueue(pfdev->reset.wq); 894 return ret; 895 } 896 897 void panfrost_job_fini(struct panfrost_device *pfdev) 898 { 899 struct panfrost_job_slot *js = pfdev->js; 900 int j; 901 902 job_write(pfdev, JOB_INT_MASK, 0); 903 904 for (j = 0; j < NUM_JOB_SLOTS; j++) { 905 drm_sched_fini(&js->queue[j].sched); 906 } 907 908 cancel_work_sync(&pfdev->reset.work); 909 destroy_workqueue(pfdev->reset.wq); 910 } 911 912 int panfrost_job_open(struct panfrost_file_priv *panfrost_priv) 913 { 914 struct panfrost_device *pfdev = panfrost_priv->pfdev; 915 struct panfrost_job_slot *js = pfdev->js; 916 struct drm_gpu_scheduler *sched; 917 int ret, i; 918 919 for (i = 0; i < NUM_JOB_SLOTS; i++) { 920 sched = &js->queue[i].sched; 921 ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i], 922 DRM_SCHED_PRIORITY_NORMAL, &sched, 923 1, NULL); 924 if (WARN_ON(ret)) 925 return ret; 926 } 927 return 0; 928 } 929 930 void panfrost_job_close(struct panfrost_file_priv *panfrost_priv) 931 { 932 struct panfrost_device *pfdev = panfrost_priv->pfdev; 933 int i; 934 935 for (i = 0; i < NUM_JOB_SLOTS; i++) 936 drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]); 937 938 /* Kill in-flight jobs */ 939 spin_lock(&pfdev->js->job_lock); 940 for (i = 0; i < NUM_JOB_SLOTS; i++) { 941 struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i]; 942 int j; 943 944 for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) { 945 struct panfrost_job *job = pfdev->jobs[i][j]; 946 u32 cmd; 947 948 if (!job || job->base.entity != entity) 949 continue; 950 951 if (j == 1) { 952 /* Try to cancel the job before it starts */ 953 job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP); 954 /* Reset the job head so it doesn't get restarted if 955 * the job in the first slot failed. 956 */ 957 job->jc = 0; 958 } 959 960 if (panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { 961 cmd = panfrost_get_job_chain_flag(job) ? 962 JS_COMMAND_HARD_STOP_1 : 963 JS_COMMAND_HARD_STOP_0; 964 } else { 965 cmd = JS_COMMAND_HARD_STOP; 966 } 967 968 job_write(pfdev, JS_COMMAND(i), cmd); 969 970 /* Jobs can outlive their file context */ 971 job->engine_usage = NULL; 972 } 973 } 974 spin_unlock(&pfdev->js->job_lock); 975 } 976 977 int panfrost_job_is_idle(struct panfrost_device *pfdev) 978 { 979 struct panfrost_job_slot *js = pfdev->js; 980 int i; 981 982 for (i = 0; i < NUM_JOB_SLOTS; i++) { 983 /* If there are any jobs in the HW queue, we're not idle */ 984 if (atomic_read(&js->queue[i].sched.credit_count)) 985 return false; 986 } 987 988 return true; 989 } 990