1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ 3 /* Copyright 2019 Collabora ltd. */ 4 #include <linux/delay.h> 5 #include <linux/interrupt.h> 6 #include <linux/io.h> 7 #include <linux/iopoll.h> 8 #include <linux/platform_device.h> 9 #include <linux/pm_runtime.h> 10 #include <linux/dma-resv.h> 11 #include <drm/gpu_scheduler.h> 12 #include <drm/panfrost_drm.h> 13 14 #include "panfrost_device.h" 15 #include "panfrost_devfreq.h" 16 #include "panfrost_job.h" 17 #include "panfrost_features.h" 18 #include "panfrost_issues.h" 19 #include "panfrost_gem.h" 20 #include "panfrost_regs.h" 21 #include "panfrost_gpu.h" 22 #include "panfrost_mmu.h" 23 #include "panfrost_dump.h" 24 25 #define JOB_TIMEOUT_MS 500 26 27 #define job_write(dev, reg, data) writel(data, dev->iomem + (reg)) 28 #define job_read(dev, reg) readl(dev->iomem + (reg)) 29 30 struct panfrost_queue_state { 31 struct drm_gpu_scheduler sched; 32 u64 fence_context; 33 u64 emit_seqno; 34 }; 35 36 struct panfrost_job_slot { 37 struct panfrost_queue_state queue[NUM_JOB_SLOTS]; 38 spinlock_t job_lock; 39 int irq; 40 }; 41 42 static struct panfrost_job * 43 to_panfrost_job(struct drm_sched_job *sched_job) 44 { 45 return container_of(sched_job, struct panfrost_job, base); 46 } 47 48 struct panfrost_fence { 49 struct dma_fence base; 50 struct drm_device *dev; 51 /* panfrost seqno for signaled() test */ 52 u64 seqno; 53 int queue; 54 }; 55 56 static inline struct panfrost_fence * 57 to_panfrost_fence(struct dma_fence *fence) 58 { 59 return (struct panfrost_fence *)fence; 60 } 61 62 static const char *panfrost_fence_get_driver_name(struct dma_fence *fence) 63 { 64 return "panfrost"; 65 } 66 67 static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence) 68 { 69 struct panfrost_fence *f = to_panfrost_fence(fence); 70 71 switch (f->queue) { 72 case 0: 73 return "panfrost-js-0"; 74 case 1: 75 return "panfrost-js-1"; 76 case 2: 77 return "panfrost-js-2"; 78 default: 79 return NULL; 80 } 81 } 82 83 static const struct dma_fence_ops panfrost_fence_ops = { 84 .get_driver_name = panfrost_fence_get_driver_name, 85 .get_timeline_name = panfrost_fence_get_timeline_name, 86 }; 87 88 static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num) 89 { 90 struct panfrost_fence *fence; 91 struct panfrost_job_slot *js = pfdev->js; 92 93 fence = kzalloc(sizeof(*fence), GFP_KERNEL); 94 if (!fence) 95 return ERR_PTR(-ENOMEM); 96 97 fence->dev = pfdev->ddev; 98 fence->queue = js_num; 99 fence->seqno = ++js->queue[js_num].emit_seqno; 100 dma_fence_init(&fence->base, &panfrost_fence_ops, &js->job_lock, 101 js->queue[js_num].fence_context, fence->seqno); 102 103 return &fence->base; 104 } 105 106 int panfrost_job_get_slot(struct panfrost_job *job) 107 { 108 /* JS0: fragment jobs. 109 * JS1: vertex/tiler jobs 110 * JS2: compute jobs 111 */ 112 if (job->requirements & PANFROST_JD_REQ_FS) 113 return 0; 114 115 /* Not exposed to userspace yet */ 116 #if 0 117 if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) { 118 if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) && 119 (job->pfdev->features.nr_core_groups == 2)) 120 return 2; 121 if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987)) 122 return 2; 123 } 124 #endif 125 return 1; 126 } 127 128 static void panfrost_job_write_affinity(struct panfrost_device *pfdev, 129 u32 requirements, 130 int js) 131 { 132 u64 affinity; 133 134 /* 135 * Use all cores for now. 136 * Eventually we may need to support tiler only jobs and h/w with 137 * multiple (2) coherent core groups 138 */ 139 affinity = pfdev->features.shader_present; 140 141 job_write(pfdev, JS_AFFINITY_NEXT_LO(js), lower_32_bits(affinity)); 142 job_write(pfdev, JS_AFFINITY_NEXT_HI(js), upper_32_bits(affinity)); 143 } 144 145 static u32 146 panfrost_get_job_chain_flag(const struct panfrost_job *job) 147 { 148 struct panfrost_fence *f = to_panfrost_fence(job->done_fence); 149 150 if (!panfrost_has_hw_feature(job->pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) 151 return 0; 152 153 return (f->seqno & 1) ? JS_CONFIG_JOB_CHAIN_FLAG : 0; 154 } 155 156 static struct panfrost_job * 157 panfrost_dequeue_job(struct panfrost_device *pfdev, int slot) 158 { 159 struct panfrost_job *job = pfdev->jobs[slot][0]; 160 161 WARN_ON(!job); 162 if (job->is_profiled) { 163 if (job->engine_usage) { 164 job->engine_usage->elapsed_ns[slot] += 165 ktime_to_ns(ktime_sub(ktime_get(), job->start_time)); 166 job->engine_usage->cycles[slot] += 167 panfrost_cycle_counter_read(pfdev) - job->start_cycles; 168 } 169 panfrost_cycle_counter_put(job->pfdev); 170 } 171 172 pfdev->jobs[slot][0] = pfdev->jobs[slot][1]; 173 pfdev->jobs[slot][1] = NULL; 174 175 return job; 176 } 177 178 static unsigned int 179 panfrost_enqueue_job(struct panfrost_device *pfdev, int slot, 180 struct panfrost_job *job) 181 { 182 if (WARN_ON(!job)) 183 return 0; 184 185 if (!pfdev->jobs[slot][0]) { 186 pfdev->jobs[slot][0] = job; 187 return 0; 188 } 189 190 WARN_ON(pfdev->jobs[slot][1]); 191 pfdev->jobs[slot][1] = job; 192 WARN_ON(panfrost_get_job_chain_flag(job) == 193 panfrost_get_job_chain_flag(pfdev->jobs[slot][0])); 194 return 1; 195 } 196 197 static void panfrost_job_hw_submit(struct panfrost_job *job, int js) 198 { 199 struct panfrost_device *pfdev = job->pfdev; 200 unsigned int subslot; 201 u32 cfg; 202 u64 jc_head = job->jc; 203 int ret; 204 205 panfrost_devfreq_record_busy(&pfdev->pfdevfreq); 206 207 ret = pm_runtime_get_sync(pfdev->dev); 208 if (ret < 0) 209 return; 210 211 if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) { 212 return; 213 } 214 215 cfg = panfrost_mmu_as_get(pfdev, job->mmu); 216 217 job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc_head)); 218 job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc_head)); 219 220 panfrost_job_write_affinity(pfdev, job->requirements, js); 221 222 /* start MMU, medium priority, cache clean/flush on end, clean/flush on 223 * start */ 224 cfg |= JS_CONFIG_THREAD_PRI(8) | 225 JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE | 226 JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE | 227 panfrost_get_job_chain_flag(job); 228 229 if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION)) 230 cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; 231 232 if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649)) 233 cfg |= JS_CONFIG_START_MMU; 234 235 job_write(pfdev, JS_CONFIG_NEXT(js), cfg); 236 237 if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION)) 238 job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id); 239 240 /* GO ! */ 241 242 spin_lock(&pfdev->js->job_lock); 243 subslot = panfrost_enqueue_job(pfdev, js, job); 244 /* Don't queue the job if a reset is in progress */ 245 if (!atomic_read(&pfdev->reset.pending)) { 246 if (atomic_read(&pfdev->profile_mode)) { 247 panfrost_cycle_counter_get(pfdev); 248 job->is_profiled = true; 249 job->start_time = ktime_get(); 250 job->start_cycles = panfrost_cycle_counter_read(pfdev); 251 } 252 253 job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START); 254 dev_dbg(pfdev->dev, 255 "JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d", 256 job, js, subslot, jc_head, cfg & 0xf); 257 } 258 spin_unlock(&pfdev->js->job_lock); 259 } 260 261 static int panfrost_acquire_object_fences(struct drm_gem_object **bos, 262 int bo_count, 263 struct drm_sched_job *job) 264 { 265 int i, ret; 266 267 for (i = 0; i < bo_count; i++) { 268 ret = dma_resv_reserve_fences(bos[i]->resv, 1); 269 if (ret) 270 return ret; 271 272 /* panfrost always uses write mode in its current uapi */ 273 ret = drm_sched_job_add_implicit_dependencies(job, bos[i], 274 true); 275 if (ret) 276 return ret; 277 } 278 279 return 0; 280 } 281 282 static void panfrost_attach_object_fences(struct drm_gem_object **bos, 283 int bo_count, 284 struct dma_fence *fence) 285 { 286 int i; 287 288 for (i = 0; i < bo_count; i++) 289 dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE); 290 } 291 292 int panfrost_job_push(struct panfrost_job *job) 293 { 294 struct panfrost_device *pfdev = job->pfdev; 295 struct ww_acquire_ctx acquire_ctx; 296 int ret = 0; 297 298 ret = drm_gem_lock_reservations(job->bos, job->bo_count, 299 &acquire_ctx); 300 if (ret) 301 return ret; 302 303 mutex_lock(&pfdev->sched_lock); 304 drm_sched_job_arm(&job->base); 305 306 job->render_done_fence = dma_fence_get(&job->base.s_fence->finished); 307 308 ret = panfrost_acquire_object_fences(job->bos, job->bo_count, 309 &job->base); 310 if (ret) { 311 mutex_unlock(&pfdev->sched_lock); 312 goto unlock; 313 } 314 315 kref_get(&job->refcount); /* put by scheduler job completion */ 316 317 drm_sched_entity_push_job(&job->base); 318 319 mutex_unlock(&pfdev->sched_lock); 320 321 panfrost_attach_object_fences(job->bos, job->bo_count, 322 job->render_done_fence); 323 324 unlock: 325 drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx); 326 327 return ret; 328 } 329 330 static void panfrost_job_cleanup(struct kref *ref) 331 { 332 struct panfrost_job *job = container_of(ref, struct panfrost_job, 333 refcount); 334 unsigned int i; 335 336 dma_fence_put(job->done_fence); 337 dma_fence_put(job->render_done_fence); 338 339 if (job->mappings) { 340 for (i = 0; i < job->bo_count; i++) { 341 if (!job->mappings[i]) 342 break; 343 344 atomic_dec(&job->mappings[i]->obj->gpu_usecount); 345 panfrost_gem_mapping_put(job->mappings[i]); 346 } 347 kvfree(job->mappings); 348 } 349 350 if (job->bos) { 351 for (i = 0; i < job->bo_count; i++) 352 drm_gem_object_put(job->bos[i]); 353 354 kvfree(job->bos); 355 } 356 357 kfree(job); 358 } 359 360 void panfrost_job_put(struct panfrost_job *job) 361 { 362 kref_put(&job->refcount, panfrost_job_cleanup); 363 } 364 365 static void panfrost_job_free(struct drm_sched_job *sched_job) 366 { 367 struct panfrost_job *job = to_panfrost_job(sched_job); 368 369 drm_sched_job_cleanup(sched_job); 370 371 panfrost_job_put(job); 372 } 373 374 static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job) 375 { 376 struct panfrost_job *job = to_panfrost_job(sched_job); 377 struct panfrost_device *pfdev = job->pfdev; 378 int slot = panfrost_job_get_slot(job); 379 struct dma_fence *fence = NULL; 380 381 if (unlikely(job->base.s_fence->finished.error)) 382 return NULL; 383 384 /* Nothing to execute: can happen if the job has finished while 385 * we were resetting the GPU. 386 */ 387 if (!job->jc) 388 return NULL; 389 390 fence = panfrost_fence_create(pfdev, slot); 391 if (IS_ERR(fence)) 392 return fence; 393 394 if (job->done_fence) 395 dma_fence_put(job->done_fence); 396 job->done_fence = dma_fence_get(fence); 397 398 panfrost_job_hw_submit(job, slot); 399 400 return fence; 401 } 402 403 void panfrost_job_enable_interrupts(struct panfrost_device *pfdev) 404 { 405 int j; 406 u32 irq_mask = 0; 407 408 for (j = 0; j < NUM_JOB_SLOTS; j++) { 409 irq_mask |= MK_JS_MASK(j); 410 } 411 412 job_write(pfdev, JOB_INT_CLEAR, irq_mask); 413 job_write(pfdev, JOB_INT_MASK, irq_mask); 414 } 415 416 static void panfrost_job_handle_err(struct panfrost_device *pfdev, 417 struct panfrost_job *job, 418 unsigned int js) 419 { 420 u32 js_status = job_read(pfdev, JS_STATUS(js)); 421 const char *exception_name = panfrost_exception_name(js_status); 422 bool signal_fence = true; 423 424 if (!panfrost_exception_is_fault(js_status)) { 425 dev_dbg(pfdev->dev, "js event, js=%d, status=%s, head=0x%x, tail=0x%x", 426 js, exception_name, 427 job_read(pfdev, JS_HEAD_LO(js)), 428 job_read(pfdev, JS_TAIL_LO(js))); 429 } else { 430 dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x", 431 js, exception_name, 432 job_read(pfdev, JS_HEAD_LO(js)), 433 job_read(pfdev, JS_TAIL_LO(js))); 434 } 435 436 if (js_status == DRM_PANFROST_EXCEPTION_STOPPED) { 437 /* Update the job head so we can resume */ 438 job->jc = job_read(pfdev, JS_TAIL_LO(js)) | 439 ((u64)job_read(pfdev, JS_TAIL_HI(js)) << 32); 440 441 /* The job will be resumed, don't signal the fence */ 442 signal_fence = false; 443 } else if (js_status == DRM_PANFROST_EXCEPTION_TERMINATED) { 444 /* Job has been hard-stopped, flag it as canceled */ 445 dma_fence_set_error(job->done_fence, -ECANCELED); 446 job->jc = 0; 447 } else if (panfrost_exception_is_fault(js_status)) { 448 /* We might want to provide finer-grained error code based on 449 * the exception type, but unconditionally setting to EINVAL 450 * is good enough for now. 451 */ 452 dma_fence_set_error(job->done_fence, -EINVAL); 453 job->jc = 0; 454 } 455 456 panfrost_mmu_as_put(pfdev, job->mmu); 457 panfrost_devfreq_record_idle(&pfdev->pfdevfreq); 458 459 if (signal_fence) 460 dma_fence_signal_locked(job->done_fence); 461 462 pm_runtime_put_autosuspend(pfdev->dev); 463 464 if (panfrost_exception_needs_reset(pfdev, js_status)) { 465 atomic_set(&pfdev->reset.pending, 1); 466 drm_sched_fault(&pfdev->js->queue[js].sched); 467 } 468 } 469 470 static void panfrost_job_handle_done(struct panfrost_device *pfdev, 471 struct panfrost_job *job) 472 { 473 /* Set ->jc to 0 to avoid re-submitting an already finished job (can 474 * happen when we receive the DONE interrupt while doing a GPU reset). 475 */ 476 job->jc = 0; 477 panfrost_mmu_as_put(pfdev, job->mmu); 478 panfrost_devfreq_record_idle(&pfdev->pfdevfreq); 479 480 dma_fence_signal_locked(job->done_fence); 481 pm_runtime_put_autosuspend(pfdev->dev); 482 } 483 484 static void panfrost_job_handle_irq(struct panfrost_device *pfdev, u32 status) 485 { 486 struct panfrost_job *done[NUM_JOB_SLOTS][2] = {}; 487 struct panfrost_job *failed[NUM_JOB_SLOTS] = {}; 488 u32 js_state = 0, js_events = 0; 489 unsigned int i, j; 490 491 /* First we collect all failed/done jobs. */ 492 while (status) { 493 u32 js_state_mask = 0; 494 495 for (j = 0; j < NUM_JOB_SLOTS; j++) { 496 if (status & MK_JS_MASK(j)) 497 js_state_mask |= MK_JS_MASK(j); 498 499 if (status & JOB_INT_MASK_DONE(j)) { 500 if (done[j][0]) 501 done[j][1] = panfrost_dequeue_job(pfdev, j); 502 else 503 done[j][0] = panfrost_dequeue_job(pfdev, j); 504 } 505 506 if (status & JOB_INT_MASK_ERR(j)) { 507 /* Cancel the next submission. Will be submitted 508 * after we're done handling this failure if 509 * there's no reset pending. 510 */ 511 job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP); 512 failed[j] = panfrost_dequeue_job(pfdev, j); 513 } 514 } 515 516 /* JS_STATE is sampled when JOB_INT_CLEAR is written. 517 * For each BIT(slot) or BIT(slot + 16) bit written to 518 * JOB_INT_CLEAR, the corresponding bits in JS_STATE 519 * (BIT(slot) and BIT(slot + 16)) are updated, but this 520 * is racy. If we only have one job done at the time we 521 * read JOB_INT_RAWSTAT but the second job fails before we 522 * clear the status, we end up with a status containing 523 * only the DONE bit and consider both jobs as DONE since 524 * JS_STATE reports both NEXT and CURRENT as inactive. 525 * To prevent that, let's repeat this clear+read steps 526 * until status is 0. 527 */ 528 job_write(pfdev, JOB_INT_CLEAR, status); 529 js_state &= ~js_state_mask; 530 js_state |= job_read(pfdev, JOB_INT_JS_STATE) & js_state_mask; 531 js_events |= status; 532 status = job_read(pfdev, JOB_INT_RAWSTAT); 533 } 534 535 /* Then we handle the dequeued jobs. */ 536 for (j = 0; j < NUM_JOB_SLOTS; j++) { 537 if (!(js_events & MK_JS_MASK(j))) 538 continue; 539 540 if (failed[j]) { 541 panfrost_job_handle_err(pfdev, failed[j], j); 542 } else if (pfdev->jobs[j][0] && !(js_state & MK_JS_MASK(j))) { 543 /* When the current job doesn't fail, the JM dequeues 544 * the next job without waiting for an ACK, this means 545 * we can have 2 jobs dequeued and only catch the 546 * interrupt when the second one is done. If both slots 547 * are inactive, but one job remains in pfdev->jobs[j], 548 * consider it done. Of course that doesn't apply if a 549 * failure happened since we cancelled execution of the 550 * job in _NEXT (see above). 551 */ 552 if (WARN_ON(!done[j][0])) 553 done[j][0] = panfrost_dequeue_job(pfdev, j); 554 else 555 done[j][1] = panfrost_dequeue_job(pfdev, j); 556 } 557 558 for (i = 0; i < ARRAY_SIZE(done[0]) && done[j][i]; i++) 559 panfrost_job_handle_done(pfdev, done[j][i]); 560 } 561 562 /* And finally we requeue jobs that were waiting in the second slot 563 * and have been stopped if we detected a failure on the first slot. 564 */ 565 for (j = 0; j < NUM_JOB_SLOTS; j++) { 566 if (!(js_events & MK_JS_MASK(j))) 567 continue; 568 569 if (!failed[j] || !pfdev->jobs[j][0]) 570 continue; 571 572 if (pfdev->jobs[j][0]->jc == 0) { 573 /* The job was cancelled, signal the fence now */ 574 struct panfrost_job *canceled = panfrost_dequeue_job(pfdev, j); 575 576 dma_fence_set_error(canceled->done_fence, -ECANCELED); 577 panfrost_job_handle_done(pfdev, canceled); 578 } else if (!atomic_read(&pfdev->reset.pending)) { 579 /* Requeue the job we removed if no reset is pending */ 580 job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_START); 581 } 582 } 583 } 584 585 static void panfrost_job_handle_irqs(struct panfrost_device *pfdev) 586 { 587 u32 status = job_read(pfdev, JOB_INT_RAWSTAT); 588 589 while (status) { 590 pm_runtime_mark_last_busy(pfdev->dev); 591 592 spin_lock(&pfdev->js->job_lock); 593 panfrost_job_handle_irq(pfdev, status); 594 spin_unlock(&pfdev->js->job_lock); 595 status = job_read(pfdev, JOB_INT_RAWSTAT); 596 } 597 } 598 599 static u32 panfrost_active_slots(struct panfrost_device *pfdev, 600 u32 *js_state_mask, u32 js_state) 601 { 602 u32 rawstat; 603 604 if (!(js_state & *js_state_mask)) 605 return 0; 606 607 rawstat = job_read(pfdev, JOB_INT_RAWSTAT); 608 if (rawstat) { 609 unsigned int i; 610 611 for (i = 0; i < NUM_JOB_SLOTS; i++) { 612 if (rawstat & MK_JS_MASK(i)) 613 *js_state_mask &= ~MK_JS_MASK(i); 614 } 615 } 616 617 return js_state & *js_state_mask; 618 } 619 620 static void 621 panfrost_reset(struct panfrost_device *pfdev, 622 struct drm_sched_job *bad) 623 { 624 u32 js_state, js_state_mask = 0xffffffff; 625 unsigned int i, j; 626 bool cookie; 627 int ret; 628 629 if (!atomic_read(&pfdev->reset.pending)) 630 return; 631 632 /* Stop the schedulers. 633 * 634 * FIXME: We temporarily get out of the dma_fence_signalling section 635 * because the cleanup path generate lockdep splats when taking locks 636 * to release job resources. We should rework the code to follow this 637 * pattern: 638 * 639 * try_lock 640 * if (locked) 641 * release 642 * else 643 * schedule_work_to_release_later 644 */ 645 for (i = 0; i < NUM_JOB_SLOTS; i++) 646 drm_sched_stop(&pfdev->js->queue[i].sched, bad); 647 648 cookie = dma_fence_begin_signalling(); 649 650 if (bad) 651 drm_sched_increase_karma(bad); 652 653 /* Mask job interrupts and synchronize to make sure we won't be 654 * interrupted during our reset. 655 */ 656 job_write(pfdev, JOB_INT_MASK, 0); 657 synchronize_irq(pfdev->js->irq); 658 659 for (i = 0; i < NUM_JOB_SLOTS; i++) { 660 /* Cancel the next job and soft-stop the running job. */ 661 job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP); 662 job_write(pfdev, JS_COMMAND(i), JS_COMMAND_SOFT_STOP); 663 } 664 665 /* Wait at most 10ms for soft-stops to complete */ 666 ret = readl_poll_timeout(pfdev->iomem + JOB_INT_JS_STATE, js_state, 667 !panfrost_active_slots(pfdev, &js_state_mask, js_state), 668 10, 10000); 669 670 if (ret) 671 dev_err(pfdev->dev, "Soft-stop failed\n"); 672 673 /* Handle the remaining interrupts before we reset. */ 674 panfrost_job_handle_irqs(pfdev); 675 676 /* Remaining interrupts have been handled, but we might still have 677 * stuck jobs. Let's make sure the PM counters stay balanced by 678 * manually calling pm_runtime_put_noidle() and 679 * panfrost_devfreq_record_idle() for each stuck job. 680 * Let's also make sure the cycle counting register's refcnt is 681 * kept balanced to prevent it from running forever 682 */ 683 spin_lock(&pfdev->js->job_lock); 684 for (i = 0; i < NUM_JOB_SLOTS; i++) { 685 for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) { 686 if (pfdev->jobs[i][j]->is_profiled) 687 panfrost_cycle_counter_put(pfdev->jobs[i][j]->pfdev); 688 pm_runtime_put_noidle(pfdev->dev); 689 panfrost_devfreq_record_idle(&pfdev->pfdevfreq); 690 } 691 } 692 memset(pfdev->jobs, 0, sizeof(pfdev->jobs)); 693 spin_unlock(&pfdev->js->job_lock); 694 695 /* Proceed with reset now. */ 696 panfrost_device_reset(pfdev); 697 698 /* panfrost_device_reset() unmasks job interrupts, but we want to 699 * keep them masked a bit longer. 700 */ 701 job_write(pfdev, JOB_INT_MASK, 0); 702 703 /* GPU has been reset, we can clear the reset pending bit. */ 704 atomic_set(&pfdev->reset.pending, 0); 705 706 /* Now resubmit jobs that were previously queued but didn't have a 707 * chance to finish. 708 * FIXME: We temporarily get out of the DMA fence signalling section 709 * while resubmitting jobs because the job submission logic will 710 * allocate memory with the GFP_KERNEL flag which can trigger memory 711 * reclaim and exposes a lock ordering issue. 712 */ 713 dma_fence_end_signalling(cookie); 714 for (i = 0; i < NUM_JOB_SLOTS; i++) 715 drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched); 716 cookie = dma_fence_begin_signalling(); 717 718 /* Restart the schedulers */ 719 for (i = 0; i < NUM_JOB_SLOTS; i++) 720 drm_sched_start(&pfdev->js->queue[i].sched, true); 721 722 /* Re-enable job interrupts now that everything has been restarted. */ 723 job_write(pfdev, JOB_INT_MASK, 724 GENMASK(16 + NUM_JOB_SLOTS - 1, 16) | 725 GENMASK(NUM_JOB_SLOTS - 1, 0)); 726 727 dma_fence_end_signalling(cookie); 728 } 729 730 static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job 731 *sched_job) 732 { 733 struct panfrost_job *job = to_panfrost_job(sched_job); 734 struct panfrost_device *pfdev = job->pfdev; 735 int js = panfrost_job_get_slot(job); 736 737 /* 738 * If the GPU managed to complete this jobs fence, the timeout is 739 * spurious. Bail out. 740 */ 741 if (dma_fence_is_signaled(job->done_fence)) 742 return DRM_GPU_SCHED_STAT_NOMINAL; 743 744 /* 745 * Panfrost IRQ handler may take a long time to process an interrupt 746 * if there is another IRQ handler hogging the processing. 747 * For example, the HDMI encoder driver might be stuck in the IRQ 748 * handler for a significant time in a case of bad cable connection. 749 * In order to catch such cases and not report spurious Panfrost 750 * job timeouts, synchronize the IRQ handler and re-check the fence 751 * status. 752 */ 753 synchronize_irq(pfdev->js->irq); 754 755 if (dma_fence_is_signaled(job->done_fence)) { 756 dev_warn(pfdev->dev, "unexpectedly high interrupt latency\n"); 757 return DRM_GPU_SCHED_STAT_NOMINAL; 758 } 759 760 dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p", 761 js, 762 job_read(pfdev, JS_CONFIG(js)), 763 job_read(pfdev, JS_STATUS(js)), 764 job_read(pfdev, JS_HEAD_LO(js)), 765 job_read(pfdev, JS_TAIL_LO(js)), 766 sched_job); 767 768 panfrost_core_dump(job); 769 770 atomic_set(&pfdev->reset.pending, 1); 771 panfrost_reset(pfdev, sched_job); 772 773 return DRM_GPU_SCHED_STAT_NOMINAL; 774 } 775 776 static void panfrost_reset_work(struct work_struct *work) 777 { 778 struct panfrost_device *pfdev; 779 780 pfdev = container_of(work, struct panfrost_device, reset.work); 781 panfrost_reset(pfdev, NULL); 782 } 783 784 static const struct drm_sched_backend_ops panfrost_sched_ops = { 785 .run_job = panfrost_job_run, 786 .timedout_job = panfrost_job_timedout, 787 .free_job = panfrost_job_free 788 }; 789 790 static irqreturn_t panfrost_job_irq_handler_thread(int irq, void *data) 791 { 792 struct panfrost_device *pfdev = data; 793 794 panfrost_job_handle_irqs(pfdev); 795 job_write(pfdev, JOB_INT_MASK, 796 GENMASK(16 + NUM_JOB_SLOTS - 1, 16) | 797 GENMASK(NUM_JOB_SLOTS - 1, 0)); 798 return IRQ_HANDLED; 799 } 800 801 static irqreturn_t panfrost_job_irq_handler(int irq, void *data) 802 { 803 struct panfrost_device *pfdev = data; 804 u32 status = job_read(pfdev, JOB_INT_STAT); 805 806 if (!status) 807 return IRQ_NONE; 808 809 job_write(pfdev, JOB_INT_MASK, 0); 810 return IRQ_WAKE_THREAD; 811 } 812 813 int panfrost_job_init(struct panfrost_device *pfdev) 814 { 815 struct panfrost_job_slot *js; 816 unsigned int nentries = 2; 817 int ret, j; 818 819 /* All GPUs have two entries per queue, but without jobchain 820 * disambiguation stopping the right job in the close path is tricky, 821 * so let's just advertise one entry in that case. 822 */ 823 if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) 824 nentries = 1; 825 826 pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL); 827 if (!js) 828 return -ENOMEM; 829 830 INIT_WORK(&pfdev->reset.work, panfrost_reset_work); 831 spin_lock_init(&js->job_lock); 832 833 js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job"); 834 if (js->irq < 0) 835 return js->irq; 836 837 ret = devm_request_threaded_irq(pfdev->dev, js->irq, 838 panfrost_job_irq_handler, 839 panfrost_job_irq_handler_thread, 840 IRQF_SHARED, KBUILD_MODNAME "-job", 841 pfdev); 842 if (ret) { 843 dev_err(pfdev->dev, "failed to request job irq"); 844 return ret; 845 } 846 847 pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0); 848 if (!pfdev->reset.wq) 849 return -ENOMEM; 850 851 for (j = 0; j < NUM_JOB_SLOTS; j++) { 852 js->queue[j].fence_context = dma_fence_context_alloc(1); 853 854 ret = drm_sched_init(&js->queue[j].sched, 855 &panfrost_sched_ops, 856 DRM_SCHED_PRIORITY_COUNT, 857 nentries, 0, 858 msecs_to_jiffies(JOB_TIMEOUT_MS), 859 pfdev->reset.wq, 860 NULL, "pan_js", pfdev->dev); 861 if (ret) { 862 dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret); 863 goto err_sched; 864 } 865 } 866 867 panfrost_job_enable_interrupts(pfdev); 868 869 return 0; 870 871 err_sched: 872 for (j--; j >= 0; j--) 873 drm_sched_fini(&js->queue[j].sched); 874 875 destroy_workqueue(pfdev->reset.wq); 876 return ret; 877 } 878 879 void panfrost_job_fini(struct panfrost_device *pfdev) 880 { 881 struct panfrost_job_slot *js = pfdev->js; 882 int j; 883 884 job_write(pfdev, JOB_INT_MASK, 0); 885 886 for (j = 0; j < NUM_JOB_SLOTS; j++) { 887 drm_sched_fini(&js->queue[j].sched); 888 } 889 890 cancel_work_sync(&pfdev->reset.work); 891 destroy_workqueue(pfdev->reset.wq); 892 } 893 894 int panfrost_job_open(struct panfrost_file_priv *panfrost_priv) 895 { 896 struct panfrost_device *pfdev = panfrost_priv->pfdev; 897 struct panfrost_job_slot *js = pfdev->js; 898 struct drm_gpu_scheduler *sched; 899 int ret, i; 900 901 for (i = 0; i < NUM_JOB_SLOTS; i++) { 902 sched = &js->queue[i].sched; 903 ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i], 904 DRM_SCHED_PRIORITY_NORMAL, &sched, 905 1, NULL); 906 if (WARN_ON(ret)) 907 return ret; 908 } 909 return 0; 910 } 911 912 void panfrost_job_close(struct panfrost_file_priv *panfrost_priv) 913 { 914 struct panfrost_device *pfdev = panfrost_priv->pfdev; 915 int i; 916 917 for (i = 0; i < NUM_JOB_SLOTS; i++) 918 drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]); 919 920 /* Kill in-flight jobs */ 921 spin_lock(&pfdev->js->job_lock); 922 for (i = 0; i < NUM_JOB_SLOTS; i++) { 923 struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i]; 924 int j; 925 926 for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) { 927 struct panfrost_job *job = pfdev->jobs[i][j]; 928 u32 cmd; 929 930 if (!job || job->base.entity != entity) 931 continue; 932 933 if (j == 1) { 934 /* Try to cancel the job before it starts */ 935 job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP); 936 /* Reset the job head so it doesn't get restarted if 937 * the job in the first slot failed. 938 */ 939 job->jc = 0; 940 } 941 942 if (panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { 943 cmd = panfrost_get_job_chain_flag(job) ? 944 JS_COMMAND_HARD_STOP_1 : 945 JS_COMMAND_HARD_STOP_0; 946 } else { 947 cmd = JS_COMMAND_HARD_STOP; 948 } 949 950 job_write(pfdev, JS_COMMAND(i), cmd); 951 952 /* Jobs can outlive their file context */ 953 job->engine_usage = NULL; 954 } 955 } 956 spin_unlock(&pfdev->js->job_lock); 957 } 958 959 int panfrost_job_is_idle(struct panfrost_device *pfdev) 960 { 961 struct panfrost_job_slot *js = pfdev->js; 962 int i; 963 964 for (i = 0; i < NUM_JOB_SLOTS; i++) { 965 /* If there are any jobs in the HW queue, we're not idle */ 966 if (atomic_read(&js->queue[i].sched.hw_rq_count)) 967 return false; 968 } 969 970 return true; 971 } 972