// SPDX-License-Identifier: MIT
/*
 * Copyright © 2008-2021 Intel Corporation
 */

#include <drm/drm_cache.h>

#include "gem/i915_gem_internal.h"

#include "gen2_engine_cs.h"
#include "gen6_engine_cs.h"
#include "gen6_ppgtt.h"
#include "gen7_renderclear.h"
#include "i915_drv.h"
#include "i915_irq.h"
#include "i915_mitigations.h"
#include "i915_reg.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_regs.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_gt_regs.h"
#include "intel_reset.h"
#include "intel_ring.h"
#include "shmem_utils.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt_print.h"

/* Rough estimate of the typical request size, performing a flush,
 * set-context and then emitting the batch.
 */
#define LEGACY_REQUEST_SIZE 200

static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
{
	/*
	 * Keep the render interrupt unmasked as this papers over
	 * lost interrupts following a reset.
	 */
	if (engine->class == RENDER_CLASS) {
		if (GRAPHICS_VER(engine->i915) >= 6)
			mask &= ~BIT(0);
		else
			mask &= ~I915_USER_INTERRUPT;
	}

	intel_engine_set_hwsp_writemask(engine, mask);
}
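
/*
 * Note on the address encoding below: the low 32 bits of the physical
 * address are written directly, and on gen4+ bits 35:32 of the physical
 * address are packed into bits 7:4 of HWS_PGA, which is what the
 * (phys >> 28) & 0xf0 term computes.
 */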
static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
{
	u32 addr;

	addr = lower_32_bits(phys);
	if (GRAPHICS_VER(engine->i915) >= 4)
		addr |= (phys >> 28) & 0xf0;

	intel_uncore_write(engine->uncore, HWS_PGA, addr);
}

static struct page *status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj = engine->status_page.vma->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
{
	set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
	set_hwstam(engine, ~0u);
}

static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
{
	i915_reg_t hwsp;

	/*
	 * The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (GRAPHICS_VER(engine->i915) == 7) {
		switch (engine->id) {
		/*
		 * No more rings exist on Gen7. Default case is only to shut up
		 * gcc switch check warning.
		 */
		default:
			GEM_BUG_ON(engine->id);
			fallthrough;
		case RCS0:
			hwsp = RENDER_HWS_PGA_GEN7;
			break;
		case BCS0:
			hwsp = BLT_HWS_PGA_GEN7;
			break;
		case VCS0:
			hwsp = BSD_HWS_PGA_GEN7;
			break;
		case VECS0:
			hwsp = VEBOX_HWS_PGA_GEN7;
			break;
		}
	} else if (GRAPHICS_VER(engine->i915) == 6) {
		hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
	} else {
		hwsp = RING_HWS_PGA(engine->mmio_base);
	}

	intel_uncore_write_fw(engine->uncore, hwsp, offset);
	intel_uncore_posting_read_fw(engine->uncore, hwsp);
}

static void flush_cs_tlb(struct intel_engine_cs *engine)
{
	if (!IS_GRAPHICS_VER(engine->i915, 6, 7))
		return;

	/* ring should be idle before issuing a sync flush */
	if ((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0)
		drm_warn(&engine->i915->drm, "%s not idle before sync flush!\n",
			 engine->name);

	ENGINE_WRITE_FW(engine, RING_INSTPM,
			_MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
					   INSTPM_SYNC_FLUSH));
	if (__intel_wait_for_register_fw(engine->uncore,
					 RING_INSTPM(engine->mmio_base),
					 INSTPM_SYNC_FLUSH, 0,
					 2000, 0, NULL))
		ENGINE_TRACE(engine,
			     "wait for SyncFlush to complete for TLB invalidation timed out\n");
}

static void ring_setup_status_page(struct intel_engine_cs *engine)
{
	set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
	set_hwstam(engine, ~0u);

	flush_cs_tlb(engine);
}

static struct i915_address_space *vm_alias(struct i915_address_space *vm)
{
	if (i915_is_ggtt(vm))
		vm = &i915_vm_to_ggtt(vm)->alias->vm;

	return vm;
}

static u32 pp_dir(struct i915_address_space *vm)
{
	return to_gen6_ppgtt(i915_vm_to_ppgtt(vm))->pp_dir;
}

static void set_pp_dir(struct intel_engine_cs *engine)
{
	struct i915_address_space *vm = vm_alias(engine->gt->vm);

	if (!vm)
		return;

	ENGINE_WRITE_FW(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G);
	ENGINE_WRITE_FW(engine, RING_PP_DIR_BASE, pp_dir(vm));

	if (GRAPHICS_VER(engine->i915) >= 7) {
		ENGINE_WRITE_FW(engine,
				RING_MODE_GEN7,
				_MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

static bool stop_ring(struct intel_engine_cs *engine)
{
	/* Empty the ring by skipping to the end */
	ENGINE_WRITE_FW(engine, RING_HEAD, ENGINE_READ_FW(engine, RING_TAIL));
	ENGINE_POSTING_READ(engine, RING_HEAD);

	/* The ring must be empty before it is disabled */
	ENGINE_WRITE_FW(engine, RING_CTL, 0);
	ENGINE_POSTING_READ(engine, RING_CTL);

	/* Then reset the disabled ring */
	ENGINE_WRITE_FW(engine, RING_HEAD, 0);
	ENGINE_WRITE_FW(engine, RING_TAIL, 0);

	return (ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) == 0;
}
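
/*
 * Bring the ring back up after reset/resume: quiesce and drain the ring,
 * re-point the hardware at the status page and the ring buffer, restore
 * HEAD/TAIL and the ppgtt page directory, and only then mark the ring
 * valid and kick any pending tail.
 */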
static int xcs_resume(struct intel_engine_cs *engine)
{
	struct intel_ring *ring = engine->legacy.ring;
	ktime_t kt;

	ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
		     ring->head, ring->tail);

	/*
	 * Double check the ring is empty & disabled before we resume. Called
	 * from atomic context during PCI probe, so _hardirq().
	 */
	intel_synchronize_hardirq(engine->i915);
	if (!stop_ring(engine))
		goto err;

	if (HWS_NEEDS_PHYSICAL(engine->i915))
		ring_setup_phys_status_page(engine);
	else
		ring_setup_status_page(engine);

	intel_breadcrumbs_reset(engine->breadcrumbs);

	/* Enforce ordering by reading HEAD register back */
	ENGINE_POSTING_READ(engine, RING_HEAD);

	/*
	 * Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD registers
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values.
	 */
	ENGINE_WRITE_FW(engine, RING_START, i915_ggtt_offset(ring->vma));

	/* Check that the ring offsets point within the ring! */
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
	intel_ring_update_space(ring);

	set_pp_dir(engine);

	/*
	 * First wake the ring up to an empty/idle ring.
	 * Allow up to 50ms for the write to RING_HEAD to take effect;
	 * experiments with different values showed 50ms to be sufficient
	 * across all platforms.
	 */
	for ((kt) = ktime_get() + (50 * NSEC_PER_MSEC);
	     ktime_before(ktime_get(), (kt)); cpu_relax()) {
		/*
		 * If the reset failed, the engine may resume from an
		 * incorrect RING_HEAD and the GPU may then be fed invalid
		 * instructions, leading to an unrecoverable hang. So if
		 * the first write doesn't stick, try again.
		 */
		ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
		if (ENGINE_READ_FW(engine, RING_HEAD) == ring->head)
			break;
	}

	ENGINE_WRITE_FW(engine, RING_TAIL, ring->head);
	if (ENGINE_READ_FW(engine, RING_HEAD) != ENGINE_READ_FW(engine, RING_TAIL)) {
		ENGINE_TRACE(engine, "failed to reset empty ring: [%x, %x]: %x\n",
			     ENGINE_READ_FW(engine, RING_HEAD),
			     ENGINE_READ_FW(engine, RING_TAIL),
			     ring->head);
		goto err;
	}

	ENGINE_WRITE_FW(engine, RING_CTL,
			RING_CTL_SIZE(ring->size) | RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (__intel_wait_for_register_fw(engine->uncore,
					 RING_CTL(engine->mmio_base),
					 RING_VALID, RING_VALID,
					 5000, 0, NULL)) {
		ENGINE_TRACE(engine, "failed to restart\n");
		goto err;
	}

	if (GRAPHICS_VER(engine->i915) > 2) {
		ENGINE_WRITE_FW(engine,
				RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
		ENGINE_POSTING_READ(engine, RING_MI_MODE);
	}

	/* Now awake, let it get started */
	if (ring->tail != ring->head) {
		ENGINE_WRITE_FW(engine, RING_TAIL, ring->tail);
		ENGINE_POSTING_READ(engine, RING_TAIL);
	}

	/* Papering over lost _interrupts_ immediately following the restart */
	intel_engine_signal_breadcrumbs(engine);
	return 0;

err:
	gt_err(engine->gt, "%s initialization failed\n", engine->name);
	ENGINE_TRACE(engine,
		     "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
		     ENGINE_READ(engine, RING_CTL),
		     ENGINE_READ(engine, RING_CTL) & RING_VALID,
		     ENGINE_READ(engine, RING_HEAD), ring->head,
		     ENGINE_READ(engine, RING_TAIL), ring->tail,
		     ENGINE_READ(engine, RING_START),
		     i915_ggtt_offset(ring->vma));
	GEM_TRACE_DUMP();
	return -EIO;
}

static void sanitize_hwsp(struct intel_engine_cs *engine)
{
	struct intel_timeline *tl;

	list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
		intel_timeline_reset_seqno(tl);
}

static void xcs_sanitize(struct intel_engine_cs *engine)
{
	/*
	 * Poison residual state on resume, in case the suspend didn't!
	 *
	 * We have to assume that across suspend/resume (or other loss
	 * of control) the contents of our pinned buffers have been
	 * lost, replaced by garbage. Since this doesn't always happen,
	 * let's poison such state so that we more quickly spot when
	 * we falsely assume it has been preserved.
	 */
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);

	/*
	 * The kernel_context HWSP is stored in the status_page. As above,
	 * that may be lost on resume/initialisation, and so we need to
	 * reset the value in the HWSP.
	 */
	sanitize_hwsp(engine);

	/* And scrub the dirty cachelines for the HWSP */
	drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);

	intel_engine_reset_pinned_contexts(engine);
}

static void reset_prepare(struct intel_engine_cs *engine)
{
	/*
	 * We stop the engines, otherwise we might get a failed reset and a
	 * dead gpu (on elk). Even a gpu as modern as kbl can suffer a
	 * system hang if a batchbuffer is still progressing when the reset
	 * is issued, regardless of the READY_TO_RESET ack.
	 * Thus assume it is best to stop the engines on all gens
	 * where we have a gpu reset.
	 *
	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
	 *
	 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
	 * WaClearRingBufHeadRegAtInit:ctg,elk
	 *
	 * FIXME: Wa for more modern gens needs to be validated
	 */
	ENGINE_TRACE(engine, "\n");
	intel_engine_stop_cs(engine);

	if (!stop_ring(engine)) {
		/* G45 ring initialization often fails to reset head to zero */
		ENGINE_TRACE(engine,
			     "HEAD not reset to zero, "
			     "{ CTL:%08x, HEAD:%08x, TAIL:%08x, START:%08x }\n",
			     ENGINE_READ_FW(engine, RING_CTL),
			     ENGINE_READ_FW(engine, RING_HEAD),
			     ENGINE_READ_FW(engine, RING_TAIL),
			     ENGINE_READ_FW(engine, RING_START));
		if (!stop_ring(engine)) {
			drm_err(&engine->i915->drm,
				"failed to set %s head to zero "
				"ctl %08x head %08x tail %08x start %08x\n",
				engine->name,
				ENGINE_READ_FW(engine, RING_CTL),
				ENGINE_READ_FW(engine, RING_HEAD),
				ENGINE_READ_FW(engine, RING_TAIL),
				ENGINE_READ_FW(engine, RING_START));
		}
	}
}
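
/*
 * After a reset, rewind the ring to the first incomplete request (if any)
 * so execution resumes from there; __i915_request_reset() takes care of
 * skipping the guilty batch or replaying an innocent one.
 */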
static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
	struct i915_request *pos, *rq;
	unsigned long flags;
	u32 head;

	rq = NULL;
	spin_lock_irqsave(&engine->sched_engine->lock, flags);
	rcu_read_lock();
	list_for_each_entry(pos, &engine->sched_engine->requests, sched.link) {
		if (!__i915_request_is_complete(pos)) {
			rq = pos;
			break;
		}
	}
	rcu_read_unlock();

	/*
	 * The guilty request will get skipped on a hung engine.
	 *
	 * Users of client default contexts do not rely on logical
	 * state preserved between batches so it is safe to execute
	 * queued requests following the hang. Non default contexts
	 * rely on preserved state, so skipping a batch loses the
	 * evolution of the state and it needs to be considered corrupted.
	 * Executing more queued batches on top of corrupted state is
	 * risky. But we take the risk by trying to advance through
	 * the queued requests in order to make the client behaviour
	 * more predictable around resets, by not throwing away a random
	 * number of batches it has prepared for execution. Sophisticated
	 * clients can use gem_reset_stats_ioctl and dma fence status
	 * (exported via sync_file info ioctl on explicit fences) to observe
	 * when they lose the context state and should rebuild accordingly.
	 *
	 * The context ban, and ultimately the client ban, mechanisms are
	 * safety valves if client submission ends up resulting in nothing
	 * more than subsequent hangs.
	 */

	if (rq) {
		/*
		 * Try to restore the logical GPU state to match the
		 * continuation of the request queue. If we skip the
		 * context/PD restore, then the next request may try to execute
		 * assuming that its context is valid and loaded on the GPU and
		 * so may try to access invalid memory, prompting repeated GPU
		 * hangs.
		 *
		 * If the request was guilty, we still restore the logical
		 * state in case the next request requires it (e.g. the
		 * aliasing ppgtt), but skip over the hung batch.
		 *
		 * If the request was innocent, we try to replay the request
		 * with the restored context.
		 */
		__i915_request_reset(rq, stalled);

		GEM_BUG_ON(rq->ring != engine->legacy.ring);
		head = rq->head;
	} else {
		head = engine->legacy.ring->tail;
	}
	engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head);

	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
}

static void reset_finish(struct intel_engine_cs *engine)
{
}

static void reset_cancel(struct intel_engine_cs *engine)
{
	struct i915_request *request;
	unsigned long flags;

	spin_lock_irqsave(&engine->sched_engine->lock, flags);

	/* Mark all submitted requests as skipped. */
	list_for_each_entry(request, &engine->sched_engine->requests, sched.link)
		i915_request_put(i915_request_mark_eio(request));
	intel_engine_signal_breadcrumbs(engine);

	/* Remaining _unready_ requests will be nop'ed when submitted */

	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
}
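
/*
 * Legacy submission: the request has already been written into the shared
 * ring buffer by this point, so submitting is just a matter of publishing
 * the new TAIL to the hardware.
 */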
static void i9xx_submit_request(struct i915_request *request)
{
	i915_request_submit(request);
	wmb(); /* paranoid flush writes out of the WCB before mmio */

	ENGINE_WRITE(request->engine, RING_TAIL,
		     intel_ring_set_tail(request->ring, request->tail));
}

static void __ring_context_fini(struct intel_context *ce)
{
	i915_vma_put(ce->state);
}

static void ring_context_destroy(struct kref *ref)
{
	struct intel_context *ce = container_of(ref, typeof(*ce), ref);

	GEM_BUG_ON(intel_context_is_pinned(ce));

	if (ce->state)
		__ring_context_fini(ce);

	intel_context_fini(ce);
	intel_context_free(ce);
}

static int ring_context_init_default_state(struct intel_context *ce,
					   struct i915_gem_ww_ctx *ww)
{
	struct drm_i915_gem_object *obj = ce->state->obj;
	void *vaddr;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	shmem_read(ce->default_state, 0, vaddr, ce->engine->context_size);

	i915_gem_object_flush_map(obj);
	__i915_gem_object_release_map(obj);

	__set_bit(CONTEXT_VALID_BIT, &ce->flags);
	return 0;
}

static int ring_context_pre_pin(struct intel_context *ce,
				struct i915_gem_ww_ctx *ww,
				void **unused)
{
	struct i915_address_space *vm;
	int err = 0;

	if (ce->default_state &&
	    !test_bit(CONTEXT_VALID_BIT, &ce->flags)) {
		err = ring_context_init_default_state(ce, ww);
		if (err)
			return err;
	}

	vm = vm_alias(ce->vm);
	if (vm)
		err = gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww);

	return err;
}

static void __context_unpin_ppgtt(struct intel_context *ce)
{
	struct i915_address_space *vm;

	vm = vm_alias(ce->vm);
	if (vm)
		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
}

static void ring_context_unpin(struct intel_context *ce)
{
}

static void ring_context_post_unpin(struct intel_context *ce)
{
	__context_unpin_ppgtt(ce);
}

static struct i915_vma *
alloc_context_vma(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_shmem(i915, engine->context_size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	/*
	 * Try to make the context utilize L3 as well as LLC.
	 *
	 * On VLV we don't have L3 controls in the PTEs so we
	 * shouldn't touch the cache level, especially as that
	 * would make the object snooped which might have a
	 * negative performance impact.
	 *
	 * Snooping is required on non-llc platforms in execlist
	 * mode, but since all GGTT accesses use PAT entry 0 we
	 * get snooping anyway regardless of cache_level.
	 *
	 * This is only applicable for Ivy Bridge devices since
	 * later platforms don't have L3 control bits in the PTE.
	 */
	if (IS_IVYBRIDGE(i915))
		i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	return vma;

err_obj:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int ring_context_alloc(struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;

	if (!intel_context_has_own_state(ce))
		ce->default_state = engine->default_state;

	/* One ringbuffer to rule them all */
	GEM_BUG_ON(!engine->legacy.ring);
	ce->ring = engine->legacy.ring;
	ce->timeline = intel_timeline_get(engine->legacy.timeline);

	GEM_BUG_ON(ce->state);
	if (engine->context_size) {
		struct i915_vma *vma;

		vma = alloc_context_vma(engine);
		if (IS_ERR(vma))
			return PTR_ERR(vma);

		ce->state = vma;
	}

	return 0;
}

static int ring_context_pin(struct intel_context *ce, void *unused)
{
	return 0;
}

static void ring_context_reset(struct intel_context *ce)
{
	intel_ring_reset(ce->ring, ce->ring->emit);
	clear_bit(CONTEXT_VALID_BIT, &ce->flags);
}

static void ring_context_revoke(struct intel_context *ce,
				struct i915_request *rq,
				unsigned int preempt_timeout_ms)
{
	struct intel_engine_cs *engine;

	if (!rq || !i915_request_is_active(rq))
		return;

	engine = rq->engine;
	lockdep_assert_held(&engine->sched_engine->lock);
	list_for_each_entry_continue(rq, &engine->sched_engine->requests,
				     sched.link)
		if (rq->context == ce) {
			i915_request_set_error_once(rq, -EIO);
			__i915_request_skip(rq);
		}
}

static void ring_context_cancel_request(struct intel_context *ce,
					struct i915_request *rq)
{
	struct intel_engine_cs *engine = NULL;

	i915_request_active_engine(rq, &engine);

	if (engine && intel_engine_pulse(engine))
		intel_gt_handle_error(engine->gt, engine->mask, 0,
				      "request cancellation by %s",
				      current->comm);
}

static const struct intel_context_ops ring_context_ops = {
	.alloc = ring_context_alloc,

	.cancel_request = ring_context_cancel_request,

	.revoke = ring_context_revoke,

	.pre_pin = ring_context_pre_pin,
	.pin = ring_context_pin,
	.unpin = ring_context_unpin,
	.post_unpin = ring_context_post_unpin,

	.enter = intel_context_enter_engine,
	.exit = intel_context_exit_engine,

	.reset = ring_context_reset,
	.destroy = ring_context_destroy,
};
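
/*
 * Emit the commands that point the ring at a new ppgtt page directory:
 * load PP_DIR_DCLV and PP_DIR_BASE, read PP_DIR_BASE back into scratch
 * (presumably as a serialising stall), then force a TLB invalidation
 * through INSTPM.
 */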
static int load_pd_dir(struct i915_request *rq,
		       struct i915_address_space *vm,
		       u32 valid)
{
	const struct intel_engine_cs * const engine = rq->engine;
	u32 *cs;

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
	*cs++ = valid;

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
	*cs++ = pp_dir(vm);

	/* Stall until the page table load is complete? */
	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
	*cs++ = intel_gt_scratch_offset(engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);

	intel_ring_advance(rq, cs);

	return rq->engine->emit_flush(rq, EMIT_FLUSH);
}
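
/*
 * Emit MI_SET_CONTEXT to switch the render context, surrounded by the gen7
 * workarounds (MI_ARB_ON_OFF and cross-engine PSMI sleep-message toggling),
 * and bouncing through the kernel context first when a forced restore is
 * requested.
 */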
static int mi_set_context(struct i915_request *rq,
			  struct intel_context *ce,
			  u32 flags)
{
	struct intel_engine_cs *engine = rq->engine;
	struct drm_i915_private *i915 = engine->i915;
	enum intel_engine_id id;
	const int num_engines =
		IS_HASWELL(i915) ? engine->gt->info.num_engines - 1 : 0;
	bool force_restore = false;
	int len;
	u32 *cs;

	len = 4;
	if (GRAPHICS_VER(i915) == 7)
		len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
	else if (GRAPHICS_VER(i915) == 5)
		len += 2;
	if (flags & MI_FORCE_RESTORE) {
		GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
		flags &= ~MI_FORCE_RESTORE;
		force_restore = true;
		len += 2;
	}

	cs = intel_ring_begin(rq, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
	if (GRAPHICS_VER(i915) == 7) {
		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
		if (num_engines) {
			struct intel_engine_cs *signaller;

			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
			for_each_engine(signaller, engine->gt, id) {
				if (signaller == engine)
					continue;

				*cs++ = i915_mmio_reg_offset(
					   RING_PSMI_CTL(signaller->mmio_base));
				*cs++ = _MASKED_BIT_ENABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
			}
		}
	} else if (GRAPHICS_VER(i915) == 5) {
		/*
		 * This w/a is only listed for pre-production ilk a/b steppings,
		 * but is also mentioned for programming the powerctx. To be
		 * safe, just apply the workaround; we do not use SyncFlush so
		 * this should never take effect and so be a no-op!
		 */
		*cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
	}

	if (force_restore) {
		/*
		 * The HW doesn't handle being told to restore the current
		 * context very well. Quite often it likes to go off and
		 * sulk, especially when it is meant to be reloading PP_DIR.
		 * A very simple fix to force the reload is to simply switch
		 * away from the current context and back again.
		 *
		 * Note that the kernel_context will contain random state
		 * following the INHIBIT_RESTORE. We accept this since we
		 * never use the kernel_context state; it is merely a
		 * placeholder we use to flush other contexts.
		 */
		*cs++ = MI_SET_CONTEXT;
		*cs++ = i915_ggtt_offset(engine->kernel_context->state) |
			MI_MM_SPACE_GTT |
			MI_RESTORE_INHIBIT;
	}

	*cs++ = MI_NOOP;
	*cs++ = MI_SET_CONTEXT;
	*cs++ = i915_ggtt_offset(ce->state) | flags;
	/*
	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
	 * WaMiSetContext_Hang:snb,ivb,vlv
	 */
	*cs++ = MI_NOOP;

	if (GRAPHICS_VER(i915) == 7) {
		if (num_engines) {
			struct intel_engine_cs *signaller;
			i915_reg_t last_reg = INVALID_MMIO_REG; /* keep gcc quiet */

			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
			for_each_engine(signaller, engine->gt, id) {
				if (signaller == engine)
					continue;

				last_reg = RING_PSMI_CTL(signaller->mmio_base);
				*cs++ = i915_mmio_reg_offset(last_reg);
				*cs++ = _MASKED_BIT_DISABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
			}

			/* Insert a delay before the next switch! */
			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
			*cs++ = i915_mmio_reg_offset(last_reg);
			*cs++ = intel_gt_scratch_offset(engine->gt,
							INTEL_GT_SCRATCH_FIELD_DEFAULT);
			*cs++ = MI_NOOP;
		}
		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	} else if (GRAPHICS_VER(i915) == 5) {
		*cs++ = MI_SUSPEND_FLUSH;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

static int remap_l3_slice(struct i915_request *rq, int slice)
{
#define L3LOG_DW (GEN7_L3LOG_SIZE / sizeof(u32))
	u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
	int i;

	if (!remap_info)
		return 0;

	cs = intel_ring_begin(rq, L3LOG_DW * 2 + 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * Note: We do not worry about the concurrent register cacheline hang
	 * here because no other code should access these registers other than
	 * at initialization time.
	 */
	*cs++ = MI_LOAD_REGISTER_IMM(L3LOG_DW);
	for (i = 0; i < L3LOG_DW; i++) {
		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
		*cs++ = remap_info[i];
	}
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
#undef L3LOG_DW
}

static int remap_l3(struct i915_request *rq)
{
	struct i915_gem_context *ctx = i915_request_gem_context(rq);
	int i, err;

	if (!ctx || !ctx->remap_slice)
		return 0;

	for (i = 0; i < MAX_L3_SLICES; i++) {
		if (!(ctx->remap_slice & BIT(i)))
			continue;

		err = remap_l3_slice(rq, i);
		if (err)
			return err;
	}

	ctx->remap_slice = 0;
	return 0;
}
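
/*
 * Switch the ppgtt used by the ring: flush outstanding writes, load the
 * new page directory and then invalidate the TLBs so that subsequent
 * batches observe the new address space.
 */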
static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
{
	int ret;

	if (!vm)
		return 0;

	ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
	if (ret)
		return ret;

	/*
	 * Not only do we need a full barrier (post-sync write) after
	 * invalidating the TLBs, but we need to wait a little bit
	 * longer. Whether this is merely delaying us, or the
	 * subsequent flush is a key part of serialising with the
	 * post-sync op, this extra pass appears vital before a
	 * mm switch!
	 */
	ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G);
	if (ret)
		return ret;

	return rq->engine->emit_flush(rq, EMIT_INVALIDATE);
}

static int clear_residuals(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	int ret;

	ret = switch_mm(rq, vm_alias(engine->kernel_context->vm));
	if (ret)
		return ret;

	if (engine->kernel_context->state) {
		ret = mi_set_context(rq,
				     engine->kernel_context,
				     MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT);
		if (ret)
			return ret;
	}

	ret = engine->emit_bb_start(rq,
				    i915_vma_offset(engine->wa_ctx.vma), 0,
				    0);
	if (ret)
		return ret;

	ret = engine->emit_flush(rq, EMIT_FLUSH);
	if (ret)
		return ret;

	/* Always invalidate before the next switch_mm() */
	return engine->emit_flush(rq, EMIT_INVALIDATE);
}

static int switch_context(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct intel_context *ce = rq->context;
	void **residuals = NULL;
	int ret;

	GEM_BUG_ON(HAS_EXECLISTS(engine->i915));

	if (engine->wa_ctx.vma && ce != engine->kernel_context) {
		if (engine->wa_ctx.vma->private != ce &&
		    i915_mitigate_clear_residuals()) {
			ret = clear_residuals(rq);
			if (ret)
				return ret;

			residuals = &engine->wa_ctx.vma->private;
		}
	}

	ret = switch_mm(rq, vm_alias(ce->vm));
	if (ret)
		return ret;

	if (ce->state) {
		u32 flags;

		GEM_BUG_ON(engine->id != RCS0);

		/* For resource streamer on HSW+ and power context elsewhere */
		BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
		BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN);

		flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT;
		if (test_bit(CONTEXT_VALID_BIT, &ce->flags))
			flags |= MI_RESTORE_EXT_STATE_EN;
		else
			flags |= MI_RESTORE_INHIBIT;

		ret = mi_set_context(rq, ce, flags);
		if (ret)
			return ret;
	}

	ret = remap_l3(rq);
	if (ret)
		return ret;

	/*
	 * Now past the point of no return, this request _will_ be emitted.
	 *
	 * Or at least this preamble will be emitted, the request may be
	 * interrupted prior to submitting the user payload. If so, we
	 * still submit the "empty" request in order to preserve global
	 * state tracking such as this, our tracking of the current
	 * dirty context.
	 */
	if (residuals) {
		intel_context_put(*residuals);
		*residuals = intel_context_get(ce);
	}

	return 0;
}
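
/*
 * Per-request preamble for legacy rings: reserve extra ring space up front,
 * then invalidate caches and switch mm/context before the caller starts
 * emitting its payload.
 */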
static int ring_request_alloc(struct i915_request *request)
{
	int ret;

	GEM_BUG_ON(!intel_context_is_pinned(request->context));
	GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);

	/*
	 * Flush enough space to reduce the likelihood of waiting after
	 * we start building the request - in which case we will just
	 * have to repeat work.
	 */
	request->reserved_space += LEGACY_REQUEST_SIZE;

	/* Unconditionally invalidate GPU caches and TLBs. */
	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
	if (ret)
		return ret;

	ret = switch_context(request);
	if (ret)
		return ret;

	request->reserved_space -= LEGACY_REQUEST_SIZE;
	return 0;
}

static void gen6_bsd_submit_request(struct i915_request *request)
{
	struct intel_uncore *uncore = request->engine->uncore;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	intel_uncore_write_fw(uncore, RING_PSMI_CTL(GEN6_BSD_RING_BASE),
			      _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (__intel_wait_for_register_fw(uncore,
					 RING_PSMI_CTL(GEN6_BSD_RING_BASE),
					 GEN6_BSD_SLEEP_INDICATOR,
					 0,
					 1000, 0, NULL))
		drm_err(&uncore->i915->drm,
			"timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	i9xx_submit_request(request);

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	intel_uncore_write_fw(uncore, RING_PSMI_CTL(GEN6_BSD_RING_BASE),
			      _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));

	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
}

static void i9xx_set_default_submission(struct intel_engine_cs *engine)
{
	engine->submit_request = i9xx_submit_request;
}

static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
{
	engine->submit_request = gen6_bsd_submit_request;
}

static void ring_release(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	drm_WARN_ON(&i915->drm, GRAPHICS_VER(i915) > 2 &&
		    (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);

	intel_engine_cleanup_common(engine);

	if (engine->wa_ctx.vma) {
		intel_context_put(engine->wa_ctx.vma->private);
		i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
	}

	intel_ring_unpin(engine->legacy.ring);
	intel_ring_put(engine->legacy.ring);

	intel_timeline_unpin(engine->legacy.timeline);
	intel_timeline_put(engine->legacy.timeline);
}

static void irq_handler(struct intel_engine_cs *engine, u16 iir)
{
	intel_engine_signal_breadcrumbs(engine);
}

static void setup_irq(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	intel_engine_set_irq_handler(engine, irq_handler);

	if (GRAPHICS_VER(i915) >= 6) {
		engine->irq_enable = gen6_irq_enable;
		engine->irq_disable = gen6_irq_disable;
	} else if (GRAPHICS_VER(i915) >= 5) {
		engine->irq_enable = gen5_irq_enable;
		engine->irq_disable = gen5_irq_disable;
	} else {
		engine->irq_enable = gen2_irq_enable;
		engine->irq_disable = gen2_irq_disable;
	}
}

static void add_to_engine(struct i915_request *rq)
{
	lockdep_assert_held(&rq->engine->sched_engine->lock);
	list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests);
}

static void remove_from_engine(struct i915_request *rq)
{
	spin_lock_irq(&rq->engine->sched_engine->lock);
	list_del_init(&rq->sched.link);

	/* Prevent further __await_execution() registering a cb, then flush */
	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);

	spin_unlock_irq(&rq->engine->sched_engine->lock);

	i915_request_notify_execute_cb_imm(rq);
}

static void setup_common(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	/* gen8+ are only supported with execlists */
	GEM_BUG_ON(GRAPHICS_VER(i915) >= 8);

	setup_irq(engine);

	engine->resume = xcs_resume;
	engine->sanitize = xcs_sanitize;

	engine->reset.prepare = reset_prepare;
	engine->reset.rewind = reset_rewind;
	engine->reset.cancel = reset_cancel;
	engine->reset.finish = reset_finish;

	engine->add_active_request = add_to_engine;
	engine->remove_active_request = remove_from_engine;

	engine->cops = &ring_context_ops;
	engine->request_alloc = ring_request_alloc;

	/*
	 * Using a global execution timeline; the previous final breadcrumb is
	 * equivalent to our next initial breadcrumb, so we can elide
	 * engine->emit_init_breadcrumb().
	 */
	engine->emit_fini_breadcrumb = gen2_emit_breadcrumb;
	if (GRAPHICS_VER(i915) == 5)
		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;

	engine->set_default_submission = i9xx_set_default_submission;

	if (GRAPHICS_VER(i915) >= 6)
		engine->emit_bb_start = gen6_emit_bb_start;
	else if (GRAPHICS_VER(i915) >= 4)
		engine->emit_bb_start = gen4_emit_bb_start;
	else if (IS_I830(i915) || IS_I845G(i915))
		engine->emit_bb_start = i830_emit_bb_start;
	else
		engine->emit_bb_start = gen2_emit_bb_start;
}

static void setup_rcs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (HAS_L3_DPF(i915))
		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;

	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;

	if (GRAPHICS_VER(i915) >= 7) {
		engine->emit_flush = gen7_emit_flush_rcs;
		engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_rcs;
	} else if (GRAPHICS_VER(i915) == 6) {
		engine->emit_flush = gen6_emit_flush_rcs;
		engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_rcs;
	} else if (GRAPHICS_VER(i915) == 5) {
		engine->emit_flush = gen4_emit_flush_rcs;
	} else {
		if (GRAPHICS_VER(i915) < 4)
			engine->emit_flush = gen2_emit_flush;
		else
			engine->emit_flush = gen4_emit_flush_rcs;
		engine->irq_enable_mask = I915_USER_INTERRUPT;
	}

	if (IS_HASWELL(i915))
		engine->emit_bb_start = hsw_emit_bb_start;
}

static void setup_vcs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (GRAPHICS_VER(i915) >= 6) {
		/* gen6 bsd needs a special wa for tail updates */
		if (GRAPHICS_VER(i915) == 6)
			engine->set_default_submission = gen6_bsd_set_default_submission;
		engine->emit_flush = gen6_emit_flush_vcs;
		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;

		if (GRAPHICS_VER(i915) == 6)
			engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
		else
			engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
	} else {
		engine->emit_flush = gen4_emit_flush_vcs;
		if (GRAPHICS_VER(i915) == 5)
			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
		else
			engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
	}
}

static void setup_bcs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	engine->emit_flush = gen6_emit_flush_xcs;
	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;

	if (GRAPHICS_VER(i915) == 6)
		engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
	else
		engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
}

static void setup_vecs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 7);

	engine->emit_flush = gen6_emit_flush_xcs;
	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
	engine->irq_enable = hsw_irq_enable_vecs;
	engine->irq_disable = hsw_irq_disable_vecs;

	engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
}
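
/*
 * gen7 render engines use a workaround batch, built by
 * gen7_setup_clear_gpr_bb(), that clear_residuals() runs to scrub residual
 * GPR/EU state when switching away from a user context; probing with a
 * NULL vma just reports the size required for the batch.
 */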
static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine,
				    struct i915_vma * const vma)
{
	return gen7_setup_clear_gpr_bb(engine, vma);
}

static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine,
				   struct i915_gem_ww_ctx *ww,
				   struct i915_vma *vma)
{
	int err;

	err = i915_vma_pin_ww(vma, ww, 0, 0, PIN_USER | PIN_HIGH);
	if (err)
		return err;

	err = i915_vma_sync(vma);
	if (err)
		goto err_unpin;

	err = gen7_ctx_switch_bb_setup(engine, vma);
	if (err)
		goto err_unpin;

	engine->wa_ctx.vma = vma;
	return 0;

err_unpin:
	i915_vma_unpin(vma);
	return err;
}

static struct i915_vma *gen7_ctx_vma(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int size, err;

	if (GRAPHICS_VER(engine->i915) != 7 || engine->class != RENDER_CLASS)
		return NULL;

	err = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */);
	if (err < 0)
		return ERR_PTR(err);
	if (!err)
		return NULL;

	size = ALIGN(err, PAGE_SIZE);

	obj = i915_gem_object_create_internal(engine->i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, engine->gt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return ERR_CAST(vma);
	}

	vma->private = intel_context_create(engine); /* dummy residuals */
	if (IS_ERR(vma->private)) {
		err = PTR_ERR(vma->private);
		vma->private = NULL;
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return vma;
}
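
/*
 * Final setup for a legacy ring submission engine: install the per-class
 * vfuncs, create the global timeline and the single 16K ring shared by all
 * contexts, and pin everything (plus the optional gen7 wa batch) under a
 * ww context.
 */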
int intel_ring_submission_setup(struct intel_engine_cs *engine)
{
	struct i915_gem_ww_ctx ww;
	struct intel_timeline *timeline;
	struct intel_ring *ring;
	struct i915_vma *gen7_wa_vma;
	int err;

	setup_common(engine);

	switch (engine->class) {
	case RENDER_CLASS:
		setup_rcs(engine);
		break;
	case VIDEO_DECODE_CLASS:
		setup_vcs(engine);
		break;
	case COPY_ENGINE_CLASS:
		setup_bcs(engine);
		break;
	case VIDEO_ENHANCEMENT_CLASS:
		setup_vecs(engine);
		break;
	default:
		MISSING_CASE(engine->class);
		return -ENODEV;
	}

	timeline = intel_timeline_create_from_engine(engine,
						     I915_GEM_HWS_SEQNO_ADDR);
	if (IS_ERR(timeline)) {
		err = PTR_ERR(timeline);
		goto err;
	}
	GEM_BUG_ON(timeline->has_initial_breadcrumb);

	ring = intel_engine_create_ring(engine, SZ_16K);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err_timeline;
	}

	GEM_BUG_ON(engine->legacy.ring);
	engine->legacy.ring = ring;
	engine->legacy.timeline = timeline;

	gen7_wa_vma = gen7_ctx_vma(engine);
	if (IS_ERR(gen7_wa_vma)) {
		err = PTR_ERR(gen7_wa_vma);
		goto err_ring;
	}

	i915_gem_ww_ctx_init(&ww, false);

retry:
	err = i915_gem_object_lock(timeline->hwsp_ggtt->obj, &ww);
	if (!err && gen7_wa_vma)
		err = i915_gem_object_lock(gen7_wa_vma->obj, &ww);
	if (!err)
		err = i915_gem_object_lock(engine->legacy.ring->vma->obj, &ww);
	if (!err)
		err = intel_timeline_pin(timeline, &ww);
	if (!err) {
		err = intel_ring_pin(ring, &ww);
		if (err)
			intel_timeline_unpin(timeline);
	}
	if (err)
		goto out;

	GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);

	if (gen7_wa_vma) {
		err = gen7_ctx_switch_bb_init(engine, &ww, gen7_wa_vma);
		if (err) {
			intel_ring_unpin(ring);
			intel_timeline_unpin(timeline);
		}
	}

out:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	if (err)
		goto err_gen7_put;

	/* Finally, take ownership and responsibility for cleanup! */
	engine->release = ring_release;

	return 0;

err_gen7_put:
	if (gen7_wa_vma) {
		intel_context_put(gen7_wa_vma->private);
		i915_gem_object_put(gen7_wa_vma->obj);
	}
err_ring:
	intel_ring_put(ring);
err_timeline:
	intel_timeline_put(timeline);
err:
	intel_engine_cleanup_common(engine);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_ring_submission.c"
#endif