// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_pm.h"

#include "i915_drm_client.h"
#include "i915_drv.h"
#include "i915_trace.h"

#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
#include "intel_ring.h"

/* Slab cache backing every struct intel_context; set up at module init. */
static struct kmem_cache *slab_ce;

/* Allocate a zeroed intel_context from the slab; returns NULL on failure. */
static struct intel_context *intel_context_alloc(void)
{
	return kmem_cache_zalloc(slab_ce, GFP_KERNEL);
}

/*
 * RCU callback queued by intel_context_free(): emits the free tracepoint and
 * hands the context back to the slab.
 */
static void rcu_context_free(struct rcu_head *rcu)
{
	struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);

	trace_intel_context_free(ce);
	kmem_cache_free(slab_ce, ce);
}

/*
 * Release the memory of @ce. The actual free is deferred through call_rcu(),
 * so it only happens after an RCU grace period.
 */
void intel_context_free(struct intel_context *ce)
{
	call_rcu(&ce->rcu, rcu_context_free);
}

/*
 * Allocate and initialise a new context bound to @engine.
 *
 * Returns the new context, or ERR_PTR(-ENOMEM) if the allocation failed.
 */
struct intel_context *
intel_context_create(struct intel_engine_cs *engine)
{
	struct intel_context *ce;

	ce = intel_context_alloc();
	if (!ce)
		return ERR_PTR(-ENOMEM);

	intel_context_init(ce, engine);
	trace_intel_context_create(ce);
	return ce;
}

/*
 * Allocate the backend state for @ce if that has not been done yet.
 *
 * Serialised by ce->pin_mutex; CONTEXT_ALLOC_BIT records that ce->ops->alloc()
 * has succeeded so the work is only done once.
 *
 * Returns 0 on success (or if the state was already allocated), -EINTR if
 * pin_mutex could not be taken, -EIO if the context is banned, or the error
 * returned by ce->ops->alloc().
 */
int intel_context_alloc_state(struct intel_context *ce)
{
	struct i915_gem_context *ctx;
	int err = 0;

	if (mutex_lock_interruptible(&ce->pin_mutex))
		return -EINTR;

	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
		if (intel_context_is_banned(ce)) {
			err = -EIO;
			goto unlock;
		}

		err = ce->ops->alloc(ce);
		if (unlikely(err))
			goto unlock;

		set_bit(CONTEXT_ALLOC_BIT, &ce->flags);

		/*
		 * ce->gem_context is only sampled under RCU; keep using it
		 * only if a reference can still be taken on it.
		 */
		rcu_read_lock();
		ctx = rcu_dereference(ce->gem_context);
		if (ctx && !kref_get_unless_zero(&ctx->ref))
			ctx = NULL;
		rcu_read_unlock();
		if (ctx) {
			/* Register the new objects with the owning client, if any */
			if (ctx->client)
				i915_drm_client_add_context_objects(ctx->client,
								    ce);
			i915_gem_context_put(ctx);
		}
	}

unlock:
	mutex_unlock(&ce->pin_mutex);
	return err;
}

/*
 * Take an additional reference on ce->active and, unless the context is a
 * barrier context, runs on a GuC-submission engine or is a parallel context,
 * preallocate the barrier tracking nodes for ce->engine.
 *
 * Returns 0 on success. If the preallocation fails, the reference taken here
 * is released again and the error is returned.
 */
static int intel_context_active_acquire(struct intel_context *ce)
{
	int err;

	__i915_active_acquire(&ce->active);

	if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) ||
	    intel_context_is_parallel(ce))
		return 0;

	/* Preallocate tracking nodes */
	err = i915_active_acquire_preallocate_barrier(&ce->active,
						      ce->engine);
	if (err)
		i915_active_release(&ce->active);

	return err;
}

/* Counterpart of intel_context_active_acquire(). */
static void intel_context_active_release(struct intel_context *ce)
{
	/* Nodes preallocated in intel_context_active_acquire() */
	i915_active_acquire_barrier(&ce->active);
	i915_active_release(&ce->active);
}

/*
 * Pin the context state @vma into the GGTT (above the pin bias, PIN_HIGH) and
 * take a reference on its activity tracker.
 *
 * Returns 0 on success; on failure nothing is left pinned.
 */
static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
{
	unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
	int err;

	err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH);
	if (err)
		return err;

	err = i915_active_acquire(&vma->active);
	if (err)
		goto err_unpin;

	/*
	 * And mark it as a globally pinned object to let the shrinker know
	 * it cannot reclaim the object until we release it.
	 */
	i915_vma_make_unshrinkable(vma);
	vma->obj->mm.dirty = true;

	return 0;

err_unpin:
	i915_vma_unpin(vma);
	return err;
}

/*
 * Undo __context_pin_state(): make the object shrinkable again, drop the
 * activity reference and release the vma pin.
 */
static void __context_unpin_state(struct i915_vma *vma)
{
	i915_vma_make_shrinkable(vma);
	i915_active_release(&vma->active);
	__i915_vma_unpin(vma);
}

/*
 * Pin @ring and take a reference on the activity tracker of its vma.
 *
 * Returns 0 on success; on failure the ring is left unpinned.
 */
static int __ring_active(struct intel_ring *ring,
			 struct i915_gem_ww_ctx *ww)
{
	int err;

	err = intel_ring_pin(ring, ww);
	if (err)
		return err;

	err = i915_active_acquire(&ring->vma->active);
	if (err)
		goto err_pin;

	return 0;

err_pin:
	intel_ring_unpin(ring);
	return err;
}

/* Undo __ring_active(). */
static void __ring_retire(struct intel_ring *ring)
{
	i915_active_release(&ring->vma->active);
	intel_ring_unpin(ring);
}

/*
 * Pin everything the context needs: first the ring, then the timeline and
 * finally the context state (when ce->state is set).
 *
 * Returns 0 on success. On failure, whatever was pinned by this call is
 * released again in reverse order.
 */
static int intel_context_pre_pin(struct intel_context *ce,
				 struct i915_gem_ww_ctx *ww)
{
	int err;

	CE_TRACE(ce, "active\n");

	err = __ring_active(ce->ring, ww);
	if (err)
		return err;

	err = intel_timeline_pin(ce->timeline, ww);
	if (err)
		goto err_ring;

	/* No state object to pin for this context */
	if (!ce->state)
		return 0;

	err = __context_pin_state(ce->state, ww);
	if (err)
		goto err_timeline;

	return 0;

err_timeline:
	intel_timeline_unpin(ce->timeline);
err_ring:
	__ring_retire(ce->ring);
	return err;
}

/* Undo intel_context_pre_pin(), in the reverse order of acquisition. */
static void intel_context_post_unpin(struct intel_context *ce)
{
	if (ce->state)
		__context_unpin_state(ce->state);

	intel_timeline_unpin(ce->timeline);
	__ring_retire(ce->ring);
}

/*
 * Pin @ce under the ww acquire context @ww.
 *
 * The backend state is allocated on first use. The timeline HWSP, ring and
 * state objects are locked, temporarily pinned and ce->ops->pre_pin() is
 * called before ce->pin_mutex is taken. If this is the first pin
 * (pin_count is zero), ce->ops->pin() is invoked and pin_count is raised from
 * zero; otherwise pin_count is simply incremented.
 *
 * Note that the error labels are also reached on the success path: the
 * temporary references taken above are always dropped before returning.
 *
 * Returns 0 on success, -ENOENT if the context has been closed, or the error
 * of whichever step failed. No -EDEADLK handling is done here; the error is
 * returned to the caller as is.
 */
int __intel_context_do_pin_ww(struct intel_context *ce,
			      struct i915_gem_ww_ctx *ww)
{
	bool handoff = false;
	void *vaddr;
	int err = 0;

	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
		err = intel_context_alloc_state(ce);
		if (err)
			return err;
	}

	/*
	 * We always pin the context/ring/timeline here, to ensure a pin
	 * refcount for __intel_context_active(), which prevents a lock
	 * inversion of ce->pin_mutex vs dma_resv_lock().
	 */

	err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
	if (!err)
		err = i915_gem_object_lock(ce->ring->vma->obj, ww);
	if (!err && ce->state)
		err = i915_gem_object_lock(ce->state->obj, ww);
	if (!err)
		err = intel_context_pre_pin(ce, ww);
	if (err)
		return err;

	err = ce->ops->pre_pin(ce, ww, &vaddr);
	if (err)
		goto err_ctx_unpin;

	err = i915_active_acquire(&ce->active);
	if (err)
		goto err_post_unpin;

	err = mutex_lock_interruptible(&ce->pin_mutex);
	if (err)
		goto err_release;

	intel_engine_pm_might_get(ce->engine);

	if (unlikely(intel_context_is_closed(ce))) {
		err = -ENOENT;
		goto err_unlock;
	}

	/*
	 * atomic_add_unless() only increments a non-zero pin_count; a zero
	 * count means this is the first pin and the backend must be called.
	 */
	if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
		err = intel_context_active_acquire(ce);
		if (unlikely(err))
			goto err_unlock;

		err = ce->ops->pin(ce, vaddr);
		if (err) {
			intel_context_active_release(ce);
			goto err_unlock;
		}

		CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
			 i915_ggtt_offset(ce->ring->vma),
			 ce->ring->head, ce->ring->tail);

		/*
		 * The pre_pin() done above now belongs to the pinned context
		 * and must not be undone at err_post_unpin below.
		 */
		handoff = true;
		smp_mb__before_atomic(); /* flush pin before it is visible */
		atomic_inc(&ce->pin_count);
	}

	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */

	trace_intel_context_do_pin(ce);

err_unlock:
	mutex_unlock(&ce->pin_mutex);
err_release:
	i915_active_release(&ce->active);
err_post_unpin:
	if (!handoff)
		ce->ops->post_unpin(ce);
err_ctx_unpin:
	intel_context_post_unpin(ce);

	/*
	 * Unlock the hwsp_ggtt object since it's shared.
	 * In principle we can unlock all the global state locked above
	 * since it's pinned and doesn't need fencing, and will
	 * thus remain resident until it is explicitly unpinned.
	 */
	i915_gem_ww_unlock_single(ce->timeline->hwsp_ggtt->obj);

	return err;
}

/*
 * Pin @ce using a private ww acquire context, backing off and retrying for
 * as long as the pin reports -EDEADLK and the backoff itself succeeds.
 *
 * Returns 0 on success or a negative error code.
 */
int __intel_context_do_pin(struct intel_context *ce)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = __intel_context_do_pin_ww(ce, &ww);
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}

/*
 * Drop @sub pins from @ce. Nothing else happens unless this brings pin_count
 * down to zero, in which case the backend unpin/post_unpin hooks are run and
 * the active reference taken on the first pin is released.
 */
void __intel_context_do_unpin(struct intel_context *ce, int sub)
{
	if (!atomic_sub_and_test(sub, &ce->pin_count))
		return;

	CE_TRACE(ce, "unpin\n");
	ce->ops->unpin(ce);
	ce->ops->post_unpin(ce);

	/*
	 * Once released, we may asynchronously drop the active reference.
	 * As that may be the only reference keeping the context alive,
	 * take an extra now so that it is not freed before we finish
	 * dereferencing it.
	 */
	intel_context_get(ce);
	intel_context_active_release(ce);
	trace_intel_context_do_unpin(ce);
	intel_context_put(ce);
}

/*
 * Retire callback registered for ce->active in intel_context_init(): marks
 * the context image as valid, drops the pins taken by
 * __intel_context_active() and releases the context reference held since
 * then.
 */
static void __intel_context_retire(struct i915_active *active)
{
	struct intel_context *ce = container_of(active, typeof(*ce), active);

	CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
		 intel_context_get_total_runtime_ns(ce),
		 intel_context_get_avg_runtime_ns(ce));

	set_bit(CONTEXT_VALID_BIT, &ce->flags);
	intel_context_post_unpin(ce);
	intel_context_put(ce);
}

/*
 * Activation callback registered for ce->active in intel_context_init():
 * takes a context reference and an extra pin on the ring, timeline and (if
 * present) state. These are dropped again in __intel_context_retire().
 *
 * Always returns 0.
 */
static int __intel_context_active(struct i915_active *active)
{
	struct intel_context *ce = container_of(active, typeof(*ce), active);

	intel_context_get(ce);

	/* everything should already be activated by intel_context_pre_pin() */
	GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->ring->vma->active));
	__intel_ring_pin(ce->ring);

	__intel_timeline_pin(ce->timeline);

	if (ce->state) {
		GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active));
		__i915_vma_pin(ce->state);
		i915_vma_make_unshrinkable(ce->state);
	}

	return 0;
}

/* No-op notify callback for the guc_state.blocked fence. */
static int
sw_fence_dummy_notify(struct i915_sw_fence *sf,
		      enum i915_sw_fence_notify state)
{
	return NOTIFY_DONE;
}

/*
 * Initialise the freshly allocated context @ce for use on @engine.
 *
 * The context starts with a single reference, inherits the ops and sseu
 * from the engine, takes its own reference on the GT's vm and has no ring
 * yet (only a default ring size of 4K).
 */
void
intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
{
	GEM_BUG_ON(!engine->cops);
	GEM_BUG_ON(!engine->gt->vm);

	kref_init(&ce->ref);

	ce->engine = engine;
	ce->ops = engine->cops;
	ce->sseu = engine->sseu;
	ce->ring = NULL;
	ce->ring_size = SZ_4K;

	ewma_runtime_init(&ce->stats.runtime.avg);

	ce->vm = i915_vm_get(engine->gt->vm);

	/* NB ce->signal_link/lock is used under RCU */
	spin_lock_init(&ce->signal_lock);
	INIT_LIST_HEAD(&ce->signals);

	mutex_init(&ce->pin_mutex);

	spin_lock_init(&ce->guc_state.lock);
	INIT_LIST_HEAD(&ce->guc_state.fences);
	INIT_LIST_HEAD(&ce->guc_state.requests);

	ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
	INIT_LIST_HEAD(&ce->guc_id.link);

	INIT_LIST_HEAD(&ce->destroyed_link);

	INIT_LIST_HEAD(&ce->parallel.child_list);

	/*
	 * Initialize fence to be complete as this is expected to be complete
	 * unless there is a pending schedule disable outstanding.
	 */
	i915_sw_fence_init(&ce->guc_state.blocked,
			   sw_fence_dummy_notify);
	i915_sw_fence_commit(&ce->guc_state.blocked);

	i915_active_init(&ce->active,
			 __intel_context_active, __intel_context_retire, 0);
}

/*
 * Release what intel_context_init() set up, plus the timeline reference if
 * one is attached. For a parent context the references on all of its
 * children are dropped as well. The memory of @ce itself is not freed here.
 */
void intel_context_fini(struct intel_context *ce)
{
	struct intel_context *child, *next;

	if (ce->timeline)
		intel_timeline_put(ce->timeline);
	i915_vm_put(ce->vm);

	/* Need to put the creation ref for the children */
	if (intel_context_is_parent(ce))
		for_each_child_safe(ce, child, next)
			intel_context_put(child);

	mutex_destroy(&ce->pin_mutex);
	i915_active_fini(&ce->active);
	i915_sw_fence_fini(&ce->guc_state.blocked);
}

/* Module teardown: destroy the intel_context slab cache. */
void i915_context_module_exit(void)
{
	kmem_cache_destroy(slab_ce);
}

/*
 * Module setup: create the intel_context slab cache.
 *
 * Returns 0 on success or -ENOMEM if the cache could not be created.
 */
int __init i915_context_module_init(void)
{
	slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN);
	if (!slab_ce)
		return -ENOMEM;

	return 0;
}

/* Take an engine-pm reference, then enter the context's timeline. */
void intel_context_enter_engine(struct intel_context *ce)
{
	intel_engine_pm_get(ce->engine);
	intel_timeline_enter(ce->timeline);
}

/* Reverse of intel_context_enter_engine(). */
void intel_context_exit_engine(struct intel_context *ce)
{
	intel_timeline_exit(ce->timeline);
	intel_engine_pm_put(ce->engine);
}

/*
 * Prepare @rq, a request that belongs to a different context, for modifying
 * @ce remotely: order it after the last request on ce's timeline (unless it
 * already runs on that timeline) and track it in ce->active.
 *
 * ce->active must not be idle when this is called.
 *
 * Returns 0 on success or a negative error code.
 */
int intel_context_prepare_remote_request(struct intel_context *ce,
					 struct i915_request *rq)
{
	struct intel_timeline *tl = ce->timeline;
	int err;

	/* Only suitable for use in remotely modifying this context */
	GEM_BUG_ON(rq->context == ce);

	if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
		/* Queue this switch after current activity by this context. */
		err = i915_active_fence_set(&tl->last_request, rq);
		if (err)
			return err;
	}

	/*
	 * Guarantee context image and the timeline remains pinned until the
	 * modifying request is retired by setting the ce activity tracker.
	 *
	 * But we only need to take one pin on the account of it. Or in other
	 * words transfer the pinned ce object to tracked active request.
	 */
	GEM_BUG_ON(i915_active_is_idle(&ce->active));
	return i915_active_add_request(&ce->active, rq);
}

/*
 * Create a request on @ce. The context is pinned only for the duration of
 * i915_request_create(), with the usual ww backoff-and-retry on -EDEADLK.
 *
 * Returns the new request or an ERR_PTR() on failure.
 */
struct i915_request *intel_context_create_request(struct intel_context *ce)
{
	struct i915_gem_ww_ctx ww;
	struct i915_request *rq;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = intel_context_pin_ww(ce, &ww);
	if (!err) {
		rq = i915_request_create(ce);
		intel_context_unpin(ce);
	} else if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
		rq = ERR_PTR(err);
	} else {
		rq = ERR_PTR(err);
	}

	i915_gem_ww_ctx_fini(&ww);

	if (IS_ERR(rq))
		return rq;

	/*
	 * timeline->mutex should be the inner lock, but is used as outer lock.
	 * Hack around this to shut up lockdep in selftests..
	 */
	lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie);
	mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_);
	mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
	rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);

	return rq;
}

/*
 * Find an active (not yet completed) request of @ce. Only valid on engines
 * using GuC submission.
 *
 * The parent's request list is walked in reverse under guc_state.lock:
 * requests of other contexts are skipped, every incomplete request of @ce
 * replaces the current candidate, and the walk stops at the first completed
 * request of @ce.
 *
 * Returns the result of i915_request_get_rcu() on the candidate, or NULL if
 * no candidate was found.
 */
struct i915_request *intel_context_get_active_request(struct intel_context *ce)
{
	struct intel_context *parent = intel_context_to_parent(ce);
	struct i915_request *rq, *active = NULL;
	unsigned long flags;

	GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));

	/*
	 * We search the parent list to find an active request on the submitted
	 * context. The parent list contains the requests for all the contexts
	 * in the relationship so we have to do a compare of each request's
	 * context.
	 */
	spin_lock_irqsave(&parent->guc_state.lock, flags);
	list_for_each_entry_reverse(rq, &parent->guc_state.requests,
				    sched.link) {
		if (rq->context != ce)
			continue;
		if (i915_request_completed(rq))
			break;

		active = rq;
	}
	if (active)
		active = i915_request_get_rcu(active);
	spin_unlock_irqrestore(&parent->guc_state.lock, flags);

	return active;
}

/*
 * Attach @child to @parent for parallel submission: the child is appended
 * to the parent's child_list and pointed back at its parent, and the
 * parent's number_children is incremented.
 *
 * Neither context may be pinned, @parent must not itself be a child and
 * @child must not already be a child or a parent.
 */
void intel_context_bind_parent_child(struct intel_context *parent,
				     struct intel_context *child)
{
	/*
	 * Caller's responsibility to validate that this function is used
	 * correctly but we use GEM_BUG_ON here to ensure that they do.
	 */
	GEM_BUG_ON(intel_context_is_pinned(parent));
	GEM_BUG_ON(intel_context_is_child(parent));
	GEM_BUG_ON(intel_context_is_pinned(child));
	GEM_BUG_ON(intel_context_is_child(child));
	GEM_BUG_ON(intel_context_is_parent(child));

	/*
	 * NOTE(review): the index is stored in the parent's child_index and
	 * is overwritten on every bind; the child's own child_index is never
	 * written here. Confirm against the users of parallel.child_index
	 * whether child->parallel.child_index was intended.
	 */
	parent->parallel.child_index = parent->parallel.number_children++;
	list_add_tail(&child->parallel.child_link,
		      &parent->parallel.child_list);
	child->parallel.parent = parent;
}

/*
 * Return the total runtime of @ce: the accumulated total (converted with the
 * GT clock period when the backend reports cycles via COPS_RUNTIME_CYCLES)
 * plus, if stats.active is non-zero, the time elapsed since that timestamp.
 *
 * The backend's optional update_stats() hook is called first.
 */
u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
{
	u64 total, active;

	if (ce->ops->update_stats)
		ce->ops->update_stats(ce);

	total = ce->stats.runtime.total;
	if (ce->ops->flags & COPS_RUNTIME_CYCLES)
		total *= ce->engine->gt->clock_period_ns;

	active = READ_ONCE(ce->stats.active);
	if (active)
		active = intel_context_clock() - active;

	return total + active;
}

/*
 * Return the moving average of the runtime of @ce, converted with the GT
 * clock period when the backend reports cycles via COPS_RUNTIME_CYCLES.
 */
u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
{
	u64 avg = ewma_runtime_read(&ce->stats.runtime.avg);

	if (ce->ops->flags & COPS_RUNTIME_CYCLES)
		avg *= ce->engine->gt->clock_period_ns;

	return avg;
}

/*
 * Mark @ce as banned and, if the backend implements revoke(), revoke it
 * using INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS as the preempt timeout.
 *
 * Returns the value of intel_context_set_banned().
 */
bool intel_context_ban(struct intel_context *ce, struct i915_request *rq)
{
	bool ret = intel_context_set_banned(ce);

	trace_intel_context_ban(ce);

	if (ce->ops->revoke)
		ce->ops->revoke(ce, rq,
				INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS);

	return ret;
}

/*
 * Mark @ce as exiting and, if the backend implements revoke(), revoke it
 * without a specific request, using the engine's preempt timeout.
 *
 * Returns the value of intel_context_set_exiting().
 */
bool intel_context_revoke(struct intel_context *ce)
{
	bool ret = intel_context_set_exiting(ce);

	if (ce->ops->revoke)
		ce->ops->revoke(ce, NULL, ce->engine->props.preempt_timeout_ms);

	return ret;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_context.c"
#endif