/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016-2018 Intel Corporation
 */

#include "i915_drv.h"

#include "i915_active.h"
#include "i915_syncmap.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))

#define CACHELINE_BITS 6
#define CACHELINE_FREE CACHELINE_BITS

struct intel_timeline_hwsp {
	struct intel_gt *gt;
	struct intel_gt_timelines *gt_timelines;
	struct list_head free_link;
	struct i915_vma *vma;
	u64 free_bitmap;
};

static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

static struct i915_vma *
hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline)
{
	struct intel_gt_timelines *gt = &timeline->gt->timelines;
	struct intel_timeline_hwsp *hwsp;

	BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);

	spin_lock_irq(&gt->hwsp_lock);

	/* hwsp_free_list only contains HWSP that have available cachelines */
	hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
					typeof(*hwsp), free_link);
	if (!hwsp) {
		struct i915_vma *vma;

		spin_unlock_irq(&gt->hwsp_lock);

		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
		if (!hwsp)
			return ERR_PTR(-ENOMEM);

		vma = __hwsp_alloc(timeline->gt);
		if (IS_ERR(vma)) {
			kfree(hwsp);
			return vma;
		}

		vma->private = hwsp;
		hwsp->gt = timeline->gt;
		hwsp->vma = vma;
		hwsp->free_bitmap = ~0ull;
		hwsp->gt_timelines = gt;

		spin_lock_irq(&gt->hwsp_lock);
		list_add(&hwsp->free_link, &gt->hwsp_free_list);
	}

	GEM_BUG_ON(!hwsp->free_bitmap);
	*cacheline = __ffs64(hwsp->free_bitmap);
	hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
	if (!hwsp->free_bitmap)
		list_del(&hwsp->free_link);

	spin_unlock_irq(&gt->hwsp_lock);

	GEM_BUG_ON(hwsp->vma->private != hwsp);
	return hwsp->vma;
}

static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
{
	struct intel_gt_timelines *gt = hwsp->gt_timelines;
	unsigned long flags;

	spin_lock_irqsave(&gt->hwsp_lock, flags);

	/* As a cacheline becomes available, publish the HWSP on the freelist */
	if (!hwsp->free_bitmap)
		list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);

	GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
	hwsp->free_bitmap |= BIT_ULL(cacheline);

	/* And if no one is left using it, give the page back to the system */
	if (hwsp->free_bitmap == ~0ull) {
		i915_vma_put(hwsp->vma);
		list_del(&hwsp->free_link);
		kfree(hwsp);
	}

	spin_unlock_irqrestore(&gt->hwsp_lock, flags);
}

static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{
	GEM_BUG_ON(!i915_active_is_idle(&cl->active));

	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
	i915_vma_put(cl->hwsp->vma);
	__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));

	i915_active_fini(&cl->active);
	kfree_rcu(cl, rcu);
}
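
/*
 * Descriptive note (added for clarity): each struct intel_timeline_cacheline
 * tracks a single CACHELINE_BYTES slot of a HWSP page. The kmap address and
 * the slot index are packed into one pointer via page_pack_bits(vaddr,
 * cacheline); page_mask_bits() recovers the page address and
 * ptr_unmask_bits(..., CACHELINE_BITS) the slot index (0-63), while bit
 * CACHELINE_FREE marks a cacheline that has been discarded by its owner.
 *
 * While the cacheline has active users, __cacheline_active() keeps the
 * backing vma pinned; __cacheline_retire() drops that pin once the last
 * fence signals and, if the owner has already freed the cacheline, returns
 * the slot to the pool via __idle_cacheline_free().
 */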
__i915_active_call
static void __cacheline_retire(struct i915_active *active)
{
	struct intel_timeline_cacheline *cl =
		container_of(active, typeof(*cl), active);

	i915_vma_unpin(cl->hwsp->vma);
	if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
		__idle_cacheline_free(cl);
}

static int __cacheline_active(struct i915_active *active)
{
	struct intel_timeline_cacheline *cl =
		container_of(active, typeof(*cl), active);

	__i915_vma_pin(cl->hwsp->vma);
	return 0;
}

static struct intel_timeline_cacheline *
cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
{
	struct intel_timeline_cacheline *cl;
	void *vaddr;

	GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));

	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
	if (!cl)
		return ERR_PTR(-ENOMEM);

	vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		kfree(cl);
		return ERR_CAST(vaddr);
	}

	i915_vma_get(hwsp->vma);
	cl->hwsp = hwsp;
	cl->vaddr = page_pack_bits(vaddr, cacheline);

	i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);

	return cl;
}

static void cacheline_acquire(struct intel_timeline_cacheline *cl)
{
	if (cl)
		i915_active_acquire(&cl->active);
}

static void cacheline_release(struct intel_timeline_cacheline *cl)
{
	if (cl)
		i915_active_release(&cl->active);
}

static void cacheline_free(struct intel_timeline_cacheline *cl)
{
	GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
	cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);

	if (i915_active_is_idle(&cl->active))
		__idle_cacheline_free(cl);
}
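
/*
 * Descriptive note (added for clarity): intel_timeline_init() binds a
 * timeline to its seqno storage. With hwsp == NULL, a cacheline is carved
 * out of the per-gt HWSP pool above (hwsp_alloc() + cacheline_alloc()) and
 * the timeline emits an initial breadcrumb; with a caller-supplied hwsp
 * (e.g. an engine status page) the seqno lives at the fixed
 * I915_GEM_HWS_SEQNO_ADDR offset instead.
 *
 * A minimal usage sketch, assuming the intel_timeline_put() kref helper
 * declared in intel_timeline.h (error handling elided):
 *
 *	struct intel_timeline *tl;
 *
 *	tl = intel_timeline_create(gt, NULL);
 *	intel_timeline_pin(tl);
 *	... emit requests against tl, then retire them ...
 *	intel_timeline_unpin(tl);
 *	intel_timeline_put(tl);
 */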
int intel_timeline_init(struct intel_timeline *timeline,
			struct intel_gt *gt,
			struct i915_vma *hwsp)
{
	void *vaddr;

	kref_init(&timeline->kref);
	atomic_set(&timeline->pin_count, 0);

	timeline->gt = gt;

	timeline->has_initial_breadcrumb = !hwsp;
	timeline->hwsp_cacheline = NULL;

	if (!hwsp) {
		struct intel_timeline_cacheline *cl;
		unsigned int cacheline;

		hwsp = hwsp_alloc(timeline, &cacheline);
		if (IS_ERR(hwsp))
			return PTR_ERR(hwsp);

		cl = cacheline_alloc(hwsp->private, cacheline);
		if (IS_ERR(cl)) {
			__idle_hwsp_free(hwsp->private, cacheline);
			return PTR_ERR(cl);
		}

		timeline->hwsp_cacheline = cl;
		timeline->hwsp_offset = cacheline * CACHELINE_BYTES;

		vaddr = page_mask_bits(cl->vaddr);
	} else {
		timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;

		vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
		if (IS_ERR(vaddr))
			return PTR_ERR(vaddr);
	}

	timeline->hwsp_seqno =
		memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);

	timeline->hwsp_ggtt = i915_vma_get(hwsp);
	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

	timeline->fence_context = dma_fence_context_alloc(1);

	mutex_init(&timeline->mutex);

	INIT_ACTIVE_FENCE(&timeline->last_request);
	INIT_LIST_HEAD(&timeline->requests);

	i915_syncmap_init(&timeline->sync);

	return 0;
}

void intel_gt_init_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	spin_lock_init(&timelines->lock);
	INIT_LIST_HEAD(&timelines->active_list);

	spin_lock_init(&timelines->hwsp_lock);
	INIT_LIST_HEAD(&timelines->hwsp_free_list);
}

void intel_timeline_fini(struct intel_timeline *timeline)
{
	GEM_BUG_ON(atomic_read(&timeline->pin_count));
	GEM_BUG_ON(!list_empty(&timeline->requests));
	GEM_BUG_ON(timeline->retire);

	if (timeline->hwsp_cacheline)
		cacheline_free(timeline->hwsp_cacheline);
	else
		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

	i915_vma_put(timeline->hwsp_ggtt);
}

struct intel_timeline *
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
{
	struct intel_timeline *timeline;
	int err;

	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
	if (!timeline)
		return ERR_PTR(-ENOMEM);

	err = intel_timeline_init(timeline, gt, global_hwsp);
	if (err) {
		kfree(timeline);
		return ERR_PTR(err);
	}

	return timeline;
}

int intel_timeline_pin(struct intel_timeline *tl)
{
	int err;

	if (atomic_add_unless(&tl->pin_count, 1, 0))
		return 0;

	err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
	if (err)
		return err;

	tl->hwsp_offset =
		i915_ggtt_offset(tl->hwsp_ggtt) +
		offset_in_page(tl->hwsp_offset);

	cacheline_acquire(tl->hwsp_cacheline);
	if (atomic_fetch_inc(&tl->pin_count)) {
		cacheline_release(tl->hwsp_cacheline);
		__i915_vma_unpin(tl->hwsp_ggtt);
	}

	return 0;
}

void intel_timeline_enter(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/*
	 * Pretend we are serialised by the timeline->mutex.
	 *
	 * While generally true, there are a few exceptions to the rule
	 * for the engine->kernel_context being used to manage power
	 * transitions. As the engine_park may be called from under any
	 * timeline, it uses the power mutex as a global serialisation
	 * lock to prevent any other request entering its timeline.
	 *
	 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
	 *
	 * However, intel_gt_retire_request() does not know which engine
	 * it is retiring along and so cannot partake in the engine-pm
	 * barrier, and there we use the tl->active_count as a means to
	 * pin the timeline in the active_list while the locks are dropped.
	 * Ergo, as that is outside of the engine-pm barrier, we need to
	 * use atomic to manipulate tl->active_count.
	 */
	lockdep_assert_held(&tl->mutex);

	if (atomic_add_unless(&tl->active_count, 1, 0))
		return;

	spin_lock(&timelines->lock);
	if (!atomic_fetch_inc(&tl->active_count))
		list_add_tail(&tl->link, &timelines->active_list);
	spin_unlock(&timelines->lock);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/* See intel_timeline_enter() */
	lockdep_assert_held(&tl->mutex);

	GEM_BUG_ON(!atomic_read(&tl->active_count));
	if (atomic_add_unless(&tl->active_count, -1, 1))
		return;

	spin_lock(&timelines->lock);
	if (atomic_dec_and_test(&tl->active_count))
		list_del(&tl->link);
	spin_unlock(&timelines->lock);

	/*
	 * Since this timeline is idle, all barriers upon which we were waiting
	 * must also be complete and so we can discard the last used barriers
	 * without loss of information.
	 */
	i915_syncmap_free(&tl->sync);
}
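
/*
 * Descriptive note (added for clarity): timeline_advance() hands out the
 * next breadcrumb value, bumping tl->seqno by 1 + tl->has_initial_breadcrumb
 * (i.e. two per request when an initial breadcrumb is emitted, keeping the
 * seqno even, as the assertion below checks). timeline_rollback() undoes a
 * single advance if the request cannot proceed, e.g. when
 * __intel_timeline_get_seqno() fails to provision a replacement HWSP
 * cacheline on wraparound.
 */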
static u32 timeline_advance(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

	return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

static void timeline_rollback(struct intel_timeline *tl)
{
	tl->seqno -= 1 + tl->has_initial_breadcrumb;
}

static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
			   struct i915_request *rq,
			   u32 *seqno)
{
	struct intel_timeline_cacheline *cl;
	unsigned int cacheline;
	struct i915_vma *vma;
	void *vaddr;
	int err;

	might_lock(&tl->gt->ggtt->vm.mutex);

	/*
	 * If there is an outstanding GPU reference to this cacheline,
	 * such as it being sampled by a HW semaphore on another timeline,
	 * we cannot wraparound our seqno value (the HW semaphore does
	 * a strict greater-than-or-equals compare, not i915_seqno_passed).
	 * So if the cacheline is still busy, we must detach ourselves
	 * from it and leave it inflight alongside its users.
	 *
	 * However, if nobody is watching and we can guarantee that nobody
	 * will, we could simply reuse the same cacheline.
	 *
	 * if (i915_active_request_is_signaled(&tl->last_request) &&
	 *     i915_active_is_signaled(&tl->hwsp_cacheline->active))
	 *	return 0;
	 *
	 * That seems unlikely for a busy timeline that needed to wrap in
	 * the first place, so just replace the cacheline.
	 */

	vma = hwsp_alloc(tl, &cacheline);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_rollback;
	}

	err = i915_ggtt_pin(vma, 0, PIN_HIGH);
	if (err) {
		__idle_hwsp_free(vma->private, cacheline);
		goto err_rollback;
	}

	cl = cacheline_alloc(vma->private, cacheline);
	if (IS_ERR(cl)) {
		err = PTR_ERR(cl);
		__idle_hwsp_free(vma->private, cacheline);
		goto err_unpin;
	}
	GEM_BUG_ON(cl->hwsp->vma != vma);

	/*
	 * Attach the old cacheline to the current request, so that we only
	 * free it after the current request is retired, which ensures that
	 * all writes into the cacheline from previous requests are complete.
	 */
	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
	if (err)
		goto err_cacheline;

	cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
	cacheline_free(tl->hwsp_cacheline);

	i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
	i915_vma_put(tl->hwsp_ggtt);

	tl->hwsp_ggtt = i915_vma_get(vma);

	vaddr = page_mask_bits(cl->vaddr);
	tl->hwsp_offset = cacheline * CACHELINE_BYTES;
	tl->hwsp_seqno =
		memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);

	tl->hwsp_offset += i915_ggtt_offset(vma);

	cacheline_acquire(cl);
	tl->hwsp_cacheline = cl;

	*seqno = timeline_advance(tl);
	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
	return 0;

err_cacheline:
	cacheline_free(cl);
err_unpin:
	i915_vma_unpin(vma);
err_rollback:
	timeline_rollback(tl);
	return err;
}
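
/*
 * Descriptive note (added for clarity): intel_timeline_get_seqno() allocates
 * the next seqno for a request on the timeline. When the 32-bit seqno wraps
 * to zero on a timeline backed by a HWSP cacheline, the old cacheline may
 * still be sampled by HW semaphores (which use a plain greater-or-equal
 * compare), so the slow path above swaps in a fresh cacheline and keeps the
 * old one alive until the wrapping request is retired.
 */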
int intel_timeline_get_seqno(struct intel_timeline *tl,
			     struct i915_request *rq,
			     u32 *seqno)
{
	*seqno = timeline_advance(tl);

	/* Replace the HWSP on wraparound for HW semaphores */
	if (unlikely(!*seqno && tl->hwsp_cacheline))
		return __intel_timeline_get_seqno(tl, rq, seqno);

	return 0;
}

static int cacheline_ref(struct intel_timeline_cacheline *cl,
			 struct i915_request *rq)
{
	return i915_active_add_request(&cl->active, rq);
}

int intel_timeline_read_hwsp(struct i915_request *from,
			     struct i915_request *to,
			     u32 *hwsp)
{
	struct intel_timeline_cacheline *cl;
	int err;

	GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));

	rcu_read_lock();
	cl = rcu_dereference(from->hwsp_cacheline);
	if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
		goto unlock; /* seqno wrapped and completed! */
	if (unlikely(i915_request_completed(from)))
		goto release;
	rcu_read_unlock();

	err = cacheline_ref(cl, to);
	if (err)
		goto out;

	*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
		ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;

out:
	i915_active_release(&cl->active);
	return err;

release:
	i915_active_release(&cl->active);
unlock:
	rcu_read_unlock();
	return 1;
}

void intel_timeline_unpin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	if (!atomic_dec_and_test(&tl->pin_count))
		return;

	cacheline_release(tl->hwsp_cacheline);

	__i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
	struct intel_timeline *timeline =
		container_of(kref, typeof(*timeline), kref);

	intel_timeline_fini(timeline);
	kfree_rcu(timeline, rcu);
}

void intel_gt_fini_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	GEM_BUG_ON(!list_empty(&timelines->active_list));
	GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif