// SPDX-License-Identifier: MIT
/*
 * Copyright © 2016-2018 Intel Corporation
 */

#include <drm/drm_cache.h>
#include <drm/drm_print.h>

#include "gem/i915_gem_internal.h"

#include "i915_active.h"
#include "i915_drv.h"
#include "i915_syncmap.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define TIMELINE_SEQNO_BYTES 8

static struct i915_vma *hwsp_alloc(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

static void __timeline_retire(struct i915_active *active)
{
	struct intel_timeline *tl =
		container_of(active, typeof(*tl), active);

	i915_vma_unpin(tl->hwsp_ggtt);
	intel_timeline_put(tl);
}

static int __timeline_active(struct i915_active *active)
{
	struct intel_timeline *tl =
		container_of(active, typeof(*tl), active);

	__i915_vma_pin(tl->hwsp_ggtt);
	intel_timeline_get(tl);
	return 0;
}

I915_SELFTEST_EXPORT int
intel_timeline_pin_map(struct intel_timeline *timeline)
{
	struct drm_i915_gem_object *obj = timeline->hwsp_ggtt->obj;
	u32 ofs = offset_in_page(timeline->hwsp_offset);
	void *vaddr;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	timeline->hwsp_map = vaddr;
	timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES);
	drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES);

	return 0;
}

static int intel_timeline_init(struct intel_timeline *timeline,
			       struct intel_gt *gt,
			       struct i915_vma *hwsp,
			       unsigned int offset)
{
	kref_init(&timeline->kref);
	atomic_set(&timeline->pin_count, 0);

	timeline->gt = gt;

	if (hwsp) {
		timeline->hwsp_offset = offset;
		timeline->hwsp_ggtt = i915_vma_get(hwsp);
	} else {
		timeline->has_initial_breadcrumb = true;
		hwsp = hwsp_alloc(gt);
		if (IS_ERR(hwsp))
			return PTR_ERR(hwsp);
		timeline->hwsp_ggtt = hwsp;
	}

	timeline->hwsp_map = NULL;
	timeline->hwsp_seqno = (void *)(long)timeline->hwsp_offset;

	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

	timeline->fence_context = dma_fence_context_alloc(1);

	mutex_init(&timeline->mutex);

	INIT_ACTIVE_FENCE(&timeline->last_request);
	INIT_LIST_HEAD(&timeline->requests);

	i915_syncmap_init(&timeline->sync);
	i915_active_init(&timeline->active, __timeline_active,
			 __timeline_retire, 0);

	return 0;
}

void intel_gt_init_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	spin_lock_init(&timelines->lock);
	INIT_LIST_HEAD(&timelines->active_list);
}

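/*
 * Final teardown runs as the RCU callback installed by __intel_timeline_free(),
 * i.e. only after a grace period, once any pre-existing RCU readers have
 * finished with the timeline.
 */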
static void intel_timeline_fini(struct rcu_head *rcu)
{
	struct intel_timeline *timeline =
		container_of(rcu, struct intel_timeline, rcu);

	if (timeline->hwsp_map)
		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

	i915_vma_put(timeline->hwsp_ggtt);
	i915_active_fini(&timeline->active);

	/*
	 * A small race exists between intel_gt_retire_requests_timeout and
	 * intel_timeline_exit which could result in the syncmap not getting
	 * free'd. Rather than work too hard to seal this race, simply clean up
	 * the syncmap on fini.
	 */
	i915_syncmap_free(&timeline->sync);

	kfree(timeline);
}

struct intel_timeline *
__intel_timeline_create(struct intel_gt *gt,
			struct i915_vma *global_hwsp,
			unsigned int offset)
{
	struct intel_timeline *timeline;
	int err;

	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
	if (!timeline)
		return ERR_PTR(-ENOMEM);

	err = intel_timeline_init(timeline, gt, global_hwsp, offset);
	if (err) {
		kfree(timeline);
		return ERR_PTR(err);
	}

	return timeline;
}

struct intel_timeline *
intel_timeline_create_from_engine(struct intel_engine_cs *engine,
				  unsigned int offset)
{
	struct i915_vma *hwsp = engine->status_page.vma;
	struct intel_timeline *tl;

	tl = __intel_timeline_create(engine->gt, hwsp, offset);
	if (IS_ERR(tl))
		return tl;

	/* Borrow a nearby lock; we only create these timelines during init */
	mutex_lock(&hwsp->vm->mutex);
	list_add_tail(&tl->engine_link, &engine->status_page.timelines);
	mutex_unlock(&hwsp->vm->mutex);

	return tl;
}

void __intel_timeline_pin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	atomic_inc(&tl->pin_count);
}

int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
{
	int err;

	if (atomic_add_unless(&tl->pin_count, 1, 0))
		return 0;

	if (!tl->hwsp_map) {
		err = intel_timeline_pin_map(tl);
		if (err)
			return err;
	}

	err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
	if (err)
		return err;

	tl->hwsp_offset =
		i915_ggtt_offset(tl->hwsp_ggtt) +
		offset_in_page(tl->hwsp_offset);
	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
		 tl->fence_context, tl->hwsp_offset);

	i915_active_acquire(&tl->active);
	if (atomic_fetch_inc(&tl->pin_count)) {
		i915_active_release(&tl->active);
		__i915_vma_unpin(tl->hwsp_ggtt);
	}

	return 0;
}

void intel_timeline_reset_seqno(const struct intel_timeline *tl)
{
	u32 *hwsp_seqno = (u32 *)tl->hwsp_seqno;
	/* Must be pinned to be writable, and no requests in flight. */
	GEM_BUG_ON(!atomic_read(&tl->pin_count));

	memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
	WRITE_ONCE(*hwsp_seqno, tl->seqno);
	drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES);
}

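/*
 * intel_timeline_enter() and intel_timeline_exit() bracket the periods during
 * which a timeline has requests in flight: the first enter resets the HWSP
 * seqno and adds the timeline to gt->timelines.active_list for retirement,
 * and the final exit removes it again and discards the now stale syncmap.
 */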
void intel_timeline_enter(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/*
	 * Pretend we are serialised by the timeline->mutex.
	 *
	 * While generally true, there are a few exceptions to the rule
	 * for the engine->kernel_context being used to manage power
	 * transitions. As the engine_park may be called from under any
	 * timeline, it uses the power mutex as a global serialisation
	 * lock to prevent any other request entering its timeline.
	 *
	 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
	 *
	 * However, intel_gt_retire_request() does not know which engine
	 * it is retiring along and so cannot partake in the engine-pm
	 * barrier, and there we use the tl->active_count as a means to
	 * pin the timeline in the active_list while the locks are dropped.
	 * Ergo, as that is outside of the engine-pm barrier, we need to
	 * use atomic to manipulate tl->active_count.
	 */
	lockdep_assert_held(&tl->mutex);

	if (atomic_add_unless(&tl->active_count, 1, 0))
		return;

	spin_lock(&timelines->lock);
	if (!atomic_fetch_inc(&tl->active_count)) {
		/*
		 * The HWSP is volatile, and may have been lost while inactive,
		 * e.g. across suspend/resume. Be paranoid, and ensure that
		 * the HWSP value matches our seqno so we don't proclaim
		 * the next request as already complete.
		 */
		intel_timeline_reset_seqno(tl);
		list_add_tail(&tl->link, &timelines->active_list);
	}
	spin_unlock(&timelines->lock);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/* See intel_timeline_enter() */
	lockdep_assert_held(&tl->mutex);

	GEM_BUG_ON(!atomic_read(&tl->active_count));
	if (atomic_add_unless(&tl->active_count, -1, 1))
		return;

	spin_lock(&timelines->lock);
	if (atomic_dec_and_test(&tl->active_count))
		list_del(&tl->link);
	spin_unlock(&timelines->lock);

	/*
	 * Since this timeline is idle, all barriers upon which we were waiting
	 * must also be complete and so we can discard the last used barriers
	 * without loss of information.
	 */
	i915_syncmap_free(&tl->sync);
}

static u32 timeline_advance(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

	return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
			   u32 *seqno)
{
	u32 next_ofs = offset_in_page(tl->hwsp_offset + TIMELINE_SEQNO_BYTES);

	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	if (TIMELINE_SEQNO_BYTES <= BIT(5) && (next_ofs & BIT(5)))
		next_ofs = offset_in_page(next_ofs + BIT(5));

	tl->hwsp_offset = i915_ggtt_offset(tl->hwsp_ggtt) + next_ofs;
	tl->hwsp_seqno = tl->hwsp_map + next_ofs;
	intel_timeline_reset_seqno(tl);

	*seqno = timeline_advance(tl);
	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
	return 0;
}

int intel_timeline_get_seqno(struct intel_timeline *tl,
			     struct i915_request *rq,
			     u32 *seqno)
{
	*seqno = timeline_advance(tl);

	/* Replace the HWSP on wraparound for HW semaphores */
	if (unlikely(!*seqno && tl->has_initial_breadcrumb))
		return __intel_timeline_get_seqno(tl, seqno);

	return 0;
}

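/*
 * intel_timeline_read_hwsp() looks up the GGTT address of the seqno slot that
 * 'from' will write, so that 'to' can be set up to wait on it (e.g. with a
 * semaphore). It returns 1 if 'from' has already completed and no wait is
 * needed, -EINVAL if the timeline has no initial breadcrumb (kernel context),
 * and otherwise keeps the slot alive by tracking 'to' on the timeline's
 * i915_active.
 */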
int intel_timeline_read_hwsp(struct i915_request *from,
			     struct i915_request *to,
			     u32 *hwsp)
{
	struct intel_timeline *tl;
	int err;

	rcu_read_lock();
	tl = rcu_dereference(from->timeline);
	if (i915_request_signaled(from) ||
	    !i915_active_acquire_if_busy(&tl->active))
		tl = NULL;

	if (tl) {
		/* hwsp_offset may wraparound, so use from->hwsp_seqno */
		*hwsp = i915_ggtt_offset(tl->hwsp_ggtt) +
			offset_in_page(from->hwsp_seqno);
	}

	/* ensure we wait on the right request, if not, we completed */
	if (tl && __i915_request_is_complete(from)) {
		i915_active_release(&tl->active);
		tl = NULL;
	}
	rcu_read_unlock();

	if (!tl)
		return 1;

	/* Can't do semaphore waits on kernel context */
	if (!tl->has_initial_breadcrumb) {
		err = -EINVAL;
		goto out;
	}

	err = i915_active_add_request(&tl->active, to);

out:
	i915_active_release(&tl->active);
	return err;
}

void intel_timeline_unpin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	if (!atomic_dec_and_test(&tl->pin_count))
		return;

	i915_active_release(&tl->active);
	__i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
	struct intel_timeline *timeline =
		container_of(kref, typeof(*timeline), kref);

	GEM_BUG_ON(atomic_read(&timeline->pin_count));
	GEM_BUG_ON(!list_empty(&timeline->requests));
	GEM_BUG_ON(timeline->retire);

	call_rcu(&timeline->rcu, intel_timeline_fini);
}

void intel_gt_fini_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	GEM_BUG_ON(!list_empty(&timelines->active_list));
}

void intel_gt_show_timelines(struct intel_gt *gt,
			     struct drm_printer *m,
			     void (*show_request)(struct drm_printer *m,
						  const struct i915_request *rq,
						  const char *prefix,
						  int indent))
{
	struct intel_gt_timelines *timelines = &gt->timelines;
	struct intel_timeline *tl, *tn;
	LIST_HEAD(free);

	spin_lock(&timelines->lock);
	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
		unsigned long count, ready, inflight;
		struct i915_request *rq, *rn;
		struct dma_fence *fence;

		if (!mutex_trylock(&tl->mutex)) {
			drm_printf(m, "Timeline %llx: busy; skipping\n",
				   tl->fence_context);
			continue;
		}

		intel_timeline_get(tl);
		GEM_BUG_ON(!atomic_read(&tl->active_count));
		atomic_inc(&tl->active_count); /* pin the list element */
		spin_unlock(&timelines->lock);

		count = 0;
		ready = 0;
		inflight = 0;
		list_for_each_entry_safe(rq, rn, &tl->requests, link) {
			if (i915_request_completed(rq))
				continue;

			count++;
			if (i915_request_is_ready(rq))
				ready++;
			if (i915_request_is_active(rq))
				inflight++;
		}

		drm_printf(m, "Timeline %llx: { ", tl->fence_context);
		drm_printf(m, "count: %lu, ready: %lu, inflight: %lu",
			   count, ready, inflight);
		drm_printf(m, ", seqno: { current: %d, last: %d }",
			   *tl->hwsp_seqno, tl->seqno);
		fence = i915_active_fence_get(&tl->last_request);
		if (fence) {
			drm_printf(m, ", engine: %s",
				   to_request(fence)->engine->name);
			dma_fence_put(fence);
		}
		drm_printf(m, " }\n");

		if (show_request) {
			list_for_each_entry_safe(rq, rn, &tl->requests, link)
				show_request(m, rq, "", 2);
		}

		mutex_unlock(&tl->mutex);
		spin_lock(&timelines->lock);

		/* Resume list iteration after reacquiring spinlock */
		list_safe_reset_next(tl, tn, link);
		if (atomic_dec_and_test(&tl->active_count))
			list_del(&tl->link);

		/* Defer the final release to after the spinlock */
		if (refcount_dec_and_test(&tl->kref.refcount)) {
			GEM_BUG_ON(atomic_read(&tl->active_count));
			list_add(&tl->link, &free);
		}
	}
	spin_unlock(&timelines->lock);

	list_for_each_entry_safe(tl, tn, &free, link)
		__intel_timeline_free(&tl->kref);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif