1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2016-2018 Intel Corporation 4 */ 5 6 #include "i915_drv.h" 7 8 #include "i915_active.h" 9 #include "i915_syncmap.h" 10 #include "intel_gt.h" 11 #include "intel_ring.h" 12 #include "intel_timeline.h" 13 14 #define TIMELINE_SEQNO_BYTES 8 15 16 static struct i915_vma *hwsp_alloc(struct intel_gt *gt) 17 { 18 struct drm_i915_private *i915 = gt->i915; 19 struct drm_i915_gem_object *obj; 20 struct i915_vma *vma; 21 22 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 23 if (IS_ERR(obj)) 24 return ERR_CAST(obj); 25 26 i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); 27 28 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 29 if (IS_ERR(vma)) 30 i915_gem_object_put(obj); 31 32 return vma; 33 } 34 35 __i915_active_call 36 static void __timeline_retire(struct i915_active *active) 37 { 38 struct intel_timeline *tl = 39 container_of(active, typeof(*tl), active); 40 41 i915_vma_unpin(tl->hwsp_ggtt); 42 intel_timeline_put(tl); 43 } 44 45 static int __timeline_active(struct i915_active *active) 46 { 47 struct intel_timeline *tl = 48 container_of(active, typeof(*tl), active); 49 50 __i915_vma_pin(tl->hwsp_ggtt); 51 intel_timeline_get(tl); 52 return 0; 53 } 54 55 I915_SELFTEST_EXPORT int 56 intel_timeline_pin_map(struct intel_timeline *timeline) 57 { 58 struct drm_i915_gem_object *obj = timeline->hwsp_ggtt->obj; 59 u32 ofs = offset_in_page(timeline->hwsp_offset); 60 void *vaddr; 61 62 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); 63 if (IS_ERR(vaddr)) 64 return PTR_ERR(vaddr); 65 66 timeline->hwsp_map = vaddr; 67 timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES); 68 clflush(vaddr + ofs); 69 70 return 0; 71 } 72 73 static int intel_timeline_init(struct intel_timeline *timeline, 74 struct intel_gt *gt, 75 struct i915_vma *hwsp, 76 unsigned int offset) 77 { 78 kref_init(&timeline->kref); 79 atomic_set(&timeline->pin_count, 0); 80 81 timeline->gt = gt; 82 83 if (hwsp) { 84 timeline->hwsp_offset = offset; 85 timeline->hwsp_ggtt = i915_vma_get(hwsp); 86 } else { 87 timeline->has_initial_breadcrumb = true; 88 hwsp = hwsp_alloc(gt); 89 if (IS_ERR(hwsp)) 90 return PTR_ERR(hwsp); 91 timeline->hwsp_ggtt = hwsp; 92 } 93 94 timeline->hwsp_map = NULL; 95 timeline->hwsp_seqno = (void *)(long)timeline->hwsp_offset; 96 97 GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size); 98 99 timeline->fence_context = dma_fence_context_alloc(1); 100 101 mutex_init(&timeline->mutex); 102 103 INIT_ACTIVE_FENCE(&timeline->last_request); 104 INIT_LIST_HEAD(&timeline->requests); 105 106 i915_syncmap_init(&timeline->sync); 107 i915_active_init(&timeline->active, __timeline_active, __timeline_retire); 108 109 return 0; 110 } 111 112 void intel_gt_init_timelines(struct intel_gt *gt) 113 { 114 struct intel_gt_timelines *timelines = >->timelines; 115 116 spin_lock_init(&timelines->lock); 117 INIT_LIST_HEAD(&timelines->active_list); 118 } 119 120 static void intel_timeline_fini(struct rcu_head *rcu) 121 { 122 struct intel_timeline *timeline = 123 container_of(rcu, struct intel_timeline, rcu); 124 125 if (timeline->hwsp_map) 126 i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); 127 128 i915_vma_put(timeline->hwsp_ggtt); 129 i915_active_fini(&timeline->active); 130 kfree(timeline); 131 } 132 133 struct intel_timeline * 134 __intel_timeline_create(struct intel_gt *gt, 135 struct i915_vma *global_hwsp, 136 unsigned int offset) 137 { 138 struct intel_timeline *timeline; 139 int err; 140 141 timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); 142 if (!timeline) 143 return ERR_PTR(-ENOMEM); 144 145 err = intel_timeline_init(timeline, gt, global_hwsp, offset); 146 if (err) { 147 kfree(timeline); 148 return ERR_PTR(err); 149 } 150 151 return timeline; 152 } 153 154 struct intel_timeline * 155 intel_timeline_create_from_engine(struct intel_engine_cs *engine, 156 unsigned int offset) 157 { 158 struct i915_vma *hwsp = engine->status_page.vma; 159 struct intel_timeline *tl; 160 161 tl = __intel_timeline_create(engine->gt, hwsp, offset); 162 if (IS_ERR(tl)) 163 return tl; 164 165 /* Borrow a nearby lock; we only create these timelines during init */ 166 mutex_lock(&hwsp->vm->mutex); 167 list_add_tail(&tl->engine_link, &engine->status_page.timelines); 168 mutex_unlock(&hwsp->vm->mutex); 169 170 return tl; 171 } 172 173 void __intel_timeline_pin(struct intel_timeline *tl) 174 { 175 GEM_BUG_ON(!atomic_read(&tl->pin_count)); 176 atomic_inc(&tl->pin_count); 177 } 178 179 int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww) 180 { 181 int err; 182 183 if (atomic_add_unless(&tl->pin_count, 1, 0)) 184 return 0; 185 186 if (!tl->hwsp_map) { 187 err = intel_timeline_pin_map(tl); 188 if (err) 189 return err; 190 } 191 192 err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH); 193 if (err) 194 return err; 195 196 tl->hwsp_offset = 197 i915_ggtt_offset(tl->hwsp_ggtt) + 198 offset_in_page(tl->hwsp_offset); 199 GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n", 200 tl->fence_context, tl->hwsp_offset); 201 202 i915_active_acquire(&tl->active); 203 if (atomic_fetch_inc(&tl->pin_count)) { 204 i915_active_release(&tl->active); 205 __i915_vma_unpin(tl->hwsp_ggtt); 206 } 207 208 return 0; 209 } 210 211 void intel_timeline_reset_seqno(const struct intel_timeline *tl) 212 { 213 u32 *hwsp_seqno = (u32 *)tl->hwsp_seqno; 214 /* Must be pinned to be writable, and no requests in flight. */ 215 GEM_BUG_ON(!atomic_read(&tl->pin_count)); 216 217 memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno)); 218 WRITE_ONCE(*hwsp_seqno, tl->seqno); 219 clflush(hwsp_seqno); 220 } 221 222 void intel_timeline_enter(struct intel_timeline *tl) 223 { 224 struct intel_gt_timelines *timelines = &tl->gt->timelines; 225 226 /* 227 * Pretend we are serialised by the timeline->mutex. 228 * 229 * While generally true, there are a few exceptions to the rule 230 * for the engine->kernel_context being used to manage power 231 * transitions. As the engine_park may be called from under any 232 * timeline, it uses the power mutex as a global serialisation 233 * lock to prevent any other request entering its timeline. 234 * 235 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex. 236 * 237 * However, intel_gt_retire_request() does not know which engine 238 * it is retiring along and so cannot partake in the engine-pm 239 * barrier, and there we use the tl->active_count as a means to 240 * pin the timeline in the active_list while the locks are dropped. 241 * Ergo, as that is outside of the engine-pm barrier, we need to 242 * use atomic to manipulate tl->active_count. 243 */ 244 lockdep_assert_held(&tl->mutex); 245 246 if (atomic_add_unless(&tl->active_count, 1, 0)) 247 return; 248 249 spin_lock(&timelines->lock); 250 if (!atomic_fetch_inc(&tl->active_count)) { 251 /* 252 * The HWSP is volatile, and may have been lost while inactive, 253 * e.g. across suspend/resume. Be paranoid, and ensure that 254 * the HWSP value matches our seqno so we don't proclaim 255 * the next request as already complete. 256 */ 257 intel_timeline_reset_seqno(tl); 258 list_add_tail(&tl->link, &timelines->active_list); 259 } 260 spin_unlock(&timelines->lock); 261 } 262 263 void intel_timeline_exit(struct intel_timeline *tl) 264 { 265 struct intel_gt_timelines *timelines = &tl->gt->timelines; 266 267 /* See intel_timeline_enter() */ 268 lockdep_assert_held(&tl->mutex); 269 270 GEM_BUG_ON(!atomic_read(&tl->active_count)); 271 if (atomic_add_unless(&tl->active_count, -1, 1)) 272 return; 273 274 spin_lock(&timelines->lock); 275 if (atomic_dec_and_test(&tl->active_count)) 276 list_del(&tl->link); 277 spin_unlock(&timelines->lock); 278 279 /* 280 * Since this timeline is idle, all bariers upon which we were waiting 281 * must also be complete and so we can discard the last used barriers 282 * without loss of information. 283 */ 284 i915_syncmap_free(&tl->sync); 285 } 286 287 static u32 timeline_advance(struct intel_timeline *tl) 288 { 289 GEM_BUG_ON(!atomic_read(&tl->pin_count)); 290 GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb); 291 292 return tl->seqno += 1 + tl->has_initial_breadcrumb; 293 } 294 295 static noinline int 296 __intel_timeline_get_seqno(struct intel_timeline *tl, 297 u32 *seqno) 298 { 299 u32 next_ofs = offset_in_page(tl->hwsp_offset + TIMELINE_SEQNO_BYTES); 300 301 /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ 302 if (TIMELINE_SEQNO_BYTES <= BIT(5) && (next_ofs & BIT(5))) 303 next_ofs = offset_in_page(next_ofs + BIT(5)); 304 305 tl->hwsp_offset = i915_ggtt_offset(tl->hwsp_ggtt) + next_ofs; 306 tl->hwsp_seqno = tl->hwsp_map + next_ofs; 307 intel_timeline_reset_seqno(tl); 308 309 *seqno = timeline_advance(tl); 310 GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno)); 311 return 0; 312 } 313 314 int intel_timeline_get_seqno(struct intel_timeline *tl, 315 struct i915_request *rq, 316 u32 *seqno) 317 { 318 *seqno = timeline_advance(tl); 319 320 /* Replace the HWSP on wraparound for HW semaphores */ 321 if (unlikely(!*seqno && tl->has_initial_breadcrumb)) 322 return __intel_timeline_get_seqno(tl, seqno); 323 324 return 0; 325 } 326 327 int intel_timeline_read_hwsp(struct i915_request *from, 328 struct i915_request *to, 329 u32 *hwsp) 330 { 331 struct intel_timeline *tl; 332 int err; 333 334 rcu_read_lock(); 335 tl = rcu_dereference(from->timeline); 336 if (i915_request_signaled(from) || 337 !i915_active_acquire_if_busy(&tl->active)) 338 tl = NULL; 339 340 if (tl) { 341 /* hwsp_offset may wraparound, so use from->hwsp_seqno */ 342 *hwsp = i915_ggtt_offset(tl->hwsp_ggtt) + 343 offset_in_page(from->hwsp_seqno); 344 } 345 346 /* ensure we wait on the right request, if not, we completed */ 347 if (tl && __i915_request_is_complete(from)) { 348 i915_active_release(&tl->active); 349 tl = NULL; 350 } 351 rcu_read_unlock(); 352 353 if (!tl) 354 return 1; 355 356 /* Can't do semaphore waits on kernel context */ 357 if (!tl->has_initial_breadcrumb) { 358 err = -EINVAL; 359 goto out; 360 } 361 362 err = i915_active_add_request(&tl->active, to); 363 364 out: 365 i915_active_release(&tl->active); 366 return err; 367 } 368 369 void intel_timeline_unpin(struct intel_timeline *tl) 370 { 371 GEM_BUG_ON(!atomic_read(&tl->pin_count)); 372 if (!atomic_dec_and_test(&tl->pin_count)) 373 return; 374 375 i915_active_release(&tl->active); 376 __i915_vma_unpin(tl->hwsp_ggtt); 377 } 378 379 void __intel_timeline_free(struct kref *kref) 380 { 381 struct intel_timeline *timeline = 382 container_of(kref, typeof(*timeline), kref); 383 384 GEM_BUG_ON(atomic_read(&timeline->pin_count)); 385 GEM_BUG_ON(!list_empty(&timeline->requests)); 386 GEM_BUG_ON(timeline->retire); 387 388 call_rcu(&timeline->rcu, intel_timeline_fini); 389 } 390 391 void intel_gt_fini_timelines(struct intel_gt *gt) 392 { 393 struct intel_gt_timelines *timelines = >->timelines; 394 395 GEM_BUG_ON(!list_empty(&timelines->active_list)); 396 } 397 398 void intel_gt_show_timelines(struct intel_gt *gt, 399 struct drm_printer *m, 400 void (*show_request)(struct drm_printer *m, 401 const struct i915_request *rq, 402 const char *prefix, 403 int indent)) 404 { 405 struct intel_gt_timelines *timelines = >->timelines; 406 struct intel_timeline *tl, *tn; 407 LIST_HEAD(free); 408 409 spin_lock(&timelines->lock); 410 list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { 411 unsigned long count, ready, inflight; 412 struct i915_request *rq, *rn; 413 struct dma_fence *fence; 414 415 if (!mutex_trylock(&tl->mutex)) { 416 drm_printf(m, "Timeline %llx: busy; skipping\n", 417 tl->fence_context); 418 continue; 419 } 420 421 intel_timeline_get(tl); 422 GEM_BUG_ON(!atomic_read(&tl->active_count)); 423 atomic_inc(&tl->active_count); /* pin the list element */ 424 spin_unlock(&timelines->lock); 425 426 count = 0; 427 ready = 0; 428 inflight = 0; 429 list_for_each_entry_safe(rq, rn, &tl->requests, link) { 430 if (i915_request_completed(rq)) 431 continue; 432 433 count++; 434 if (i915_request_is_ready(rq)) 435 ready++; 436 if (i915_request_is_active(rq)) 437 inflight++; 438 } 439 440 drm_printf(m, "Timeline %llx: { ", tl->fence_context); 441 drm_printf(m, "count: %lu, ready: %lu, inflight: %lu", 442 count, ready, inflight); 443 drm_printf(m, ", seqno: { current: %d, last: %d }", 444 *tl->hwsp_seqno, tl->seqno); 445 fence = i915_active_fence_get(&tl->last_request); 446 if (fence) { 447 drm_printf(m, ", engine: %s", 448 to_request(fence)->engine->name); 449 dma_fence_put(fence); 450 } 451 drm_printf(m, " }\n"); 452 453 if (show_request) { 454 list_for_each_entry_safe(rq, rn, &tl->requests, link) 455 show_request(m, rq, "", 2); 456 } 457 458 mutex_unlock(&tl->mutex); 459 spin_lock(&timelines->lock); 460 461 /* Resume list iteration after reacquiring spinlock */ 462 list_safe_reset_next(tl, tn, link); 463 if (atomic_dec_and_test(&tl->active_count)) 464 list_del(&tl->link); 465 466 /* Defer the final release to after the spinlock */ 467 if (refcount_dec_and_test(&tl->kref.refcount)) { 468 GEM_BUG_ON(atomic_read(&tl->active_count)); 469 list_add(&tl->link, &free); 470 } 471 } 472 spin_unlock(&timelines->lock); 473 474 list_for_each_entry_safe(tl, tn, &free, link) 475 __intel_timeline_free(&tl->kref); 476 } 477 478 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 479 #include "gt/selftests/mock_timeline.c" 480 #include "gt/selftest_timeline.c" 481 #endif 482