// SPDX-License-Identifier: MIT
/*
 * Copyright © 2015-2021 Intel Corporation
 */

#include <linux/kthread.h>
#include <linux/string_helpers.h>
#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"

/*
 * Default b->irq_enable hook (installed by intel_breadcrumbs_create()):
 * forwards to intel_engine_irq_enable() on the irq engine and returns
 * its result.
 */
static bool irq_enable(struct intel_breadcrumbs *b)
{
	return intel_engine_irq_enable(b->irq_engine);
}

/*
 * Default b->irq_disable hook (installed by intel_breadcrumbs_create()):
 * forwards to intel_engine_irq_disable() on the irq engine.
 */
static void irq_disable(struct intel_breadcrumbs *b)
{
	intel_engine_irq_disable(b->irq_engine);
}

/*
 * Arm the breadcrumb interrupt.
 *
 * Takes a GT wakeref only if the GT is already awake, publishes it in
 * b->irq_armed and bumps the b->irq_enabled count. The b->irq_enable hook
 * is only invoked on the 0 -> 1 transition of that count; if the hook
 * returns true the irq worker is queued.
 *
 * The only caller in this file, intel_breadcrumbs_arm_irq(), holds
 * b->irq_lock around this call.
 */
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	intel_wakeref_t wakeref;

	/*
	 * Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference.
	 */
	wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt);
	if (GEM_WARN_ON(!wakeref))
		return;

	/*
	 * The breadcrumb irq will be disarmed on the interrupt after the
	 * waiters are signaled. This gives us a single interrupt window in
	 * which we can add a new waiter and avoid the cost of re-enabling
	 * the irq.
	 */
	WRITE_ONCE(b->irq_armed, wakeref);

	/* Requests may have completed before we could enable the interrupt. */
	if (!b->irq_enabled++ && b->irq_enable(b))
		irq_work_queue(&b->irq_work);
}

/*
 * Locked wrapper around __intel_breadcrumbs_arm_irq().
 *
 * Does nothing when there is no irq engine. Otherwise b->irq_armed is
 * rechecked under b->irq_lock so that the irq is armed at most once.
 */
static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	if (!b->irq_engine)
		return;

	spin_lock(&b->irq_lock);
	if (!b->irq_armed)
		__intel_breadcrumbs_arm_irq(b);
	spin_unlock(&b->irq_lock);
}

/*
 * Disarm the breadcrumb interrupt.
 *
 * Drops one count from b->irq_enabled, invoking the b->irq_disable hook
 * when the count reaches zero, then clears b->irq_armed and releases the
 * wakeref that was stored there via intel_gt_pm_put_async().
 *
 * The only caller in this file, intel_breadcrumbs_disarm_irq(), holds
 * b->irq_lock around this call.
 */
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	/* Snapshot the wakeref before b->irq_armed is cleared below. */
	intel_wakeref_t wakeref = b->irq_armed;

	GEM_BUG_ON(!b->irq_enabled);
	if (!--b->irq_enabled)
		b->irq_disable(b);

	WRITE_ONCE(b->irq_armed, 0);
	intel_gt_pm_put_async(b->irq_engine->gt, wakeref);
}

/*
 * Locked wrapper around __intel_breadcrumbs_disarm_irq(); b->irq_armed is
 * checked under b->irq_lock so that a disarm only happens while armed.
 */
static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	spin_lock(&b->irq_lock);
	if (b->irq_armed)
		__intel_breadcrumbs_disarm_irq(b);
	spin_unlock(&b->irq_lock);
}

/*
 * Publish @ce on the b->signalers list.
 *
 * The caller must hold ce->signal_lock (asserted); the list itself is
 * modified under b->signalers_lock using the RCU list primitive, matching
 * the list_for_each_entry_rcu() walkers in this file.
 */
static void add_signaling_context(struct intel_breadcrumbs *b,
				  struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	spin_lock(&b->signalers_lock);
	list_add_rcu(&ce->signal_link, &b->signalers);
	spin_unlock(&b->signalers_lock);
}

/*
 * Unlink @ce from b->signalers once it has no requests left to signal.
 *
 * The caller must hold ce->signal_lock (asserted).
 *
 * Returns false, leaving @ce linked, while ce->signals is non-empty.
 * Returns true after unlinking; every caller in this file then drops a
 * context reference with intel_context_put().
 */
static bool remove_signaling_context(struct intel_breadcrumbs *b,
				     struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	if (!list_empty(&ce->signals))
		return false;

	spin_lock(&b->signalers_lock);
	list_del_rcu(&ce->signal_link);
	spin_unlock(&b->signalers_lock);

	return true;
}

/*
 * Sanity check that @rq sits at a correctly ordered position in
 * ce->signals.
 *
 * Returns false if @rq does not belong to @ce, if i915_seqno_passed()
 * reports that @rq's seqno has passed that of its successor, or that its
 * predecessor's seqno has passed @rq's; returns true otherwise.
 *
 * Only referenced from a GEM_BUG_ON() in this file.
 * NOTE(review): __maybe_unused is presumably there for builds where
 * GEM_BUG_ON() compiles out its argument -- confirm.
 */
__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
	if (rq->context != ce)
		return false;

	if (!list_is_last(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(rq->fence.seqno,
			      list_next_entry(rq, signal_link)->fence.seqno))
		return false;

	if (!list_is_first(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
			      rq->fence.seqno))
		return false;

	return true;
}

/*
 * Atomically set DMA_FENCE_FLAG_SIGNALED_BIT on @fence.
 *
 * Returns true if this call set the bit, false if it was already set.
 */
static bool
__dma_fence_signal(struct dma_fence *fence)
{
	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}

/*
 * Record @timestamp on @fence, set DMA_FENCE_FLAG_TIMESTAMP_BIT and emit
 * the dma_fence_signaled tracepoint.
 */
static void
__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
{
	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
	trace_dma_fence_signaled(fence);
}

/*
 * Invoke every callback on @list for @fence.
 *
 * The caller must hold fence->lock (asserted). Each node is reinitialised
 * before its callback runs, and the safe iterator is used so the walk
 * does not depend on the node after the callback has been called.
 */
static void
__dma_fence_signal__notify(struct dma_fence *fence,
			   const struct list_head *list)
{
	struct dma_fence_cb *cur, *tmp;

	lockdep_assert_held(fence->lock);

	list_for_each_entry_safe(cur, tmp, list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}
}

/*
 * Hand @tl to intel_engine_add_retire() on the irq engine; a no-op when
 * the breadcrumbs have no irq engine.
 */
static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
	if (b->irq_engine)
		intel_engine_add_retire(b->irq_engine, tl);
}

/*
 * Push @node onto the front of the singly linked list starting at @head
 * and return the new head. This is a plain store with no atomics; in this
 * file it is only used to build a list held in a local variable.
 */
static struct llist_node *
slist_add(struct llist_node *node, struct llist_node *head)
{
	node->next = head;
	return node;
}

/*
 * irq_work callback: signal completed requests and manage irq arming.
 *
 * In order:
 *  1. Take over any requests already queued on b->signaled_requests.
 *  2. If there was nothing queued, the irq is armed and no context is
 *     listed in b->signalers, disarm the irq.
 *  3. Walk b->signalers under RCU, and for each context unlink its
 *     completed requests from ce->signals, collecting those whose fence
 *     this call marked as signaled onto the local list.
 *  4. For every collected request, run the fence callbacks under rq->lock
 *     and drop the request reference.
 *  5. Re-arm the irq if signalers remain, then disarm again if b->active
 *     reads as zero.
 */
static void signal_irq_work(struct irq_work *work)
{
	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
	const ktime_t timestamp = ktime_get();
	struct llist_node *signal, *sn;
	struct intel_context *ce;

	signal = NULL;
	if (unlikely(!llist_empty(&b->signaled_requests)))
		signal = llist_del_all(&b->signaled_requests);

	/*
	 * Keep the irq armed until the interrupt after all listeners are gone.
	 *
	 * Enabling/disabling the interrupt is rather costly, roughly a couple
	 * of hundred microseconds. If we are proactive and enable/disable
	 * the interrupt around every request that wants a breadcrumb, we
	 * quickly drown in the extra orders of magnitude of latency imposed
	 * on request submission.
	 *
	 * So we try to be lazy, and keep the interrupts enabled until no
	 * more listeners appear within a breadcrumb interrupt interval (that
	 * is until a request completes that no one cares about). The
	 * observation is that listeners come in batches, and will often
	 * listen to a bunch of requests in succession. Though note on icl+,
	 * interrupts are always enabled due to concerns with rc6 being
	 * dysfunctional with per-engine interrupt masking.
	 *
	 * We also try to avoid raising too many interrupts, as they may
	 * be generated by userspace batches and it is unfortunately rather
	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
	 * whenever no one appears to be listening, we turn off the interrupts.
	 * Fewer interrupts should conserve power -- at the very least, fewer
	 * interrupts draw less ire from other users of the system and tools
	 * like powertop.
	 */
	if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
		intel_breadcrumbs_disarm_irq(b);

	rcu_read_lock();
	/*
	 * Advertise that a walk of the signalers is in progress;
	 * intel_context_remove_breadcrumbs() spins until this drops to zero.
	 */
	atomic_inc(&b->signaler_active);
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		struct i915_request *rq;

		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
			bool release;

			/* Stop at the first request that has not completed. */
			if (!__i915_request_is_complete(rq))
				break;

			/*
			 * The SIGNAL bit is also cleared by the cancel and
			 * remove paths in this file; if it is already clear
			 * the request is not ours to unlink.
			 */
			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
						&rq->fence.flags))
				break;

			/*
			 * Queue for execution after dropping the signaling
			 * spinlock as the callback chain may end up adding
			 * more signalers to the same context or engine.
			 */
			spin_lock(&ce->signal_lock);
			list_del_rcu(&rq->signal_link);
			release = remove_signaling_context(b, ce);
			spin_unlock(&ce->signal_lock);
			if (release) {
				if (intel_timeline_is_last(ce->timeline, rq))
					add_retire(b, ce->timeline);
				/* Pairs with the get taken in insert_breadcrumb(). */
				intel_context_put(ce);
			}

			if (__dma_fence_signal(&rq->fence))
				/* We own signal_node now, xfer to local list */
				signal = slist_add(&rq->signal_node, signal);
			else
				/* Already signaled elsewhere: just drop our ref. */
				i915_request_put(rq);
		}
	}
	atomic_dec(&b->signaler_active);
	rcu_read_unlock();

	llist_for_each_safe(signal, sn, signal) {
		struct i915_request *rq =
			llist_entry(signal, typeof(*rq), signal_node);
		struct list_head cb_list;

		/* Optional scheduler hook, called before the callbacks run. */
		if (rq->engine->sched_engine->retire_inflight_request_prio)
			rq->engine->sched_engine->retire_inflight_request_prio(rq);

		spin_lock(&rq->lock);
		/* Move the callbacks onto a local head, then run them. */
		list_replace(&rq->fence.cb_list, &cb_list);
		__dma_fence_signal__timestamp(&rq->fence, timestamp);
		__dma_fence_signal__notify(&rq->fence, &cb_list);
		spin_unlock(&rq->lock);

		i915_request_put(rq);
	}

	/* Lazy irq enabling after HW submission */
	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
		intel_breadcrumbs_arm_irq(b);

	/* And confirm that we still want irqs enabled before we yield */
	if (READ_ONCE(b->irq_armed) && !atomic_read(&b->active))
		intel_breadcrumbs_disarm_irq(b);
}

/*
 * Allocate and initialise a breadcrumbs tracker for @irq_engine.
 *
 * The structure is zero-allocated with GFP_KERNEL, its kref, locks, lists
 * and irq_work (bound to signal_irq_work()) are initialised, and the
 * default irq_enable()/irq_disable() hooks are installed.
 *
 * Returns the new tracker, or NULL if the allocation fails.
 */
struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
	struct intel_breadcrumbs *b;

	b = kzalloc(sizeof(*b), GFP_KERNEL);
	if (!b)
		return NULL;

	kref_init(&b->ref);

	spin_lock_init(&b->signalers_lock);
	INIT_LIST_HEAD(&b->signalers);
	init_llist_head(&b->signaled_requests);

	spin_lock_init(&b->irq_lock);
	init_irq_work(&b->irq_work, signal_irq_work);

	b->irq_engine = irq_engine;
	b->irq_enable = irq_enable;
	b->irq_disable = irq_disable;

	return b;
}

/*
 * Reapply the interrupt state that matches the current b->irq_enabled
 * count: call the b->irq_enable hook if the count is non-zero, otherwise
 * the b->irq_disable hook. Done under b->irq_lock with local interrupts
 * saved and disabled. A no-op when there is no irq engine.
 */
void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
	unsigned long flags;

	if (!b->irq_engine)
		return;

	spin_lock_irqsave(&b->irq_lock, flags);

	if (b->irq_enabled)
		b->irq_enable(b);
	else
		b->irq_disable(b);

	spin_unlock_irqrestore(&b->irq_lock, flags);
}

/*
 * Parking hook: if the irq is still armed, queue the irq worker once more.
 * This function only queues the work; it does not wait for it to run.
 */
void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
	if (!READ_ONCE(b->irq_armed))
		return;

	/* Kick the work once more to drain the signalers, and disarm the irq */
	irq_work_queue(&b->irq_work);
}

/*
 * kref release callback for struct intel_breadcrumbs.
 *
 * Synchronises with the irq worker via irq_work_sync(), asserts that no
 * signalers remain and that the irq is disarmed, then frees the structure.
 */
void intel_breadcrumbs_free(struct kref *kref)
{
	struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref);

	irq_work_sync(&b->irq_work);
	GEM_BUG_ON(!list_empty(&b->signalers));
	GEM_BUG_ON(b->irq_armed);

	kfree(b);
}

/*
 * Mark @rq's fence as signaled and defer its callbacks to the irq worker.
 *
 * If the fence was already marked signaled nothing is done. Otherwise a
 * request reference is taken (dropped by signal_irq_work() after the
 * callbacks have run) and the request is pushed onto
 * b->signaled_requests; the irq worker is queued when llist_add() returns
 * true, i.e. when the list was empty before this addition.
 */
static void irq_signal_request(struct i915_request *rq,
			       struct intel_breadcrumbs *b)
{
	if (!__dma_fence_signal(&rq->fence))
		return;

	i915_request_get(rq);
	if (llist_add(&rq->signal_node, &b->signaled_requests))
		irq_work_queue(&b->irq_work);
}

/*
 * Attach @rq to its context's list of requests awaiting a breadcrumb.
 *
 * The only caller in this file holds ce->signal_lock, which
 * add_signaling_context() asserts.
 *
 * Nothing is done if I915_FENCE_FLAG_SIGNAL is already set. A request
 * that has already completed is passed directly to irq_signal_request().
 * Otherwise the request is inserted into ce->signals (taking a request
 * reference, plus a context reference when the context had no signals
 * before), the SIGNAL flag is set, and the irq worker is queued if the
 * irq is not armed or the request completed in the meantime.
 */
static void insert_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	struct list_head *pos;

	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
		return;

	/*
	 * If the request is already completed, we can transfer it
	 * straight onto a signaled list, and queue the irq worker for
	 * its signal completion.
	 */
	if (__i915_request_is_complete(rq)) {
		irq_signal_request(rq, b);
		return;
	}

	if (list_empty(&ce->signals)) {
		/* First signal for this context: pin it and publish it. */
		intel_context_get(ce);
		add_signaling_context(b, ce);
		pos = &ce->signals;
	} else {
		/*
		 * We keep the seqno in retirement order, so we can break
		 * inside intel_engine_signal_breadcrumbs as soon as we've
		 * passed the last completed request (or seen a request that
		 * hasn't even started). We could walk the timeline->requests,
		 * but keeping a separate signalers_list has the advantage of
		 * hopefully being much smaller than the full list and so
		 * provides faster iteration and detection when there are no
		 * more interrupts required for this context.
		 *
		 * We typically expect to add new signalers in order, so we
		 * start looking for our insertion point from the tail of
		 * the list.
		 */
		list_for_each_prev(pos, &ce->signals) {
			struct i915_request *it =
				list_entry(pos, typeof(*it), signal_link);

			if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
				break;
		}
		/*
		 * If the walk ran to completion, pos is the list head and
		 * the request is inserted at the front.
		 */
	}

	/* This reference is held for as long as rq sits on ce->signals. */
	i915_request_get(rq);
	list_add_rcu(&rq->signal_link, pos);
	GEM_BUG_ON(!check_signal_order(ce, rq));
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

	/*
	 * Defer enabling the interrupt to after HW submission and recheck
	 * the request as it may have completed and raised the interrupt as
	 * we were attaching it into the lists.
	 */
	if (!READ_ONCE(b->irq_armed) || __i915_request_is_complete(rq))
		irq_work_queue(&b->irq_work);
}

/*
 * Request a breadcrumb for @rq.
 *
 * Returns early if the fence is already signaled or the request is not
 * marked I915_FENCE_FLAG_ACTIVE. Otherwise the ACTIVE flag is rechecked
 * under ce->signal_lock and, if still set, the request is attached with
 * insert_breadcrumb().
 *
 * Always returns true.
 */
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;

	/* Serialises with i915_request_retire() using rq->lock */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return true;

	/*
	 * Peek at i915_request_submit()/i915_request_unsubmit() status.
	 *
	 * If the request is not yet active (and not signaled), we will
	 * attach the breadcrumb later.
	 */
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		return true;

	spin_lock(&ce->signal_lock);
	/* Recheck under the lock before attaching. */
	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		insert_breadcrumb(rq);
	spin_unlock(&ce->signal_lock);

	return true;
}

/*
 * Detach @rq from its context's signal list.
 *
 * Under ce->signal_lock, claims the request by clearing
 * I915_FENCE_FLAG_SIGNAL; if the flag was not set there is nothing to
 * cancel. Otherwise the request is unlinked and, if that left the context
 * without signals, the context is removed from b->signalers and its
 * reference dropped. A request that has already completed is still handed
 * to irq_signal_request() so that it gets signaled. Finally the reference
 * taken in insert_breadcrumb() is released.
 */
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	bool release;

	spin_lock(&ce->signal_lock);
	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
		spin_unlock(&ce->signal_lock);
		return;
	}

	list_del_rcu(&rq->signal_link);
	release = remove_signaling_context(b, ce);
	spin_unlock(&ce->signal_lock);
	if (release)
		intel_context_put(ce);

	if (__i915_request_is_complete(rq))
		irq_signal_request(rq, b);

	i915_request_put(rq);
}

/*
 * Flush every pending breadcrumb of @ce out of @b.
 *
 * Under ce->signal_lock (interrupts saved and disabled), each request on
 * ce->signals whose I915_FENCE_FLAG_SIGNAL bit this call clears is
 * unlinked, passed to irq_signal_request() and has its list reference
 * dropped; all requests found are asserted to be complete. If the context
 * then has no signals left it is removed from b->signalers and its
 * reference dropped after unlocking.
 *
 * Before returning, busy-waits until b->signaler_active reads zero, i.e.
 * until no signal_irq_work() walk of the signalers is in progress.
 */
void intel_context_remove_breadcrumbs(struct intel_context *ce,
				      struct intel_breadcrumbs *b)
{
	struct i915_request *rq, *rn;
	bool release = false;
	unsigned long flags;

	spin_lock_irqsave(&ce->signal_lock, flags);

	if (list_empty(&ce->signals))
		goto unlock;

	list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
		GEM_BUG_ON(!__i915_request_is_complete(rq));
		/* Skip requests whose SIGNAL bit was already cleared elsewhere. */
		if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
					&rq->fence.flags))
			continue;

		list_del_rcu(&rq->signal_link);
		irq_signal_request(rq, b);
		i915_request_put(rq);
	}
	release = remove_signaling_context(b, ce);

unlock:
	spin_unlock_irqrestore(&ce->signal_lock, flags);
	if (release)
		intel_context_put(ce);

	while (atomic_read(&b->signaler_active))
		cpu_relax();
}

/*
 * Print one line per request awaiting a breadcrumb on @b, walking the
 * signalers and their requests under the RCU read lock.
 *
 * Each line shows the fence context and seqno, a marker ("!" if the
 * request is complete, "*" if it has started, nothing otherwise) and the
 * time in milliseconds since rq->emitted_jiffies.
 */
static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
	struct intel_context *ce;
	struct i915_request *rq;

	drm_printf(p, "Signals:\n");

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		list_for_each_entry_rcu(rq, &ce->signals, signal_link)
			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
				   rq->fence.context, rq->fence.seqno,
				   __i915_request_is_complete(rq) ? "!" :
				   __i915_request_has_started(rq) ? "*" :
				   "",
				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	}
	rcu_read_unlock();
}

/*
 * Dump the breadcrumb state of @engine to @p: whether the irq is armed,
 * followed by the pending signals if any context is listed. Does nothing
 * if the engine has no breadcrumbs.
 */
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p)
{
	struct intel_breadcrumbs *b;

	b = engine->breadcrumbs;
	if (!b)
		return;

	drm_printf(p, "IRQ: %s\n", str_enabled_disabled(b->irq_armed));
	if (!list_empty(&b->signalers))
		print_signals(b, p);
}