// SPDX-License-Identifier: MIT
/*
 * Copyright © 2015-2021 Intel Corporation
 */

#include <linux/kthread.h>
#include <linux/string_helpers.h>
#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>

#include <drm/drm_print.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"

/*
 * Default b->irq_enable hook (installed by intel_breadcrumbs_create()):
 * forwards to intel_engine_irq_enable() for the breadcrumbs' irq_engine and
 * passes its result back to the caller.
 */
static bool irq_enable(struct intel_breadcrumbs *b)
{
	return intel_engine_irq_enable(b->irq_engine);
}

/*
 * Default b->irq_disable hook (installed by intel_breadcrumbs_create()):
 * forwards to intel_engine_irq_disable() for the breadcrumbs' irq_engine.
 */
static void irq_disable(struct intel_breadcrumbs *b)
{
	intel_engine_irq_disable(b->irq_engine);
}

/*
 * Arm the breadcrumb interrupt: take a GT wakeref (only if the GT is already
 * awake), publish it in b->irq_armed and bump the b->irq_enabled count,
 * invoking the irq_enable hook on the 0 -> 1 transition.
 *
 * The only caller in this file, intel_breadcrumbs_arm_irq(), holds
 * b->irq_lock and has already checked b->irq_engine.
 */
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	intel_wakeref_t wakeref;

	/*
	 * Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference.
	 */
	wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt);
	if (GEM_WARN_ON(!wakeref))
		return;

	/*
	 * The breadcrumb irq will be disarmed on the interrupt after the
	 * waiters are signaled. This gives us a single interrupt window in
	 * which we can add a new waiter and avoid the cost of re-enabling
	 * the irq.
	 */
	WRITE_ONCE(b->irq_armed, wakeref);

	/* Requests may have completed before we could enable the interrupt. */
	if (!b->irq_enabled++ && b->irq_enable(b))
		irq_work_queue(&b->irq_work);
}

/*
 * Locked wrapper around __intel_breadcrumbs_arm_irq(): does nothing when
 * there is no irq_engine, otherwise arms the interrupt under b->irq_lock
 * unless it is already armed.
 */
static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	if (!b->irq_engine)
		return;

	spin_lock(&b->irq_lock);
	if (!b->irq_armed)
		__intel_breadcrumbs_arm_irq(b);
	spin_unlock(&b->irq_lock);
}

/*
 * Undo __intel_breadcrumbs_arm_irq(): drop one b->irq_enabled count (calling
 * the irq_disable hook when it reaches zero), clear b->irq_armed and hand the
 * wakeref that was stored there to intel_gt_pm_put_async().
 *
 * The only caller in this file, intel_breadcrumbs_disarm_irq(), holds
 * b->irq_lock and has checked that b->irq_armed is set.
 */
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	/* Snapshot the wakeref before irq_armed is cleared below */
	intel_wakeref_t wakeref = b->irq_armed;

	GEM_BUG_ON(!b->irq_enabled);
	if (!--b->irq_enabled)
		b->irq_disable(b);

	WRITE_ONCE(b->irq_armed, NULL);
	intel_gt_pm_put_async(b->irq_engine->gt, wakeref);
}

/*
 * Locked wrapper around __intel_breadcrumbs_disarm_irq(): disarms the
 * interrupt under b->irq_lock if it is currently armed.
 */
static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	spin_lock(&b->irq_lock);
	if (b->irq_armed)
		__intel_breadcrumbs_disarm_irq(b);
	spin_unlock(&b->irq_lock);
}

/*
 * Publish @ce on the b->signalers list (RCU list insertion, serialised by
 * b->signalers_lock). The caller must hold ce->signal_lock.
 */
static void add_signaling_context(struct intel_breadcrumbs *b,
				  struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	spin_lock(&b->signalers_lock);
	list_add_rcu(&ce->signal_link, &b->signalers);
	spin_unlock(&b->signalers_lock);
}

/*
 * Unlink @ce from b->signalers, but only once ce->signals is empty.
 * The caller must hold ce->signal_lock.
 *
 * Returns true if @ce was removed, false if it still has requests queued on
 * ce->signals and was left in place. Every caller in this file follows a
 * true return with intel_context_put(ce).
 */
static bool remove_signaling_context(struct intel_breadcrumbs *b,
				     struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	if (!list_empty(&ce->signals))
		return false;

	spin_lock(&b->signalers_lock);
	list_del_rcu(&ce->signal_link);
	spin_unlock(&b->signalers_lock);

	return true;
}

/*
 * Sanity check used from a GEM_BUG_ON() in insert_breadcrumb(): returns true
 * when @rq belongs to @ce and sits in ce->signals in an order consistent with
 * its neighbours, i.e. i915_seqno_passed() neither reports @rq's seqno as
 * having passed its successor's, nor its predecessor's as having passed
 * @rq's.
 */
__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
	if (rq->context != ce)
		return false;

	/* Compare against the next entry, if there is one */
	if (!list_is_last(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(rq->fence.seqno,
			      list_next_entry(rq, signal_link)->fence.seqno))
		return false;

	/* Compare against the previous entry, if there is one */
	if (!list_is_first(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
			      rq->fence.seqno))
		return false;

	return true;
}

/*
 * Atomically set DMA_FENCE_FLAG_SIGNALED_BIT on @fence.
 *
 * Returns true if this call set the bit, false if it was already set.
 */
static bool
__dma_fence_signal(struct dma_fence *fence)
{
	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}

/*
 * Record @timestamp on @fence, flag the timestamp as present via
 * DMA_FENCE_FLAG_TIMESTAMP_BIT and emit the dma_fence_signaled tracepoint.
 */
static void
__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
{
	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
	trace_dma_fence_signaled(fence);
}

/*
 * Run every callback linked on @list for @fence. The caller must hold
 * fence->lock.
 */
static void
__dma_fence_signal__notify(struct dma_fence *fence,
			   const struct list_head *list)
{
	struct dma_fence_cb *cur, *tmp;

	lockdep_assert_held(fence->lock);

	/*
	 * Use the _safe iterator: each node is reinitialised (and so detached
	 * from @list) before its callback is invoked.
	 */
	list_for_each_entry_safe(cur, tmp, list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}
}

/*
 * Hand @tl to intel_engine_add_retire() on the irq_engine; a no-op when the
 * breadcrumbs have no irq_engine.
 */
static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
	if (b->irq_engine)
		intel_engine_add_retire(b->irq_engine, tl);
}

/*
 * Push @node onto the front of the singly linked list whose first element is
 * @head (may be NULL) and return the new first element.
 */
static struct llist_node *
slist_add(struct llist_node *node, struct llist_node *head)
{
	node->next = head;
	return node;
}

/*
 * irq_work callback (registered in intel_breadcrumbs_create()).
 *
 * Gathers requests to signal from two sources -- b->signaled_requests (filled
 * by irq_signal_request()) and the completed requests found by walking every
 * context on b->signalers -- then, for each one, stamps its fence, runs the
 * fence callbacks and drops the request reference. Along the way it lazily
 * arms or disarms the breadcrumb interrupt.
 */
static void signal_irq_work(struct irq_work *work)
{
	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
	const ktime_t timestamp = ktime_get();
	struct llist_node *signal, *sn;
	struct intel_context *ce;

	/* Take over everything already marked signaled by irq_signal_request() */
	signal = NULL;
	if (unlikely(!llist_empty(&b->signaled_requests)))
		signal = llist_del_all(&b->signaled_requests);

	/*
	 * Keep the irq armed until the interrupt after all listeners are gone.
	 *
	 * Enabling/disabling the interrupt is rather costly, roughly a couple
	 * of hundred microseconds. If we are proactive and enable/disable
	 * the interrupt around every request that wants a breadcrumb, we
	 * quickly drown in the extra orders of magnitude of latency imposed
	 * on request submission.
	 *
	 * So we try to be lazy, and keep the interrupts enabled until no
	 * more listeners appear within a breadcrumb interrupt interval (that
	 * is until a request completes that no one cares about). The
	 * observation is that listeners come in batches, and will often
	 * listen to a bunch of requests in succession. Though note on icl+,
	 * interrupts are always enabled due to concerns with rc6 being
	 * dysfunctional with per-engine interrupt masking.
	 *
	 * We also try to avoid raising too many interrupts, as they may
	 * be generated by userspace batches and it is unfortunately rather
	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
	 * whenever no one appears to be listening, we turn off the interrupts.
	 * Fewer interrupts should conserve power -- at the very least, fewer
	 * interrupts draw less ire from other users of the system and tools
	 * like powertop.
	 */
	if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
		intel_breadcrumbs_disarm_irq(b);

	/*
	 * Walk the signalers under RCU. signaler_active is raised for the
	 * duration of the walk; intel_context_remove_breadcrumbs() spins
	 * until it drops back to zero.
	 */
	rcu_read_lock();
	atomic_inc(&b->signaler_active);
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		struct i915_request *rq;

		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
			bool release;

			/*
			 * insert_breadcrumb() keeps ce->signals in seqno
			 * order, so stop at the first incomplete request.
			 */
			if (!__i915_request_is_complete(rq))
				break;

			/*
			 * Whoever clears I915_FENCE_FLAG_SIGNAL owns the
			 * removal of rq from the list; if the flag is already
			 * clear another path got there first.
			 */
			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
						&rq->fence.flags))
				break;

			/*
			 * Queue for execution after dropping the signaling
			 * spinlock as the callback chain may end up adding
			 * more signalers to the same context or engine.
			 */
			spin_lock(&ce->signal_lock);
			list_del_rcu(&rq->signal_link);
			release = remove_signaling_context(b, ce);
			spin_unlock(&ce->signal_lock);
			if (release) {
				/* ce has no more signals; see insert_breadcrumb() for the matching get */
				if (intel_timeline_is_last(ce->timeline, rq))
					add_retire(b, ce->timeline);
				intel_context_put(ce);
			}

			if (__dma_fence_signal(&rq->fence))
				/* We own signal_node now, xfer to local list */
				signal = slist_add(&rq->signal_node, signal);
			else
				/* Already signaled elsewhere: just drop the list's reference */
				i915_request_put(rq);
		}
	}
	atomic_dec(&b->signaler_active);
	rcu_read_unlock();

	/* Now, outside the RCU walk, complete every request we collected */
	llist_for_each_safe(signal, sn, signal) {
		struct i915_request *rq =
			llist_entry(signal, typeof(*rq), signal_node);
		struct list_head cb_list;

		/* Optional scheduler hook, invoked only if the backend provides one */
		if (rq->engine->sched_engine->retire_inflight_request_prio)
			rq->engine->sched_engine->retire_inflight_request_prio(rq);

		/*
		 * Move the fence callbacks onto a local list head and run
		 * them with rq->lock held (__dma_fence_signal__notify()
		 * asserts fence->lock -- presumably the same lock).
		 */
		spin_lock(&rq->lock);
		list_replace(&rq->fence.cb_list, &cb_list);
		__dma_fence_signal__timestamp(&rq->fence, timestamp);
		__dma_fence_signal__notify(&rq->fence, &cb_list);
		spin_unlock(&rq->lock);

		/* Drop the reference that travelled with the request on the list */
		i915_request_put(rq);
	}

	/* Lazy irq enabling after HW submission */
	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
		intel_breadcrumbs_arm_irq(b);

	/* And confirm that we still want irqs enabled before we yield */
	if (READ_ONCE(b->irq_armed) && !atomic_read(&b->active))
		intel_breadcrumbs_disarm_irq(b);
}

/*
 * Allocate and initialise a breadcrumbs tracker bound to @irq_engine.
 *
 * The structure is zero-allocated with GFP_KERNEL; its kref, locks, signaler
 * list, signaled-request llist and irq_work (running signal_irq_work()) are
 * initialised, and the default irq_enable/irq_disable hooks are installed.
 *
 * Returns the new object, or NULL if the allocation fails.
 */
struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
	struct intel_breadcrumbs *b;

	b = kzalloc(sizeof(*b), GFP_KERNEL);
	if (!b)
		return NULL;

	kref_init(&b->ref);

	spin_lock_init(&b->signalers_lock);
	INIT_LIST_HEAD(&b->signalers);
	init_llist_head(&b->signaled_requests);

	spin_lock_init(&b->irq_lock);
	init_irq_work(&b->irq_work, signal_irq_work);

	b->irq_engine = irq_engine;
	b->irq_enable = irq_enable;
	b->irq_disable = irq_disable;

	return b;
}
/*
 * Re-apply the interrupt state implied by b->irq_enabled: call the irq_enable
 * hook if the count is non-zero, the irq_disable hook otherwise. Done under
 * b->irq_lock with local interrupts saved/disabled. A no-op when there is no
 * irq_engine.
 */
void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
	unsigned long flags;

	if (!b->irq_engine)
		return;

	spin_lock_irqsave(&b->irq_lock, flags);

	if (b->irq_enabled)
		b->irq_enable(b);
	else
		b->irq_disable(b);

	spin_unlock_irqrestore(&b->irq_lock, flags);
}

/*
 * If the breadcrumb interrupt is still armed, queue the irq_work so that
 * signal_irq_work() runs once more; nothing to do when it is not armed.
 */
void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
	if (!READ_ONCE(b->irq_armed))
		return;

	/* Kick the work once more to drain the signalers, and disarm the irq */
	irq_work_queue(&b->irq_work);
}

/*
 * Release function for the kref embedded in struct intel_breadcrumbs
 * (presumably passed to kref_put() by the header's put helper -- not visible
 * here). Waits for any pending irq_work, checks that no signalers remain and
 * that the interrupt is disarmed, then frees the structure.
 */
void intel_breadcrumbs_free(struct kref *kref)
{
	struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref);

	irq_work_sync(&b->irq_work);
	GEM_BUG_ON(!list_empty(&b->signalers));
	GEM_BUG_ON(b->irq_armed);

	kfree(b);
}

/*
 * Mark @rq's fence as signaled and pass the request to signal_irq_work() via
 * b->signaled_requests so that its callbacks are run from the irq_work.
 *
 * Does nothing if the fence was already marked signaled. Otherwise a request
 * reference is taken for the list entry; signal_irq_work() drops it after
 * running the callbacks.
 */
static void irq_signal_request(struct i915_request *rq,
			       struct intel_breadcrumbs *b)
{
	if (!__dma_fence_signal(&rq->fence))
		return;

	i915_request_get(rq);
	/* NOTE(review): llist_add() presumably returns true only when the list was empty */
	if (llist_add(&rq->signal_node, &b->signaled_requests))
		irq_work_queue(&b->irq_work);
}

/*
 * Attach @rq to its context's list of requests awaiting a breadcrumb
 * signal, unless it is already attached or already complete.
 *
 * The only caller in this file, i915_request_enable_breadcrumb(), holds
 * ce->signal_lock (add_signaling_context() asserts it).
 */
static void insert_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	struct list_head *pos;

	/* Already on ce->signals */
	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
		return;

	/*
	 * If the request is already completed, we can transfer it
	 * straight onto a signaled list, and queue the irq worker for
	 * its signal completion.
	 */
	if (__i915_request_is_complete(rq)) {
		irq_signal_request(rq, b);
		return;
	}

	if (list_empty(&ce->signals)) {
		/*
		 * First signal for this context: take a context reference and
		 * publish ce on b->signalers. The reference is dropped by
		 * whoever later sees remove_signaling_context() return true.
		 */
		intel_context_get(ce);
		add_signaling_context(b, ce);
		pos = &ce->signals;
	} else {
		/*
		 * We keep the seqno in retirement order, so we can break
		 * inside intel_engine_signal_breadcrumbs as soon as we've
		 * passed the last completed request (or seen a request that
		 * hasn't even started). We could walk the timeline->requests,
		 * but keeping a separate signalers_list has the advantage of
		 * hopefully being much smaller than the full list and so
		 * provides faster iteration and detection when there are no
		 * more interrupts required for this context.
		 *
		 * We typically expect to add new signalers in order, so we
		 * start looking for our insertion point from the tail of
		 * the list.
		 */
		list_for_each_prev(pos, &ce->signals) {
			struct i915_request *it =
				list_entry(pos, typeof(*it), signal_link);

			if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
				break;
		}
	}

	/* The list entry holds its own request reference */
	i915_request_get(rq);
	list_add_rcu(&rq->signal_link, pos);
	GEM_BUG_ON(!check_signal_order(ce, rq));
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

	/*
	 * Defer enabling the interrupt to after HW submission and recheck
	 * the request as it may have completed and raised the interrupt as
	 * we were attaching it into the lists.
	 */
	if (!READ_ONCE(b->irq_armed) || __i915_request_is_complete(rq))
		irq_work_queue(&b->irq_work);
}

/*
 * Ask for @rq to be signaled via the breadcrumbs.
 *
 * Nothing is attached if the fence is already signaled or the request is not
 * (yet) flagged I915_FENCE_FLAG_ACTIVE; otherwise insert_breadcrumb() is
 * called under ce->signal_lock.
 *
 * Returns true on every path.
 */
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;

	/* Serialises with i915_request_retire() using rq->lock */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return true;

	/*
	 * Peek at i915_request_submit()/i915_request_unsubmit() status.
	 *
	 * If the request is not yet active (and not signaled), we will
	 * attach the breadcrumb later.
	 */
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		return true;

	spin_lock(&ce->signal_lock);
	/* Re-check under the lock: the unlocked test above was only a peek */
	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		insert_breadcrumb(rq);
	spin_unlock(&ce->signal_lock);

	return true;
}

/*
 * Detach @rq from its context's signal list, if it is attached.
 *
 * When I915_FENCE_FLAG_SIGNAL was set, the request is unlinked under
 * ce->signal_lock, the context is dropped from b->signalers (and its
 * reference released) if this was its last signal, a request that has
 * already completed is handed to irq_signal_request(), and finally the
 * reference held by the list entry is dropped.
 */
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	bool release;

	spin_lock(&ce->signal_lock);
	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
		/* Not attached (or already claimed by another path) */
		spin_unlock(&ce->signal_lock);
		return;
	}

	list_del_rcu(&rq->signal_link);
	release = remove_signaling_context(b, ce);
	spin_unlock(&ce->signal_lock);
	if (release)
		intel_context_put(ce);

	/* Completed in the meantime: still make sure it gets signaled */
	if (__i915_request_is_complete(rq))
		irq_signal_request(rq, b);

	/* Drop the reference taken by insert_breadcrumb() */
	i915_request_put(rq);
}

/*
 * Flush every request still attached to @ce's signal list.
 *
 * Under ce->signal_lock (irqsave) each request still flagged
 * I915_FENCE_FLAG_SIGNAL is unlinked, passed to irq_signal_request() and has
 * its list reference dropped; all requests on the list are expected to be
 * complete (GEM_BUG_ON otherwise). If the list ends up empty, @ce is removed
 * from b->signalers and its reference released. Finally, busy-waits until
 * b->signaler_active reads zero, i.e. until no signal_irq_work() is inside
 * its list walk.
 */
void intel_context_remove_breadcrumbs(struct intel_context *ce,
				      struct intel_breadcrumbs *b)
{
	struct i915_request *rq, *rn;
	bool release = false;
	unsigned long flags;

	spin_lock_irqsave(&ce->signal_lock, flags);

	if (list_empty(&ce->signals))
		goto unlock;

	list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
		GEM_BUG_ON(!__i915_request_is_complete(rq));
		/* Skip entries whose removal is already owned by another path */
		if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
					&rq->fence.flags))
			continue;

		list_del_rcu(&rq->signal_link);
		irq_signal_request(rq, b);
		i915_request_put(rq);
	}
	release = remove_signaling_context(b, ce);

unlock:
	spin_unlock_irqrestore(&ce->signal_lock, flags);
	if (release)
		intel_context_put(ce);

	/* Wait for any concurrent signal_irq_work() walk to finish */
	while (atomic_read(&b->signaler_active))
		cpu_relax();
}

/*
 * Print one line per request on every signaling context of @b: fence
 * context:seqno, a status marker ("!" if complete, "*" if started, empty
 * otherwise) and the time in ms since rq->emitted_jiffies. The lists are
 * walked under rcu_read_lock().
 */
static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
	struct intel_context *ce;
	struct i915_request *rq;

	drm_printf(p, "Signals:\n");

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		list_for_each_entry_rcu(rq, &ce->signals, signal_link)
			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
				   rq->fence.context, rq->fence.seqno,
				   __i915_request_is_complete(rq) ? "!" :
				   __i915_request_has_started(rq) ? "*" :
				   "",
				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	}
	rcu_read_unlock();
}

/*
 * Dump the breadcrumb state of @engine to @p: whether the interrupt is
 * armed, followed by the pending signals if any context is on the signalers
 * list. Does nothing if the engine has no breadcrumbs.
 */
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p)
{
	struct intel_breadcrumbs *b;

	b = engine->breadcrumbs;
	if (!b)
		return;

	drm_printf(p, "IRQ: %s\n", str_enabled_disabled(b->irq_armed));
	if (!list_empty(&b->signalers))
		print_signals(b, p);
}