// SPDX-License-Identifier: MIT
/*
 * Copyright © 2015-2021 Intel Corporation
 */

#include <linux/kthread.h>
#include <linux/string_helpers.h>
#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"

/*
 * Default b->irq_enable hook (installed by intel_breadcrumbs_create()).
 * Forwards to intel_engine_irq_enable() on the breadcrumbs' irq engine and
 * returns its result unchanged.
 */
static bool irq_enable(struct intel_breadcrumbs *b)
{
	return intel_engine_irq_enable(b->irq_engine);
}

/*
 * Default b->irq_disable hook (installed by intel_breadcrumbs_create()).
 * Forwards to intel_engine_irq_disable() on the breadcrumbs' irq engine.
 */
static void irq_disable(struct intel_breadcrumbs *b)
{
	intel_engine_irq_disable(b->irq_engine);
}

/*
 * Arm the breadcrumb interrupt.
 *
 * Takes a GT pm reference with intel_gt_pm_get_if_awake(); if that fails a
 * warning is raised and nothing is armed. Otherwise b->irq_armed is set and
 * the enable count is bumped, calling the b->irq_enable hook only on the
 * 0 -> 1 transition. The pm reference is released again by
 * __intel_breadcrumbs_disarm_irq().
 *
 * The only caller in this file, intel_breadcrumbs_arm_irq(), holds
 * b->irq_lock and dereferences nothing unless b->irq_engine is set.
 */
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	/*
	 * Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference.
	 */
	if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
		return;

	/*
	 * The breadcrumb irq will be disarmed on the interrupt after the
	 * waiters are signaled. This gives us a single interrupt window in
	 * which we can add a new waiter and avoid the cost of re-enabling
	 * the irq.
	 */
	WRITE_ONCE(b->irq_armed, true);

	/* Requests may have completed before we could enable the interrupt. */
	if (!b->irq_enabled++ && b->irq_enable(b))
		irq_work_queue(&b->irq_work);
}

/*
 * Arm the breadcrumb interrupt if it is not already armed.
 *
 * Does nothing when the breadcrumbs have no irq engine. The armed check and
 * the arming itself are both done under b->irq_lock.
 */
static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	if (!b->irq_engine)
		return;

	spin_lock(&b->irq_lock);
	if (!b->irq_armed)
		__intel_breadcrumbs_arm_irq(b);
	spin_unlock(&b->irq_lock);
}

/*
 * Disarm the breadcrumb interrupt.
 *
 * Drops one enable count (which must be non-zero) and calls the
 * b->irq_disable hook when the count reaches zero, then clears b->irq_armed
 * and releases the GT pm reference taken when arming via
 * intel_gt_pm_put_async().
 *
 * The only caller in this file, intel_breadcrumbs_disarm_irq(), holds
 * b->irq_lock.
 */
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	GEM_BUG_ON(!b->irq_enabled);
	if (!--b->irq_enabled)
		b->irq_disable(b);

	WRITE_ONCE(b->irq_armed, false);
	intel_gt_pm_put_async(b->irq_engine->gt);
}

/*
 * Disarm the breadcrumb interrupt if it is currently armed. The armed check
 * and the disarm are both done under b->irq_lock.
 */
static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	spin_lock(&b->irq_lock);
	if (b->irq_armed)
		__intel_breadcrumbs_disarm_irq(b);
	spin_unlock(&b->irq_lock);
}

/*
 * Publish @ce on the b->signalers list.
 *
 * The caller must hold ce->signal_lock (asserted with lockdep). The list is
 * modified under b->signalers_lock with the RCU list primitive, as it is
 * walked under rcu_read_lock() elsewhere in this file.
 */
static void add_signaling_context(struct intel_breadcrumbs *b,
				  struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	spin_lock(&b->signalers_lock);
	list_add_rcu(&ce->signal_link, &b->signalers);
	spin_unlock(&b->signalers_lock);
}

/*
 * Remove @ce from b->signalers once it has no requests left to signal.
 *
 * The caller must hold ce->signal_lock (asserted with lockdep).
 *
 * Returns false, leaving the lists untouched, while ce->signals is
 * non-empty; returns true after unlinking @ce. Every caller in this file
 * responds to true by dropping a context reference with intel_context_put(),
 * pairing with the intel_context_get() in insert_breadcrumb().
 */
static bool remove_signaling_context(struct intel_breadcrumbs *b,
				     struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	if (!list_empty(&ce->signals))
		return false;

	spin_lock(&b->signalers_lock);
	list_del_rcu(&ce->signal_link);
	spin_unlock(&b->signalers_lock);

	return true;
}

/*
 * Sanity check used from a GEM_BUG_ON() in insert_breadcrumb(): verify that
 * @rq belongs to @ce and that its position in ce->signals is consistent with
 * its neighbours' seqnos, as judged by i915_seqno_passed().
 *
 * Returns false if rq->context is not @ce, if @rq's seqno has passed that of
 * the following entry, or if the preceding entry's seqno has passed @rq's;
 * true otherwise.
 */
__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
	if (rq->context != ce)
		return false;

	if (!list_is_last(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(rq->fence.seqno,
			      list_next_entry(rq, signal_link)->fence.seqno))
		return false;

	if (!list_is_first(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
			      rq->fence.seqno))
		return false;

	return true;
}

/*
 * Atomically mark @fence as signaled.
 *
 * Returns true if this call set DMA_FENCE_FLAG_SIGNALED_BIT, false if the
 * bit was already set. No timestamp is recorded and no callbacks are run
 * here; see __dma_fence_signal__timestamp() and __dma_fence_signal__notify().
 */
static bool
__dma_fence_signal(struct dma_fence *fence)
{
	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}

/*
 * Store @timestamp in @fence, flag the timestamp as present with
 * DMA_FENCE_FLAG_TIMESTAMP_BIT and emit the dma_fence_signaled tracepoint.
 */
static void
__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
{
	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
	trace_dma_fence_signaled(fence);
}

/*
 * Run every callback queued on @list for @fence.
 *
 * The caller must hold fence->lock (asserted with lockdep). Each node is
 * re-initialised to an empty list head before its callback is invoked, so
 * the _safe iterator is used to fetch the next entry beforehand.
 */
static void
__dma_fence_signal__notify(struct dma_fence *fence,
			   const struct list_head *list)
{
	struct dma_fence_cb *cur, *tmp;

	lockdep_assert_held(fence->lock);

	list_for_each_entry_safe(cur, tmp, list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}
}

/*
 * Pass @tl to intel_engine_add_retire() on the breadcrumbs' irq engine.
 * Does nothing when there is no irq engine.
 */
static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
	if (b->irq_engine)
		intel_engine_add_retire(b->irq_engine, tl);
}

/*
 * Push @node onto the front of a private singly linked list whose current
 * first element is @head (may be NULL). Returns the new first element,
 * i.e. @node. No atomics are used; the list must not be shared.
 */
static struct llist_node *
slist_add(struct llist_node *node, struct llist_node *head)
{
	node->next = head;
	return node;
}

/*
 * irq_work handler: signal the fences of completed requests.
 *
 * In order, this:
 *  1. grabs any requests already queued on b->signaled_requests (by
 *     irq_signal_request());
 *  2. disarms the interrupt if nothing was queued, the irq is armed and no
 *     context is listed on b->signalers;
 *  3. walks b->signalers under RCU, detaching completed requests that still
 *     carry I915_FENCE_FLAG_SIGNAL and collecting those whose fence it
 *     managed to mark signaled onto the local list;
 *  4. for every collected request, records the timestamp and runs the fence
 *     callbacks under rq->lock, then drops the request reference;
 *  5. re-arms the interrupt if it is not armed but signalers remain.
 */
static void signal_irq_work(struct irq_work *work)
{
	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
	const ktime_t timestamp = ktime_get();
	struct llist_node *signal, *sn;
	struct intel_context *ce;

	signal = NULL;
	if (unlikely(!llist_empty(&b->signaled_requests)))
		signal = llist_del_all(&b->signaled_requests);

	/*
	 * Keep the irq armed until the interrupt after all listeners are gone.
	 *
	 * Enabling/disabling the interrupt is rather costly, roughly a couple
	 * of hundred microseconds. If we are proactive and enable/disable
	 * the interrupt around every request that wants a breadcrumb, we
	 * quickly drown in the extra orders of magnitude of latency imposed
	 * on request submission.
	 *
	 * So we try to be lazy, and keep the interrupts enabled until no
	 * more listeners appear within a breadcrumb interrupt interval (that
	 * is until a request completes that no one cares about). The
	 * observation is that listeners come in batches, and will often
	 * listen to a bunch of requests in succession. Though note on icl+,
	 * interrupts are always enabled due to concerns with rc6 being
	 * dysfunctional with per-engine interrupt masking.
	 *
	 * We also try to avoid raising too many interrupts, as they may
	 * be generated by userspace batches and it is unfortunately rather
	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
	 * whenever no one appears to be listening, we turn off the interrupts.
	 * Fewer interrupts should conserve power -- at the very least, fewer
	 * interrupts draw less ire from other users of the system and tools
	 * like powertop.
	 */
	if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
		intel_breadcrumbs_disarm_irq(b);

	rcu_read_lock();
	/*
	 * Advertise that a walk of the signalers is in progress;
	 * intel_context_remove_breadcrumbs() spins until this count drops
	 * back to zero.
	 */
	atomic_inc(&b->signaler_active);
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		struct i915_request *rq;

		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
			bool release;

			/* Stop at the first request that has not completed. */
			if (!__i915_request_is_complete(rq))
				break;

			/*
			 * Whoever clears I915_FENCE_FLAG_SIGNAL is the one
			 * that unlinks the request; if it is already clear,
			 * leave this request (and the rest of this context's
			 * list) alone.
			 */
			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
						&rq->fence.flags))
				break;

			/*
			 * Queue for execution after dropping the signaling
			 * spinlock as the callback chain may end up adding
			 * more signalers to the same context or engine.
			 */
			spin_lock(&ce->signal_lock);
			list_del_rcu(&rq->signal_link);
			release = remove_signaling_context(b, ce);
			spin_unlock(&ce->signal_lock);
			if (release) {
				if (intel_timeline_is_last(ce->timeline, rq))
					add_retire(b, ce->timeline);
				/* Pairs with intel_context_get() in insert_breadcrumb(). */
				intel_context_put(ce);
			}

			if (__dma_fence_signal(&rq->fence))
				/* We own signal_node now, xfer to local list */
				signal = slist_add(&rq->signal_node, signal);
			else
				/*
				 * The fence was already marked signaled, so
				 * there is nothing to notify: drop the
				 * reference taken in insert_breadcrumb() now.
				 */
				i915_request_put(rq);
		}
	}
	atomic_dec(&b->signaler_active);
	rcu_read_unlock();

	llist_for_each_safe(signal, sn, signal) {
		struct i915_request *rq =
			llist_entry(signal, typeof(*rq), signal_node);
		struct list_head cb_list;

		/* Optional scheduler hook; only called when one is installed. */
		if (rq->engine->sched_engine->retire_inflight_request_prio)
			rq->engine->sched_engine->retire_inflight_request_prio(rq);

		spin_lock(&rq->lock);
		/* Move the callbacks onto a local head before running them. */
		list_replace(&rq->fence.cb_list, &cb_list);
		__dma_fence_signal__timestamp(&rq->fence, timestamp);
		__dma_fence_signal__notify(&rq->fence, &cb_list);
		spin_unlock(&rq->lock);

		/*
		 * Drop the reference that kept the request alive while it
		 * was queued for signaling (taken in insert_breadcrumb() or
		 * irq_signal_request()).
		 */
		i915_request_put(rq);
	}

	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
		intel_breadcrumbs_arm_irq(b);
}

/*
 * Allocate and initialise a breadcrumbs tracker.
 *
 * @irq_engine: engine whose interrupt is driven through the default
 *	irq_enable()/irq_disable() hooks. NOTE(review): other functions in
 *	this file test b->irq_engine for NULL, so NULL looks like an accepted
 *	value here -- confirm against callers.
 *
 * Returns the new object with its kref initialised, or NULL if the
 * allocation fails. It is released through intel_breadcrumbs_free().
 */
struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
	struct intel_breadcrumbs *b;

	b = kzalloc(sizeof(*b), GFP_KERNEL);
	if (!b)
		return NULL;

	kref_init(&b->ref);

	spin_lock_init(&b->signalers_lock);
	INIT_LIST_HEAD(&b->signalers);
	init_llist_head(&b->signaled_requests);

	spin_lock_init(&b->irq_lock);
	init_irq_work(&b->irq_work, signal_irq_work);

	b->irq_engine = irq_engine;
	b->irq_enable = irq_enable;
	b->irq_disable = irq_disable;

	return b;
}

/*
 * Re-apply the interrupt state implied by the current enable count: call
 * the b->irq_enable hook if the count is non-zero, otherwise the
 * b->irq_disable hook. Done under b->irq_lock with local interrupts saved
 * and disabled. Does nothing when there is no irq engine.
 */
void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
	unsigned long flags;

	if (!b->irq_engine)
		return;

	spin_lock_irqsave(&b->irq_lock, flags);

	if (b->irq_enabled)
		b->irq_enable(b);
	else
		b->irq_disable(b);

	spin_unlock_irqrestore(&b->irq_lock, flags);
}

/*
 * Park the breadcrumbs: if the interrupt is armed, wait for any pending
 * irq_work and then run the handler by hand, with local interrupts
 * disabled, until the interrupt has been disarmed or b->active becomes
 * non-zero. Returns immediately if the interrupt is not armed.
 */
void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
	if (!READ_ONCE(b->irq_armed))
		return;

	/* Kick the work once more to drain the signalers, and disarm the irq */
	irq_work_sync(&b->irq_work);
	while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
		local_irq_disable();
		signal_irq_work(&b->irq_work);
		local_irq_enable();
		cond_resched();
	}
}

/*
 * kref release callback for struct intel_breadcrumbs.
 *
 * Waits for any pending irq_work, asserts that no context is still listed
 * on b->signalers and that the interrupt is disarmed, then frees the
 * object.
 */
void intel_breadcrumbs_free(struct kref *kref)
{
	struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref);

	irq_work_sync(&b->irq_work);
	GEM_BUG_ON(!list_empty(&b->signalers));
	GEM_BUG_ON(b->irq_armed);

	kfree(b);
}

/*
 * Mark @rq's fence as signaled and hand it to the irq worker to run the
 * callbacks.
 *
 * If the fence was already marked signaled nothing is done. Otherwise a
 * request reference is taken (dropped by signal_irq_work() after the
 * callbacks have run) and the request is pushed onto b->signaled_requests;
 * the irq_work is queued when llist_add() reports that the list was empty
 * beforehand.
 */
static void irq_signal_request(struct i915_request *rq,
			       struct intel_breadcrumbs *b)
{
	if (!__dma_fence_signal(&rq->fence))
		return;

	i915_request_get(rq);
	if (llist_add(&rq->signal_node, &b->signaled_requests))
		irq_work_queue(&b->irq_work);
}

/*
 * Attach @rq to its context's list of requests awaiting a signal.
 *
 * Called with ce->signal_lock held (see i915_request_enable_breadcrumb();
 * add_signaling_context() asserts it). Does nothing if the request already
 * has I915_FENCE_FLAG_SIGNAL set. An already completed request is passed
 * straight to irq_signal_request(). Otherwise the request is inserted into
 * ce->signals at the position found by comparing seqnos, a request
 * reference is taken for the list, I915_FENCE_FLAG_SIGNAL is set and the
 * irq worker is queued.
 */
static void insert_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	struct list_head *pos;

	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
		return;

	/*
	 * If the request is already completed, we can transfer it
	 * straight onto a signaled list, and queue the irq worker for
	 * its signal completion.
	 */
	if (__i915_request_is_complete(rq)) {
		irq_signal_request(rq, b);
		return;
	}

	if (list_empty(&ce->signals)) {
		/*
		 * First request for this context: take a context reference
		 * for its membership of b->signalers (dropped by whoever
		 * sees remove_signaling_context() return true).
		 */
		intel_context_get(ce);
		add_signaling_context(b, ce);
		pos = &ce->signals;
	} else {
		/*
		 * We keep the seqno in retirement order, so we can break
		 * inside intel_engine_signal_breadcrumbs as soon as we've
		 * passed the last completed request (or seen a request that
		 * hasn't even started). We could walk the timeline->requests,
		 * but keeping a separate signalers_list has the advantage of
		 * hopefully being much smaller than the full list and so
		 * provides faster iteration and detection when there are no
		 * more interrupts required for this context.
		 *
		 * We typically expect to add new signalers in order, so we
		 * start looking for our insertion point from the tail of
		 * the list.
		 */
		list_for_each_prev(pos, &ce->signals) {
			struct i915_request *it =
				list_entry(pos, typeof(*it), signal_link);

			if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
				break;
		}
		/*
		 * If no entry matched, pos has wrapped round to the list
		 * head and the request is inserted as the first entry.
		 */
	}

	/* Reference held for as long as the request sits on ce->signals. */
	i915_request_get(rq);
	list_add_rcu(&rq->signal_link, pos);
	GEM_BUG_ON(!check_signal_order(ce, rq));
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

	/*
	 * Defer enabling the interrupt to after HW submission and recheck
	 * the request as it may have completed and raised the interrupt as
	 * we were attaching it into the lists.
	 */
	irq_work_queue(&b->irq_work);
}

/*
 * Request that @rq's fence be signaled from the breadcrumb irq worker.
 *
 * Nothing is attached if the fence is already signaled or the request is
 * not flagged I915_FENCE_FLAG_ACTIVE; otherwise the active flag is
 * re-checked under ce->signal_lock and the breadcrumb inserted.
 *
 * Returns true on every path.
 */
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;

	/* Serialises with i915_request_retire() using rq->lock */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return true;

	/*
	 * Peek at i915_request_submit()/i915_request_unsubmit() status.
	 *
	 * If the request is not yet active (and not signaled), we will
	 * attach the breadcrumb later.
	 */
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		return true;

	spin_lock(&ce->signal_lock);
	/* Re-check under the lock; the unlocked test above was only a peek. */
	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		insert_breadcrumb(rq);
	spin_unlock(&ce->signal_lock);

	return true;
}

/*
 * Detach @rq from its context's signal list, if it is on it.
 *
 * Under ce->signal_lock, I915_FENCE_FLAG_SIGNAL is cleared; if it was not
 * set the request is not ours to remove and the function returns. Otherwise
 * the request is unlinked, the context is dropped from b->signalers (and
 * its reference released) if this was its last request, a request that has
 * nevertheless completed is forwarded to irq_signal_request(), and finally
 * the list's reference on the request is dropped.
 */
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	bool release;

	spin_lock(&ce->signal_lock);
	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
		spin_unlock(&ce->signal_lock);
		return;
	}

	list_del_rcu(&rq->signal_link);
	release = remove_signaling_context(b, ce);
	spin_unlock(&ce->signal_lock);
	if (release)
		intel_context_put(ce);

	if (__i915_request_is_complete(rq))
		irq_signal_request(rq, b);

	/* Pairs with i915_request_get() in insert_breadcrumb(). */
	i915_request_put(rq);
}

/*
 * Flush every request of @ce that is still waiting for a signal on @b.
 *
 * Under ce->signal_lock (with interrupts saved and disabled), each request
 * on ce->signals that still has I915_FENCE_FLAG_SIGNAL set is unlinked,
 * forwarded to irq_signal_request() and has its list reference dropped; all
 * such requests are expected to have completed (GEM_BUG_ON otherwise). The
 * context is then removed from b->signalers if its list is empty, and its
 * reference released after the lock is dropped.
 *
 * Before returning, busy-waits until b->signaler_active reads zero, i.e.
 * until no signal_irq_work() is inside its walk of the signalers.
 */
void intel_context_remove_breadcrumbs(struct intel_context *ce,
				      struct intel_breadcrumbs *b)
{
	struct i915_request *rq, *rn;
	bool release = false;
	unsigned long flags;

	spin_lock_irqsave(&ce->signal_lock, flags);

	if (list_empty(&ce->signals))
		goto unlock;

	list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
		GEM_BUG_ON(!__i915_request_is_complete(rq));
		if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
					&rq->fence.flags))
			continue;

		list_del_rcu(&rq->signal_link);
		irq_signal_request(rq, b);
		i915_request_put(rq);
	}
	release = remove_signaling_context(b, ce);

unlock:
	spin_unlock_irqrestore(&ce->signal_lock, flags);
	if (release)
		intel_context_put(ce);

	while (atomic_read(&b->signaler_active))
		cpu_relax();
}

/*
 * Print one line per request currently waiting on @b, walking b->signalers
 * and each context's signals list under rcu_read_lock(). Each line shows
 * the fence context and seqno, a "!" marker for a completed request or "*"
 * for one that has started, and the time in milliseconds since
 * rq->emitted_jiffies.
 */
static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
	struct intel_context *ce;
	struct i915_request *rq;

	drm_printf(p, "Signals:\n");

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		list_for_each_entry_rcu(rq, &ce->signals, signal_link)
			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
				   rq->fence.context, rq->fence.seqno,
				   __i915_request_is_complete(rq) ? "!" :
				   __i915_request_has_started(rq) ? "*" :
				   "",
				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	}
	rcu_read_unlock();
}

/*
 * Dump the breadcrumb state of @engine to @p: whether the interrupt is
 * armed and, if any context is listed on the signalers, the pending
 * requests. Does nothing if the engine has no breadcrumbs.
 */
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p)
{
	struct intel_breadcrumbs *b;

	b = engine->breadcrumbs;
	if (!b)
		return;

	drm_printf(p, "IRQ: %s\n", str_enabled_disabled(b->irq_armed));
	if (!list_empty(&b->signalers))
		print_signals(b, p);
}