// SPDX-License-Identifier: MIT
/*
 * Copyright © 2015-2021 Intel Corporation
 */

#include <linux/kthread.h>
#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"

static bool irq_enable(struct intel_engine_cs *engine)
{
	if (!engine->irq_enable)
		return false;

	/* Caller disables interrupts */
	spin_lock(&engine->gt->irq_lock);
	engine->irq_enable(engine);
	spin_unlock(&engine->gt->irq_lock);

	return true;
}

static void irq_disable(struct intel_engine_cs *engine)
{
	if (!engine->irq_disable)
		return;

	/* Caller disables interrupts */
	spin_lock(&engine->gt->irq_lock);
	engine->irq_disable(engine);
	spin_unlock(&engine->gt->irq_lock);
}

static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	/*
	 * Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference.
	 */
	if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
		return;

	/*
	 * The breadcrumb irq will be disarmed on the interrupt after the
	 * waiters are signaled. This gives us a single interrupt window in
	 * which we can add a new waiter and avoid the cost of re-enabling
	 * the irq.
	 */
	WRITE_ONCE(b->irq_armed, true);

	/* Requests may have completed before we could enable the interrupt. */
	if (!b->irq_enabled++ && irq_enable(b->irq_engine))
		irq_work_queue(&b->irq_work);
}

static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	if (!b->irq_engine)
		return;

	spin_lock(&b->irq_lock);
	if (!b->irq_armed)
		__intel_breadcrumbs_arm_irq(b);
	spin_unlock(&b->irq_lock);
}

static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	GEM_BUG_ON(!b->irq_enabled);
	if (!--b->irq_enabled)
		irq_disable(b->irq_engine);

	WRITE_ONCE(b->irq_armed, false);
	intel_gt_pm_put_async(b->irq_engine->gt);
}

static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	spin_lock(&b->irq_lock);
	if (b->irq_armed)
		__intel_breadcrumbs_disarm_irq(b);
	spin_unlock(&b->irq_lock);
}

static void add_signaling_context(struct intel_breadcrumbs *b,
				  struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	spin_lock(&b->signalers_lock);
	list_add_rcu(&ce->signal_link, &b->signalers);
	spin_unlock(&b->signalers_lock);
}

static bool remove_signaling_context(struct intel_breadcrumbs *b,
				     struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	if (!list_empty(&ce->signals))
		return false;

	spin_lock(&b->signalers_lock);
	list_del_rcu(&ce->signal_link);
	spin_unlock(&b->signalers_lock);

	return true;
}

__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
	if (rq->context != ce)
		return false;

	if (!list_is_last(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(rq->fence.seqno,
			      list_next_entry(rq, signal_link)->fence.seqno))
		return false;

	if (!list_is_first(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
			      rq->fence.seqno))
		return false;

	return true;
}

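/*
 * Lock nesting, as used by the helpers above (informal summary, not an
 * authoritative lockdep map): ce->signal_lock is the outer lock and guards
 * ce->signals; b->signalers_lock nests inside it and guards b->signalers;
 * b->irq_lock serialises arming/disarming of the user interrupt; and
 * gt->irq_lock is only taken with interrupts already disabled by the
 * caller (see irq_enable()/irq_disable()).
 */
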
static bool
__dma_fence_signal(struct dma_fence *fence)
{
	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}

static void
__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
{
	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
	trace_dma_fence_signaled(fence);
}

static void
__dma_fence_signal__notify(struct dma_fence *fence,
			   const struct list_head *list)
{
	struct dma_fence_cb *cur, *tmp;

	lockdep_assert_held(fence->lock);

	list_for_each_entry_safe(cur, tmp, list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}
}

static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
	if (b->irq_engine)
		intel_engine_add_retire(b->irq_engine, tl);
}

static struct llist_node *
slist_add(struct llist_node *node, struct llist_node *head)
{
	node->next = head;
	return node;
}

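/*
 * slist_add() deliberately avoids the atomic llist API: the irq worker is
 * the sole owner of the signal_node entries it collects, so plain pointer
 * writes suffice. A minimal sketch of the pattern used below:
 *
 *	struct llist_node *signal = NULL, *pos, *next;
 *
 *	signal = slist_add(&rq->signal_node, signal);	// no cmpxchg needed
 *	llist_for_each_safe(pos, next, signal)
 *		;	// process each signalled request
 */
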
static void signal_irq_work(struct irq_work *work)
{
	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
	const ktime_t timestamp = ktime_get();
	struct llist_node *signal, *sn;
	struct intel_context *ce;

	signal = NULL;
	if (unlikely(!llist_empty(&b->signaled_requests)))
		signal = llist_del_all(&b->signaled_requests);

	/*
	 * Keep the irq armed until the interrupt after all listeners are gone.
	 *
	 * Enabling/disabling the interrupt is rather costly, roughly a couple
	 * of hundred microseconds. If we are proactive and enable/disable
	 * the interrupt around every request that wants a breadcrumb, we
	 * quickly drown in the extra orders of magnitude of latency imposed
	 * on request submission.
	 *
	 * So we try to be lazy, and keep the interrupts enabled until no
	 * more listeners appear within a breadcrumb interrupt interval (that
	 * is until a request completes that no one cares about). The
	 * observation is that listeners come in batches, and will often
	 * listen to a bunch of requests in succession. Though note on icl+,
	 * interrupts are always enabled due to concerns with rc6 being
	 * dysfunctional with per-engine interrupt masking.
	 *
	 * We also try to avoid raising too many interrupts, as they may
	 * be generated by userspace batches and it is unfortunately rather
	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
	 * whenever no one appears to be listening, we turn off the interrupts.
	 * Fewer interrupts should conserve power -- at the very least, fewer
	 * interrupts draw less ire from other users of the system and tools
	 * like powertop.
	 */
	if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
		intel_breadcrumbs_disarm_irq(b);

	rcu_read_lock();
	atomic_inc(&b->signaler_active);
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		struct i915_request *rq;

		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
			bool release;

			if (!__i915_request_is_complete(rq))
				break;

			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
						&rq->fence.flags))
				break;

			/*
			 * Queue for execution after dropping the signaling
			 * spinlock as the callback chain may end up adding
			 * more signalers to the same context or engine.
			 */
			spin_lock(&ce->signal_lock);
			list_del_rcu(&rq->signal_link);
			release = remove_signaling_context(b, ce);
			spin_unlock(&ce->signal_lock);
			if (release) {
				if (intel_timeline_is_last(ce->timeline, rq))
					add_retire(b, ce->timeline);
				intel_context_put(ce);
			}

			if (__dma_fence_signal(&rq->fence))
				/* We own signal_node now, xfer to local list */
				signal = slist_add(&rq->signal_node, signal);
			else
				i915_request_put(rq);
		}
	}
	atomic_dec(&b->signaler_active);
	rcu_read_unlock();

	llist_for_each_safe(signal, sn, signal) {
		struct i915_request *rq =
			llist_entry(signal, typeof(*rq), signal_node);
		struct list_head cb_list;

		spin_lock(&rq->lock);
		list_replace(&rq->fence.cb_list, &cb_list);
		__dma_fence_signal__timestamp(&rq->fence, timestamp);
		__dma_fence_signal__notify(&rq->fence, &cb_list);
		spin_unlock(&rq->lock);

		i915_request_put(rq);
	}

	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
		intel_breadcrumbs_arm_irq(b);
}

struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
	struct intel_breadcrumbs *b;

	b = kzalloc(sizeof(*b), GFP_KERNEL);
	if (!b)
		return NULL;

	b->irq_engine = irq_engine;

	spin_lock_init(&b->signalers_lock);
	INIT_LIST_HEAD(&b->signalers);
	init_llist_head(&b->signaled_requests);

	spin_lock_init(&b->irq_lock);
	init_irq_work(&b->irq_work, signal_irq_work);

	return b;
}

void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
	unsigned long flags;

	if (!b->irq_engine)
		return;

	spin_lock_irqsave(&b->irq_lock, flags);

	if (b->irq_enabled)
		irq_enable(b->irq_engine);
	else
		irq_disable(b->irq_engine);

	spin_unlock_irqrestore(&b->irq_lock, flags);
}

void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
	if (!READ_ONCE(b->irq_armed))
		return;

	/* Kick the work once more to drain the signalers, and disarm the irq */
	irq_work_sync(&b->irq_work);
	while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
		local_irq_disable();
		signal_irq_work(&b->irq_work);
		local_irq_enable();
		cond_resched();
	}
}

void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
{
	irq_work_sync(&b->irq_work);
	GEM_BUG_ON(!list_empty(&b->signalers));
	GEM_BUG_ON(b->irq_armed);
	kfree(b);
}

static void irq_signal_request(struct i915_request *rq,
			       struct intel_breadcrumbs *b)
{
	if (!__dma_fence_signal(&rq->fence))
		return;

	i915_request_get(rq);
	if (llist_add(&rq->signal_node, &b->signaled_requests))
		irq_work_queue(&b->irq_work);
}

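/*
 * irq_signal_request() is the "already completed" shortcut: it marks the
 * fence as signalled, takes a request reference that signal_irq_work()
 * drops after running the dma_fence callbacks, and only queues the irq
 * worker when it installs the first entry on the signaled_requests llist
 * (llist_add() returns true only when adding to a previously empty list).
 */
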
static void insert_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	struct list_head *pos;

	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
		return;

	/*
	 * If the request is already completed, we can transfer it
	 * straight onto a signaled list, and queue the irq worker for
	 * its signal completion.
	 */
	if (__i915_request_is_complete(rq)) {
		irq_signal_request(rq, b);
		return;
	}

	if (list_empty(&ce->signals)) {
		intel_context_get(ce);
		add_signaling_context(b, ce);
		pos = &ce->signals;
	} else {
		/*
		 * We keep the seqno in retirement order, so we can break
		 * inside intel_engine_signal_breadcrumbs as soon as we've
		 * passed the last completed request (or seen a request that
		 * hasn't even started). We could walk the timeline->requests,
		 * but keeping a separate signalers_list has the advantage of
		 * hopefully being much smaller than the full list and so
		 * provides faster iteration and detection when there are no
		 * more interrupts required for this context.
		 *
		 * We typically expect to add new signalers in order, so we
		 * start looking for our insertion point from the tail of
		 * the list.
		 */
		list_for_each_prev(pos, &ce->signals) {
			struct i915_request *it =
				list_entry(pos, typeof(*it), signal_link);

			if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
				break;
		}
	}

	i915_request_get(rq);
	list_add_rcu(&rq->signal_link, pos);
	GEM_BUG_ON(!check_signal_order(ce, rq));
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

	/*
	 * Defer enabling the interrupt to after HW submission and recheck
	 * the request as it may have completed and raised the interrupt as
	 * we were attaching it into the lists.
	 */
	irq_work_queue(&b->irq_work);
}

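/*
 * i915_request_enable_breadcrumb() below is normally reached through the
 * dma-fence machinery rather than called directly: dma_fence_add_callback()
 * (or dma_fence_enable_sw_signaling()) invokes the fence's .enable_signaling
 * hook, which for i915 requests is i915_fence_enable_signaling() in
 * i915_request.c and simply forwards here.
 */
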
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;

	/* Serialises with i915_request_retire() using rq->lock */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return true;

	/*
	 * Peek at i915_request_submit()/i915_request_unsubmit() status.
	 *
	 * If the request is not yet active (and not signaled), we will
	 * attach the breadcrumb later.
	 */
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		return true;

	spin_lock(&ce->signal_lock);
	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		insert_breadcrumb(rq);
	spin_unlock(&ce->signal_lock);

	return true;
}

void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	bool release;

	spin_lock(&ce->signal_lock);
	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
		spin_unlock(&ce->signal_lock);
		return;
	}

	list_del_rcu(&rq->signal_link);
	release = remove_signaling_context(b, ce);
	spin_unlock(&ce->signal_lock);
	if (release)
		intel_context_put(ce);

	if (__i915_request_is_complete(rq))
		irq_signal_request(rq, b);

	i915_request_put(rq);
}

void intel_context_remove_breadcrumbs(struct intel_context *ce,
				      struct intel_breadcrumbs *b)
{
	struct i915_request *rq, *rn;
	bool release = false;
	unsigned long flags;

	spin_lock_irqsave(&ce->signal_lock, flags);

	if (list_empty(&ce->signals))
		goto unlock;

	list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
		GEM_BUG_ON(!__i915_request_is_complete(rq));
		if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
					&rq->fence.flags))
			continue;

		list_del_rcu(&rq->signal_link);
		irq_signal_request(rq, b);
		i915_request_put(rq);
	}
	release = remove_signaling_context(b, ce);

unlock:
	spin_unlock_irqrestore(&ce->signal_lock, flags);
	if (release)
		intel_context_put(ce);

	while (atomic_read(&b->signaler_active))
		cpu_relax();
}

static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
	struct intel_context *ce;
	struct i915_request *rq;

	drm_printf(p, "Signals:\n");

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		list_for_each_entry_rcu(rq, &ce->signals, signal_link)
			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
				   rq->fence.context, rq->fence.seqno,
				   __i915_request_is_complete(rq) ? "!" :
				   __i915_request_has_started(rq) ? "*" :
				   "",
				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	}
	rcu_read_unlock();
}

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p)
{
	struct intel_breadcrumbs *b;

	b = engine->breadcrumbs;
	if (!b)
		return;

	drm_printf(p, "IRQ: %s\n", enableddisabled(b->irq_armed));
	if (!list_empty(&b->signalers))
		print_signals(b, p);
}
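
/*
 * Rough lifecycle sketch (illustrative only; the actual call sites live in
 * the engine setup, reset and power-management paths elsewhere in the
 * driver):
 *
 *	b = intel_breadcrumbs_create(engine);	// at engine init
 *	intel_breadcrumbs_reset(b);		// after an engine/GT reset
 *	__intel_breadcrumbs_park(b);		// as the engine idles
 *	intel_breadcrumbs_free(b);		// at engine teardown
 */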