// SPDX-License-Identifier: MIT
/*
 * Copyright © 2015-2021 Intel Corporation
 */

#include <linux/kthread.h>
#include <linux/string_helpers.h>
#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>

#include <drm/drm_print.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"

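/*
 * Default hooks routing breadcrumb interrupt control to the engine that
 * emits the breadcrumbs. b->irq_enable/b->irq_disable are function
 * pointers (installed in intel_breadcrumbs_create()), so callers may
 * substitute their own implementations.
 */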
static bool irq_enable(struct intel_breadcrumbs *b)
{
	return intel_engine_irq_enable(b->irq_engine);
}

static void irq_disable(struct intel_breadcrumbs *b)
{
	intel_engine_irq_disable(b->irq_engine);
}

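/*
 * Arm the user interrupt under b->irq_lock, taking a GT wakeref for as
 * long as the irq remains armed. The wakeref is stashed in b->irq_armed
 * so that the matching disarm can release it later.
 */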
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	intel_wakeref_t wakeref;

	/*
	 * Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference.
	 */
	wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt);
	if (GEM_WARN_ON(!wakeref))
		return;

	/*
	 * The breadcrumb irq will be disarmed on the interrupt after the
	 * waiters are signaled. This gives us a single interrupt window in
	 * which we can add a new waiter and avoid the cost of re-enabling
	 * the irq.
	 */
	WRITE_ONCE(b->irq_armed, wakeref);

	/* Requests may have completed before we could enable the interrupt. */
	if (!b->irq_enabled++ && b->irq_enable(b))
		irq_work_queue(&b->irq_work);
}

static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	if (!b->irq_engine)
		return;

	spin_lock(&b->irq_lock);
	if (!b->irq_armed)
		__intel_breadcrumbs_arm_irq(b);
	spin_unlock(&b->irq_lock);
}

static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	intel_wakeref_t wakeref = b->irq_armed;

	GEM_BUG_ON(!b->irq_enabled);
	if (!--b->irq_enabled)
		b->irq_disable(b);

	WRITE_ONCE(b->irq_armed, NULL);
	intel_gt_pm_put_async(b->irq_engine->gt, wakeref);
}

static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	spin_lock(&b->irq_lock);
	if (b->irq_armed)
		__intel_breadcrumbs_disarm_irq(b);
	spin_unlock(&b->irq_lock);
}

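/*
 * Track which contexts currently have requests waiting for a breadcrumb:
 * ce->signal_link lives on b->signalers. Addition and removal are
 * serialised by ce->signal_lock, with b->signalers_lock nested inside to
 * protect the list itself so signal_irq_work() may walk it under RCU.
 */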
static void add_signaling_context(struct intel_breadcrumbs *b,
				  struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	spin_lock(&b->signalers_lock);
	list_add_rcu(&ce->signal_link, &b->signalers);
	spin_unlock(&b->signalers_lock);
}

static bool remove_signaling_context(struct intel_breadcrumbs *b,
				     struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	if (!list_empty(&ce->signals))
		return false;

	spin_lock(&b->signalers_lock);
	list_del_rcu(&ce->signal_link);
	spin_unlock(&b->signalers_lock);

	return true;
}

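/*
 * Debug-only sanity check that requests on ce->signals are kept in
 * ascending seqno order: an entry must neither have passed its successor
 * nor have been passed by its predecessor.
 */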
__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
	if (rq->context != ce)
		return false;

	if (!list_is_last(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(rq->fence.seqno,
			      list_next_entry(rq, signal_link)->fence.seqno))
		return false;

	if (!list_is_first(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
			      rq->fence.seqno))
		return false;

	return true;
}

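/*
 * Low-level dma_fence signaling, split into three steps: claim the fence
 * by atomically setting DMA_FENCE_FLAG_SIGNALED_BIT, record the signaling
 * timestamp, and finally run the attached callbacks.
 */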
static bool
__dma_fence_signal(struct dma_fence *fence)
{
	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}

static void
__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
{
	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
	trace_dma_fence_signaled(fence);
}

static void
__dma_fence_signal__notify(struct dma_fence *fence,
			   const struct list_head *list)
{
	struct dma_fence_cb *cur, *tmp;

	lockdep_assert_held(fence->lock);

	list_for_each_entry_safe(cur, tmp, list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}
}

static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
	if (b->irq_engine)
		intel_engine_add_retire(b->irq_engine, tl);
}

static struct llist_node *
slist_add(struct llist_node *node, struct llist_node *head)
{
	node->next = head;
	return node;
}

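/*
 * Bottom half of the user interrupt: walk every signaling context, move
 * each completed request onto a local list (claiming its fence via
 * __dma_fence_signal()), then run the dma_fence callbacks for each
 * request without holding any of the signaling locks. Also responsible
 * for lazily arming/disarming the interrupt depending on whether any
 * listeners remain.
 */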
static void signal_irq_work(struct irq_work *work)
{
	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
	const ktime_t timestamp = ktime_get();
	struct llist_node *signal, *sn;
	struct intel_context *ce;

	signal = NULL;
	if (unlikely(!llist_empty(&b->signaled_requests)))
		signal = llist_del_all(&b->signaled_requests);

	/*
	 * Keep the irq armed until the interrupt after all listeners are gone.
	 *
	 * Enabling/disabling the interrupt is rather costly, roughly a couple
	 * of hundred microseconds. If we are proactive and enable/disable
	 * the interrupt around every request that wants a breadcrumb, we
	 * quickly drown in the extra orders of magnitude of latency imposed
	 * on request submission.
	 *
	 * So we try to be lazy, and keep the interrupts enabled until no
	 * more listeners appear within a breadcrumb interrupt interval (that
	 * is until a request completes that no one cares about). The
	 * observation is that listeners come in batches, and will often
	 * listen to a bunch of requests in succession. Though note on icl+,
	 * interrupts are always enabled due to concerns with rc6 being
	 * dysfunctional with per-engine interrupt masking.
	 *
	 * We also try to avoid raising too many interrupts, as they may
	 * be generated by userspace batches and it is unfortunately rather
	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
	 * whenever no one appears to be listening, we turn off the interrupts.
	 * Fewer interrupts should conserve power -- at the very least, fewer
	 * interrupts draw less ire from other users of the system and tools
	 * like powertop.
	 */
	if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
		intel_breadcrumbs_disarm_irq(b);

	rcu_read_lock();
	atomic_inc(&b->signaler_active);
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		struct i915_request *rq;

		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
			bool release;

			if (!__i915_request_is_complete(rq))
				break;

			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
						&rq->fence.flags))
				break;

			/*
			 * Queue for execution after dropping the signaling
			 * spinlock as the callback chain may end up adding
			 * more signalers to the same context or engine.
			 */
			spin_lock(&ce->signal_lock);
			list_del_rcu(&rq->signal_link);
			release = remove_signaling_context(b, ce);
			spin_unlock(&ce->signal_lock);
			if (release) {
				if (intel_timeline_is_last(ce->timeline, rq))
					add_retire(b, ce->timeline);
				intel_context_put(ce);
			}

			if (__dma_fence_signal(&rq->fence))
				/* We own signal_node now, xfer to local list */
				signal = slist_add(&rq->signal_node, signal);
			else
				i915_request_put(rq);
		}
	}
	atomic_dec(&b->signaler_active);
	rcu_read_unlock();

	llist_for_each_safe(signal, sn, signal) {
		struct i915_request *rq =
			llist_entry(signal, typeof(*rq), signal_node);
		struct list_head cb_list;

		if (rq->engine->sched_engine->retire_inflight_request_prio)
			rq->engine->sched_engine->retire_inflight_request_prio(rq);

		spin_lock(&rq->lock);
		list_replace(&rq->fence.cb_list, &cb_list);
		__dma_fence_signal__timestamp(&rq->fence, timestamp);
		__dma_fence_signal__notify(&rq->fence, &cb_list);
		spin_unlock(&rq->lock);

		i915_request_put(rq);
	}

	/* Lazy irq enabling after HW submission */
	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
		intel_breadcrumbs_arm_irq(b);

	/* And confirm that we still want irqs enabled before we yield */
	if (READ_ONCE(b->irq_armed) && !atomic_read(&b->active))
		intel_breadcrumbs_disarm_irq(b);
}

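/*
 * Allocate and initialise a new set of breadcrumbs for @irq_engine.
 * @irq_engine may be NULL, in which case no interrupt is ever armed
 * (note the !b->irq_engine early returns throughout this file) and
 * signaling relies on the irq worker being kicked explicitly.
 */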
struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
	struct intel_breadcrumbs *b;

	b = kzalloc_obj(*b);
	if (!b)
		return NULL;

	kref_init(&b->ref);

	spin_lock_init(&b->signalers_lock);
	INIT_LIST_HEAD(&b->signalers);
	init_llist_head(&b->signaled_requests);

	spin_lock_init(&b->irq_lock);
	init_irq_work(&b->irq_work, signal_irq_work);

	b->irq_engine = irq_engine;
	b->irq_enable = irq_enable;
	b->irq_disable = irq_disable;

	return b;
}

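/*
 * After an engine reset, re-synchronise the hardware interrupt mask with
 * our bookkeeping: re-enable the interrupt if we still hold an enable
 * reference, otherwise make sure it is off.
 */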
void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
	unsigned long flags;

	if (!b->irq_engine)
		return;

	spin_lock_irqsave(&b->irq_lock, flags);

	if (b->irq_enabled)
		b->irq_enable(b);
	else
		b->irq_disable(b);

	spin_unlock_irqrestore(&b->irq_lock, flags);
}

void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
	if (!READ_ONCE(b->irq_armed))
		return;

	/* Kick the work once more to drain the signalers, and disarm the irq */
	irq_work_queue(&b->irq_work);
}

void intel_breadcrumbs_free(struct kref *kref)
{
	struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref);

	irq_work_sync(&b->irq_work);
	GEM_BUG_ON(!list_empty(&b->signalers));
	GEM_BUG_ON(b->irq_armed);

	kfree(b);
}

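/*
 * The request completed before (or while) its breadcrumb was attached:
 * claim the fence here and hand the request to the irq worker via the
 * lockless signaled_requests list, so the dma_fence callbacks still run
 * from irq_work context as usual.
 */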
static void irq_signal_request(struct i915_request *rq,
			       struct intel_breadcrumbs *b)
{
	if (!__dma_fence_signal(&rq->fence))
		return;

	i915_request_get(rq);
	if (llist_add(&rq->signal_node, &b->signaled_requests))
		irq_work_queue(&b->irq_work);
}

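/*
 * Insert the request into ce->signals, keeping the list in seqno order.
 * Caller must hold ce->signal_lock.
 */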
static void insert_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	struct list_head *pos;

	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
		return;

	/*
	 * If the request is already completed, we can transfer it
	 * straight onto a signaled list, and queue the irq worker for
	 * its signal completion.
	 */
	if (__i915_request_is_complete(rq)) {
		irq_signal_request(rq, b);
		return;
	}

	if (list_empty(&ce->signals)) {
		intel_context_get(ce);
		add_signaling_context(b, ce);
		pos = &ce->signals;
	} else {
		/*
		 * We keep the seqno in retirement order, so we can break
		 * inside intel_engine_signal_breadcrumbs as soon as we've
		 * passed the last completed request (or seen a request that
		 * hasn't even started). We could walk the timeline->requests,
		 * but keeping a separate signalers_list has the advantage of
		 * hopefully being much smaller than the full list and so
		 * provides faster iteration and detection when there are no
		 * more interrupts required for this context.
		 *
		 * We typically expect to add new signalers in order, so we
		 * start looking for our insertion point from the tail of
		 * the list.
		 */
		list_for_each_prev(pos, &ce->signals) {
			struct i915_request *it =
				list_entry(pos, typeof(*it), signal_link);

			if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
				break;
		}
	}

	i915_request_get(rq);
	list_add_rcu(&rq->signal_link, pos);
	GEM_BUG_ON(!check_signal_order(ce, rq));
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

	/*
	 * Defer enabling the interrupt to after HW submission and recheck
	 * the request as it may have completed and raised the interrupt as
	 * we were attaching it into the lists.
	 */
	if (!READ_ONCE(b->irq_armed) || __i915_request_is_complete(rq))
		irq_work_queue(&b->irq_work);
}

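/*
 * Request that a breadcrumb be attached for @rq so its fence is signaled
 * on completion. Only requests already submitted to HW (ACTIVE set) are
 * inserted; otherwise the breadcrumb is deferred until submission.
 */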
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;

	/* Serialises with i915_request_retire() using rq->lock */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return true;

	/*
	 * Peek at i915_request_submit()/i915_request_unsubmit() status.
	 *
	 * If the request is not yet active (and not signaled), we will
	 * attach the breadcrumb later.
	 */
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		return true;

	spin_lock(&ce->signal_lock);
	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		insert_breadcrumb(rq);
	spin_unlock(&ce->signal_lock);

	return true;
}

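/*
 * Detach the breadcrumb from @rq, e.g. on unsubmit. If the request
 * managed to complete in the meantime, still signal it via the irq
 * worker rather than dropping the notification.
 */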
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	bool release;

	spin_lock(&ce->signal_lock);
	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
		spin_unlock(&ce->signal_lock);
		return;
	}

	list_del_rcu(&rq->signal_link);
	release = remove_signaling_context(b, ce);
	spin_unlock(&ce->signal_lock);
	if (release)
		intel_context_put(ce);

	if (__i915_request_is_complete(rq))
		irq_signal_request(rq, b);

	i915_request_put(rq);
}

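/*
 * Flush all of @ce's remaining (completed) breadcrumbs over to the irq
 * worker, then spin until any concurrent signal_irq_work() has finished
 * walking the signalers, so that the context can be safely torn down.
 */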
void intel_context_remove_breadcrumbs(struct intel_context *ce,
				      struct intel_breadcrumbs *b)
{
	struct i915_request *rq, *rn;
	bool release = false;
	unsigned long flags;

	spin_lock_irqsave(&ce->signal_lock, flags);

	if (list_empty(&ce->signals))
		goto unlock;

	list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
		GEM_BUG_ON(!__i915_request_is_complete(rq));
		if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
					&rq->fence.flags))
			continue;

		list_del_rcu(&rq->signal_link);
		irq_signal_request(rq, b);
		i915_request_put(rq);
	}
	release = remove_signaling_context(b, ce);

unlock:
	spin_unlock_irqrestore(&ce->signal_lock, flags);
	if (release)
		intel_context_put(ce);

	while (atomic_read(&b->signaler_active))
		cpu_relax();
}

static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
	struct intel_context *ce;
	struct i915_request *rq;

	drm_printf(p, "Signals:\n");

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		list_for_each_entry_rcu(rq, &ce->signals, signal_link)
			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
				   rq->fence.context, rq->fence.seqno,
				   __i915_request_is_complete(rq) ? "!" :
				   __i915_request_has_started(rq) ? "*" :
				   "",
				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	}
	rcu_read_unlock();
}

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p)
{
	struct intel_breadcrumbs *b;

	b = engine->breadcrumbs;
	if (!b)
		return;

	drm_printf(p, "IRQ: %s\n", str_enabled_disabled(b->irq_armed));
	if (!list_empty(&b->signalers))
		print_signals(b, p);
}