1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2023 Intel Corporation
4 */
5
6 #include <drm/drm_managed.h>
7
8 #include "xe_device_types.h"
9 #include "xe_force_wake.h"
10 #include "xe_gt_stats.h"
11 #include "xe_gt_types.h"
12 #include "xe_guc_ct.h"
13 #include "xe_guc_tlb_inval.h"
14 #include "xe_mmio.h"
15 #include "xe_pm.h"
16 #include "xe_tlb_inval.h"
17 #include "xe_trace.h"
18
19 /**
20 * DOC: Xe TLB invalidation
21 *
22 * Xe TLB invalidation is implemented in two layers. The first is the frontend
23 * API, which provides an interface for TLB invalidations to the driver code.
24 * The frontend handles seqno assignment, synchronization (fences), and the
25 * timeout mechanism. The frontend is implemented via an embedded structure
26 * xe_tlb_inval that includes a set of ops hooking into the backend. The backend
27 * interacts with the hardware (or firmware) to perform the actual invalidation.
28 */
29
30 #define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS
31
xe_tlb_inval_fence_fini(struct xe_tlb_inval_fence * fence)32 static void xe_tlb_inval_fence_fini(struct xe_tlb_inval_fence *fence)
33 {
34 if (WARN_ON_ONCE(!fence->tlb_inval))
35 return;
36
37 xe_pm_runtime_put(fence->tlb_inval->xe);
38 fence->tlb_inval = NULL; /* fini() should be called once */
39 }
40
/*
 * Remove @fence from the pending list, drop its PM reference and signal the
 * embedded dma-fence. Caller must hold tlb_inval->pending_lock. For
 * heap-allocated fences (FENCE_STACK_BIT not set) the reference taken at
 * init time is dropped here as well.
 */
static void
xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence)
{
	/* Sample before the final put below may free the fence */
	bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);

	lockdep_assert_held(&fence->tlb_inval->pending_lock);

	list_del(&fence->link);
	trace_xe_tlb_inval_fence_signal(fence->tlb_inval->xe, fence);
	xe_tlb_inval_fence_fini(fence);
	dma_fence_signal(&fence->base);
	if (!stack)
		dma_fence_put(&fence->base);
}
55
56 static void
xe_tlb_inval_fence_signal_unlocked(struct xe_tlb_inval_fence * fence)57 xe_tlb_inval_fence_signal_unlocked(struct xe_tlb_inval_fence *fence)
58 {
59 struct xe_tlb_inval *tlb_inval = fence->tlb_inval;
60
61 spin_lock_irq(&tlb_inval->pending_lock);
62 xe_tlb_inval_fence_signal(fence);
63 spin_unlock_irq(&tlb_inval->pending_lock);
64 }
65
/*
 * Delayed-work handler fired when the oldest pending TLB invalidation has not
 * been acknowledged within the backend's timeout. Flushes the backend, then
 * signals every expired fence with -ETIME and re-arms the timer if unexpired
 * fences remain.
 */
static void xe_tlb_inval_fence_timeout(struct work_struct *work)
{
	struct xe_tlb_inval *tlb_inval = container_of(work, struct xe_tlb_inval,
						      fence_tdr.work);
	struct xe_device *xe = tlb_inval->xe;
	struct xe_tlb_inval_fence *fence, *next;
	long timeout_delay = tlb_inval->ops->timeout_delay(tlb_inval);

	tlb_inval->ops->flush(tlb_inval);

	spin_lock_irq(&tlb_inval->pending_lock);
	list_for_each_entry_safe(fence, next,
				 &tlb_inval->pending_fences, link) {
		s64 since_inval_ms = ktime_ms_delta(ktime_get(),
						    fence->inval_time);

		/*
		 * Fences are appended in send order, so the first one that
		 * has not expired ends the scan.
		 */
		if (msecs_to_jiffies(since_inval_ms) < timeout_delay)
			break;

		trace_xe_tlb_inval_fence_timeout(xe, fence);
		drm_err(&xe->drm,
			"TLB invalidation fence timeout, seqno=%d recv=%d",
			fence->seqno, tlb_inval->seqno_recv);

		/* Waiters on this fence observe -ETIME */
		fence->base.error = -ETIME;
		xe_tlb_inval_fence_signal(fence);
	}
	if (!list_empty(&tlb_inval->pending_fences))
		queue_delayed_work(tlb_inval->timeout_wq, &tlb_inval->fence_tdr,
				   timeout_delay);
	spin_unlock_irq(&tlb_inval->pending_lock);
}
98
99 /**
100 * tlb_inval_fini - Clean up TLB invalidation state
101 * @drm: @drm_device
102 * @arg: pointer to struct @xe_tlb_inval
103 *
104 * Cancel pending fence workers and clean up any additional
105 * TLB invalidation state.
106 */
tlb_inval_fini(struct drm_device * drm,void * arg)107 static void tlb_inval_fini(struct drm_device *drm, void *arg)
108 {
109 struct xe_tlb_inval *tlb_inval = arg;
110
111 xe_tlb_inval_reset(tlb_inval);
112 }
113
114 /**
115 * xe_gt_tlb_inval_init_early() - Initialize TLB invalidation state
116 * @gt: GT structure
117 *
118 * Initialize TLB invalidation state, purely software initialization, should
119 * be called once during driver load.
120 *
121 * Return: 0 on success, negative error code on error.
122 */
xe_gt_tlb_inval_init_early(struct xe_gt * gt)123 int xe_gt_tlb_inval_init_early(struct xe_gt *gt)
124 {
125 struct xe_device *xe = gt_to_xe(gt);
126 struct xe_tlb_inval *tlb_inval = >->tlb_inval;
127 int err;
128
129 tlb_inval->xe = xe;
130 tlb_inval->seqno = 1;
131 INIT_LIST_HEAD(&tlb_inval->pending_fences);
132 spin_lock_init(&tlb_inval->pending_lock);
133 spin_lock_init(&tlb_inval->lock);
134 INIT_DELAYED_WORK(&tlb_inval->fence_tdr, xe_tlb_inval_fence_timeout);
135
136 err = drmm_mutex_init(&xe->drm, &tlb_inval->seqno_lock);
137 if (err)
138 return err;
139
140 tlb_inval->job_wq = drmm_alloc_ordered_workqueue(&xe->drm,
141 "gt-tbl-inval-job-wq",
142 WQ_MEM_RECLAIM);
143 if (IS_ERR(tlb_inval->job_wq))
144 return PTR_ERR(tlb_inval->job_wq);
145
146 tlb_inval->timeout_wq = gt->ordered_wq;
147 if (IS_ERR(tlb_inval->timeout_wq))
148 return PTR_ERR(tlb_inval->timeout_wq);
149
150 /* XXX: Blindly setting up backend to GuC */
151 xe_guc_tlb_inval_init_early(>->uc.guc, tlb_inval);
152
153 return drmm_add_action_or_reset(&xe->drm, tlb_inval_fini, tlb_inval);
154 }
155
156 /**
157 * xe_tlb_inval_reset() - TLB invalidation reset
158 * @tlb_inval: TLB invalidation client
159 *
160 * Signal any pending invalidation fences, should be called during a GT reset
161 */
xe_tlb_inval_reset(struct xe_tlb_inval * tlb_inval)162 void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval)
163 {
164 struct xe_tlb_inval_fence *fence, *next;
165 int pending_seqno;
166
167 /*
168 * we can get here before the backends are even initialized if we're
169 * wedging very early, in which case there are not going to be any
170 * pendind fences so we can bail immediately.
171 */
172 if (!tlb_inval->ops->initialized(tlb_inval))
173 return;
174
175 /*
176 * Backend is already disabled at this point. No new TLB requests can
177 * appear.
178 */
179
180 mutex_lock(&tlb_inval->seqno_lock);
181 spin_lock_irq(&tlb_inval->pending_lock);
182 cancel_delayed_work(&tlb_inval->fence_tdr);
183 /*
184 * We might have various kworkers waiting for TLB flushes to complete
185 * which are not tracked with an explicit TLB fence, however at this
186 * stage that will never happen since the backend is already disabled,
187 * so make sure we signal them here under the assumption that we have
188 * completed a full GT reset.
189 */
190 if (tlb_inval->seqno == 1)
191 pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1;
192 else
193 pending_seqno = tlb_inval->seqno - 1;
194 WRITE_ONCE(tlb_inval->seqno_recv, pending_seqno);
195
196 list_for_each_entry_safe(fence, next,
197 &tlb_inval->pending_fences, link)
198 xe_tlb_inval_fence_signal(fence);
199 spin_unlock_irq(&tlb_inval->pending_lock);
200 mutex_unlock(&tlb_inval->seqno_lock);
201 }
202
203 /**
204 * xe_tlb_inval_reset_timeout() - Reset TLB inval fence timeout
205 * @tlb_inval: TLB invalidation client
206 *
207 * Reset the TLB invalidation timeout timer.
208 */
xe_tlb_inval_reset_timeout(struct xe_tlb_inval * tlb_inval)209 static void xe_tlb_inval_reset_timeout(struct xe_tlb_inval *tlb_inval)
210 {
211 lockdep_assert_held(&tlb_inval->pending_lock);
212
213 mod_delayed_work(system_wq, &tlb_inval->fence_tdr,
214 tlb_inval->ops->timeout_delay(tlb_inval));
215 }
216
/*
 * Return true if @seqno has already been received (i.e. is at or behind
 * seqno_recv), accounting for seqno wraparound at TLB_INVALIDATION_SEQNO_MAX.
 * Caller must hold tlb_inval->pending_lock.
 */
static bool xe_tlb_inval_seqno_past(struct xe_tlb_inval *tlb_inval, int seqno)
{
	int seqno_recv = READ_ONCE(tlb_inval->seqno_recv);

	lockdep_assert_held(&tlb_inval->pending_lock);

	/*
	 * Deltas larger than half the seqno space are treated as wraparound:
	 * seqno far below seqno_recv means it is actually ahead (not past),
	 * and far above means seqno_recv has wrapped past it.
	 */
	if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
		return false;

	if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
		return true;

	return seqno_recv >= seqno;
}
231
/*
 * Assign the next seqno to @fence, add it to the pending list and arm the
 * timeout worker if the list was empty. Caller must hold
 * tlb_inval->seqno_lock (done by xe_tlb_inval_issue()), which serializes
 * seqno assignment and advancement.
 */
static void xe_tlb_inval_fence_prep(struct xe_tlb_inval_fence *fence)
{
	struct xe_tlb_inval *tlb_inval = fence->tlb_inval;

	fence->seqno = tlb_inval->seqno;
	trace_xe_tlb_inval_fence_send(tlb_inval->xe, fence);

	spin_lock_irq(&tlb_inval->pending_lock);
	fence->inval_time = ktime_get();
	list_add_tail(&fence->link, &tlb_inval->pending_fences);

	/* First pending fence arms the timeout worker */
	if (list_is_singular(&tlb_inval->pending_fences))
		queue_delayed_work(tlb_inval->timeout_wq, &tlb_inval->fence_tdr,
				   tlb_inval->ops->timeout_delay(tlb_inval));
	spin_unlock_irq(&tlb_inval->pending_lock);

	/* Advance, skipping 0 which is reserved (seqnos run 1..MAX-1) */
	tlb_inval->seqno = (tlb_inval->seqno + 1) %
		TLB_INVALIDATION_SEQNO_MAX;
	if (!tlb_inval->seqno)
		tlb_inval->seqno = 1;
}
253
/*
 * xe_tlb_inval_issue() - Prep @__fence with a seqno and call backend op
 * @op with that seqno (plus @args) under the seqno lock. On backend failure
 * the fence is signalled immediately. A backend return of -ECANCELED is
 * reported to the caller as success (0) after the fence has been signalled.
 */
#define xe_tlb_inval_issue(__tlb_inval, __fence, op, args...)	\
({								\
	int __ret;						\
								\
	xe_assert((__tlb_inval)->xe, (__tlb_inval)->ops);	\
	xe_assert((__tlb_inval)->xe, (__fence));		\
								\
	mutex_lock(&(__tlb_inval)->seqno_lock);			\
	xe_tlb_inval_fence_prep((__fence));			\
	__ret = op((__tlb_inval), (__fence)->seqno, ##args);	\
	if (__ret < 0)						\
		xe_tlb_inval_fence_signal_unlocked((__fence));	\
	mutex_unlock(&(__tlb_inval)->seqno_lock);		\
								\
	__ret == -ECANCELED ? 0 : __ret;			\
})
270
271 /**
272 * xe_tlb_inval_all() - Issue a TLB invalidation for all TLBs
273 * @tlb_inval: TLB invalidation client
274 * @fence: invalidation fence which will be signal on TLB invalidation
275 * completion
276 *
277 * Issue a TLB invalidation for all TLBs. Completion of TLB is asynchronous and
278 * caller can use the invalidation fence to wait for completion.
279 *
280 * Return: 0 on success, negative error code on error
281 */
xe_tlb_inval_all(struct xe_tlb_inval * tlb_inval,struct xe_tlb_inval_fence * fence)282 int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval,
283 struct xe_tlb_inval_fence *fence)
284 {
285 return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->all);
286 }
287
288 /**
289 * xe_tlb_inval_ggtt() - Issue a TLB invalidation for the GGTT
290 * @tlb_inval: TLB invalidation client
291 *
292 * Issue a TLB invalidation for the GGTT. Completion of TLB is asynchronous and
293 * caller can use the invalidation fence to wait for completion.
294 *
295 * Return: 0 on success, negative error code on error
296 */
xe_tlb_inval_ggtt(struct xe_tlb_inval * tlb_inval)297 int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval)
298 {
299 struct xe_tlb_inval_fence fence, *fence_ptr = &fence;
300 int ret;
301
302 xe_tlb_inval_fence_init(tlb_inval, fence_ptr, true);
303 ret = xe_tlb_inval_issue(tlb_inval, fence_ptr, tlb_inval->ops->ggtt);
304 xe_tlb_inval_fence_wait(fence_ptr);
305
306 return ret;
307 }
308
309 /**
310 * xe_tlb_inval_range() - Issue a TLB invalidation for an address range
311 * @tlb_inval: TLB invalidation client
312 * @fence: invalidation fence which will be signal on TLB invalidation
313 * completion
314 * @start: start address
315 * @end: end address
316 * @asid: address space id
317 * @prl_sa: suballocation of page reclaim list if used, NULL indicates PPC flush
318 *
319 * Issue a range based TLB invalidation if supported, if not fallback to a full
320 * TLB invalidation. Completion of TLB is asynchronous and caller can use
321 * the invalidation fence to wait for completion.
322 *
323 * Return: Negative error code on error, 0 on success
324 */
xe_tlb_inval_range(struct xe_tlb_inval * tlb_inval,struct xe_tlb_inval_fence * fence,u64 start,u64 end,u32 asid,struct drm_suballoc * prl_sa)325 int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
326 struct xe_tlb_inval_fence *fence, u64 start, u64 end,
327 u32 asid, struct drm_suballoc *prl_sa)
328 {
329 return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->ppgtt,
330 start, end, asid, prl_sa);
331 }
332
333 /**
334 * xe_tlb_inval_vm() - Issue a TLB invalidation for a VM
335 * @tlb_inval: TLB invalidation client
336 * @vm: VM to invalidate
337 *
338 * Invalidate entire VM's address space
339 */
xe_tlb_inval_vm(struct xe_tlb_inval * tlb_inval,struct xe_vm * vm)340 void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm)
341 {
342 struct xe_tlb_inval_fence fence;
343 u64 range = 1ull << vm->xe->info.va_bits;
344
345 xe_tlb_inval_fence_init(tlb_inval, &fence, true);
346 xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid, NULL);
347 xe_tlb_inval_fence_wait(&fence);
348 }
349
350 /**
351 * xe_tlb_inval_done_handler() - TLB invalidation done handler
352 * @tlb_inval: TLB invalidation client
353 * @seqno: seqno of invalidation that is done
354 *
355 * Update recv seqno, signal any TLB invalidation fences, and restart TDR
356 */
xe_tlb_inval_done_handler(struct xe_tlb_inval * tlb_inval,int seqno)357 void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno)
358 {
359 struct xe_device *xe = tlb_inval->xe;
360 struct xe_tlb_inval_fence *fence, *next;
361 unsigned long flags;
362
363 /*
364 * This can also be run both directly from the IRQ handler and also in
365 * process_g2h_msg(). Only one may process any individual CT message,
366 * however the order they are processed here could result in skipping a
367 * seqno. To handle that we just process all the seqnos from the last
368 * seqno_recv up to and including the one in msg[0]. The delta should be
369 * very small so there shouldn't be much of pending_fences we actually
370 * need to iterate over here.
371 *
372 * From GuC POV we expect the seqnos to always appear in-order, so if we
373 * see something later in the timeline we can be sure that anything
374 * appearing earlier has already signalled, just that we have yet to
375 * officially process the CT message like if racing against
376 * process_g2h_msg().
377 */
378 spin_lock_irqsave(&tlb_inval->pending_lock, flags);
379 if (seqno == TLB_INVALIDATION_SEQNO_INVALID) {
380 xe_tlb_inval_reset_timeout(tlb_inval);
381 spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
382 return;
383 }
384
385 if (xe_tlb_inval_seqno_past(tlb_inval, seqno)) {
386 spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
387 return;
388 }
389
390 WRITE_ONCE(tlb_inval->seqno_recv, seqno);
391
392 list_for_each_entry_safe(fence, next,
393 &tlb_inval->pending_fences, link) {
394 trace_xe_tlb_inval_fence_recv(xe, fence);
395
396 if (!xe_tlb_inval_seqno_past(tlb_inval, fence->seqno))
397 break;
398
399 xe_tlb_inval_fence_signal(fence);
400 }
401
402 if (!list_empty(&tlb_inval->pending_fences))
403 mod_delayed_work(tlb_inval->timeout_wq,
404 &tlb_inval->fence_tdr,
405 tlb_inval->ops->timeout_delay(tlb_inval));
406 else
407 cancel_delayed_work(&tlb_inval->fence_tdr);
408
409 spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
410 }
411
/* dma_fence name callbacks; strings appear in debugfs/trace fence dumps */
static const char *
xe_inval_fence_get_driver_name(struct dma_fence *dma_fence)
{
	return "xe";
}

static const char *
xe_inval_fence_get_timeline_name(struct dma_fence *dma_fence)
{
	return "tlb_inval_fence";
}

/* Minimal ops: no enable_signaling/release hooks needed for these fences */
static const struct dma_fence_ops inval_fence_ops = {
	.get_driver_name = xe_inval_fence_get_driver_name,
	.get_timeline_name = xe_inval_fence_get_timeline_name,
};
428
429 /**
430 * xe_tlb_inval_fence_init() - Initialize TLB invalidation fence
431 * @tlb_inval: TLB invalidation client
432 * @fence: TLB invalidation fence to initialize
433 * @stack: fence is stack variable
434 *
435 * Initialize TLB invalidation fence for use. xe_tlb_inval_fence_fini
436 * will be automatically called when fence is signalled (all fences must signal),
437 * even on error.
438 */
xe_tlb_inval_fence_init(struct xe_tlb_inval * tlb_inval,struct xe_tlb_inval_fence * fence,bool stack)439 void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval,
440 struct xe_tlb_inval_fence *fence,
441 bool stack)
442 {
443 xe_pm_runtime_get_noresume(tlb_inval->xe);
444
445 spin_lock_irq(&tlb_inval->lock);
446 dma_fence_init(&fence->base, &inval_fence_ops, &tlb_inval->lock,
447 dma_fence_context_alloc(1), 1);
448 spin_unlock_irq(&tlb_inval->lock);
449 INIT_LIST_HEAD(&fence->link);
450 if (stack)
451 set_bit(FENCE_STACK_BIT, &fence->base.flags);
452 else
453 dma_fence_get(&fence->base);
454 fence->tlb_inval = tlb_inval;
455 }
456