xref: /linux/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c (revision eb01fe7abbe2d0b38824d2a93fdb4cc3eaf2ccc1)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2023 Intel Corporation
4  */
5 
6 #include "xe_gt_tlb_invalidation.h"
7 
8 #include "abi/guc_actions_abi.h"
9 #include "xe_device.h"
10 #include "xe_gt.h"
11 #include "xe_gt_printk.h"
12 #include "xe_guc.h"
13 #include "xe_guc_ct.h"
14 #include "xe_trace.h"
15 
16 #define TLB_TIMEOUT	(HZ / 4)
17 
18 static void xe_gt_tlb_fence_timeout(struct work_struct *work)
19 {
20 	struct xe_gt *gt = container_of(work, struct xe_gt,
21 					tlb_invalidation.fence_tdr.work);
22 	struct xe_gt_tlb_invalidation_fence *fence, *next;
23 
24 	spin_lock_irq(&gt->tlb_invalidation.pending_lock);
25 	list_for_each_entry_safe(fence, next,
26 				 &gt->tlb_invalidation.pending_fences, link) {
27 		s64 since_inval_ms = ktime_ms_delta(ktime_get(),
28 						    fence->invalidation_time);
29 
30 		if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT)
31 			break;
32 
33 		trace_xe_gt_tlb_invalidation_fence_timeout(fence);
34 		xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
35 			  fence->seqno, gt->tlb_invalidation.seqno_recv);
36 
37 		list_del(&fence->link);
38 		fence->base.error = -ETIME;
39 		dma_fence_signal(&fence->base);
40 		dma_fence_put(&fence->base);
41 	}
42 	if (!list_empty(&gt->tlb_invalidation.pending_fences))
43 		queue_delayed_work(system_wq,
44 				   &gt->tlb_invalidation.fence_tdr,
45 				   TLB_TIMEOUT);
46 	spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
47 }
48 
49 /**
50  * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state
51  * @gt: GT structure
52  *
53  * Initialize GT TLB invalidation state. This is purely software initialization
54  * and should be called once during driver load.
55  *
56  * Return: 0 on success, negative error code on error.
57  */
58 int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
59 {
60 	gt->tlb_invalidation.seqno = 1;
61 	INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
62 	spin_lock_init(&gt->tlb_invalidation.pending_lock);
63 	spin_lock_init(&gt->tlb_invalidation.lock);
64 	gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
65 	INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
66 			  xe_gt_tlb_fence_timeout);
67 
68 	return 0;
69 }
70 
71 static void
72 __invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
73 {
74 	trace_xe_gt_tlb_invalidation_fence_signal(fence);
75 	dma_fence_signal(&fence->base);
76 	dma_fence_put(&fence->base);
77 }
78 
79 static void
80 invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
81 {
82 	list_del(&fence->link);
83 	__invalidation_fence_signal(fence);
84 }
85 
86 /**
87  * xe_gt_tlb_invalidation_reset - GT TLB invalidation reset
88  * @gt: GT structure
89  *
90  * Signal any pending invalidation fences; should be called during a GT reset.
91  */
92 void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
93 {
94 	struct xe_gt_tlb_invalidation_fence *fence, *next;
95 	struct xe_guc *guc = &gt->uc.guc;
96 	int pending_seqno;
97 
98 	/*
99 	 * CT channel is already disabled at this point. No new TLB requests can
100 	 * appear.
101 	 */
102 
103 	mutex_lock(&gt->uc.guc.ct.lock);
104 	spin_lock_irq(&gt->tlb_invalidation.pending_lock);
105 	cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
106 	/*
107 	 * We might have various kworkers waiting for TLB flushes to complete
108 	 * which are not tracked with an explicit TLB fence; at this stage those
109 	 * completions will never arrive since the CT is already disabled, so
110 	 * make sure we wake them up here under the assumption that we have
111 	 * completed a full GT reset.
112 	 */
113 	if (gt->tlb_invalidation.seqno == 1)
114 		pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1;
115 	else
116 		pending_seqno = gt->tlb_invalidation.seqno - 1;
117 	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno);
118 	wake_up_all(&guc->ct.wq);
119 
120 	list_for_each_entry_safe(fence, next,
121 				 &gt->tlb_invalidation.pending_fences, link)
122 		invalidation_fence_signal(fence);
123 	spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
124 	mutex_unlock(&gt->uc.guc.ct.lock);
125 }
126 
127 static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
128 {
129 	int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv);
130 
131 	if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
132 		return false;
133 
134 	if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
135 		return true;
136 
137 	return seqno_recv >= seqno;
138 }
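
/*
 * Worked example of the wrap handling above (illustrative numbers, assuming
 * TLB_INVALIDATION_SEQNO_MAX is much larger than the number of in-flight
 * invalidations): if seqno_recv has just wrapped around to 2 and a fence still
 * carries seqno TLB_INVALIDATION_SEQNO_MAX - 3, the difference is well above
 * TLB_INVALIDATION_SEQNO_MAX / 2, so that fence seqno is treated as already
 * received. Conversely, if seqno_recv is TLB_INVALIDATION_SEQNO_MAX - 1 and a
 * freshly assigned seqno is 2, the difference is below
 * -(TLB_INVALIDATION_SEQNO_MAX / 2), so it is treated as not yet received.
 */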
139 
140 static int send_tlb_invalidation(struct xe_guc *guc,
141 				 struct xe_gt_tlb_invalidation_fence *fence,
142 				 u32 *action, int len)
143 {
144 	struct xe_gt *gt = guc_to_gt(guc);
145 	int seqno;
146 	int ret;
147 
148 	/*
149 	 * XXX: The seqno algorithm relies on TLB invalidations being processed
150 	 * in order, which they currently are; if that changes, the algorithm will
151 	 * need to be updated.
152 	 */
153 
154 	mutex_lock(&guc->ct.lock);
155 	seqno = gt->tlb_invalidation.seqno;
156 	if (fence) {
157 		fence->seqno = seqno;
158 		trace_xe_gt_tlb_invalidation_fence_send(fence);
159 	}
160 	action[1] = seqno;
161 	ret = xe_guc_ct_send_locked(&guc->ct, action, len,
162 				    G2H_LEN_DW_TLB_INVALIDATE, 1);
163 	if (!ret && fence) {
164 		spin_lock_irq(&gt->tlb_invalidation.pending_lock);
165 		/*
166 		 * We haven't actually published the TLB fence as per
167 		 * pending_fences, but in theory our seqno could have already
168 		 * been written as we acquired the pending_lock. In such a case
169 		 * we can just go ahead and signal the fence here.
170 		 */
171 		if (tlb_invalidation_seqno_past(gt, seqno)) {
172 			__invalidation_fence_signal(fence);
173 		} else {
174 			fence->invalidation_time = ktime_get();
175 			list_add_tail(&fence->link,
176 				      &gt->tlb_invalidation.pending_fences);
177 
178 			if (list_is_singular(&gt->tlb_invalidation.pending_fences))
179 				queue_delayed_work(system_wq,
180 						   &gt->tlb_invalidation.fence_tdr,
181 						   TLB_TIMEOUT);
182 		}
183 		spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
184 	} else if (ret < 0 && fence) {
185 		__invalidation_fence_signal(fence);
186 	}
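	/*
	 * Note on wrap (illustrative): the seqno counter below stays within
	 * [1, TLB_INVALIDATION_SEQNO_MAX - 1]. After handing out
	 * TLB_INVALIDATION_SEQNO_MAX - 1 the modulo yields 0, which is
	 * immediately bumped back to 1, so a seqno of 0 is never handed out.
	 */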
187 	if (!ret) {
188 		gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
189 			TLB_INVALIDATION_SEQNO_MAX;
190 		if (!gt->tlb_invalidation.seqno)
191 			gt->tlb_invalidation.seqno = 1;
192 		ret = seqno;
193 	}
194 	mutex_unlock(&guc->ct.lock);
195 
196 	return ret;
197 }
198 
199 #define MAKE_INVAL_OP(type)	((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
200 		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
201 		XE_GUC_TLB_INVAL_FLUSH_CACHE)
202 
203 /**
204  * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
205  * @gt: GT structure
206  *
207  * Issue a TLB invalidation for the GuC. Completion of the invalidation is
208  * asynchronous; the caller can use seqno + xe_gt_tlb_invalidation_wait().
209  *
210  * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
211  * negative error code on error.
212  */
213 int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
214 {
215 	u32 action[] = {
216 		XE_GUC_ACTION_TLB_INVALIDATION,
217 		0,  /* seqno, replaced in send_tlb_invalidation */
218 		MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
219 	};
220 
221 	return send_tlb_invalidation(&gt->uc.guc, NULL, action,
222 				     ARRAY_SIZE(action));
223 }
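
/*
 * Illustrative usage sketch (not part of the driver): as the kernel-doc above
 * notes, completion is asynchronous, so a caller needing a synchronous GuC TLB
 * flush could pair the returned seqno with xe_gt_tlb_invalidation_wait(). The
 * helper name is hypothetical.
 */
static int __maybe_unused example_guc_tlb_flush_sync(struct xe_gt *gt)
{
	int seqno;

	/* Positive seqno on success, negative error code on failure */
	seqno = xe_gt_tlb_invalidation_guc(gt);
	if (seqno < 0)
		return seqno;

	/* Blocks up to TLB_TIMEOUT for the matching G2H, -ETIME on timeout */
	return xe_gt_tlb_invalidation_wait(gt, seqno);
}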
224 
225 /**
226  * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
227  * @gt: GT structure
228  * @fence: invalidation fence which will be signaled on TLB invalidation
229  * completion; can be NULL
230  * @vma: VMA to invalidate
231  *
232  * Issue a range-based TLB invalidation if supported; if not, fall back to a
233  * full TLB invalidation. Completion of the invalidation is asynchronous and
234  * the caller can either use the invalidation fence or seqno +
235  * xe_gt_tlb_invalidation_wait to wait for completion.
236  *
237  * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
238  * negative error code on error.
239  */
240 int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
241 			       struct xe_gt_tlb_invalidation_fence *fence,
242 			       struct xe_vma *vma)
243 {
244 	struct xe_device *xe = gt_to_xe(gt);
245 #define MAX_TLB_INVALIDATION_LEN	7
246 	u32 action[MAX_TLB_INVALIDATION_LEN];
247 	int len = 0;
248 
249 	xe_gt_assert(gt, vma);
250 
251 	/* Execlists not supported */
252 	if (gt_to_xe(gt)->info.force_execlist) {
253 		if (fence)
254 			__invalidation_fence_signal(fence);
255 
256 		return 0;
257 	}
258 
259 	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
260 	action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
261 	if (!xe->info.has_range_tlb_invalidation) {
262 		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
263 	} else {
264 		u64 start = xe_vma_start(vma);
265 		u64 length = xe_vma_size(vma);
266 		u64 align, end;
267 
268 		if (length < SZ_4K)
269 			length = SZ_4K;
270 
271 		/*
272 		 * We need to invalidate at a higher granularity if the start
273 		 * address is not aligned to the length: find a length large
274 		 * enough to create an address mask covering the whole required
275 		 * range.
276 		 */
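		/*
		 * Worked example with illustrative numbers: a VMA at 0x7000 of
		 * size 0x3000 (end 0xa000) gives align = 0x4000, so start is
		 * rounded down to 0x4000 and end up to 0xc000. A 0x4000 window
		 * cannot cover that span, so the loop below doubles the length
		 * to 0x8000 and then 0x10000, settling on start = 0x0 and
		 * length = 0x10000, which covers the required range.
		 */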
277 		align = roundup_pow_of_two(length);
278 		start = ALIGN_DOWN(xe_vma_start(vma), align);
279 		end = ALIGN(xe_vma_end(vma), align);
280 		length = align;
281 		while (start + length < end) {
282 			length <<= 1;
283 			start = ALIGN_DOWN(xe_vma_start(vma), length);
284 		}
285 
286 		/*
287 		 * The minimum invalidation size the hardware expects for a 2MB
288 		 * page is 16MB.
289 		 */
290 		if (length >= SZ_2M) {
291 			length = max_t(u64, SZ_16M, length);
292 			start = ALIGN_DOWN(xe_vma_start(vma), length);
293 		}
294 
295 		xe_gt_assert(gt, length >= SZ_4K);
296 		xe_gt_assert(gt, is_power_of_2(length));
297 		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)));
298 		xe_gt_assert(gt, IS_ALIGNED(start, length));
299 
300 		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
301 		action[len++] = xe_vma_vm(vma)->usm.asid;
302 		action[len++] = lower_32_bits(start);
303 		action[len++] = upper_32_bits(start);
304 		action[len++] = ilog2(length) - ilog2(SZ_4K);
305 	}
306 
307 	xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
308 
309 	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
310 }
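
/*
 * Illustrative usage sketch (not part of the driver): callers may pass an
 * initialized xe_gt_tlb_invalidation_fence so they can wait asynchronously,
 * but with @fence == NULL the returned seqno can be used for a simple
 * synchronous flush of a single VMA's mappings. The helper name is
 * hypothetical.
 */
static int __maybe_unused example_vma_tlb_flush_sync(struct xe_gt *gt,
						     struct xe_vma *vma)
{
	int seqno;

	seqno = xe_gt_tlb_invalidation_vma(gt, NULL, vma);
	if (seqno <= 0)
		return seqno;	/* 0 on execlist platforms, negative on error */

	return xe_gt_tlb_invalidation_wait(gt, seqno);
}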
311 
312 /**
313  * xe_gt_tlb_invalidation_wait - Wait for a TLB invalidation to complete
314  * @gt: GT structure
315  * @seqno: seqno to wait on, as returned from xe_gt_tlb_invalidation_guc/vma
316  *
317  * Wait up to TLB_TIMEOUT (HZ / 4, roughly 250ms) for a TLB invalidation to
318  * complete; in practice the invalidation should always arrive well within that.
319  *
320  * Return: 0 on success, -ETIME on TLB invalidation timeout
321  */
322 int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
323 {
324 	struct xe_guc *guc = &gt->uc.guc;
325 	int ret;
326 
327 	/* Execlists not supported */
328 	if (gt_to_xe(gt)->info.force_execlist)
329 		return 0;
330 
331 	/*
332 	 * XXX: See above, this algorithm only works if seqnos are always in
333 	 * order
334 	 */
335 	ret = wait_event_timeout(guc->ct.wq,
336 				 tlb_invalidation_seqno_past(gt, seqno),
337 				 TLB_TIMEOUT);
338 	if (!ret) {
339 		struct drm_printer p = xe_gt_err_printer(gt);
340 
341 		xe_gt_err(gt, "TLB invalidation timed out, seqno=%d, recv=%d\n",
342 			  seqno, gt->tlb_invalidation.seqno_recv);
343 		xe_guc_ct_print(&guc->ct, &p, true);
344 		return -ETIME;
345 	}
346 
347 	return 0;
348 }
349 
350 /**
351  * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
352  * @guc: guc
353  * @msg: message indicating TLB invalidation done
354  * @len: length of message
355  *
356  * Parse the seqno of the TLB invalidation, wake any waiters for that seqno,
357  * and signal any pending invalidation fences up to and including it. The
358  * algorithm relies on seqnos being received from the GuC in order.
359  *
360  * Return: 0 on success, -EPROTO for malformed messages.
361  */
362 int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
363 {
364 	struct xe_gt *gt = guc_to_gt(guc);
365 	struct xe_gt_tlb_invalidation_fence *fence, *next;
366 	unsigned long flags;
367 
368 	if (unlikely(len != 1))
369 		return -EPROTO;
370 
371 	/*
372 	 * This can be run both directly from the IRQ handler and from
373 	 * process_g2h_msg(). Only one of them may process any individual CT
374 	 * message, however the order they are processed in here could result in
375 	 * skipping a seqno. To handle that we just process all the seqnos from
376 	 * the last seqno_recv up to and including the one in msg[0]. The delta
377 	 * should be very small so there shouldn't be many pending_fences we
378 	 * actually need to iterate over here.
379 	 *
380 	 * From the GuC POV we expect the seqnos to always appear in order, so
381 	 * if we see something later in the timeline we can be sure that
382 	 * anything appearing earlier has already signalled; we just have yet to
383 	 * officially process the CT message, e.g. when racing against
384 	 * process_g2h_msg().
385 	 */
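	/*
	 * For example (illustrative numbers): if seqno_recv is currently 5 and
	 * msg[0] is 7, the write below publishes 7 and the loop that follows
	 * signals any pending fences with seqno 6 or 7, even if the G2H for
	 * seqno 6 has not been processed here yet.
	 */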
386 	spin_lock_irqsave(&gt->tlb_invalidation.pending_lock, flags);
387 	if (tlb_invalidation_seqno_past(gt, msg[0])) {
388 		spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);
389 		return 0;
390 	}
391 
392 	/*
393 	 * wake_up_all() and wait_event_timeout() already have the correct
394 	 * barriers.
395 	 */
396 	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);
397 	wake_up_all(&guc->ct.wq);
398 
399 	list_for_each_entry_safe(fence, next,
400 				 &gt->tlb_invalidation.pending_fences, link) {
401 		trace_xe_gt_tlb_invalidation_fence_recv(fence);
402 
403 		if (!tlb_invalidation_seqno_past(gt, fence->seqno))
404 			break;
405 
406 		invalidation_fence_signal(fence);
407 	}
408 
409 	if (!list_empty(&gt->tlb_invalidation.pending_fences))
410 		mod_delayed_work(system_wq,
411 				 &gt->tlb_invalidation.fence_tdr,
412 				 TLB_TIMEOUT);
413 	else
414 		cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
415 
416 	spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);
417 
418 	return 0;
419 }
420