xref: /linux/drivers/gpu/drm/xe/xe_guc_tlb_inval.c (revision 68a052239fc4b351e961f698b824f7654a346091)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include "abi/guc_actions_abi.h"

#include "xe_device.h"
#include "xe_force_wake.h"
#include "xe_gt_stats.h"
#include "xe_gt_types.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_tlb_inval.h"
#include "xe_mmio.h"
#include "xe_tlb_inval.h"

#include "regs/xe_guc_regs.h"

/*
 * XXX: The seqno algorithm relies on TLB invalidations being processed in the
 * order in which they are issued, which the GuC currently guarantees. If that
 * changes, the algorithm will need to be updated.
 */

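/*
 * Send a prepared TLB invalidation H2G action over the GuC CT channel,
 * bumping the GT TLB invalidation stat counter and reserving space for the
 * corresponding G2H done message.
 */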
static int send_tlb_inval(struct xe_guc *guc, const u32 *action, int len)
{
	struct xe_gt *gt = guc_to_gt(guc);

	xe_gt_assert(gt, action[1]);	/* Seqno */

	xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1);
	return xe_guc_ct_send(&guc->ct, action, len,
			      G2H_LEN_DW_TLB_INVALIDATE, 1);
}

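/*
 * Compose the invalidation operation dword for a GuC TLB invalidation action:
 * the given invalidation type combined with heavy mode and a cache flush.
 */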
#define MAKE_INVAL_OP(type)	((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
		XE_GUC_TLB_INVAL_FLUSH_CACHE)

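/* Request a full invalidation of all TLBs for this GT via the GuC. */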
static int send_tlb_inval_all(struct xe_tlb_inval *tlb_inval, u32 seqno)
{
	struct xe_guc *guc = tlb_inval->private;
	u32 action[] = {
		XE_GUC_ACTION_TLB_INVALIDATION_ALL,
		seqno,
		MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL),
	};

	return send_tlb_inval(guc, action, ARRAY_SIZE(action));
}

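/*
 * Invalidate TLB entries covering the GGTT: via the GuC CT channel when GuC
 * submission is up, otherwise by poking the invalidation registers directly
 * over MMIO (not an option for VFs, which simply signal completion).
 */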
static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
{
	struct xe_guc *guc = tlb_inval->private;
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_device *xe = guc_to_xe(guc);

	/*
	 * Returning -ECANCELED from this function is squashed at the caller
	 * and signals any waiters.
	 */

	if (xe_guc_ct_enabled(&guc->ct) && guc->submission_state.enabled) {
		u32 action[] = {
			XE_GUC_ACTION_TLB_INVALIDATION,
			seqno,
			MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
		};

		return send_tlb_inval(guc, action, ARRAY_SIZE(action));
	} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
		struct xe_mmio *mmio = &gt->mmio;
		unsigned int fw_ref;

		if (IS_SRIOV_VF(xe))
			return -ECANCELED;

		fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
		if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
			xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1,
					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
			xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0,
					PVC_GUC_TLB_INV_DESC0_VALID);
		} else {
			xe_mmio_write32(mmio, GUC_TLB_INV_CR,
					GUC_TLB_INV_CR_INVALIDATE);
		}
		xe_force_wake_put(gt_to_fw(gt), fw_ref);
	}

	return -ECANCELED;
}

/*
 * Ensure that roundup_pow_of_two(length) doesn't overflow.
 * Note that roundup_pow_of_two() operates on unsigned long,
 * not on u64.
 */
#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX))

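/*
 * Issue a PPGTT range invalidation for the given ASID. Falls back to a full
 * invalidation when the platform lacks range-based invalidation or the range
 * is too large to encode; otherwise the range is widened to a power-of-two,
 * naturally aligned window that the action encoding can express.
 */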
static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
				u64 start, u64 end, u32 asid)
{
#define MAX_TLB_INVALIDATION_LEN	7
	struct xe_guc *guc = tlb_inval->private;
	struct xe_gt *gt = guc_to_gt(guc);
	u32 action[MAX_TLB_INVALIDATION_LEN];
	u64 length = end - start;
	int len = 0;

	if (guc_to_xe(guc)->info.force_execlist)
		return -ECANCELED;

	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
	action[len++] = seqno;
	if (!gt_to_xe(gt)->info.has_range_tlb_inval ||
	    length > MAX_RANGE_TLB_INVALIDATION_LENGTH) {
		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
	} else {
		u64 orig_start = start;
		u64 align;

		if (length < SZ_4K)
			length = SZ_4K;

		/*
		 * We need to invalidate at a coarser granularity if the start
		 * address is not aligned to the length. In that case, grow the
		 * length until a naturally aligned, power-of-two window covers
		 * the entire requested range.
		 */
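		/*
		 * For example (illustrative values): a request for
		 * [0x3000, 0x5000) has length 0x2000, but 0x3000 is not
		 * 0x2000-aligned, so the logic below widens it to a 32K
		 * window starting at 0, i.e. [0x0, 0x8000).
		 */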
		align = roundup_pow_of_two(length);
		start = ALIGN_DOWN(start, align);
		end = ALIGN(end, align);
		length = align;
		while (start + length < end) {
			length <<= 1;
			start = ALIGN_DOWN(orig_start, length);
		}

		/*
		 * The minimum invalidation size the hardware expects for a
		 * 2MB page is 16MB.
		 */
		if (length >= SZ_2M) {
			length = max_t(u64, SZ_16M, length);
			start = ALIGN_DOWN(orig_start, length);
		}

		xe_gt_assert(gt, length >= SZ_4K);
		xe_gt_assert(gt, is_power_of_2(length));
		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
						    ilog2(SZ_2M) + 1)));
		xe_gt_assert(gt, IS_ALIGNED(start, length));

		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
		action[len++] = asid;
		action[len++] = lower_32_bits(start);
		action[len++] = upper_32_bits(start);
		action[len++] = ilog2(length) - ilog2(SZ_4K);
	}

	xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);

	return send_tlb_inval(guc, action, len);
}

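/* The backend is usable once the GuC CT channel has been initialized. */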
static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval)
{
	struct xe_guc *guc = tlb_inval->private;

	return xe_guc_ct_initialized(&guc->ct);
}

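/*
 * Flush the G2H worker so that any TLB invalidation done messages already
 * received over the CT channel are processed before the caller proceeds.
 */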
static void tlb_inval_flush(struct xe_tlb_inval *tlb_inval)
{
	struct xe_guc *guc = tlb_inval->private;

	LNL_FLUSH_WORK(&guc->ct.g2h_worker);
}

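/*
 * Worst-case wait, in jiffies, that the TLB invalidation client should allow
 * for a single invalidation before treating it as timed out.
 */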
static long tlb_inval_timeout_delay(struct xe_tlb_inval *tlb_inval)
{
	struct xe_guc *guc = tlb_inval->private;

	/* Time the HW/GuC needs to process a TLB invalidation request */
	const long hw_tlb_timeout = HZ / 4;

	/* Estimated extra delay introduced by the CTB transport */
	long delay = xe_guc_ct_queue_proc_time_jiffies(&guc->ct);

	return hw_tlb_timeout + 2 * delay;
}

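/* GuC-backed implementation of the TLB invalidation backend ops */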
static const struct xe_tlb_inval_ops guc_tlb_inval_ops = {
	.all = send_tlb_inval_all,
	.ggtt = send_tlb_inval_ggtt,
	.ppgtt = send_tlb_inval_ppgtt,
	.initialized = tlb_inval_initialized,
	.flush = tlb_inval_flush,
	.timeout_delay = tlb_inval_timeout_delay,
};

/**
 * xe_guc_tlb_inval_init_early() - Init GuC TLB invalidation early
 * @guc: GuC object
 * @tlb_inval: TLB invalidation client
 *
 * Initialize GuC TLB invalidation by setting the back pointer in the TLB
 * invalidation client to the GuC and setting the GuC backend ops.
 */
void xe_guc_tlb_inval_init_early(struct xe_guc *guc,
				 struct xe_tlb_inval *tlb_inval)
{
	tlb_inval->private = guc;
	tlb_inval->ops = &guc_tlb_inval_ops;
}

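/*
 * Presumably invoked from the GuC CT G2H processing path when a TLB
 * invalidation done message arrives (see xe_guc_ct.c).
 */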
/**
 * xe_guc_tlb_inval_done_handler() - TLB invalidation done handler
 * @guc: GuC object
 * @msg: message indicating TLB invalidation done
 * @len: length of message
 *
 * Parse the seqno of the TLB invalidation, wake any waiters for that seqno,
 * and signal any invalidation fences for it. The algorithm for this depends
 * on seqnos being received in order and asserts this assumption.
 *
 * Return: 0 on success, -EPROTO for malformed messages.
 */
int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);

	if (unlikely(len != 1))
		return -EPROTO;

	xe_tlb_inval_done_handler(&gt->tlb_inval, msg[0]);

	return 0;
}
243