// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include "abi/guc_actions_abi.h"

#include "xe_device.h"
#include "xe_gt_stats.h"
#include "xe_gt_types.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_tlb_inval.h"
#include "xe_force_wake.h"
#include "xe_mmio.h"
#include "xe_tlb_inval.h"

#include "regs/xe_guc_regs.h"

/*
 * XXX: The seqno algorithm relies on TLB invalidations being processed in the
 * order in which they are issued, which the GuC currently guarantees. If that
 * changes, the algorithm will need to be updated.
 */

static int send_tlb_inval(struct xe_guc *guc, const u32 *action, int len)
{
	struct xe_gt *gt = guc_to_gt(guc);

	xe_gt_assert(gt, action[1]);	/* Seqno */

	xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1);
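	/*
	 * The last two arguments reserve space in the G2H channel for the
	 * single TLB invalidation done message expected in reply.
	 */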
	return xe_guc_ct_send(&guc->ct, action, len,
			      G2H_LEN_DW_TLB_INVALIDATE, 1);
}

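/*
 * Build the invalidation opcode dword: invalidation type, heavy mode and a
 * cache flush.
 */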
#define MAKE_INVAL_OP(type)	((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
		XE_GUC_TLB_INVAL_FLUSH_CACHE)

static int send_tlb_inval_all(struct xe_tlb_inval *tlb_inval, u32 seqno)
{
	struct xe_guc *guc = tlb_inval->private;
	u32 action[] = {
		XE_GUC_ACTION_TLB_INVALIDATION_ALL,
		seqno,
		MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL),
	};

	return send_tlb_inval(guc, action, ARRAY_SIZE(action));
}

static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
{
	struct xe_guc *guc = tlb_inval->private;
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_device *xe = guc_to_xe(guc);

	/*
	 * A -ECANCELED return from this function is squashed by the caller,
	 * which then signals the waiters.
	 */

	if (xe_guc_ct_enabled(&guc->ct) && guc->submission_state.enabled) {
		u32 action[] = {
			XE_GUC_ACTION_TLB_INVALIDATION,
			seqno,
			MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
		};

		return send_tlb_inval(guc, action, ARRAY_SIZE(action));
	} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
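		/*
		 * GuC is not available to service the request, so issue the
		 * GGTT TLB invalidation directly via MMIO.
		 */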
		struct xe_mmio *mmio = &gt->mmio;
		unsigned int fw_ref;

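		/* The MMIO path is not used on VFs; just signal the waiters */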
		if (IS_SRIOV_VF(xe))
			return -ECANCELED;

		fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
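		/*
		 * PVC and Xe2+ use the two-register invalidation descriptor
		 * interface; older platforms use GUC_TLB_INV_CR.
		 */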
		if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
			xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1,
					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
			xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0,
					PVC_GUC_TLB_INV_DESC0_VALID);
		} else {
			xe_mmio_write32(mmio, GUC_TLB_INV_CR,
					GUC_TLB_INV_CR_INVALIDATE);
		}
		xe_force_wake_put(gt_to_fw(gt), fw_ref);
	}

	return -ECANCELED;
}

/*
 * Ensure that roundup_pow_of_two(length) doesn't overflow.
 * Note that roundup_pow_of_two() operates on unsigned long,
 * not on u64.
 */
#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX))

static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
				u64 start, u64 end, u32 asid)
{
#define MAX_TLB_INVALIDATION_LEN	7
	struct xe_guc *guc = tlb_inval->private;
	struct xe_gt *gt = guc_to_gt(guc);
	u32 action[MAX_TLB_INVALIDATION_LEN];
	u64 length = end - start;
	int len = 0;

	if (guc_to_xe(guc)->info.force_execlist)
		return -ECANCELED;

	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
	action[len++] = seqno;
	if (!gt_to_xe(gt)->info.has_range_tlb_inval ||
	    length > MAX_RANGE_TLB_INVALIDATION_LENGTH) {
		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
	} else {
		u64 orig_start = start;
		u64 align;

		if (length < SZ_4K)
			length = SZ_4K;

		/*
		 * We need to invalidate at a coarser granularity if the start
		 * address is not aligned to the length: find a power-of-two
		 * length large enough that the resulting address mask covers
		 * the entire requested range.
		 */
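		/*
		 * For example, invalidating [0x3000, 0x5000) gives length = 8K
		 * but the range straddles an 8K boundary, so the loop below
		 * keeps doubling the length (re-aligning the start each time)
		 * until one aligned power-of-two block covers the request,
		 * ending up invalidating [0x0, 0x8000).
		 */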
		align = roundup_pow_of_two(length);
		start = ALIGN_DOWN(start, align);
		end = ALIGN(end, align);
		length = align;
		while (start + length < end) {
			length <<= 1;
			start = ALIGN_DOWN(orig_start, length);
		}

		/*
		 * The minimum invalidation size the hardware expects for a
		 * 2MB page is 16MB.
		 */
		if (length >= SZ_2M) {
			length = max_t(u64, SZ_16M, length);
			start = ALIGN_DOWN(orig_start, length);
		}

		xe_gt_assert(gt, length >= SZ_4K);
		xe_gt_assert(gt, is_power_of_2(length));
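		/* 4M and 8M lengths are never generated; see the 2MB handling above */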
		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
						    ilog2(SZ_2M) + 1)));
		xe_gt_assert(gt, IS_ALIGNED(start, length));

		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
		action[len++] = asid;
		action[len++] = lower_32_bits(start);
		action[len++] = upper_32_bits(start);
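		/* The range is encoded as log2 of the number of 4K pages */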
		action[len++] = ilog2(length) - ilog2(SZ_4K);
	}

	xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);

	return send_tlb_inval(guc, action, len);
}

static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval)
{
	struct xe_guc *guc = tlb_inval->private;

	return xe_guc_ct_initialized(&guc->ct);
}

static void tlb_inval_flush(struct xe_tlb_inval *tlb_inval)
{
	struct xe_guc *guc = tlb_inval->private;

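	/* Flush the G2H worker so pending invalidation completions are processed */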
	LNL_FLUSH_WORK(&guc->ct.g2h_worker);
}

static long tlb_inval_timeout_delay(struct xe_tlb_inval *tlb_inval)
{
	struct xe_guc *guc = tlb_inval->private;

	/* this reflects what HW/GuC needs to process TLB inv request */
	const long hw_tlb_timeout = HZ / 4;

	/* this estimates actual delay caused by the CTB transport */
	long delay = xe_guc_ct_queue_proc_time_jiffies(&guc->ct);

	return hw_tlb_timeout + 2 * delay;
}

static const struct xe_tlb_inval_ops guc_tlb_inval_ops = {
	.all = send_tlb_inval_all,
	.ggtt = send_tlb_inval_ggtt,
	.ppgtt = send_tlb_inval_ppgtt,
	.initialized = tlb_inval_initialized,
	.flush = tlb_inval_flush,
	.timeout_delay = tlb_inval_timeout_delay,
};

/**
 * xe_guc_tlb_inval_init_early() - Init GuC TLB invalidation early
 * @guc: GuC object
 * @tlb_inval: TLB invalidation client
 *
 * Initialize GuC TLB invalidation by setting the back pointer in the TLB
 * invalidation client to the GuC and setting the GuC backend ops.
 */
void xe_guc_tlb_inval_init_early(struct xe_guc *guc,
				 struct xe_tlb_inval *tlb_inval)
{
	tlb_inval->private = guc;
	tlb_inval->ops = &guc_tlb_inval_ops;
}

/**
 * xe_guc_tlb_inval_done_handler() - TLB invalidation done handler
 * @guc: GuC object
 * @msg: message indicating TLB invalidation done
 * @len: length of message
 *
 * Parse the seqno of the completed TLB invalidation, wake any waiters for that
 * seqno, and signal any invalidation fences for it. The algorithm depends on
 * seqnos being received in order and asserts this assumption.
 *
 * Return: 0 on success, -EPROTO for malformed messages.
 */
int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);

	if (unlikely(len != 1))
		return -EPROTO;

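	/* msg[0] carries the seqno of the completed invalidation */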
	xe_tlb_inval_done_handler(&gt->tlb_inval, msg[0]);

	return 0;
}