xref: /linux/drivers/gpu/drm/xe/xe_gt_sriov_vf.c (revision f2161d5f1aae21a42b0a64d87e10cb31db423f42)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2023-2024 Intel Corporation
4  */
5 
6 #include <linux/bitfield.h>
7 #include <linux/bsearch.h>
8 
9 #include <drm/drm_managed.h>
10 #include <drm/drm_print.h>
11 
12 #include "abi/guc_actions_sriov_abi.h"
13 #include "abi/guc_communication_mmio_abi.h"
14 #include "abi/guc_klvs_abi.h"
15 #include "abi/guc_relay_actions_abi.h"
16 #include "regs/xe_gt_regs.h"
17 #include "regs/xe_gtt_defs.h"
18 
19 #include "xe_assert.h"
20 #include "xe_device.h"
21 #include "xe_ggtt.h"
22 #include "xe_gt_sriov_printk.h"
23 #include "xe_gt_sriov_vf.h"
24 #include "xe_gt_sriov_vf_types.h"
25 #include "xe_guc.h"
26 #include "xe_guc_ct.h"
27 #include "xe_guc_hxg_helpers.h"
28 #include "xe_guc_relay.h"
29 #include "xe_guc_submit.h"
30 #include "xe_irq.h"
31 #include "xe_lrc.h"
32 #include "xe_memirq.h"
33 #include "xe_mmio.h"
34 #include "xe_sriov.h"
35 #include "xe_sriov_vf.h"
36 #include "xe_sriov_vf_ccs.h"
37 #include "xe_tile_sriov_vf.h"
38 #include "xe_tlb_inval.h"
39 #include "xe_uc_fw.h"
40 #include "xe_wopcm.h"
41 
42 #define make_u64_from_u32(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo)))
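/*
 * Illustration (informational only): guc_action_query_single_klv64() below
 * receives a 64-bit KLV value as two 32-bit dwords, with value[0] holding the
 * lower and value[1] the upper half, and reassembles it as:
 *
 *	u64 v = make_u64_from_u32(value[1], value[0]);
 *
 * e.g. hi = 0x1, lo = 0x80000000 yields v = 0x180000000.
 */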
43 
44 static int guc_action_vf_reset(struct xe_guc *guc)
45 {
46 	u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = {
47 		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
48 		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
49 		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_VF_RESET),
50 	};
51 	int ret;
52 
53 	ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request));
54 
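	/*
	 * The request above expects no response payload, so a positive return
	 * from the MMIO send (i.e. response data present) is mapped to -EPROTO.
	 */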
55 	return ret > 0 ? -EPROTO : ret;
56 }
57 
58 #define GUC_RESET_VF_STATE_RETRY_MAX	10
59 static int vf_reset_guc_state(struct xe_gt *gt)
60 {
61 	unsigned int retry = GUC_RESET_VF_STATE_RETRY_MAX;
62 	struct xe_guc *guc = &gt->uc.guc;
63 	int err;
64 
65 	do {
66 		err = guc_action_vf_reset(guc);
67 		if (!err || err != -ETIMEDOUT)
68 			break;
69 	} while (--retry);
70 
71 	if (unlikely(err))
72 		xe_gt_sriov_err(gt, "Failed to reset GuC state (%pe)\n", ERR_PTR(err));
73 	return err;
74 }
75 
76 /**
77  * xe_gt_sriov_vf_reset - Reset GuC VF internal state.
78  * @gt: the &xe_gt
79  *
80  * It requires functional `GuC MMIO based communication`_.
81  *
82  * Return: 0 on success or a negative error code on failure.
83  */
84 int xe_gt_sriov_vf_reset(struct xe_gt *gt)
85 {
86 	if (!xe_device_uc_enabled(gt_to_xe(gt)))
87 		return -ENODEV;
88 
89 	return vf_reset_guc_state(gt);
90 }
91 
92 static int guc_action_match_version(struct xe_guc *guc,
93 				    struct xe_uc_fw_version *wanted,
94 				    struct xe_uc_fw_version *found)
95 {
96 	u32 request[VF2GUC_MATCH_VERSION_REQUEST_MSG_LEN] = {
97 		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
98 		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
99 		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
100 			   GUC_ACTION_VF2GUC_MATCH_VERSION),
101 		FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH, wanted->branch) |
102 		FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR, wanted->major) |
103 		FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR, wanted->minor),
104 	};
105 	u32 response[GUC_MAX_MMIO_MSG_LEN];
106 	int ret;
107 
108 	BUILD_BUG_ON(VF2GUC_MATCH_VERSION_RESPONSE_MSG_LEN > GUC_MAX_MMIO_MSG_LEN);
109 
110 	ret = xe_guc_mmio_send_recv(guc, request, ARRAY_SIZE(request), response);
111 	if (unlikely(ret < 0))
112 		return ret;
113 
114 	if (unlikely(FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_0_MBZ, response[0])))
115 		return -EPROTO;
116 
117 	memset(found, 0, sizeof(struct xe_uc_fw_version));
118 	found->branch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH, response[1]);
119 	found->major = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR, response[1]);
120 	found->minor = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR, response[1]);
121 	found->patch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH, response[1]);
122 
123 	return 0;
124 }
125 
126 static int guc_action_match_version_any(struct xe_guc *guc,
127 					struct xe_uc_fw_version *found)
128 {
129 	struct xe_uc_fw_version wanted = {
130 		.branch = GUC_VERSION_BRANCH_ANY,
131 		.major = GUC_VERSION_MAJOR_ANY,
132 		.minor = GUC_VERSION_MINOR_ANY,
133 		.patch = 0
134 	};
135 
136 	return guc_action_match_version(guc, &wanted, found);
137 }
138 
139 static void vf_minimum_guc_version(struct xe_gt *gt, struct xe_uc_fw_version *ver)
140 {
141 	struct xe_device *xe = gt_to_xe(gt);
142 
143 	memset(ver, 0, sizeof(struct xe_uc_fw_version));
144 
145 	switch (xe->info.platform) {
146 	case XE_TIGERLAKE ... XE_PVC:
147 		/* 1.1 is the current baseline for the Xe driver */
148 		ver->branch = 0;
149 		ver->major = 1;
150 		ver->minor = 1;
151 		break;
152 	default:
153 		/* 1.2 has support for the GMD_ID KLV */
154 		ver->branch = 0;
155 		ver->major = 1;
156 		ver->minor = 2;
157 		break;
158 	}
159 }
160 
161 static void vf_wanted_guc_version(struct xe_gt *gt, struct xe_uc_fw_version *ver)
162 {
163 	/* for now it's the same as minimum */
164 	return vf_minimum_guc_version(gt, ver);
165 }
166 
167 static int vf_handshake_with_guc(struct xe_gt *gt)
168 {
169 	struct xe_uc_fw_version *guc_version = &gt->sriov.vf.guc_version;
170 	struct xe_uc_fw_version wanted = {0};
171 	struct xe_guc *guc = &gt->uc.guc;
172 	bool old = false;
173 	int err;
174 
175 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
176 
177 	/* select wanted version - prefer previous (if any) */
178 	if (guc_version->major || guc_version->minor) {
179 		wanted = *guc_version;
180 		old = true;
181 	} else {
182 		vf_wanted_guc_version(gt, &wanted);
183 		xe_gt_assert(gt, wanted.major != GUC_VERSION_MAJOR_ANY);
184 
185 		/* First time we handshake, so record the minimum wanted */
186 		gt->sriov.vf.wanted_guc_version = wanted;
187 	}
188 
189 	err = guc_action_match_version(guc, &wanted, guc_version);
190 	if (unlikely(err))
191 		goto fail;
192 
193 	if (old) {
194 		/* we don't support interface version change */
195 		if (MAKE_GUC_VER_STRUCT(*guc_version) != MAKE_GUC_VER_STRUCT(wanted)) {
196 			xe_gt_sriov_err(gt, "New GuC interface version detected: %u.%u.%u.%u\n",
197 					guc_version->branch, guc_version->major,
198 					guc_version->minor, guc_version->patch);
199 			xe_gt_sriov_info(gt, "Previously used version was: %u.%u.%u.%u\n",
200 					 wanted.branch, wanted.major,
201 					 wanted.minor, wanted.patch);
202 			err = -EREMCHG;
203 			goto fail;
204 		} else {
205 			/* version is unchanged, no need to re-verify it */
206 			return 0;
207 		}
208 	}
209 
210 	/* illegal */
211 	if (guc_version->major > wanted.major) {
212 		err = -EPROTO;
213 		goto unsupported;
214 	}
215 
216 	/* there's no fallback on major version. */
217 	if (guc_version->major != wanted.major) {
218 		err = -ENOPKG;
219 		goto unsupported;
220 	}
221 
222 	/* check against minimum version supported by us */
223 	vf_minimum_guc_version(gt, &wanted);
224 	xe_gt_assert(gt, wanted.major != GUC_VERSION_MAJOR_ANY);
225 	if (MAKE_GUC_VER_STRUCT(*guc_version) < MAKE_GUC_VER_STRUCT(wanted)) {
226 		err = -ENOKEY;
227 		goto unsupported;
228 	}
229 
230 	xe_gt_sriov_dbg(gt, "using GuC interface version %u.%u.%u.%u\n",
231 			guc_version->branch, guc_version->major,
232 			guc_version->minor, guc_version->patch);
233 
234 	return 0;
235 
236 unsupported:
237 	xe_gt_sriov_err(gt, "Unsupported GuC version %u.%u.%u.%u (%pe)\n",
238 			guc_version->branch, guc_version->major,
239 			guc_version->minor, guc_version->patch,
240 			ERR_PTR(err));
241 fail:
242 	xe_gt_sriov_err(gt, "Unable to confirm GuC version %u.%u (%pe)\n",
243 			wanted.major, wanted.minor, ERR_PTR(err));
244 
245 	/* try again with *any* just to query which version is supported */
246 	if (!guc_action_match_version_any(guc, &wanted))
247 		xe_gt_sriov_notice(gt, "GuC reports interface version %u.%u.%u.%u\n",
248 				   wanted.branch, wanted.major, wanted.minor, wanted.patch);
249 	return err;
250 }
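/*
 * Worked example of the checks above (informational, assuming a GMD_ID
 * platform where the wanted/minimum version is 1.2): a 1.2 or 1.3 reply is
 * accepted; a reply below 1.2 fails with -ENOKEY; a reply with a lower major
 * fails with -ENOPKG and with a higher major fails with -EPROTO; after a
 * previous handshake, any change in the reported version fails with -EREMCHG.
 */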
251 
252 /**
253  * xe_gt_sriov_vf_bootstrap - Query and set up the GuC ABI interface version.
254  * @gt: the &xe_gt
255  *
256  * This function is for VF use only.
257  * It requires functional `GuC MMIO based communication`_.
258  *
259  * Return: 0 on success or a negative error code on failure.
260  */
261 int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt)
262 {
263 	int err;
264 
265 	if (!xe_device_uc_enabled(gt_to_xe(gt)))
266 		return -ENODEV;
267 
268 	err = vf_reset_guc_state(gt);
269 	if (unlikely(err))
270 		return err;
271 
272 	err = vf_handshake_with_guc(gt);
273 	if (unlikely(err))
274 		return err;
275 
276 	return 0;
277 }
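/*
 * Informational: of the VF helpers in this file, xe_gt_sriov_vf_bootstrap()
 * and xe_gt_sriov_vf_query_config() rely only on MMIO based GuC
 * communication, while xe_gt_sriov_vf_connect() additionally needs the GuC
 * relay (CTB) and xe_gt_sriov_vf_query_runtime() needs an already negotiated
 * VF/PF ABI; their callers live outside this file.
 */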
278 
279 /**
280  * xe_gt_sriov_vf_guc_versions - Minimum required and found GuC ABI versions
281  * @gt: the &xe_gt
282  * @wanted: pointer to the xe_uc_fw_version to be filled with the wanted version
283  * @found: pointer to the xe_uc_fw_version to be filled with the found version
284  *
285  * This function is for VF use only and it can only be used after successful
286  * version handshake with the GuC.
287  */
288 void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt,
289 				 struct xe_uc_fw_version *wanted,
290 				 struct xe_uc_fw_version *found)
291 {
292 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
293 	xe_gt_assert(gt, gt->sriov.vf.guc_version.major);
294 
295 	if (wanted)
296 		*wanted = gt->sriov.vf.wanted_guc_version;
297 
298 	if (found)
299 		*found = gt->sriov.vf.guc_version;
300 }
301 
302 static int guc_action_vf_notify_resfix_done(struct xe_guc *guc)
303 {
304 	u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = {
305 		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
306 		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
307 		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE),
308 	};
309 	int ret;
310 
311 	ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request));
312 
313 	return ret > 0 ? -EPROTO : ret;
314 }
315 
316 /**
317  * vf_notify_resfix_done - Notify GuC that resource fixups have been applied.
318  * @gt: the &xe_gt struct instance linked to target GuC
319  *
320  * Return: 0 if the operation completed successfully, or a negative error
321  * code otherwise.
322  */
323 static int vf_notify_resfix_done(struct xe_gt *gt)
324 {
325 	struct xe_guc *guc = &gt->uc.guc;
326 	int err;
327 
328 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
329 
330 	err = guc_action_vf_notify_resfix_done(guc);
331 	if (unlikely(err))
332 		xe_gt_sriov_err(gt, "Failed to notify GuC about resource fixup done (%pe)\n",
333 				ERR_PTR(err));
334 	else
335 		xe_gt_sriov_dbg_verbose(gt, "sent GuC resource fixup done\n");
336 
337 	return err;
338 }
339 
340 static int guc_action_query_single_klv(struct xe_guc *guc, u32 key,
341 				       u32 *value, u32 value_len)
342 {
343 	u32 request[VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_LEN] = {
344 		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
345 		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
346 		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
347 			   GUC_ACTION_VF2GUC_QUERY_SINGLE_KLV),
348 		FIELD_PREP(VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_1_KEY, key),
349 	};
350 	u32 response[GUC_MAX_MMIO_MSG_LEN];
351 	u32 length;
352 	int ret;
353 
354 	BUILD_BUG_ON(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_MAX_LEN > GUC_MAX_MMIO_MSG_LEN);
355 	ret = xe_guc_mmio_send_recv(guc, request, ARRAY_SIZE(request), response);
356 	if (unlikely(ret < 0))
357 		return ret;
358 
359 	if (unlikely(FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_0_MBZ, response[0])))
360 		return -EPROTO;
361 
362 	length = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_0_LENGTH, response[0]);
363 	if (unlikely(length > value_len))
364 		return -EOVERFLOW;
365 	if (unlikely(length < value_len))
366 		return -ENODATA;
367 
368 	switch (value_len) {
369 	default:
370 		xe_gt_WARN_ON(guc_to_gt(guc), value_len > 3);
371 		fallthrough;
372 	case 3:
373 		value[2] = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_3_VALUE96, response[3]);
374 		fallthrough;
375 	case 2:
376 		value[1] = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_2_VALUE64, response[2]);
377 		fallthrough;
378 	case 1:
379 		value[0] = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_1_VALUE32, response[1]);
380 		fallthrough;
381 	case 0:
382 		break;
383 	}
384 
385 	return 0;
386 }
387 
388 static int guc_action_query_single_klv32(struct xe_guc *guc, u32 key, u32 *value32)
389 {
390 	return guc_action_query_single_klv(guc, key, value32, hxg_sizeof(u32));
391 }
392 
393 static int guc_action_query_single_klv64(struct xe_guc *guc, u32 key, u64 *value64)
394 {
395 	u32 value[2];
396 	int err;
397 
398 	err = guc_action_query_single_klv(guc, key, value, hxg_sizeof(value));
399 	if (unlikely(err))
400 		return err;
401 
402 	*value64 = make_u64_from_u32(value[1], value[0]);
403 	return 0;
404 }
405 
406 static bool has_gmdid(struct xe_device *xe)
407 {
408 	return GRAPHICS_VERx100(xe) >= 1270;
409 }
410 
411 /**
412  * xe_gt_sriov_vf_gmdid - Query GMDID over MMIO.
413  * @gt: the &xe_gt
414  *
415  * This function is for VF use only.
416  *
417  * Return: value of GMDID KLV on success or 0 on failure.
418  */
419 u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt)
420 {
421 	const char *type = xe_gt_is_media_type(gt) ? "media" : "graphics";
422 	struct xe_guc *guc = &gt->uc.guc;
423 	u32 value;
424 	int err;
425 
426 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
427 	xe_gt_assert(gt, !GRAPHICS_VERx100(gt_to_xe(gt)) || has_gmdid(gt_to_xe(gt)));
428 	xe_gt_assert(gt, gt->sriov.vf.guc_version.major > 1 || gt->sriov.vf.guc_version.minor >= 2);
429 
430 	err = guc_action_query_single_klv32(guc, GUC_KLV_GLOBAL_CFG_GMD_ID_KEY, &value);
431 	if (unlikely(err)) {
432 		xe_gt_sriov_err(gt, "Failed to obtain %s GMDID (%pe)\n",
433 				type, ERR_PTR(err));
434 		return 0;
435 	}
436 
437 	xe_gt_sriov_dbg(gt, "%s GMDID = %#x\n", type, value);
438 	return value;
439 }
440 
441 static int vf_get_ggtt_info(struct xe_gt *gt)
442 {
443 	struct xe_tile *tile = gt_to_tile(gt);
444 	struct xe_ggtt *ggtt = tile->mem.ggtt;
445 	struct xe_guc *guc = &gt->uc.guc;
446 	u64 start, size, ggtt_size;
447 	s64 shift;
448 	int err;
449 
450 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
451 
452 	guard(mutex)(&ggtt->lock);
453 
454 	err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_GGTT_START_KEY, &start);
455 	if (unlikely(err))
456 		return err;
457 
458 	err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_GGTT_SIZE_KEY, &size);
459 	if (unlikely(err))
460 		return err;
461 
462 	if (!size)
463 		return -ENODATA;
464 
465 	ggtt_size = xe_tile_sriov_vf_ggtt(tile);
466 	if (ggtt_size && ggtt_size != size) {
467 		xe_gt_sriov_err(gt, "Unexpected GGTT reassignment: %lluK != %lluK\n",
468 				size / SZ_1K, ggtt_size / SZ_1K);
469 		return -EREMCHG;
470 	}
471 
472 	xe_gt_sriov_dbg_verbose(gt, "GGTT %#llx-%#llx = %lluK\n",
473 				start, start + size - 1, size / SZ_1K);
474 
475 	shift = start - (s64)xe_tile_sriov_vf_ggtt_base(tile);
476 	xe_tile_sriov_vf_ggtt_base_store(tile, start);
477 	xe_tile_sriov_vf_ggtt_store(tile, size);
478 
479 	if (shift && shift != start) {
480 		xe_gt_sriov_info(gt, "Shifting GGTT base by %lld to 0x%016llx\n",
481 				 shift, start);
482 		xe_tile_sriov_vf_fixup_ggtt_nodes_locked(gt_to_tile(gt), shift);
483 	}
484 
485 	if (xe_sriov_vf_migration_supported(gt_to_xe(gt))) {
486 		WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false);
487 		smp_wmb();	/* Ensure above write visible before wake */
488 		wake_up_all(&gt->sriov.vf.migration.wq);
489 	}
490 
491 	return 0;
492 }
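/*
 * Example of the GGTT fixup above (illustrative numbers): if the previous
 * base was 0x01000000 and the PF now reports start = 0x01800000 with an
 * unchanged size, then shift = +0x00800000 and all of this VF's GGTT nodes
 * are relocated by that amount via
 * xe_tile_sriov_vf_fixup_ggtt_nodes_locked(); on the very first query there
 * is no previous base (shift == start), so no fixup is applied.
 */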
493 
494 static int vf_get_lmem_info(struct xe_gt *gt)
495 {
496 	struct xe_tile *tile = gt_to_tile(gt);
497 	struct xe_guc *guc = &gt->uc.guc;
498 	char size_str[10];
499 	u64 size, lmem_size;
500 	int err;
501 
502 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
503 
504 	err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_LMEM_SIZE_KEY, &size);
505 	if (unlikely(err))
506 		return err;
507 
508 	lmem_size = xe_tile_sriov_vf_lmem(tile);
509 	if (lmem_size && lmem_size != size) {
510 		xe_gt_sriov_err(gt, "Unexpected LMEM reassignment: %lluM != %lluM\n",
511 				size / SZ_1M, lmem_size / SZ_1M);
512 		return -EREMCHG;
513 	}
514 
515 	string_get_size(size, 1, STRING_UNITS_2, size_str, sizeof(size_str));
516 	xe_gt_sriov_dbg_verbose(gt, "LMEM %lluM %s\n", size / SZ_1M, size_str);
517 
518 	xe_tile_sriov_vf_lmem_store(tile, size);
519 
520 	return size ? 0 : -ENODATA;
521 }
522 
523 static int vf_get_submission_cfg(struct xe_gt *gt)
524 {
525 	struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config;
526 	struct xe_guc *guc = &gt->uc.guc;
527 	u32 num_ctxs, num_dbs;
528 	int err;
529 
530 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
531 
532 	err = guc_action_query_single_klv32(guc, GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY, &num_ctxs);
533 	if (unlikely(err))
534 		return err;
535 
536 	err = guc_action_query_single_klv32(guc, GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY, &num_dbs);
537 	if (unlikely(err))
538 		return err;
539 
540 	if (config->num_ctxs && config->num_ctxs != num_ctxs) {
541 		xe_gt_sriov_err(gt, "Unexpected CTXs reassignment: %u != %u\n",
542 				num_ctxs, config->num_ctxs);
543 		return -EREMCHG;
544 	}
545 	if (config->num_dbs && config->num_dbs != num_dbs) {
546 		xe_gt_sriov_err(gt, "Unexpected DBs reassignment: %u != %u\n",
547 				num_dbs, config->num_dbs);
548 		return -EREMCHG;
549 	}
550 
551 	xe_gt_sriov_dbg_verbose(gt, "CTXs %u DBs %u\n", num_ctxs, num_dbs);
552 
553 	config->num_ctxs = num_ctxs;
554 	config->num_dbs = num_dbs;
555 
556 	return config->num_ctxs ? 0 : -ENODATA;
557 }
558 
559 static void vf_cache_gmdid(struct xe_gt *gt)
560 {
561 	xe_gt_assert(gt, has_gmdid(gt_to_xe(gt)));
562 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
563 
564 	gt->sriov.vf.runtime.gmdid = xe_gt_sriov_vf_gmdid(gt);
565 }
566 
567 /**
568  * xe_gt_sriov_vf_query_config - Query SR-IOV config data over MMIO.
569  * @gt: the &xe_gt
570  *
571  * This function is for VF use only. It may shift the GGTT and runs under the
572  * GGTT lock, which makes the update visible to all GTs that share the same
573  * GGTT.
574  *
575  * Return: 0 on success or a negative error code on failure.
576  */
577 int xe_gt_sriov_vf_query_config(struct xe_gt *gt)
578 {
579 	struct xe_device *xe = gt_to_xe(gt);
580 	int err;
581 
582 	err = vf_get_ggtt_info(gt);
583 	if (unlikely(err))
584 		return err;
585 
586 	if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) {
587 		err = vf_get_lmem_info(gt);
588 		if (unlikely(err))
589 			return err;
590 	}
591 
592 	err = vf_get_submission_cfg(gt);
593 	if (unlikely(err))
594 		return err;
595 
596 	if (has_gmdid(xe))
597 		vf_cache_gmdid(gt);
598 
599 	return 0;
600 }
601 
602 /**
603  * xe_gt_sriov_vf_guc_ids - VF GuC context IDs configuration.
604  * @gt: the &xe_gt
605  *
606  * This function is for VF use only.
607  *
608  * Return: number of GuC context IDs assigned to VF.
609  */
610 u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt)
611 {
612 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
613 	xe_gt_assert(gt, gt->sriov.vf.guc_version.major);
614 	xe_gt_assert(gt, gt->sriov.vf.self_config.num_ctxs);
615 
616 	return gt->sriov.vf.self_config.num_ctxs;
617 }
618 
619 static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor)
620 {
621 	u32 request[VF2PF_HANDSHAKE_REQUEST_MSG_LEN] = {
622 		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
623 		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
624 		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VF2PF_HANDSHAKE),
625 		FIELD_PREP(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR, *major) |
626 		FIELD_PREP(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR, *minor),
627 	};
628 	u32 response[VF2PF_HANDSHAKE_RESPONSE_MSG_LEN];
629 	int ret;
630 
631 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
632 
633 	ret = xe_guc_relay_send_to_pf(&gt->uc.guc.relay,
634 				      request, ARRAY_SIZE(request),
635 				      response, ARRAY_SIZE(response));
636 	if (unlikely(ret < 0))
637 		return ret;
638 
639 	if (unlikely(ret != VF2PF_HANDSHAKE_RESPONSE_MSG_LEN))
640 		return -EPROTO;
641 
642 	if (unlikely(FIELD_GET(VF2PF_HANDSHAKE_RESPONSE_MSG_0_MBZ, response[0])))
643 		return -EPROTO;
644 
645 	*major = FIELD_GET(VF2PF_HANDSHAKE_RESPONSE_MSG_1_MAJOR, response[1]);
646 	*minor = FIELD_GET(VF2PF_HANDSHAKE_RESPONSE_MSG_1_MINOR, response[1]);
647 
648 	return 0;
649 }
650 
651 static void vf_connect_pf(struct xe_device *xe, u16 major, u16 minor)
652 {
653 	xe_assert(xe, IS_SRIOV_VF(xe));
654 
655 	xe->sriov.vf.pf_version.major = major;
656 	xe->sriov.vf.pf_version.minor = minor;
657 }
658 
659 static void vf_disconnect_pf(struct xe_device *xe)
660 {
661 	vf_connect_pf(xe, 0, 0);
662 }
663 
664 static int vf_handshake_with_pf(struct xe_gt *gt)
665 {
666 	struct xe_device *xe = gt_to_xe(gt);
667 	u32 major_wanted = GUC_RELAY_VERSION_LATEST_MAJOR;
668 	u32 minor_wanted = GUC_RELAY_VERSION_LATEST_MINOR;
669 	u32 major = major_wanted, minor = minor_wanted;
670 	int err;
671 
672 	err = relay_action_handshake(gt, &major, &minor);
673 	if (unlikely(err))
674 		goto failed;
675 
676 	if (!major && !minor) {
677 		err = -ENODATA;
678 		goto failed;
679 	}
680 
681 	xe_gt_sriov_dbg(gt, "using VF/PF ABI %u.%u\n", major, minor);
682 	vf_connect_pf(xe, major, minor);
683 	return 0;
684 
685 failed:
686 	xe_gt_sriov_err(gt, "Unable to confirm VF/PF ABI version %u.%u (%pe)\n",
687 			major, minor, ERR_PTR(err));
688 	vf_disconnect_pf(xe);
689 	return err;
690 }
691 
692 /**
693  * xe_gt_sriov_vf_connect - Establish connection with the PF driver.
694  * @gt: the &xe_gt
695  *
696  * This function is for VF use only.
697  *
698  * Return: 0 on success or a negative error code on failure.
699  */
700 int xe_gt_sriov_vf_connect(struct xe_gt *gt)
701 {
702 	int err;
703 
704 	err = vf_handshake_with_pf(gt);
705 	if (unlikely(err))
706 		goto failed;
707 
708 	return 0;
709 
710 failed:
711 	xe_gt_sriov_err(gt, "Failed to get version info (%pe)\n", ERR_PTR(err));
712 	return err;
713 }
714 
715 /**
716  * xe_gt_sriov_vf_default_lrcs_hwsp_rebase - Update GGTT references in HWSP of default LRCs.
717  * @gt: the &xe_gt struct instance
718  */
719 static void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt)
720 {
721 	struct xe_hw_engine *hwe;
722 	enum xe_hw_engine_id id;
723 
724 	for_each_hw_engine(hwe, gt, id)
725 		xe_default_lrc_update_memirq_regs_with_address(hwe);
726 }
727 
728 static void vf_start_migration_recovery(struct xe_gt *gt)
729 {
730 	bool started;
731 
732 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
733 
734 	spin_lock(&gt->sriov.vf.migration.lock);
735 
736 	if (!gt->sriov.vf.migration.recovery_queued &&
737 	    !gt->sriov.vf.migration.recovery_teardown) {
738 		gt->sriov.vf.migration.recovery_queued = true;
739 		WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, true);
740 		WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, true);
741 		smp_wmb();	/* Ensure above writes visible before wake */
742 
743 		xe_guc_ct_wake_waiters(&gt->uc.guc.ct);
744 
745 		started = queue_work(gt->ordered_wq, &gt->sriov.vf.migration.worker);
746 		xe_gt_sriov_info(gt, "VF migration recovery %s\n", started ?
747 				 "scheduled" : "already in progress");
748 	}
749 
750 	spin_unlock(&gt->sriov.vf.migration.lock);
751 }
752 
753 /**
754  * xe_gt_sriov_vf_migrated_event_handler - Start a VF migration recovery,
755  *   or just mark that a GuC is ready for it.
756  * @gt: the &xe_gt struct instance linked to target GuC
757  *
758  * This function shall be called only by VF.
759  */
760 void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt)
761 {
762 	struct xe_device *xe = gt_to_xe(gt);
763 
764 	xe_gt_assert(gt, IS_SRIOV_VF(xe));
765 	xe_gt_assert(gt, xe_gt_sriov_vf_recovery_pending(gt));
766 
767 	if (!xe_sriov_vf_migration_supported(xe)) {
768 		xe_gt_sriov_err(gt, "migration not supported\n");
769 		return;
770 	}
771 
772 	xe_gt_sriov_info(gt, "ready for recovery after migration\n");
773 	vf_start_migration_recovery(gt);
774 }
775 
776 static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor)
777 {
778 	struct xe_device *xe = gt_to_xe(gt);
779 
780 	xe_gt_assert(gt, IS_SRIOV_VF(xe));
781 
782 	return major == xe->sriov.vf.pf_version.major &&
783 	       minor <= xe->sriov.vf.pf_version.minor;
784 }
785 
786 static int vf_prepare_runtime_info(struct xe_gt *gt, unsigned int num_regs)
787 {
788 	struct vf_runtime_reg *regs = gt->sriov.vf.runtime.regs;
789 	unsigned int regs_size = round_up(num_regs, 4);
790 	struct xe_device *xe = gt_to_xe(gt);
791 
792 	xe_gt_assert(gt, IS_SRIOV_VF(xe));
793 
794 	if (regs) {
795 		if (num_regs <= gt->sriov.vf.runtime.regs_size) {
796 			memset(regs, 0, num_regs * sizeof(*regs));
797 			gt->sriov.vf.runtime.num_regs = num_regs;
798 			return 0;
799 		}
800 
801 		drmm_kfree(&xe->drm, regs);
802 		gt->sriov.vf.runtime.regs = NULL;
803 		gt->sriov.vf.runtime.num_regs = 0;
804 		gt->sriov.vf.runtime.regs_size = 0;
805 	}
806 
807 	regs = drmm_kcalloc(&xe->drm, regs_size, sizeof(*regs), GFP_KERNEL);
808 	if (unlikely(!regs))
809 		return -ENOMEM;
810 
811 	gt->sriov.vf.runtime.regs = regs;
812 	gt->sriov.vf.runtime.num_regs = num_regs;
813 	gt->sriov.vf.runtime.regs_size = regs_size;
814 	return 0;
815 }
816 
817 static int vf_query_runtime_info(struct xe_gt *gt)
818 {
819 	u32 request[VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN];
820 	u32 response[VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + 32]; /* up to 16 regs */
821 	u32 limit = (ARRAY_SIZE(response) - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) / 2;
822 	u32 count, remaining, num, i;
823 	u32 start = 0;
824 	int ret;
825 
826 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
827 	xe_gt_assert(gt, limit);
828 
829 	/* this is part of the 1.0 PF/VF ABI */
830 	if (!vf_is_negotiated(gt, 1, 0))
831 		return -ENOPKG;
832 
833 	request[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
834 		     FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
835 		     FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
836 				GUC_RELAY_ACTION_VF2PF_QUERY_RUNTIME) |
837 		     FIELD_PREP(VF2PF_QUERY_RUNTIME_REQUEST_MSG_0_LIMIT, limit);
838 
839 repeat:
840 	request[1] = FIELD_PREP(VF2PF_QUERY_RUNTIME_REQUEST_MSG_1_START, start);
841 	ret = xe_guc_relay_send_to_pf(&gt->uc.guc.relay,
842 				      request, ARRAY_SIZE(request),
843 				      response, ARRAY_SIZE(response));
844 	if (unlikely(ret < 0))
845 		goto failed;
846 
847 	if (unlikely(ret < VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN)) {
848 		ret = -EPROTO;
849 		goto failed;
850 	}
851 	if (unlikely((ret - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) % 2)) {
852 		ret = -EPROTO;
853 		goto failed;
854 	}
855 
856 	num = (ret - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) / 2;
857 	count = FIELD_GET(VF2PF_QUERY_RUNTIME_RESPONSE_MSG_0_COUNT, response[0]);
858 	remaining = FIELD_GET(VF2PF_QUERY_RUNTIME_RESPONSE_MSG_1_REMAINING, response[1]);
859 
860 	xe_gt_sriov_dbg_verbose(gt, "count=%u num=%u ret=%d start=%u remaining=%u\n",
861 				count, num, ret, start, remaining);
862 
863 	if (unlikely(count != num)) {
864 		ret = -EPROTO;
865 		goto failed;
866 	}
867 
868 	if (start == 0) {
869 		ret = vf_prepare_runtime_info(gt, num + remaining);
870 		if (unlikely(ret < 0))
871 			goto failed;
872 	} else if (unlikely(start + num > gt->sriov.vf.runtime.num_regs)) {
873 		ret = -EPROTO;
874 		goto failed;
875 	}
876 
877 	for (i = 0; i < num; ++i) {
878 		struct vf_runtime_reg *reg = &gt->sriov.vf.runtime.regs[start + i];
879 
880 		reg->offset = response[VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + 2 * i];
881 		reg->value = response[VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + 2 * i + 1];
882 	}
883 
884 	if (remaining) {
885 		start += num;
886 		goto repeat;
887 	}
888 
889 	return 0;
890 
891 failed:
892 	vf_prepare_runtime_info(gt, 0);
893 	return ret;
894 }
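/*
 * Example of the paging above: response[] leaves room for 32 payload dwords,
 * so limit = 16 (offset, value) pairs per relay round trip; if the PF exposes
 * e.g. 40 runtime registers, the loop runs three times (16 + 16 + 8), with
 * start advancing by num and the PF-reported remaining dropping to zero on
 * the last chunk.
 */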
895 
896 static void vf_show_runtime_info(struct xe_gt *gt)
897 {
898 	struct vf_runtime_reg *vf_regs = gt->sriov.vf.runtime.regs;
899 	unsigned int size = gt->sriov.vf.runtime.num_regs;
900 
901 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
902 
903 	for (; size--; vf_regs++)
904 		xe_gt_sriov_dbg(gt, "runtime(%#x) = %#x\n",
905 				vf_regs->offset, vf_regs->value);
906 }
907 
908 /**
909  * xe_gt_sriov_vf_query_runtime - Query SR-IOV runtime data.
910  * @gt: the &xe_gt
911  *
912  * This function is for VF use only.
913  *
914  * Return: 0 on success or a negative error code on failure.
915  */
916 int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt)
917 {
918 	int err;
919 
920 	err = vf_query_runtime_info(gt);
921 	if (unlikely(err))
922 		goto failed;
923 
924 	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG))
925 		vf_show_runtime_info(gt);
926 
927 	return 0;
928 
929 failed:
930 	xe_gt_sriov_err(gt, "Failed to get runtime info (%pe)\n",
931 			ERR_PTR(err));
932 	return err;
933 }
934 
935 static int vf_runtime_reg_cmp(const void *a, const void *b)
936 {
937 	const struct vf_runtime_reg *ra = a;
938 	const struct vf_runtime_reg *rb = b;
939 
940 	return (int)ra->offset - (int)rb->offset;
941 }
942 
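/*
 * Note: bsearch() in vf_lookup_reg() requires runtime->regs to be sorted by
 * ascending offset; the registers are stored in the order the PF reports
 * them, which is assumed to already satisfy this.
 */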
943 static struct vf_runtime_reg *vf_lookup_reg(struct xe_gt *gt, u32 addr)
944 {
945 	struct xe_gt_sriov_vf_runtime *runtime = &gt->sriov.vf.runtime;
946 	struct vf_runtime_reg key = { .offset = addr };
947 
948 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
949 
950 	return bsearch(&key, runtime->regs, runtime->num_regs, sizeof(key),
951 		       vf_runtime_reg_cmp);
952 }
953 
954 /**
955  * xe_gt_sriov_vf_read32 - Get a register value from the runtime data.
956  * @gt: the &xe_gt
957  * @reg: the register to read
958  *
959  * This function is for VF use only.
960  * This function shall be called after VF has connected to PF.
961  * This function is dedicated for registers that VFs can't read directly.
962  *
963  * Return: register value obtained from the PF or 0 if not found.
964  */
965 u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg)
966 {
967 	u32 addr = xe_mmio_adjusted_addr(&gt->mmio, reg.addr);
968 	struct vf_runtime_reg *rr;
969 
970 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
971 	xe_gt_assert(gt, !reg.vf);
972 
973 	if (reg.addr == GMD_ID.addr) {
974 		xe_gt_sriov_dbg_verbose(gt, "gmdid(%#x) = %#x\n",
975 					addr, gt->sriov.vf.runtime.gmdid);
976 		return gt->sriov.vf.runtime.gmdid;
977 	}
978 
979 	rr = vf_lookup_reg(gt, addr);
980 	if (!rr) {
981 		xe_gt_WARN(gt, IS_ENABLED(CONFIG_DRM_XE_DEBUG),
982 			   "VF is trying to read an inaccessible register %#x+%#x\n",
983 			   reg.addr, addr - reg.addr);
984 		return 0;
985 	}
986 
987 	xe_gt_sriov_dbg_verbose(gt, "runtime[%#x] = %#x\n", addr, rr->value);
988 	return rr->value;
989 }
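/*
 * Minimal usage sketch (hypothetical caller, assuming a VF-inaccessible
 * register exposed by the PF in the runtime data):
 *
 *	if (IS_SRIOV_VF(gt_to_xe(gt)) && !reg.vf)
 *		val = xe_gt_sriov_vf_read32(gt, reg);
 *	else
 *		val = xe_mmio_read32(&gt->mmio, reg);
 *
 * which mirrors roughly how the MMIO layer is expected to route such reads.
 */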
990 
991 /**
992  * xe_gt_sriov_vf_write32 - Handle a write to an inaccessible register.
993  * @gt: the &xe_gt
994  * @reg: the register to write
995  * @val: value to write
996  *
997  * This function is for VF use only.
998  * Currently it will trigger a WARN if running on a debug build.
999  */
1000 void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
1001 {
1002 	u32 addr = xe_mmio_adjusted_addr(&gt->mmio, reg.addr);
1003 
1004 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
1005 	xe_gt_assert(gt, !reg.vf);
1006 
1007 	/*
1008 	 * In the future, we may want to handle selected writes to inaccessible
1009 	 * registers in some custom way, but for now let's just log a warning
1010 	 * about such attempt, as likely we might be doing something wrong.
1011 	 */
1012 	xe_gt_WARN(gt, IS_ENABLED(CONFIG_DRM_XE_DEBUG),
1013 		   "VF is trying to write %#x to an inaccessible register %#x+%#x\n",
1014 		   val, reg.addr, addr - reg.addr);
1015 }
1016 
1017 /**
1018  * xe_gt_sriov_vf_print_config - Print VF self config.
1019  * @gt: the &xe_gt
1020  * @p: the &drm_printer
1021  *
1022  * This function is for VF use only.
1023  */
1024 void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p)
1025 {
1026 	struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config;
1027 	struct xe_device *xe = gt_to_xe(gt);
1028 	u64 lmem_size;
1029 	char buf[10];
1030 
1031 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
1032 
1033 	if (xe_gt_is_main_type(gt)) {
1034 		u64 ggtt_size = xe_tile_sriov_vf_ggtt(gt_to_tile(gt));
1035 		u64 ggtt_base = xe_tile_sriov_vf_ggtt_base(gt_to_tile(gt));
1036 
1037 		drm_printf(p, "GGTT range:\t%#llx-%#llx\n",
1038 			   ggtt_base, ggtt_base + ggtt_size - 1);
1039 		string_get_size(ggtt_size, 1, STRING_UNITS_2, buf, sizeof(buf));
1040 		drm_printf(p, "GGTT size:\t%llu (%s)\n", ggtt_size, buf);
1041 
1042 		if (IS_DGFX(xe)) {
1043 			lmem_size = xe_tile_sriov_vf_lmem(gt_to_tile(gt));
1044 			string_get_size(lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf));
1045 			drm_printf(p, "LMEM size:\t%llu (%s)\n", lmem_size, buf);
1046 		}
1047 	}
1048 
1049 	drm_printf(p, "GuC contexts:\t%u\n", config->num_ctxs);
1050 	drm_printf(p, "GuC doorbells:\t%u\n", config->num_dbs);
1051 }
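/*
 * Example output of the printer above (illustrative values only):
 *	GGTT range:	0x180000000-0x1bfffffff
 *	GGTT size:	1073741824 (1.00 GiB)
 *	LMEM size:	4294967296 (4.00 GiB)
 *	GuC contexts:	1024
 *	GuC doorbells:	16
 */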
1052 
1053 /**
1054  * xe_gt_sriov_vf_print_runtime - Print VF's runtime regs received from PF.
1055  * @gt: the &xe_gt
1056  * @p: the &drm_printer
1057  *
1058  * This function is for VF use only.
1059  */
1060 void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p)
1061 {
1062 	struct vf_runtime_reg *vf_regs = gt->sriov.vf.runtime.regs;
1063 	unsigned int size = gt->sriov.vf.runtime.num_regs;
1064 
1065 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
1066 
1067 	for (; size--; vf_regs++)
1068 		drm_printf(p, "%#x = %#x\n", vf_regs->offset, vf_regs->value);
1069 }
1070 
1071 /**
1072  * xe_gt_sriov_vf_print_version - Print VF ABI versions.
1073  * @gt: the &xe_gt
1074  * @p: the &drm_printer
1075  *
1076  * This function is for VF use only.
1077  */
1078 void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p)
1079 {
1080 	struct xe_device *xe = gt_to_xe(gt);
1081 	struct xe_uc_fw_version *guc_version = &gt->sriov.vf.guc_version;
1082 	struct xe_uc_fw_version *wanted = &gt->sriov.vf.wanted_guc_version;
1083 	struct xe_sriov_vf_relay_version *pf_version = &xe->sriov.vf.pf_version;
1084 	struct xe_uc_fw_version ver;
1085 
1086 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
1087 
1088 	drm_printf(p, "GuC ABI:\n");
1089 
1090 	vf_minimum_guc_version(gt, &ver);
1091 	drm_printf(p, "\tbase:\t%u.%u.%u.*\n", ver.branch, ver.major, ver.minor);
1092 
1093 	drm_printf(p, "\twanted:\t%u.%u.%u.*\n",
1094 		   wanted->branch, wanted->major, wanted->minor);
1095 
1096 	drm_printf(p, "\thandshake:\t%u.%u.%u.%u\n",
1097 		   guc_version->branch, guc_version->major,
1098 		   guc_version->minor, guc_version->patch);
1099 
1100 	drm_printf(p, "PF ABI:\n");
1101 
1102 	drm_printf(p, "\tbase:\t%u.%u\n",
1103 		   GUC_RELAY_VERSION_BASE_MAJOR, GUC_RELAY_VERSION_BASE_MINOR);
1104 	drm_printf(p, "\twanted:\t%u.%u\n",
1105 		   GUC_RELAY_VERSION_LATEST_MAJOR, GUC_RELAY_VERSION_LATEST_MINOR);
1106 	drm_printf(p, "\thandshake:\t%u.%u\n",
1107 		   pf_version->major, pf_version->minor);
1108 }
1109 
1110 static bool vf_post_migration_shutdown(struct xe_gt *gt)
1111 {
1112 	struct xe_device *xe = gt_to_xe(gt);
1113 
1114 	/*
1115 	 * On platforms where CCS must be restored by the primary GT, the media
1116 	 * GT's VF post-migration recovery must run afterward. Detect this case
1117 	 * and re-queue the media GT's restore work item if necessary.
1118 	 */
1119 	if (xe->info.needs_shared_vf_gt_wq && xe_gt_is_media_type(gt)) {
1120 		struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt;
1121 
1122 		if (xe_gt_sriov_vf_recovery_pending(primary_gt))
1123 			return true;
1124 	}
1125 
1126 	spin_lock_irq(&gt->sriov.vf.migration.lock);
1127 	gt->sriov.vf.migration.recovery_queued = false;
1128 	spin_unlock_irq(&gt->sriov.vf.migration.lock);
1129 
1130 	xe_guc_ct_flush_and_stop(&gt->uc.guc.ct);
1131 	xe_guc_submit_pause(&gt->uc.guc);
1132 	xe_tlb_inval_reset(&gt->tlb_inval);
1133 
1134 	return false;
1135 }
1136 
1137 static size_t post_migration_scratch_size(struct xe_device *xe)
1138 {
1139 	return max(xe_lrc_reg_size(xe), LRC_WA_BB_SIZE);
1140 }
1141 
1142 static int vf_post_migration_fixups(struct xe_gt *gt)
1143 {
1144 	void *buf = gt->sriov.vf.migration.scratch;
1145 	int err;
1146 
1147 	/* xe_gt_sriov_vf_query_config() will fix up the GGTT addresses */
1148 	err = xe_gt_sriov_vf_query_config(gt);
1149 	if (err)
1150 		return err;
1151 
1152 	if (xe_gt_is_main_type(gt))
1153 		xe_sriov_vf_ccs_rebase(gt_to_xe(gt));
1154 
1155 	xe_gt_sriov_vf_default_lrcs_hwsp_rebase(gt);
1156 	err = xe_guc_contexts_hwsp_rebase(&gt->uc.guc, buf);
1157 	if (err)
1158 		return err;
1159 
1160 	return 0;
1161 }
1162 
1163 static void vf_post_migration_rearm(struct xe_gt *gt)
1164 {
1165 	xe_guc_ct_restart(&gt->uc.guc.ct);
1166 	xe_guc_submit_unpause_prepare(&gt->uc.guc);
1167 }
1168 
1169 static void vf_post_migration_kickstart(struct xe_gt *gt)
1170 {
1171 	xe_guc_submit_unpause(&gt->uc.guc);
1172 }
1173 
1174 static void vf_post_migration_abort(struct xe_gt *gt)
1175 {
1176 	spin_lock_irq(&gt->sriov.vf.migration.lock);
1177 	WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, false);
1178 	WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false);
1179 	spin_unlock_irq(&gt->sriov.vf.migration.lock);
1180 
1181 	wake_up_all(&gt->sriov.vf.migration.wq);
1182 
1183 	xe_guc_submit_pause_abort(&gt->uc.guc);
1184 }
1185 
1186 static int vf_post_migration_notify_resfix_done(struct xe_gt *gt)
1187 {
1188 	bool skip_resfix = false;
1189 
1190 	spin_lock_irq(&gt->sriov.vf.migration.lock);
1191 	if (gt->sriov.vf.migration.recovery_queued) {
1192 		skip_resfix = true;
1193 		xe_gt_sriov_dbg(gt, "another recovery imminent, resfix skipped\n");
1194 	} else {
1195 		WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, false);
1196 	}
1197 	spin_unlock_irq(&gt->sriov.vf.migration.lock);
1198 
1199 	if (skip_resfix)
1200 		return -EAGAIN;
1201 
1202 	/*
1203 	 * Make sure interrupts on the new HW are properly set. The GuC IRQ
1204 	 * must already be working at this point, since the recovery did start,
1205 	 * but the remaining interrupts were not enabled using the procedure from the spec.
1206 	 */
1207 	xe_irq_resume(gt_to_xe(gt));
1208 
1209 	return vf_notify_resfix_done(gt);
1210 }
1211 
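/*
 * Overview of the recovery sequence implemented below:
 *  1) shutdown  - stop CT traffic, pause submission, reset TLB invalidation
 *  2) fixups    - re-query the config (GGTT shift) and rebase CCS/LRC/HWSP
 *  3) rearm     - restart CT and prepare submission for unpause
 *  4) resfix    - resume IRQs and send VF2GUC_NOTIFY_RESFIX_DONE
 *  5) kickstart - unpause submission
 * Any hard failure aborts the recovery and declares the device wedged.
 */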
1212 static void vf_post_migration_recovery(struct xe_gt *gt)
1213 {
1214 	struct xe_device *xe = gt_to_xe(gt);
1215 	int err;
1216 	bool retry;
1217 
1218 	xe_gt_sriov_dbg(gt, "migration recovery in progress\n");
1219 
1220 	retry = vf_post_migration_shutdown(gt);
1221 	if (retry)
1222 		goto queue;
1223 
1224 	if (!xe_sriov_vf_migration_supported(xe)) {
1225 		xe_gt_sriov_err(gt, "migration is not supported\n");
1226 		err = -ENOTRECOVERABLE;
1227 		goto fail;
1228 	}
1229 
1230 	err = vf_post_migration_fixups(gt);
1231 	if (err)
1232 		goto fail;
1233 
1234 	vf_post_migration_rearm(gt);
1235 
1236 	err = vf_post_migration_notify_resfix_done(gt);
1237 	if (err && err != -EAGAIN)
1238 		goto fail;
1239 
1240 	vf_post_migration_kickstart(gt);
1241 
1242 	xe_gt_sriov_notice(gt, "migration recovery ended\n");
1243 	return;
1244 fail:
1245 	vf_post_migration_abort(gt);
1246 	xe_gt_sriov_err(gt, "migration recovery failed (%pe)\n", ERR_PTR(err));
1247 	xe_device_declare_wedged(xe);
1248 	return;
1249 
1250 queue:
1251 	xe_gt_sriov_info(gt, "Re-queuing migration recovery\n");
1252 	queue_work(gt->ordered_wq, &gt->sriov.vf.migration.worker);
1253 }
1254 
1255 static void migration_worker_func(struct work_struct *w)
1256 {
1257 	struct xe_gt *gt = container_of(w, struct xe_gt,
1258 					sriov.vf.migration.worker);
1259 
1260 	vf_post_migration_recovery(gt);
1261 }
1262 
1263 static void vf_migration_fini(void *arg)
1264 {
1265 	struct xe_gt *gt = arg;
1266 
1267 	spin_lock_irq(&gt->sriov.vf.migration.lock);
1268 	gt->sriov.vf.migration.recovery_teardown = true;
1269 	spin_unlock_irq(&gt->sriov.vf.migration.lock);
1270 
1271 	cancel_work_sync(&gt->sriov.vf.migration.worker);
1272 }
1273 
1274 /**
1275  * xe_gt_sriov_vf_init_early() - GT VF init early
1276  * @gt: the &xe_gt
1277  *
1278  * Return: 0 on success, errno on failure
1279  */
1280 int xe_gt_sriov_vf_init_early(struct xe_gt *gt)
1281 {
1282 	void *buf;
1283 
1284 	if (!xe_sriov_vf_migration_supported(gt_to_xe(gt)))
1285 		return 0;
1286 
1287 	buf = drmm_kmalloc(&gt_to_xe(gt)->drm,
1288 			   post_migration_scratch_size(gt_to_xe(gt)),
1289 			   GFP_KERNEL);
1290 	if (!buf)
1291 		return -ENOMEM;
1292 
1293 	gt->sriov.vf.migration.scratch = buf;
1294 	spin_lock_init(&gt->sriov.vf.migration.lock);
1295 	INIT_WORK(&gt->sriov.vf.migration.worker, migration_worker_func);
1296 	init_waitqueue_head(&gt->sriov.vf.migration.wq);
1297 
1298 	return 0;
1299 }
1300 
1301 /**
1302  * xe_gt_sriov_vf_init() - GT VF init
1303  * @gt: the &xe_gt
1304  *
1305  * Return: 0 on success, errno on failure
1306  */
1307 int xe_gt_sriov_vf_init(struct xe_gt *gt)
1308 {
1309 	if (!xe_sriov_vf_migration_supported(gt_to_xe(gt)))
1310 		return 0;
1311 
1312 	/*
1313 	 * We want to tear down the VF post-migration recovery early during
1314 	 * driver unload; therefore, we add this finalization action later
1315 	 * during driver load.
1316 	 */
1317 	return devm_add_action_or_reset(gt_to_xe(gt)->drm.dev,
1318 					vf_migration_fini, gt);
1319 }
1320 
1321 /**
1322  * xe_gt_sriov_vf_recovery_pending() - VF post migration recovery pending
1323  * @gt: the &xe_gt
1324  *
1325  * The return value of this function must be immediately visible upon vCPU
1326  * unhalt and must persist until RESFIX_DONE is issued. This guarantee is
1327  * currently implemented only for platforms that support memirq. If non-memirq
1328  * platforms begin to support VF migration, this function will need to be
1329  * updated accordingly.
1330  *
1331  * Return: True if VF post migration recovery is pending, False otherwise
1332  */
1333 bool xe_gt_sriov_vf_recovery_pending(struct xe_gt *gt)
1334 {
1335 	struct xe_memirq *memirq = &gt_to_tile(gt)->memirq;
1336 
1337 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
1338 
1339 	/* early detection until recovery starts */
1340 	if (xe_device_uses_memirq(gt_to_xe(gt)) &&
1341 	    xe_memirq_guc_sw_int_0_irq_pending(memirq, &gt->uc.guc))
1342 		return true;
1343 
1344 	return READ_ONCE(gt->sriov.vf.migration.recovery_inprogress);
1345 }
1346 
1347 static bool vf_valid_ggtt(struct xe_gt *gt)
1348 {
1349 	struct xe_memirq *memirq = &gt_to_tile(gt)->memirq;
1350 	bool irq_pending = xe_device_uses_memirq(gt_to_xe(gt)) &&
1351 		xe_memirq_guc_sw_int_0_irq_pending(memirq, &gt->uc.guc);
1352 
1353 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
1354 
1355 	if (irq_pending || READ_ONCE(gt->sriov.vf.migration.ggtt_need_fixes))
1356 		return false;
1357 
1358 	return true;
1359 }
1360 
1361 /**
1362  * xe_gt_sriov_vf_wait_valid_ggtt() - Wait until the VF GGTT addresses are valid
1363  * @gt: the &xe_gt
1364  */
1365 void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt)
1366 {
1367 	int ret;
1368 
1369 	if (!IS_SRIOV_VF(gt_to_xe(gt)) ||
1370 	    !xe_sriov_vf_migration_supported(gt_to_xe(gt)))
1371 		return;
1372 
1373 	ret = wait_event_interruptible_timeout(gt->sriov.vf.migration.wq,
1374 					       vf_valid_ggtt(gt),
1375 					       HZ * 5);
1376 	xe_gt_WARN_ON(gt, !ret);
1377 }
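/*
 * Usage note (informational): callers about to hand out GGTT addresses can
 * use xe_gt_sriov_vf_wait_valid_ggtt() to avoid racing with a post-migration
 * GGTT fixup; on non-VF devices, or when VF migration is not supported, the
 * call returns immediately.
 */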
1378