xref: /linux/drivers/gpu/drm/xe/xe_sriov_vf.c (revision 68a052239fc4b351e961f698b824f7654a346091)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2023-2024 Intel Corporation
4  */
5 
6 #include <drm/drm_debugfs.h>
7 #include <drm/drm_managed.h>
8 
9 #include "xe_assert.h"
10 #include "xe_device.h"
11 #include "xe_gt.h"
12 #include "xe_gt_sriov_printk.h"
13 #include "xe_gt_sriov_vf.h"
14 #include "xe_guc.h"
15 #include "xe_guc_ct.h"
16 #include "xe_guc_submit.h"
17 #include "xe_irq.h"
18 #include "xe_lrc.h"
19 #include "xe_pm.h"
20 #include "xe_sriov.h"
21 #include "xe_sriov_printk.h"
22 #include "xe_sriov_vf.h"
23 #include "xe_sriov_vf_ccs.h"
24 #include "xe_tile_sriov_vf.h"
25 
26 /**
27  * DOC: VF restore procedure in PF KMD and VF KMD
28  *
 * Restoring previously saved state of a VF is one of the core features of
30  * SR-IOV. All major VM Management applications allow saving and restoring
31  * the VM state, and doing that to a VM which uses SRIOV VF as one of
32  * the accessible devices requires support from KMD on both PF and VF side.
33  * VMM initiates all required operations through VFIO module, which then
34  * translates them into PF KMD calls. This description will focus on these
35  * calls, leaving out the module which initiates these steps (VFIO).
36  *
37  * In order to start the restore procedure, GuC needs to keep the VF in
 * proper state. The PF driver can ensure GuC sets it to the VF_READY state
39  * by provisioning the VF, which in turn can be done after Function Level
40  * Reset of said VF (or after it was freshly created - in that case FLR
41  * is not needed). The FLR procedure ends with GuC sending message
42  * `GUC_PF_NOTIFY_VF_FLR_DONE`, and then provisioning data is sent to GuC.
43  * After the provisioning is completed, the VF needs to be paused, and
44  * at that point the actual restore can begin.
45  *
46  * During VF Restore, state of several resources is restored. These may
47  * include local memory content (system memory is restored by VMM itself),
48  * values of MMIO registers, stateless compression metadata and others.
49  * The final resource which also needs restoring is state of the VF
50  * submission maintained within GuC. For that, `GUC_PF_OPCODE_VF_RESTORE`
51  * message is used, with reference to the state blob to be consumed by
52  * GuC.
53  *
54  * Next, when VFIO is asked to set the VM into running state, the PF driver
55  * sends `GUC_PF_TRIGGER_VF_RESUME` to GuC. When sent after restore, this
56  * changes VF state within GuC to `VF_RESFIX_BLOCKED` rather than the
57  * usual `VF_RUNNING`. At this point GuC triggers an interrupt to inform
58  * the VF KMD within the VM that it was migrated.
59  *
 * As soon as the Virtual GPU of the VM starts, the VF driver within it
 * receives the MIGRATED interrupt and schedules post-migration recovery worker.
62  * That worker queries GuC for new provisioning (using MMIO communication),
63  * and applies fixups to any non-virtualized resources used by the VF.
64  *
65  * When the VF driver is ready to continue operation on the newly connected
 * hardware, it sends `VF2GUC_NOTIFY_RESFIX_DONE` which causes the VF to
 * enter the long awaited `VF_RUNNING` state, and therefore start handling
68  * CTB messages and scheduling workloads from the VF::
69  *
70  *      PF                             GuC                              VF
71  *     [ ]                              |                               |
72  *     [ ] PF2GUC_VF_CONTROL(pause)     |                               |
73  *     [ ]---------------------------> [ ]                              |
74  *     [ ]                             [ ]  GuC sets new VF state to    |
75  *     [ ]                             [ ]------- VF_READY_PAUSED       |
76  *     [ ]                             [ ]      |                       |
77  *     [ ]                             [ ] <-----                       |
78  *     [ ] success                     [ ]                              |
79  *     [ ] <---------------------------[ ]                              |
80  *     [ ]                              |                               |
81  *     [ ] PF loads resources from the  |                               |
82  *     [ ]------- saved image supplied  |                               |
83  *     [ ]      |                       |                               |
84  *     [ ] <-----                       |                               |
85  *     [ ]                              |                               |
86  *     [ ] GUC_PF_OPCODE_VF_RESTORE     |                               |
87  *     [ ]---------------------------> [ ]                              |
88  *     [ ]                             [ ]  GuC loads contexts and CTB  |
89  *     [ ]                             [ ]------- state from image      |
90  *     [ ]                             [ ]      |                       |
91  *     [ ]                             [ ] <-----                       |
92  *     [ ]                             [ ]                              |
93  *     [ ]                             [ ]  GuC sets new VF state to    |
94  *     [ ]                             [ ]------- VF_RESFIX_PAUSED      |
95  *     [ ]                             [ ]      |                       |
96  *     [ ] success                     [ ] <-----                       |
97  *     [ ] <---------------------------[ ]                              |
98  *     [ ]                              |                               |
99  *     [ ] GUC_PF_TRIGGER_VF_RESUME     |                               |
100  *     [ ]---------------------------> [ ]                              |
101  *     [ ]                             [ ]  GuC sets new VF state to    |
102  *     [ ]                             [ ]------- VF_RESFIX_BLOCKED     |
103  *     [ ]                             [ ]      |                       |
104  *     [ ]                             [ ] <-----                       |
105  *     [ ]                             [ ]                              |
106  *     [ ]                             [ ] GUC_INTR_SW_INT_0            |
107  *     [ ] success                     [ ]---------------------------> [ ]
108  *     [ ] <---------------------------[ ]                             [ ]
109  *      |                               |      VF2GUC_QUERY_SINGLE_KLV [ ]
110  *      |                              [ ] <---------------------------[ ]
111  *      |                              [ ]                             [ ]
112  *      |                              [ ]        new VF provisioning  [ ]
113  *      |                              [ ]---------------------------> [ ]
114  *      |                               |                              [ ]
115  *      |                               |       VF driver applies post [ ]
116  *      |                               |      migration fixups -------[ ]
117  *      |                               |                       |      [ ]
118  *      |                               |                       -----> [ ]
119  *      |                               |                              [ ]
120  *      |                               |    VF2GUC_NOTIFY_RESFIX_DONE [ ]
121  *      |                              [ ] <---------------------------[ ]
122  *      |                              [ ]                             [ ]
123  *      |                              [ ]  GuC sets new VF state to   [ ]
124  *      |                              [ ]------- VF_RUNNING           [ ]
125  *      |                              [ ]      |                      [ ]
126  *      |                              [ ] <-----                      [ ]
127  *      |                              [ ]                     success [ ]
128  *      |                              [ ]---------------------------> [ ]
129  *      |                               |                               |
130  *      |                               |                               |
131  */
132 
133 /**
134  * xe_sriov_vf_migration_supported - Report whether SR-IOV VF migration is
135  * supported or not.
136  * @xe: the &xe_device to check
137  *
138  * Returns: true if VF migration is supported, false otherwise.
139  */
140 bool xe_sriov_vf_migration_supported(struct xe_device *xe)
141 {
142 	xe_assert(xe, IS_SRIOV_VF(xe));
143 	return xe->sriov.vf.migration.enabled;
144 }
145 
146 static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...)
147 {
148 	struct va_format vaf;
149 	va_list va_args;
150 
151 	xe_assert(xe, IS_SRIOV_VF(xe));
152 
153 	va_start(va_args, fmt);
154 	vaf.fmt = fmt;
155 	vaf.va  = &va_args;
156 	xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf);
157 	va_end(va_args);
158 
159 	xe->sriov.vf.migration.enabled = false;
160 }
161 
162 static void migration_worker_func(struct work_struct *w);
163 
164 static void vf_migration_init_early(struct xe_device *xe)
165 {
166 	/*
167 	 * TODO: Add conditions to allow specific platforms, when they're
168 	 * supported at production quality.
169 	 */
170 	if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
171 		return vf_disable_migration(xe,
172 					    "experimental feature not available on production builds");
173 
174 	if (GRAPHICS_VER(xe) < 20)
175 		return vf_disable_migration(xe, "requires gfx version >= 20, but only %u found",
176 					    GRAPHICS_VER(xe));
177 
178 	if (!IS_DGFX(xe)) {
179 		struct xe_uc_fw_version guc_version;
180 
181 		xe_gt_sriov_vf_guc_versions(xe_device_get_gt(xe, 0), NULL, &guc_version);
182 		if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0))
183 			return vf_disable_migration(xe,
184 						    "CCS migration requires GuC ABI >= 1.23 but only %u.%u found",
185 						    guc_version.major, guc_version.minor);
186 	}
187 
188 	INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
189 
190 	xe->sriov.vf.migration.enabled = true;
191 	xe_sriov_dbg(xe, "migration support enabled\n");
192 }
193 
/**
 * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data.
 * @xe: the &xe_device to initialize
 *
 * Probes whether VF migration can be supported on this device and, if so,
 * prepares the migration recovery worker (see vf_migration_init_early()).
 */
void xe_sriov_vf_init_early(struct xe_device *xe)
{
	vf_migration_init_early(xe);
}
202 
203 /**
204  * vf_post_migration_shutdown - Stop the driver activities after VF migration.
205  * @xe: the &xe_device struct instance
206  *
207  * After this VM is migrated and assigned to a new VF, it is running on a new
208  * hardware, and therefore many hardware-dependent states and related structures
209  * require fixups. Without fixups, the hardware cannot do any work, and therefore
210  * all GPU pipelines are stalled.
211  * Stop some of kernel activities to make the fixup process faster.
212  */
213 static void vf_post_migration_shutdown(struct xe_device *xe)
214 {
215 	struct xe_gt *gt;
216 	unsigned int id;
217 	int ret = 0;
218 
219 	for_each_gt(gt, xe, id) {
220 		xe_guc_submit_pause(&gt->uc.guc);
221 		ret |= xe_guc_submit_reset_block(&gt->uc.guc);
222 	}
223 
224 	if (ret)
225 		drm_info(&xe->drm, "migration recovery encountered ongoing reset\n");
226 }
227 
228 /**
229  * vf_post_migration_kickstart - Re-start the driver activities under new hardware.
230  * @xe: the &xe_device struct instance
231  *
232  * After we have finished with all post-migration fixups, restart the driver
233  * activities to continue feeding the GPU with workloads.
234  */
235 static void vf_post_migration_kickstart(struct xe_device *xe)
236 {
237 	struct xe_gt *gt;
238 	unsigned int id;
239 
240 	/*
241 	 * Make sure interrupts on the new HW are properly set. The GuC IRQ
242 	 * must be working at this point, since the recovery did started,
243 	 * but the rest was not enabled using the procedure from spec.
244 	 */
245 	xe_irq_resume(xe);
246 
247 	for_each_gt(gt, xe, id) {
248 		xe_guc_submit_reset_unblock(&gt->uc.guc);
249 		xe_guc_submit_unpause(&gt->uc.guc);
250 	}
251 }
252 
253 static bool gt_vf_post_migration_needed(struct xe_gt *gt)
254 {
255 	return test_bit(gt->info.id, &gt_to_xe(gt)->sriov.vf.migration.gt_flags);
256 }
257 
/*
 * Notify GuCs marked in flags about resource fixups apply finished.
 * @xe: the &xe_device struct instance
 * @gt_flags: flags marking to which GTs the notification shall be sent
 *
 * Return: 0 on success, or the first error returned by
 * xe_gt_sriov_vf_notify_resfix_done() otherwise.
 */
static int vf_post_migration_notify_resfix_done(struct xe_device *xe, unsigned long gt_flags)
{
	struct xe_gt *gt;
	unsigned int id;
	int err = 0;

	for_each_gt(gt, xe, id) {
		if (!test_bit(id, &gt_flags))
			continue;
		/* skip asking GuC for RESFIX exit if new recovery request arrived */
		if (gt_vf_post_migration_needed(gt))
			continue;
		err = xe_gt_sriov_vf_notify_resfix_done(gt);
		if (err)
			break;
		/* gt_flags is a by-value copy; clearing tracks which GTs got notified */
		clear_bit(id, &gt_flags);
	}

	/* any bit still set means a notification was skipped or failed */
	if (gt_flags && !err)
		drm_dbg(&xe->drm, "another recovery imminent, skipped some notifications\n");
	return err;
}
285 
286 static int vf_get_next_migrated_gt_id(struct xe_device *xe)
287 {
288 	struct xe_gt *gt;
289 	unsigned int id;
290 
291 	for_each_gt(gt, xe, id) {
292 		if (test_and_clear_bit(id, &xe->sriov.vf.migration.gt_flags))
293 			return id;
294 	}
295 	return -1;
296 }
297 
298 static size_t post_migration_scratch_size(struct xe_device *xe)
299 {
300 	return max(xe_lrc_reg_size(xe), LRC_WA_BB_SIZE);
301 }
302 
303 /**
304  * Perform post-migration fixups on a single GT.
305  *
306  * After migration, GuC needs to be re-queried for VF configuration to check
307  * if it matches previous provisioning. Most of VF provisioning shall be the
308  * same, except GGTT range, since GGTT is not virtualized per-VF. If GGTT
309  * range has changed, we have to perform fixups - shift all GGTT references
310  * used anywhere within the driver. After the fixups in this function succeed,
311  * it is allowed to ask the GuC bound to this GT to continue normal operation.
312  *
313  * Returns: 0 if the operation completed successfully, or a negative error
314  * code otherwise.
315  */
316 static int gt_vf_post_migration_fixups(struct xe_gt *gt)
317 {
318 	s64 shift;
319 	void *buf;
320 	int err;
321 
322 	buf = kmalloc(post_migration_scratch_size(gt_to_xe(gt)), GFP_KERNEL);
323 	if (!buf)
324 		return -ENOMEM;
325 
326 	err = xe_gt_sriov_vf_query_config(gt);
327 	if (err)
328 		goto out;
329 
330 	shift = xe_gt_sriov_vf_ggtt_shift(gt);
331 	if (shift) {
332 		xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift);
333 		xe_gt_sriov_vf_default_lrcs_hwsp_rebase(gt);
334 		err = xe_guc_contexts_hwsp_rebase(&gt->uc.guc, buf);
335 		if (err)
336 			goto out;
337 		xe_guc_jobs_ring_rebase(&gt->uc.guc);
338 		xe_guc_ct_fixup_messages_with_ggtt(&gt->uc.guc.ct, shift);
339 	}
340 
341 out:
342 	kfree(buf);
343 	return err;
344 }
345 
346 static void vf_post_migration_recovery(struct xe_device *xe)
347 {
348 	unsigned long fixed_gts = 0;
349 	int id, err;
350 
351 	drm_dbg(&xe->drm, "migration recovery in progress\n");
352 	xe_pm_runtime_get(xe);
353 	vf_post_migration_shutdown(xe);
354 
355 	if (!xe_sriov_vf_migration_supported(xe)) {
356 		xe_sriov_err(xe, "migration is not supported\n");
357 		err = -ENOTRECOVERABLE;
358 		goto fail;
359 	}
360 
361 	while (id = vf_get_next_migrated_gt_id(xe), id >= 0) {
362 		struct xe_gt *gt = xe_device_get_gt(xe, id);
363 
364 		err = gt_vf_post_migration_fixups(gt);
365 		if (err)
366 			goto fail;
367 
368 		set_bit(id, &fixed_gts);
369 	}
370 
371 	vf_post_migration_kickstart(xe);
372 	err = vf_post_migration_notify_resfix_done(xe, fixed_gts);
373 	if (err)
374 		goto fail;
375 
376 	xe_pm_runtime_put(xe);
377 	drm_notice(&xe->drm, "migration recovery ended\n");
378 	return;
379 fail:
380 	xe_pm_runtime_put(xe);
381 	drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err));
382 	xe_device_declare_wedged(xe);
383 }
384 
/* Worker callback: runs the full post-migration recovery for the device. */
static void migration_worker_func(struct work_struct *w)
{
	struct xe_device *xe = container_of(w, struct xe_device,
					    sriov.vf.migration.worker);

	vf_post_migration_recovery(xe);
}
392 
393 /*
394  * Check if post-restore recovery is coming on any of GTs.
395  * @xe: the &xe_device struct instance
396  *
397  * Return: True if migration recovery worker will soon be running. Any worker currently
398  * executing does not affect the result.
399  */
400 static bool vf_ready_to_recovery_on_any_gts(struct xe_device *xe)
401 {
402 	struct xe_gt *gt;
403 	unsigned int id;
404 
405 	for_each_gt(gt, xe, id) {
406 		if (test_bit(id, &xe->sriov.vf.migration.gt_flags))
407 			return true;
408 	}
409 	return false;
410 }
411 
412 /**
413  * xe_sriov_vf_start_migration_recovery - Start VF migration recovery.
414  * @xe: the &xe_device to start recovery on
415  *
416  * This function shall be called only by VF.
417  */
418 void xe_sriov_vf_start_migration_recovery(struct xe_device *xe)
419 {
420 	bool started;
421 
422 	xe_assert(xe, IS_SRIOV_VF(xe));
423 
424 	if (!vf_ready_to_recovery_on_any_gts(xe))
425 		return;
426 
427 	started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker);
428 	drm_info(&xe->drm, "VF migration recovery %s\n", started ?
429 		 "scheduled" : "already in progress");
430 }
431 
/**
 * xe_sriov_vf_init_late() - SR-IOV VF late initialization functions.
 * @xe: the &xe_device to initialize
 *
 * This function initializes code for CCS migration.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_sriov_vf_init_late(struct xe_device *xe)
{
	if (!xe_sriov_vf_migration_supported(xe))
		return 0;

	return xe_sriov_vf_ccs_init(xe);
}
449 
450 static int sa_info_vf_ccs(struct seq_file *m, void *data)
451 {
452 	struct drm_info_node *node = m->private;
453 	struct xe_device *xe = to_xe_device(node->minor->dev);
454 	struct drm_printer p = drm_seq_file_printer(m);
455 
456 	xe_sriov_vf_ccs_print(xe, &p);
457 	return 0;
458 }
459 
/* VF-specific debugfs entries, registered by xe_sriov_vf_debugfs_register(). */
static const struct drm_info_list debugfs_list[] = {
	{ .name = "sa_info_vf_ccs", .show = sa_info_vf_ccs },
};
463 
/**
 * xe_sriov_vf_debugfs_register - Register VF debugfs attributes.
 * @xe: the &xe_device
 * @root: the root &dentry
 *
 * Prepare debugfs attributes exposed by the VF.
 */
void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root)
{
	/* entries come from the static debugfs_list table above */
	drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list),
				 root, xe->drm.primary);
}
476