// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023-2024 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "xe_assert.h"
#include "xe_device.h"
#include "xe_gt_sriov_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_pm.h"
#include "xe_sriov.h"
#include "xe_sriov_printk.h"
#include "xe_sriov_vf.h"

/**
 * DOC: VF restore procedure in PF KMD and VF KMD
 *
 * Restoring the previously saved state of a VF is one of the core features
 * of SR-IOV. All major VM management applications allow saving and restoring
 * the VM state, and doing that to a VM which uses an SR-IOV VF as one of
 * the accessible devices requires support from KMD on both the PF and the
 * VF side. The VMM initiates all required operations through the VFIO
 * module, which then translates them into PF KMD calls. This description
 * focuses on these calls, leaving out the module which initiates these
 * steps (VFIO).
 *
 * In order to start the restore procedure, GuC needs to keep the VF in a
 * proper state. The PF driver can ensure GuC sets it to the VF_READY state
 * by provisioning the VF, which in turn can be done after a Function Level
 * Reset of said VF (or after it was freshly created - in that case FLR
 * is not needed). The FLR procedure ends with GuC sending the
 * `GUC_PF_NOTIFY_VF_FLR_DONE` message, after which the provisioning data is
 * sent to GuC. Once the provisioning is completed, the VF needs to be
 * paused, and at that point the actual restore can begin.
 *
 * During VF restore, the state of several resources is restored. These may
 * include local memory content (system memory is restored by the VMM
 * itself), values of MMIO registers, stateless compression metadata and
 * others. The final resource which also needs restoring is the state of
 * the VF submission maintained within GuC. For that, the
 * `GUC_PF_OPCODE_VF_RESTORE` message is used, with a reference to the state
 * blob to be consumed by GuC.
 *
 * Next, when VFIO is asked to set the VM into running state, the PF driver
 * sends `GUC_PF_TRIGGER_VF_RESUME` to GuC. When sent after a restore, this
 * changes the VF state within GuC to `VF_RESFIX_BLOCKED` rather than the
 * usual `VF_RUNNING`. At this point GuC triggers an interrupt to inform
 * the VF KMD within the VM that it was migrated.
 *
 * As soon as the Virtual GPU of the VM starts, the VF driver within it
 * receives the MIGRATED interrupt and schedules the post-migration recovery
 * worker. That worker queries GuC for the new provisioning (using MMIO
 * communication), and applies fixups to any non-virtualized resources used
 * by the VF.
 *
 * When the VF driver is ready to continue operation on the newly connected
 * hardware, it sends `VF2GUC_NOTIFY_RESFIX_DONE`, which causes the VF to
 * enter the long-awaited `VF_RUNNING` state, after which GuC starts
 * handling CTB messages and scheduling workloads from the VF::
 *
 *      PF                             GuC                             VF
 *     [ ]                              |                               |
 *     [ ] PF2GUC_VF_CONTROL(pause)     |                               |
 *     [ ]---------------------------> [ ]                              |
 *     [ ]                             [ ]  GuC sets new VF state to    |
 *     [ ]                             [ ]------- VF_READY_PAUSED       |
 *     [ ]                             [ ]      |                       |
 *     [ ]                             [ ] <-----                       |
 *     [ ] success                     [ ]                              |
 *     [ ] <---------------------------[ ]                              |
 *     [ ]                              |                               |
 *     [ ] PF loads resources from the  |                               |
 *     [ ]------- saved image supplied  |                               |
 *     [ ]      |                       |                               |
 *     [ ] <-----                       |                               |
 *     [ ]                              |                               |
 *     [ ] GUC_PF_OPCODE_VF_RESTORE     |                               |
 *     [ ]---------------------------> [ ]                              |
 *     [ ]                             [ ]  GuC loads contexts and CTB  |
 *     [ ]                             [ ]------- state from image      |
 *     [ ]                             [ ]      |                       |
 *     [ ]                             [ ] <-----                       |
 *     [ ]                             [ ]                              |
 *     [ ]                             [ ]  GuC sets new VF state to    |
 *     [ ]                             [ ]------- VF_RESFIX_PAUSED      |
 *     [ ]                             [ ]      |                       |
 *     [ ] success                     [ ] <-----                       |
 *     [ ] <---------------------------[ ]                              |
 *     [ ]                              |                               |
 *     [ ] GUC_PF_TRIGGER_VF_RESUME     |                               |
 *     [ ]---------------------------> [ ]                              |
 *     [ ]                             [ ]  GuC sets new VF state to    |
 *     [ ]                             [ ]------- VF_RESFIX_BLOCKED     |
 *     [ ]                             [ ]      |                       |
 *     [ ]                             [ ] <-----                       |
 *     [ ]                             [ ]                              |
 *     [ ]                             [ ]  GUC_INTR_SW_INT_0           |
 *     [ ] success                     [ ]---------------------------> [ ]
 *     [ ] <---------------------------[ ]                             [ ]
 *      |                               |   VF2GUC_QUERY_SINGLE_KLV    [ ]
 *      |                              [ ] <---------------------------[ ]
 *      |                              [ ]                             [ ]
 *      |                              [ ]  new VF provisioning        [ ]
 *      |                              [ ]---------------------------> [ ]
 *      |                               |                              [ ]
 *      |                               |      VF driver applies post  [ ]
 *      |                               |      migration fixups -------[ ]
 *      |                               |                       |      [ ]
 *      |                               |                       -----> [ ]
 *      |                               |                              [ ]
 *      |                               |   VF2GUC_NOTIFY_RESFIX_DONE  [ ]
 *      |                              [ ] <---------------------------[ ]
 *      |                              [ ]                             [ ]
 *      |                              [ ]  GuC sets new VF state to   [ ]
 *      |                              [ ]------- VF_RUNNING           [ ]
 *      |                              [ ]      |                      [ ]
 *      |                              [ ] <-----                      [ ]
 *      |                              [ ]  success                    [ ]
 *      |                              [ ]---------------------------> [ ]
 *      |                               |                               |
 *      |                               |                               |
 */

static void migration_worker_func(struct work_struct *w);

/**
 * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data.
 * @xe: the &xe_device to initialize
 */
void xe_sriov_vf_init_early(struct xe_device *xe)
{
	INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
}

/**
 * vf_post_migration_requery_guc - Re-query GuC for the current VF provisioning.
 * @xe: the &xe_device struct instance
 *
 * After migration, we need to re-query all VF configuration to make sure
 * it matches the previous provisioning. Note that most of the VF
 * provisioning should be the same, except the GGTT range, since the GGTT
 * is not virtualized per-VF.
 *
 * Return: 0 if the operation completed successfully, or a negative error
 * code otherwise.
 */
static int vf_post_migration_requery_guc(struct xe_device *xe)
{
	struct xe_gt *gt;
	unsigned int id;
	int err, ret = 0;

	for_each_gt(gt, xe, id) {
		err = xe_gt_sriov_vf_query_config(gt);
		ret = ret ?: err;
	}

	return ret;
}

/*
 * vf_post_migration_imminent - Check if post-restore recovery is coming.
 * @xe: the &xe_device struct instance
 *
 * Return: True if the migration recovery worker will soon be running. Any
 * worker currently executing does not affect the result.
 */
static bool vf_post_migration_imminent(struct xe_device *xe)
{
	return xe->sriov.vf.migration.gt_flags != 0 ||
	       work_pending(&xe->sriov.vf.migration.worker);
}

/*
 * Notify all GuCs that the resource fixups have been applied.
 */
static void vf_post_migration_notify_resfix_done(struct xe_device *xe)
{
	struct xe_gt *gt;
	unsigned int id;

	for_each_gt(gt, xe, id) {
		if (vf_post_migration_imminent(xe))
			goto skip;
		xe_gt_sriov_vf_notify_resfix_done(gt);
	}
	return;

skip:
	drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n");
}

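/*
 * Perform the post-migration recovery: re-query GuC for the current VF
 * provisioning and notify GuC once the resource fixups have been applied
 * (the actual fixup steps are still marked as a FIXME below). The recovery
 * is deferred if another one is already imminent; on failure the device is
 * declared wedged.
 */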
static void vf_post_migration_recovery(struct xe_device *xe)
{
	int err;

	drm_dbg(&xe->drm, "migration recovery in progress\n");
	xe_pm_runtime_get(xe);
	err = vf_post_migration_requery_guc(xe);
	if (vf_post_migration_imminent(xe))
		goto defer;
	if (unlikely(err))
		goto fail;

	/* FIXME: add the recovery steps */
	vf_post_migration_notify_resfix_done(xe);
	xe_pm_runtime_put(xe);
	drm_notice(&xe->drm, "migration recovery ended\n");
	return;
defer:
	xe_pm_runtime_put(xe);
	drm_dbg(&xe->drm, "migration recovery deferred\n");
	return;
fail:
	xe_pm_runtime_put(xe);
	drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err));
	xe_device_declare_wedged(xe);
}

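/* Work item callback which runs the post-migration recovery. */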
static void migration_worker_func(struct work_struct *w)
{
	struct xe_device *xe = container_of(w, struct xe_device,
					    sriov.vf.migration.worker);

	vf_post_migration_recovery(xe);
}

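/*
 * Check whether every GT is ready for recovery, i.e. whether every GT has
 * its bit set in xe->sriov.vf.migration.gt_flags.
 */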
static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe)
{
	struct xe_gt *gt;
	unsigned int id;

	for_each_gt(gt, xe, id) {
		if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) {
			xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n");
			return false;
		}
	}
	return true;
}

/**
 * xe_sriov_vf_start_migration_recovery - Start VF migration recovery.
 * @xe: the &xe_device to start recovery on
 *
 * This function shall be called only by the VF.
 */
void xe_sriov_vf_start_migration_recovery(struct xe_device *xe)
{
	bool started;

	xe_assert(xe, IS_SRIOV_VF(xe));

	if (!vf_ready_to_recovery_on_all_gts(xe))
		return;

	WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0);
	/* Ensure other threads see that no flags are set now. */
	smp_mb();

	started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker);
	drm_info(&xe->drm, "VF migration recovery %s\n", started ?
		 "scheduled" : "already in progress");
}