1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2023-2024 Intel Corporation
4 */
5
6 #include <drm/drm_managed.h>
7
8 #include "abi/guc_actions_sriov_abi.h"
9
10 #include "xe_device.h"
11 #include "xe_gt.h"
12 #include "xe_gt_sriov_pf.h"
13 #include "xe_gt_sriov_pf_config.h"
14 #include "xe_gt_sriov_pf_control.h"
15 #include "xe_gt_sriov_pf_helpers.h"
16 #include "xe_gt_sriov_pf_migration.h"
17 #include "xe_gt_sriov_pf_monitor.h"
18 #include "xe_gt_sriov_pf_service.h"
19 #include "xe_gt_sriov_printk.h"
20 #include "xe_guc_ct.h"
21 #include "xe_sriov.h"
22
/* Decode a GUC_PF_TRIGGER_* control command into a human readable name. */
static const char *control_cmd_to_string(u32 cmd)
{
	if (cmd == GUC_PF_TRIGGER_VF_PAUSE)
		return "PAUSE";
	if (cmd == GUC_PF_TRIGGER_VF_RESUME)
		return "RESUME";
	if (cmd == GUC_PF_TRIGGER_VF_STOP)
		return "STOP";
	if (cmd == GUC_PF_TRIGGER_VF_FLR_START)
		return "FLR_START";
	if (cmd == GUC_PF_TRIGGER_VF_FLR_FINISH)
		return "FLR_FINISH";
	return "<unknown>";
}
40
/*
 * Send a blocking PF2GUC_VF_CONTROL request for @vfid carrying @cmd.
 * Returns 0 on success or a negative error code on failure.
 */
static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd)
{
	u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = {
		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL),
		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid),
		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd),
	};
	int ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));

	/* a positive (data) response is unexpected for this action */
	if (ret > 0)
		return -EPROTO;
	return ret;
}
55
/*
 * Send a single control command for @vfid to the GuC, with debug logging
 * on entry and an error message on failure.  Must not target the PF itself.
 */
static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd)
{
	int err;

	xe_gt_assert(gt, vfid != PFID);
	xe_gt_sriov_dbg_verbose(gt, "sending VF%u control command %s\n",
				vfid, control_cmd_to_string(cmd));

	err = guc_action_vf_control_cmd(&gt->uc.guc, vfid, cmd);
	if (unlikely(err))
		xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n",
				vfid, control_cmd_to_string(cmd), ERR_PTR(err));
	return err;
}
70
/* Ask the GuC to pause @vfid. */
static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE);
}
75
/* Ask the GuC to resume @vfid. */
static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME);
}
80
/* Ask the GuC to stop @vfid. */
static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP);
}
85
/* Tell the GuC that a FLR of @vfid is starting. */
static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START);
}
90
/* Tell the GuC that the FLR of @vfid has finished. */
static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH);
}
95
96 /**
97 * DOC: The VF state machine
98 *
99 * The simplified VF state machine could be presented as::
100 *
101 * pause--------------------------o
102 * / |
103 * / v
104 * (READY)<------------------resume-----(PAUSED)
105 * ^ \ / /
106 * | \ / /
107 * | stop---->(STOPPED)<----stop /
108 * | / /
109 * | / /
110 * o--------<-----flr /
111 * \ /
112 * o------<--------------------flr
113 *
114 * Where:
115 *
116 * * READY - represents a state in which VF is fully operable
117 * * PAUSED - represents a state in which VF activity is temporarily suspended
118 * * STOPPED - represents a state in which VF activity is definitely halted
119 * * pause - represents a request to temporarily suspend VF activity
120 * * resume - represents a request to resume VF activity
121 * * stop - represents a request to definitely halt VF activity
122 * * flr - represents a request to perform VF FLR to restore VF activity
123 *
124 * However, each state transition requires additional steps that involves
125 * communication with GuC that might fail or be interrupted by other requests::
126 *
127 * .................................WIP....
128 * : :
129 * pause--------------------->PAUSE_WIP----------------------------o
130 * / : / \ : |
131 * / : o----<---stop flr--o : |
132 * / : | \ / | : V
133 * (READY,RESUMED)<--------+------------RESUME_WIP<----+--<-----resume--(PAUSED)
134 * ^ \ \ : | | : / /
135 * | \ \ : | | : / /
136 * | \ \ : | | : / /
137 * | \ \ : o----<----------------------+--<-------stop /
138 * | \ \ : | | : /
139 * | \ \ : V | : /
140 * | \ stop----->STOP_WIP---------flr--->-----o : /
141 * | \ : | | : /
142 * | \ : | V : /
143 * | flr--------+----->----------------->FLR_WIP<-----flr
144 * | : | / ^ :
145 * | : | / | :
146 * o--------<-------:----+-----<----------------o | :
147 * : | | :
148 * :....|...........................|.....:
149 * | |
150 * V |
151 * (STOPPED)--------------------flr
152 *
153 * For details about each internal WIP state machine see:
154 *
155 * * `The VF PAUSE state machine`_
156 * * `The VF RESUME state machine`_
157 * * `The VF STOP state machine`_
158 * * `The VF FLR state machine`_
159 */
160
#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
/*
 * Map a XE_GT_SRIOV_STATE_* bit to its symbolic name for debug logging.
 * Compiled only on debug SR-IOV builds; unknown bits decode as "?".
 */
static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
{
	switch (bit) {
#define CASE2STR(_X) \
	case XE_GT_SRIOV_STATE_##_X: return #_X
	CASE2STR(WIP);
	CASE2STR(FLR_WIP);
	CASE2STR(FLR_SEND_START);
	CASE2STR(FLR_WAIT_GUC);
	CASE2STR(FLR_GUC_DONE);
	CASE2STR(FLR_RESET_CONFIG);
	CASE2STR(FLR_RESET_DATA);
	CASE2STR(FLR_RESET_MMIO);
	CASE2STR(FLR_SEND_FINISH);
	CASE2STR(FLR_FAILED);
	CASE2STR(PAUSE_WIP);
	CASE2STR(PAUSE_SEND_PAUSE);
	CASE2STR(PAUSE_WAIT_GUC);
	CASE2STR(PAUSE_GUC_DONE);
	CASE2STR(PAUSE_SAVE_GUC);
	CASE2STR(PAUSE_FAILED);
	CASE2STR(PAUSED);
	CASE2STR(RESUME_WIP);
	CASE2STR(RESUME_SEND_RESUME);
	CASE2STR(RESUME_FAILED);
	CASE2STR(RESUMED);
	CASE2STR(STOP_WIP);
	CASE2STR(STOP_SEND_STOP);
	CASE2STR(STOP_FAILED);
	CASE2STR(STOPPED);
	CASE2STR(MISMATCH);
#undef CASE2STR
	default: return "?";
	}
}
#endif
198
pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)199 static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)
200 {
201 switch (bit) {
202 case XE_GT_SRIOV_STATE_FLR_WAIT_GUC:
203 case XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC:
204 return HZ / 2;
205 case XE_GT_SRIOV_STATE_FLR_WIP:
206 case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG:
207 return 5 * HZ;
208 default:
209 return HZ;
210 }
211 }
212
/*
 * Return the per-VF control state for @vfid (validated against totalvfs).
 * PF only.
 */
static struct xe_gt_sriov_control_state *pf_pick_vf_control(struct xe_gt *gt, unsigned int vfid)
{
	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
	xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));

	return &gt->sriov.pf.vfs[vfid].control;
}
220
/* Return a pointer to the VF's state bitmap, suitable for atomic bitops. */
static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid)
{
	struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

	return &cs->state;
}
227
/* Non-destructively test whether the given state bit is set for @vfid. */
static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid,
			      enum xe_gt_sriov_control_bits bit)
{
	return test_bit(bit, pf_peek_vf_state(gt, vfid));
}
233
pf_dump_vf_state(struct xe_gt * gt,unsigned int vfid)234 static void pf_dump_vf_state(struct xe_gt *gt, unsigned int vfid)
235 {
236 unsigned long state = *pf_peek_vf_state(gt, vfid);
237 enum xe_gt_sriov_control_bits bit;
238
239 if (state) {
240 xe_gt_sriov_dbg_verbose(gt, "VF%u state %#lx%s%*pbl\n",
241 vfid, state, state ? " bits " : "",
242 (int)BITS_PER_LONG, &state);
243 for_each_set_bit(bit, &state, BITS_PER_LONG)
244 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d)\n",
245 vfid, control_bit_to_string(bit), bit);
246 } else {
247 xe_gt_sriov_dbg_verbose(gt, "VF%u state READY\n", vfid);
248 }
249 }
250
pf_expect_vf_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)251 static bool pf_expect_vf_state(struct xe_gt *gt, unsigned int vfid,
252 enum xe_gt_sriov_control_bits bit)
253 {
254 bool result = pf_check_vf_state(gt, vfid, bit);
255
256 if (unlikely(!result))
257 pf_dump_vf_state(gt, vfid);
258
259 return result;
260 }
261
pf_expect_vf_not_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)262 static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid,
263 enum xe_gt_sriov_control_bits bit)
264 {
265 bool result = !pf_check_vf_state(gt, vfid, bit);
266
267 if (unlikely(!result))
268 pf_dump_vf_state(gt, vfid);
269
270 return result;
271 }
272
/*
 * Atomically set a state bit.  Returns true only on the actual transition
 * (bit was previously clear), so callers can detect re-entry.
 */
static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid,
			      enum xe_gt_sriov_control_bits bit)
{
	if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) {
		xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) enter\n",
					vfid, control_bit_to_string(bit), bit);
		return true;
	}
	return false;
}
283
/*
 * Atomically clear a state bit.  Returns true only on the actual transition
 * (bit was previously set), so only one concurrent caller "wins" the exit.
 */
static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid,
			     enum xe_gt_sriov_control_bits bit)
{
	if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) {
		xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) exit\n",
					vfid, control_bit_to_string(bit), bit);
		return true;
	}
	return false;
}
294
/*
 * Forcefully leave a state (used when a WIP is aborted); logs the caller
 * that triggered the escape when the bit was actually set.
 */
static void pf_escape_vf_state(struct xe_gt *gt, unsigned int vfid,
			       enum xe_gt_sriov_control_bits bit)
{
	if (pf_exit_vf_state(gt, vfid, bit))
		xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) escaped by %ps\n",
					vfid, control_bit_to_string(bit), bit,
					__builtin_return_address(0));
}
303
/*
 * Flag a PF/GuC state disagreement for @vfid.  Logged (with the detecting
 * caller and a full state dump) only on the first detection.
 */
static void pf_enter_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) {
		xe_gt_sriov_dbg(gt, "VF%u state mismatch detected by %ps\n",
				vfid, __builtin_return_address(0));
		pf_dump_vf_state(gt, vfid);
	}
}
312
/*
 * Clear the MISMATCH flag and, as part of the same recovery, all sticky
 * per-operation *_FAILED bits (these are informational leftovers only).
 */
static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH))
		xe_gt_sriov_dbg(gt, "VF%u state mismatch cleared by %ps\n",
				vfid, __builtin_return_address(0));

	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED);
}
324
/*
 * A state machine bug is currently reported as a plain state MISMATCH;
 * kept as a macro so logs attribute the mismatch to the actual caller.
 */
#define pf_enter_vf_state_machine_bug(gt, vfid) ({ \
	pf_enter_vf_mismatch((gt), (vfid)); \
})
328
/* Kick the SR-IOV control worker on the device-wide SR-IOV workqueue. */
static void pf_queue_control_worker(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	xe_gt_assert(gt, IS_SRIOV_PF(xe));

	queue_work(xe->sriov.wq, &gt->sriov.pf.control.worker);
}
337
/*
 * Queue @vfid for processing by the control worker.  list_move_tail() both
 * enqueues a new entry and re-queues an already listed one; the list is
 * protected by the control spinlock.
 */
static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid)
{
	struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;

	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));

	spin_lock(&pfc->lock);
	list_move_tail(&gt->sriov.pf.vfs[vfid].control.link, &pfc->list);
	spin_unlock(&pfc->lock);

	pf_queue_control_worker(gt);
}
350
351 static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid);
352 static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid);
353 static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid);
354 static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid);
355
/*
 * Mark that some operation on @vfid is now in progress.  On the actual
 * transition the completion is re-armed so pf_wait_vf_wip_done() blocks
 * until this new work finishes.  Returns false if WIP was already set.
 */
static bool pf_enter_vf_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

		reinit_completion(&cs->done);
		return true;
	}
	return false;
}
366
/*
 * Finish the in-progress operation on @vfid: tear down any remaining
 * per-operation WIP sub-states, then wake all waiters via the completion.
 * Safe to call even if no WIP is pending (no-op then).
 */
static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

		pf_exit_vf_flr_wip(gt, vfid);
		pf_exit_vf_stop_wip(gt, vfid);
		pf_exit_vf_pause_wip(gt, vfid);
		pf_exit_vf_resume_wip(gt, vfid);

		complete_all(&cs->done);
	}
}
380
/*
 * Wait up to @timeout jiffies for the current operation on @vfid to finish.
 * Returns 0 when completed, -ETIMEDOUT otherwise.
 */
static int pf_wait_vf_wip_done(struct xe_gt *gt, unsigned int vfid, unsigned long timeout)
{
	struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

	return wait_for_completion_timeout(&cs->done, timeout) ? 0 : -ETIMEDOUT;
}
387
/*
 * Return @vfid to the implicit READY state: clear all sticky state bits
 * (READY is represented by no bits being set) and finish any WIP.
 */
static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
{
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}
396
397 /**
398 * DOC: The VF PAUSE state machine
399 *
400 * The VF PAUSE state machine looks like::
401 *
402 * (READY,RESUMED)<-------------<---------------------o---------o
403 * | \ \
404 * pause \ \
405 * | \ \
406 * ....V...........................PAUSE_WIP........ \ \
407 * : \ : o \
408 * : \ o------<-----busy : | \
409 * : \ / / : | |
410 * : PAUSE_SEND_PAUSE ---failed--->----------o--->(PAUSE_FAILED) |
411 * : | \ : | |
412 * : acked rejected---->----------o--->(MISMATCH) /
413 * : | : /
414 * : v : /
415 * : PAUSE_WAIT_GUC : /
416 * : | : /
417 * : done : /
418 * : | : /
419 * : v : /
420 * : PAUSE_GUC_DONE o-----restart
421 * : | :
422 * : | o---<--busy :
423 * : v / / :
424 * : PAUSE_SAVE_GUC :
425 * : / :
426 * : / :
427 * :....o..............o...............o...........:
428 * | | |
429 * completed flr stop
430 * | | |
431 * V .....V..... ......V.....
432 * (PAUSED) : FLR_WIP : : STOP_WIP :
433 * :.........: :..........:
434 *
435 * For the full state machine view, see `The VF state machine`_.
436 */
437
/* Abort a pause operation: escape every PAUSE sub-state still pending. */
static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC);
	}
}
447
/*
 * Enter the terminal PAUSED state.  Entering it twice indicates a state
 * machine bug.  RESUMED is mutually exclusive with PAUSED and is cleared.
 */
static void pf_enter_vf_paused(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}
457
/* Successful end of the pause flow - lands in PAUSED. */
static void pf_enter_vf_pause_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_paused(gt, vfid);
}
462
/* Failed end of the pause flow - set sticky PAUSE_FAILED and finish WIP. */
static void pf_enter_vf_pause_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
	pf_exit_vf_wip(gt, vfid);
}
468
/* GuC rejected the pause request - record the mismatch, then fail. */
static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_pause_failed(gt, vfid);
}
474
/* Enter the save-GuC-state pause step; re-entering indicates a bug. */
static void pf_enter_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
		pf_enter_vf_state_machine_bug(gt, vfid);
}
480
/*
 * Worker step: save the VF's GuC state as part of the pause flow.
 * Returns false if this step was not pending (nothing to do).
 */
static bool pf_exit_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
		return false;

	err = xe_gt_sriov_pf_migration_save_guc_state(gt, vfid);
	if (err) {
		/* retry if busy */
		if (err == -EBUSY) {
			pf_enter_vf_pause_save_guc(gt, vfid);
			return true;
		}
		/* give up on error */
		if (err == -EIO)
			pf_enter_vf_mismatch(gt, vfid);
	}

	/*
	 * NOTE(review): any error other than -EBUSY (including -EIO after
	 * flagging the mismatch) still falls through to pause-completed,
	 * i.e. a failed state save does not fail the pause itself - confirm
	 * this best-effort behavior is intentional.
	 */
	pf_enter_vf_pause_completed(gt, vfid);
	return true;
}
503
/*
 * Worker step: GuC confirmed the pause - advance to the save-GuC step.
 * Returns false if GUC_DONE was not pending.
 */
static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
		return false;

	pf_enter_vf_pause_save_guc(gt, vfid);
	return true;
}
512
/* Record GuC's PAUSE_DONE and hand further processing to the worker. */
static void pf_enter_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
		pf_queue_vf(gt, vfid);
}
518
/* Enter the wait-for-GuC pause step; re-entering indicates a bug. */
static void pf_enter_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC))
		pf_enter_vf_state_machine_bug(gt, vfid);
}
524
/* Leave the wait-for-GuC pause step; returns true on actual transition. */
static bool pf_exit_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
}
529
/* Schedule the send-PAUSE step on the worker; re-entering indicates a bug. */
static void pf_enter_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
537
/*
 * Worker step: actually send the PAUSE command to the GuC.
 * Returns false if this step was not pending.
 *
 * WAIT_GUC must be entered *before* sending, because GuC's asynchronous
 * PAUSE_DONE notification may arrive before the synchronous send returns.
 */
static bool pf_exit_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
		return false;

	/* GuC may actually send a PAUSE_DONE before we get a RESPONSE */
	pf_enter_pause_wait_guc(gt, vfid);

	err = pf_send_vf_pause(gt, vfid);
	if (err) {
		/* send failed, so we shouldn't expect PAUSE_DONE from GuC */
		pf_exit_pause_wait_guc(gt, vfid);

		if (err == -EBUSY)
			pf_enter_vf_pause_send_pause(gt, vfid);	/* retry later */
		else if (err == -EIO)
			pf_enter_vf_pause_rejected(gt, vfid);	/* GuC said no */
		else
			pf_enter_vf_pause_failed(gt, vfid);	/* hard failure */
	} else {
		/*
		 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
		 * but since GuC didn't complain, we may clear MISMATCH
		 */
		pf_exit_vf_mismatch(gt, vfid);
	}

	return true;
}
569
/*
 * Begin the pause flow for @vfid: claim overall WIP and schedule the first
 * (send-PAUSE) step.  Returns false if a pause is already in progress.
 */
static bool pf_enter_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
		pf_enter_vf_wip(gt, vfid);
		pf_enter_vf_pause_send_pause(gt, vfid);
		return true;
	}

	return false;
}
580
581 /**
582 * xe_gt_sriov_pf_control_pause_vf - Pause a VF.
583 * @gt: the &xe_gt
584 * @vfid: the VF identifier
585 *
586 * This function is for PF only.
587 *
588 * Return: 0 on success or a negative error code on failure.
589 */
int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_PAUSE_WIP);
	int err;

	/* a stopped VF cannot be paused - it must be FLR'd first */
	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
		xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
		return -EPERM;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
		xe_gt_sriov_dbg(gt, "VF%u was already paused!\n", vfid);
		return -ESTALE;
	}

	if (!pf_enter_vf_pause_wip(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "VF%u pause already in progress!\n", vfid);
		return -EALREADY;
	}

	/* the actual work happens asynchronously on the control worker */
	err = pf_wait_vf_wip_done(gt, vfid, timeout);
	if (err) {
		xe_gt_sriov_dbg(gt, "VF%u pause didn't finish in %u ms (%pe)\n",
				vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
		return err;
	}

	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
		xe_gt_sriov_info(gt, "VF%u paused!\n", vfid);
		return 0;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED)) {
		xe_gt_sriov_dbg(gt, "VF%u pause failed!\n", vfid);
		return -EIO;
	}

	/* WIP finished but neither PAUSED nor PAUSE_FAILED - aborted elsewhere */
	xe_gt_sriov_dbg(gt, "VF%u pause was canceled!\n", vfid);
	return -ECANCELED;
}
630
631 /**
632 * DOC: The VF RESUME state machine
633 *
634 * The VF RESUME state machine looks like::
635 *
636 * (PAUSED)<-----------------<------------------------o
637 * | \
638 * resume \
639 * | \
640 * ....V............................RESUME_WIP...... \
641 * : \ : o
642 * : \ o-------<-----busy : |
643 * : \ / / : |
644 * : RESUME_SEND_RESUME ---failed--->--------o--->(RESUME_FAILED)
645 * : / \ : |
646 * : acked rejected---->---------o--->(MISMATCH)
647 * : / :
648 * :....o..............o...............o.....o.....:
649 * | | | \
650 * completed flr stop restart-->(READY)
651 * | | |
652 * V .....V..... ......V.....
653 * (RESUMED) : FLR_WIP : : STOP_WIP :
654 * :.........: :..........:
655 *
656 * For the full state machine view, see `The VF state machine`_.
657 */
658
/* Abort a resume operation: escape its single pending sub-state. */
static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP))
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME);
}
664
/* Enter RESUMED; PAUSED is mutually exclusive and is cleared. */
static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}
672
/* Successful end of the resume flow - lands in RESUMED. */
static void pf_enter_vf_resume_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_resumed(gt, vfid);
}
677
/* Failed end of the resume flow - set sticky RESUME_FAILED and finish WIP. */
static void pf_enter_vf_resume_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
	pf_exit_vf_wip(gt, vfid);
}
683
/* GuC rejected the resume request - record the mismatch, then fail. */
static void pf_enter_vf_resume_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_resume_failed(gt, vfid);
}
689
/* Schedule the send-RESUME step on the worker; re-entering indicates a bug. */
static void pf_enter_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
697
pf_exit_vf_resume_send_resume(struct xe_gt * gt,unsigned int vfid)698 static bool pf_exit_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
699 {
700 int err;
701
702 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
703 return false;
704
705 err = pf_send_vf_resume(gt, vfid);
706 if (err == -EBUSY)
707 pf_enter_vf_resume_send_resume(gt, vfid);
708 else if (err == -EIO)
709 pf_enter_vf_resume_rejected(gt, vfid);
710 else if (err)
711 pf_enter_vf_resume_failed(gt, vfid);
712 else
713 pf_enter_vf_resume_completed(gt, vfid);
714 return true;
715 }
716
/*
 * Begin the resume flow for @vfid: claim overall WIP and schedule the
 * send-RESUME step.  Returns false if a resume is already in progress.
 */
static bool pf_enter_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) {
		pf_enter_vf_wip(gt, vfid);
		pf_enter_vf_resume_send_resume(gt, vfid);
		return true;
	}

	return false;
}
727
728 /**
729 * xe_gt_sriov_pf_control_resume_vf - Resume a VF.
730 * @gt: the &xe_gt
731 * @vfid: the VF identifier
732 *
733 * This function is for PF only.
734 *
735 * Return: 0 on success or a negative error code on failure.
736 */
int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESUME_WIP);
	int err;

	/* only a paused VF can be resumed */
	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
		xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
		return -EPERM;
	}

	if (!pf_enter_vf_resume_wip(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid);
		return -EALREADY;
	}

	/* the actual work happens asynchronously on the control worker */
	err = pf_wait_vf_wip_done(gt, vfid, timeout);
	if (err)
		return err;

	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) {
		xe_gt_sriov_info(gt, "VF%u resumed!\n", vfid);
		return 0;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED)) {
		xe_gt_sriov_dbg(gt, "VF%u resume failed!\n", vfid);
		return -EIO;
	}

	/* WIP finished but neither RESUMED nor RESUME_FAILED - aborted elsewhere */
	xe_gt_sriov_dbg(gt, "VF%u resume was canceled!\n", vfid);
	return -ECANCELED;
}
769
770 /**
771 * DOC: The VF STOP state machine
772 *
773 * The VF STOP state machine looks like::
774 *
775 * (READY,PAUSED,RESUMED)<-------<--------------------o
776 * | \
777 * stop \
778 * | \
779 * ....V..............................STOP_WIP...... \
780 * : \ : o
781 * : \ o----<----busy : |
782 * : \ / / : |
783 * : STOP_SEND_STOP--------failed--->--------o--->(STOP_FAILED)
784 * : / \ : |
785 * : acked rejected-------->--------o--->(MISMATCH)
786 * : / :
787 * :....o..............o...............o...........:
788 * | | |
789 * completed flr restart
790 * | | |
791 * V .....V..... V
792 * (STOPPED) : FLR_WIP : (READY)
793 * :.........:
794 *
795 * For the full state machine view, see `The VF state machine`_.
796 */
797
/* Abort a stop operation: escape its single pending sub-state. */
static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP))
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP);
}
803
/*
 * Enter the terminal STOPPED state; re-entering indicates a bug.
 * RESUMED/PAUSED are mutually exclusive with STOPPED and are cleared.
 */
static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}
814
/* Successful end of the stop flow - lands in STOPPED. */
static void pf_enter_vf_stop_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_stopped(gt, vfid);
}
819
/* Failed end of the stop flow - set sticky STOP_FAILED and finish WIP. */
static void pf_enter_vf_stop_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
	pf_exit_vf_wip(gt, vfid);
}
825
/* GuC rejected the stop request - record the mismatch, then fail. */
static void pf_enter_vf_stop_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_stop_failed(gt, vfid);
}
831
/* Schedule the send-STOP step on the worker; re-entering indicates a bug. */
static void pf_enter_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
839
pf_exit_vf_stop_send_stop(struct xe_gt * gt,unsigned int vfid)840 static bool pf_exit_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
841 {
842 int err;
843
844 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
845 return false;
846
847 err = pf_send_vf_stop(gt, vfid);
848 if (err == -EBUSY)
849 pf_enter_vf_stop_send_stop(gt, vfid);
850 else if (err == -EIO)
851 pf_enter_vf_stop_rejected(gt, vfid);
852 else if (err)
853 pf_enter_vf_stop_failed(gt, vfid);
854 else
855 pf_enter_vf_stop_completed(gt, vfid);
856 return true;
857 }
858
/*
 * Begin the stop flow for @vfid: claim overall WIP and schedule the
 * send-STOP step.  Returns false if a stop is already in progress.
 */
static bool pf_enter_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) {
		pf_enter_vf_wip(gt, vfid);
		pf_enter_vf_stop_send_stop(gt, vfid);
		return true;
	}
	return false;
}
868
869 /**
870 * xe_gt_sriov_pf_control_stop_vf - Stop a VF.
871 * @gt: the &xe_gt
872 * @vfid: the VF identifier
873 *
874 * This function is for PF only.
875 *
876 * Return: 0 on success or a negative error code on failure.
877 */
int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_STOP_WIP);
	int err;

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
		xe_gt_sriov_dbg(gt, "VF%u was already stopped!\n", vfid);
		return -ESTALE;
	}

	if (!pf_enter_vf_stop_wip(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "VF%u stop already in progress!\n", vfid);
		return -EALREADY;
	}

	/* the actual work happens asynchronously on the control worker */
	err = pf_wait_vf_wip_done(gt, vfid, timeout);
	if (err)
		return err;

	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
		xe_gt_sriov_info(gt, "VF%u stopped!\n", vfid);
		return 0;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED)) {
		xe_gt_sriov_dbg(gt, "VF%u stop failed!\n", vfid);
		return -EIO;
	}

	/* WIP finished but neither STOPPED nor STOP_FAILED - aborted elsewhere */
	xe_gt_sriov_dbg(gt, "VF%u stop was canceled!\n", vfid);
	return -ECANCELED;
}
910
911 /**
912 * DOC: The VF FLR state machine
913 *
914 * The VF FLR state machine looks like::
915 *
916 * (READY,PAUSED,STOPPED)<------------<--------------o
917 * | \
918 * flr \
919 * | \
920 * ....V..........................FLR_WIP........... \
921 * : \ : \
922 * : \ o----<----busy : |
923 * : \ / / : |
924 * : FLR_SEND_START---failed----->-----------o--->(FLR_FAILED)<---o
925 * : | \ : | |
926 * : acked rejected----->-----------o--->(MISMATCH) |
927 * : | : ^ |
928 * : v : | |
929 * : FLR_WAIT_GUC : | |
930 * : | : | |
931 * : done : | |
932 * : | : | |
933 * : v : | |
934 * : FLR_GUC_DONE : | |
935 * : | : | |
936 * : FLR_RESET_CONFIG---failed--->-----------o--------+-----------o
937 * : | : | |
938 * : FLR_RESET_DATA : | |
939 * : | : | |
940 * : FLR_RESET_MMIO : | |
941 * : | : | |
942 * : | o----<----busy : | |
943 * : |/ / : | |
944 * : FLR_SEND_FINISH----failed--->-----------o--------+-----------o
945 * : / \ : |
946 * : acked rejected----->-----------o--------o
947 * : / :
948 * :....o..............................o...........:
949 * | |
950 * completed restart
951 * | /
952 * V /
953 * (READY)<----------<------------o
954 *
955 * For the full state machine view, see `The VF state machine`_.
956 */
957
/* Schedule the send-FLR_START step on the worker; re-entering indicates a bug. */
static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
965
pf_enter_vf_flr_wip(struct xe_gt * gt,unsigned int vfid)966 static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
967 {
968 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
969 xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid);
970 return;
971 }
972
973 pf_enter_vf_wip(gt, vfid);
974 pf_enter_vf_flr_send_start(gt, vfid);
975 }
976
/*
 * Leave the FLR work-in-progress state and clear every FLR sub-state
 * bit that may still be set.  The sub-states are escaped in reverse
 * order of the normal FLR progression — presumably so a step cannot
 * re-enter a stage that was already cleared (NOTE(review): confirm
 * the ordering requirement against the state helpers).
 */
static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START);
	}
}
989
/* FLR finished successfully: return the VF to the READY state. */
static void pf_enter_vf_flr_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_ready(gt, vfid);
}
994
pf_enter_vf_flr_failed(struct xe_gt * gt,unsigned int vfid)995 static void pf_enter_vf_flr_failed(struct xe_gt *gt, unsigned int vfid)
996 {
997 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
998 xe_gt_sriov_notice(gt, "VF%u FLR failed!\n", vfid);
999 pf_exit_vf_wip(gt, vfid);
1000 }
1001
/*
 * GuC rejected one of our FLR requests: record the PF/GuC state
 * mismatch first, then mark the FLR as failed (which also ends the
 * work-in-progress state).
 */
static void pf_enter_vf_flr_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_flr_failed(gt, vfid);
}
1007
pf_enter_vf_flr_send_finish(struct xe_gt * gt,unsigned int vfid)1008 static void pf_enter_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
1009 {
1010 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
1011 pf_enter_vf_state_machine_bug(gt, vfid);
1012
1013 pf_queue_vf(gt, vfid);
1014 }
1015
pf_exit_vf_flr_send_finish(struct xe_gt * gt,unsigned int vfid)1016 static bool pf_exit_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
1017 {
1018 int err;
1019
1020 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
1021 return false;
1022
1023 err = pf_send_vf_flr_finish(gt, vfid);
1024 if (err == -EBUSY)
1025 pf_enter_vf_flr_send_finish(gt, vfid);
1026 else if (err == -EIO)
1027 pf_enter_vf_flr_rejected(gt, vfid);
1028 else if (err)
1029 pf_enter_vf_flr_failed(gt, vfid);
1030 else
1031 pf_enter_vf_flr_completed(gt, vfid);
1032 return true;
1033 }
1034
pf_enter_vf_flr_reset_mmio(struct xe_gt * gt,unsigned int vfid)1035 static void pf_enter_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
1036 {
1037 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
1038 pf_enter_vf_state_machine_bug(gt, vfid);
1039
1040 pf_queue_vf(gt, vfid);
1041 }
1042
pf_exit_vf_flr_reset_mmio(struct xe_gt * gt,unsigned int vfid)1043 static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
1044 {
1045 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
1046 return false;
1047
1048 xe_gt_sriov_pf_sanitize_hw(gt, vfid);
1049
1050 pf_enter_vf_flr_send_finish(gt, vfid);
1051 return true;
1052 }
1053
pf_enter_vf_flr_reset_data(struct xe_gt * gt,unsigned int vfid)1054 static void pf_enter_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
1055 {
1056 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
1057 pf_enter_vf_state_machine_bug(gt, vfid);
1058
1059 pf_queue_vf(gt, vfid);
1060 }
1061
pf_exit_vf_flr_reset_data(struct xe_gt * gt,unsigned int vfid)1062 static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
1063 {
1064 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
1065 return false;
1066
1067 xe_gt_sriov_pf_service_reset(gt, vfid);
1068 xe_gt_sriov_pf_monitor_flr(gt, vfid);
1069
1070 pf_enter_vf_flr_reset_mmio(gt, vfid);
1071 return true;
1072 }
1073
pf_enter_vf_flr_reset_config(struct xe_gt * gt,unsigned int vfid)1074 static void pf_enter_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
1075 {
1076 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
1077 pf_enter_vf_state_machine_bug(gt, vfid);
1078
1079 pf_queue_vf(gt, vfid);
1080 }
1081
pf_exit_vf_flr_reset_config(struct xe_gt * gt,unsigned int vfid)1082 static bool pf_exit_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
1083 {
1084 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
1085 int err;
1086
1087 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
1088 return false;
1089
1090 err = xe_gt_sriov_pf_config_sanitize(gt, vfid, timeout);
1091 if (err)
1092 pf_enter_vf_flr_failed(gt, vfid);
1093 else
1094 pf_enter_vf_flr_reset_data(gt, vfid);
1095 return true;
1096 }
1097
pf_enter_vf_flr_wait_guc(struct xe_gt * gt,unsigned int vfid)1098 static void pf_enter_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
1099 {
1100 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC))
1101 pf_enter_vf_state_machine_bug(gt, vfid);
1102 }
1103
/*
 * Clear the FLR_WAIT_GUC state.
 * Returns true if the state was set (the FLR_DONE was expected).
 */
static bool pf_exit_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
}
1108
/*
 * Worker step: leave FLR_SEND_START and send the FLR start command to
 * the GuC.  WAIT_GUC is entered *before* sending because the GuC's
 * FLR_DONE may arrive ahead of the H2G response; on a send failure
 * that provisional state is rolled back.
 * Returns true when this step was taken, false when the state was not set.
 */
static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
		return false;

	/* GuC may actually send a FLR_DONE before we get a RESPONSE */
	pf_enter_vf_flr_wait_guc(gt, vfid);

	err = pf_send_vf_flr_start(gt, vfid);
	if (err) {
		/* send failed, so we shouldn't expect FLR_DONE from GuC */
		pf_exit_vf_flr_wait_guc(gt, vfid);

		if (err == -EBUSY)
			pf_enter_vf_flr_send_start(gt, vfid); /* retry later */
		else if (err == -EIO)
			pf_enter_vf_flr_rejected(gt, vfid);
		else
			pf_enter_vf_flr_failed(gt, vfid);
	} else {
		/*
		 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
		 * but since GuC didn't complain, we may clear MISMATCH
		 */
		pf_exit_vf_mismatch(gt, vfid);
	}

	return true;
}
1140
pf_exit_vf_flr_guc_done(struct xe_gt * gt,unsigned int vfid)1141 static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
1142 {
1143 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
1144 return false;
1145
1146 pf_enter_vf_flr_reset_config(gt, vfid);
1147 return true;
1148 }
1149
pf_enter_vf_flr_guc_done(struct xe_gt * gt,unsigned int vfid)1150 static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
1151 {
1152 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
1153 pf_queue_vf(gt, vfid);
1154 }
1155
1156 /**
1157 * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence.
1158 * @gt: the &xe_gt
1159 * @vfid: the VF identifier
1160 *
1161 * This function is for PF only.
1162 *
1163 * Return: 0 on success or a negative error code on failure.
1164 */
xe_gt_sriov_pf_control_trigger_flr(struct xe_gt * gt,unsigned int vfid)1165 int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid)
1166 {
1167 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP);
1168 int err;
1169
1170 pf_enter_vf_flr_wip(gt, vfid);
1171
1172 err = pf_wait_vf_wip_done(gt, vfid, timeout);
1173 if (err) {
1174 xe_gt_sriov_notice(gt, "VF%u FLR didn't finish in %u ms (%pe)\n",
1175 vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
1176 return err;
1177 }
1178
1179 if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
1180 return -EIO;
1181
1182 return 0;
1183 }
1184
1185 /**
1186 * DOC: The VF FLR Flow with GuC
1187 *
1188 * The VF FLR flow includes several steps::
1189 *
1190 * PF GUC PCI
1191 * ========================================================
1192 * | | |
1193 * (1) | [ ] <----- FLR --|
1194 * | [ ] :
1195 * (2) [ ] <-------- NOTIFY FLR --[ ]
1196 * [ ] |
1197 * (3) [ ] |
1198 * [ ] |
1199 * [ ]-- START FLR ---------> [ ]
1200 * | [ ]
1201 * (4) | [ ]
1202 * | [ ]
1203 * [ ] <--------- FLR DONE -- [ ]
1204 * [ ] |
1205 * (5) [ ] |
1206 * [ ] |
1207 * [ ]-- FINISH FLR --------> [ ]
1208 * | |
1209 *
1210 * * Step 1: PCI HW generates interrupt to the GuC about VF FLR
1211 * * Step 2: GuC FW sends G2H notification to the PF about VF FLR
1212 * * Step 2a: on some platforms G2H is only received from root GuC
1213 * * Step 3: PF sends H2G request to the GuC to start VF FLR sequence
1214 * * Step 3a: on some platforms PF must send H2G to all other GuCs
1215 * * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done
1216 * * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished
1217 */
1218
/*
 * On some platforms the FLR notification is delivered only by the root
 * GuC, so the PF must dispatch the FLR to every GT itself (see steps
 * 2a/3a in "The VF FLR Flow with GuC" above).  Currently this applies
 * to PVC only.
 */
static bool needs_dispatch_flr(struct xe_device *xe)
{
	return xe->info.platform == XE_PVC;
}
1223
/*
 * Handle a GuC 'VF FLR' notification: start the FLR state machine on
 * this GT, or on every GT of the device when the platform delivers
 * the notification through the root GuC only.
 */
static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_gt *other;
	unsigned int id;

	xe_gt_sriov_info(gt, "VF%u FLR\n", vfid);

	if (!needs_dispatch_flr(xe)) {
		pf_enter_vf_flr_wip(gt, vfid);
		return;
	}

	for_each_gt(other, xe, id)
		pf_enter_vf_flr_wip(other, vfid);
}
1239
/*
 * Handle a GuC 'VF FLR done' notification.  If we were not in the
 * WAIT_GUC state the notification is unexpected and recorded as a
 * PF/GuC mismatch; otherwise the FLR sequence continues.
 */
static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid)
{
	bool expected = pf_exit_vf_flr_wait_guc(gt, vfid);

	if (!expected) {
		xe_gt_sriov_dbg(gt, "Received out of order 'VF%u FLR done'\n", vfid);
		pf_enter_vf_mismatch(gt, vfid);
		return;
	}

	pf_enter_vf_flr_guc_done(gt, vfid);
}
1250
/*
 * Handle a GuC 'VF PAUSE done' notification.  If we were not waiting
 * for it the notification is recorded as a PF/GuC mismatch; otherwise
 * the pause sequence continues.
 */
static void pf_handle_vf_pause_done(struct xe_gt *gt, u32 vfid)
{
	bool expected = pf_exit_pause_wait_guc(gt, vfid);

	if (!expected) {
		xe_gt_sriov_dbg(gt, "Received out of order 'VF%u PAUSE done'\n", vfid);
		pf_enter_vf_mismatch(gt, vfid);
		return;
	}

	pf_enter_vf_pause_guc_done(gt, vfid);
}
1261
/*
 * Route a single GuC per-VF state event to its handler.
 * Returns 0 on success, -EPROTO for an out-of-range VFID, or
 * -ENOPKG for an unknown event id.
 */
static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid)
{
	xe_gt_sriov_dbg_verbose(gt, "received VF%u event %#x\n", vfid, eventid);

	/* VFIDs are 1-based, so vfid == totalvfs is still valid */
	if (vfid > xe_gt_sriov_pf_get_totalvfs(gt))
		return -EPROTO;

	switch (eventid) {
	case GUC_PF_NOTIFY_VF_FLR:
		pf_handle_vf_flr(gt, vfid);
		break;
	case GUC_PF_NOTIFY_VF_FLR_DONE:
		pf_handle_vf_flr_done(gt, vfid);
		break;
	case GUC_PF_NOTIFY_VF_PAUSE_DONE:
		pf_handle_vf_pause_done(gt, vfid);
		break;
	case GUC_PF_NOTIFY_VF_FIXUP_DONE:
		/* acknowledged but no PF-side work is performed here */
		break;
	default:
		return -ENOPKG;
	}
	return 0;
}
1286
/*
 * Handle a GuC event addressed to the PF itself (VFID 0).
 * Returns 0 on success or -ENOPKG for an unknown event id.
 */
static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid)
{
	switch (eventid) {
	case GUC_PF_NOTIFY_VF_ENABLE:
		/*
		 * NOTE(review): the event payload does not appear to carry
		 * the new state, so both literals are printed verbatim —
		 * confirm against the GuC ABI.
		 */
		xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n",
					str_enabled_disabled(true),
					str_enabled_disabled(false));
		break;
	default:
		return -ENOPKG;
	}
	return 0;
}
1300
1301 /**
1302 * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC.
1303 * @gt: the &xe_gt
1304 * @msg: the G2H message
1305 * @len: the length of the G2H message
1306 *
1307 * This function is for PF only.
1308 *
1309 * Return: 0 on success or a negative error code on failure.
1310 */
int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
{
	u32 vfid;
	u32 eventid;

	/* presumably guaranteed by the CT dispatch layer — asserted only */
	xe_gt_assert(gt, len);
	xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
	xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
	xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
		     GUC_ACTION_GUC2PF_VF_STATE_NOTIFY);

	/* this notification is only meaningful when running as PF */
	if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt))))
		return -EPROTO;

	/* reserved bits must be zero per the ABI */
	if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0])))
		return -EPFNOSUPPORT;

	if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN))
		return -EPROTO;

	vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]);
	eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]);

	/* VFID 0 addresses the PF itself */
	return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid);
}
1336
/*
 * Run one step of the per-VF control state machine.  The checks are
 * ordered by priority: the FLR stages first, then STOP, PAUSE and
 * RESUME.  Returns true when a step was executed and the VF should be
 * re-queued, false when there is nothing (more) to do for this VF.
 */
static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_flr_send_start(gt, vfid))
		return true;

	/* waiting for the GuC's FLR_DONE — nothing to schedule yet */
	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) {
		xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
					control_bit_to_string(XE_GT_SRIOV_STATE_FLR_WAIT_GUC));
		return false;
	}

	if (pf_exit_vf_flr_guc_done(gt, vfid))
		return true;

	if (pf_exit_vf_flr_reset_config(gt, vfid))
		return true;

	if (pf_exit_vf_flr_reset_data(gt, vfid))
		return true;

	if (pf_exit_vf_flr_reset_mmio(gt, vfid))
		return true;

	if (pf_exit_vf_flr_send_finish(gt, vfid))
		return true;

	if (pf_exit_vf_stop_send_stop(gt, vfid))
		return true;

	if (pf_exit_vf_pause_send_pause(gt, vfid))
		return true;

	/*
	 * NOTE(review): unlike FLR_WAIT_GUC above, this returns true while
	 * still waiting for the GuC PAUSE notification — confirm that the
	 * resulting re-queue is intentional.
	 */
	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) {
		xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
					control_bit_to_string(XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC));
		return true;
	}

	if (pf_exit_vf_pause_guc_done(gt, vfid))
		return true;

	if (pf_exit_vf_pause_save_guc(gt, vfid))
		return true;

	if (pf_exit_vf_resume_send_resume(gt, vfid))
		return true;

	return false;
}
1386
/*
 * Map a control-state pointer back to its VFID: the control state is
 * embedded in a per-VF metadata entry, so the pointer difference from
 * the start of the metadata array yields the array index (the VFID).
 */
static unsigned int pf_control_state_index(struct xe_gt *gt,
					   struct xe_gt_sriov_control_state *cs)
{
	return container_of(cs, struct xe_gt_sriov_metadata, control) - gt->sriov.pf.vfs;
}
1392
pf_worker_find_work(struct xe_gt * gt)1393 static void pf_worker_find_work(struct xe_gt *gt)
1394 {
1395 struct xe_gt_sriov_pf_control *pfc = >->sriov.pf.control;
1396 struct xe_gt_sriov_control_state *cs;
1397 unsigned int vfid;
1398 bool empty;
1399 bool more;
1400
1401 spin_lock(&pfc->lock);
1402 cs = list_first_entry_or_null(&pfc->list, struct xe_gt_sriov_control_state, link);
1403 if (cs)
1404 list_del_init(&cs->link);
1405 empty = list_empty(&pfc->list);
1406 spin_unlock(&pfc->lock);
1407
1408 if (!cs)
1409 return;
1410
1411 /* VF metadata structures are indexed by the VFID */
1412 vfid = pf_control_state_index(gt, cs);
1413 xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));
1414
1415 more = pf_process_vf_state_machine(gt, vfid);
1416 if (more)
1417 pf_queue_vf(gt, vfid);
1418 else if (!empty)
1419 pf_queue_control_worker(gt);
1420 }
1421
control_worker_func(struct work_struct * w)1422 static void control_worker_func(struct work_struct *w)
1423 {
1424 struct xe_gt *gt = container_of(w, struct xe_gt, sriov.pf.control.worker);
1425
1426 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
1427 pf_worker_find_work(gt);
1428 }
1429
pf_stop_worker(struct xe_gt * gt)1430 static void pf_stop_worker(struct xe_gt *gt)
1431 {
1432 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
1433 cancel_work_sync(>->sriov.pf.control.worker);
1434 }
1435
/* drmm cleanup action: stop the control worker when the device goes away. */
static void control_fini_action(struct drm_device *dev, void *data)
{
	pf_stop_worker((struct xe_gt *)data);
}
1442
1443 /**
1444 * xe_gt_sriov_pf_control_init() - Initialize PF's control data.
1445 * @gt: the &xe_gt
1446 *
1447 * This function is for PF only.
1448 *
1449 * Return: 0 on success or a negative error code on failure.
1450 */
xe_gt_sriov_pf_control_init(struct xe_gt * gt)1451 int xe_gt_sriov_pf_control_init(struct xe_gt *gt)
1452 {
1453 struct xe_device *xe = gt_to_xe(gt);
1454 unsigned int n, totalvfs;
1455
1456 xe_gt_assert(gt, IS_SRIOV_PF(xe));
1457
1458 totalvfs = xe_sriov_pf_get_totalvfs(xe);
1459 for (n = 0; n <= totalvfs; n++) {
1460 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, n);
1461
1462 init_completion(&cs->done);
1463 INIT_LIST_HEAD(&cs->link);
1464 }
1465
1466 spin_lock_init(>->sriov.pf.control.lock);
1467 INIT_LIST_HEAD(>->sriov.pf.control.list);
1468 INIT_WORK(>->sriov.pf.control.worker, control_worker_func);
1469
1470 return drmm_add_action_or_reset(&xe->drm, control_fini_action, gt);
1471 }
1472
1473 /**
1474 * xe_gt_sriov_pf_control_restart() - Restart SR-IOV control data after a GT reset.
1475 * @gt: the &xe_gt
1476 *
1477 * Any per-VF status maintained by the PF or any ongoing VF control activity
1478 * performed by the PF must be reset or cancelled when the GT is reset.
1479 *
1480 * This function is for PF only.
1481 */
void xe_gt_sriov_pf_control_restart(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	unsigned int vfid, count;

	xe_gt_assert(gt, IS_SRIOV_PF(xe));

	/* cancel any in-flight control activity first */
	pf_stop_worker(gt);

	/* then force every VF (1..totalvfs) back to the READY state */
	count = xe_sriov_pf_get_totalvfs(xe);
	for (vfid = 1; vfid <= count; vfid++)
		pf_enter_vf_ready(gt, vfid);
}
1495