1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2023-2024 Intel Corporation
4 */
5
6 #include <drm/drm_managed.h>
7
8 #include "abi/guc_actions_sriov_abi.h"
9
10 #include "xe_device.h"
11 #include "xe_gt.h"
12 #include "xe_gt_sriov_pf.h"
13 #include "xe_gt_sriov_pf_config.h"
14 #include "xe_gt_sriov_pf_control.h"
15 #include "xe_gt_sriov_pf_helpers.h"
16 #include "xe_gt_sriov_pf_migration.h"
17 #include "xe_gt_sriov_pf_monitor.h"
18 #include "xe_gt_sriov_printk.h"
19 #include "xe_guc_ct.h"
20 #include "xe_sriov.h"
21 #include "xe_sriov_pf_service.h"
22 #include "xe_tile.h"
23
/* Map a PF2GUC VF control command id to a human-readable name for logs. */
static const char *control_cmd_to_string(u32 cmd)
{
	if (cmd == GUC_PF_TRIGGER_VF_PAUSE)
		return "PAUSE";
	if (cmd == GUC_PF_TRIGGER_VF_RESUME)
		return "RESUME";
	if (cmd == GUC_PF_TRIGGER_VF_STOP)
		return "STOP";
	if (cmd == GUC_PF_TRIGGER_VF_FLR_START)
		return "FLR_START";
	if (cmd == GUC_PF_TRIGGER_VF_FLR_FINISH)
		return "FLR_FINISH";
	return "<unknown>";
}
41
/*
 * Build and send a PF2GUC_VF_CONTROL H2G request for @vfid carrying
 * command @cmd, blocking until the GuC responds.
 *
 * Return: 0 on success, -EPROTO if the GuC replied with unexpected
 *         response data, or a negative error code from the CT layer.
 */
static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd)
{
	u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = {
		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL),
		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid),
		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd),
	};
	int ret;

	ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
	/* a positive return is an unexpected response payload length */
	return ret > 0 ? -EPROTO : ret;
}
56
/*
 * Send a VF control command to the GuC on behalf of @vfid and log the
 * outcome. Must never be called for the PF itself.
 *
 * Return: 0 on success or a negative error code on failure.
 */
static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd)
{
	int err;

	xe_gt_assert(gt, vfid != PFID);
	xe_gt_sriov_dbg_verbose(gt, "sending VF%u control command %s\n",
				vfid, control_cmd_to_string(cmd));

	err = guc_action_vf_control_cmd(&gt->uc.guc, vfid, cmd);
	if (unlikely(err))
		xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n",
				vfid, control_cmd_to_string(cmd), ERR_PTR(err));
	return err;
}
71
/* Send the PAUSE control command for this VF. */
static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE);
}

/* Send the RESUME control command for this VF. */
static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME);
}

/* Send the STOP control command for this VF. */
static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP);
}

/* Send the FLR_START control command for this VF. */
static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START);
}

/* Send the FLR_FINISH control command for this VF. */
static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH);
}
96
97 /**
98 * DOC: The VF state machine
99 *
100 * The simplified VF state machine could be presented as::
101 *
102 * pause--------------------------o
103 * / |
104 * / v
105 * (READY)<------------------resume-----(PAUSED)
106 * ^ \ / /
107 * | \ / /
108 * | stop---->(STOPPED)<----stop /
109 * | / /
110 * | / /
111 * o--------<-----flr /
112 * \ /
113 * o------<--------------------flr
114 *
115 * Where:
116 *
117 * * READY - represents a state in which VF is fully operable
118 * * PAUSED - represents a state in which VF activity is temporarily suspended
119 * * STOPPED - represents a state in which VF activity is definitely halted
120 * * pause - represents a request to temporarily suspend VF activity
121 * * resume - represents a request to resume VF activity
122 * * stop - represents a request to definitely halt VF activity
123 * * flr - represents a request to perform VF FLR to restore VF activity
124 *
125 * However, each state transition requires additional steps that involves
126 * communication with GuC that might fail or be interrupted by other requests::
127 *
128 * .................................WIP....
129 * : :
130 * pause--------------------->PAUSE_WIP----------------------------o
131 * / : / \ : |
132 * / : o----<---stop flr--o : |
133 * / : | \ / | : V
134 * (READY,RESUMED)<--------+------------RESUME_WIP<----+--<-----resume--(PAUSED)
135 * ^ \ \ : | | : / /
136 * | \ \ : | | : / /
137 * | \ \ : | | : / /
138 * | \ \ : o----<----------------------+--<-------stop /
139 * | \ \ : | | : /
140 * | \ \ : V | : /
141 * | \ stop----->STOP_WIP---------flr--->-----o : /
142 * | \ : | | : /
143 * | \ : | V : /
144 * | flr--------+----->----------------->FLR_WIP<-----flr
145 * | : | / ^ :
146 * | : | / | :
147 * o--------<-------:----+-----<----------------o | :
148 * : | | :
149 * :....|...........................|.....:
150 * | |
151 * V |
152 * (STOPPED)--------------------flr
153 *
154 * For details about each internal WIP state machine see:
155 *
156 * * `The VF PAUSE state machine`_
157 * * `The VF RESUME state machine`_
158 * * `The VF STOP state machine`_
159 * * `The VF FLR state machine`_
160 */
161
#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
/* Decode a single control state bit into its name (debug builds only). */
static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
{
	switch (bit) {
	/* expands to e.g. "case XE_GT_SRIOV_STATE_WIP: return "WIP";" */
#define CASE2STR(_X) \
	case XE_GT_SRIOV_STATE_##_X: return #_X
	CASE2STR(WIP);
	CASE2STR(FLR_WIP);
	CASE2STR(FLR_SEND_START);
	CASE2STR(FLR_WAIT_GUC);
	CASE2STR(FLR_GUC_DONE);
	CASE2STR(FLR_RESET_CONFIG);
	CASE2STR(FLR_RESET_DATA);
	CASE2STR(FLR_RESET_MMIO);
	CASE2STR(FLR_SEND_FINISH);
	CASE2STR(FLR_FAILED);
	CASE2STR(PAUSE_WIP);
	CASE2STR(PAUSE_SEND_PAUSE);
	CASE2STR(PAUSE_WAIT_GUC);
	CASE2STR(PAUSE_GUC_DONE);
	CASE2STR(PAUSE_SAVE_GUC);
	CASE2STR(PAUSE_FAILED);
	CASE2STR(PAUSED);
	CASE2STR(RESUME_WIP);
	CASE2STR(RESUME_SEND_RESUME);
	CASE2STR(RESUME_FAILED);
	CASE2STR(RESUMED);
	CASE2STR(STOP_WIP);
	CASE2STR(STOP_SEND_STOP);
	CASE2STR(STOP_FAILED);
	CASE2STR(STOPPED);
	CASE2STR(MISMATCH);
#undef CASE2STR
	default: return "?";
	}
}
#endif
199
pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)200 static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)
201 {
202 switch (bit) {
203 case XE_GT_SRIOV_STATE_FLR_WAIT_GUC:
204 case XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC:
205 return HZ / 2;
206 case XE_GT_SRIOV_STATE_FLR_WIP:
207 case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG:
208 return 5 * HZ;
209 default:
210 return HZ;
211 }
212 }
213
/*
 * Return the control state container for the given VF.
 * Valid only on a PF with @vfid within the total VF range.
 */
static struct xe_gt_sriov_control_state *pf_pick_vf_control(struct xe_gt *gt, unsigned int vfid)
{
	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
	xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));

	return &gt->sriov.pf.vfs[vfid].control;
}
221
pf_peek_vf_state(struct xe_gt * gt,unsigned int vfid)222 static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid)
223 {
224 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
225
226 return &cs->state;
227 }
228
/* Test whether the given control state bit is currently set for this VF. */
static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid,
			      enum xe_gt_sriov_control_bits bit)
{
	return test_bit(bit, pf_peek_vf_state(gt, vfid));
}
234
pf_dump_vf_state(struct xe_gt * gt,unsigned int vfid)235 static void pf_dump_vf_state(struct xe_gt *gt, unsigned int vfid)
236 {
237 unsigned long state = *pf_peek_vf_state(gt, vfid);
238 enum xe_gt_sriov_control_bits bit;
239
240 if (state) {
241 xe_gt_sriov_dbg_verbose(gt, "VF%u state %#lx%s%*pbl\n",
242 vfid, state, state ? " bits " : "",
243 (int)BITS_PER_LONG, &state);
244 for_each_set_bit(bit, &state, BITS_PER_LONG)
245 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d)\n",
246 vfid, control_bit_to_string(bit), bit);
247 } else {
248 xe_gt_sriov_dbg_verbose(gt, "VF%u state READY\n", vfid);
249 }
250 }
251
pf_expect_vf_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)252 static bool pf_expect_vf_state(struct xe_gt *gt, unsigned int vfid,
253 enum xe_gt_sriov_control_bits bit)
254 {
255 bool result = pf_check_vf_state(gt, vfid, bit);
256
257 if (unlikely(!result))
258 pf_dump_vf_state(gt, vfid);
259
260 return result;
261 }
262
pf_expect_vf_not_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)263 static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid,
264 enum xe_gt_sriov_control_bits bit)
265 {
266 bool result = !pf_check_vf_state(gt, vfid, bit);
267
268 if (unlikely(!result))
269 pf_dump_vf_state(gt, vfid);
270
271 return result;
272 }
273
pf_enter_vf_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)274 static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid,
275 enum xe_gt_sriov_control_bits bit)
276 {
277 if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) {
278 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) enter\n",
279 vfid, control_bit_to_string(bit), bit);
280 return true;
281 }
282 return false;
283 }
284
pf_exit_vf_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)285 static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid,
286 enum xe_gt_sriov_control_bits bit)
287 {
288 if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) {
289 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) exit\n",
290 vfid, control_bit_to_string(bit), bit);
291 return true;
292 }
293 return false;
294 }
295
/*
 * Force-clear a state bit from outside the normal state machine flow
 * (used when a whole WIP phase is torn down); logs which caller
 * triggered the escape.
 */
static void pf_escape_vf_state(struct xe_gt *gt, unsigned int vfid,
			       enum xe_gt_sriov_control_bits bit)
{
	if (pf_exit_vf_state(gt, vfid, bit))
		xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) escaped by %ps\n",
					vfid, control_bit_to_string(bit), bit,
					__builtin_return_address(0));
}
304
/* Flag a PF/GuC disagreement about the VF state and dump the bitmap. */
static void pf_enter_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) {
		xe_gt_sriov_dbg(gt, "VF%u state mismatch detected by %ps\n",
				vfid, __builtin_return_address(0));
		pf_dump_vf_state(gt, vfid);
	}
}
313
/*
 * Clear the MISMATCH marker together with all sticky *_FAILED bits,
 * giving the VF a clean slate for the next control operation.
 */
static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH))
		xe_gt_sriov_dbg(gt, "VF%u state mismatch cleared by %ps\n",
				vfid, __builtin_return_address(0));

	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED);
}
325
/* Report an unexpected state transition; currently just flags a MISMATCH. */
#define pf_enter_vf_state_machine_bug(gt, vfid) ({ \
	pf_enter_vf_mismatch((gt), (vfid)); \
})
329
pf_queue_control_worker(struct xe_gt * gt)330 static void pf_queue_control_worker(struct xe_gt *gt)
331 {
332 struct xe_device *xe = gt_to_xe(gt);
333
334 xe_gt_assert(gt, IS_SRIOV_PF(xe));
335
336 queue_work(xe->sriov.wq, >->sriov.pf.control.worker);
337 }
338
/*
 * Put the VF at the tail of the control worker's processing list
 * (moving it if it was already queued) and kick the worker.
 */
static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid)
{
	struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;

	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));

	/* the list is shared with the worker, protect it */
	spin_lock(&pfc->lock);
	list_move_tail(&gt->sriov.pf.vfs[vfid].control.link, &pfc->list);
	spin_unlock(&pfc->lock);

	pf_queue_control_worker(gt);
}
351
352 static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid);
353 static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid);
354 static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid);
355 static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid);
356
/*
 * Enter the generic work-in-progress state and re-arm the completion
 * that waiters block on. Returns false if WIP was already set.
 */
static bool pf_enter_vf_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

		reinit_completion(&cs->done);
		return true;
	}
	return false;
}
367
/*
 * Leave the generic work-in-progress state: tear down any per-operation
 * WIP sub-state that may still be set, then wake all waiters.
 */
static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

		pf_exit_vf_flr_wip(gt, vfid);
		pf_exit_vf_stop_wip(gt, vfid);
		pf_exit_vf_pause_wip(gt, vfid);
		pf_exit_vf_resume_wip(gt, vfid);

		complete_all(&cs->done);
	}
}
381
pf_wait_vf_wip_done(struct xe_gt * gt,unsigned int vfid,unsigned long timeout)382 static int pf_wait_vf_wip_done(struct xe_gt *gt, unsigned int vfid, unsigned long timeout)
383 {
384 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
385
386 return wait_for_completion_timeout(&cs->done, timeout) ? 0 : -ETIMEDOUT;
387 }
388
/*
 * Transition the VF back to READY: clear all terminal state bits and
 * any pending mismatch, then finish the work-in-progress phase.
 */
static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
{
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}
397
398 /**
399 * DOC: The VF PAUSE state machine
400 *
401 * The VF PAUSE state machine looks like::
402 *
403 * (READY,RESUMED)<-------------<---------------------o---------o
404 * | \ \
405 * pause \ \
406 * | \ \
407 * ....V...........................PAUSE_WIP........ \ \
408 * : \ : o \
409 * : \ o------<-----busy : | \
410 * : \ / / : | |
411 * : PAUSE_SEND_PAUSE ---failed--->----------o--->(PAUSE_FAILED) |
412 * : | \ : | |
413 * : acked rejected---->----------o--->(MISMATCH) /
414 * : | : /
415 * : v : /
416 * : PAUSE_WAIT_GUC : /
417 * : | : /
418 * : done : /
419 * : | : /
420 * : v : /
421 * : PAUSE_GUC_DONE o-----restart
422 * : | :
423 * : | o---<--busy :
424 * : v / / :
425 * : PAUSE_SAVE_GUC :
426 * : / :
427 * : / :
428 * :....o..............o...............o...........:
429 * | | |
430 * completed flr stop
431 * | | |
432 * V .....V..... ......V.....
433 * (PAUSED) : FLR_WIP : : STOP_WIP :
434 * :.........: :..........:
435 *
436 * For the full state machine view, see `The VF state machine`_.
437 */
438
/* Abort the PAUSE state machine, escaping any of its intermediate states. */
static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC);
	}
}
448
/*
 * Enter the terminal PAUSED state; entering it twice is a state machine
 * bug. RESUMED is mutually exclusive with PAUSED and is cleared here.
 */
static void pf_enter_vf_paused(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}
458
/* Successful end of the PAUSE state machine. */
static void pf_enter_vf_pause_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_paused(gt, vfid);
}

/* Failed end of the PAUSE state machine; leaves a sticky PAUSE_FAILED bit. */
static void pf_enter_vf_pause_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
	pf_exit_vf_wip(gt, vfid);
}

/* The GuC rejected the PAUSE command: record the mismatch and fail. */
static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_pause_failed(gt, vfid);
}

/* Schedule saving of the VF GuC state; double entry is a state machine bug. */
static void pf_enter_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
		pf_enter_vf_state_machine_bug(gt, vfid);
}
481
/*
 * Run the PAUSE_SAVE_GUC step: capture the VF GuC state after the pause
 * was confirmed. Returns false if the step was not pending. A -EBUSY
 * save is re-queued; -EIO marks a mismatch; note that the pause itself
 * is still treated as completed on any non-busy outcome.
 */
static bool pf_exit_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
		return false;

	err = xe_gt_sriov_pf_migration_save_guc_state(gt, vfid);
	if (err) {
		/* retry if busy */
		if (err == -EBUSY) {
			pf_enter_vf_pause_save_guc(gt, vfid);
			return true;
		}
		/* give up on error */
		if (err == -EIO)
			pf_enter_vf_mismatch(gt, vfid);
	}

	pf_enter_vf_pause_completed(gt, vfid);
	return true;
}
504
/*
 * Run the PAUSE_GUC_DONE step: the GuC confirmed the pause, proceed
 * with saving the VF GuC state. Returns false if the step was not pending.
 */
static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
		return false;

	pf_enter_vf_pause_save_guc(gt, vfid);
	return true;
}

/* Record a PAUSE_DONE notification and let the worker run the next step. */
static void pf_enter_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
		pf_queue_vf(gt, vfid);
}
519
/* Start waiting for the GuC pause confirmation; double entry is a bug. */
static void pf_enter_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC))
		pf_enter_vf_state_machine_bug(gt, vfid);
}

/* Stop waiting for the GuC confirmation; returns true if we were waiting. */
static bool pf_exit_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
}
530
/* Schedule sending of the PAUSE command; double entry is a bug. */
static void pf_enter_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
538
/*
 * Run the PAUSE_SEND_PAUSE step: send the PAUSE command to the GuC and
 * advance the state machine according to the result. Returns false if
 * the step was not pending.
 */
static bool pf_exit_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
		return false;

	/* GuC may actually send a PAUSE_DONE before we get a RESPONSE */
	pf_enter_pause_wait_guc(gt, vfid);

	err = pf_send_vf_pause(gt, vfid);
	if (err) {
		/* send failed, so we shouldn't expect PAUSE_DONE from GuC */
		pf_exit_pause_wait_guc(gt, vfid);

		if (err == -EBUSY)
			/* CT channel congested - queue another attempt */
			pf_enter_vf_pause_send_pause(gt, vfid);
		else if (err == -EIO)
			pf_enter_vf_pause_rejected(gt, vfid);
		else
			pf_enter_vf_pause_failed(gt, vfid);
	} else {
		/*
		 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
		 * but since GuC didn't complain, we may clear MISMATCH
		 */
		pf_exit_vf_mismatch(gt, vfid);
	}

	return true;
}
570
pf_enter_vf_pause_wip(struct xe_gt * gt,unsigned int vfid)571 static bool pf_enter_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
572 {
573 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
574 pf_enter_vf_wip(gt, vfid);
575 pf_enter_vf_pause_send_pause(gt, vfid);
576 return true;
577 }
578
579 return false;
580 }
581
582 /**
583 * xe_gt_sriov_pf_control_pause_vf - Pause a VF.
584 * @gt: the &xe_gt
585 * @vfid: the VF identifier
586 *
587 * This function is for PF only.
588 *
589 * Return: 0 on success or a negative error code on failure.
590 */
int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_PAUSE_WIP);
	int err;

	/* per the state machine, a stopped VF can't be paused (FLR first) */
	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
		xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
		return -EPERM;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
		xe_gt_sriov_dbg(gt, "VF%u was already paused!\n", vfid);
		return -ESTALE;
	}

	if (!pf_enter_vf_pause_wip(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "VF%u pause already in progress!\n", vfid);
		return -EALREADY;
	}

	/* the actual steps run asynchronously in the control worker */
	err = pf_wait_vf_wip_done(gt, vfid, timeout);
	if (err) {
		xe_gt_sriov_dbg(gt, "VF%u pause didn't finish in %u ms (%pe)\n",
				vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
		return err;
	}

	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
		xe_gt_sriov_info(gt, "VF%u paused!\n", vfid);
		return 0;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED)) {
		xe_gt_sriov_dbg(gt, "VF%u pause failed!\n", vfid);
		return -EIO;
	}

	/* WIP ended but neither PAUSED nor PAUSE_FAILED - another request won */
	xe_gt_sriov_dbg(gt, "VF%u pause was canceled!\n", vfid);
	return -ECANCELED;
}
631
632 /**
633 * DOC: The VF RESUME state machine
634 *
635 * The VF RESUME state machine looks like::
636 *
637 * (PAUSED)<-----------------<------------------------o
638 * | \
639 * resume \
640 * | \
641 * ....V............................RESUME_WIP...... \
642 * : \ : o
643 * : \ o-------<-----busy : |
644 * : \ / / : |
645 * : RESUME_SEND_RESUME ---failed--->--------o--->(RESUME_FAILED)
646 * : / \ : |
647 * : acked rejected---->---------o--->(MISMATCH)
648 * : / :
649 * :....o..............o...............o.....o.....:
650 * | | | \
651 * completed flr stop restart-->(READY)
652 * | | |
653 * V .....V..... ......V.....
654 * (RESUMED) : FLR_WIP : : STOP_WIP :
655 * :.........: :..........:
656 *
657 * For the full state machine view, see `The VF state machine`_.
658 */
659
/* Abort the RESUME state machine, escaping its only intermediate state. */
static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP))
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME);
}

/* Enter the RESUMED state; PAUSED is mutually exclusive and cleared here. */
static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}
673
/* Successful end of the RESUME state machine. */
static void pf_enter_vf_resume_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_resumed(gt, vfid);
}

/* Failed end of the RESUME state machine; leaves a sticky RESUME_FAILED bit. */
static void pf_enter_vf_resume_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
	pf_exit_vf_wip(gt, vfid);
}

/* The GuC rejected the RESUME command: record the mismatch and fail. */
static void pf_enter_vf_resume_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_resume_failed(gt, vfid);
}
690
/* Schedule sending of the RESUME command; double entry is a bug. */
static void pf_enter_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
698
pf_exit_vf_resume_send_resume(struct xe_gt * gt,unsigned int vfid)699 static bool pf_exit_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
700 {
701 int err;
702
703 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
704 return false;
705
706 err = pf_send_vf_resume(gt, vfid);
707 if (err == -EBUSY)
708 pf_enter_vf_resume_send_resume(gt, vfid);
709 else if (err == -EIO)
710 pf_enter_vf_resume_rejected(gt, vfid);
711 else if (err)
712 pf_enter_vf_resume_failed(gt, vfid);
713 else
714 pf_enter_vf_resume_completed(gt, vfid);
715 return true;
716 }
717
pf_enter_vf_resume_wip(struct xe_gt * gt,unsigned int vfid)718 static bool pf_enter_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
719 {
720 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) {
721 pf_enter_vf_wip(gt, vfid);
722 pf_enter_vf_resume_send_resume(gt, vfid);
723 return true;
724 }
725
726 return false;
727 }
728
729 /**
730 * xe_gt_sriov_pf_control_resume_vf - Resume a VF.
731 * @gt: the &xe_gt
732 * @vfid: the VF identifier
733 *
734 * This function is for PF only.
735 *
736 * Return: 0 on success or a negative error code on failure.
737 */
xe_gt_sriov_pf_control_resume_vf(struct xe_gt * gt,unsigned int vfid)738 int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
739 {
740 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESUME_WIP);
741 int err;
742
743 if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
744 xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
745 return -EPERM;
746 }
747
748 if (!pf_enter_vf_resume_wip(gt, vfid)) {
749 xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid);
750 return -EALREADY;
751 }
752
753 err = pf_wait_vf_wip_done(gt, vfid, timeout);
754 if (err)
755 return err;
756
757 if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) {
758 xe_gt_sriov_info(gt, "VF%u resumed!\n", vfid);
759 return 0;
760 }
761
762 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED)) {
763 xe_gt_sriov_dbg(gt, "VF%u resume failed!\n", vfid);
764 return -EIO;
765 }
766
767 xe_gt_sriov_dbg(gt, "VF%u resume was canceled!\n", vfid);
768 return -ECANCELED;
769 }
770
771 /**
772 * DOC: The VF STOP state machine
773 *
774 * The VF STOP state machine looks like::
775 *
776 * (READY,PAUSED,RESUMED)<-------<--------------------o
777 * | \
778 * stop \
779 * | \
780 * ....V..............................STOP_WIP...... \
781 * : \ : o
782 * : \ o----<----busy : |
783 * : \ / / : |
784 * : STOP_SEND_STOP--------failed--->--------o--->(STOP_FAILED)
785 * : / \ : |
786 * : acked rejected-------->--------o--->(MISMATCH)
787 * : / :
788 * :....o..............o...............o...........:
789 * | | |
790 * completed flr restart
791 * | | |
792 * V .....V..... V
793 * (STOPPED) : FLR_WIP : (READY)
794 * :.........:
795 *
796 * For the full state machine view, see `The VF state machine`_.
797 */
798
/* Abort the STOP state machine, escaping its only intermediate state. */
static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP))
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP);
}

/*
 * Enter the terminal STOPPED state; entering it twice is a state machine
 * bug. RESUMED/PAUSED no longer apply and are cleared here.
 */
static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}
815
/* Successful end of the STOP state machine. */
static void pf_enter_vf_stop_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_stopped(gt, vfid);
}

/* Failed end of the STOP state machine; leaves a sticky STOP_FAILED bit. */
static void pf_enter_vf_stop_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
	pf_exit_vf_wip(gt, vfid);
}

/* The GuC rejected the STOP command: record the mismatch and fail. */
static void pf_enter_vf_stop_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_stop_failed(gt, vfid);
}
832
/* Schedule sending of the STOP command; double entry is a bug. */
static void pf_enter_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
840
pf_exit_vf_stop_send_stop(struct xe_gt * gt,unsigned int vfid)841 static bool pf_exit_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
842 {
843 int err;
844
845 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
846 return false;
847
848 err = pf_send_vf_stop(gt, vfid);
849 if (err == -EBUSY)
850 pf_enter_vf_stop_send_stop(gt, vfid);
851 else if (err == -EIO)
852 pf_enter_vf_stop_rejected(gt, vfid);
853 else if (err)
854 pf_enter_vf_stop_failed(gt, vfid);
855 else
856 pf_enter_vf_stop_completed(gt, vfid);
857 return true;
858 }
859
pf_enter_vf_stop_wip(struct xe_gt * gt,unsigned int vfid)860 static bool pf_enter_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
861 {
862 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) {
863 pf_enter_vf_wip(gt, vfid);
864 pf_enter_vf_stop_send_stop(gt, vfid);
865 return true;
866 }
867 return false;
868 }
869
870 /**
871 * xe_gt_sriov_pf_control_stop_vf - Stop a VF.
872 * @gt: the &xe_gt
873 * @vfid: the VF identifier
874 *
875 * This function is for PF only.
876 *
877 * Return: 0 on success or a negative error code on failure.
878 */
xe_gt_sriov_pf_control_stop_vf(struct xe_gt * gt,unsigned int vfid)879 int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
880 {
881 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_STOP_WIP);
882 int err;
883
884 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
885 xe_gt_sriov_dbg(gt, "VF%u was already stopped!\n", vfid);
886 return -ESTALE;
887 }
888
889 if (!pf_enter_vf_stop_wip(gt, vfid)) {
890 xe_gt_sriov_dbg(gt, "VF%u stop already in progress!\n", vfid);
891 return -EALREADY;
892 }
893
894 err = pf_wait_vf_wip_done(gt, vfid, timeout);
895 if (err)
896 return err;
897
898 if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
899 xe_gt_sriov_info(gt, "VF%u stopped!\n", vfid);
900 return 0;
901 }
902
903 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED)) {
904 xe_gt_sriov_dbg(gt, "VF%u stop failed!\n", vfid);
905 return -EIO;
906 }
907
908 xe_gt_sriov_dbg(gt, "VF%u stop was canceled!\n", vfid);
909 return -ECANCELED;
910 }
911
912 /**
913 * DOC: The VF FLR state machine
914 *
915 * The VF FLR state machine looks like::
916 *
917 * (READY,PAUSED,STOPPED)<------------<--------------o
918 * | \
919 * flr \
920 * | \
921 * ....V..........................FLR_WIP........... \
922 * : \ : \
923 * : \ o----<----busy : |
924 * : \ / / : |
925 * : FLR_SEND_START---failed----->-----------o--->(FLR_FAILED)<---o
926 * : | \ : | |
927 * : acked rejected----->-----------o--->(MISMATCH) |
928 * : | : ^ |
929 * : v : | |
930 * : FLR_WAIT_GUC : | |
931 * : | : | |
932 * : done : | |
933 * : | : | |
934 * : v : | |
935 * : FLR_GUC_DONE : | |
936 * : | : | |
937 * : FLR_RESET_CONFIG---failed--->-----------o--------+-----------o
938 * : | : | |
939 * : FLR_RESET_DATA : | |
940 * : | : | |
941 * : FLR_RESET_MMIO : | |
942 * : | : | |
943 * : | o----<----busy : | |
944 * : |/ / : | |
945 * : FLR_SEND_FINISH----failed--->-----------o--------+-----------o
946 * : / \ : |
947 * : acked rejected----->-----------o--------o
948 * : / :
949 * :....o..............................o...........:
950 * | |
951 * completed restart
952 * | /
953 * V /
954 * (READY)<----------<------------o
955 *
956 * For the full state machine view, see `The VF state machine`_.
957 */
958
/* Schedule sending of the FLR_START command; double entry is a bug. */
static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
966
/*
 * Enter the FLR_WIP (work-in-progress) composite state and kick off the
 * first FLR substate (FLR_SEND_START). If an FLR is already in flight
 * for this VF, this is a no-op apart from a debug message.
 */
static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
		xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid);
		return;
	}

	/* generic WIP bookkeeping first, then the first FLR substate */
	pf_enter_vf_wip(gt, vfid);
	pf_enter_vf_flr_send_start(gt, vfid);
}
977
/*
 * Leave the FLR_WIP state and clear any still-pending FLR substate bits,
 * so that a canceled/aborted FLR leaves no stale work for the worker.
 * Substates are escaped in reverse order of the FLR sequence progression.
 */
static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START);
	}
}
990
/* A successful FLR ends with the VF back in the READY state. */
static void pf_enter_vf_flr_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_ready(gt, vfid);
}
995
/*
 * Mark the VF FLR as failed. The notice is printed only on the first
 * transition into FLR_FAILED; WIP is exited unconditionally.
 */
static void pf_enter_vf_flr_failed(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
		xe_gt_sriov_notice(gt, "VF%u FLR failed!\n", vfid);
	pf_exit_vf_wip(gt, vfid);
}
1002
/* GuC rejected our FLR request: record the mismatch, then fail the FLR. */
static void pf_enter_vf_flr_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_flr_failed(gt, vfid);
}
1008
pf_enter_vf_flr_send_finish(struct xe_gt * gt,unsigned int vfid)1009 static void pf_enter_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
1010 {
1011 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
1012 pf_enter_vf_state_machine_bug(gt, vfid);
1013
1014 pf_queue_vf(gt, vfid);
1015 }
1016
pf_exit_vf_flr_send_finish(struct xe_gt * gt,unsigned int vfid)1017 static bool pf_exit_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
1018 {
1019 int err;
1020
1021 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
1022 return false;
1023
1024 err = pf_send_vf_flr_finish(gt, vfid);
1025 if (err == -EBUSY)
1026 pf_enter_vf_flr_send_finish(gt, vfid);
1027 else if (err == -EIO)
1028 pf_enter_vf_flr_rejected(gt, vfid);
1029 else if (err)
1030 pf_enter_vf_flr_failed(gt, vfid);
1031 else
1032 pf_enter_vf_flr_completed(gt, vfid);
1033 return true;
1034 }
1035
/* Move the VF into FLR_RESET_MMIO and schedule the control worker. */
static void pf_enter_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
1043
/*
 * Worker step: leave FLR_RESET_MMIO, sanitize the VF's hardware state,
 * then advance to FLR_SEND_FINISH. Returns true if this step was taken.
 */
static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
		return false;

	xe_gt_sriov_pf_sanitize_hw(gt, vfid);

	pf_enter_vf_flr_send_finish(gt, vfid);
	return true;
}
1054
/* Move the VF into FLR_RESET_DATA and schedule the control worker. */
static void pf_enter_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
1062
/*
 * Worker step: leave FLR_RESET_DATA and reset PF-side per-VF data.
 * The device-level service data is reset only once, from the root tile's
 * primary GT; monitor data is reset per-GT. Advances to FLR_RESET_MMIO.
 * Returns true if this step was taken.
 */
static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
		return false;

	if (xe_tile_is_root(gt->tile) && xe_gt_is_main_type(gt))
		xe_sriov_pf_service_reset_vf(gt_to_xe(gt), vfid);

	xe_gt_sriov_pf_monitor_flr(gt, vfid);

	pf_enter_vf_flr_reset_mmio(gt, vfid);
	return true;
}
1076
/* Move the VF into FLR_RESET_CONFIG and schedule the control worker. */
static void pf_enter_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
1084
pf_exit_vf_flr_reset_config(struct xe_gt * gt,unsigned int vfid)1085 static bool pf_exit_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
1086 {
1087 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
1088 int err;
1089
1090 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
1091 return false;
1092
1093 err = xe_gt_sriov_pf_config_sanitize(gt, vfid, timeout);
1094 if (err)
1095 pf_enter_vf_flr_failed(gt, vfid);
1096 else
1097 pf_enter_vf_flr_reset_data(gt, vfid);
1098 return true;
1099 }
1100
/* Enter FLR_WAIT_GUC: from now on we expect a FLR_DONE event from GuC. */
static void pf_enter_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC))
		pf_enter_vf_state_machine_bug(gt, vfid);
}
1106
/* Leave FLR_WAIT_GUC; returns false if we were not actually waiting. */
static bool pf_exit_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
}
1111
/*
 * Worker step: leave FLR_SEND_START and send the FLR_START command to GuC.
 * FLR_WAIT_GUC is entered *before* sending, since GuC may deliver its
 * FLR_DONE notification before we even receive the H2G response.
 * Returns true if this step was taken.
 */
static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
		return false;

	/* GuC may actually send a FLR_DONE before we get a RESPONSE */
	pf_enter_vf_flr_wait_guc(gt, vfid);

	err = pf_send_vf_flr_start(gt, vfid);
	if (err) {
		/* send failed, so we shouldn't expect FLR_DONE from GuC */
		pf_exit_vf_flr_wait_guc(gt, vfid);

		if (err == -EBUSY)
			pf_enter_vf_flr_send_start(gt, vfid); /* retry later */
		else if (err == -EIO)
			pf_enter_vf_flr_rejected(gt, vfid);
		else
			pf_enter_vf_flr_failed(gt, vfid);
	} else {
		/*
		 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
		 * but since GuC didn't complain, we may clear MISMATCH
		 */
		pf_exit_vf_mismatch(gt, vfid);
	}

	return true;
}
1143
/*
 * Worker step: leave FLR_GUC_DONE and advance to FLR_RESET_CONFIG.
 * Returns true if this step was taken.
 */
static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
		return false;

	pf_enter_vf_flr_reset_config(gt, vfid);
	return true;
}
1152
/*
 * Record that GuC reported FLR done and queue the worker to continue with
 * the PF-side cleanup steps. Unlike the other entry helpers, a failed
 * state transition here is not treated as a state machine bug.
 */
static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
		pf_queue_vf(gt, vfid);
}
1158
1159 /**
1160 * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence.
1161 * @gt: the &xe_gt
1162 * @vfid: the VF identifier
1163 *
1164 * This function is for PF only.
1165 *
1166 * Return: 0 on success or a negative error code on failure.
1167 */
xe_gt_sriov_pf_control_trigger_flr(struct xe_gt * gt,unsigned int vfid)1168 int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid)
1169 {
1170 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP);
1171 int err;
1172
1173 pf_enter_vf_flr_wip(gt, vfid);
1174
1175 err = pf_wait_vf_wip_done(gt, vfid, timeout);
1176 if (err) {
1177 xe_gt_sriov_notice(gt, "VF%u FLR didn't finish in %u ms (%pe)\n",
1178 vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
1179 return err;
1180 }
1181
1182 if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
1183 return -EIO;
1184
1185 return 0;
1186 }
1187
1188 /**
1189 * DOC: The VF FLR Flow with GuC
1190 *
1191 * The VF FLR flow includes several steps::
1192 *
1193 * PF GUC PCI
1194 * ========================================================
1195 * | | |
1196 * (1) | [ ] <----- FLR --|
1197 * | [ ] :
1198 * (2) [ ] <-------- NOTIFY FLR --[ ]
1199 * [ ] |
1200 * (3) [ ] |
1201 * [ ] |
1202 * [ ]-- START FLR ---------> [ ]
1203 * | [ ]
1204 * (4) | [ ]
1205 * | [ ]
1206 * [ ] <--------- FLR DONE -- [ ]
1207 * [ ] |
1208 * (5) [ ] |
1209 * [ ] |
1210 * [ ]-- FINISH FLR --------> [ ]
1211 * | |
1212 *
1213 * * Step 1: PCI HW generates interrupt to the GuC about VF FLR
1214 * * Step 2: GuC FW sends G2H notification to the PF about VF FLR
1215 * * Step 2a: on some platforms G2H is only received from root GuC
1216 * * Step 3: PF sends H2G request to the GuC to start VF FLR sequence
1217 * * Step 3a: on some platforms PF must send H2G to all other GuCs
1218 * * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done
1219 * * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished
1220 */
1221
needs_dispatch_flr(struct xe_device * xe)1222 static bool needs_dispatch_flr(struct xe_device *xe)
1223 {
1224 return xe->info.platform == XE_PVC;
1225 }
1226
/*
 * Handle the GuC "VF FLR" notification: start the FLR state machine,
 * either on this GT only or, where required, on every GT of the device.
 */
static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_gt *other_gt;
	unsigned int id;

	xe_gt_sriov_info(gt, "VF%u FLR\n", vfid);

	if (!needs_dispatch_flr(xe)) {
		pf_enter_vf_flr_wip(gt, vfid);
		return;
	}

	for_each_gt(other_gt, xe, id)
		pf_enter_vf_flr_wip(other_gt, vfid);
}
1242
/*
 * Handle the GuC "VF FLR done" notification. It is only meaningful while
 * we are in FLR_WAIT_GUC; otherwise the event is out of order and we
 * record a state mismatch instead of advancing the state machine.
 */
static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid)
{
	if (!pf_exit_vf_flr_wait_guc(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "Received out of order 'VF%u FLR done'\n", vfid);
		pf_enter_vf_mismatch(gt, vfid);
		return;
	}

	pf_enter_vf_flr_guc_done(gt, vfid);
}
1253
/*
 * Handle the GuC "VF PAUSE done" notification. It is only meaningful
 * while we are in PAUSE_WAIT_GUC; otherwise the event is out of order
 * and we record a state mismatch instead of advancing the state machine.
 */
static void pf_handle_vf_pause_done(struct xe_gt *gt, u32 vfid)
{
	if (!pf_exit_pause_wait_guc(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "Received out of order 'VF%u PAUSE done'\n", vfid);
		pf_enter_vf_mismatch(gt, vfid);
		return;
	}

	pf_enter_vf_pause_guc_done(gt, vfid);
}
1264
/*
 * Dispatch a single GuC VF-state notification to its handler.
 * Returns -EPROTO for an out-of-range VFID, -ENOPKG for an unknown event,
 * 0 otherwise.
 */
static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid)
{
	xe_gt_sriov_dbg_verbose(gt, "received VF%u event %#x\n", vfid, eventid);

	/* validate before use; VF IDs start at 1 (0 is the PF, handled elsewhere) */
	if (vfid > xe_gt_sriov_pf_get_totalvfs(gt))
		return -EPROTO;

	switch (eventid) {
	case GUC_PF_NOTIFY_VF_FLR:
		pf_handle_vf_flr(gt, vfid);
		break;
	case GUC_PF_NOTIFY_VF_FLR_DONE:
		pf_handle_vf_flr_done(gt, vfid);
		break;
	case GUC_PF_NOTIFY_VF_PAUSE_DONE:
		pf_handle_vf_pause_done(gt, vfid);
		break;
	case GUC_PF_NOTIFY_VF_FIXUP_DONE:
		/* known event, but nothing to do */
		break;
	default:
		return -ENOPKG;
	}
	return 0;
}
1289
/*
 * Handle a notification addressed to the PF itself (VFID 0).
 * Returns -ENOPKG for an unknown event, 0 otherwise.
 */
static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid)
{
	switch (eventid) {
	case GUC_PF_NOTIFY_VF_ENABLE:
		/*
		 * NOTE(review): the event seemingly carries no enable/disable
		 * flag, hence the message prints both words — confirm against
		 * the GuC ABI.
		 */
		xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n",
					str_enabled_disabled(true),
					str_enabled_disabled(false));
		break;
	default:
		return -ENOPKG;
	}
	return 0;
}
1303
1304 /**
1305 * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC.
1306 * @gt: the &xe_gt
1307 * @msg: the G2H message
1308 * @len: the length of the G2H message
1309 *
1310 * This function is for PF only.
1311 *
1312 * Return: 0 on success or a negative error code on failure.
1313 */
xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt * gt,const u32 * msg,u32 len)1314 int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
1315 {
1316 u32 vfid;
1317 u32 eventid;
1318
1319 xe_gt_assert(gt, len);
1320 xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
1321 xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
1322 xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
1323 GUC_ACTION_GUC2PF_VF_STATE_NOTIFY);
1324
1325 if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt))))
1326 return -EPROTO;
1327
1328 if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0])))
1329 return -EPFNOSUPPORT;
1330
1331 if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN))
1332 return -EPROTO;
1333
1334 vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]);
1335 eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]);
1336
1337 return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid);
1338 }
1339
pf_process_vf_state_machine(struct xe_gt * gt,unsigned int vfid)1340 static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
1341 {
1342 if (pf_exit_vf_flr_send_start(gt, vfid))
1343 return true;
1344
1345 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) {
1346 xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
1347 control_bit_to_string(XE_GT_SRIOV_STATE_FLR_WAIT_GUC));
1348 return false;
1349 }
1350
1351 if (pf_exit_vf_flr_guc_done(gt, vfid))
1352 return true;
1353
1354 if (pf_exit_vf_flr_reset_config(gt, vfid))
1355 return true;
1356
1357 if (pf_exit_vf_flr_reset_data(gt, vfid))
1358 return true;
1359
1360 if (pf_exit_vf_flr_reset_mmio(gt, vfid))
1361 return true;
1362
1363 if (pf_exit_vf_flr_send_finish(gt, vfid))
1364 return true;
1365
1366 if (pf_exit_vf_stop_send_stop(gt, vfid))
1367 return true;
1368
1369 if (pf_exit_vf_pause_send_pause(gt, vfid))
1370 return true;
1371
1372 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) {
1373 xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
1374 control_bit_to_string(XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC));
1375 return true;
1376 }
1377
1378 if (pf_exit_vf_pause_guc_done(gt, vfid))
1379 return true;
1380
1381 if (pf_exit_vf_pause_save_guc(gt, vfid))
1382 return true;
1383
1384 if (pf_exit_vf_resume_send_resume(gt, vfid))
1385 return true;
1386
1387 return false;
1388 }
1389
/*
 * Recover the VFID from a &xe_gt_sriov_control_state pointer: the control
 * state is embedded in the per-VF metadata array, so pointer arithmetic
 * against the array base yields the index, which equals the VFID.
 */
static unsigned int pf_control_state_index(struct xe_gt *gt,
					   struct xe_gt_sriov_control_state *cs)
{
	return container_of(cs, struct xe_gt_sriov_metadata, control) - gt->sriov.pf.vfs;
}
1395
/*
 * Pop one VF from the control work list and run a single state machine
 * step for it. If that step produced follow-up work, re-queue the VF;
 * otherwise, if other VFs are still queued, re-schedule the worker.
 */
static void pf_worker_find_work(struct xe_gt *gt)
{
	struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;
	struct xe_gt_sriov_control_state *cs;
	unsigned int vfid;
	bool empty;
	bool more;

	/* detach the first queued entry; snapshot emptiness under the lock */
	spin_lock(&pfc->lock);
	cs = list_first_entry_or_null(&pfc->list, struct xe_gt_sriov_control_state, link);
	if (cs)
		list_del_init(&cs->link);
	empty = list_empty(&pfc->list);
	spin_unlock(&pfc->lock);

	if (!cs)
		return;

	/* VF metadata structures are indexed by the VFID */
	vfid = pf_control_state_index(gt, cs);
	xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));

	more = pf_process_vf_state_machine(gt, vfid);
	if (more)
		pf_queue_vf(gt, vfid);
	else if (!empty)
		pf_queue_control_worker(gt);
}
1424
/* Work item callback for the PF control worker; PF-only by assertion. */
static void control_worker_func(struct work_struct *w)
{
	struct xe_gt *gt = container_of(w, struct xe_gt, sriov.pf.control.worker);

	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
	pf_worker_find_work(gt);
}
1432
/* Cancel the control worker and wait for any in-flight execution to end. */
static void pf_stop_worker(struct xe_gt *gt)
{
	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
	cancel_work_sync(&gt->sriov.pf.control.worker);
}
1438
/* DRM-managed teardown: stop the control worker when the device goes away. */
static void control_fini_action(struct drm_device *dev, void *data)
{
	struct xe_gt *gt = data;

	pf_stop_worker(gt);
}
1445
1446 /**
1447 * xe_gt_sriov_pf_control_init() - Initialize PF's control data.
1448 * @gt: the &xe_gt
1449 *
1450 * This function is for PF only.
1451 *
1452 * Return: 0 on success or a negative error code on failure.
1453 */
xe_gt_sriov_pf_control_init(struct xe_gt * gt)1454 int xe_gt_sriov_pf_control_init(struct xe_gt *gt)
1455 {
1456 struct xe_device *xe = gt_to_xe(gt);
1457 unsigned int n, totalvfs;
1458
1459 xe_gt_assert(gt, IS_SRIOV_PF(xe));
1460
1461 totalvfs = xe_sriov_pf_get_totalvfs(xe);
1462 for (n = 0; n <= totalvfs; n++) {
1463 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, n);
1464
1465 init_completion(&cs->done);
1466 INIT_LIST_HEAD(&cs->link);
1467 }
1468
1469 spin_lock_init(>->sriov.pf.control.lock);
1470 INIT_LIST_HEAD(>->sriov.pf.control.list);
1471 INIT_WORK(>->sriov.pf.control.worker, control_worker_func);
1472
1473 return drmm_add_action_or_reset(&xe->drm, control_fini_action, gt);
1474 }
1475
1476 /**
1477 * xe_gt_sriov_pf_control_restart() - Restart SR-IOV control data after a GT reset.
1478 * @gt: the &xe_gt
1479 *
1480 * Any per-VF status maintained by the PF or any ongoing VF control activity
1481 * performed by the PF must be reset or cancelled when the GT is reset.
1482 *
1483 * This function is for PF only.
1484 */
xe_gt_sriov_pf_control_restart(struct xe_gt * gt)1485 void xe_gt_sriov_pf_control_restart(struct xe_gt *gt)
1486 {
1487 struct xe_device *xe = gt_to_xe(gt);
1488 unsigned int n, totalvfs;
1489
1490 xe_gt_assert(gt, IS_SRIOV_PF(xe));
1491
1492 pf_stop_worker(gt);
1493
1494 totalvfs = xe_sriov_pf_get_totalvfs(xe);
1495 for (n = 1; n <= totalvfs; n++)
1496 pf_enter_vf_ready(gt, n);
1497 }
1498