xref: /linux/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c (revision 6dfafbd0299a60bfb5d5e277fdf100037c7ded07)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2023-2024 Intel Corporation
4  */
5 
6 #include <drm/drm_managed.h>
7 
8 #include "abi/guc_actions_sriov_abi.h"
9 
10 #include "xe_device.h"
11 #include "xe_gt.h"
12 #include "xe_gt_sriov_pf.h"
13 #include "xe_gt_sriov_pf_config.h"
14 #include "xe_gt_sriov_pf_control.h"
15 #include "xe_gt_sriov_pf_helpers.h"
16 #include "xe_gt_sriov_pf_migration.h"
17 #include "xe_gt_sriov_pf_monitor.h"
18 #include "xe_gt_sriov_printk.h"
19 #include "xe_guc_ct.h"
20 #include "xe_sriov.h"
21 #include "xe_sriov_packet.h"
22 #include "xe_sriov_packet_types.h"
23 #include "xe_sriov_pf_control.h"
24 #include "xe_sriov_pf_migration.h"
25 #include "xe_sriov_pf_service.h"
26 #include "xe_tile.h"
27 
28 static const char *control_cmd_to_string(u32 cmd)
29 {
30 	switch (cmd) {
31 	case GUC_PF_TRIGGER_VF_PAUSE:
32 		return "PAUSE";
33 	case GUC_PF_TRIGGER_VF_RESUME:
34 		return "RESUME";
35 	case GUC_PF_TRIGGER_VF_STOP:
36 		return "STOP";
37 	case GUC_PF_TRIGGER_VF_FLR_START:
38 		return "FLR_START";
39 	case GUC_PF_TRIGGER_VF_FLR_FINISH:
40 		return "FLR_FINISH";
41 	default:
42 		return "<unknown>";
43 	}
44 }
45 
/*
 * Build and send a PF2GUC_VF_CONTROL request over the CT buffer and wait
 * for the response.  A positive return value from the blocking send is
 * unexpected here and is reported as a protocol error (-EPROTO).
 */
static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd)
{
	u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = {
		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL),
		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid),
		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd),
	};
	int ret;

	ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
	return ret > 0 ? -EPROTO : ret;
}
60 
61 static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd)
62 {
63 	int err;
64 
65 	xe_gt_assert(gt, vfid != PFID);
66 	xe_gt_sriov_dbg_verbose(gt, "sending VF%u control command %s\n",
67 				vfid, control_cmd_to_string(cmd));
68 
69 	err = guc_action_vf_control_cmd(&gt->uc.guc, vfid, cmd);
70 	if (unlikely(err))
71 		xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n",
72 				vfid, control_cmd_to_string(cmd), ERR_PTR(err));
73 	return err;
74 }
75 
/* Ask the GuC to pause VF activity. */
static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE);
}
80 
/* Ask the GuC to resume VF activity. */
static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME);
}
85 
/* Ask the GuC to stop VF activity. */
static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP);
}
90 
/* Tell the GuC that a VF FLR (function level reset) is starting. */
static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START);
}
95 
/* Tell the GuC that the VF FLR handling on the PF side has finished. */
static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH);
}
100 
101 /**
102  * DOC: The VF state machine
103  *
104  * The simplified VF state machine could be presented as::
105  *
106  *	               pause--------------------------o
107  *	              /                               |
108  *	             /                                v
109  *	      (READY)<------------------resume-----(PAUSED)
110  *	         ^   \                             /    /
111  *	         |    \                           /    /
112  *	         |     stop---->(STOPPED)<----stop    /
113  *	         |                  /                /
114  *	         |                 /                /
115  *	         o--------<-----flr                /
116  *	          \                               /
117  *	           o------<--------------------flr
118  *
119  * Where:
120  *
121  * * READY - represents a state in which VF is fully operable
122  * * PAUSED - represents a state in which VF activity is temporarily suspended
123  * * STOPPED - represents a state in which VF activity is definitely halted
124  * * pause - represents a request to temporarily suspend VF activity
125  * * resume - represents a request to resume VF activity
126  * * stop - represents a request to definitely halt VF activity
127  * * flr - represents a request to perform VF FLR to restore VF activity
128  *
129  * However, each state transition requires additional steps that involves
130  * communication with GuC that might fail or be interrupted by other requests::
131  *
132  *	                   .................................WIP....
133  *	                   :                                      :
134  *	          pause--------------------->PAUSE_WIP----------------------------o
135  *	         /         :                /         \           :               |
136  *	        /          :    o----<---stop          flr--o     :               |
137  *	       /           :    |           \         /     |     :               V
138  *	(READY,RESUMED)<--------+------------RESUME_WIP<----+--<-----resume--(PAUSED)
139  *	  ^ \  \           :    |                           |     :          /   /
140  *	  |  \  \          :    |                           |     :         /   /
141  *	  |   \  \         :    |                           |     :        /   /
142  *	  |    \  \        :    o----<----------------------+--<-------stop   /
143  *	  |     \  \       :    |                           |     :          /
144  *	  |      \  \      :    V                           |     :         /
145  *	  |       \  stop----->STOP_WIP---------flr--->-----o     :        /
146  *	  |        \       :    |                           |     :       /
147  *	  |         \      :    |                           V     :      /
148  *	  |          flr--------+----->----------------->FLR_WIP<-----flr
149  *	  |                :    |                        /  ^     :
150  *	  |                :    |                       /   |     :
151  *	  o--------<-------:----+-----<----------------o    |     :
152  *	                   :    |                           |     :
153  *	                   :....|...........................|.....:
154  *	                        |                           |
155  *	                        V                           |
156  *	                     (STOPPED)--------------------flr
157  *
158  * For details about each internal WIP state machine see:
159  *
160  * * `The VF PAUSE state machine`_
161  * * `The VF RESUME state machine`_
162  * * `The VF STOP state machine`_
163  * * `The VF FLR state machine`_
164  */
165 
#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
/* Decode a VF control state bit into its symbolic name (debug builds only). */
static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
{
	switch (bit) {
#define CASE2STR(_X) \
	case XE_GT_SRIOV_STATE_##_X: return #_X
	CASE2STR(WIP);
	CASE2STR(FLR_WIP);
	CASE2STR(FLR_SEND_START);
	CASE2STR(FLR_WAIT_GUC);
	CASE2STR(FLR_GUC_DONE);
	CASE2STR(FLR_SYNC);
	CASE2STR(FLR_RESET_CONFIG);
	CASE2STR(FLR_RESET_DATA);
	CASE2STR(FLR_RESET_MMIO);
	CASE2STR(FLR_SEND_FINISH);
	CASE2STR(FLR_FAILED);
	CASE2STR(PAUSE_WIP);
	CASE2STR(PAUSE_SEND_PAUSE);
	CASE2STR(PAUSE_WAIT_GUC);
	CASE2STR(PAUSE_GUC_DONE);
	CASE2STR(PAUSE_FAILED);
	CASE2STR(PAUSED);
	CASE2STR(SAVE_WIP);
	CASE2STR(SAVE_PROCESS_DATA);
	CASE2STR(SAVE_WAIT_DATA);
	CASE2STR(SAVE_DATA_DONE);
	CASE2STR(SAVE_FAILED);
	CASE2STR(SAVED);
	CASE2STR(RESTORE_WIP);
	CASE2STR(RESTORE_PROCESS_DATA);
	CASE2STR(RESTORE_WAIT_DATA);
	CASE2STR(RESTORE_DATA_DONE);
	CASE2STR(RESTORE_FAILED);
	CASE2STR(RESTORED);
	CASE2STR(RESUME_WIP);
	CASE2STR(RESUME_SEND_RESUME);
	CASE2STR(RESUME_FAILED);
	CASE2STR(RESUMED);
	CASE2STR(STOP_WIP);
	CASE2STR(STOP_SEND_STOP);
	CASE2STR(STOP_FAILED);
	CASE2STR(STOPPED);
	CASE2STR(MISMATCH);
#undef  CASE2STR
	default: return "?";
	}
}
#endif
215 
216 static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)
217 {
218 	switch (bit) {
219 	case XE_GT_SRIOV_STATE_FLR_WAIT_GUC:
220 	case XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC:
221 		return HZ / 2;
222 	case XE_GT_SRIOV_STATE_FLR_WIP:
223 	case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG:
224 		return 5 * HZ;
225 	case XE_GT_SRIOV_STATE_RESTORE_WIP:
226 		return 20 * HZ;
227 	default:
228 		return HZ;
229 	}
230 }
231 
/* Return the control state tracking structure for the given VF (PF only). */
static struct xe_gt_sriov_control_state *pf_pick_vf_control(struct xe_gt *gt, unsigned int vfid)
{
	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
	xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));

	return &gt->sriov.pf.vfs[vfid].control;
}
239 
/* Return a pointer to the VF's state bitmap, suitable for the bitops API. */
static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid)
{
	struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

	return cs->state;
}
246 
/* Non-destructively test whether the given state bit is set for the VF. */
static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid,
			      enum xe_gt_sriov_control_bits bit)
{
	return test_bit(bit, pf_peek_vf_state(gt, vfid));
}
252 
253 static void pf_dump_vf_state(struct xe_gt *gt, unsigned int vfid)
254 {
255 	unsigned long state = *pf_peek_vf_state(gt, vfid);
256 	enum xe_gt_sriov_control_bits bit;
257 
258 	if (state) {
259 		xe_gt_sriov_dbg_verbose(gt, "VF%u state %#lx%s%*pbl\n",
260 					vfid, state, state ? " bits " : "",
261 					(int)BITS_PER_LONG, &state);
262 		for_each_set_bit(bit, &state, BITS_PER_LONG)
263 			xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d)\n",
264 						vfid, control_bit_to_string(bit), bit);
265 	} else {
266 		xe_gt_sriov_dbg_verbose(gt, "VF%u state READY\n", vfid);
267 	}
268 }
269 
270 static bool pf_expect_vf_state(struct xe_gt *gt, unsigned int vfid,
271 			       enum xe_gt_sriov_control_bits bit)
272 {
273 	bool result = pf_check_vf_state(gt, vfid, bit);
274 
275 	if (unlikely(!result))
276 		pf_dump_vf_state(gt, vfid);
277 
278 	return result;
279 }
280 
281 static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid,
282 				   enum xe_gt_sriov_control_bits bit)
283 {
284 	bool result = !pf_check_vf_state(gt, vfid, bit);
285 
286 	if (unlikely(!result))
287 		pf_dump_vf_state(gt, vfid);
288 
289 	return result;
290 }
291 
/* Log a state bit transition ("enter"/"exit") for debugging. */
static void pf_track_vf_state(struct xe_gt *gt, unsigned int vfid,
			      enum xe_gt_sriov_control_bits bit,
			      const char *what)
{
	xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) %s\n",
				vfid, control_bit_to_string(bit), bit, what);
}
299 
300 static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid,
301 			      enum xe_gt_sriov_control_bits bit)
302 {
303 	if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) {
304 		pf_track_vf_state(gt, vfid, bit, "enter");
305 		return true;
306 	}
307 	return false;
308 }
309 
310 static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid,
311 			     enum xe_gt_sriov_control_bits bit)
312 {
313 	if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) {
314 		pf_track_vf_state(gt, vfid, bit, "exit");
315 		return true;
316 	}
317 	return false;
318 }
319 
/*
 * Forcibly clear a state bit on a path that is abandoning the normal flow
 * (e.g. WIP teardown), logging the caller responsible for the escape.
 */
static void pf_escape_vf_state(struct xe_gt *gt, unsigned int vfid,
			       enum xe_gt_sriov_control_bits bit)
{
	if (pf_exit_vf_state(gt, vfid, bit))
		xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) escaped by %ps\n",
					vfid, control_bit_to_string(bit), bit,
					__builtin_return_address(0));
}
328 
/* Record a state machine mismatch and dump the state on first detection. */
static void pf_enter_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) {
		xe_gt_sriov_dbg(gt, "VF%u state mismatch detected by %ps\n",
				vfid, __builtin_return_address(0));
		pf_dump_vf_state(gt, vfid);
	}
}
337 
/*
 * Clear the MISMATCH marker together with all sticky *_FAILED bits, since a
 * successfully completed operation invalidates earlier failure records.
 */
static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH))
		xe_gt_sriov_dbg(gt, "VF%u state mismatch cleared by %ps\n",
				vfid, __builtin_return_address(0));

	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED);
}
351 
/* Flag an impossible state transition; currently just records a MISMATCH. */
#define pf_enter_vf_state_machine_bug(gt, vfid) ({	\
	pf_enter_vf_mismatch((gt), (vfid));		\
})
355 
/* Kick the PF control worker on the device-wide SR-IOV workqueue. */
static void pf_queue_control_worker(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	xe_gt_assert(gt, IS_SRIOV_PF(xe));

	queue_work(xe->sriov.wq, &gt->sriov.pf.control.worker);
}
364 
/*
 * Put the VF at the tail of the control work list (moving it if already
 * queued) and make sure the control worker will run to process it.
 */
static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid)
{
	struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;

	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));

	/* pfc->lock protects the shared work list */
	spin_lock(&pfc->lock);
	list_move_tail(&gt->sriov.pf.vfs[vfid].control.link, &pfc->list);
	spin_unlock(&pfc->lock);

	pf_queue_control_worker(gt);
}
377 
378 static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid);
379 static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid);
380 static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid);
381 static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid);
382 static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid);
383 static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid);
384 
/*
 * Mark the VF as having a control operation in progress and re-arm the
 * completion that waiters block on.  Returns false if WIP was already set.
 */
static bool pf_enter_vf_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

		reinit_completion(&cs->done);
		return true;
	}
	return false;
}
395 
/*
 * End the VF's work-in-progress: tear down any sub-state machine that may
 * still be active, then wake all waiters blocked on the WIP completion.
 */
static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

		/* only one of these is expected to be active at a time */
		pf_exit_vf_flr_wip(gt, vfid);
		pf_exit_vf_stop_wip(gt, vfid);
		pf_exit_vf_save_wip(gt, vfid);
		pf_exit_vf_restore_wip(gt, vfid);
		pf_exit_vf_pause_wip(gt, vfid);
		pf_exit_vf_resume_wip(gt, vfid);

		complete_all(&cs->done);
	}
}
411 
/*
 * Wait (up to @timeout jiffies) for the VF's in-progress control operation
 * to complete.  Return 0 on completion or -ETIMEDOUT.
 */
static int pf_wait_vf_wip_done(struct xe_gt *gt, unsigned int vfid, unsigned long timeout)
{
	struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

	return wait_for_completion_timeout(&cs->done, timeout) ? 0 : -ETIMEDOUT;
}
418 
/*
 * Move the VF to the READY state by clearing every terminal state bit,
 * any recorded mismatch/failures, and the WIP marker.
 */
static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
{
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}
429 
430 /**
431  * DOC: The VF PAUSE state machine
432  *
433  * The VF PAUSE state machine looks like::
434  *
435  *	 (READY,RESUMED)<-------------<---------------------o---------o
436  *	    |                                                \         \
437  *	   pause                                              \         \
438  *	    |                                                  \         \
439  *	....V...........................PAUSE_WIP........       \         \
440  *	:    \                                          :        o         \
441  *	:     \   o------<-----busy                     :        |          \
442  *	:      \ /              /                       :        |           |
443  *	:       PAUSE_SEND_PAUSE ---failed--->----------o--->(PAUSE_FAILED)  |
444  *	:        |              \                       :        |           |
445  *	:      acked             rejected---->----------o--->(MISMATCH)     /
446  *	:        |                                      :                  /
447  *	:        v                                      :                 /
448  *	:       PAUSE_WAIT_GUC                          :                /
449  *	:        |                                      :               /
450  *	:       done                                    :              /
451  *	:        |                                      :             /
452  *	:        v                                      :            /
453  *	:       PAUSE_GUC_DONE                          o-----restart
454  *	:        |                                      :
455  *	:        |   o---<--busy                        :
456  *	:       /                                       :
457  *	:      /                                        :
458  *	:     /                                         :
459  *	:....o..............o...............o...........:
460  *	     |              |               |
461  *	  completed        flr             stop
462  *	     |              |               |
463  *	     V         .....V.....    ......V.....
464  *	 (PAUSED)      : FLR_WIP :    : STOP_WIP :
465  *	               :.........:    :..........:
466  *
467  * For the full state machine view, see `The VF state machine`_.
468  */
469 
/* Abandon a pause in progress, escaping any intermediate PAUSE_* step. */
static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE);
	}
}
478 
/* Final transition into PAUSED; RESUMED is mutually exclusive with it. */
static void pf_enter_vf_paused(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}
488 
/* A completed pause lands the VF in PAUSED. */
static void pf_enter_vf_pause_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_paused(gt, vfid);
}
493 
/* Record a pause failure and end the work-in-progress. */
static void pf_enter_vf_pause_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
	pf_exit_vf_wip(gt, vfid);
}
499 
/* A GuC-rejected pause is both a mismatch and a failure. */
static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_pause_failed(gt, vfid);
}
505 
/*
 * Consume the PAUSE_GUC_DONE step and complete the pause.
 * Returns false when the step was not pending.
 */
static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
		return false;

	pf_enter_vf_pause_completed(gt, vfid);
	return true;
}
514 
/* GuC confirmed the pause; queue the VF so the worker finishes the job. */
static void pf_enter_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
		pf_queue_vf(gt, vfid);
}
520 
/* Start waiting for the PAUSE_DONE notification from the GuC. */
static void pf_enter_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC))
		pf_enter_vf_state_machine_bug(gt, vfid);
}
526 
/* Stop waiting for GuC's PAUSE_DONE; true if we were actually waiting. */
static bool pf_exit_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
}
531 
/* Schedule sending of the PAUSE command (done later by the worker). */
static void pf_enter_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
539 
/*
 * Perform the PAUSE_SEND_PAUSE step: send the PAUSE command to the GuC and
 * advance the state machine based on the result.  Returns true when the
 * step was pending and has been processed, false otherwise.
 */
static bool pf_exit_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
		return false;

	/* GuC may actually send a PAUSE_DONE before we get a RESPONSE */
	pf_enter_pause_wait_guc(gt, vfid);

	err = pf_send_vf_pause(gt, vfid);
	if (err) {
		/* send failed, so we shouldn't expect PAUSE_DONE from GuC */
		pf_exit_pause_wait_guc(gt, vfid);

		if (err == -EBUSY)
			/* CTB busy: reschedule the send step and retry */
			pf_enter_vf_pause_send_pause(gt, vfid);
		else if (err == -EIO)
			/* GuC explicitly rejected the command */
			pf_enter_vf_pause_rejected(gt, vfid);
		else
			pf_enter_vf_pause_failed(gt, vfid);
	} else {
		/*
		 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
		 * but since GuC didn't complain, we may clear MISMATCH
		 */
		pf_exit_vf_mismatch(gt, vfid);
	}

	return true;
}
571 
/*
 * Start a pause operation: set PAUSE_WIP, the global WIP marker, and the
 * first step (send PAUSE).  Returns false if a pause is already running.
 */
static bool pf_enter_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
		pf_enter_vf_wip(gt, vfid);
		pf_enter_vf_pause_send_pause(gt, vfid);
		return true;
	}

	return false;
}
582 
/**
 * xe_gt_sriov_pf_control_pause_vf - Pause a VF.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_PAUSE_WIP);
	int err;

	/* a stopped VF cannot be paused (see the VF state machine) */
	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
		xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
		return -EPERM;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
		xe_gt_sriov_dbg(gt, "VF%u was already paused!\n", vfid);
		return -ESTALE;
	}

	/* only one pause operation may be in progress at a time */
	if (!pf_enter_vf_pause_wip(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "VF%u pause already in progress!\n", vfid);
		return -EALREADY;
	}

	/* the actual steps run asynchronously in the control worker */
	err = pf_wait_vf_wip_done(gt, vfid, timeout);
	if (err) {
		xe_gt_sriov_dbg(gt, "VF%u pause didn't finish in %u ms (%pe)\n",
				vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
		return err;
	}

	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
		xe_gt_sriov_dbg(gt, "VF%u paused!\n", vfid);
		return 0;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED)) {
		xe_gt_sriov_dbg(gt, "VF%u pause failed!\n", vfid);
		return -EIO;
	}

	/* WIP ended but we're neither PAUSED nor PAUSE_FAILED */
	xe_gt_sriov_dbg(gt, "VF%u pause was canceled!\n", vfid);
	return -ECANCELED;
}
632 
633 /**
634  * DOC: The VF RESUME state machine
635  *
636  * The VF RESUME state machine looks like::
637  *
638  *	 (PAUSED)<-----------------<------------------------o
639  *	    |                                                \
640  *	   resume                                             \
641  *	    |                                                  \
642  *	....V............................RESUME_WIP......       \
643  *	:    \                                          :        o
644  *	:     \   o-------<-----busy                    :        |
645  *	:      \ /                /                     :        |
646  *	:       RESUME_SEND_RESUME ---failed--->--------o--->(RESUME_FAILED)
647  *	:       /                \                      :        |
648  *	:    acked                rejected---->---------o--->(MISMATCH)
649  *	:     /                                         :
650  *	:....o..............o...............o.....o.....:
651  *	     |              |               |      \
652  *	  completed        flr            stop      restart-->(READY)
653  *	     |              |               |
654  *	     V         .....V.....    ......V.....
655  *	 (RESUMED)     : FLR_WIP :    : STOP_WIP :
656  *	               :.........:    :..........:
657  *
658  * For the full state machine view, see `The VF state machine`_.
659  */
660 
/* Abandon a resume in progress, escaping the pending send step. */
static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP))
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME);
}
666 
/* Final transition into RESUMED; clears states left over from PAUSED. */
static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}
676 
/* A completed resume lands the VF in RESUMED. */
static void pf_enter_vf_resume_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_resumed(gt, vfid);
}
681 
/* Record a resume failure and end the work-in-progress. */
static void pf_enter_vf_resume_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
	pf_exit_vf_wip(gt, vfid);
}
687 
/* A GuC-rejected resume is both a mismatch and a failure. */
static void pf_enter_vf_resume_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_resume_failed(gt, vfid);
}
693 
/* Schedule sending of the RESUME command (done later by the worker). */
static void pf_enter_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
701 
702 static bool pf_exit_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
703 {
704 	int err;
705 
706 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
707 		return false;
708 
709 	err = pf_send_vf_resume(gt, vfid);
710 	if (err == -EBUSY)
711 		pf_enter_vf_resume_send_resume(gt, vfid);
712 	else if (err == -EIO)
713 		pf_enter_vf_resume_rejected(gt, vfid);
714 	else if (err)
715 		pf_enter_vf_resume_failed(gt, vfid);
716 	else
717 		pf_enter_vf_resume_completed(gt, vfid);
718 	return true;
719 }
720 
/*
 * Start a resume operation: set RESUME_WIP, the global WIP marker, and the
 * first step (send RESUME).  Returns false if a resume is already running.
 */
static bool pf_enter_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) {
		pf_enter_vf_wip(gt, vfid);
		pf_enter_vf_resume_send_resume(gt, vfid);
		return true;
	}

	return false;
}
731 
/**
 * xe_gt_sriov_pf_control_resume_vf - Resume a VF.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESUME_WIP);
	int err;

	/* only a paused VF can be resumed (see the VF state machine) */
	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
		xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
		return -EPERM;
	}

	/* migration save/restore extend PAUSED and must finish first */
	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
		xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid);
		return -EBUSY;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
		xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid);
		return -EBUSY;
	}

	if (!pf_enter_vf_resume_wip(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid);
		return -EALREADY;
	}

	/* the actual steps run asynchronously in the control worker */
	err = pf_wait_vf_wip_done(gt, vfid, timeout);
	if (err)
		return err;

	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) {
		xe_gt_sriov_dbg(gt, "VF%u resumed!\n", vfid);
		return 0;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED)) {
		xe_gt_sriov_dbg(gt, "VF%u resume failed!\n", vfid);
		return -EIO;
	}

	/* WIP ended but we're neither RESUMED nor RESUME_FAILED */
	xe_gt_sriov_dbg(gt, "VF%u resume was canceled!\n", vfid);
	return -ECANCELED;
}
783 
784 /**
785  * DOC: The VF SAVE state machine
786  *
787  * SAVE extends the PAUSED state.
788  *
789  * The VF SAVE state machine looks like::
790  *
791  *  ....PAUSED....................................................
792  *  :                                                            :
793  *  :     (O)<---------o                                         :
794  *  :      |            \                                        :
795  *  :    save          (SAVED)    (SAVE_FAILED)                  :
796  *  :      |               ^           ^                         :
797  *  :      |               |           |                         :
798  *  :  ....V...............o...........o......SAVE_WIP.........  :
799  *  :  :   |               |           |                      :  :
800  *  :  :   |             empty         |                      :  :
801  *  :  :   |               |           |                      :  :
802  *  :  :   |               |           |                      :  :
803  *  :  :   |           DATA_DONE       |                      :  :
804  *  :  :   |               ^           |                      :  :
805  *  :  :   |               |        error                     :  :
806  *  :  :   |            no_data       /                       :  :
807  *  :  :   |              /          /                        :  :
808  *  :  :   |             /          /                         :  :
809  *  :  :   |            /          /                          :  :
810  *  :  :   o---------->PROCESS_DATA<----consume               :  :
811  *  :  :                \                      \              :  :
812  *  :  :                 \                      \             :  :
813  *  :  :                  \                      \            :  :
814  *  :  :                   ring_full----->WAIT_DATA           :  :
815  *  :  :                                                      :  :
816  *  :  :......................................................:  :
817  *  :............................................................:
818  *
819  * For the full state machine view, see `The VF state machine`_.
820  */
821 
/*
 * Abandon a save in progress: free the migration data ring and escape any
 * pending SAVE_* step.
 */
static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
		xe_gt_sriov_pf_migration_ring_free(gt, vfid);

		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE);
	}
}
832 
/*
 * Final transition into SAVED.  SAVE extends PAUSED, so the VF is still
 * expected to carry the PAUSED bit at this point.
 */
static void pf_enter_vf_saved(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED))
		pf_enter_vf_state_machine_bug(gt, vfid);

	xe_gt_sriov_dbg(gt, "VF%u saved!\n", vfid);

	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}
844 
/*
 * Record a save failure and wake any consumer waiting on the migration
 * waitqueue so it can observe the failure instead of blocking forever.
 */
static void pf_enter_vf_save_failed(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED))
		pf_enter_vf_state_machine_bug(gt, vfid);

	wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid));

	pf_exit_vf_wip(gt, vfid);
}
854 
855 static int pf_handle_vf_save_data(struct xe_gt *gt, unsigned int vfid)
856 {
857 	int ret;
858 
859 	if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid,
860 						       XE_SRIOV_PACKET_TYPE_GUC)) {
861 		ret = xe_gt_sriov_pf_migration_guc_save(gt, vfid);
862 		if (ret)
863 			return ret;
864 
865 		xe_gt_sriov_pf_migration_save_data_complete(gt, vfid,
866 							    XE_SRIOV_PACKET_TYPE_GUC);
867 
868 		return -EAGAIN;
869 	}
870 
871 	if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid,
872 						       XE_SRIOV_PACKET_TYPE_GGTT)) {
873 		ret = xe_gt_sriov_pf_migration_ggtt_save(gt, vfid);
874 		if (ret)
875 			return ret;
876 
877 		xe_gt_sriov_pf_migration_save_data_complete(gt, vfid,
878 							    XE_SRIOV_PACKET_TYPE_GGTT);
879 
880 		return -EAGAIN;
881 	}
882 
883 	if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid,
884 						       XE_SRIOV_PACKET_TYPE_MMIO)) {
885 		ret = xe_gt_sriov_pf_migration_mmio_save(gt, vfid);
886 		if (ret)
887 			return ret;
888 
889 		xe_gt_sriov_pf_migration_save_data_complete(gt, vfid,
890 							    XE_SRIOV_PACKET_TYPE_MMIO);
891 
892 		return -EAGAIN;
893 	}
894 
895 	if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid,
896 						       XE_SRIOV_PACKET_TYPE_VRAM)) {
897 		ret = xe_gt_sriov_pf_migration_vram_save(gt, vfid);
898 		if (ret == -EAGAIN)
899 			return -EAGAIN;
900 		else if (ret)
901 			return ret;
902 
903 		xe_gt_sriov_pf_migration_save_data_complete(gt, vfid,
904 							    XE_SRIOV_PACKET_TYPE_VRAM);
905 
906 		return -EAGAIN;
907 	}
908 
909 	return 0;
910 }
911 
912 static bool pf_handle_vf_save(struct xe_gt *gt, unsigned int vfid)
913 {
914 	int ret;
915 
916 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA))
917 		return false;
918 
919 	if (xe_gt_sriov_pf_migration_ring_full(gt, vfid)) {
920 		pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA);
921 		return true;
922 	}
923 
924 	ret = pf_handle_vf_save_data(gt, vfid);
925 	if (ret == -EAGAIN)
926 		pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA);
927 	else if (ret)
928 		pf_enter_vf_save_failed(gt, vfid);
929 	else
930 		pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE);
931 
932 	return true;
933 }
934 
935 static void pf_exit_vf_save_wait_data(struct xe_gt *gt, unsigned int vfid)
936 {
937 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA))
938 		return;
939 
940 	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA);
941 	pf_queue_vf(gt, vfid);
942 }
943 
944 static bool pf_enter_vf_save_wip(struct xe_gt *gt, unsigned int vfid)
945 {
946 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
947 		xe_gt_sriov_pf_migration_save_init(gt, vfid);
948 		pf_enter_vf_wip(gt, vfid);
949 		pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA);
950 		pf_queue_vf(gt, vfid);
951 		return true;
952 	}
953 
954 	return false;
955 }
956 
957 /**
958  * xe_gt_sriov_pf_control_check_save_data_done() - Check if all save migration data was produced.
959  * @gt: the &xe_gt
960  * @vfid: the VF identifier
961  *
962  * This function is for PF only.
963  *
964  * Return: true if all migration data was produced, false otherwise.
965  */
966 bool xe_gt_sriov_pf_control_check_save_data_done(struct xe_gt *gt, unsigned int vfid)
967 {
968 	return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE);
969 }
970 
971 /**
972  * xe_gt_sriov_pf_control_check_save_failed() - Check if save processing has failed.
973  * @gt: the &xe_gt
974  * @vfid: the VF identifier
975  *
976  * This function is for PF only.
977  *
978  * Return: true if save processing failed, false otherwise.
979  */
980 bool xe_gt_sriov_pf_control_check_save_failed(struct xe_gt *gt, unsigned int vfid)
981 {
982 	return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED);
983 }
984 
985 /**
986  * xe_gt_sriov_pf_control_process_save_data() - Queue VF save migration data processing.
987  * @gt: the &xe_gt
988  * @vfid: the VF identifier
989  *
990  * This function is for PF only.
991  *
992  * Return: 0 on success or a negative error code on failure.
993  */
994 int xe_gt_sriov_pf_control_process_save_data(struct xe_gt *gt, unsigned int vfid)
995 {
996 	if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED))
997 		return -EIO;
998 
999 	pf_exit_vf_save_wait_data(gt, vfid);
1000 
1001 	return 0;
1002 }
1003 
1004 /**
1005  * xe_gt_sriov_pf_control_trigger_save_vf() - Start an SR-IOV VF migration data save sequence.
1006  * @gt: the &xe_gt
1007  * @vfid: the VF identifier
1008  *
1009  * This function is for PF only.
1010  *
1011  * Return: 0 on success or a negative error code on failure.
1012  */
1013 int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid)
1014 {
1015 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
1016 		xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
1017 		return -EPERM;
1018 	}
1019 
1020 	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
1021 		xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
1022 		return -EPERM;
1023 	}
1024 
1025 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
1026 		xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid);
1027 		return -EBUSY;
1028 	}
1029 
1030 	if (!pf_enter_vf_save_wip(gt, vfid)) {
1031 		xe_gt_sriov_dbg(gt, "VF%u save already in progress!\n", vfid);
1032 		return -EALREADY;
1033 	}
1034 
1035 	return 0;
1036 }
1037 
1038 /**
1039  * xe_gt_sriov_pf_control_finish_save_vf() - Complete a VF migration data save sequence.
1040  * @gt: the &xe_gt
1041  * @vfid: the VF identifier
1042  *
1043  * This function is for PF only.
1044  *
1045  * Return: 0 on success or a negative error code on failure.
1046  */
1047 int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid)
1048 {
1049 	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE)) {
1050 		xe_gt_sriov_err(gt, "VF%u save is still in progress!\n", vfid);
1051 		return -EIO;
1052 	}
1053 
1054 	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
1055 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE);
1056 	pf_enter_vf_saved(gt, vfid);
1057 
1058 	return 0;
1059 }
1060 
1061 /**
1062  * DOC: The VF RESTORE state machine
1063  *
1064  * RESTORE extends the PAUSED state.
1065  *
1066  * The VF RESTORE state machine looks like::
1067  *
1068  *  ....PAUSED....................................................
1069  *  :                                                            :
1070  *  :     (O)<---------o                                         :
1071  *  :      |            \                                        :
1072  *  :    restore      (RESTORED)  (RESTORE_FAILED)               :
1073  *  :      |               ^           ^                         :
1074  *  :      |               |           |                         :
1075  *  :  ....V...............o...........o......RESTORE_WIP......  :
1076  *  :  :   |               |           |                      :  :
1077  *  :  :   |             empty         |                      :  :
1078  *  :  :   |               |           |                      :  :
1079  *  :  :   |               |           |                      :  :
1080  *  :  :   |           DATA_DONE       |                      :  :
1081  *  :  :   |               ^           |                      :  :
1082  *  :  :   |               |        error                     :  :
1083  *  :  :   |           trailer        /                       :  :
1084  *  :  :   |              /          /                        :  :
1085  *  :  :   |             /          /                         :  :
1086  *  :  :   |            /          /                          :  :
1087  *  :  :   o---------->PROCESS_DATA<----produce               :  :
1088  *  :  :                \                      \              :  :
1089  *  :  :                 \                      \             :  :
1090  *  :  :                  \                      \            :  :
1091  *  :  :                   ring_empty---->WAIT_DATA           :  :
1092  *  :  :                                                      :  :
1093  *  :  :......................................................:  :
1094  *  :............................................................:
1095  *
1096  * For the full state machine view, see `The VF state machine`_.
1097  */
1098 
1099 static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid)
1100 {
1101 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
1102 		xe_gt_sriov_pf_migration_ring_free(gt, vfid);
1103 
1104 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA);
1105 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA);
1106 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE);
1107 	}
1108 }
1109 
/* Terminal state of a successful RESTORE sequence. */
static void pf_enter_vf_restored(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED))
		pf_enter_vf_state_machine_bug(gt, vfid);

	xe_gt_sriov_dbg(gt, "VF%u restored!\n", vfid);

	/* RESTORE extends PAUSED, which must still be in effect */
	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}
1121 
/* Terminal state of a failed RESTORE sequence. */
static void pf_enter_vf_restore_failed(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED))
		pf_enter_vf_state_machine_bug(gt, vfid);

	/* unblock anyone sleeping on the migration waitqueue */
	wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid));

	pf_exit_vf_wip(gt, vfid);
}
1131 
1132 static int pf_handle_vf_restore_data(struct xe_gt *gt, unsigned int vfid)
1133 {
1134 	struct xe_sriov_packet *data = xe_gt_sriov_pf_migration_restore_consume(gt, vfid);
1135 	int ret = 0;
1136 
1137 	switch (data->hdr.type) {
1138 	case XE_SRIOV_PACKET_TYPE_GGTT:
1139 		ret = xe_gt_sriov_pf_migration_ggtt_restore(gt, vfid, data);
1140 		break;
1141 	case XE_SRIOV_PACKET_TYPE_MMIO:
1142 		ret = xe_gt_sriov_pf_migration_mmio_restore(gt, vfid, data);
1143 		break;
1144 	case XE_SRIOV_PACKET_TYPE_GUC:
1145 		ret = xe_gt_sriov_pf_migration_guc_restore(gt, vfid, data);
1146 		break;
1147 	case XE_SRIOV_PACKET_TYPE_VRAM:
1148 		ret = xe_gt_sriov_pf_migration_vram_restore(gt, vfid, data);
1149 		break;
1150 	default:
1151 		xe_gt_sriov_notice(gt, "Skipping VF%u unknown data type: %d\n",
1152 				   vfid, data->hdr.type);
1153 		break;
1154 	}
1155 
1156 	xe_sriov_packet_free(data);
1157 
1158 	return ret;
1159 }
1160 
1161 static bool pf_handle_vf_restore(struct xe_gt *gt, unsigned int vfid)
1162 {
1163 	int ret;
1164 
1165 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA))
1166 		return false;
1167 
1168 	if (xe_gt_sriov_pf_migration_ring_empty(gt, vfid)) {
1169 		if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE))
1170 			pf_enter_vf_restored(gt, vfid);
1171 		else
1172 			pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA);
1173 
1174 		return true;
1175 	}
1176 
1177 	ret = pf_handle_vf_restore_data(gt, vfid);
1178 	if (ret)
1179 		pf_enter_vf_restore_failed(gt, vfid);
1180 	else
1181 		pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA);
1182 
1183 	return true;
1184 }
1185 
1186 static void pf_exit_vf_restore_wait_data(struct xe_gt *gt, unsigned int vfid)
1187 {
1188 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA))
1189 		return;
1190 
1191 	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA);
1192 	pf_queue_vf(gt, vfid);
1193 }
1194 
1195 static bool pf_enter_vf_restore_wip(struct xe_gt *gt, unsigned int vfid)
1196 {
1197 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
1198 		pf_enter_vf_wip(gt, vfid);
1199 		pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA);
1200 		pf_queue_vf(gt, vfid);
1201 		return true;
1202 	}
1203 
1204 	return false;
1205 }
1206 
1207 /**
1208  * xe_gt_sriov_pf_control_check_restore_failed() - Check if restore processing has failed.
1209  * @gt: the &xe_gt
1210  * @vfid: the VF identifier
1211  *
1212  * This function is for PF only.
1213  *
1214  * Return: true if restore processing failed, false otherwise.
1215  */
1216 bool xe_gt_sriov_pf_control_check_restore_failed(struct xe_gt *gt, unsigned int vfid)
1217 {
1218 	return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED);
1219 }
1220 
1221 /**
1222  * xe_gt_sriov_pf_control_restore_data_done() - Indicate the end of VF migration data stream.
1223  * @gt: the &xe_gt
1224  * @vfid: the VF identifier
1225  *
1226  * This function is for PF only.
1227  *
1228  * Return: 0 on success or a negative error code on failure.
1229  */
1230 int xe_gt_sriov_pf_control_restore_data_done(struct xe_gt *gt, unsigned int vfid)
1231 {
1232 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE)) {
1233 		pf_enter_vf_state_machine_bug(gt, vfid);
1234 		return -EIO;
1235 	}
1236 
1237 	return xe_gt_sriov_pf_control_process_restore_data(gt, vfid);
1238 }
1239 
1240 /**
1241  * xe_gt_sriov_pf_control_process_restore_data() - Queue VF restore migration data processing.
1242  * @gt: the &xe_gt
1243  * @vfid: the VF identifier
1244  *
1245  * This function is for PF only.
1246  *
1247  * Return: 0 on success or a negative error code on failure.
1248  */
1249 int xe_gt_sriov_pf_control_process_restore_data(struct xe_gt *gt, unsigned int vfid)
1250 {
1251 	if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED)) {
1252 		xe_gt_sriov_pf_migration_ring_free(gt, vfid);
1253 		return -EIO;
1254 	}
1255 
1256 	pf_exit_vf_restore_wait_data(gt, vfid);
1257 
1258 	return 0;
1259 }
1260 
1261 /**
1262  * xe_gt_sriov_pf_control_trigger restore_vf() - Start an SR-IOV VF migration data restore sequence.
1263  * @gt: the &xe_gt
1264  * @vfid: the VF identifier
1265  *
1266  * This function is for PF only.
1267  *
1268  * Return: 0 on success or a negative error code on failure.
1269  */
1270 int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid)
1271 {
1272 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
1273 		xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
1274 		return -EPERM;
1275 	}
1276 
1277 	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
1278 		xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
1279 		return -EPERM;
1280 	}
1281 
1282 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
1283 		xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid);
1284 		return -EBUSY;
1285 	}
1286 
1287 	if (!pf_enter_vf_restore_wip(gt, vfid)) {
1288 		xe_gt_sriov_dbg(gt, "VF%u restore already in progress!\n", vfid);
1289 		return -EALREADY;
1290 	}
1291 
1292 	return 0;
1293 }
1294 
/*
 * Wait until the RESTORE sequence leaves the WIP state and check that
 * it didn't end up in RESTORE_FAILED.
 *
 * Return: 0 on success or a negative error code on failure.
 */
static int pf_wait_vf_restore_done(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESTORE_WIP);
	int err;

	err = pf_wait_vf_wip_done(gt, vfid, timeout);
	if (err) {
		xe_gt_sriov_notice(gt, "VF%u RESTORE didn't finish in %u ms (%pe)\n",
				   vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
		return err;
	}

	if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED))
		return -EIO;

	return 0;
}
1312 
1313 /**
1314  * xe_gt_sriov_pf_control_finish_restore_vf() - Complete a VF migration data restore sequence.
1315  * @gt: the &xe_gt
1316  * @vfid: the VF identifier
1317  *
1318  * This function is for PF only.
1319  *
1320  * Return: 0 on success or a negative error code on failure.
1321  */
1322 int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid)
1323 {
1324 	int ret;
1325 
1326 	ret = pf_wait_vf_restore_done(gt, vfid);
1327 	if (ret)
1328 		return ret;
1329 
1330 	if (!pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED)) {
1331 		pf_enter_vf_mismatch(gt, vfid);
1332 		return -EIO;
1333 	}
1334 
1335 	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
1336 
1337 	return 0;
1338 }
1339 
1340 /**
1341  * DOC: The VF STOP state machine
1342  *
1343  * The VF STOP state machine looks like::
1344  *
1345  *	 (READY,PAUSED,RESUMED)<-------<--------------------o
1346  *	    |                                                \
1347  *	   stop                                               \
1348  *	    |                                                  \
1349  *	....V..............................STOP_WIP......       \
1350  *	:    \                                          :        o
1351  *	:     \   o----<----busy                        :        |
1352  *	:      \ /            /                         :        |
1353  *	:       STOP_SEND_STOP--------failed--->--------o--->(STOP_FAILED)
1354  *	:       /             \                         :        |
1355  *	:    acked             rejected-------->--------o--->(MISMATCH)
1356  *	:     /                                         :
1357  *	:....o..............o...............o...........:
1358  *	     |              |               |
1359  *	  completed        flr            restart
1360  *	     |              |               |
1361  *	     V         .....V.....          V
1362  *	 (STOPPED)     : FLR_WIP :       (READY)
1363  *	               :.........:
1364  *
1365  * For the full state machine view, see `The VF state machine`_.
1366  */
1367 
1368 static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
1369 {
1370 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP))
1371 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP);
1372 }
1373 
/* Enter the final STOPPED state. */
static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED))
		pf_enter_vf_state_machine_bug(gt, vfid);

	/* STOPPED supersedes all other stable states */
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}
1386 
/* A successful STOP simply lands in the STOPPED state. */
static void pf_enter_vf_stop_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_stopped(gt, vfid);
}
1391 
/* Mark the STOP sequence as failed and leave the WIP state. */
static void pf_enter_vf_stop_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
	pf_exit_vf_wip(gt, vfid);
}
1397 
/* A STOP rejected by the GuC indicates a PF/GuC state mismatch. */
static void pf_enter_vf_stop_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_stop_failed(gt, vfid);
}
1403 
/* Schedule sending of the STOP command (done from queued VF processing). */
static void pf_enter_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
1411 
1412 static bool pf_exit_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
1413 {
1414 	int err;
1415 
1416 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
1417 		return false;
1418 
1419 	err = pf_send_vf_stop(gt, vfid);
1420 	if (err == -EBUSY)
1421 		pf_enter_vf_stop_send_stop(gt, vfid);
1422 	else if (err == -EIO)
1423 		pf_enter_vf_stop_rejected(gt, vfid);
1424 	else if (err)
1425 		pf_enter_vf_stop_failed(gt, vfid);
1426 	else
1427 		pf_enter_vf_stop_completed(gt, vfid);
1428 	return true;
1429 }
1430 
1431 static bool pf_enter_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
1432 {
1433 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) {
1434 		pf_enter_vf_wip(gt, vfid);
1435 		pf_enter_vf_stop_send_stop(gt, vfid);
1436 		return true;
1437 	}
1438 	return false;
1439 }
1440 
1441 /**
1442  * xe_gt_sriov_pf_control_stop_vf - Stop a VF.
1443  * @gt: the &xe_gt
1444  * @vfid: the VF identifier
1445  *
1446  * This function is for PF only.
1447  *
1448  * Return: 0 on success or a negative error code on failure.
1449  */
1450 int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
1451 {
1452 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_STOP_WIP);
1453 	int err;
1454 
1455 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
1456 		xe_gt_sriov_dbg(gt, "VF%u was already stopped!\n", vfid);
1457 		return -ESTALE;
1458 	}
1459 
1460 	if (!pf_enter_vf_stop_wip(gt, vfid)) {
1461 		xe_gt_sriov_dbg(gt, "VF%u stop already in progress!\n", vfid);
1462 		return -EALREADY;
1463 	}
1464 
1465 	err = pf_wait_vf_wip_done(gt, vfid, timeout);
1466 	if (err)
1467 		return err;
1468 
1469 	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
1470 		xe_gt_sriov_dbg(gt, "VF%u stopped!\n", vfid);
1471 		return 0;
1472 	}
1473 
1474 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED)) {
1475 		xe_gt_sriov_dbg(gt, "VF%u stop failed!\n", vfid);
1476 		return -EIO;
1477 	}
1478 
1479 	xe_gt_sriov_dbg(gt, "VF%u stop was canceled!\n", vfid);
1480 	return -ECANCELED;
1481 }
1482 
1483 /**
1484  * DOC: The VF FLR state machine
1485  *
1486  * The VF FLR state machine looks like::
1487  *
1488  *	 (READY,PAUSED,STOPPED)<------------<--------------o
1489  *	    |                                               \
1490  *	   flr                                               \
1491  *	    |                                                 \
1492  *	....V..........................FLR_WIP...........      \
1493  *	:    \                                          :       \
1494  *	:     \   o----<----busy                        :        |
1495  *	:      \ /            /                         :        |
1496  *	:       FLR_SEND_START---failed----->-----------o--->(FLR_FAILED)<---o
1497  *	:        |            \                         :        |           |
1498  *	:      acked           rejected----->-----------o--->(MISMATCH)      |
1499  *	:        |                                      :        ^           |
1500  *	:        v                                      :        |           |
1501  *	:       FLR_WAIT_GUC                            :        |           |
1502  *	:        |                                      :        |           |
1503  *	:       done                                    :        |           |
1504  *	:        |                                      :        |           |
1505  *	:        v                                      :        |           |
1506  *	:       FLR_GUC_DONE                            :        |           |
1507  *	:        |                                      :        |           |
1508  *	:        | o--<--sync                           :        |           |
1509  *	:        |/        /                            :        |           |
1510  *	:       FLR_SYNC--o                             :        |           |
1511  *	:        |                                      :        |           |
1512  *	:       FLR_RESET_CONFIG---failed--->-----------o--------+-----------o
1513  *	:        |                                      :        |           |
1514  *	:       FLR_RESET_DATA                          :        |           |
1515  *	:        |                                      :        |           |
1516  *	:       FLR_RESET_MMIO                          :        |           |
1517  *	:        |                                      :        |           |
1518  *	:        | o----<----busy                       :        |           |
1519  *	:        |/            /                        :        |           |
1520  *	:       FLR_SEND_FINISH----failed--->-----------o--------+-----------o
1521  *	:       /             \                         :        |
1522  *	:     acked            rejected----->-----------o--------o
1523  *	:     /                                         :
1524  *	:....o..............................o...........:
1525  *	     |                              |
1526  *	  completed                       restart
1527  *	     |                             /
1528  *	     V                            /
1529  *	  (READY)<----------<------------o
1530  *
1531  * For the full state machine view, see `The VF state machine`_.
1532  */
1533 
/* Schedule sending of the FLR_START command (done from queued VF processing). */
static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
1541 
/* Begin the FLR sequence, unless one is already in progress. */
static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
		xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid);
		return;
	}

	pf_enter_vf_wip(gt, vfid);
	pf_enter_vf_flr_send_start(gt, vfid);
}
1552 
/* Leave FLR_WIP and escape any intermediate FLR sub-state. */
static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
		/* escape sub-states in reverse order of the FLR steps */
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START);

		/* NOTE(review): re-sync the device-level FLR checkpoint - confirm intent */
		xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid);
	}
}
1567 
/* A fully completed FLR returns the VF to the READY state. */
static void pf_enter_vf_flr_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_ready(gt, vfid);
}
1572 
/* Terminal state of a failed FLR sequence (logged only on first entry). */
static void pf_enter_vf_flr_failed(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
		xe_gt_sriov_notice(gt, "VF%u FLR failed!\n", vfid);
	pf_exit_vf_wip(gt, vfid);
}
1579 
/* An FLR command rejected by the GuC indicates a PF/GuC state mismatch. */
static void pf_enter_vf_flr_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_flr_failed(gt, vfid);
}
1585 
/* Schedule sending of the FLR_FINISH command (done from queued VF processing). */
static void pf_enter_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
1593 
1594 static bool pf_exit_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
1595 {
1596 	int err;
1597 
1598 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
1599 		return false;
1600 
1601 	err = pf_send_vf_flr_finish(gt, vfid);
1602 	if (err == -EBUSY)
1603 		pf_enter_vf_flr_send_finish(gt, vfid);
1604 	else if (err == -EIO)
1605 		pf_enter_vf_flr_rejected(gt, vfid);
1606 	else if (err)
1607 		pf_enter_vf_flr_failed(gt, vfid);
1608 	else
1609 		pf_enter_vf_flr_completed(gt, vfid);
1610 	return true;
1611 }
1612 
/* Schedule the MMIO reset step of the FLR sequence. */
static void pf_enter_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
1620 
/* Sanitize the VF's hardware state, then move on to FLR_SEND_FINISH. */
static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
		return false;

	xe_gt_sriov_pf_sanitize_hw(gt, vfid);

	pf_enter_vf_flr_send_finish(gt, vfid);
	return true;
}
1631 
/* Schedule the data reset step of the FLR sequence. */
static void pf_enter_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
1639 
/* Reset VF data (service and monitor state), then continue with MMIO reset. */
static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
		return false;

	/* device-level service state is reset only from the root tile's main GT */
	if (xe_tile_is_root(gt->tile) && xe_gt_is_main_type(gt))
		xe_sriov_pf_service_reset_vf(gt_to_xe(gt), vfid);

	xe_gt_sriov_pf_monitor_flr(gt, vfid);

	pf_enter_vf_flr_reset_mmio(gt, vfid);
	return true;
}
1653 
/* Schedule the config reset step of the FLR sequence. */
static void pf_enter_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}
1661 
/* Sanitize the VF's provisioning config, then continue with data reset. */
static bool pf_exit_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
		return false;

	err = xe_gt_sriov_pf_config_sanitize(gt, vfid, timeout);
	if (err)
		pf_enter_vf_flr_failed(gt, vfid);
	else
		pf_enter_vf_flr_reset_data(gt, vfid);
	return true;
}
1677 
/* Start expecting a FLR_DONE notification from the GuC. */
static void pf_enter_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC))
		pf_enter_vf_state_machine_bug(gt, vfid);
}
1683 
/* Stop expecting a FLR_DONE notification from the GuC. */
static bool pf_exit_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
}
1688 
/*
 * Send the FLR_START command to the GuC and advance the FLR state
 * machine according to the result.
 *
 * Return: true if the VF was in FLR_SEND_START, false otherwise.
 */
static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
		return false;

	/* GuC may actually send a FLR_DONE before we get a RESPONSE */
	pf_enter_vf_flr_wait_guc(gt, vfid);

	err = pf_send_vf_flr_start(gt, vfid);
	if (err) {
		/* send failed, so we shouldn't expect FLR_DONE from GuC */
		pf_exit_vf_flr_wait_guc(gt, vfid);

		if (err == -EBUSY)
			pf_enter_vf_flr_send_start(gt, vfid);
		else if (err == -EIO)
			pf_enter_vf_flr_rejected(gt, vfid);
		else
			pf_enter_vf_flr_failed(gt, vfid);
	} else {
		/*
		 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
		 * but since GuC didn't complain, we may clear MISMATCH
		 */
		pf_exit_vf_mismatch(gt, vfid);
	}

	return true;
}
1720 
/* Leave the FLR checkpoint and continue with the config reset step. */
static bool pf_exit_vf_flr_sync(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC))
		return false;

	pf_enter_vf_flr_reset_config(gt, vfid);
	return true;
}
1729 
/* Enter the device-level FLR synchronization checkpoint. */
static void pf_enter_vf_flr_sync(struct xe_gt *gt, unsigned int vfid)
{
	int ret;

	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC))
		pf_enter_vf_state_machine_bug(gt, vfid);

	ret = xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid);
	if (ret < 0) {
		/* checkpoint not passed yet - we must still be in FLR_SYNC */
		xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint %pe\n", ERR_PTR(ret));
		pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC);
	} else {
		/* checkpoint passed - presumably the sync already exited FLR_SYNC */
		xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint pass\n");
		pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC);
	}
}
1746 
/* Handle GuC's FLR_DONE: proceed to the FLR checkpoint. */
static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
		return false;

	pf_enter_vf_flr_sync(gt, vfid);
	return true;
}
1755 
/* Record GuC's FLR_DONE and queue further FLR processing. */
static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
		pf_queue_vf(gt, vfid);
}
1761 
1762 /**
1763  * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence.
1764  * @gt: the &xe_gt
1765  * @vfid: the VF identifier
1766  *
1767  * This function is for PF only.
1768  *
1769  * Return: 0 on success or a negative error code on failure.
1770  */
1771 int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid)
1772 {
1773 	pf_enter_vf_flr_wip(gt, vfid);
1774 
1775 	return 0;
1776 }
1777 
1778 /**
1779  * xe_gt_sriov_pf_control_sync_flr() - Synchronize on the VF FLR checkpoint.
1780  * @gt: the &xe_gt
1781  * @vfid: the VF identifier
1782  * @sync: if true it will allow to exit the checkpoint
1783  *
1784  * Return: non-zero if FLR checkpoint has been reached, zero if the is no FLR
1785  *         in progress, or a negative error code on the FLR busy or failed.
1786  */
1787 int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync)
1788 {
1789 	if (sync && pf_exit_vf_flr_sync(gt, vfid))
1790 		return 1;
1791 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC))
1792 		return 1;
1793 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP))
1794 		return -EBUSY;
1795 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
1796 		return -EIO;
1797 	return 0;
1798 }
1799 
1800 /**
1801  * xe_gt_sriov_pf_control_wait_flr() - Wait for a VF FLR to complete.
1802  * @gt: the &xe_gt
1803  * @vfid: the VF identifier
1804  *
1805  * This function is for PF only.
1806  *
1807  * Return: 0 on success or a negative error code on failure.
1808  */
1809 int xe_gt_sriov_pf_control_wait_flr(struct xe_gt *gt, unsigned int vfid)
1810 {
1811 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP);
1812 	int err;
1813 
1814 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
1815 		return -EIO;
1816 
1817 	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP))
1818 		return 0;
1819 
1820 	err = pf_wait_vf_wip_done(gt, vfid, timeout);
1821 	if (err) {
1822 		xe_gt_sriov_notice(gt, "VF%u FLR didn't finish in %u ms (%pe)\n",
1823 				   vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
1824 		return err;
1825 	}
1826 
1827 	if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
1828 		return -EIO;
1829 
1830 	return 0;
1831 }
1832 
1833 /**
1834  * DOC: The VF FLR Flow with GuC
1835  *
1836  * The VF FLR flow includes several steps::
1837  *
1838  *	         PF                        GUC             PCI
1839  *	========================================================
1840  *	         |                          |               |
1841  *	(1)      |                         [ ] <----- FLR --|
1842  *	         |                         [ ]              :
1843  *	(2)     [ ] <-------- NOTIFY FLR --[ ]
1844  *	        [ ]                         |
1845  *	(3)     [ ]                         |
1846  *	        [ ]                         |
1847  *	        [ ]-- START FLR ---------> [ ]
1848  *	         |                         [ ]
1849  *	(4)      |                         [ ]
1850  *	         |                         [ ]
1851  *	        [ ] <--------- FLR DONE -- [ ]
1852  *	        [ ]                         |
1853  *	(5)     [ ]                         |
1854  *	        [ ]                         |
1855  *	        [ ]-- FINISH FLR --------> [ ]
1856  *	         |                          |
1857  *
1858  * * Step 1: PCI HW generates interrupt to the GuC about VF FLR
1859  * * Step 2: GuC FW sends G2H notification to the PF about VF FLR
1860  * * Step 2a: on some platforms G2H is only received from root GuC
1861  * * Step 3: PF sends H2G request to the GuC to start VF FLR sequence
1862  * * Step 3a: on some platforms PF must send H2G to all other GuCs
1863  * * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done
1864  * * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished
1865  */
1866 
/*
 * On some platforms (currently PVC) the G2H FLR notification is delivered
 * only by the root GuC, so the PF must start the FLR sequence on all GTs
 * itself - see steps 2a/3a in the "VF FLR Flow with GuC" DOC above.
 */
static bool needs_dispatch_flr(struct xe_device *xe)
{
	return xe->info.platform == XE_PVC;
}
1871 
1872 static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid)
1873 {
1874 	struct xe_device *xe = gt_to_xe(gt);
1875 	struct xe_gt *gtit;
1876 	unsigned int gtid;
1877 
1878 	xe_gt_sriov_info(gt, "VF%u FLR\n", vfid);
1879 
1880 	if (needs_dispatch_flr(xe)) {
1881 		for_each_gt(gtit, xe, gtid)
1882 			pf_enter_vf_flr_wip(gtit, vfid);
1883 	} else {
1884 		pf_enter_vf_flr_wip(gt, vfid);
1885 	}
1886 }
1887 
1888 static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid)
1889 {
1890 	if (!pf_exit_vf_flr_wait_guc(gt, vfid)) {
1891 		xe_gt_sriov_dbg(gt, "Received out of order 'VF%u FLR done'\n", vfid);
1892 		pf_enter_vf_mismatch(gt, vfid);
1893 		return;
1894 	}
1895 
1896 	pf_enter_vf_flr_guc_done(gt, vfid);
1897 }
1898 
1899 static void pf_handle_vf_pause_done(struct xe_gt *gt, u32 vfid)
1900 {
1901 	if (!pf_exit_pause_wait_guc(gt, vfid)) {
1902 		xe_gt_sriov_dbg(gt, "Received out of order 'VF%u PAUSE done'\n", vfid);
1903 		pf_enter_vf_mismatch(gt, vfid);
1904 		return;
1905 	}
1906 
1907 	pf_enter_vf_pause_guc_done(gt, vfid);
1908 }
1909 
1910 static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid)
1911 {
1912 	xe_gt_sriov_dbg_verbose(gt, "received VF%u event %#x\n", vfid, eventid);
1913 
1914 	if (vfid > xe_gt_sriov_pf_get_totalvfs(gt))
1915 		return -EPROTO;
1916 
1917 	switch (eventid) {
1918 	case GUC_PF_NOTIFY_VF_FLR:
1919 		pf_handle_vf_flr(gt, vfid);
1920 		break;
1921 	case GUC_PF_NOTIFY_VF_FLR_DONE:
1922 		pf_handle_vf_flr_done(gt, vfid);
1923 		break;
1924 	case GUC_PF_NOTIFY_VF_PAUSE_DONE:
1925 		pf_handle_vf_pause_done(gt, vfid);
1926 		break;
1927 	case GUC_PF_NOTIFY_VF_FIXUP_DONE:
1928 		break;
1929 	default:
1930 		return -ENOPKG;
1931 	}
1932 	return 0;
1933 }
1934 
1935 static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid)
1936 {
1937 	switch (eventid) {
1938 	case GUC_PF_NOTIFY_VF_ENABLE:
1939 		xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n",
1940 					str_enabled_disabled(true),
1941 					str_enabled_disabled(false));
1942 		break;
1943 	default:
1944 		return -ENOPKG;
1945 	}
1946 	return 0;
1947 }
1948 
1949 /**
1950  * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC.
1951  * @gt: the &xe_gt
1952  * @msg: the G2H message
1953  * @len: the length of the G2H message
1954  *
1955  * This function is for PF only.
1956  *
1957  * Return: 0 on success or a negative error code on failure.
1958  */
1959 int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
1960 {
1961 	u32 vfid;
1962 	u32 eventid;
1963 
1964 	xe_gt_assert(gt, len);
1965 	xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
1966 	xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
1967 	xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
1968 		     GUC_ACTION_GUC2PF_VF_STATE_NOTIFY);
1969 
1970 	if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt))))
1971 		return -EPROTO;
1972 
1973 	if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0])))
1974 		return -EPFNOSUPPORT;
1975 
1976 	if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN))
1977 		return -EPROTO;
1978 
1979 	vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]);
1980 	eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]);
1981 
1982 	return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid);
1983 }
1984 
1985 static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
1986 {
1987 	if (pf_exit_vf_flr_send_start(gt, vfid))
1988 		return true;
1989 
1990 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) {
1991 		xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
1992 					control_bit_to_string(XE_GT_SRIOV_STATE_FLR_WAIT_GUC));
1993 		return false;
1994 	}
1995 
1996 	if (pf_exit_vf_flr_guc_done(gt, vfid))
1997 		return true;
1998 
1999 	if (pf_exit_vf_flr_reset_config(gt, vfid))
2000 		return true;
2001 
2002 	if (pf_exit_vf_flr_reset_data(gt, vfid))
2003 		return true;
2004 
2005 	if (pf_exit_vf_flr_reset_mmio(gt, vfid))
2006 		return true;
2007 
2008 	if (pf_exit_vf_flr_send_finish(gt, vfid))
2009 		return true;
2010 
2011 	if (pf_exit_vf_stop_send_stop(gt, vfid))
2012 		return true;
2013 
2014 	if (pf_exit_vf_pause_send_pause(gt, vfid))
2015 		return true;
2016 
2017 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) {
2018 		xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
2019 					control_bit_to_string(XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC));
2020 		return true;
2021 	}
2022 
2023 	if (pf_exit_vf_pause_guc_done(gt, vfid))
2024 		return true;
2025 
2026 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)) {
2027 		xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
2028 					control_bit_to_string(XE_GT_SRIOV_STATE_SAVE_WAIT_DATA));
2029 		return false;
2030 	}
2031 
2032 	if (pf_handle_vf_save(gt, vfid))
2033 		return true;
2034 
2035 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)) {
2036 		xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
2037 					control_bit_to_string(XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA));
2038 		return false;
2039 	}
2040 
2041 	if (pf_handle_vf_restore(gt, vfid))
2042 		return true;
2043 
2044 	if (pf_exit_vf_resume_send_resume(gt, vfid))
2045 		return true;
2046 
2047 	return false;
2048 }
2049 
/*
 * Recover the VF identifier from a &xe_gt_sriov_control_state pointer:
 * the control state is embedded in the per-VF metadata array, which is
 * indexed by the VFID.
 */
static unsigned int pf_control_state_index(struct xe_gt *gt,
					   struct xe_gt_sriov_control_state *cs)
{
	return container_of(cs, struct xe_gt_sriov_metadata, control) - gt->sriov.pf.vfs;
}
2055 
/*
 * Pop a single VF entry from the control work list and advance that VF's
 * state machine; re-arm the work as needed so remaining VFs are processed.
 */
static void pf_worker_find_work(struct xe_gt *gt)
{
	struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;
	struct xe_gt_sriov_control_state *cs;
	unsigned int vfid;
	bool empty;
	bool more;

	/* detach one queued entry under the lock, note if others remain */
	spin_lock(&pfc->lock);
	cs = list_first_entry_or_null(&pfc->list, struct xe_gt_sriov_control_state, link);
	if (cs)
		list_del_init(&cs->link);
	empty = list_empty(&pfc->list);
	spin_unlock(&pfc->lock);

	if (!cs)
		return;

	/* VF metadata structures are indexed by the VFID */
	vfid = pf_control_state_index(gt, cs);
	xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));

	/*
	 * Re-queue this VF if it still has work to do, otherwise re-arm the
	 * worker for any other VFs left on the list.
	 */
	more = pf_process_vf_state_machine(gt, vfid);
	if (more)
		pf_queue_vf(gt, vfid);
	else if (!empty)
		pf_queue_control_worker(gt);
}
2084 
/* workqueue callback for &xe_gt_sriov_pf_control.worker */
static void control_worker_func(struct work_struct *w)
{
	struct xe_gt *gt = container_of(w, struct xe_gt, sriov.pf.control.worker);

	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
	pf_worker_find_work(gt);
}
2092 
/* cancel the control worker and wait until any running instance finishes */
static void pf_stop_worker(struct xe_gt *gt)
{
	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
	cancel_work_sync(&gt->sriov.pf.control.worker);
}
2098 
/* drmm-managed cleanup: stop the control worker on driver teardown */
static void control_fini_action(struct drm_device *dev, void *data)
{
	struct xe_gt *gt = data;

	pf_stop_worker(gt);
}
2105 
2106 /**
2107  * xe_gt_sriov_pf_control_init() - Initialize PF's control data.
2108  * @gt: the &xe_gt
2109  *
2110  * This function is for PF only.
2111  *
2112  * Return: 0 on success or a negative error code on failure.
2113  */
2114 int xe_gt_sriov_pf_control_init(struct xe_gt *gt)
2115 {
2116 	struct xe_device *xe = gt_to_xe(gt);
2117 	unsigned int n, totalvfs;
2118 
2119 	xe_gt_assert(gt, IS_SRIOV_PF(xe));
2120 
2121 	totalvfs = xe_sriov_pf_get_totalvfs(xe);
2122 	for (n = 0; n <= totalvfs; n++) {
2123 		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, n);
2124 
2125 		init_completion(&cs->done);
2126 		INIT_LIST_HEAD(&cs->link);
2127 	}
2128 
2129 	spin_lock_init(&gt->sriov.pf.control.lock);
2130 	INIT_LIST_HEAD(&gt->sriov.pf.control.list);
2131 	INIT_WORK(&gt->sriov.pf.control.worker, control_worker_func);
2132 
2133 	return drmm_add_action_or_reset(&xe->drm, control_fini_action, gt);
2134 }
2135 
2136 /**
2137  * xe_gt_sriov_pf_control_restart() - Restart SR-IOV control data after a GT reset.
2138  * @gt: the &xe_gt
2139  *
2140  * Any per-VF status maintained by the PF or any ongoing VF control activity
2141  * performed by the PF must be reset or cancelled when the GT is reset.
2142  *
2143  * This function is for PF only.
2144  */
2145 void xe_gt_sriov_pf_control_restart(struct xe_gt *gt)
2146 {
2147 	struct xe_device *xe = gt_to_xe(gt);
2148 	unsigned int n, totalvfs;
2149 
2150 	xe_gt_assert(gt, IS_SRIOV_PF(xe));
2151 
2152 	pf_stop_worker(gt);
2153 
2154 	totalvfs = xe_sriov_pf_get_totalvfs(xe);
2155 	for (n = 1; n <= totalvfs; n++)
2156 		pf_enter_vf_ready(gt, n);
2157 }
2158