xref: /linux/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c (revision 7f4f3b14e8079ecde096bd734af10e30d40c27b7)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2023-2024 Intel Corporation
4  */
5 
6 #include <drm/drm_managed.h>
7 
8 #include "abi/guc_actions_sriov_abi.h"
9 
10 #include "xe_device.h"
11 #include "xe_gt.h"
12 #include "xe_gt_sriov_pf.h"
13 #include "xe_gt_sriov_pf_config.h"
14 #include "xe_gt_sriov_pf_control.h"
15 #include "xe_gt_sriov_pf_helpers.h"
16 #include "xe_gt_sriov_pf_migration.h"
17 #include "xe_gt_sriov_pf_monitor.h"
18 #include "xe_gt_sriov_pf_service.h"
19 #include "xe_gt_sriov_printk.h"
20 #include "xe_guc_ct.h"
21 #include "xe_sriov.h"
22 
23 static const char *control_cmd_to_string(u32 cmd)
24 {
25 	switch (cmd) {
26 	case GUC_PF_TRIGGER_VF_PAUSE:
27 		return "PAUSE";
28 	case GUC_PF_TRIGGER_VF_RESUME:
29 		return "RESUME";
30 	case GUC_PF_TRIGGER_VF_STOP:
31 		return "STOP";
32 	case GUC_PF_TRIGGER_VF_FLR_START:
33 		return "FLR_START";
34 	case GUC_PF_TRIGGER_VF_FLR_FINISH:
35 		return "FLR_FINISH";
36 	default:
37 		return "<unknown>";
38 	}
39 }
40 
41 static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd)
42 {
43 	u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = {
44 		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
45 		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
46 		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL),
47 		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid),
48 		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd),
49 	};
50 	int ret;
51 
52 	ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
53 	return ret > 0 ? -EPROTO : ret;
54 }
55 
56 static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd)
57 {
58 	int err;
59 
60 	xe_gt_assert(gt, vfid != PFID);
61 	xe_gt_sriov_dbg_verbose(gt, "sending VF%u control command %s\n",
62 				vfid, control_cmd_to_string(cmd));
63 
64 	err = guc_action_vf_control_cmd(&gt->uc.guc, vfid, cmd);
65 	if (unlikely(err))
66 		xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n",
67 				vfid, control_cmd_to_string(cmd), ERR_PTR(err));
68 	return err;
69 }
70 
71 static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid)
72 {
73 	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE);
74 }
75 
76 static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid)
77 {
78 	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME);
79 }
80 
81 static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid)
82 {
83 	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP);
84 }
85 
86 static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid)
87 {
88 	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START);
89 }
90 
91 static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid)
92 {
93 	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH);
94 }
95 
96 /**
97  * DOC: The VF state machine
98  *
99  * The simplified VF state machine could be presented as::
100  *
101  *	               pause--------------------------o
102  *	              /                               |
103  *	             /                                v
104  *	      (READY)<------------------resume-----(PAUSED)
105  *	         ^   \                             /    /
106  *	         |    \                           /    /
107  *	         |     stop---->(STOPPED)<----stop    /
108  *	         |                  /                /
109  *	         |                 /                /
110  *	         o--------<-----flr                /
111  *	          \                               /
112  *	           o------<--------------------flr
113  *
114  * Where:
115  *
116  * * READY - represents a state in which VF is fully operable
117  * * PAUSED - represents a state in which VF activity is temporarily suspended
118  * * STOPPED - represents a state in which VF activity is definitely halted
119  * * pause - represents a request to temporarily suspend VF activity
120  * * resume - represents a request to resume VF activity
121  * * stop - represents a request to definitely halt VF activity
122  * * flr - represents a request to perform VF FLR to restore VF activity
123  *
124  * However, each state transition requires additional steps that involve
125  * communication with GuC that might fail or be interrupted by other requests::
126  *
127  *	                   .................................WIP....
128  *	                   :                                      :
129  *	          pause--------------------->PAUSE_WIP----------------------------o
130  *	         /         :                /         \           :               |
131  *	        /          :    o----<---stop          flr--o     :               |
132  *	       /           :    |           \         /     |     :               V
133  *	(READY,RESUMED)<--------+------------RESUME_WIP<----+--<-----resume--(PAUSED)
134  *	  ^ \  \           :    |                           |     :          /   /
135  *	  |  \  \          :    |                           |     :         /   /
136  *	  |   \  \         :    |                           |     :        /   /
137  *	  |    \  \        :    o----<----------------------+--<-------stop   /
138  *	  |     \  \       :    |                           |     :          /
139  *	  |      \  \      :    V                           |     :         /
140  *	  |       \  stop----->STOP_WIP---------flr--->-----o     :        /
141  *	  |        \       :    |                           |     :       /
142  *	  |         \      :    |                           V     :      /
143  *	  |          flr--------+----->----------------->FLR_WIP<-----flr
144  *	  |                :    |                        /  ^     :
145  *	  |                :    |                       /   |     :
146  *	  o--------<-------:----+-----<----------------o    |     :
147  *	                   :    |                           |     :
148  *	                   :....|...........................|.....:
149  *	                        |                           |
150  *	                        V                           |
151  *	                     (STOPPED)--------------------flr
152  *
153  * For details about each internal WIP state machine see:
154  *
155  * * `The VF PAUSE state machine`_
156  * * `The VF RESUME state machine`_
157  * * `The VF STOP state machine`_
158  * * `The VF FLR state machine`_
159  */
160 
#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
/*
 * Map a VF control state bit to its name (the enumerator without the
 * XE_GT_SRIOV_STATE_ prefix) for debug messages; unknown bits yield "?".
 *
 * Only built with CONFIG_DRM_XE_DEBUG_SRIOV, so callers must reference it
 * exclusively from code that is also compiled out without that option.
 */
static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
{
	const char *name = "?";

#define STATE2STR(_X) \
	case XE_GT_SRIOV_STATE_##_X: name = #_X; break
	switch (bit) {
	STATE2STR(WIP);
	STATE2STR(FLR_WIP);
	STATE2STR(FLR_SEND_START);
	STATE2STR(FLR_WAIT_GUC);
	STATE2STR(FLR_GUC_DONE);
	STATE2STR(FLR_RESET_CONFIG);
	STATE2STR(FLR_RESET_DATA);
	STATE2STR(FLR_RESET_MMIO);
	STATE2STR(FLR_SEND_FINISH);
	STATE2STR(FLR_FAILED);
	STATE2STR(PAUSE_WIP);
	STATE2STR(PAUSE_SEND_PAUSE);
	STATE2STR(PAUSE_WAIT_GUC);
	STATE2STR(PAUSE_GUC_DONE);
	STATE2STR(PAUSE_SAVE_GUC);
	STATE2STR(PAUSE_FAILED);
	STATE2STR(PAUSED);
	STATE2STR(RESUME_WIP);
	STATE2STR(RESUME_SEND_RESUME);
	STATE2STR(RESUME_FAILED);
	STATE2STR(RESUMED);
	STATE2STR(STOP_WIP);
	STATE2STR(STOP_SEND_STOP);
	STATE2STR(STOP_FAILED);
	STATE2STR(STOPPED);
	STATE2STR(MISMATCH);
	default:
		break;
	}
#undef STATE2STR

	return name;
}
#endif
198 
199 static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)
200 {
201 	switch (bit) {
202 	case XE_GT_SRIOV_STATE_FLR_WAIT_GUC:
203 	case XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC:
204 		return HZ / 2;
205 	case XE_GT_SRIOV_STATE_FLR_WIP:
206 	case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG:
207 		return 5 * HZ;
208 	default:
209 		return HZ;
210 	}
211 }
212 
213 static struct xe_gt_sriov_control_state *pf_pick_vf_control(struct xe_gt *gt, unsigned int vfid)
214 {
215 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
216 	xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));
217 
218 	return &gt->sriov.pf.vfs[vfid].control;
219 }
220 
221 static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid)
222 {
223 	struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
224 
225 	return &cs->state;
226 }
227 
228 static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid,
229 			      enum xe_gt_sriov_control_bits bit)
230 {
231 	return test_bit(bit, pf_peek_vf_state(gt, vfid));
232 }
233 
234 static void pf_dump_vf_state(struct xe_gt *gt, unsigned int vfid)
235 {
236 	unsigned long state = *pf_peek_vf_state(gt, vfid);
237 	enum xe_gt_sriov_control_bits bit;
238 
239 	if (state) {
240 		xe_gt_sriov_dbg_verbose(gt, "VF%u state %#lx%s%*pbl\n",
241 					vfid, state, state ? " bits " : "",
242 					(int)BITS_PER_LONG, &state);
243 		for_each_set_bit(bit, &state, BITS_PER_LONG)
244 			xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d)\n",
245 						vfid, control_bit_to_string(bit), bit);
246 	} else {
247 		xe_gt_sriov_dbg_verbose(gt, "VF%u state READY\n", vfid);
248 	}
249 }
250 
251 static bool pf_expect_vf_state(struct xe_gt *gt, unsigned int vfid,
252 			       enum xe_gt_sriov_control_bits bit)
253 {
254 	bool result = pf_check_vf_state(gt, vfid, bit);
255 
256 	if (unlikely(!result))
257 		pf_dump_vf_state(gt, vfid);
258 
259 	return result;
260 }
261 
262 static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid,
263 				   enum xe_gt_sriov_control_bits bit)
264 {
265 	bool result = !pf_check_vf_state(gt, vfid, bit);
266 
267 	if (unlikely(!result))
268 		pf_dump_vf_state(gt, vfid);
269 
270 	return result;
271 }
272 
273 static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid,
274 			      enum xe_gt_sriov_control_bits bit)
275 {
276 	if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) {
277 		xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) enter\n",
278 					vfid, control_bit_to_string(bit), bit);
279 		return true;
280 	}
281 	return false;
282 }
283 
284 static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid,
285 			     enum xe_gt_sriov_control_bits bit)
286 {
287 	if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) {
288 		xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) exit\n",
289 					vfid, control_bit_to_string(bit), bit);
290 		return true;
291 	}
292 	return false;
293 }
294 
295 static void pf_escape_vf_state(struct xe_gt *gt, unsigned int vfid,
296 			       enum xe_gt_sriov_control_bits bit)
297 {
298 	if (pf_exit_vf_state(gt, vfid, bit))
299 		xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) escaped by %ps\n",
300 					vfid, control_bit_to_string(bit), bit,
301 					__builtin_return_address(0));
302 }
303 
304 static void pf_enter_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
305 {
306 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) {
307 		xe_gt_sriov_dbg(gt, "VF%u state mismatch detected by %ps\n",
308 				vfid, __builtin_return_address(0));
309 		pf_dump_vf_state(gt, vfid);
310 	}
311 }
312 
313 static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
314 {
315 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH))
316 		xe_gt_sriov_dbg(gt, "VF%u state mismatch cleared by %ps\n",
317 				vfid, __builtin_return_address(0));
318 
319 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
320 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
321 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
322 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED);
323 }
324 
/*
 * Record an unexpected state machine transition by flagging the VF with
 * MISMATCH. As this is a macro, pf_enter_vf_mismatch() is called directly
 * from the function using it, so the "%ps" in the mismatch message refers
 * to that function.
 */
#define pf_enter_vf_state_machine_bug(gt, vfid) ({	\
	pf_enter_vf_mismatch((gt), (vfid));		\
})
328 
329 static void pf_queue_control_worker(struct xe_gt *gt)
330 {
331 	struct xe_device *xe = gt_to_xe(gt);
332 
333 	xe_gt_assert(gt, IS_SRIOV_PF(xe));
334 
335 	queue_work(xe->sriov.wq, &gt->sriov.pf.control.worker);
336 }
337 
338 static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid)
339 {
340 	struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;
341 
342 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
343 
344 	spin_lock(&pfc->lock);
345 	list_move_tail(&gt->sriov.pf.vfs[vfid].control.link, &pfc->list);
346 	spin_unlock(&pfc->lock);
347 
348 	pf_queue_control_worker(gt);
349 }
350 
/*
 * Forward declarations: pf_exit_vf_wip() tears down every per-operation
 * WIP state, but those helpers are defined further down, next to their
 * respective state machines.
 */
static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid);
355 
356 static bool pf_enter_vf_wip(struct xe_gt *gt, unsigned int vfid)
357 {
358 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
359 		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
360 
361 		reinit_completion(&cs->done);
362 		return true;
363 	}
364 	return false;
365 }
366 
367 static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid)
368 {
369 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
370 		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
371 
372 		pf_exit_vf_flr_wip(gt, vfid);
373 		pf_exit_vf_stop_wip(gt, vfid);
374 		pf_exit_vf_pause_wip(gt, vfid);
375 		pf_exit_vf_resume_wip(gt, vfid);
376 
377 		complete_all(&cs->done);
378 	}
379 }
380 
381 static int pf_wait_vf_wip_done(struct xe_gt *gt, unsigned int vfid, unsigned long timeout)
382 {
383 	struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
384 
385 	return wait_for_completion_timeout(&cs->done, timeout) ? 0 : -ETIMEDOUT;
386 }
387 
388 static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
389 {
390 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
391 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED);
392 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
393 	pf_exit_vf_mismatch(gt, vfid);
394 	pf_exit_vf_wip(gt, vfid);
395 }
396 
397 /**
398  * DOC: The VF PAUSE state machine
399  *
400  * The VF PAUSE state machine looks like::
401  *
402  *	 (READY,RESUMED)<-------------<---------------------o---------o
403  *	    |                                                \         \
404  *	   pause                                              \         \
405  *	    |                                                  \         \
406  *	....V...........................PAUSE_WIP........       \         \
407  *	:    \                                          :        o         \
408  *	:     \   o------<-----busy                     :        |          \
409  *	:      \ /              /                       :        |           |
410  *	:       PAUSE_SEND_PAUSE ---failed--->----------o--->(PAUSE_FAILED)  |
411  *	:        |              \                       :        |           |
412  *	:      acked             rejected---->----------o--->(MISMATCH)     /
413  *	:        |                                      :                  /
414  *	:        v                                      :                 /
415  *	:       PAUSE_WAIT_GUC                          :                /
416  *	:        |                                      :               /
417  *	:       done                                    :              /
418  *	:        |                                      :             /
419  *	:        v                                      :            /
420  *	:       PAUSE_GUC_DONE                          o-----restart
421  *	:        |                                      :
422  *	:        |   o---<--busy                        :
423  *	:        v  /         /                         :
424  *	:       PAUSE_SAVE_GUC                          :
425  *	:      /                                        :
426  *	:     /                                         :
427  *	:....o..............o...............o...........:
428  *	     |              |               |
429  *	  completed        flr             stop
430  *	     |              |               |
431  *	     V         .....V.....    ......V.....
432  *	 (PAUSED)      : FLR_WIP :    : STOP_WIP :
433  *	               :.........:    :..........:
434  *
435  * For the full state machine view, see `The VF state machine`_.
436  */
437 
438 static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
439 {
440 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
441 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE);
442 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
443 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE);
444 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC);
445 	}
446 }
447 
448 static void pf_enter_vf_paused(struct xe_gt *gt, unsigned int vfid)
449 {
450 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED))
451 		pf_enter_vf_state_machine_bug(gt, vfid);
452 
453 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
454 	pf_exit_vf_mismatch(gt, vfid);
455 	pf_exit_vf_wip(gt, vfid);
456 }
457 
/* PAUSE sequence completed: the VF ends up in PAUSED. */
static void pf_enter_vf_pause_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_paused(gt, vfid);
}
462 
/* PAUSE sequence failed: mark PAUSE_FAILED and finish the work in progress. */
static void pf_enter_vf_pause_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
	/* leaving WIP also wakes up anyone waiting for the pause to finish */
	pf_exit_vf_wip(gt, vfid);
}
468 
/* PAUSE was rejected: flag a state mismatch, then fail the PAUSE sequence. */
static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_pause_failed(gt, vfid);
}
474 
475 static void pf_enter_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
476 {
477 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
478 		pf_enter_vf_state_machine_bug(gt, vfid);
479 }
480 
481 static bool pf_exit_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
482 {
483 	int err;
484 
485 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
486 		return false;
487 
488 	err = xe_gt_sriov_pf_migration_save_guc_state(gt, vfid);
489 	if (err) {
490 		/* retry if busy */
491 		if (err == -EBUSY) {
492 			pf_enter_vf_pause_save_guc(gt, vfid);
493 			return true;
494 		}
495 		/* give up on error */
496 		if (err == -EIO)
497 			pf_enter_vf_mismatch(gt, vfid);
498 	}
499 
500 	pf_enter_vf_pause_completed(gt, vfid);
501 	return true;
502 }
503 
504 static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
505 {
506 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
507 		return false;
508 
509 	pf_enter_vf_pause_save_guc(gt, vfid);
510 	return true;
511 }
512 
513 static void pf_enter_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
514 {
515 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
516 		pf_queue_vf(gt, vfid);
517 }
518 
519 static void pf_enter_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
520 {
521 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC))
522 		pf_enter_vf_state_machine_bug(gt, vfid);
523 }
524 
525 static bool pf_exit_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
526 {
527 	return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
528 }
529 
530 static void pf_enter_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
531 {
532 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
533 		pf_enter_vf_state_machine_bug(gt, vfid);
534 
535 	pf_queue_vf(gt, vfid);
536 }
537 
538 static bool pf_exit_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
539 {
540 	int err;
541 
542 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
543 		return false;
544 
545 	/* GuC may actually send a PAUSE_DONE before we get a RESPONSE */
546 	pf_enter_pause_wait_guc(gt, vfid);
547 
548 	err = pf_send_vf_pause(gt, vfid);
549 	if (err) {
550 		/* send failed, so we shouldn't expect PAUSE_DONE from GuC */
551 		pf_exit_pause_wait_guc(gt, vfid);
552 
553 		if (err == -EBUSY)
554 			pf_enter_vf_pause_send_pause(gt, vfid);
555 		else if (err == -EIO)
556 			pf_enter_vf_pause_rejected(gt, vfid);
557 		else
558 			pf_enter_vf_pause_failed(gt, vfid);
559 	} else {
560 		/*
561 		 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
562 		 * but since GuC didn't complain, we may clear MISMATCH
563 		 */
564 		pf_exit_vf_mismatch(gt, vfid);
565 	}
566 
567 	return true;
568 }
569 
570 static bool pf_enter_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
571 {
572 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
573 		pf_enter_vf_wip(gt, vfid);
574 		pf_enter_vf_pause_send_pause(gt, vfid);
575 		return true;
576 	}
577 
578 	return false;
579 }
580 
581 /**
582  * xe_gt_sriov_pf_control_pause_vf - Pause a VF.
583  * @gt: the &xe_gt
584  * @vfid: the VF identifier
585  *
586  * This function is for PF only.
587  *
588  * Return: 0 on success or a negative error code on failure.
589  */
590 int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid)
591 {
592 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_PAUSE_WIP);
593 	int err;
594 
595 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
596 		xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
597 		return -EPERM;
598 	}
599 
600 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
601 		xe_gt_sriov_dbg(gt, "VF%u was already paused!\n", vfid);
602 		return -ESTALE;
603 	}
604 
605 	if (!pf_enter_vf_pause_wip(gt, vfid)) {
606 		xe_gt_sriov_dbg(gt, "VF%u pause already in progress!\n", vfid);
607 		return -EALREADY;
608 	}
609 
610 	err = pf_wait_vf_wip_done(gt, vfid, timeout);
611 	if (err) {
612 		xe_gt_sriov_dbg(gt, "VF%u pause didn't finish in %u ms (%pe)\n",
613 				vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
614 		return err;
615 	}
616 
617 	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
618 		xe_gt_sriov_info(gt, "VF%u paused!\n", vfid);
619 		return 0;
620 	}
621 
622 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED)) {
623 		xe_gt_sriov_dbg(gt, "VF%u pause failed!\n", vfid);
624 		return -EIO;
625 	}
626 
627 	xe_gt_sriov_dbg(gt, "VF%u pause was canceled!\n", vfid);
628 	return -ECANCELED;
629 }
630 
631 /**
632  * DOC: The VF RESUME state machine
633  *
634  * The VF RESUME state machine looks like::
635  *
636  *	 (PAUSED)<-----------------<------------------------o
637  *	    |                                                \
638  *	   resume                                             \
639  *	    |                                                  \
640  *	....V............................RESUME_WIP......       \
641  *	:    \                                          :        o
642  *	:     \   o-------<-----busy                    :        |
643  *	:      \ /                /                     :        |
644  *	:       RESUME_SEND_RESUME ---failed--->--------o--->(RESUME_FAILED)
645  *	:       /                \                      :        |
646  *	:    acked                rejected---->---------o--->(MISMATCH)
647  *	:     /                                         :
648  *	:....o..............o...............o.....o.....:
649  *	     |              |               |      \
650  *	  completed        flr            stop      restart-->(READY)
651  *	     |              |               |
652  *	     V         .....V.....    ......V.....
653  *	 (RESUMED)     : FLR_WIP :    : STOP_WIP :
654  *	               :.........:    :..........:
655  *
656  * For the full state machine view, see `The VF state machine`_.
657  */
658 
659 static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
660 {
661 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP))
662 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME);
663 }
664 
/*
 * Move the VF to RESUMED: drop PAUSED, clear any mismatch/failure markers
 * and finish the work in progress.
 */
static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid)
{
	/* unlike PAUSED/STOPPED, an already set RESUMED is not flagged as a bug */
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}
672 
/* RESUME sequence completed: the VF ends up in RESUMED. */
static void pf_enter_vf_resume_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_resumed(gt, vfid);
}
677 
/* RESUME sequence failed: mark RESUME_FAILED and finish the work in progress. */
static void pf_enter_vf_resume_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
	/* leaving WIP also wakes up anyone waiting for the resume to finish */
	pf_exit_vf_wip(gt, vfid);
}
683 
/* RESUME was rejected: flag a state mismatch, then fail the RESUME sequence. */
static void pf_enter_vf_resume_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_resume_failed(gt, vfid);
}
689 
690 static void pf_enter_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
691 {
692 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
693 		pf_enter_vf_state_machine_bug(gt, vfid);
694 
695 	pf_queue_vf(gt, vfid);
696 }
697 
698 static bool pf_exit_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
699 {
700 	int err;
701 
702 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
703 		return false;
704 
705 	err = pf_send_vf_resume(gt, vfid);
706 	if (err == -EBUSY)
707 		pf_enter_vf_resume_send_resume(gt, vfid);
708 	else if (err == -EIO)
709 		pf_enter_vf_resume_rejected(gt, vfid);
710 	else if (err)
711 		pf_enter_vf_resume_failed(gt, vfid);
712 	else
713 		pf_enter_vf_resume_completed(gt, vfid);
714 	return true;
715 }
716 
717 static bool pf_enter_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
718 {
719 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) {
720 		pf_enter_vf_wip(gt, vfid);
721 		pf_enter_vf_resume_send_resume(gt, vfid);
722 		return true;
723 	}
724 
725 	return false;
726 }
727 
728 /**
729  * xe_gt_sriov_pf_control_resume_vf - Resume a VF.
730  * @gt: the &xe_gt
731  * @vfid: the VF identifier
732  *
733  * This function is for PF only.
734  *
735  * Return: 0 on success or a negative error code on failure.
736  */
737 int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
738 {
739 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESUME_WIP);
740 	int err;
741 
742 	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
743 		xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
744 		return -EPERM;
745 	}
746 
747 	if (!pf_enter_vf_resume_wip(gt, vfid)) {
748 		xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid);
749 		return -EALREADY;
750 	}
751 
752 	err = pf_wait_vf_wip_done(gt, vfid, timeout);
753 	if (err)
754 		return err;
755 
756 	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) {
757 		xe_gt_sriov_info(gt, "VF%u resumed!\n", vfid);
758 		return 0;
759 	}
760 
761 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED)) {
762 		xe_gt_sriov_dbg(gt, "VF%u resume failed!\n", vfid);
763 		return -EIO;
764 	}
765 
766 	xe_gt_sriov_dbg(gt, "VF%u resume was canceled!\n", vfid);
767 	return -ECANCELED;
768 }
769 
770 /**
771  * DOC: The VF STOP state machine
772  *
773  * The VF STOP state machine looks like::
774  *
775  *	 (READY,PAUSED,RESUMED)<-------<--------------------o
776  *	    |                                                \
777  *	   stop                                               \
778  *	    |                                                  \
779  *	....V..............................STOP_WIP......       \
780  *	:    \                                          :        o
781  *	:     \   o----<----busy                        :        |
782  *	:      \ /            /                         :        |
783  *	:       STOP_SEND_STOP--------failed--->--------o--->(STOP_FAILED)
784  *	:       /             \                         :        |
785  *	:    acked             rejected-------->--------o--->(MISMATCH)
786  *	:     /                                         :
787  *	:....o..............o...............o...........:
788  *	     |              |               |
789  *	  completed        flr            restart
790  *	     |              |               |
791  *	     V         .....V.....          V
792  *	 (STOPPED)     : FLR_WIP :       (READY)
793  *	               :.........:
794  *
795  * For the full state machine view, see `The VF state machine`_.
796  */
797 
798 static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
799 {
800 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP))
801 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP);
802 }
803 
804 static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid)
805 {
806 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED))
807 		pf_enter_vf_state_machine_bug(gt, vfid);
808 
809 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
810 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
811 	pf_exit_vf_mismatch(gt, vfid);
812 	pf_exit_vf_wip(gt, vfid);
813 }
814 
/* STOP sequence completed: the VF ends up in STOPPED. */
static void pf_enter_vf_stop_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_stopped(gt, vfid);
}
819 
/* STOP sequence failed: mark STOP_FAILED and finish the work in progress. */
static void pf_enter_vf_stop_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
	/* leaving WIP also wakes up anyone waiting for the stop to finish */
	pf_exit_vf_wip(gt, vfid);
}
825 
/* STOP was rejected: flag a state mismatch, then fail the STOP sequence. */
static void pf_enter_vf_stop_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_stop_failed(gt, vfid);
}
831 
832 static void pf_enter_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
833 {
834 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
835 		pf_enter_vf_state_machine_bug(gt, vfid);
836 
837 	pf_queue_vf(gt, vfid);
838 }
839 
840 static bool pf_exit_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
841 {
842 	int err;
843 
844 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
845 		return false;
846 
847 	err = pf_send_vf_stop(gt, vfid);
848 	if (err == -EBUSY)
849 		pf_enter_vf_stop_send_stop(gt, vfid);
850 	else if (err == -EIO)
851 		pf_enter_vf_stop_rejected(gt, vfid);
852 	else if (err)
853 		pf_enter_vf_stop_failed(gt, vfid);
854 	else
855 		pf_enter_vf_stop_completed(gt, vfid);
856 	return true;
857 }
858 
859 static bool pf_enter_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
860 {
861 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) {
862 		pf_enter_vf_wip(gt, vfid);
863 		pf_enter_vf_stop_send_stop(gt, vfid);
864 		return true;
865 	}
866 	return false;
867 }
868 
869 /**
870  * xe_gt_sriov_pf_control_stop_vf - Stop a VF.
871  * @gt: the &xe_gt
872  * @vfid: the VF identifier
873  *
874  * This function is for PF only.
875  *
876  * Return: 0 on success or a negative error code on failure.
877  */
878 int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
879 {
880 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_STOP_WIP);
881 	int err;
882 
883 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
884 		xe_gt_sriov_dbg(gt, "VF%u was already stopped!\n", vfid);
885 		return -ESTALE;
886 	}
887 
888 	if (!pf_enter_vf_stop_wip(gt, vfid)) {
889 		xe_gt_sriov_dbg(gt, "VF%u stop already in progress!\n", vfid);
890 		return -EALREADY;
891 	}
892 
893 	err = pf_wait_vf_wip_done(gt, vfid, timeout);
894 	if (err)
895 		return err;
896 
897 	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
898 		xe_gt_sriov_info(gt, "VF%u stopped!\n", vfid);
899 		return 0;
900 	}
901 
902 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED)) {
903 		xe_gt_sriov_dbg(gt, "VF%u stop failed!\n", vfid);
904 		return -EIO;
905 	}
906 
907 	xe_gt_sriov_dbg(gt, "VF%u stop was canceled!\n", vfid);
908 	return -ECANCELED;
909 }
910 
911 /**
912  * DOC: The VF FLR state machine
913  *
914  * The VF FLR state machine looks like::
915  *
916  *	 (READY,PAUSED,STOPPED)<------------<--------------o
917  *	    |                                               \
918  *	   flr                                               \
919  *	    |                                                 \
920  *	....V..........................FLR_WIP...........      \
921  *	:    \                                          :       \
922  *	:     \   o----<----busy                        :        |
923  *	:      \ /            /                         :        |
924  *	:       FLR_SEND_START---failed----->-----------o--->(FLR_FAILED)<---o
925  *	:        |            \                         :        |           |
926  *	:      acked           rejected----->-----------o--->(MISMATCH)      |
927  *	:        |                                      :        ^           |
928  *	:        v                                      :        |           |
929  *	:       FLR_WAIT_GUC                            :        |           |
930  *	:        |                                      :        |           |
931  *	:       done                                    :        |           |
932  *	:        |                                      :        |           |
933  *	:        v                                      :        |           |
934  *	:       FLR_GUC_DONE                            :        |           |
935  *	:        |                                      :        |           |
936  *	:       FLR_RESET_CONFIG---failed--->-----------o--------+-----------o
937  *	:        |                                      :        |           |
938  *	:       FLR_RESET_DATA                          :        |           |
939  *	:        |                                      :        |           |
940  *	:       FLR_RESET_MMIO                          :        |           |
941  *	:        |                                      :        |           |
942  *	:        | o----<----busy                       :        |           |
943  *	:        |/            /                        :        |           |
944  *	:       FLR_SEND_FINISH----failed--->-----------o--------+-----------o
945  *	:       /             \                         :        |
946  *	:     acked            rejected----->-----------o--------o
947  *	:     /                                         :
948  *	:....o..............................o...........:
949  *	     |                              |
950  *	  completed                       restart
951  *	     |                             /
952  *	     V                            /
953  *	  (READY)<----------<------------o
954  *
955  * For the full state machine view, see `The VF state machine`_.
956  */
957 
958 static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
959 {
960 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
961 		pf_enter_vf_state_machine_bug(gt, vfid);
962 
963 	pf_queue_vf(gt, vfid);
964 }
965 
966 static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
967 {
968 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
969 		xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid);
970 		return;
971 	}
972 
973 	pf_enter_vf_wip(gt, vfid);
974 	pf_enter_vf_flr_send_start(gt, vfid);
975 }
976 
977 static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
978 {
979 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
980 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH);
981 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO);
982 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA);
983 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
984 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE);
985 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
986 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START);
987 	}
988 }
989 
/* A completed FLR puts the VF into the READY state. */
static void pf_enter_vf_flr_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_ready(gt, vfid);
}
994 
995 static void pf_enter_vf_flr_failed(struct xe_gt *gt, unsigned int vfid)
996 {
997 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
998 		xe_gt_sriov_notice(gt, "VF%u FLR failed!\n", vfid);
999 	pf_exit_vf_wip(gt, vfid);
1000 }
1001 
/* A rejected FLR request flags a MISMATCH first and then fails the FLR. */
static void pf_enter_vf_flr_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);

	pf_enter_vf_flr_failed(gt, vfid);
}
1007 
1008 static void pf_enter_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
1009 {
1010 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
1011 		pf_enter_vf_state_machine_bug(gt, vfid);
1012 
1013 	pf_queue_vf(gt, vfid);
1014 }
1015 
1016 static bool pf_exit_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
1017 {
1018 	int err;
1019 
1020 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
1021 		return false;
1022 
1023 	err = pf_send_vf_flr_finish(gt, vfid);
1024 	if (err == -EBUSY)
1025 		pf_enter_vf_flr_send_finish(gt, vfid);
1026 	else if (err == -EIO)
1027 		pf_enter_vf_flr_rejected(gt, vfid);
1028 	else if (err)
1029 		pf_enter_vf_flr_failed(gt, vfid);
1030 	else
1031 		pf_enter_vf_flr_completed(gt, vfid);
1032 	return true;
1033 }
1034 
1035 static void pf_enter_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
1036 {
1037 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
1038 		pf_enter_vf_state_machine_bug(gt, vfid);
1039 
1040 	pf_queue_vf(gt, vfid);
1041 }
1042 
1043 static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
1044 {
1045 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
1046 		return false;
1047 
1048 	xe_gt_sriov_pf_sanitize_hw(gt, vfid);
1049 
1050 	pf_enter_vf_flr_send_finish(gt, vfid);
1051 	return true;
1052 }
1053 
1054 static void pf_enter_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
1055 {
1056 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
1057 		pf_enter_vf_state_machine_bug(gt, vfid);
1058 
1059 	pf_queue_vf(gt, vfid);
1060 }
1061 
1062 static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
1063 {
1064 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
1065 		return false;
1066 
1067 	xe_gt_sriov_pf_service_reset(gt, vfid);
1068 	xe_gt_sriov_pf_monitor_flr(gt, vfid);
1069 
1070 	pf_enter_vf_flr_reset_mmio(gt, vfid);
1071 	return true;
1072 }
1073 
1074 static void pf_enter_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
1075 {
1076 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
1077 		pf_enter_vf_state_machine_bug(gt, vfid);
1078 
1079 	pf_queue_vf(gt, vfid);
1080 }
1081 
1082 static bool pf_exit_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
1083 {
1084 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
1085 	int err;
1086 
1087 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
1088 		return false;
1089 
1090 	err = xe_gt_sriov_pf_config_sanitize(gt, vfid, timeout);
1091 	if (err)
1092 		pf_enter_vf_flr_failed(gt, vfid);
1093 	else
1094 		pf_enter_vf_flr_reset_data(gt, vfid);
1095 	return true;
1096 }
1097 
1098 static void pf_enter_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
1099 {
1100 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC))
1101 		pf_enter_vf_state_machine_bug(gt, vfid);
1102 }
1103 
1104 static bool pf_exit_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
1105 {
1106 	return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
1107 }
1108 
1109 static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
1110 {
1111 	int err;
1112 
1113 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
1114 		return false;
1115 
1116 	/* GuC may actually send a FLR_DONE before we get a RESPONSE */
1117 	pf_enter_vf_flr_wait_guc(gt, vfid);
1118 
1119 	err = pf_send_vf_flr_start(gt, vfid);
1120 	if (err) {
1121 		/* send failed, so we shouldn't expect FLR_DONE from GuC */
1122 		pf_exit_vf_flr_wait_guc(gt, vfid);
1123 
1124 		if (err == -EBUSY)
1125 			pf_enter_vf_flr_send_start(gt, vfid);
1126 		else if (err == -EIO)
1127 			pf_enter_vf_flr_rejected(gt, vfid);
1128 		else
1129 			pf_enter_vf_flr_failed(gt, vfid);
1130 	} else {
1131 		/*
1132 		 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
1133 		 * but since GuC didn't complain, we may clear MISMATCH
1134 		 */
1135 		pf_exit_vf_mismatch(gt, vfid);
1136 	}
1137 
1138 	return true;
1139 }
1140 
1141 static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
1142 {
1143 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
1144 		return false;
1145 
1146 	pf_enter_vf_flr_reset_config(gt, vfid);
1147 	return true;
1148 }
1149 
1150 static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
1151 {
1152 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
1153 		pf_queue_vf(gt, vfid);
1154 }
1155 
1156 /**
1157  * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence.
1158  * @gt: the &xe_gt
1159  * @vfid: the VF identifier
1160  *
1161  * This function is for PF only.
1162  *
1163  * Return: 0 on success or a negative error code on failure.
1164  */
1165 int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid)
1166 {
1167 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP);
1168 	int err;
1169 
1170 	pf_enter_vf_flr_wip(gt, vfid);
1171 
1172 	err = pf_wait_vf_wip_done(gt, vfid, timeout);
1173 	if (err) {
1174 		xe_gt_sriov_notice(gt, "VF%u FLR didn't finish in %u ms (%pe)\n",
1175 				   vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
1176 		return err;
1177 	}
1178 
1179 	if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
1180 		return -EIO;
1181 
1182 	return 0;
1183 }
1184 
1185 /**
1186  * DOC: The VF FLR Flow with GuC
1187  *
1188  * The VF FLR flow includes several steps::
1189  *
1190  *	         PF                        GUC             PCI
1191  *	========================================================
1192  *	         |                          |               |
1193  *	(1)      |                         [ ] <----- FLR --|
1194  *	         |                         [ ]              :
1195  *	(2)     [ ] <-------- NOTIFY FLR --[ ]
1196  *	        [ ]                         |
1197  *	(3)     [ ]                         |
1198  *	        [ ]                         |
1199  *	        [ ]-- START FLR ---------> [ ]
1200  *	         |                         [ ]
1201  *	(4)      |                         [ ]
1202  *	         |                         [ ]
1203  *	        [ ] <--------- FLR DONE -- [ ]
1204  *	        [ ]                         |
1205  *	(5)     [ ]                         |
1206  *	        [ ]                         |
1207  *	        [ ]-- FINISH FLR --------> [ ]
1208  *	         |                          |
1209  *
1210  * * Step 1: PCI HW generates interrupt to the GuC about VF FLR
1211  * * Step 2: GuC FW sends G2H notification to the PF about VF FLR
1212  * * Step 2a: on some platforms G2H is only received from root GuC
1213  * * Step 3: PF sends H2G request to the GuC to start VF FLR sequence
1214  * * Step 3a: on some platforms PF must send H2G to all other GuCs
1215  * * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done
1216  * * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished
1217  */
1218 
1219 static bool needs_dispatch_flr(struct xe_device *xe)
1220 {
1221 	return xe->info.platform == XE_PVC;
1222 }
1223 
1224 static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid)
1225 {
1226 	struct xe_device *xe = gt_to_xe(gt);
1227 	struct xe_gt *gtit;
1228 	unsigned int gtid;
1229 
1230 	xe_gt_sriov_info(gt, "VF%u FLR\n", vfid);
1231 
1232 	if (needs_dispatch_flr(xe)) {
1233 		for_each_gt(gtit, xe, gtid)
1234 			pf_enter_vf_flr_wip(gtit, vfid);
1235 	} else {
1236 		pf_enter_vf_flr_wip(gt, vfid);
1237 	}
1238 }
1239 
1240 static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid)
1241 {
1242 	if (!pf_exit_vf_flr_wait_guc(gt, vfid)) {
1243 		xe_gt_sriov_dbg(gt, "Received out of order 'VF%u FLR done'\n", vfid);
1244 		pf_enter_vf_mismatch(gt, vfid);
1245 		return;
1246 	}
1247 
1248 	pf_enter_vf_flr_guc_done(gt, vfid);
1249 }
1250 
1251 static void pf_handle_vf_pause_done(struct xe_gt *gt, u32 vfid)
1252 {
1253 	if (!pf_exit_pause_wait_guc(gt, vfid)) {
1254 		xe_gt_sriov_dbg(gt, "Received out of order 'VF%u PAUSE done'\n", vfid);
1255 		pf_enter_vf_mismatch(gt, vfid);
1256 		return;
1257 	}
1258 
1259 	pf_enter_vf_pause_guc_done(gt, vfid);
1260 }
1261 
1262 static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid)
1263 {
1264 	xe_gt_sriov_dbg_verbose(gt, "received VF%u event %#x\n", vfid, eventid);
1265 
1266 	if (vfid > xe_gt_sriov_pf_get_totalvfs(gt))
1267 		return -EPROTO;
1268 
1269 	switch (eventid) {
1270 	case GUC_PF_NOTIFY_VF_FLR:
1271 		pf_handle_vf_flr(gt, vfid);
1272 		break;
1273 	case GUC_PF_NOTIFY_VF_FLR_DONE:
1274 		pf_handle_vf_flr_done(gt, vfid);
1275 		break;
1276 	case GUC_PF_NOTIFY_VF_PAUSE_DONE:
1277 		pf_handle_vf_pause_done(gt, vfid);
1278 		break;
1279 	case GUC_PF_NOTIFY_VF_FIXUP_DONE:
1280 		break;
1281 	default:
1282 		return -ENOPKG;
1283 	}
1284 	return 0;
1285 }
1286 
1287 static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid)
1288 {
1289 	switch (eventid) {
1290 	case GUC_PF_NOTIFY_VF_ENABLE:
1291 		xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n",
1292 					str_enabled_disabled(true),
1293 					str_enabled_disabled(false));
1294 		break;
1295 	default:
1296 		return -ENOPKG;
1297 	}
1298 	return 0;
1299 }
1300 
1301 /**
1302  * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC.
1303  * @gt: the &xe_gt
1304  * @msg: the G2H message
1305  * @len: the length of the G2H message
1306  *
1307  * This function is for PF only.
1308  *
1309  * Return: 0 on success or a negative error code on failure.
1310  */
1311 int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
1312 {
1313 	u32 vfid;
1314 	u32 eventid;
1315 
1316 	xe_gt_assert(gt, len);
1317 	xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
1318 	xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
1319 	xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
1320 		     GUC_ACTION_GUC2PF_VF_STATE_NOTIFY);
1321 
1322 	if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt))))
1323 		return -EPROTO;
1324 
1325 	if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0])))
1326 		return -EPFNOSUPPORT;
1327 
1328 	if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN))
1329 		return -EPROTO;
1330 
1331 	vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]);
1332 	eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]);
1333 
1334 	return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid);
1335 }
1336 
1337 static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
1338 {
1339 	if (pf_exit_vf_flr_send_start(gt, vfid))
1340 		return true;
1341 
1342 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) {
1343 		xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
1344 					control_bit_to_string(XE_GT_SRIOV_STATE_FLR_WAIT_GUC));
1345 		return false;
1346 	}
1347 
1348 	if (pf_exit_vf_flr_guc_done(gt, vfid))
1349 		return true;
1350 
1351 	if (pf_exit_vf_flr_reset_config(gt, vfid))
1352 		return true;
1353 
1354 	if (pf_exit_vf_flr_reset_data(gt, vfid))
1355 		return true;
1356 
1357 	if (pf_exit_vf_flr_reset_mmio(gt, vfid))
1358 		return true;
1359 
1360 	if (pf_exit_vf_flr_send_finish(gt, vfid))
1361 		return true;
1362 
1363 	if (pf_exit_vf_stop_send_stop(gt, vfid))
1364 		return true;
1365 
1366 	if (pf_exit_vf_pause_send_pause(gt, vfid))
1367 		return true;
1368 
1369 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) {
1370 		xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
1371 					control_bit_to_string(XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC));
1372 		return true;
1373 	}
1374 
1375 	if (pf_exit_vf_pause_guc_done(gt, vfid))
1376 		return true;
1377 
1378 	if (pf_exit_vf_pause_save_guc(gt, vfid))
1379 		return true;
1380 
1381 	if (pf_exit_vf_resume_send_resume(gt, vfid))
1382 		return true;
1383 
1384 	return false;
1385 }
1386 
1387 static unsigned int pf_control_state_index(struct xe_gt *gt,
1388 					   struct xe_gt_sriov_control_state *cs)
1389 {
1390 	return container_of(cs, struct xe_gt_sriov_metadata, control) - gt->sriov.pf.vfs;
1391 }
1392 
1393 static void pf_worker_find_work(struct xe_gt *gt)
1394 {
1395 	struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;
1396 	struct xe_gt_sriov_control_state *cs;
1397 	unsigned int vfid;
1398 	bool empty;
1399 	bool more;
1400 
1401 	spin_lock(&pfc->lock);
1402 	cs = list_first_entry_or_null(&pfc->list, struct xe_gt_sriov_control_state, link);
1403 	if (cs)
1404 		list_del_init(&cs->link);
1405 	empty = list_empty(&pfc->list);
1406 	spin_unlock(&pfc->lock);
1407 
1408 	if (!cs)
1409 		return;
1410 
1411 	/* VF metadata structures are indexed by the VFID */
1412 	vfid = pf_control_state_index(gt, cs);
1413 	xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));
1414 
1415 	more = pf_process_vf_state_machine(gt, vfid);
1416 	if (more)
1417 		pf_queue_vf(gt, vfid);
1418 	else if (!empty)
1419 		pf_queue_control_worker(gt);
1420 }
1421 
1422 static void control_worker_func(struct work_struct *w)
1423 {
1424 	struct xe_gt *gt = container_of(w, struct xe_gt, sriov.pf.control.worker);
1425 
1426 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
1427 	pf_worker_find_work(gt);
1428 }
1429 
1430 static void pf_stop_worker(struct xe_gt *gt)
1431 {
1432 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
1433 	cancel_work_sync(&gt->sriov.pf.control.worker);
1434 }
1435 
/* Managed release action: @data is the &xe_gt whose control worker is stopped. */
static void control_fini_action(struct drm_device *dev, void *data)
{
	pf_stop_worker(data);
}
1442 
1443 /**
1444  * xe_gt_sriov_pf_control_init() - Initialize PF's control data.
1445  * @gt: the &xe_gt
1446  *
1447  * This function is for PF only.
1448  *
1449  * Return: 0 on success or a negative error code on failure.
1450  */
1451 int xe_gt_sriov_pf_control_init(struct xe_gt *gt)
1452 {
1453 	struct xe_device *xe = gt_to_xe(gt);
1454 	unsigned int n, totalvfs;
1455 
1456 	xe_gt_assert(gt, IS_SRIOV_PF(xe));
1457 
1458 	totalvfs = xe_sriov_pf_get_totalvfs(xe);
1459 	for (n = 0; n <= totalvfs; n++) {
1460 		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, n);
1461 
1462 		init_completion(&cs->done);
1463 		INIT_LIST_HEAD(&cs->link);
1464 	}
1465 
1466 	spin_lock_init(&gt->sriov.pf.control.lock);
1467 	INIT_LIST_HEAD(&gt->sriov.pf.control.list);
1468 	INIT_WORK(&gt->sriov.pf.control.worker, control_worker_func);
1469 
1470 	return drmm_add_action_or_reset(&xe->drm, control_fini_action, gt);
1471 }
1472 
/**
 * xe_gt_sriov_pf_control_restart() - Restart SR-IOV control data after a GT reset.
 * @gt: the &xe_gt
 *
 * Any per-VF status maintained by the PF or any ongoing VF control activity
 * performed by the PF must be reset or cancelled when the GT is reset.
 *
 * The control worker is stopped first, then every VF (1..totalvfs) is moved
 * to the READY state.
 *
 * This function is for PF only.
 */
void xe_gt_sriov_pf_control_restart(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	unsigned int totalvfs;
	unsigned int vfid;

	xe_gt_assert(gt, IS_SRIOV_PF(xe));

	pf_stop_worker(gt);

	totalvfs = xe_sriov_pf_get_totalvfs(xe);
	vfid = 1;
	while (vfid <= totalvfs) {
		pf_enter_vf_ready(gt, vfid);
		vfid++;
	}
}
1495