1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2023-2024 Intel Corporation
4  */
5 
6 #include <drm/drm_managed.h>
7 
8 #include "abi/guc_actions_sriov_abi.h"
9 
10 #include "xe_device.h"
11 #include "xe_gt.h"
12 #include "xe_gt_sriov_pf.h"
13 #include "xe_gt_sriov_pf_config.h"
14 #include "xe_gt_sriov_pf_control.h"
15 #include "xe_gt_sriov_pf_helpers.h"
16 #include "xe_gt_sriov_pf_migration.h"
17 #include "xe_gt_sriov_pf_monitor.h"
18 #include "xe_gt_sriov_printk.h"
19 #include "xe_guc_ct.h"
20 #include "xe_sriov.h"
21 #include "xe_sriov_pf_control.h"
22 #include "xe_sriov_pf_service.h"
23 #include "xe_tile.h"
24 
25 static const char *control_cmd_to_string(u32 cmd)
26 {
27 	switch (cmd) {
28 	case GUC_PF_TRIGGER_VF_PAUSE:
29 		return "PAUSE";
30 	case GUC_PF_TRIGGER_VF_RESUME:
31 		return "RESUME";
32 	case GUC_PF_TRIGGER_VF_STOP:
33 		return "STOP";
34 	case GUC_PF_TRIGGER_VF_FLR_START:
35 		return "FLR_START";
36 	case GUC_PF_TRIGGER_VF_FLR_FINISH:
37 		return "FLR_FINISH";
38 	default:
39 		return "<unknown>";
40 	}
41 }
42 
43 static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd)
44 {
45 	u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = {
46 		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
47 		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
48 		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL),
49 		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid),
50 		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd),
51 	};
52 	int ret;
53 
54 	ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
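	/* a positive return means unexpected response data from the GuC: treat as protocol error */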
55 	return ret > 0 ? -EPROTO : ret;
56 }
57 
58 static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd)
59 {
60 	int err;
61 
62 	xe_gt_assert(gt, vfid != PFID);
63 	xe_gt_sriov_dbg_verbose(gt, "sending VF%u control command %s\n",
64 				vfid, control_cmd_to_string(cmd));
65 
66 	err = guc_action_vf_control_cmd(&gt->uc.guc, vfid, cmd);
67 	if (unlikely(err))
68 		xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n",
69 				vfid, control_cmd_to_string(cmd), ERR_PTR(err));
70 	return err;
71 }
72 
73 static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid)
74 {
75 	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE);
76 }
77 
78 static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid)
79 {
80 	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME);
81 }
82 
83 static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid)
84 {
85 	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP);
86 }
87 
88 static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid)
89 {
90 	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START);
91 }
92 
93 static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid)
94 {
95 	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH);
96 }
97 
98 /**
99  * DOC: The VF state machine
100  *
101  * The simplified VF state machine could be presented as::
102  *
103  *	               pause--------------------------o
104  *	              /                               |
105  *	             /                                v
106  *	      (READY)<------------------resume-----(PAUSED)
107  *	         ^   \                             /    /
108  *	         |    \                           /    /
109  *	         |     stop---->(STOPPED)<----stop    /
110  *	         |                  /                /
111  *	         |                 /                /
112  *	         o--------<-----flr                /
113  *	          \                               /
114  *	           o------<--------------------flr
115  *
116  * Where:
117  *
118  * * READY - represents a state in which the VF is fully operable
119  * * PAUSED - represents a state in which VF activity is temporarily suspended
120  * * STOPPED - represents a state in which VF activity is definitively halted
121  * * pause - represents a request to temporarily suspend VF activity
122  * * resume - represents a request to resume VF activity
123  * * stop - represents a request to definitively halt VF activity
124  * * flr - represents a request to perform VF FLR to restore VF activity
125  *
126  * However, each state transition requires additional steps that involve
127  * communication with GuC that might fail or be interrupted by other requests::
128  *
129  *	                   .................................WIP....
130  *	                   :                                      :
131  *	          pause--------------------->PAUSE_WIP----------------------------o
132  *	         /         :                /         \           :               |
133  *	        /          :    o----<---stop          flr--o     :               |
134  *	       /           :    |           \         /     |     :               V
135  *	(READY,RESUMED)<--------+------------RESUME_WIP<----+--<-----resume--(PAUSED)
136  *	  ^ \  \           :    |                           |     :          /   /
137  *	  |  \  \          :    |                           |     :         /   /
138  *	  |   \  \         :    |                           |     :        /   /
139  *	  |    \  \        :    o----<----------------------+--<-------stop   /
140  *	  |     \  \       :    |                           |     :          /
141  *	  |      \  \      :    V                           |     :         /
142  *	  |       \  stop----->STOP_WIP---------flr--->-----o     :        /
143  *	  |        \       :    |                           |     :       /
144  *	  |         \      :    |                           V     :      /
145  *	  |          flr--------+----->----------------->FLR_WIP<-----flr
146  *	  |                :    |                        /  ^     :
147  *	  |                :    |                       /   |     :
148  *	  o--------<-------:----+-----<----------------o    |     :
149  *	                   :    |                           |     :
150  *	                   :....|...........................|.....:
151  *	                        |                           |
152  *	                        V                           |
153  *	                     (STOPPED)--------------------flr
154  *
155  * For details about each internal WIP state machine see:
156  *
157  * * `The VF PAUSE state machine`_
158  * * `The VF RESUME state machine`_
159  * * `The VF STOP state machine`_
160  * * `The VF FLR state machine`_
161  */
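
/*
 * Illustrative sketch (not part of the driver): a PF-only caller, such as a
 * hypothetical debugfs handler, could drive the simplified state machine
 * above using only the public helpers defined later in this file, each of
 * which returns 0 or a negative error code::
 *
 *	err = xe_gt_sriov_pf_control_pause_vf(gt, vfid);      READY -> PAUSED
 *	err = xe_gt_sriov_pf_control_resume_vf(gt, vfid);     PAUSED -> READY
 *	err = xe_gt_sriov_pf_control_stop_vf(gt, vfid);       ... -> STOPPED
 *	err = xe_gt_sriov_pf_control_trigger_flr(gt, vfid);   start FLR
 *	err = xe_gt_sriov_pf_control_wait_flr(gt, vfid);      FLR done -> READY
 */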
162 
163 #ifdef CONFIG_DRM_XE_DEBUG_SRIOV
164 static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
165 {
166 	switch (bit) {
167 #define CASE2STR(_X) \
168 	case XE_GT_SRIOV_STATE_##_X: return #_X
169 	CASE2STR(WIP);
170 	CASE2STR(FLR_WIP);
171 	CASE2STR(FLR_SEND_START);
172 	CASE2STR(FLR_WAIT_GUC);
173 	CASE2STR(FLR_GUC_DONE);
174 	CASE2STR(FLR_SYNC);
175 	CASE2STR(FLR_RESET_CONFIG);
176 	CASE2STR(FLR_RESET_DATA);
177 	CASE2STR(FLR_RESET_MMIO);
178 	CASE2STR(FLR_SEND_FINISH);
179 	CASE2STR(FLR_FAILED);
180 	CASE2STR(PAUSE_WIP);
181 	CASE2STR(PAUSE_SEND_PAUSE);
182 	CASE2STR(PAUSE_WAIT_GUC);
183 	CASE2STR(PAUSE_GUC_DONE);
184 	CASE2STR(PAUSE_SAVE_GUC);
185 	CASE2STR(PAUSE_FAILED);
186 	CASE2STR(PAUSED);
187 	CASE2STR(RESUME_WIP);
188 	CASE2STR(RESUME_SEND_RESUME);
189 	CASE2STR(RESUME_FAILED);
190 	CASE2STR(RESUMED);
191 	CASE2STR(STOP_WIP);
192 	CASE2STR(STOP_SEND_STOP);
193 	CASE2STR(STOP_FAILED);
194 	CASE2STR(STOPPED);
195 	CASE2STR(MISMATCH);
196 #undef  CASE2STR
197 	default: return "?";
198 	}
199 }
200 #endif
201 
202 static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)
203 {
204 	switch (bit) {
205 	case XE_GT_SRIOV_STATE_FLR_WAIT_GUC:
206 	case XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC:
207 		return HZ / 2;
208 	case XE_GT_SRIOV_STATE_FLR_WIP:
209 	case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG:
210 		return 5 * HZ;
211 	default:
212 		return HZ;
213 	}
214 }
215 
216 static struct xe_gt_sriov_control_state *pf_pick_vf_control(struct xe_gt *gt, unsigned int vfid)
217 {
218 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
219 	xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));
220 
221 	return &gt->sriov.pf.vfs[vfid].control;
222 }
223 
224 static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid)
225 {
226 	struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
227 
228 	return &cs->state;
229 }
230 
231 static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid,
232 			      enum xe_gt_sriov_control_bits bit)
233 {
234 	return test_bit(bit, pf_peek_vf_state(gt, vfid));
235 }
236 
237 static void pf_dump_vf_state(struct xe_gt *gt, unsigned int vfid)
238 {
239 	unsigned long state = *pf_peek_vf_state(gt, vfid);
240 	enum xe_gt_sriov_control_bits bit;
241 
242 	if (state) {
243 		xe_gt_sriov_dbg_verbose(gt, "VF%u state %#lx%s%*pbl\n",
244 					vfid, state, state ? " bits " : "",
245 					(int)BITS_PER_LONG, &state);
246 		for_each_set_bit(bit, &state, BITS_PER_LONG)
247 			xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d)\n",
248 						vfid, control_bit_to_string(bit), bit);
249 	} else {
250 		xe_gt_sriov_dbg_verbose(gt, "VF%u state READY\n", vfid);
251 	}
252 }
253 
254 static bool pf_expect_vf_state(struct xe_gt *gt, unsigned int vfid,
255 			       enum xe_gt_sriov_control_bits bit)
256 {
257 	bool result = pf_check_vf_state(gt, vfid, bit);
258 
259 	if (unlikely(!result))
260 		pf_dump_vf_state(gt, vfid);
261 
262 	return result;
263 }
264 
265 static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid,
266 				   enum xe_gt_sriov_control_bits bit)
267 {
268 	bool result = !pf_check_vf_state(gt, vfid, bit);
269 
270 	if (unlikely(!result))
271 		pf_dump_vf_state(gt, vfid);
272 
273 	return result;
274 }
275 
276 static void pf_track_vf_state(struct xe_gt *gt, unsigned int vfid,
277 			      enum xe_gt_sriov_control_bits bit,
278 			      const char *what)
279 {
280 	xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) %s\n",
281 				vfid, control_bit_to_string(bit), bit, what);
282 }
283 
284 static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid,
285 			      enum xe_gt_sriov_control_bits bit)
286 {
287 	if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) {
288 		pf_track_vf_state(gt, vfid, bit, "enter");
289 		return true;
290 	}
291 	return false;
292 }
293 
294 static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid,
295 			     enum xe_gt_sriov_control_bits bit)
296 {
297 	if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) {
298 		pf_track_vf_state(gt, vfid, bit, "exit");
299 		return true;
300 	}
301 	return false;
302 }
303 
304 static void pf_escape_vf_state(struct xe_gt *gt, unsigned int vfid,
305 			       enum xe_gt_sriov_control_bits bit)
306 {
307 	if (pf_exit_vf_state(gt, vfid, bit))
308 		xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) escaped by %ps\n",
309 					vfid, control_bit_to_string(bit), bit,
310 					__builtin_return_address(0));
311 }
312 
313 static void pf_enter_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
314 {
315 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) {
316 		xe_gt_sriov_dbg(gt, "VF%u state mismatch detected by %ps\n",
317 				vfid, __builtin_return_address(0));
318 		pf_dump_vf_state(gt, vfid);
319 	}
320 }
321 
322 static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
323 {
324 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH))
325 		xe_gt_sriov_dbg(gt, "VF%u state mismatch cleared by %ps\n",
326 				vfid, __builtin_return_address(0));
327 
328 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
329 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
330 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
331 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED);
332 }
333 
334 #define pf_enter_vf_state_machine_bug(gt, vfid) ({	\
335 	pf_enter_vf_mismatch((gt), (vfid));		\
336 })
337 
338 static void pf_queue_control_worker(struct xe_gt *gt)
339 {
340 	struct xe_device *xe = gt_to_xe(gt);
341 
342 	xe_gt_assert(gt, IS_SRIOV_PF(xe));
343 
344 	queue_work(xe->sriov.wq, &gt->sriov.pf.control.worker);
345 }
346 
347 static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid)
348 {
349 	struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;
350 
351 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
352 
353 	spin_lock(&pfc->lock);
354 	list_move_tail(&gt->sriov.pf.vfs[vfid].control.link, &pfc->list);
355 	spin_unlock(&pfc->lock);
356 
357 	pf_queue_control_worker(gt);
358 }
359 
360 static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid);
361 static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid);
362 static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid);
363 static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid);
364 
365 static bool pf_enter_vf_wip(struct xe_gt *gt, unsigned int vfid)
366 {
367 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
368 		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
369 
370 		reinit_completion(&cs->done);
371 		return true;
372 	}
373 	return false;
374 }
375 
376 static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid)
377 {
378 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
379 		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
380 
381 		pf_exit_vf_flr_wip(gt, vfid);
382 		pf_exit_vf_stop_wip(gt, vfid);
383 		pf_exit_vf_pause_wip(gt, vfid);
384 		pf_exit_vf_resume_wip(gt, vfid);
385 
386 		complete_all(&cs->done);
387 	}
388 }
389 
390 static int pf_wait_vf_wip_done(struct xe_gt *gt, unsigned int vfid, unsigned long timeout)
391 {
392 	struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
393 
394 	return wait_for_completion_timeout(&cs->done, timeout) ? 0 : -ETIMEDOUT;
395 }
396 
397 static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
398 {
399 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
400 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED);
401 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
402 	pf_exit_vf_mismatch(gt, vfid);
403 	pf_exit_vf_wip(gt, vfid);
404 }
405 
406 /**
407  * DOC: The VF PAUSE state machine
408  *
409  * The VF PAUSE state machine looks like::
410  *
411  *	 (READY,RESUMED)<-------------<---------------------o---------o
412  *	    |                                                \         \
413  *	   pause                                              \         \
414  *	    |                                                  \         \
415  *	....V...........................PAUSE_WIP........       \         \
416  *	:    \                                          :        o         \
417  *	:     \   o------<-----busy                     :        |          \
418  *	:      \ /              /                       :        |           |
419  *	:       PAUSE_SEND_PAUSE ---failed--->----------o--->(PAUSE_FAILED)  |
420  *	:        |              \                       :        |           |
421  *	:      acked             rejected---->----------o--->(MISMATCH)     /
422  *	:        |                                      :                  /
423  *	:        v                                      :                 /
424  *	:       PAUSE_WAIT_GUC                          :                /
425  *	:        |                                      :               /
426  *	:       done                                    :              /
427  *	:        |                                      :             /
428  *	:        v                                      :            /
429  *	:       PAUSE_GUC_DONE                          o-----restart
430  *	:        |                                      :
431  *	:        |   o---<--busy                        :
432  *	:        v  /         /                         :
433  *	:       PAUSE_SAVE_GUC                          :
434  *	:      /                                        :
435  *	:     /                                         :
436  *	:....o..............o...............o...........:
437  *	     |              |               |
438  *	  completed        flr             stop
439  *	     |              |               |
440  *	     V         .....V.....    ......V.....
441  *	 (PAUSED)      : FLR_WIP :    : STOP_WIP :
442  *	               :.........:    :..........:
443  *
444  * For the full state machine view, see `The VF state machine`_.
445  */
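
/*
 * On the successful path the control worker advances a VF being paused
 * through the PAUSE_WIP sub-states in this order::
 *
 *	PAUSE_SEND_PAUSE -> PAUSE_WAIT_GUC -> PAUSE_GUC_DONE -> PAUSE_SAVE_GUC
 */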
446 
447 static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
448 {
449 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
450 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE);
451 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
452 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE);
453 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC);
454 	}
455 }
456 
457 static void pf_enter_vf_paused(struct xe_gt *gt, unsigned int vfid)
458 {
459 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED))
460 		pf_enter_vf_state_machine_bug(gt, vfid);
461 
462 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
463 	pf_exit_vf_mismatch(gt, vfid);
464 	pf_exit_vf_wip(gt, vfid);
465 }
466 
467 static void pf_enter_vf_pause_completed(struct xe_gt *gt, unsigned int vfid)
468 {
469 	pf_enter_vf_paused(gt, vfid);
470 }
471 
472 static void pf_enter_vf_pause_failed(struct xe_gt *gt, unsigned int vfid)
473 {
474 	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
475 	pf_exit_vf_wip(gt, vfid);
476 }
477 
478 static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid)
479 {
480 	pf_enter_vf_mismatch(gt, vfid);
481 	pf_enter_vf_pause_failed(gt, vfid);
482 }
483 
484 static void pf_enter_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
485 {
486 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
487 		pf_enter_vf_state_machine_bug(gt, vfid);
488 }
489 
490 static bool pf_exit_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
491 {
492 	int err;
493 
494 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
495 		return false;
496 
497 	err = xe_gt_sriov_pf_migration_save_guc_state(gt, vfid);
498 	if (err) {
499 		/* retry if busy */
500 		if (err == -EBUSY) {
501 			pf_enter_vf_pause_save_guc(gt, vfid);
502 			return true;
503 		}
504 		/* give up on error */
505 		if (err == -EIO)
506 			pf_enter_vf_mismatch(gt, vfid);
507 	}
508 
509 	pf_enter_vf_pause_completed(gt, vfid);
510 	return true;
511 }
512 
513 static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
514 {
515 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
516 		return false;
517 
518 	pf_enter_vf_pause_save_guc(gt, vfid);
519 	return true;
520 }
521 
522 static void pf_enter_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
523 {
524 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
525 		pf_queue_vf(gt, vfid);
526 }
527 
528 static void pf_enter_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
529 {
530 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC))
531 		pf_enter_vf_state_machine_bug(gt, vfid);
532 }
533 
534 static bool pf_exit_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
535 {
536 	return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
537 }
538 
539 static void pf_enter_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
540 {
541 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
542 		pf_enter_vf_state_machine_bug(gt, vfid);
543 
544 	pf_queue_vf(gt, vfid);
545 }
546 
547 static bool pf_exit_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
548 {
549 	int err;
550 
551 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
552 		return false;
553 
554 	/* GuC may actually send a PAUSE_DONE before we get a RESPONSE */
555 	pf_enter_pause_wait_guc(gt, vfid);
556 
557 	err = pf_send_vf_pause(gt, vfid);
558 	if (err) {
559 		/* send failed, so we shouldn't expect PAUSE_DONE from GuC */
560 		pf_exit_pause_wait_guc(gt, vfid);
561 
562 		if (err == -EBUSY)
563 			pf_enter_vf_pause_send_pause(gt, vfid);
564 		else if (err == -EIO)
565 			pf_enter_vf_pause_rejected(gt, vfid);
566 		else
567 			pf_enter_vf_pause_failed(gt, vfid);
568 	} else {
569 		/*
570 		 * We have already moved to WAIT_GUC, maybe even to GUC_DONE,
571 		 * but since the GuC didn't complain, we may clear MISMATCH.
572 		 */
573 		pf_exit_vf_mismatch(gt, vfid);
574 	}
575 
576 	return true;
577 }
578 
579 static bool pf_enter_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
580 {
581 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
582 		pf_enter_vf_wip(gt, vfid);
583 		pf_enter_vf_pause_send_pause(gt, vfid);
584 		return true;
585 	}
586 
587 	return false;
588 }
589 
590 /**
591  * xe_gt_sriov_pf_control_pause_vf - Pause a VF.
592  * @gt: the &xe_gt
593  * @vfid: the VF identifier
594  *
595  * This function is for PF only.
596  *
597  * Return: 0 on success or a negative error code on failure.
598  */
599 int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid)
600 {
601 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_PAUSE_WIP);
602 	int err;
603 
604 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
605 		xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
606 		return -EPERM;
607 	}
608 
609 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
610 		xe_gt_sriov_dbg(gt, "VF%u was already paused!\n", vfid);
611 		return -ESTALE;
612 	}
613 
614 	if (!pf_enter_vf_pause_wip(gt, vfid)) {
615 		xe_gt_sriov_dbg(gt, "VF%u pause already in progress!\n", vfid);
616 		return -EALREADY;
617 	}
618 
619 	err = pf_wait_vf_wip_done(gt, vfid, timeout);
620 	if (err) {
621 		xe_gt_sriov_dbg(gt, "VF%u pause didn't finish in %u ms (%pe)\n",
622 				vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
623 		return err;
624 	}
625 
626 	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
627 		xe_gt_sriov_dbg(gt, "VF%u paused!\n", vfid);
628 		return 0;
629 	}
630 
631 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED)) {
632 		xe_gt_sriov_dbg(gt, "VF%u pause failed!\n", vfid);
633 		return -EIO;
634 	}
635 
636 	xe_gt_sriov_dbg(gt, "VF%u pause was canceled!\n", vfid);
637 	return -ECANCELED;
638 }
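
/*
 * Illustrative example (not part of the driver): a caller that only cares
 * that the VF ends up paused may want to treat -ESTALE (the VF was already
 * paused) as success::
 *
 *	err = xe_gt_sriov_pf_control_pause_vf(gt, vfid);
 *	if (err == -ESTALE)
 *		err = 0;
 */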
639 
640 /**
641  * DOC: The VF RESUME state machine
642  *
643  * The VF RESUME state machine looks like::
644  *
645  *	 (PAUSED)<-----------------<------------------------o
646  *	    |                                                \
647  *	   resume                                             \
648  *	    |                                                  \
649  *	....V............................RESUME_WIP......       \
650  *	:    \                                          :        o
651  *	:     \   o-------<-----busy                    :        |
652  *	:      \ /                /                     :        |
653  *	:       RESUME_SEND_RESUME ---failed--->--------o--->(RESUME_FAILED)
654  *	:       /                \                      :        |
655  *	:    acked                rejected---->---------o--->(MISMATCH)
656  *	:     /                                         :
657  *	:....o..............o...............o.....o.....:
658  *	     |              |               |      \
659  *	  completed        flr            stop      restart-->(READY)
660  *	     |              |               |
661  *	     V         .....V.....    ......V.....
662  *	 (RESUMED)     : FLR_WIP :    : STOP_WIP :
663  *	               :.........:    :..........:
664  *
665  * For the full state machine view, see `The VF state machine`_.
666  */
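
/*
 * Note that, unlike PAUSE, the RESUME flow has no GuC completion event to
 * wait for: once the RESUME request is acked by the GuC, the VF moves
 * straight to the RESUMED state.
 */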
667 
668 static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
669 {
670 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP))
671 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME);
672 }
673 
674 static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid)
675 {
676 	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
677 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
678 	pf_exit_vf_mismatch(gt, vfid);
679 	pf_exit_vf_wip(gt, vfid);
680 }
681 
682 static void pf_enter_vf_resume_completed(struct xe_gt *gt, unsigned int vfid)
683 {
684 	pf_enter_vf_resumed(gt, vfid);
685 }
686 
687 static void pf_enter_vf_resume_failed(struct xe_gt *gt, unsigned int vfid)
688 {
689 	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
690 	pf_exit_vf_wip(gt, vfid);
691 }
692 
693 static void pf_enter_vf_resume_rejected(struct xe_gt *gt, unsigned int vfid)
694 {
695 	pf_enter_vf_mismatch(gt, vfid);
696 	pf_enter_vf_resume_failed(gt, vfid);
697 }
698 
699 static void pf_enter_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
700 {
701 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
702 		pf_enter_vf_state_machine_bug(gt, vfid);
703 
704 	pf_queue_vf(gt, vfid);
705 }
706 
707 static bool pf_exit_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
708 {
709 	int err;
710 
711 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
712 		return false;
713 
714 	err = pf_send_vf_resume(gt, vfid);
715 	if (err == -EBUSY)
716 		pf_enter_vf_resume_send_resume(gt, vfid);
717 	else if (err == -EIO)
718 		pf_enter_vf_resume_rejected(gt, vfid);
719 	else if (err)
720 		pf_enter_vf_resume_failed(gt, vfid);
721 	else
722 		pf_enter_vf_resume_completed(gt, vfid);
723 	return true;
724 }
725 
726 static bool pf_enter_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
727 {
728 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) {
729 		pf_enter_vf_wip(gt, vfid);
730 		pf_enter_vf_resume_send_resume(gt, vfid);
731 		return true;
732 	}
733 
734 	return false;
735 }
736 
737 /**
738  * xe_gt_sriov_pf_control_resume_vf - Resume a VF.
739  * @gt: the &xe_gt
740  * @vfid: the VF identifier
741  *
742  * This function is for PF only.
743  *
744  * Return: 0 on success or a negative error code on failure.
745  */
746 int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
747 {
748 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESUME_WIP);
749 	int err;
750 
751 	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
752 		xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
753 		return -EPERM;
754 	}
755 
756 	if (!pf_enter_vf_resume_wip(gt, vfid)) {
757 		xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid);
758 		return -EALREADY;
759 	}
760 
761 	err = pf_wait_vf_wip_done(gt, vfid, timeout);
762 	if (err)
763 		return err;
764 
765 	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) {
766 		xe_gt_sriov_dbg(gt, "VF%u resumed!\n", vfid);
767 		return 0;
768 	}
769 
770 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED)) {
771 		xe_gt_sriov_dbg(gt, "VF%u resume failed!\n", vfid);
772 		return -EIO;
773 	}
774 
775 	xe_gt_sriov_dbg(gt, "VF%u resume was canceled!\n", vfid);
776 	return -ECANCELED;
777 }
778 
779 /**
780  * DOC: The VF STOP state machine
781  *
782  * The VF STOP state machine looks like::
783  *
784  *	 (READY,PAUSED,RESUMED)<-------<--------------------o
785  *	    |                                                \
786  *	   stop                                               \
787  *	    |                                                  \
788  *	....V..............................STOP_WIP......       \
789  *	:    \                                          :        o
790  *	:     \   o----<----busy                        :        |
791  *	:      \ /            /                         :        |
792  *	:       STOP_SEND_STOP--------failed--->--------o--->(STOP_FAILED)
793  *	:       /             \                         :        |
794  *	:    acked             rejected-------->--------o--->(MISMATCH)
795  *	:     /                                         :
796  *	:....o..............o...............o...........:
797  *	     |              |               |
798  *	  completed        flr            restart
799  *	     |              |               |
800  *	     V         .....V.....          V
801  *	 (STOPPED)     : FLR_WIP :       (READY)
802  *	               :.........:
803  *
804  * For the full state machine view, see `The VF state machine`_.
805  */
806 
807 static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
808 {
809 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP))
810 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP);
811 }
812 
813 static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid)
814 {
815 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED))
816 		pf_enter_vf_state_machine_bug(gt, vfid);
817 
818 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
819 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
820 	pf_exit_vf_mismatch(gt, vfid);
821 	pf_exit_vf_wip(gt, vfid);
822 }
823 
824 static void pf_enter_vf_stop_completed(struct xe_gt *gt, unsigned int vfid)
825 {
826 	pf_enter_vf_stopped(gt, vfid);
827 }
828 
829 static void pf_enter_vf_stop_failed(struct xe_gt *gt, unsigned int vfid)
830 {
831 	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
832 	pf_exit_vf_wip(gt, vfid);
833 }
834 
835 static void pf_enter_vf_stop_rejected(struct xe_gt *gt, unsigned int vfid)
836 {
837 	pf_enter_vf_mismatch(gt, vfid);
838 	pf_enter_vf_stop_failed(gt, vfid);
839 }
840 
841 static void pf_enter_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
842 {
843 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
844 		pf_enter_vf_state_machine_bug(gt, vfid);
845 
846 	pf_queue_vf(gt, vfid);
847 }
848 
849 static bool pf_exit_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
850 {
851 	int err;
852 
853 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
854 		return false;
855 
856 	err = pf_send_vf_stop(gt, vfid);
857 	if (err == -EBUSY)
858 		pf_enter_vf_stop_send_stop(gt, vfid);
859 	else if (err == -EIO)
860 		pf_enter_vf_stop_rejected(gt, vfid);
861 	else if (err)
862 		pf_enter_vf_stop_failed(gt, vfid);
863 	else
864 		pf_enter_vf_stop_completed(gt, vfid);
865 	return true;
866 }
867 
868 static bool pf_enter_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
869 {
870 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) {
871 		pf_enter_vf_wip(gt, vfid);
872 		pf_enter_vf_stop_send_stop(gt, vfid);
873 		return true;
874 	}
875 	return false;
876 }
877 
878 /**
879  * xe_gt_sriov_pf_control_stop_vf - Stop a VF.
880  * @gt: the &xe_gt
881  * @vfid: the VF identifier
882  *
883  * This function is for PF only.
884  *
885  * Return: 0 on success or a negative error code on failure.
886  */
887 int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
888 {
889 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_STOP_WIP);
890 	int err;
891 
892 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
893 		xe_gt_sriov_dbg(gt, "VF%u was already stopped!\n", vfid);
894 		return -ESTALE;
895 	}
896 
897 	if (!pf_enter_vf_stop_wip(gt, vfid)) {
898 		xe_gt_sriov_dbg(gt, "VF%u stop already in progress!\n", vfid);
899 		return -EALREADY;
900 	}
901 
902 	err = pf_wait_vf_wip_done(gt, vfid, timeout);
903 	if (err)
904 		return err;
905 
906 	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
907 		xe_gt_sriov_dbg(gt, "VF%u stopped!\n", vfid);
908 		return 0;
909 	}
910 
911 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED)) {
912 		xe_gt_sriov_dbg(gt, "VF%u stop failed!\n", vfid);
913 		return -EIO;
914 	}
915 
916 	xe_gt_sriov_dbg(gt, "VF%u stop was canceled!\n", vfid);
917 	return -ECANCELED;
918 }
919 
920 /**
921  * DOC: The VF FLR state machine
922  *
923  * The VF FLR state machine looks like::
924  *
925  *	 (READY,PAUSED,STOPPED)<------------<--------------o
926  *	    |                                               \
927  *	   flr                                               \
928  *	    |                                                 \
929  *	....V..........................FLR_WIP...........      \
930  *	:    \                                          :       \
931  *	:     \   o----<----busy                        :        |
932  *	:      \ /            /                         :        |
933  *	:       FLR_SEND_START---failed----->-----------o--->(FLR_FAILED)<---o
934  *	:        |            \                         :        |           |
935  *	:      acked           rejected----->-----------o--->(MISMATCH)      |
936  *	:        |                                      :        ^           |
937  *	:        v                                      :        |           |
938  *	:       FLR_WAIT_GUC                            :        |           |
939  *	:        |                                      :        |           |
940  *	:       done                                    :        |           |
941  *	:        |                                      :        |           |
942  *	:        v                                      :        |           |
943  *	:       FLR_GUC_DONE                            :        |           |
944  *	:        |                                      :        |           |
945  *	:        | o--<--sync                           :        |           |
946  *	:        |/        /                            :        |           |
947  *	:       FLR_SYNC--o                             :        |           |
948  *	:        |                                      :        |           |
949  *	:       FLR_RESET_CONFIG---failed--->-----------o--------+-----------o
950  *	:        |                                      :        |           |
951  *	:       FLR_RESET_DATA                          :        |           |
952  *	:        |                                      :        |           |
953  *	:       FLR_RESET_MMIO                          :        |           |
954  *	:        |                                      :        |           |
955  *	:        | o----<----busy                       :        |           |
956  *	:        |/            /                        :        |           |
957  *	:       FLR_SEND_FINISH----failed--->-----------o--------+-----------o
958  *	:       /             \                         :        |
959  *	:     acked            rejected----->-----------o--------o
960  *	:     /                                         :
961  *	:....o..............................o...........:
962  *	     |                              |
963  *	  completed                       restart
964  *	     |                             /
965  *	     V                            /
966  *	  (READY)<----------<------------o
967  *
968  * For the full state machine view, see `The VF state machine`_.
969  */
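
/*
 * On the successful path the control worker advances a VF under FLR through
 * the FLR_WIP sub-states in this order::
 *
 *	FLR_SEND_START -> FLR_WAIT_GUC -> FLR_GUC_DONE -> FLR_SYNC ->
 *	FLR_RESET_CONFIG -> FLR_RESET_DATA -> FLR_RESET_MMIO -> FLR_SEND_FINISH
 */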
970 
971 static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
972 {
973 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
974 		pf_enter_vf_state_machine_bug(gt, vfid);
975 
976 	pf_queue_vf(gt, vfid);
977 }
978 
979 static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
980 {
981 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
982 		xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid);
983 		return;
984 	}
985 
986 	pf_enter_vf_wip(gt, vfid);
987 	pf_enter_vf_flr_send_start(gt, vfid);
988 }
989 
990 static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
991 {
992 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
993 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH);
994 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO);
995 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA);
996 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
997 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE);
998 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
999 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START);
1000 
1001 		xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid);
1002 	}
1003 }
1004 
1005 static void pf_enter_vf_flr_completed(struct xe_gt *gt, unsigned int vfid)
1006 {
1007 	pf_enter_vf_ready(gt, vfid);
1008 }
1009 
1010 static void pf_enter_vf_flr_failed(struct xe_gt *gt, unsigned int vfid)
1011 {
1012 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
1013 		xe_gt_sriov_notice(gt, "VF%u FLR failed!\n", vfid);
1014 	pf_exit_vf_wip(gt, vfid);
1015 }
1016 
1017 static void pf_enter_vf_flr_rejected(struct xe_gt *gt, unsigned int vfid)
1018 {
1019 	pf_enter_vf_mismatch(gt, vfid);
1020 	pf_enter_vf_flr_failed(gt, vfid);
1021 }
1022 
1023 static void pf_enter_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
1024 {
1025 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
1026 		pf_enter_vf_state_machine_bug(gt, vfid);
1027 
1028 	pf_queue_vf(gt, vfid);
1029 }
1030 
1031 static bool pf_exit_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
1032 {
1033 	int err;
1034 
1035 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
1036 		return false;
1037 
1038 	err = pf_send_vf_flr_finish(gt, vfid);
1039 	if (err == -EBUSY)
1040 		pf_enter_vf_flr_send_finish(gt, vfid);
1041 	else if (err == -EIO)
1042 		pf_enter_vf_flr_rejected(gt, vfid);
1043 	else if (err)
1044 		pf_enter_vf_flr_failed(gt, vfid);
1045 	else
1046 		pf_enter_vf_flr_completed(gt, vfid);
1047 	return true;
1048 }
1049 
1050 static void pf_enter_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
1051 {
1052 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
1053 		pf_enter_vf_state_machine_bug(gt, vfid);
1054 
1055 	pf_queue_vf(gt, vfid);
1056 }
1057 
1058 static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
1059 {
1060 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
1061 		return false;
1062 
1063 	xe_gt_sriov_pf_sanitize_hw(gt, vfid);
1064 
1065 	pf_enter_vf_flr_send_finish(gt, vfid);
1066 	return true;
1067 }
1068 
1069 static void pf_enter_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
1070 {
1071 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
1072 		pf_enter_vf_state_machine_bug(gt, vfid);
1073 
1074 	pf_queue_vf(gt, vfid);
1075 }
1076 
1077 static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
1078 {
1079 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
1080 		return false;
1081 
1082 	if (xe_tile_is_root(gt->tile) && xe_gt_is_main_type(gt))
1083 		xe_sriov_pf_service_reset_vf(gt_to_xe(gt), vfid);
1084 
1085 	xe_gt_sriov_pf_monitor_flr(gt, vfid);
1086 
1087 	pf_enter_vf_flr_reset_mmio(gt, vfid);
1088 	return true;
1089 }
1090 
1091 static void pf_enter_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
1092 {
1093 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
1094 		pf_enter_vf_state_machine_bug(gt, vfid);
1095 
1096 	pf_queue_vf(gt, vfid);
1097 }
1098 
1099 static bool pf_exit_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
1100 {
1101 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
1102 	int err;
1103 
1104 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
1105 		return false;
1106 
1107 	err = xe_gt_sriov_pf_config_sanitize(gt, vfid, timeout);
1108 	if (err)
1109 		pf_enter_vf_flr_failed(gt, vfid);
1110 	else
1111 		pf_enter_vf_flr_reset_data(gt, vfid);
1112 	return true;
1113 }
1114 
1115 static void pf_enter_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
1116 {
1117 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC))
1118 		pf_enter_vf_state_machine_bug(gt, vfid);
1119 }
1120 
1121 static bool pf_exit_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
1122 {
1123 	return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
1124 }
1125 
1126 static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
1127 {
1128 	int err;
1129 
1130 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
1131 		return false;
1132 
1133 	/* GuC may actually send a FLR_DONE before we get a RESPONSE */
1134 	pf_enter_vf_flr_wait_guc(gt, vfid);
1135 
1136 	err = pf_send_vf_flr_start(gt, vfid);
1137 	if (err) {
1138 		/* send failed, so we shouldn't expect FLR_DONE from GuC */
1139 		pf_exit_vf_flr_wait_guc(gt, vfid);
1140 
1141 		if (err == -EBUSY)
1142 			pf_enter_vf_flr_send_start(gt, vfid);
1143 		else if (err == -EIO)
1144 			pf_enter_vf_flr_rejected(gt, vfid);
1145 		else
1146 			pf_enter_vf_flr_failed(gt, vfid);
1147 	} else {
1148 		/*
1149 		 * We have already moved to WAIT_GUC, maybe even to GUC_DONE,
1150 		 * but since the GuC didn't complain, we may clear MISMATCH.
1151 		 */
1152 		pf_exit_vf_mismatch(gt, vfid);
1153 	}
1154 
1155 	return true;
1156 }
1157 
1158 static bool pf_exit_vf_flr_sync(struct xe_gt *gt, unsigned int vfid)
1159 {
1160 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC))
1161 		return false;
1162 
1163 	pf_enter_vf_flr_reset_config(gt, vfid);
1164 	return true;
1165 }
1166 
1167 static void pf_enter_vf_flr_sync(struct xe_gt *gt, unsigned int vfid)
1168 {
1169 	int ret;
1170 
1171 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC))
1172 		pf_enter_vf_state_machine_bug(gt, vfid);
1173 
1174 	ret = xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid);
1175 	if (ret < 0) {
1176 		xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint %pe\n", ERR_PTR(ret));
1177 		pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC);
1178 	} else {
1179 		xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint pass\n");
1180 		pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC);
1181 	}
1182 }
1183 
1184 static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
1185 {
1186 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
1187 		return false;
1188 
1189 	pf_enter_vf_flr_sync(gt, vfid);
1190 	return true;
1191 }
1192 
1193 static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
1194 {
1195 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
1196 		pf_queue_vf(gt, vfid);
1197 }
1198 
1199 /**
1200  * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence.
1201  * @gt: the &xe_gt
1202  * @vfid: the VF identifier
1203  *
1204  * This function is for PF only.
1205  *
1206  * Return: 0 on success or a negative error code on failure.
1207  */
1208 int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid)
1209 {
1210 	pf_enter_vf_flr_wip(gt, vfid);
1211 
1212 	return 0;
1213 }
1214 
1215 /**
1216  * xe_gt_sriov_pf_control_sync_flr() - Synchronize on the VF FLR checkpoint.
1217  * @gt: the &xe_gt
1218  * @vfid: the VF identifier
1219  * @sync: if true, allow exiting the checkpoint
1220  *
1221  * Return: non-zero if the FLR checkpoint has been reached, zero if there is
1222  *         no FLR in progress, or a negative error code if the FLR is busy or failed.
1223  */
1224 int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync)
1225 {
1226 	if (sync && pf_exit_vf_flr_sync(gt, vfid))
1227 		return 1;
1228 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC))
1229 		return 1;
1230 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP))
1231 		return -EBUSY;
1232 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
1233 		return -EIO;
1234 	return 0;
1235 }
1236 
1237 /**
1238  * xe_gt_sriov_pf_control_wait_flr() - Wait for a VF FLR to complete.
1239  * @gt: the &xe_gt
1240  * @vfid: the VF identifier
1241  *
1242  * This function is for PF only.
1243  *
1244  * Return: 0 on success or a negative error code on failure.
1245  */
1246 int xe_gt_sriov_pf_control_wait_flr(struct xe_gt *gt, unsigned int vfid)
1247 {
1248 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP);
1249 	int err;
1250 
1251 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
1252 		return -EIO;
1253 
1254 	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP))
1255 		return 0;
1256 
1257 	err = pf_wait_vf_wip_done(gt, vfid, timeout);
1258 	if (err) {
1259 		xe_gt_sriov_notice(gt, "VF%u FLR didn't finish in %u ms (%pe)\n",
1260 				   vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
1261 		return err;
1262 	}
1263 
1264 	if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
1265 		return -EIO;
1266 
1267 	return 0;
1268 }
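
/*
 * Illustrative example (not part of the driver): since
 * xe_gt_sriov_pf_control_trigger_flr() only kicks off the asynchronous FLR
 * sequence, a caller that needs a synchronous FLR can pair it with the wait
 * helper::
 *
 *	err = xe_gt_sriov_pf_control_trigger_flr(gt, vfid);
 *	if (!err)
 *		err = xe_gt_sriov_pf_control_wait_flr(gt, vfid);
 */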
1269 
1270 /**
1271  * DOC: The VF FLR Flow with GuC
1272  *
1273  * The VF FLR flow includes several steps::
1274  *
1275  *	         PF                        GUC             PCI
1276  *	========================================================
1277  *	         |                          |               |
1278  *	(1)      |                         [ ] <----- FLR --|
1279  *	         |                         [ ]              :
1280  *	(2)     [ ] <-------- NOTIFY FLR --[ ]
1281  *	        [ ]                         |
1282  *	(3)     [ ]                         |
1283  *	        [ ]                         |
1284  *	        [ ]-- START FLR ---------> [ ]
1285  *	         |                         [ ]
1286  *	(4)      |                         [ ]
1287  *	         |                         [ ]
1288  *	        [ ] <--------- FLR DONE -- [ ]
1289  *	        [ ]                         |
1290  *	(5)     [ ]                         |
1291  *	        [ ]                         |
1292  *	        [ ]-- FINISH FLR --------> [ ]
1293  *	         |                          |
1294  *
1295  * * Step 1: PCI HW generates interrupt to the GuC about VF FLR
1296  * * Step 2: GuC FW sends G2H notification to the PF about VF FLR
1297  * * Step 2a: on some platforms G2H is only received from root GuC
1298  * * Step 3: PF sends H2G request to the GuC to start VF FLR sequence
1299  * * Step 3a: on some platforms PF must send H2G to all other GuCs
1300  * * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done
1301  * * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished
1302  */
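
/*
 * In this file: step 2 arrives via xe_gt_sriov_pf_control_process_guc2pf()
 * and pf_handle_vf_flr(), step 3 is sent by pf_send_vf_flr_start(), step 4
 * lands in pf_handle_vf_flr_done(), and step 5 is sent by
 * pf_send_vf_flr_finish().
 */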
1303 
1304 static bool needs_dispatch_flr(struct xe_device *xe)
1305 {
1306 	return xe->info.platform == XE_PVC;
1307 }
1308 
1309 static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid)
1310 {
1311 	struct xe_device *xe = gt_to_xe(gt);
1312 	struct xe_gt *gtit;
1313 	unsigned int gtid;
1314 
1315 	xe_gt_sriov_info(gt, "VF%u FLR\n", vfid);
1316 
1317 	if (needs_dispatch_flr(xe)) {
1318 		for_each_gt(gtit, xe, gtid)
1319 			pf_enter_vf_flr_wip(gtit, vfid);
1320 	} else {
1321 		pf_enter_vf_flr_wip(gt, vfid);
1322 	}
1323 }
1324 
1325 static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid)
1326 {
1327 	if (!pf_exit_vf_flr_wait_guc(gt, vfid)) {
1328 		xe_gt_sriov_dbg(gt, "Received out of order 'VF%u FLR done'\n", vfid);
1329 		pf_enter_vf_mismatch(gt, vfid);
1330 		return;
1331 	}
1332 
1333 	pf_enter_vf_flr_guc_done(gt, vfid);
1334 }
1335 
1336 static void pf_handle_vf_pause_done(struct xe_gt *gt, u32 vfid)
1337 {
1338 	if (!pf_exit_pause_wait_guc(gt, vfid)) {
1339 		xe_gt_sriov_dbg(gt, "Received out of order 'VF%u PAUSE done'\n", vfid);
1340 		pf_enter_vf_mismatch(gt, vfid);
1341 		return;
1342 	}
1343 
1344 	pf_enter_vf_pause_guc_done(gt, vfid);
1345 }
1346 
1347 static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid)
1348 {
1349 	xe_gt_sriov_dbg_verbose(gt, "received VF%u event %#x\n", vfid, eventid);
1350 
1351 	if (vfid > xe_gt_sriov_pf_get_totalvfs(gt))
1352 		return -EPROTO;
1353 
1354 	switch (eventid) {
1355 	case GUC_PF_NOTIFY_VF_FLR:
1356 		pf_handle_vf_flr(gt, vfid);
1357 		break;
1358 	case GUC_PF_NOTIFY_VF_FLR_DONE:
1359 		pf_handle_vf_flr_done(gt, vfid);
1360 		break;
1361 	case GUC_PF_NOTIFY_VF_PAUSE_DONE:
1362 		pf_handle_vf_pause_done(gt, vfid);
1363 		break;
1364 	case GUC_PF_NOTIFY_VF_FIXUP_DONE:
1365 		break;
1366 	default:
1367 		return -ENOPKG;
1368 	}
1369 	return 0;
1370 }
1371 
1372 static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid)
1373 {
1374 	switch (eventid) {
1375 	case GUC_PF_NOTIFY_VF_ENABLE:
1376 		xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n",
1377 					str_enabled_disabled(true),
1378 					str_enabled_disabled(false));
1379 		break;
1380 	default:
1381 		return -ENOPKG;
1382 	}
1383 	return 0;
1384 }
1385 
1386 /**
1387  * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC.
1388  * @gt: the &xe_gt
1389  * @msg: the G2H message
1390  * @len: the length of the G2H message
1391  *
1392  * This function is for PF only.
1393  *
1394  * Return: 0 on success or a negative error code on failure.
1395  */
1396 int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
1397 {
1398 	u32 vfid;
1399 	u32 eventid;
1400 
1401 	xe_gt_assert(gt, len);
1402 	xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
1403 	xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
1404 	xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
1405 		     GUC_ACTION_GUC2PF_VF_STATE_NOTIFY);
1406 
1407 	if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt))))
1408 		return -EPROTO;
1409 
1410 	if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0])))
1411 		return -EPFNOSUPPORT;
1412 
1413 	if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN))
1414 		return -EPROTO;
1415 
1416 	vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]);
1417 	eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]);
1418 
1419 	return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid);
1420 }
1421 
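/*
 * Process at most one pending step of a VF state machine.
 *
 * Returns true if some step was executed and the VF should be queued again,
 * false if there is nothing to do for this VF right now (e.g. it is waiting
 * for a notification from the GuC).
 */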
1422 static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
1423 {
1424 	if (pf_exit_vf_flr_send_start(gt, vfid))
1425 		return true;
1426 
1427 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) {
1428 		xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
1429 					control_bit_to_string(XE_GT_SRIOV_STATE_FLR_WAIT_GUC));
1430 		return false;
1431 	}
1432 
1433 	if (pf_exit_vf_flr_guc_done(gt, vfid))
1434 		return true;
1435 
1436 	if (pf_exit_vf_flr_reset_config(gt, vfid))
1437 		return true;
1438 
1439 	if (pf_exit_vf_flr_reset_data(gt, vfid))
1440 		return true;
1441 
1442 	if (pf_exit_vf_flr_reset_mmio(gt, vfid))
1443 		return true;
1444 
1445 	if (pf_exit_vf_flr_send_finish(gt, vfid))
1446 		return true;
1447 
1448 	if (pf_exit_vf_stop_send_stop(gt, vfid))
1449 		return true;
1450 
1451 	if (pf_exit_vf_pause_send_pause(gt, vfid))
1452 		return true;
1453 
1454 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) {
1455 		xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
1456 					control_bit_to_string(XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC));
1457 		return false;
1458 	}
1459 
1460 	if (pf_exit_vf_pause_guc_done(gt, vfid))
1461 		return true;
1462 
1463 	if (pf_exit_vf_pause_save_guc(gt, vfid))
1464 		return true;
1465 
1466 	if (pf_exit_vf_resume_send_resume(gt, vfid))
1467 		return true;
1468 
1469 	return false;
1470 }
1471 
1472 static unsigned int pf_control_state_index(struct xe_gt *gt,
1473 					   struct xe_gt_sriov_control_state *cs)
1474 {
1475 	return container_of(cs, struct xe_gt_sriov_metadata, control) - gt->sriov.pf.vfs;
1476 }
1477 
1478 static void pf_worker_find_work(struct xe_gt *gt)
1479 {
1480 	struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;
1481 	struct xe_gt_sriov_control_state *cs;
1482 	unsigned int vfid;
1483 	bool empty;
1484 	bool more;
1485 
1486 	spin_lock(&pfc->lock);
1487 	cs = list_first_entry_or_null(&pfc->list, struct xe_gt_sriov_control_state, link);
1488 	if (cs)
1489 		list_del_init(&cs->link);
1490 	empty = list_empty(&pfc->list);
1491 	spin_unlock(&pfc->lock);
1492 
1493 	if (!cs)
1494 		return;
1495 
1496 	/* VF metadata structures are indexed by the VFID */
1497 	vfid = pf_control_state_index(gt, cs);
1498 	xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));
1499 
1500 	more = pf_process_vf_state_machine(gt, vfid);
1501 	if (more)
1502 		pf_queue_vf(gt, vfid);
1503 	else if (!empty)
1504 		pf_queue_control_worker(gt);
1505 }
1506 
1507 static void control_worker_func(struct work_struct *w)
1508 {
1509 	struct xe_gt *gt = container_of(w, struct xe_gt, sriov.pf.control.worker);
1510 
1511 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
1512 	pf_worker_find_work(gt);
1513 }
1514 
1515 static void pf_stop_worker(struct xe_gt *gt)
1516 {
1517 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
1518 	cancel_work_sync(&gt->sriov.pf.control.worker);
1519 }
1520 
1521 static void control_fini_action(struct drm_device *dev, void *data)
1522 {
1523 	struct xe_gt *gt = data;
1524 
1525 	pf_stop_worker(gt);
1526 }
1527 
1528 /**
1529  * xe_gt_sriov_pf_control_init() - Initialize PF's control data.
1530  * @gt: the &xe_gt
1531  *
1532  * This function is for PF only.
1533  *
1534  * Return: 0 on success or a negative error code on failure.
1535  */
1536 int xe_gt_sriov_pf_control_init(struct xe_gt *gt)
1537 {
1538 	struct xe_device *xe = gt_to_xe(gt);
1539 	unsigned int n, totalvfs;
1540 
1541 	xe_gt_assert(gt, IS_SRIOV_PF(xe));
1542 
1543 	totalvfs = xe_sriov_pf_get_totalvfs(xe);
1544 	for (n = 0; n <= totalvfs; n++) {
1545 		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, n);
1546 
1547 		init_completion(&cs->done);
1548 		INIT_LIST_HEAD(&cs->link);
1549 	}
1550 
1551 	spin_lock_init(&gt->sriov.pf.control.lock);
1552 	INIT_LIST_HEAD(&gt->sriov.pf.control.list);
1553 	INIT_WORK(&gt->sriov.pf.control.worker, control_worker_func);
1554 
1555 	return drmm_add_action_or_reset(&xe->drm, control_fini_action, gt);
1556 }
1557 
1558 /**
1559  * xe_gt_sriov_pf_control_restart() - Restart SR-IOV control data after a GT reset.
1560  * @gt: the &xe_gt
1561  *
1562  * Any per-VF status maintained by the PF or any ongoing VF control activity
1563  * performed by the PF must be reset or cancelled when the GT is reset.
1564  *
1565  * This function is for PF only.
1566  */
1567 void xe_gt_sriov_pf_control_restart(struct xe_gt *gt)
1568 {
1569 	struct xe_device *xe = gt_to_xe(gt);
1570 	unsigned int n, totalvfs;
1571 
1572 	xe_gt_assert(gt, IS_SRIOV_PF(xe));
1573 
1574 	pf_stop_worker(gt);
1575 
1576 	totalvfs = xe_sriov_pf_get_totalvfs(xe);
1577 	for (n = 1; n <= totalvfs; n++)
1578 		pf_enter_vf_ready(gt, n);
1579 }
1580