xref: /linux/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c (revision c297aa7d3fb6755890b78b483e82c9cf07370d50)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2023-2024 Intel Corporation
4  */
5 
6 #include <drm/drm_managed.h>
7 
8 #include "abi/guc_actions_sriov_abi.h"
9 
10 #include "xe_device.h"
11 #include "xe_gt.h"
12 #include "xe_gt_sriov_pf_config.h"
13 #include "xe_gt_sriov_pf_control.h"
14 #include "xe_gt_sriov_pf_helpers.h"
15 #include "xe_gt_sriov_pf_monitor.h"
16 #include "xe_gt_sriov_pf_service.h"
17 #include "xe_gt_sriov_printk.h"
18 #include "xe_guc_ct.h"
19 #include "xe_sriov.h"
20 
21 static const char *control_cmd_to_string(u32 cmd)
22 {
23 	switch (cmd) {
24 	case GUC_PF_TRIGGER_VF_PAUSE:
25 		return "PAUSE";
26 	case GUC_PF_TRIGGER_VF_RESUME:
27 		return "RESUME";
28 	case GUC_PF_TRIGGER_VF_STOP:
29 		return "STOP";
30 	case GUC_PF_TRIGGER_VF_FLR_START:
31 		return "FLR_START";
32 	case GUC_PF_TRIGGER_VF_FLR_FINISH:
33 		return "FLR_FINISH";
34 	default:
35 		return "<unknown>";
36 	}
37 }
38 
39 static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd)
40 {
41 	u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = {
42 		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
43 		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
44 		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL),
45 		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid),
46 		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd),
47 	};
48 	int ret;
49 
50 	ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
51 	return ret > 0 ? -EPROTO : ret;
52 }
53 
54 static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd)
55 {
56 	int err;
57 
58 	xe_gt_assert(gt, vfid != PFID);
59 	xe_gt_sriov_dbg_verbose(gt, "sending VF%u control command %s\n",
60 				vfid, control_cmd_to_string(cmd));
61 
62 	err = guc_action_vf_control_cmd(&gt->uc.guc, vfid, cmd);
63 	if (unlikely(err))
64 		xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n",
65 				vfid, control_cmd_to_string(cmd), ERR_PTR(err));
66 	return err;
67 }
68 
69 static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid)
70 {
71 	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE);
72 }
73 
74 static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid)
75 {
76 	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME);
77 }
78 
79 static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid)
80 {
81 	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP);
82 }
83 
84 static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid)
85 {
86 	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START);
87 }
88 
89 static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid)
90 {
91 	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH);
92 }
93 
94 /**
95  * DOC: The VF state machine
96  *
97  * The simplified VF state machine could be presented as::
98  *
99  *	               pause--------------------------o
100  *	              /                               |
101  *	             /                                v
102  *	      (READY)<------------------resume-----(PAUSED)
103  *	         ^   \                             /    /
104  *	         |    \                           /    /
105  *	         |     stop---->(STOPPED)<----stop    /
106  *	         |                  /                /
107  *	         |                 /                /
108  *	         o--------<-----flr                /
109  *	          \                               /
110  *	           o------<--------------------flr
111  *
112  * Where:
113  *
114  * * READY - represents a state in which VF is fully operable
115  * * PAUSED - represents a state in which VF activity is temporarily suspended
116  * * STOPPED - represents a state in which VF activity is definitely halted
117  * * pause - represents a request to temporarily suspend VF activity
118  * * resume - represents a request to resume VF activity
119  * * stop - represents a request to definitely halt VF activity
120  * * flr - represents a request to perform VF FLR to restore VF activity
121  *
 * However, each state transition requires additional steps that involve
 * communication with GuC that might fail or be interrupted by other requests::
124  *
125  *	                   .................................WIP....
126  *	                   :                                      :
127  *	          pause--------------------->PAUSE_WIP----------------------------o
128  *	         /         :                /         \           :               |
129  *	        /          :    o----<---stop          flr--o     :               |
130  *	       /           :    |           \         /     |     :               V
131  *	(READY,RESUMED)<--------+------------RESUME_WIP<----+--<-----resume--(PAUSED)
132  *	  ^ \  \           :    |                           |     :          /   /
133  *	  |  \  \          :    |                           |     :         /   /
134  *	  |   \  \         :    |                           |     :        /   /
135  *	  |    \  \        :    o----<----------------------+--<-------stop   /
136  *	  |     \  \       :    |                           |     :          /
137  *	  |      \  \      :    V                           |     :         /
138  *	  |       \  stop----->STOP_WIP---------flr--->-----o     :        /
139  *	  |        \       :    |                           |     :       /
140  *	  |         \      :    |                           V     :      /
141  *	  |          flr--------+----->----------------->FLR_WIP<-----flr
142  *	  |                :    |                        /  ^     :
143  *	  |                :    |                       /   |     :
144  *	  o--------<-------:----+-----<----------------o    |     :
145  *	                   :    |                           |     :
146  *	                   :....|...........................|.....:
147  *	                        |                           |
148  *	                        V                           |
149  *	                     (STOPPED)--------------------flr
150  *
151  * For details about each internal WIP state machine see:
152  *
153  * * `The VF PAUSE state machine`_
154  * * `The VF RESUME state machine`_
155  * * `The VF STOP state machine`_
156  * * `The VF FLR state machine`_
157  */
158 
#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
/*
 * Return the name of a VF control state bit (without the
 * XE_GT_SRIOV_STATE_ prefix) for use in debug messages, or "?" when the
 * bit is not one of the states listed below.
 */
static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
{
#define BIT2STR(_X) [XE_GT_SRIOV_STATE_##_X] = #_X
	static const char * const names[] = {
		BIT2STR(WIP),
		BIT2STR(FLR_WIP),
		BIT2STR(FLR_SEND_START),
		BIT2STR(FLR_WAIT_GUC),
		BIT2STR(FLR_GUC_DONE),
		BIT2STR(FLR_RESET_CONFIG),
		BIT2STR(FLR_RESET_DATA),
		BIT2STR(FLR_RESET_MMIO),
		BIT2STR(FLR_SEND_FINISH),
		BIT2STR(FLR_FAILED),
		BIT2STR(PAUSE_WIP),
		BIT2STR(PAUSE_SEND_PAUSE),
		BIT2STR(PAUSE_WAIT_GUC),
		BIT2STR(PAUSE_GUC_DONE),
		BIT2STR(PAUSE_FAILED),
		BIT2STR(PAUSED),
		BIT2STR(RESUME_WIP),
		BIT2STR(RESUME_SEND_RESUME),
		BIT2STR(RESUME_FAILED),
		BIT2STR(RESUMED),
		BIT2STR(STOP_WIP),
		BIT2STR(STOP_SEND_STOP),
		BIT2STR(STOP_FAILED),
		BIT2STR(STOPPED),
		BIT2STR(MISMATCH),
	};
#undef BIT2STR
	unsigned int idx = bit;

	/* gaps in the table are NULL and fall through to the "?" fallback */
	if (idx < ARRAY_SIZE(names) && names[idx])
		return names[idx];

	return "?";
}
#endif
195 
196 static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)
197 {
198 	switch (bit) {
199 	case XE_GT_SRIOV_STATE_FLR_WAIT_GUC:
200 	case XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC:
201 		return HZ / 2;
202 	case XE_GT_SRIOV_STATE_FLR_WIP:
203 	case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG:
204 		return 5 * HZ;
205 	default:
206 		return HZ;
207 	}
208 }
209 
210 static struct xe_gt_sriov_control_state *pf_pick_vf_control(struct xe_gt *gt, unsigned int vfid)
211 {
212 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
213 	xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));
214 
215 	return &gt->sriov.pf.vfs[vfid].control;
216 }
217 
218 static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid)
219 {
220 	struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
221 
222 	return &cs->state;
223 }
224 
225 static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid,
226 			      enum xe_gt_sriov_control_bits bit)
227 {
228 	return test_bit(bit, pf_peek_vf_state(gt, vfid));
229 }
230 
231 static void pf_dump_vf_state(struct xe_gt *gt, unsigned int vfid)
232 {
233 	unsigned long state = *pf_peek_vf_state(gt, vfid);
234 	enum xe_gt_sriov_control_bits bit;
235 
236 	if (state) {
237 		xe_gt_sriov_dbg_verbose(gt, "VF%u state %#lx%s%*pbl\n",
238 					vfid, state, state ? " bits " : "",
239 					(int)BITS_PER_LONG, &state);
240 		for_each_set_bit(bit, &state, BITS_PER_LONG)
241 			xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d)\n",
242 						vfid, control_bit_to_string(bit), bit);
243 	} else {
244 		xe_gt_sriov_dbg_verbose(gt, "VF%u state READY\n", vfid);
245 	}
246 }
247 
248 static bool pf_expect_vf_state(struct xe_gt *gt, unsigned int vfid,
249 			       enum xe_gt_sriov_control_bits bit)
250 {
251 	bool result = pf_check_vf_state(gt, vfid, bit);
252 
253 	if (unlikely(!result))
254 		pf_dump_vf_state(gt, vfid);
255 
256 	return result;
257 }
258 
259 static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid,
260 				   enum xe_gt_sriov_control_bits bit)
261 {
262 	bool result = !pf_check_vf_state(gt, vfid, bit);
263 
264 	if (unlikely(!result))
265 		pf_dump_vf_state(gt, vfid);
266 
267 	return result;
268 }
269 
270 static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid,
271 			      enum xe_gt_sriov_control_bits bit)
272 {
273 	if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) {
274 		xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) enter\n",
275 					vfid, control_bit_to_string(bit), bit);
276 		return true;
277 	}
278 	return false;
279 }
280 
281 static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid,
282 			     enum xe_gt_sriov_control_bits bit)
283 {
284 	if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) {
285 		xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) exit\n",
286 					vfid, control_bit_to_string(bit), bit);
287 		return true;
288 	}
289 	return false;
290 }
291 
292 static void pf_escape_vf_state(struct xe_gt *gt, unsigned int vfid,
293 			       enum xe_gt_sriov_control_bits bit)
294 {
295 	if (pf_exit_vf_state(gt, vfid, bit))
296 		xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) escaped by %ps\n",
297 					vfid, control_bit_to_string(bit), bit,
298 					__builtin_return_address(0));
299 }
300 
301 static void pf_enter_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
302 {
303 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) {
304 		xe_gt_sriov_dbg(gt, "VF%u state mismatch detected by %ps\n",
305 				vfid, __builtin_return_address(0));
306 		pf_dump_vf_state(gt, vfid);
307 	}
308 }
309 
310 static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
311 {
312 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH))
313 		xe_gt_sriov_dbg(gt, "VF%u state mismatch cleared by %ps\n",
314 				vfid, __builtin_return_address(0));
315 
316 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
317 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
318 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
319 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED);
320 }
321 
/*
 * Report an unexpected state machine transition for the given VF.
 * Kept as a macro so that pf_enter_vf_mismatch() is called directly from
 * the function that detected the problem.
 */
#define pf_enter_vf_state_machine_bug(gt, vfid) \
	({ pf_enter_vf_mismatch((gt), (vfid)); })
325 
326 static void pf_queue_control_worker(struct xe_gt *gt)
327 {
328 	struct xe_device *xe = gt_to_xe(gt);
329 
330 	xe_gt_assert(gt, IS_SRIOV_PF(xe));
331 
332 	queue_work(xe->sriov.wq, &gt->sriov.pf.control.worker);
333 }
334 
335 static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid)
336 {
337 	struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;
338 
339 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
340 
341 	spin_lock(&pfc->lock);
342 	list_move_tail(&gt->sriov.pf.vfs[vfid].control.link, &pfc->list);
343 	spin_unlock(&pfc->lock);
344 
345 	pf_queue_control_worker(gt);
346 }
347 
/*
 * Forward declarations of the per-operation WIP exit helpers, needed by
 * pf_exit_vf_wip() before their definitions.
 */
static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid);
352 
353 static bool pf_enter_vf_wip(struct xe_gt *gt, unsigned int vfid)
354 {
355 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
356 		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
357 
358 		reinit_completion(&cs->done);
359 		return true;
360 	}
361 	return false;
362 }
363 
364 static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid)
365 {
366 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
367 		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
368 
369 		pf_exit_vf_flr_wip(gt, vfid);
370 		pf_exit_vf_stop_wip(gt, vfid);
371 		pf_exit_vf_pause_wip(gt, vfid);
372 		pf_exit_vf_resume_wip(gt, vfid);
373 
374 		complete_all(&cs->done);
375 	}
376 }
377 
378 static int pf_wait_vf_wip_done(struct xe_gt *gt, unsigned int vfid, unsigned long timeout)
379 {
380 	struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
381 
382 	return wait_for_completion_timeout(&cs->done, timeout) ? 0 : -ETIMEDOUT;
383 }
384 
385 static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
386 {
387 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
388 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED);
389 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
390 	pf_exit_vf_mismatch(gt, vfid);
391 	pf_exit_vf_wip(gt, vfid);
392 }
393 
394 /**
395  * DOC: The VF PAUSE state machine
396  *
397  * The VF PAUSE state machine looks like::
398  *
399  *	 (READY,RESUMED)<-------------<---------------------o---------o
400  *	    |                                                \         \
401  *	   pause                                              \         \
402  *	    |                                                  \         \
403  *	....V...........................PAUSE_WIP........       \         \
404  *	:    \                                          :        o         \
405  *	:     \   o------<-----busy                     :        |          \
406  *	:      \ /              /                       :        |           |
407  *	:       PAUSE_SEND_PAUSE ---failed--->----------o--->(PAUSE_FAILED)  |
408  *	:        |              \                       :        |           |
409  *	:      acked             rejected---->----------o--->(MISMATCH)     /
410  *	:        |                                      :                  /
411  *	:        v                                      :                 /
412  *	:       PAUSE_WAIT_GUC                          :                /
413  *	:        |                                      :               /
414  *	:       done                                    :              /
415  *	:        |                                      :             /
416  *	:        v                                      :            /
417  *	:       PAUSE_GUC_DONE                          o-----restart
418  *	:      /                                        :
419  *	:     /                                         :
420  *	:....o..............o...............o...........:
421  *	     |              |               |
422  *	  completed        flr             stop
423  *	     |              |               |
424  *	     V         .....V.....    ......V.....
425  *	 (PAUSED)      : FLR_WIP :    : STOP_WIP :
426  *	               :.........:    :..........:
427  *
428  * For the full state machine view, see `The VF state machine`_.
429  */
430 
431 static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
432 {
433 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
434 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE);
435 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
436 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE);
437 	}
438 }
439 
440 static void pf_enter_vf_paused(struct xe_gt *gt, unsigned int vfid)
441 {
442 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED))
443 		pf_enter_vf_state_machine_bug(gt, vfid);
444 
445 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
446 	pf_exit_vf_mismatch(gt, vfid);
447 	pf_exit_vf_wip(gt, vfid);
448 }
449 
/* "completed" edge of the PAUSE state machine: the VF becomes PAUSED. */
static void pf_enter_vf_pause_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_paused(gt, vfid);
}
454 
455 static void pf_enter_vf_pause_failed(struct xe_gt *gt, unsigned int vfid)
456 {
457 	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
458 	pf_exit_vf_wip(gt, vfid);
459 }
460 
/*
 * "rejected" edge of the PAUSE state machine: flag a state mismatch and
 * then fail the pause.
 */
static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_pause_failed(gt, vfid);
}
466 
467 static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
468 {
469 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
470 		return false;
471 
472 	pf_enter_vf_pause_completed(gt, vfid);
473 	return true;
474 }
475 
476 static void pf_enter_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
477 {
478 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
479 		pf_queue_vf(gt, vfid);
480 }
481 
482 static void pf_enter_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
483 {
484 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC))
485 		pf_enter_vf_state_machine_bug(gt, vfid);
486 }
487 
488 static bool pf_exit_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
489 {
490 	return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
491 }
492 
493 static void pf_enter_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
494 {
495 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
496 		pf_enter_vf_state_machine_bug(gt, vfid);
497 
498 	pf_queue_vf(gt, vfid);
499 }
500 
501 static bool pf_exit_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
502 {
503 	int err;
504 
505 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
506 		return false;
507 
508 	/* GuC may actually send a PAUSE_DONE before we get a RESPONSE */
509 	pf_enter_pause_wait_guc(gt, vfid);
510 
511 	err = pf_send_vf_pause(gt, vfid);
512 	if (err) {
513 		/* send failed, so we shouldn't expect PAUSE_DONE from GuC */
514 		pf_exit_pause_wait_guc(gt, vfid);
515 
516 		if (err == -EBUSY)
517 			pf_enter_vf_pause_send_pause(gt, vfid);
518 		else if (err == -EIO)
519 			pf_enter_vf_pause_rejected(gt, vfid);
520 		else
521 			pf_enter_vf_pause_failed(gt, vfid);
522 	} else {
523 		/*
524 		 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
525 		 * but since GuC didn't complain, we may clear MISMATCH
526 		 */
527 		pf_exit_vf_mismatch(gt, vfid);
528 	}
529 
530 	return true;
531 }
532 
533 static bool pf_enter_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
534 {
535 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
536 		pf_enter_vf_wip(gt, vfid);
537 		pf_enter_vf_pause_send_pause(gt, vfid);
538 		return true;
539 	}
540 
541 	return false;
542 }
543 
544 /**
545  * xe_gt_sriov_pf_control_pause_vf - Pause a VF.
546  * @gt: the &xe_gt
547  * @vfid: the VF identifier
548  *
549  * This function is for PF only.
550  *
551  * Return: 0 on success or a negative error code on failure.
552  */
553 int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid)
554 {
555 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_PAUSE_WIP);
556 	int err;
557 
558 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
559 		xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
560 		return -EPERM;
561 	}
562 
563 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
564 		xe_gt_sriov_dbg(gt, "VF%u was already paused!\n", vfid);
565 		return -ESTALE;
566 	}
567 
568 	if (!pf_enter_vf_pause_wip(gt, vfid)) {
569 		xe_gt_sriov_dbg(gt, "VF%u pause already in progress!\n", vfid);
570 		return -EALREADY;
571 	}
572 
573 	err = pf_wait_vf_wip_done(gt, vfid, timeout);
574 	if (err) {
575 		xe_gt_sriov_dbg(gt, "VF%u pause didn't finish in %u ms (%pe)\n",
576 				vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
577 		return err;
578 	}
579 
580 	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
581 		xe_gt_sriov_info(gt, "VF%u paused!\n", vfid);
582 		return 0;
583 	}
584 
585 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED)) {
586 		xe_gt_sriov_dbg(gt, "VF%u pause failed!\n", vfid);
587 		return -EIO;
588 	}
589 
590 	xe_gt_sriov_dbg(gt, "VF%u pause was canceled!\n", vfid);
591 	return -ECANCELED;
592 }
593 
594 /**
595  * DOC: The VF RESUME state machine
596  *
597  * The VF RESUME state machine looks like::
598  *
599  *	 (PAUSED)<-----------------<------------------------o
600  *	    |                                                \
601  *	   resume                                             \
602  *	    |                                                  \
603  *	....V............................RESUME_WIP......       \
604  *	:    \                                          :        o
605  *	:     \   o-------<-----busy                    :        |
606  *	:      \ /                /                     :        |
607  *	:       RESUME_SEND_RESUME ---failed--->--------o--->(RESUME_FAILED)
608  *	:       /                \                      :        |
609  *	:    acked                rejected---->---------o--->(MISMATCH)
610  *	:     /                                         :
611  *	:....o..............o...............o.....o.....:
612  *	     |              |               |      \
613  *	  completed        flr            stop      restart-->(READY)
614  *	     |              |               |
615  *	     V         .....V.....    ......V.....
616  *	 (RESUMED)     : FLR_WIP :    : STOP_WIP :
617  *	               :.........:    :..........:
618  *
619  * For the full state machine view, see `The VF state machine`_.
620  */
621 
622 static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
623 {
624 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP))
625 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME);
626 }
627 
628 static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid)
629 {
630 	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
631 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
632 	pf_exit_vf_mismatch(gt, vfid);
633 	pf_exit_vf_wip(gt, vfid);
634 }
635 
/* "completed" edge of the RESUME state machine: the VF becomes RESUMED. */
static void pf_enter_vf_resume_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_resumed(gt, vfid);
}
640 
641 static void pf_enter_vf_resume_failed(struct xe_gt *gt, unsigned int vfid)
642 {
643 	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
644 	pf_exit_vf_wip(gt, vfid);
645 }
646 
/*
 * "rejected" edge of the RESUME state machine: flag a state mismatch and
 * then fail the resume.
 */
static void pf_enter_vf_resume_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_resume_failed(gt, vfid);
}
652 
653 static void pf_enter_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
654 {
655 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
656 		pf_enter_vf_state_machine_bug(gt, vfid);
657 
658 	pf_queue_vf(gt, vfid);
659 }
660 
661 static bool pf_exit_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
662 {
663 	int err;
664 
665 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
666 		return false;
667 
668 	err = pf_send_vf_resume(gt, vfid);
669 	if (err == -EBUSY)
670 		pf_enter_vf_resume_send_resume(gt, vfid);
671 	else if (err == -EIO)
672 		pf_enter_vf_resume_rejected(gt, vfid);
673 	else if (err)
674 		pf_enter_vf_resume_failed(gt, vfid);
675 	else
676 		pf_enter_vf_resume_completed(gt, vfid);
677 	return true;
678 }
679 
680 static bool pf_enter_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
681 {
682 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) {
683 		pf_enter_vf_wip(gt, vfid);
684 		pf_enter_vf_resume_send_resume(gt, vfid);
685 		return true;
686 	}
687 
688 	return false;
689 }
690 
691 /**
692  * xe_gt_sriov_pf_control_resume_vf - Resume a VF.
693  * @gt: the &xe_gt
694  * @vfid: the VF identifier
695  *
696  * This function is for PF only.
697  *
698  * Return: 0 on success or a negative error code on failure.
699  */
700 int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
701 {
702 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESUME_WIP);
703 	int err;
704 
705 	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
706 		xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
707 		return -EPERM;
708 	}
709 
710 	if (!pf_enter_vf_resume_wip(gt, vfid)) {
711 		xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid);
712 		return -EALREADY;
713 	}
714 
715 	err = pf_wait_vf_wip_done(gt, vfid, timeout);
716 	if (err)
717 		return err;
718 
719 	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) {
720 		xe_gt_sriov_info(gt, "VF%u resumed!\n", vfid);
721 		return 0;
722 	}
723 
724 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED)) {
725 		xe_gt_sriov_dbg(gt, "VF%u resume failed!\n", vfid);
726 		return -EIO;
727 	}
728 
729 	xe_gt_sriov_dbg(gt, "VF%u resume was canceled!\n", vfid);
730 	return -ECANCELED;
731 }
732 
733 /**
734  * DOC: The VF STOP state machine
735  *
736  * The VF STOP state machine looks like::
737  *
738  *	 (READY,PAUSED,RESUMED)<-------<--------------------o
739  *	    |                                                \
740  *	   stop                                               \
741  *	    |                                                  \
742  *	....V..............................STOP_WIP......       \
743  *	:    \                                          :        o
744  *	:     \   o----<----busy                        :        |
745  *	:      \ /            /                         :        |
746  *	:       STOP_SEND_STOP--------failed--->--------o--->(STOP_FAILED)
747  *	:       /             \                         :        |
748  *	:    acked             rejected-------->--------o--->(MISMATCH)
749  *	:     /                                         :
750  *	:....o..............o...............o...........:
751  *	     |              |               |
752  *	  completed        flr            restart
753  *	     |              |               |
754  *	     V         .....V.....          V
755  *	 (STOPPED)     : FLR_WIP :       (READY)
756  *	               :.........:
757  *
758  * For the full state machine view, see `The VF state machine`_.
759  */
760 
761 static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
762 {
763 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP))
764 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP);
765 }
766 
767 static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid)
768 {
769 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED))
770 		pf_enter_vf_state_machine_bug(gt, vfid);
771 
772 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
773 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
774 	pf_exit_vf_mismatch(gt, vfid);
775 	pf_exit_vf_wip(gt, vfid);
776 }
777 
/* "completed" edge of the STOP state machine: the VF becomes STOPPED. */
static void pf_enter_vf_stop_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_stopped(gt, vfid);
}
782 
783 static void pf_enter_vf_stop_failed(struct xe_gt *gt, unsigned int vfid)
784 {
785 	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
786 	pf_exit_vf_wip(gt, vfid);
787 }
788 
/*
 * "rejected" edge of the STOP state machine: flag a state mismatch and
 * then fail the stop.
 */
static void pf_enter_vf_stop_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_stop_failed(gt, vfid);
}
794 
795 static void pf_enter_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
796 {
797 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
798 		pf_enter_vf_state_machine_bug(gt, vfid);
799 
800 	pf_queue_vf(gt, vfid);
801 }
802 
803 static bool pf_exit_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
804 {
805 	int err;
806 
807 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
808 		return false;
809 
810 	err = pf_send_vf_stop(gt, vfid);
811 	if (err == -EBUSY)
812 		pf_enter_vf_stop_send_stop(gt, vfid);
813 	else if (err == -EIO)
814 		pf_enter_vf_stop_rejected(gt, vfid);
815 	else if (err)
816 		pf_enter_vf_stop_failed(gt, vfid);
817 	else
818 		pf_enter_vf_stop_completed(gt, vfid);
819 	return true;
820 }
821 
822 static bool pf_enter_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
823 {
824 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) {
825 		pf_enter_vf_wip(gt, vfid);
826 		pf_enter_vf_stop_send_stop(gt, vfid);
827 		return true;
828 	}
829 	return false;
830 }
831 
832 /**
833  * xe_gt_sriov_pf_control_stop_vf - Stop a VF.
834  * @gt: the &xe_gt
835  * @vfid: the VF identifier
836  *
837  * This function is for PF only.
838  *
839  * Return: 0 on success or a negative error code on failure.
840  */
841 int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
842 {
843 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_STOP_WIP);
844 	int err;
845 
846 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
847 		xe_gt_sriov_dbg(gt, "VF%u was already stopped!\n", vfid);
848 		return -ESTALE;
849 	}
850 
851 	if (!pf_enter_vf_stop_wip(gt, vfid)) {
852 		xe_gt_sriov_dbg(gt, "VF%u stop already in progress!\n", vfid);
853 		return -EALREADY;
854 	}
855 
856 	err = pf_wait_vf_wip_done(gt, vfid, timeout);
857 	if (err)
858 		return err;
859 
860 	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
861 		xe_gt_sriov_info(gt, "VF%u stopped!\n", vfid);
862 		return 0;
863 	}
864 
865 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED)) {
866 		xe_gt_sriov_dbg(gt, "VF%u stop failed!\n", vfid);
867 		return -EIO;
868 	}
869 
870 	xe_gt_sriov_dbg(gt, "VF%u stop was canceled!\n", vfid);
871 	return -ECANCELED;
872 }
873 
874 /**
875  * DOC: The VF FLR state machine
876  *
877  * The VF FLR state machine looks like::
878  *
879  *	 (READY,PAUSED,STOPPED)<------------<--------------o
880  *	    |                                               \
881  *	   flr                                               \
882  *	    |                                                 \
883  *	....V..........................FLR_WIP...........      \
884  *	:    \                                          :       \
885  *	:     \   o----<----busy                        :        |
886  *	:      \ /            /                         :        |
887  *	:       FLR_SEND_START---failed----->-----------o--->(FLR_FAILED)<---o
888  *	:        |            \                         :        |           |
889  *	:      acked           rejected----->-----------o--->(MISMATCH)      |
890  *	:        |                                      :        ^           |
891  *	:        v                                      :        |           |
892  *	:       FLR_WAIT_GUC                            :        |           |
893  *	:        |                                      :        |           |
894  *	:       done                                    :        |           |
895  *	:        |                                      :        |           |
896  *	:        v                                      :        |           |
897  *	:       FLR_GUC_DONE                            :        |           |
898  *	:        |                                      :        |           |
899  *	:       FLR_RESET_CONFIG---failed--->-----------o--------+-----------o
900  *	:        |                                      :        |           |
901  *	:       FLR_RESET_DATA                          :        |           |
902  *	:        |                                      :        |           |
903  *	:       FLR_RESET_MMIO                          :        |           |
904  *	:        |                                      :        |           |
905  *	:        | o----<----busy                       :        |           |
906  *	:        |/            /                        :        |           |
907  *	:       FLR_SEND_FINISH----failed--->-----------o--------+-----------o
908  *	:       /             \                         :        |
909  *	:     acked            rejected----->-----------o--------o
910  *	:     /                                         :
911  *	:....o..............................o...........:
912  *	     |                              |
913  *	  completed                       restart
914  *	     |                             /
915  *	     V                            /
916  *	  (READY)<----------<------------o
917  *
918  * For the full state machine view, see `The VF state machine`_.
919  */
920 
921 static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
922 {
923 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
924 		pf_enter_vf_state_machine_bug(gt, vfid);
925 
926 	pf_queue_vf(gt, vfid);
927 }
928 
929 static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
930 {
931 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
932 		xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid);
933 		return;
934 	}
935 
936 	pf_enter_vf_wip(gt, vfid);
937 	pf_enter_vf_flr_send_start(gt, vfid);
938 }
939 
940 static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
941 {
942 	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
943 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH);
944 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO);
945 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA);
946 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
947 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE);
948 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
949 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START);
950 	}
951 }
952 
/* A completed FLR simply puts the VF (back) into the READY state. */
static void pf_enter_vf_flr_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_ready(gt, vfid);
}
957 
958 static void pf_enter_vf_flr_failed(struct xe_gt *gt, unsigned int vfid)
959 {
960 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
961 		xe_gt_sriov_notice(gt, "VF%u FLR failed!\n", vfid);
962 	pf_exit_vf_wip(gt, vfid);
963 }
964 
/*
 * A rejected FLR request is treated as a state mismatch and additionally
 * as an FLR failure, in that order.
 */
static void pf_enter_vf_flr_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_flr_failed(gt, vfid);
}
970 
971 static void pf_enter_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
972 {
973 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
974 		pf_enter_vf_state_machine_bug(gt, vfid);
975 
976 	pf_queue_vf(gt, vfid);
977 }
978 
979 static bool pf_exit_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
980 {
981 	int err;
982 
983 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
984 		return false;
985 
986 	err = pf_send_vf_flr_finish(gt, vfid);
987 	if (err == -EBUSY)
988 		pf_enter_vf_flr_send_finish(gt, vfid);
989 	else if (err == -EIO)
990 		pf_enter_vf_flr_rejected(gt, vfid);
991 	else if (err)
992 		pf_enter_vf_flr_failed(gt, vfid);
993 	else
994 		pf_enter_vf_flr_completed(gt, vfid);
995 	return true;
996 }
997 
998 static void pf_enter_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
999 {
1000 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
1001 		pf_enter_vf_state_machine_bug(gt, vfid);
1002 
1003 	pf_queue_vf(gt, vfid);
1004 }
1005 
1006 static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
1007 {
1008 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
1009 		return false;
1010 
1011 	/* XXX: placeholder */
1012 
1013 	pf_enter_vf_flr_send_finish(gt, vfid);
1014 	return true;
1015 }
1016 
1017 static void pf_enter_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
1018 {
1019 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
1020 		pf_enter_vf_state_machine_bug(gt, vfid);
1021 
1022 	pf_queue_vf(gt, vfid);
1023 }
1024 
1025 static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
1026 {
1027 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
1028 		return false;
1029 
1030 	xe_gt_sriov_pf_service_reset(gt, vfid);
1031 	xe_gt_sriov_pf_monitor_flr(gt, vfid);
1032 
1033 	pf_enter_vf_flr_reset_mmio(gt, vfid);
1034 	return true;
1035 }
1036 
1037 static void pf_enter_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
1038 {
1039 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
1040 		pf_enter_vf_state_machine_bug(gt, vfid);
1041 
1042 	pf_queue_vf(gt, vfid);
1043 }
1044 
1045 static bool pf_exit_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
1046 {
1047 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
1048 	int err;
1049 
1050 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
1051 		return false;
1052 
1053 	err = xe_gt_sriov_pf_config_sanitize(gt, vfid, timeout);
1054 	if (err)
1055 		pf_enter_vf_flr_failed(gt, vfid);
1056 	else
1057 		pf_enter_vf_flr_reset_data(gt, vfid);
1058 	return true;
1059 }
1060 
1061 static void pf_enter_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
1062 {
1063 	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC))
1064 		pf_enter_vf_state_machine_bug(gt, vfid);
1065 }
1066 
1067 static bool pf_exit_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
1068 {
1069 	return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
1070 }
1071 
1072 static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
1073 {
1074 	int err;
1075 
1076 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
1077 		return false;
1078 
1079 	/* GuC may actually send a FLR_DONE before we get a RESPONSE */
1080 	pf_enter_vf_flr_wait_guc(gt, vfid);
1081 
1082 	err = pf_send_vf_flr_start(gt, vfid);
1083 	if (err) {
1084 		/* send failed, so we shouldn't expect FLR_DONE from GuC */
1085 		pf_exit_vf_flr_wait_guc(gt, vfid);
1086 
1087 		if (err == -EBUSY)
1088 			pf_enter_vf_flr_send_start(gt, vfid);
1089 		else if (err == -EIO)
1090 			pf_enter_vf_flr_rejected(gt, vfid);
1091 		else
1092 			pf_enter_vf_flr_failed(gt, vfid);
1093 	} else {
1094 		/*
1095 		 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
1096 		 * but since GuC didn't complain, we may clear MISMATCH
1097 		 */
1098 		pf_exit_vf_mismatch(gt, vfid);
1099 	}
1100 
1101 	return true;
1102 }
1103 
1104 static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
1105 {
1106 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
1107 		return false;
1108 
1109 	pf_enter_vf_flr_reset_config(gt, vfid);
1110 	return true;
1111 }
1112 
1113 static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
1114 {
1115 	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
1116 		pf_queue_vf(gt, vfid);
1117 }
1118 
1119 /**
1120  * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence.
1121  * @gt: the &xe_gt
1122  * @vfid: the VF identifier
1123  *
1124  * This function is for PF only.
1125  *
1126  * Return: 0 on success or a negative error code on failure.
1127  */
1128 int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid)
1129 {
1130 	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP);
1131 	int err;
1132 
1133 	pf_enter_vf_flr_wip(gt, vfid);
1134 
1135 	err = pf_wait_vf_wip_done(gt, vfid, timeout);
1136 	if (err) {
1137 		xe_gt_sriov_notice(gt, "VF%u FLR didn't finish in %u ms (%pe)\n",
1138 				   vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
1139 		return err;
1140 	}
1141 
1142 	if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
1143 		return -EIO;
1144 
1145 	return 0;
1146 }
1147 
1148 /**
1149  * DOC: The VF FLR Flow with GuC
1150  *
1151  * The VF FLR flow includes several steps::
1152  *
1153  *	         PF                        GUC             PCI
1154  *	========================================================
1155  *	         |                          |               |
1156  *	(1)      |                         [ ] <----- FLR --|
1157  *	         |                         [ ]              :
1158  *	(2)     [ ] <-------- NOTIFY FLR --[ ]
1159  *	        [ ]                         |
1160  *	(3)     [ ]                         |
1161  *	        [ ]                         |
1162  *	        [ ]-- START FLR ---------> [ ]
1163  *	         |                         [ ]
1164  *	(4)      |                         [ ]
1165  *	         |                         [ ]
1166  *	        [ ] <--------- FLR DONE -- [ ]
1167  *	        [ ]                         |
1168  *	(5)     [ ]                         |
1169  *	        [ ]                         |
1170  *	        [ ]-- FINISH FLR --------> [ ]
1171  *	         |                          |
1172  *
1173  * * Step 1: PCI HW generates interrupt to the GuC about VF FLR
1174  * * Step 2: GuC FW sends G2H notification to the PF about VF FLR
1175  * * Step 2a: on some platforms G2H is only received from root GuC
1176  * * Step 3: PF sends H2G request to the GuC to start VF FLR sequence
1177  * * Step 3a: on some platforms PF must send H2G to all other GuCs
1178  * * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done
1179  * * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished
1180  */
1181 
1182 static bool needs_dispatch_flr(struct xe_device *xe)
1183 {
1184 	return xe->info.platform == XE_PVC;
1185 }
1186 
1187 static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid)
1188 {
1189 	struct xe_device *xe = gt_to_xe(gt);
1190 	struct xe_gt *gtit;
1191 	unsigned int gtid;
1192 
1193 	xe_gt_sriov_info(gt, "VF%u FLR\n", vfid);
1194 
1195 	if (needs_dispatch_flr(xe)) {
1196 		for_each_gt(gtit, xe, gtid)
1197 			pf_enter_vf_flr_wip(gtit, vfid);
1198 	} else {
1199 		pf_enter_vf_flr_wip(gt, vfid);
1200 	}
1201 }
1202 
1203 static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid)
1204 {
1205 	if (!pf_exit_vf_flr_wait_guc(gt, vfid)) {
1206 		xe_gt_sriov_dbg(gt, "Received out of order 'VF%u FLR done'\n", vfid);
1207 		pf_enter_vf_mismatch(gt, vfid);
1208 		return;
1209 	}
1210 
1211 	pf_enter_vf_flr_guc_done(gt, vfid);
1212 }
1213 
1214 static void pf_handle_vf_pause_done(struct xe_gt *gt, u32 vfid)
1215 {
1216 	if (!pf_exit_pause_wait_guc(gt, vfid)) {
1217 		xe_gt_sriov_dbg(gt, "Received out of order 'VF%u PAUSE done'\n", vfid);
1218 		pf_enter_vf_mismatch(gt, vfid);
1219 		return;
1220 	}
1221 
1222 	pf_enter_vf_pause_guc_done(gt, vfid);
1223 }
1224 
1225 static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid)
1226 {
1227 	xe_gt_sriov_dbg_verbose(gt, "received VF%u event %#x\n", vfid, eventid);
1228 
1229 	if (vfid > xe_gt_sriov_pf_get_totalvfs(gt))
1230 		return -EPROTO;
1231 
1232 	switch (eventid) {
1233 	case GUC_PF_NOTIFY_VF_FLR:
1234 		pf_handle_vf_flr(gt, vfid);
1235 		break;
1236 	case GUC_PF_NOTIFY_VF_FLR_DONE:
1237 		pf_handle_vf_flr_done(gt, vfid);
1238 		break;
1239 	case GUC_PF_NOTIFY_VF_PAUSE_DONE:
1240 		pf_handle_vf_pause_done(gt, vfid);
1241 		break;
1242 	case GUC_PF_NOTIFY_VF_FIXUP_DONE:
1243 		break;
1244 	default:
1245 		return -ENOPKG;
1246 	}
1247 	return 0;
1248 }
1249 
1250 static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid)
1251 {
1252 	switch (eventid) {
1253 	case GUC_PF_NOTIFY_VF_ENABLE:
1254 		xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n",
1255 					str_enabled_disabled(true),
1256 					str_enabled_disabled(false));
1257 		break;
1258 	default:
1259 		return -ENOPKG;
1260 	}
1261 	return 0;
1262 }
1263 
1264 /**
1265  * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC.
1266  * @gt: the &xe_gt
1267  * @msg: the G2H message
1268  * @len: the length of the G2H message
1269  *
1270  * This function is for PF only.
1271  *
1272  * Return: 0 on success or a negative error code on failure.
1273  */
1274 int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
1275 {
1276 	u32 vfid;
1277 	u32 eventid;
1278 
1279 	xe_gt_assert(gt, len);
1280 	xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
1281 	xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
1282 	xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
1283 		     GUC_ACTION_GUC2PF_VF_STATE_NOTIFY);
1284 
1285 	if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt))))
1286 		return -EPROTO;
1287 
1288 	if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0])))
1289 		return -EPFNOSUPPORT;
1290 
1291 	if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN))
1292 		return -EPROTO;
1293 
1294 	vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]);
1295 	eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]);
1296 
1297 	return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid);
1298 }
1299 
1300 static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
1301 {
1302 	if (pf_exit_vf_flr_send_start(gt, vfid))
1303 		return true;
1304 
1305 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) {
1306 		xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
1307 					control_bit_to_string(XE_GT_SRIOV_STATE_FLR_WAIT_GUC));
1308 		return false;
1309 	}
1310 
1311 	if (pf_exit_vf_flr_guc_done(gt, vfid))
1312 		return true;
1313 
1314 	if (pf_exit_vf_flr_reset_config(gt, vfid))
1315 		return true;
1316 
1317 	if (pf_exit_vf_flr_reset_data(gt, vfid))
1318 		return true;
1319 
1320 	if (pf_exit_vf_flr_reset_mmio(gt, vfid))
1321 		return true;
1322 
1323 	if (pf_exit_vf_flr_send_finish(gt, vfid))
1324 		return true;
1325 
1326 	if (pf_exit_vf_stop_send_stop(gt, vfid))
1327 		return true;
1328 
1329 	if (pf_exit_vf_pause_send_pause(gt, vfid))
1330 		return true;
1331 
1332 	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) {
1333 		xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
1334 					control_bit_to_string(XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC));
1335 		return true;
1336 	}
1337 
1338 	if (pf_exit_vf_pause_guc_done(gt, vfid))
1339 		return true;
1340 
1341 	if (pf_exit_vf_resume_send_resume(gt, vfid))
1342 		return true;
1343 
1344 	return false;
1345 }
1346 
1347 static unsigned int pf_control_state_index(struct xe_gt *gt,
1348 					   struct xe_gt_sriov_control_state *cs)
1349 {
1350 	return container_of(cs, struct xe_gt_sriov_metadata, control) - gt->sriov.pf.vfs;
1351 }
1352 
1353 static void pf_worker_find_work(struct xe_gt *gt)
1354 {
1355 	struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;
1356 	struct xe_gt_sriov_control_state *cs;
1357 	unsigned int vfid;
1358 	bool empty;
1359 	bool more;
1360 
1361 	spin_lock(&pfc->lock);
1362 	cs = list_first_entry_or_null(&pfc->list, struct xe_gt_sriov_control_state, link);
1363 	if (cs)
1364 		list_del_init(&cs->link);
1365 	empty = list_empty(&pfc->list);
1366 	spin_unlock(&pfc->lock);
1367 
1368 	if (!cs)
1369 		return;
1370 
1371 	/* VF metadata structures are indexed by the VFID */
1372 	vfid = pf_control_state_index(gt, cs);
1373 	xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));
1374 
1375 	more = pf_process_vf_state_machine(gt, vfid);
1376 	if (more)
1377 		pf_queue_vf(gt, vfid);
1378 	else if (!empty)
1379 		pf_queue_control_worker(gt);
1380 }
1381 
1382 static void control_worker_func(struct work_struct *w)
1383 {
1384 	struct xe_gt *gt = container_of(w, struct xe_gt, sriov.pf.control.worker);
1385 
1386 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
1387 	pf_worker_find_work(gt);
1388 }
1389 
1390 static void pf_stop_worker(struct xe_gt *gt)
1391 {
1392 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
1393 	cancel_work_sync(&gt->sriov.pf.control.worker);
1394 }
1395 
/*
 * Managed release action registered by xe_gt_sriov_pf_control_init();
 * @data is the &xe_gt whose control worker is to be stopped.
 */
static void control_fini_action(struct drm_device *dev, void *data)
{
	pf_stop_worker(data);
}
1402 
1403 /**
1404  * xe_gt_sriov_pf_control_init() - Initialize PF's control data.
1405  * @gt: the &xe_gt
1406  *
1407  * This function is for PF only.
1408  *
1409  * Return: 0 on success or a negative error code on failure.
1410  */
1411 int xe_gt_sriov_pf_control_init(struct xe_gt *gt)
1412 {
1413 	struct xe_device *xe = gt_to_xe(gt);
1414 	unsigned int n, totalvfs;
1415 
1416 	xe_gt_assert(gt, IS_SRIOV_PF(xe));
1417 
1418 	totalvfs = xe_sriov_pf_get_totalvfs(xe);
1419 	for (n = 0; n <= totalvfs; n++) {
1420 		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, n);
1421 
1422 		init_completion(&cs->done);
1423 		INIT_LIST_HEAD(&cs->link);
1424 	}
1425 
1426 	spin_lock_init(&gt->sriov.pf.control.lock);
1427 	INIT_LIST_HEAD(&gt->sriov.pf.control.list);
1428 	INIT_WORK(&gt->sriov.pf.control.worker, control_worker_func);
1429 
1430 	return drmm_add_action_or_reset(&xe->drm, control_fini_action, gt);
1431 }
1432 
/**
 * xe_gt_sriov_pf_control_restart() - Restart SR-IOV control data after a GT reset.
 * @gt: the &xe_gt
 *
 * Any per-VF status maintained by the PF or any ongoing VF control activity
 * performed by the PF must be reset or cancelled when the GT is reset.
 *
 * The control worker is stopped first, then every VF (1 up to and including
 * the total number of VFs) is put into the READY state.
 *
 * This function is for PF only.
 */
void xe_gt_sriov_pf_control_restart(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	unsigned int totalvfs;
	unsigned int vfid;

	xe_gt_assert(gt, IS_SRIOV_PF(xe));

	pf_stop_worker(gt);

	totalvfs = xe_sriov_pf_get_totalvfs(xe);
	for (vfid = 1; vfid <= totalvfs; vfid++)
		pf_enter_vf_ready(gt, vfid);
}
1455