xref: /linux/drivers/gpu/drm/xe/xe_guc_relay.c (revision 1fd1dc41724319406b0aff221a352a400b0ddfc5)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2023 Intel Corporation
4  */
5 
6 #include <linux/bitfield.h>
7 #include <linux/delay.h>
8 #include <linux/fault-inject.h>
9 
10 #include <drm/drm_managed.h>
11 
12 #include <kunit/static_stub.h>
13 #include <kunit/test-bug.h>
14 
15 #include "abi/guc_actions_sriov_abi.h"
16 #include "abi/guc_relay_actions_abi.h"
17 #include "abi/guc_relay_communication_abi.h"
18 
19 #include "xe_assert.h"
20 #include "xe_device_types.h"
21 #include "xe_gt_sriov_printk.h"
22 #include "xe_gt_sriov_pf_service.h"
23 #include "xe_guc.h"
24 #include "xe_guc_ct.h"
25 #include "xe_guc_hxg_helpers.h"
26 #include "xe_guc_relay.h"
27 #include "xe_guc_relay_types.h"
28 #include "xe_sriov.h"
29 
30 /*
31  * How long should we wait for the response?
32  * XXX this value is subject for the profiling.
33  */
34 #define RELAY_TIMEOUT_MSEC	(2500)
35 
36 static void relays_worker_fn(struct work_struct *w);
37 
/* Resolve the &xe_guc that embeds this @relay. */
static struct xe_guc *relay_to_guc(struct xe_guc_relay *relay)
{
	return container_of(relay, struct xe_guc, relay);
}
42 
/* Resolve the CTB channel used to exchange relay messages with the GuC. */
static struct xe_guc_ct *relay_to_ct(struct xe_guc_relay *relay)
{
	return &relay_to_guc(relay)->ct;
}
47 
/* Resolve the GT that owns this @relay (used for asserts and diagnostics). */
static struct xe_gt *relay_to_gt(struct xe_guc_relay *relay)
{
	return guc_to_gt(relay_to_guc(relay));
}
52 
/* Resolve the &xe_device that this @relay ultimately belongs to. */
static struct xe_device *relay_to_xe(struct xe_guc_relay *relay)
{
	return gt_to_xe(relay_to_gt(relay));
}
57 
#define XE_RELAY_DIAG_RATELIMIT_INTERVAL	(10 * HZ)
#define XE_RELAY_DIAG_RATELIMIT_BURST		10

/*
 * Rate-limited diagnostics: on CONFIG_DRM_XE_DEBUG_SRIOV builds every message
 * is printed, otherwise messages are throttled via &relay->diag_ratelimit.
 */
#define relay_ratelimit_printk(relay, _level, fmt...) ({			\
	typeof(relay) _r = (relay);						\
	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ||				\
	    ___ratelimit(&_r->diag_ratelimit, "xe_guc_relay"))			\
		xe_gt_sriov_##_level(relay_to_gt(_r), "relay: " fmt);		\
})

#define relay_assert(relay, condition)	xe_gt_assert(relay_to_gt(relay), condition)
#define relay_notice(relay, msg...)	relay_ratelimit_printk((relay), notice, msg)
#define relay_debug(relay, msg...)	relay_ratelimit_printk((relay), dbg_verbose, msg)
71 
/*
 * Number of VFs this device may host: 0 when running as a VF, otherwise the
 * PCI totalvfs value.  Used to size the transaction mempool.
 */
static int relay_get_totalvfs(struct xe_guc_relay *relay)
{
	struct xe_device *xe = relay_to_xe(relay);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);

	/* KUnit tests may redirect this to return a fake value */
	KUNIT_STATIC_STUB_REDIRECT(relay_get_totalvfs, relay);
	return IS_SRIOV_VF(xe) ? 0 : pci_sriov_get_totalvfs(pdev);
}
80 
/* The relay is operational once its mempool was set up in xe_guc_relay_init(). */
static bool relay_is_ready(struct xe_guc_relay *relay)
{
	return mempool_initialized(&relay->pool);
}
85 
/*
 * Allocate the next relay message identifier (RID) by incrementing the
 * last used value under the relay lock.
 */
static u32 relay_get_next_rid(struct xe_guc_relay *relay)
{
	u32 rid;

	spin_lock(&relay->lock);
	rid = ++relay->last_rid;
	spin_unlock(&relay->lock);

	return rid;
}
96 
/**
 * struct relay_transaction - internal data used to handle transactions
 *
 * Relation between struct relay_transaction members::
 *
 *                 <-------------------- GUC_CTB_MAX_DWORDS -------------->
 *                                  <-------- GUC_RELAY_MSG_MAX_LEN --->
 *                 <--- offset ---> <--- request_len ------->
 *                +----------------+-------------------------+----------+--+
 *                |                |                         |          |  |
 *                +----------------+-------------------------+----------+--+
 *                ^                ^
 *               /                /
 *    request_buf          request
 *
 *                 <-------------------- GUC_CTB_MAX_DWORDS -------------->
 *                                  <-------- GUC_RELAY_MSG_MAX_LEN --->
 *                 <--- offset ---> <--- response_len --->
 *                +----------------+----------------------+-------------+--+
 *                |                |                      |             |  |
 *                +----------------+----------------------+-------------+--+
 *                ^                ^
 *               /                /
 *   response_buf         response
 */
struct relay_transaction {
	/**
	 * @incoming: indicates whether this transaction represents an incoming
	 *            request from the remote VF/PF or this transaction
	 *            represents outgoing request to the remote VF/PF.
	 */
	bool incoming;

	/**
	 * @remote: PF/VF identifier of the origin (or target) of the relay
	 *          request message.
	 */
	u32 remote;

	/** @rid: identifier of the VF/PF relay message. */
	u32 rid;

	/**
	 * @request: points to the inner VF/PF request message, copied to the
	 *           #request_buf starting at #offset.
	 */
	u32 *request;

	/** @request_len: length of the inner VF/PF request message. */
	u32 request_len;

	/**
	 * @response: points to the placeholder buffer where inner VF/PF
	 *            response will be located, for outgoing transaction
	 *            this could be caller's buffer (if provided) otherwise
	 *            it points to the #response_buf starting at #offset.
	 */
	u32 *response;

	/**
	 * @response_len: length of the inner VF/PF response message (only
	 *                if #status is 0), initially set to the size of the
	 *                placeholder buffer where response message will be
	 *                copied.
	 */
	u32 response_len;

	/**
	 * @offset: offset to the start of the inner VF/PF relay message inside
	 *          buffers; this offset is equal the length of the outer GuC
	 *          relay header message.
	 */
	u32 offset;

	/**
	 * @request_buf: buffer with VF/PF request message including outer
	 *               transport message.
	 */
	u32 request_buf[GUC_CTB_MAX_DWORDS];

	/**
	 * @response_buf: buffer with VF/PF response message including outer
	 *                transport message.
	 */
	u32 response_buf[GUC_CTB_MAX_DWORDS];

	/**
	 * @reply: status of the reply, 0 means that data pointed by the
	 *         #response is valid.
	 */
	int reply;

	/** @done: completion of the outgoing transaction. */
	struct completion done;

	/** @link: transaction list link */
	struct list_head link;
};
195 
196 static u32 prepare_pf2guc(u32 *msg, u32 target, u32 rid)
197 {
198 	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
199 		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
200 		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, XE_GUC_ACTION_PF2GUC_RELAY_TO_VF);
201 	msg[1] = FIELD_PREP(PF2GUC_RELAY_TO_VF_REQUEST_MSG_1_VFID, target);
202 	msg[2] = FIELD_PREP(PF2GUC_RELAY_TO_VF_REQUEST_MSG_2_RELAY_ID, rid);
203 
204 	return PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN;
205 }
206 
207 static u32 prepare_vf2guc(u32 *msg, u32 rid)
208 {
209 	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
210 		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
211 		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, XE_GUC_ACTION_VF2GUC_RELAY_TO_PF);
212 	msg[1] = FIELD_PREP(VF2GUC_RELAY_TO_PF_REQUEST_MSG_1_RELAY_ID, rid);
213 
214 	return VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN;
215 }
216 
/*
 * Allocate and initialize a transaction from the mempool.
 *
 * For an outgoing transaction the outer GuC header is prepared at the start
 * of @request_buf; for an incoming one it goes into @response_buf, since we
 * will only be sending the reply.  A non-zero @remote selects the PF2GUC
 * header variant, remote 0 (the PF) selects VF2GUC.
 *
 * Return: pointer to the new transaction or an ERR_PTR() on failure.
 */
static struct relay_transaction *
__relay_get_transaction(struct xe_guc_relay *relay, bool incoming, u32 remote, u32 rid,
			const u32 *action, u32 action_len, u32 *resp, u32 resp_size)
{
	struct relay_transaction *txn;

	relay_assert(relay, action_len >= GUC_RELAY_MSG_MIN_LEN);
	relay_assert(relay, action_len <= GUC_RELAY_MSG_MAX_LEN);
	/* @resp and @resp_size must be provided together or not at all */
	relay_assert(relay, !(!!resp ^ !!resp_size));
	relay_assert(relay, resp_size <= GUC_RELAY_MSG_MAX_LEN);
	relay_assert(relay, resp_size == 0 || resp_size >= GUC_RELAY_MSG_MIN_LEN);

	if (unlikely(!relay_is_ready(relay)))
		return ERR_PTR(-ENODEV);

	/*
	 * For incoming requests we can't use GFP_KERNEL as those are delivered
	 * with CTB lock held which is marked as used in the reclaim path.
	 * Btw, that's one of the reason why we use mempool here!
	 */
	txn = mempool_alloc(&relay->pool, incoming ? GFP_ATOMIC : GFP_NOWAIT);
	if (!txn)
		return ERR_PTR(-ENOMEM);

	txn->incoming = incoming;
	txn->remote = remote;
	txn->rid = rid;
	txn->offset = remote ?
		prepare_pf2guc(incoming ? txn->response_buf : txn->request_buf, remote, rid) :
		prepare_vf2guc(incoming ? txn->response_buf : txn->request_buf, rid);

	relay_assert(relay, txn->offset);
	relay_assert(relay, txn->offset + GUC_RELAY_MSG_MAX_LEN <= ARRAY_SIZE(txn->request_buf));
	relay_assert(relay, txn->offset + GUC_RELAY_MSG_MAX_LEN <= ARRAY_SIZE(txn->response_buf));

	txn->request = txn->request_buf + txn->offset;
	memcpy(&txn->request_buf[txn->offset], action, sizeof(u32) * action_len);
	txn->request_len = action_len;

	/* without a caller-provided placeholder, reply lands in response_buf */
	txn->response = resp ?: txn->response_buf + txn->offset;
	txn->response_len = resp_size ?: GUC_RELAY_MSG_MAX_LEN;
	txn->reply = -ENOMSG;
	INIT_LIST_HEAD(&txn->link);
	init_completion(&txn->done);

	return txn;
}
264 
265 static struct relay_transaction *
266 relay_new_transaction(struct xe_guc_relay *relay, u32 target, const u32 *action, u32 len,
267 		      u32 *resp, u32 resp_size)
268 {
269 	u32 rid = relay_get_next_rid(relay);
270 
271 	return __relay_get_transaction(relay, false, target, rid, action, len, resp, resp_size);
272 }
273 
/* Create a transaction for a request received from @origin, reusing its RID. */
static struct relay_transaction *
relay_new_incoming_transaction(struct xe_guc_relay *relay, u32 origin, u32 rid,
			       const u32 *action, u32 len)
{
	return __relay_get_transaction(relay, true, origin, rid, action, len, NULL, 0);
}
280 
/*
 * Return a transaction to the mempool.  A few fields are poisoned first so
 * that any stale use after release is easier to spot.
 */
static void relay_release_transaction(struct xe_guc_relay *relay, struct relay_transaction *txn)
{
	/* must already be off the pending/incoming lists */
	relay_assert(relay, list_empty(&txn->link));

	txn->offset = 0;
	txn->response = NULL;
	txn->reply = -ESTALE;
	mempool_free(txn, &relay->pool);
}
290 
/*
 * Push the transaction message (outer GuC header plus inner relay message)
 * to the GuC over CTB.  For incoming transactions the prepared reply is
 * sent, for outgoing ones the request.
 *
 * Return: 0 on success or a negative error code on failure (unexpected
 *         positive CTB data is mapped to -EPROTO).
 */
static int relay_send_transaction(struct xe_guc_relay *relay, struct relay_transaction *txn)
{
	u32 len = txn->incoming ? txn->response_len : txn->request_len;
	u32 *buf = txn->incoming ? txn->response_buf : txn->request_buf;
	u32 *msg = buf + txn->offset;
	int ret;

	relay_assert(relay, txn->offset);
	relay_assert(relay, txn->offset + len <= GUC_CTB_MAX_DWORDS);
	relay_assert(relay, len >= GUC_RELAY_MSG_MIN_LEN);
	relay_assert(relay, len <= GUC_RELAY_MSG_MAX_LEN);

	relay_debug(relay, "sending %s.%u to %u = %*ph\n",
		    guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])),
		    txn->rid, txn->remote, (int)sizeof(u32) * len, msg);

	ret = xe_guc_ct_send_block(relay_to_ct(relay), buf, len + txn->offset);

	if (unlikely(ret > 0)) {
		relay_notice(relay, "Unexpected data=%d from GuC, wrong ABI?\n", ret);
		ret = -EPROTO;
	}
	if (unlikely(ret < 0)) {
		/* report both the outer GuC message and the inner relay message */
		relay_notice(relay, "Failed to send %s.%x to GuC (%pe) %*ph ...\n",
			     guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, buf[0])),
			     FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, buf[0]),
			     ERR_PTR(ret), (int)sizeof(u32) * txn->offset, buf);
		relay_notice(relay, "Failed to send %s.%u to %u (%pe) %*ph\n",
			     guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])),
			     txn->rid, txn->remote, ERR_PTR(ret), (int)sizeof(u32) * len, msg);
	}

	return ret;
}
325 
/* drmm release action: tear down the transaction mempool. */
static void __fini_relay(struct drm_device *drm, void *arg)
{
	struct xe_guc_relay *relay = arg;

	mempool_exit(&relay->pool);
}
332 
/**
 * xe_guc_relay_init - Initialize a &xe_guc_relay
 * @relay: the &xe_guc_relay to initialize
 *
 * Initialize remaining members of &xe_guc_relay that may depend
 * on the SR-IOV mode.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_guc_relay_init(struct xe_guc_relay *relay)
{
	const int XE_RELAY_MEMPOOL_MIN_NUM = 1;
	struct xe_device *xe = relay_to_xe(relay);
	int err;

	relay_assert(relay, !relay_is_ready(relay));

	/* the relay is only used in SR-IOV mode */
	if (!IS_SRIOV(xe))
		return 0;

	spin_lock_init(&relay->lock);
	INIT_WORK(&relay->worker, relays_worker_fn);
	INIT_LIST_HEAD(&relay->pending_relays);
	INIT_LIST_HEAD(&relay->incoming_actions);
	ratelimit_state_init(&relay->diag_ratelimit,
			     XE_RELAY_DIAG_RATELIMIT_INTERVAL,
			     XE_RELAY_DIAG_RATELIMIT_BURST);

	/* one element for our own use plus one per potential VF origin */
	err = mempool_init_kmalloc_pool(&relay->pool, XE_RELAY_MEMPOOL_MIN_NUM +
					relay_get_totalvfs(relay),
					sizeof(struct relay_transaction));
	if (err)
		return err;

	relay_debug(relay, "using mempool with %d elements\n", relay->pool.min_nr);

	return drmm_add_action_or_reset(&xe->drm, __fini_relay, relay);
}
ALLOW_ERROR_INJECTION(xe_guc_relay_init, ERRNO); /* See xe_pci_probe() */
372 
373 static u32 to_relay_error(int err)
374 {
375 	/* XXX: assume that relay errors match errno codes */
376 	return err < 0 ? -err : GUC_RELAY_ERROR_UNDISCLOSED;
377 }
378 
379 static int from_relay_error(u32 error)
380 {
381 	/* XXX: assume that relay errors match errno codes */
382 	return error ? -error : -ENODATA;
383 }
384 
385 static u32 sanitize_relay_error(u32 error)
386 {
387 	/* XXX TBD if generic error codes will be allowed */
388 	if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
389 		error = GUC_RELAY_ERROR_UNDISCLOSED;
390 	return error;
391 }
392 
393 static u32 sanitize_relay_error_hint(u32 hint)
394 {
395 	/* XXX TBD if generic error codes will be allowed */
396 	if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
397 		hint = 0;
398 	return hint;
399 }
400 
/*
 * Encode a HXG RESPONSE_FAILURE message with @error and @hint into @msg.
 *
 * Return: length of the prepared message (in dwords).
 */
static u32 prepare_error_reply(u32 *msg, u32 error, u32 hint)
{
	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_FAILURE) |
		 FIELD_PREP(GUC_HXG_FAILURE_MSG_0_HINT, hint) |
		 FIELD_PREP(GUC_HXG_FAILURE_MSG_0_ERROR, error);

	/* warn if the values did not fit in their message fields */
	XE_WARN_ON(!FIELD_FIT(GUC_HXG_FAILURE_MSG_0_ERROR, error));
	XE_WARN_ON(!FIELD_FIT(GUC_HXG_FAILURE_MSG_0_HINT, hint));

	return GUC_HXG_FAILURE_MSG_LEN;
}
413 
/* No-op in production; KUnit tests may redirect it to observe busy re-waits. */
static void relay_testonly_nop(struct xe_guc_relay *relay)
{
	KUNIT_STATIC_STUB_REDIRECT(relay_testonly_nop, relay);
}
418 
/*
 * Send an outgoing request over the relay and wait for the remote reply.
 *
 * The transaction is added to &xe_guc_relay.pending_relays before sending so
 * the reply handler can match it by (remote, rid).  A -EAGAIN reply triggers
 * a full resend; a -EBUSY reply just re-arms the wait.
 *
 * NOTE(review): @buf and @buf_size are unused here; the response placeholder
 * was already captured in @txn by the caller — confirm before removing.
 *
 * Return: length of the response message (in dwords) or a negative error code.
 */
static int relay_send_message_and_wait(struct xe_guc_relay *relay,
				       struct relay_transaction *txn,
				       u32 *buf, u32 buf_size)
{
	unsigned long timeout = msecs_to_jiffies(RELAY_TIMEOUT_MSEC);
	u32 *msg = &txn->request_buf[txn->offset];
	u32 len = txn->request_len;
	u32 type, action, data0;
	int ret;
	long n;

	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
	action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]);
	data0 = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]);

	relay_debug(relay, "%s.%u to %u action %#x:%u\n",
		    guc_hxg_type_to_string(type),
		    txn->rid, txn->remote, action, data0);

	/* list ordering does not need to match RID ordering */
	spin_lock(&relay->lock);
	list_add_tail(&txn->link, &relay->pending_relays);
	spin_unlock(&relay->lock);

resend:
	ret = relay_send_transaction(relay, txn);
	if (unlikely(ret < 0))
		goto unlink;

wait:
	n = wait_for_completion_timeout(&txn->done, timeout);
	/* timed out with no reply recorded so far */
	if (unlikely(n == 0 && txn->reply)) {
		ret = -ETIME;
		goto unlink;
	}

	relay_debug(relay, "%u.%u reply %d after %u msec\n",
		    txn->remote, txn->rid, txn->reply, jiffies_to_msecs(timeout - n));
	if (unlikely(txn->reply)) {
		reinit_completion(&txn->done);
		if (txn->reply == -EAGAIN)
			goto resend;
		if (txn->reply == -EBUSY) {
			relay_testonly_nop(relay);
			goto wait;
		}
		/* positive replies carry an ABI error code from the remote */
		if (txn->reply > 0)
			ret = from_relay_error(txn->reply);
		else
			ret = txn->reply;
		goto unlink;
	}

	relay_debug(relay, "%u.%u response %*ph\n", txn->remote, txn->rid,
		    (int)sizeof(u32) * txn->response_len, txn->response);
	relay_assert(relay, txn->response_len >= GUC_RELAY_MSG_MIN_LEN);
	ret = txn->response_len;

unlink:
	spin_lock(&relay->lock);
	list_del_init(&txn->link);
	spin_unlock(&relay->lock);

	if (unlikely(ret < 0)) {
		relay_notice(relay, "Unsuccessful %s.%u %#x:%u to %u (%pe) %*ph\n",
			     guc_hxg_type_to_string(type), txn->rid,
			     action, data0, txn->remote, ERR_PTR(ret),
			     (int)sizeof(u32) * len, msg);
	}

	return ret;
}
491 
/*
 * Common send path for PF->VF and VF->PF messages: create a transaction and
 * dispatch it based on the HXG message type.  Only REQUEST messages wait for
 * a reply; EVENT messages are fire-and-forget.
 *
 * Return: non-negative response length (in dwords) or a negative error code.
 */
static int relay_send_to(struct xe_guc_relay *relay, u32 target,
			 const u32 *msg, u32 len, u32 *buf, u32 buf_size)
{
	struct relay_transaction *txn;
	int ret;

	relay_assert(relay, len >= GUC_RELAY_MSG_MIN_LEN);
	relay_assert(relay, len <= GUC_RELAY_MSG_MAX_LEN);
	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_HOST);
	relay_assert(relay, guc_hxg_type_is_action(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])));

	if (unlikely(!relay_is_ready(relay)))
		return -ENODEV;

	txn = relay_new_transaction(relay, target, msg, len, buf, buf_size);
	if (IS_ERR(txn))
		return PTR_ERR(txn);

	switch (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])) {
	case GUC_HXG_TYPE_REQUEST:
		ret = relay_send_message_and_wait(relay, txn, buf, buf_size);
		break;
	case GUC_HXG_TYPE_FAST_REQUEST:
		/* FAST_REQUEST is not supported over the relay */
		relay_assert(relay, !GUC_HXG_TYPE_FAST_REQUEST);
		fallthrough;
	case GUC_HXG_TYPE_EVENT:
		ret = relay_send_transaction(relay, txn);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	relay_release_transaction(relay, txn);
	return ret;
}
528 
529 #ifdef CONFIG_PCI_IOV
/**
 * xe_guc_relay_send_to_vf - Send a message to the VF.
 * @relay: the &xe_guc_relay which will send the message
 * @target: target VF number
 * @msg: request message to be sent (between GUC_RELAY_MSG_MIN_LEN and
 *       GUC_RELAY_MSG_MAX_LEN dwords)
 * @len: length of the request message (in dwords, can't be 0)
 * @buf: placeholder for the response message
 * @buf_size: size of the response message placeholder (in dwords)
 *
 * This function can only be used by the driver running in the SR-IOV PF mode.
 *
 * Return: Non-negative response length (in dwords) or
 *         a negative error code on failure.
 */
int xe_guc_relay_send_to_vf(struct xe_guc_relay *relay, u32 target,
			    const u32 *msg, u32 len, u32 *buf, u32 buf_size)
{
	relay_assert(relay, IS_SRIOV_PF(relay_to_xe(relay)));

	return relay_send_to(relay, target, msg, len, buf, buf_size);
}
551 #endif
552 
/**
 * xe_guc_relay_send_to_pf - Send a message to the PF.
 * @relay: the &xe_guc_relay which will send the message
 * @msg: request message to be sent (between GUC_RELAY_MSG_MIN_LEN and
 *       GUC_RELAY_MSG_MAX_LEN dwords)
 * @len: length of the message (in dwords, can't be 0)
 * @buf: placeholder for the response message
 * @buf_size: size of the response message placeholder (in dwords)
 *
 * This function can only be used by driver running in SR-IOV VF mode.
 *
 * Return: Non-negative response length (in dwords) or
 *         a negative error code on failure.
 */
int xe_guc_relay_send_to_pf(struct xe_guc_relay *relay,
			    const u32 *msg, u32 len, u32 *buf, u32 buf_size)
{
	relay_assert(relay, IS_SRIOV_VF(relay_to_xe(relay)));

	return relay_send_to(relay, PFID, msg, len, buf, buf_size);
}
573 
/*
 * Match an incoming reply against a pending transaction by (origin, rid) and
 * wake up the waiter.  A @reply of 0 means @msg carries a valid response of
 * @len dwords, which is copied into the waiter's placeholder buffer.
 *
 * Return: 0 on success, -ENOBUFS if the response does not fit into the
 *         placeholder, or -ESRCH if no matching transaction was found.
 */
static int relay_handle_reply(struct xe_guc_relay *relay, u32 origin,
			      u32 rid, int reply, const u32 *msg, u32 len)
{
	struct relay_transaction *pending;
	int err = -ESRCH;

	spin_lock(&relay->lock);
	list_for_each_entry(pending, &relay->pending_relays, link) {
		if (pending->remote != origin || pending->rid != rid) {
			relay_debug(relay, "%u.%u still awaits response\n",
				    pending->remote, pending->rid);
			continue;
		}
		err = 0; /* found! */
		if (reply == 0) {
			if (len > pending->response_len) {
				reply = -ENOBUFS;
				err = -ENOBUFS;
			} else {
				memcpy(pending->response, msg, 4 * len);
				pending->response_len = len;
			}
		}
		pending->reply = reply;
		complete_all(&pending->done);
		break;
	}
	spin_unlock(&relay->lock);

	return err;
}
605 
/*
 * Decode a RESPONSE_FAILURE message from the remote and propagate the error
 * (or -EREMOTEIO if the remote disclosed no error code) to the waiter.
 */
static int relay_handle_failure(struct xe_guc_relay *relay, u32 origin,
				u32 rid, const u32 *msg, u32 len)
{
	int error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]);
	u32 hint __maybe_unused = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[0]);

	relay_assert(relay, len);
	relay_debug(relay, "%u.%u error %#x (%pe) hint %u debug %*ph\n",
		    origin, rid, error, ERR_PTR(-error), hint, 4 * (len - 1), msg + 1);

	return relay_handle_reply(relay, origin, rid, error ?: -EREMOTEIO, NULL, 0);
}
618 
/*
 * Handler for the VFXPF_TESTLOOP debug action, used to exercise the relay
 * protocol (NOP, BUSY, RETRY, ECHO, FAIL opcodes).  Only available on
 * CONFIG_DRM_XE_DEBUG_SRIOV builds.
 *
 * Return: response length (in dwords), 0 for events, or a negative errno.
 */
static int relay_testloop_action_handler(struct xe_guc_relay *relay, u32 origin,
					 const u32 *msg, u32 len, u32 *response, u32 size)
{
	/* shared across calls: timestamp of the last non-busy reply */
	static ktime_t last_reply = 0;
	u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
	u32 action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]);
	u32 opcode = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]);
	ktime_t now = ktime_get();
	bool busy;
	int ret;

	relay_assert(relay, guc_hxg_type_is_action(type));
	relay_assert(relay, action == GUC_RELAY_ACTION_VFXPF_TESTLOOP);

	if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV))
		return -ECONNREFUSED;

	/* stay "busy" for up to 2 relay timeouts after the last real reply */
	if (!last_reply)
		last_reply = now;
	busy = ktime_before(now, ktime_add_ms(last_reply, 2 * RELAY_TIMEOUT_MSEC));
	if (!busy)
		last_reply = now;

	switch (opcode) {
	case VFXPF_TESTLOOP_OPCODE_NOP:
		if (type == GUC_HXG_TYPE_EVENT)
			return 0;
		return guc_hxg_msg_encode_success(response, 0);
	case VFXPF_TESTLOOP_OPCODE_BUSY:
		if (type == GUC_HXG_TYPE_EVENT)
			return -EPROTO;
		msleep(RELAY_TIMEOUT_MSEC / 8);
		if (busy)
			return -EINPROGRESS;
		return guc_hxg_msg_encode_success(response, 0);
	case VFXPF_TESTLOOP_OPCODE_RETRY:
		if (type == GUC_HXG_TYPE_EVENT)
			return -EPROTO;
		msleep(RELAY_TIMEOUT_MSEC / 8);
		if (busy)
			return guc_hxg_msg_encode_retry(response, 0);
		return guc_hxg_msg_encode_success(response, 0);
	case VFXPF_TESTLOOP_OPCODE_ECHO:
		if (type == GUC_HXG_TYPE_EVENT)
			return -EPROTO;
		if (size < len)
			return -ENOBUFS;
		/* copy the request payload back after the success header */
		ret = guc_hxg_msg_encode_success(response, len);
		memcpy(response + ret, msg + ret, (len - ret) * sizeof(u32));
		return len;
	case VFXPF_TESTLOOP_OPCODE_FAIL:
		return -EHWPOISON;
	default:
		break;
	}

	relay_notice(relay, "Unexpected action %#x opcode %#x\n", action, opcode);
	return -EBADRQC;
}
678 
/*
 * Dispatch an incoming relay action: TESTLOOP actions go to the debug
 * handler, everything else to the PF service layer (VFs accept no actions).
 *
 * Return: response length (in dwords), 0 for consumed events, or a
 *         negative error code.
 */
static int relay_action_handler(struct xe_guc_relay *relay, u32 origin,
				const u32 *msg, u32 len, u32 *response, u32 size)
{
	struct xe_gt *gt = relay_to_gt(relay);
	u32 type;
	int ret;

	relay_assert(relay, len >= GUC_HXG_MSG_MIN_LEN);

	if (FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]) == GUC_RELAY_ACTION_VFXPF_TESTLOOP)
		return relay_testloop_action_handler(relay, origin, msg, len, response, size);

	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);

	if (IS_SRIOV_PF(relay_to_xe(relay)))
		ret = xe_gt_sriov_pf_service_process_request(gt, origin, msg, len, response, size);
	else
		ret = -EOPNOTSUPP;

	/* events must not produce a response */
	if (type == GUC_HXG_TYPE_EVENT)
		relay_assert(relay, ret <= 0);

	return ret;
}
703 
704 static struct relay_transaction *relay_dequeue_transaction(struct xe_guc_relay *relay)
705 {
706 	struct relay_transaction *txn;
707 
708 	spin_lock(&relay->lock);
709 	txn = list_first_entry_or_null(&relay->incoming_actions, struct relay_transaction, link);
710 	if (txn)
711 		list_del_init(&txn->link);
712 	spin_unlock(&relay->lock);
713 
714 	return txn;
715 }
716 
/*
 * Worker body: take one queued incoming transaction, run its action handler,
 * and send back either the response or an (optionally sanitized) error reply.
 * An -EINPROGRESS result sends a BUSY reply and re-queues the transaction.
 */
static void relay_process_incoming_action(struct xe_guc_relay *relay)
{
	struct relay_transaction *txn;
	bool again = false;
	u32 type;
	int ret;

	txn = relay_dequeue_transaction(relay);
	if (!txn)
		return;

	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, txn->request_buf[txn->offset]);

	ret = relay_action_handler(relay, txn->remote,
				   txn->request_buf + txn->offset, txn->request_len,
				   txn->response_buf + txn->offset,
				   ARRAY_SIZE(txn->response_buf) - txn->offset);

	/* still in progress: reply BUSY now and retry the action later */
	if (ret == -EINPROGRESS) {
		again = true;
		ret = guc_hxg_msg_encode_busy(txn->response_buf + txn->offset, 0);
	}

	if (ret > 0) {
		txn->response_len = ret;
		ret = relay_send_transaction(relay, txn);
	}

	if (ret < 0) {
		u32 error = to_relay_error(ret);

		relay_notice(relay, "Failed to handle %s.%u from %u (%pe) %*ph\n",
			     guc_hxg_type_to_string(type), txn->rid, txn->remote,
			     ERR_PTR(ret), 4 * txn->request_len, txn->request_buf + txn->offset);

		/* only VF origins (remote != 0) get sanitized error details */
		txn->response_len = prepare_error_reply(txn->response_buf + txn->offset,
							txn->remote ?
							sanitize_relay_error(error) : error,
							txn->remote ?
							sanitize_relay_error_hint(-ret) : -ret);
		ret = relay_send_transaction(relay, txn);
		again = false;
	}

	if (again) {
		spin_lock(&relay->lock);
		list_add(&txn->link, &relay->incoming_actions);
		spin_unlock(&relay->lock);
		return;
	}

	if (unlikely(ret < 0))
		relay_notice(relay, "Failed to process action.%u (%pe) %*ph\n",
			     txn->rid, ERR_PTR(ret), 4 * txn->request_len,
			     txn->request_buf + txn->offset);

	relay_release_transaction(relay, txn);
}
775 
776 static bool relay_needs_worker(struct xe_guc_relay *relay)
777 {
778 	bool is_empty;
779 
780 	spin_lock(&relay->lock);
781 	is_empty = list_empty(&relay->incoming_actions);
782 	spin_unlock(&relay->lock);
783 
784 	return !is_empty;
785 
786 }
787 
/* Schedule the relay worker on the SR-IOV workqueue. */
static void relay_kick_worker(struct xe_guc_relay *relay)
{
	/* KUnit tests may redirect this to avoid real workqueue use */
	KUNIT_STATIC_STUB_REDIRECT(relay_kick_worker, relay);
	queue_work(relay_to_xe(relay)->sriov.wq, &relay->worker);
}
793 
/* Work item: process one incoming action, then reschedule if more remain. */
static void relays_worker_fn(struct work_struct *w)
{
	struct xe_guc_relay *relay = container_of(w, struct xe_guc_relay, worker);

	relay_process_incoming_action(relay);

	if (relay_needs_worker(relay))
		relay_kick_worker(relay);
}
803 
/*
 * Wrap an incoming action message in a transaction, queue it for the worker
 * and kick the worker.
 *
 * Return: 0 on success or a negative error code on failure.
 */
static int relay_queue_action_msg(struct xe_guc_relay *relay, u32 origin, u32 rid,
				  const u32 *msg, u32 len)
{
	struct relay_transaction *txn;

	txn = relay_new_incoming_transaction(relay, origin, rid, msg, len);
	if (IS_ERR(txn))
		return PTR_ERR(txn);

	spin_lock(&relay->lock);
	list_add_tail(&txn->link, &relay->incoming_actions);
	spin_unlock(&relay->lock);

	relay_kick_worker(relay);
	return 0;
}
820 
/*
 * Dispatch an inner relay message received from @origin: action types are
 * queued for the worker, reply types are matched against pending
 * transactions.
 *
 * Return: 0 on success or a negative error code on failure.
 */
static int relay_process_msg(struct xe_guc_relay *relay, u32 origin, u32 rid,
			     const u32 *msg, u32 len)
{
	u32 type;
	int err;

	if (unlikely(len < GUC_HXG_MSG_MIN_LEN))
		return -EPROTO;

	if (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) != GUC_HXG_ORIGIN_HOST)
		return -EPROTO;

	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
	relay_debug(relay, "received %s.%u from %u = %*ph\n",
		    guc_hxg_type_to_string(type), rid, origin, 4 * len, msg);

	switch (type) {
	case GUC_HXG_TYPE_REQUEST:
	case GUC_HXG_TYPE_FAST_REQUEST:
	case GUC_HXG_TYPE_EVENT:
		err = relay_queue_action_msg(relay, origin, rid, msg, len);
		break;
	case GUC_HXG_TYPE_RESPONSE_SUCCESS:
		err = relay_handle_reply(relay, origin, rid, 0, msg, len);
		break;
	case GUC_HXG_TYPE_NO_RESPONSE_BUSY:
		err = relay_handle_reply(relay, origin, rid, -EBUSY, NULL, 0);
		break;
	case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
		err = relay_handle_reply(relay, origin, rid, -EAGAIN, NULL, 0);
		break;
	case GUC_HXG_TYPE_RESPONSE_FAILURE:
		err = relay_handle_failure(relay, origin, rid, msg, len);
		break;
	default:
		err = -EBADRQC;
	}

	if (unlikely(err))
		relay_notice(relay, "Failed to process %s.%u from %u (%pe) %*ph\n",
			     guc_hxg_type_to_string(type), rid, origin,
			     ERR_PTR(err), 4 * len, msg);

	return err;
}
866 
/**
 * xe_guc_relay_process_guc2vf - Handle relay notification message from the GuC.
 * @relay: the &xe_guc_relay which will handle the message
 * @msg: message to be handled
 * @len: length of the message (in dwords)
 *
 * This function will handle relay messages received from the GuC.
 *
 * This function can only be used if driver is running in SR-IOV mode.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_guc_relay_process_guc2vf(struct xe_guc_relay *relay, const u32 *msg, u32 len)
{
	u32 rid;

	relay_assert(relay, len >= GUC_HXG_MSG_MIN_LEN);
	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
	relay_assert(relay, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
		     XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF);

	/* KUnit tests are allowed to call this outside of VF mode */
	if (unlikely(!IS_SRIOV_VF(relay_to_xe(relay)) && !kunit_get_current_test()))
		return -EPERM;

	if (unlikely(!relay_is_ready(relay)))
		return -ENODEV;

	if (unlikely(len < GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN))
		return -EPROTO;

	if (unlikely(len > GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN))
		return -EMSGSIZE;

	if (unlikely(FIELD_GET(GUC_HXG_EVENT_MSG_0_DATA0, msg[0])))
		return -EPFNOSUPPORT;

	rid = FIELD_GET(GUC2VF_RELAY_FROM_PF_EVENT_MSG_1_RELAY_ID, msg[1]);

	/* strip the outer GuC event header, pass only the inner relay message */
	return relay_process_msg(relay, PFID, rid,
				 msg + GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN,
				 len - GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN);
}
910 
911 #ifdef CONFIG_PCI_IOV
912 /**
913  * xe_guc_relay_process_guc2pf - Handle relay notification message from the GuC.
914  * @relay: the &xe_guc_relay which will handle the message
915  * @msg: message to be handled
916  * @len: length of the message (in dwords)
917  *
918  * This function will handle relay messages received from the GuC.
919  *
920  * This function can only be used if driver is running in SR-IOV PF mode.
921  *
922  * Return: 0 on success or a negative error code on failure.
923  */
924 int xe_guc_relay_process_guc2pf(struct xe_guc_relay *relay, const u32 *msg, u32 len)
925 {
926 	u32 origin, rid;
927 	int err;
928 
929 	relay_assert(relay, len >= GUC_HXG_EVENT_MSG_MIN_LEN);
930 	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
931 	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
932 	relay_assert(relay, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
933 		     XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF);
934 
935 	if (unlikely(!IS_SRIOV_PF(relay_to_xe(relay)) && !kunit_get_current_test()))
936 		return -EPERM;
937 
938 	if (unlikely(!relay_is_ready(relay)))
939 		return -ENODEV;
940 
941 	if (unlikely(len < GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN))
942 		return -EPROTO;
943 
944 	if (unlikely(len > GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN))
945 		return -EMSGSIZE;
946 
947 	if (unlikely(FIELD_GET(GUC_HXG_EVENT_MSG_0_DATA0, msg[0])))
948 		return -EPFNOSUPPORT;
949 
950 	origin = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID, msg[1]);
951 	rid = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID, msg[2]);
952 
953 	if (unlikely(origin > relay_get_totalvfs(relay)))
954 		return -ENOENT;
955 
956 	err = relay_process_msg(relay, origin, rid,
957 				msg + GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN,
958 				len - GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN);
959 
960 	return err;
961 }
962 #endif
963 
964 #if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
965 #include "tests/xe_guc_relay_test.c"
966 #endif
967