xref: /linux/drivers/gpu/drm/xe/xe_guc_relay.c (revision f6e8dc9edf963dbc99085e54f6ced6da9daa6100)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2023 Intel Corporation
4  */
5 
6 #include <linux/bitfield.h>
7 #include <linux/delay.h>
8 #include <linux/fault-inject.h>
9 
10 #include <drm/drm_managed.h>
11 
12 #include <kunit/static_stub.h>
13 #include <kunit/test-bug.h>
14 
15 #include "abi/guc_actions_sriov_abi.h"
16 #include "abi/guc_relay_actions_abi.h"
17 #include "abi/guc_relay_communication_abi.h"
18 
19 #include "xe_assert.h"
20 #include "xe_device.h"
21 #include "xe_gt.h"
22 #include "xe_gt_sriov_printk.h"
23 #include "xe_gt_sriov_pf_service.h"
24 #include "xe_guc.h"
25 #include "xe_guc_ct.h"
26 #include "xe_guc_hxg_helpers.h"
27 #include "xe_guc_relay.h"
28 #include "xe_guc_relay_types.h"
29 #include "xe_sriov.h"
30 
31 /*
32  * How long should we wait for the response?
33  * XXX this value is subject to profiling.
34  */
35 #define RELAY_TIMEOUT_MSEC	(2500)
36 
37 static void relays_worker_fn(struct work_struct *w);
38 
39 static struct xe_guc *relay_to_guc(struct xe_guc_relay *relay)
40 {
41 	return container_of(relay, struct xe_guc, relay);
42 }
43 
44 static struct xe_guc_ct *relay_to_ct(struct xe_guc_relay *relay)
45 {
46 	return &relay_to_guc(relay)->ct;
47 }
48 
49 static struct xe_gt *relay_to_gt(struct xe_guc_relay *relay)
50 {
51 	return guc_to_gt(relay_to_guc(relay));
52 }
53 
54 static struct xe_device *relay_to_xe(struct xe_guc_relay *relay)
55 {
56 	return gt_to_xe(relay_to_gt(relay));
57 }
58 
59 #define XE_RELAY_DIAG_RATELIMIT_INTERVAL	(10 * HZ)
60 #define XE_RELAY_DIAG_RATELIMIT_BURST		10
61 
62 #define relay_ratelimit_printk(relay, _level, fmt...) ({			\
63 	typeof(relay) _r = (relay);						\
64 	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ||				\
65 	    ___ratelimit(&_r->diag_ratelimit, "xe_guc_relay"))			\
66 		xe_gt_sriov_##_level(relay_to_gt(_r), "relay: " fmt);		\
67 })
68 
69 #define relay_assert(relay, condition)	xe_gt_assert(relay_to_gt(relay), condition)
70 #define relay_notice(relay, msg...)	relay_ratelimit_printk((relay), notice, msg)
71 #define relay_debug(relay, msg...)	relay_ratelimit_printk((relay), dbg_verbose, msg)
72 
73 static int relay_get_totalvfs(struct xe_guc_relay *relay)
74 {
75 	struct xe_device *xe = relay_to_xe(relay);
76 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
77 
78 	KUNIT_STATIC_STUB_REDIRECT(relay_get_totalvfs, relay);
79 	return IS_SRIOV_VF(xe) ? 0 : pci_sriov_get_totalvfs(pdev);
80 }
81 
82 static bool relay_is_ready(struct xe_guc_relay *relay)
83 {
84 	return mempool_initialized(&relay->pool);
85 }
86 
87 static u32 relay_get_next_rid(struct xe_guc_relay *relay)
88 {
89 	u32 rid;
90 
91 	spin_lock(&relay->lock);
92 	rid = ++relay->last_rid;
93 	spin_unlock(&relay->lock);
94 
95 	return rid;
96 }
97 
98 /**
99  * struct relay_transaction - internal data used to handle transactions
100  *
101  * Relation between struct relay_transaction members::
102  *
103  *                 <-------------------- GUC_CTB_MAX_DWORDS -------------->
104  *                                  <-------- GUC_RELAY_MSG_MAX_LEN --->
105  *                 <--- offset ---> <--- request_len ------->
106  *                +----------------+-------------------------+----------+--+
107  *                |                |                         |          |  |
108  *                +----------------+-------------------------+----------+--+
109  *                ^                ^
110  *               /                /
111  *    request_buf          request
112  *
113  *                 <-------------------- GUC_CTB_MAX_DWORDS -------------->
114  *                                  <-------- GUC_RELAY_MSG_MAX_LEN --->
115  *                 <--- offset ---> <--- response_len --->
116  *                +----------------+----------------------+-------------+--+
117  *                |                |                      |             |  |
118  *                +----------------+----------------------+-------------+--+
119  *                ^                ^
120  *               /                /
121  *   response_buf         response
122  */
123 struct relay_transaction {
124 	/**
125 	 * @incoming: indicates whether this transaction represents an incoming
126 	 *            request from the remote VF/PF or an outgoing request to
127 	 *            the remote VF/PF.
128 	 */
129 	bool incoming;
130 
131 	/**
132 	 * @remote: PF/VF identifier of the origin (or target) of the relay
133 	 *          request message.
134 	 */
135 	u32 remote;
136 
137 	/** @rid: identifier of the VF/PF relay message. */
138 	u32 rid;
139 
140 	/**
141 	 * @request: points to the inner VF/PF request message, copied to the
142 	 *           #request_buf starting at #offset.
143 	 */
144 	u32 *request;
145 
146 	/** @request_len: length of the inner VF/PF request message. */
147 	u32 request_len;
148 
149 	/**
150 	 * @response: points to the placeholder buffer where the inner VF/PF
151 	 *            response will be located; for an outgoing transaction
152 	 *            this could be the caller's buffer (if provided), otherwise
153 	 *            it points to the #response_buf starting at #offset.
154 	 */
155 	u32 *response;
156 
157 	/**
158 	 * @response_len: length of the inner VF/PF response message (only
159 	 *                if #reply is 0), initially set to the size of the
160 	 *                placeholder buffer where response message will be
161 	 *                copied.
162 	 */
163 	u32 response_len;
164 
165 	/**
166 	 * @offset: offset to the start of the inner VF/PF relay message inside
167 	 *          buffers; this offset is equal to the length of the outer GuC
168 	 *          relay header message.
169 	 */
170 	u32 offset;
171 
172 	/**
173 	 * @request_buf: buffer with VF/PF request message including outer
174 	 *               transport message.
175 	 */
176 	u32 request_buf[GUC_CTB_MAX_DWORDS];
177 
178 	/**
179 	 * @response_buf: buffer with VF/PF response message including outer
180 	 *                transport message.
181 	 */
182 	u32 response_buf[GUC_CTB_MAX_DWORDS];
183 
184 	/**
185 	 * @reply: status of the reply, 0 means that the data pointed to by
186 	 *         #response is valid.
187 	 */
188 	int reply;
189 
190 	/** @done: completion of the outgoing transaction. */
191 	struct completion done;
192 
193 	/** @link: transaction list link */
194 	struct list_head link;
195 };
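/*
 * Illustrative example (not authoritative, deduced from the prepare_*()
 * helpers below): for an outgoing PF -> VF transaction, prepare_pf2guc()
 * fills the first PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN dwords of
 * request_buf with the outer PF2GUC header; assuming that header is the
 * three dwords written by prepare_pf2guc(), the layout would be:
 *
 *	offset           == 3
 *	request          == &request_buf[3]
 *	request_buf[0]      HXG REQUEST header (ACTION = PF2GUC_RELAY_TO_VF)
 *	request_buf[1]      VFID (target VF)
 *	request_buf[2]      RELAY_ID (rid)
 *	request_buf[3..]    inner VF/PF relay message, request_len dwords
 *
 * The response_buf layout mirrors this, with the inner response starting
 * at the same offset.
 */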
196 
197 static u32 prepare_pf2guc(u32 *msg, u32 target, u32 rid)
198 {
199 	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
200 		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
201 		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, XE_GUC_ACTION_PF2GUC_RELAY_TO_VF);
202 	msg[1] = FIELD_PREP(PF2GUC_RELAY_TO_VF_REQUEST_MSG_1_VFID, target);
203 	msg[2] = FIELD_PREP(PF2GUC_RELAY_TO_VF_REQUEST_MSG_2_RELAY_ID, rid);
204 
205 	return PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN;
206 }
207 
208 static u32 prepare_vf2guc(u32 *msg, u32 rid)
209 {
210 	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
211 		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
212 		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, XE_GUC_ACTION_VF2GUC_RELAY_TO_PF);
213 	msg[1] = FIELD_PREP(VF2GUC_RELAY_TO_PF_REQUEST_MSG_1_RELAY_ID, rid);
214 
215 	return VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN;
216 }
217 
218 static struct relay_transaction *
219 __relay_get_transaction(struct xe_guc_relay *relay, bool incoming, u32 remote, u32 rid,
220 			const u32 *action, u32 action_len, u32 *resp, u32 resp_size)
221 {
222 	struct relay_transaction *txn;
223 
224 	relay_assert(relay, action_len >= GUC_RELAY_MSG_MIN_LEN);
225 	relay_assert(relay, action_len <= GUC_RELAY_MSG_MAX_LEN);
226 	relay_assert(relay, !(!!resp ^ !!resp_size));
227 	relay_assert(relay, resp_size <= GUC_RELAY_MSG_MAX_LEN);
228 	relay_assert(relay, resp_size == 0 || resp_size >= GUC_RELAY_MSG_MIN_LEN);
229 
230 	if (unlikely(!relay_is_ready(relay)))
231 		return ERR_PTR(-ENODEV);
232 
233 	/*
234 	 * For incoming requests we can't use GFP_KERNEL as those are delivered
235 	 * with the CTB lock held, which is marked as used in the reclaim path.
236 	 * Btw, that's one of the reasons why we use a mempool here!
237 	 */
238 	txn = mempool_alloc(&relay->pool, incoming ? GFP_ATOMIC : GFP_NOWAIT);
239 	if (!txn)
240 		return ERR_PTR(-ENOMEM);
241 
242 	txn->incoming = incoming;
243 	txn->remote = remote;
244 	txn->rid = rid;
245 	txn->offset = remote ?
246 		prepare_pf2guc(incoming ? txn->response_buf : txn->request_buf, remote, rid) :
247 		prepare_vf2guc(incoming ? txn->response_buf : txn->request_buf, rid);
248 
249 	relay_assert(relay, txn->offset);
250 	relay_assert(relay, txn->offset + GUC_RELAY_MSG_MAX_LEN <= ARRAY_SIZE(txn->request_buf));
251 	relay_assert(relay, txn->offset + GUC_RELAY_MSG_MAX_LEN <= ARRAY_SIZE(txn->response_buf));
252 
253 	txn->request = txn->request_buf + txn->offset;
254 	memcpy(&txn->request_buf[txn->offset], action, sizeof(u32) * action_len);
255 	txn->request_len = action_len;
256 
257 	txn->response = resp ?: txn->response_buf + txn->offset;
258 	txn->response_len = resp_size ?: GUC_RELAY_MSG_MAX_LEN;
259 	txn->reply = -ENOMSG;
260 	INIT_LIST_HEAD(&txn->link);
261 	init_completion(&txn->done);
262 
263 	return txn;
264 }
265 
266 static struct relay_transaction *
267 relay_new_transaction(struct xe_guc_relay *relay, u32 target, const u32 *action, u32 len,
268 		      u32 *resp, u32 resp_size)
269 {
270 	u32 rid = relay_get_next_rid(relay);
271 
272 	return __relay_get_transaction(relay, false, target, rid, action, len, resp, resp_size);
273 }
274 
275 static struct relay_transaction *
276 relay_new_incoming_transaction(struct xe_guc_relay *relay, u32 origin, u32 rid,
277 			       const u32 *action, u32 len)
278 {
279 	return __relay_get_transaction(relay, true, origin, rid, action, len, NULL, 0);
280 }
281 
282 static void relay_release_transaction(struct xe_guc_relay *relay, struct relay_transaction *txn)
283 {
284 	relay_assert(relay, list_empty(&txn->link));
285 
286 	txn->offset = 0;
287 	txn->response = NULL;
288 	txn->reply = -ESTALE;
289 	mempool_free(txn, &relay->pool);
290 }
291 
292 static int relay_send_transaction(struct xe_guc_relay *relay, struct relay_transaction *txn)
293 {
294 	u32 len = txn->incoming ? txn->response_len : txn->request_len;
295 	u32 *buf = txn->incoming ? txn->response_buf : txn->request_buf;
296 	u32 *msg = buf + txn->offset;
297 	int ret;
298 
299 	relay_assert(relay, txn->offset);
300 	relay_assert(relay, txn->offset + len <= GUC_CTB_MAX_DWORDS);
301 	relay_assert(relay, len >= GUC_RELAY_MSG_MIN_LEN);
302 	relay_assert(relay, len <= GUC_RELAY_MSG_MAX_LEN);
303 
304 	relay_debug(relay, "sending %s.%u to %u = %*ph\n",
305 		    guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])),
306 		    txn->rid, txn->remote, (int)sizeof(u32) * len, msg);
307 
308 	ret = xe_guc_ct_send_block(relay_to_ct(relay), buf, len + txn->offset);
309 
310 	if (unlikely(ret > 0)) {
311 		relay_notice(relay, "Unexpected data=%d from GuC, wrong ABI?\n", ret);
312 		ret = -EPROTO;
313 	}
314 	if (unlikely(ret < 0)) {
315 		relay_notice(relay, "Failed to send %s.%x to GuC (%pe) %*ph ...\n",
316 			     guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, buf[0])),
317 			     FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, buf[0]),
318 			     ERR_PTR(ret), (int)sizeof(u32) * txn->offset, buf);
319 		relay_notice(relay, "Failed to send %s.%u to %u (%pe) %*ph\n",
320 			     guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])),
321 			     txn->rid, txn->remote, ERR_PTR(ret), (int)sizeof(u32) * len, msg);
322 	}
323 
324 	return ret;
325 }
326 
327 static void __fini_relay(struct drm_device *drm, void *arg)
328 {
329 	struct xe_guc_relay *relay = arg;
330 
331 	mempool_exit(&relay->pool);
332 }
333 
334 /**
335  * xe_guc_relay_init - Initialize a &xe_guc_relay
336  * @relay: the &xe_guc_relay to initialize
337  *
338  * Initialize remaining members of &xe_guc_relay that may depend
339  * on the SR-IOV mode.
340  *
341  * Return: 0 on success or a negative error code on failure.
342  */
343 int xe_guc_relay_init(struct xe_guc_relay *relay)
344 {
345 	const int XE_RELAY_MEMPOOL_MIN_NUM = 1;
346 	struct xe_device *xe = relay_to_xe(relay);
347 	int err;
348 
349 	relay_assert(relay, !relay_is_ready(relay));
350 
351 	if (!IS_SRIOV(xe))
352 		return 0;
353 
354 	spin_lock_init(&relay->lock);
355 	INIT_WORK(&relay->worker, relays_worker_fn);
356 	INIT_LIST_HEAD(&relay->pending_relays);
357 	INIT_LIST_HEAD(&relay->incoming_actions);
358 	ratelimit_state_init(&relay->diag_ratelimit,
359 			     XE_RELAY_DIAG_RATELIMIT_INTERVAL,
360 			     XE_RELAY_DIAG_RATELIMIT_BURST);
361 
362 	err = mempool_init_kmalloc_pool(&relay->pool, XE_RELAY_MEMPOOL_MIN_NUM +
363 					relay_get_totalvfs(relay),
364 					sizeof(struct relay_transaction));
365 	if (err)
366 		return err;
367 
368 	relay_debug(relay, "using mempool with %d elements\n", relay->pool.min_nr);
369 
370 	return drmm_add_action_or_reset(&xe->drm, __fini_relay, relay);
371 }
372 ALLOW_ERROR_INJECTION(xe_guc_relay_init, ERRNO); /* See xe_pci_probe() */
373 
374 static u32 to_relay_error(int err)
375 {
376 	/* XXX: assume that relay errors match errno codes */
377 	return err < 0 ? -err : GUC_RELAY_ERROR_UNDISCLOSED;
378 }
379 
380 static int from_relay_error(u32 error)
381 {
382 	/* XXX: assume that relay errors match errno codes */
383 	return error ? -error : -ENODATA;
384 }
385 
386 static u32 sanitize_relay_error(u32 error)
387 {
388 	/* XXX TBD if generic error codes will be allowed */
389 	if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
390 		error = GUC_RELAY_ERROR_UNDISCLOSED;
391 	return error;
392 }
393 
394 static u32 sanitize_relay_error_hint(u32 hint)
395 {
396 	/* XXX TBD if generic error codes will be allowed */
397 	if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
398 		hint = 0;
399 	return hint;
400 }
401 
402 static u32 prepare_error_reply(u32 *msg, u32 error, u32 hint)
403 {
404 	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
405 		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_FAILURE) |
406 		 FIELD_PREP(GUC_HXG_FAILURE_MSG_0_HINT, hint) |
407 		 FIELD_PREP(GUC_HXG_FAILURE_MSG_0_ERROR, error);
408 
409 	XE_WARN_ON(!FIELD_FIT(GUC_HXG_FAILURE_MSG_0_ERROR, error));
410 	XE_WARN_ON(!FIELD_FIT(GUC_HXG_FAILURE_MSG_0_HINT, hint));
411 
412 	return GUC_HXG_FAILURE_MSG_LEN;
413 }
414 
415 static void relay_testonly_nop(struct xe_guc_relay *relay)
416 {
417 	KUNIT_STATIC_STUB_REDIRECT(relay_testonly_nop, relay);
418 }
419 
420 static int relay_send_message_and_wait(struct xe_guc_relay *relay,
421 				       struct relay_transaction *txn,
422 				       u32 *buf, u32 buf_size)
423 {
424 	unsigned long timeout = msecs_to_jiffies(RELAY_TIMEOUT_MSEC);
425 	u32 *msg = &txn->request_buf[txn->offset];
426 	u32 len = txn->request_len;
427 	u32 type, action, data0;
428 	int ret;
429 	long n;
430 
431 	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
432 	action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]);
433 	data0 = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]);
434 
435 	relay_debug(relay, "%s.%u to %u action %#x:%u\n",
436 		    guc_hxg_type_to_string(type),
437 		    txn->rid, txn->remote, action, data0);
438 
439 	/* list ordering does not need to match RID ordering */
440 	spin_lock(&relay->lock);
441 	list_add_tail(&txn->link, &relay->pending_relays);
442 	spin_unlock(&relay->lock);
443 
444 resend:
445 	ret = relay_send_transaction(relay, txn);
446 	if (unlikely(ret < 0))
447 		goto unlink;
448 
449 wait:
450 	n = wait_for_completion_timeout(&txn->done, timeout);
451 	if (unlikely(n == 0 && txn->reply)) {
452 		ret = -ETIME;
453 		goto unlink;
454 	}
455 
456 	relay_debug(relay, "%u.%u reply %d after %u msec\n",
457 		    txn->remote, txn->rid, txn->reply, jiffies_to_msecs(timeout - n));
458 	if (unlikely(txn->reply)) {
459 		reinit_completion(&txn->done);
460 		if (txn->reply == -EAGAIN)
461 			goto resend;
462 		if (txn->reply == -EBUSY) {
463 			relay_testonly_nop(relay);
464 			goto wait;
465 		}
466 		if (txn->reply > 0)
467 			ret = from_relay_error(txn->reply);
468 		else
469 			ret = txn->reply;
470 		goto unlink;
471 	}
472 
473 	relay_debug(relay, "%u.%u response %*ph\n", txn->remote, txn->rid,
474 		    (int)sizeof(u32) * txn->response_len, txn->response);
475 	relay_assert(relay, txn->response_len >= GUC_RELAY_MSG_MIN_LEN);
476 	ret = txn->response_len;
477 
478 unlink:
479 	spin_lock(&relay->lock);
480 	list_del_init(&txn->link);
481 	spin_unlock(&relay->lock);
482 
483 	if (unlikely(ret < 0)) {
484 		relay_notice(relay, "Unsuccessful %s.%u %#x:%u to %u (%pe) %*ph\n",
485 			     guc_hxg_type_to_string(type), txn->rid,
486 			     action, data0, txn->remote, ERR_PTR(ret),
487 			     (int)sizeof(u32) * len, msg);
488 	}
489 
490 	return ret;
491 }
492 
493 static int relay_send_to(struct xe_guc_relay *relay, u32 target,
494 			 const u32 *msg, u32 len, u32 *buf, u32 buf_size)
495 {
496 	struct relay_transaction *txn;
497 	int ret;
498 
499 	relay_assert(relay, len >= GUC_RELAY_MSG_MIN_LEN);
500 	relay_assert(relay, len <= GUC_RELAY_MSG_MAX_LEN);
501 	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_HOST);
502 	relay_assert(relay, guc_hxg_type_is_action(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])));
503 
504 	if (unlikely(!relay_is_ready(relay)))
505 		return -ENODEV;
506 
507 	txn = relay_new_transaction(relay, target, msg, len, buf, buf_size);
508 	if (IS_ERR(txn))
509 		return PTR_ERR(txn);
510 
511 	switch (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])) {
512 	case GUC_HXG_TYPE_REQUEST:
513 		ret = relay_send_message_and_wait(relay, txn, buf, buf_size);
514 		break;
515 	case GUC_HXG_TYPE_FAST_REQUEST:
516 		relay_assert(relay, !GUC_HXG_TYPE_FAST_REQUEST);
517 		fallthrough;
518 	case GUC_HXG_TYPE_EVENT:
519 		ret = relay_send_transaction(relay, txn);
520 		break;
521 	default:
522 		ret = -EINVAL;
523 		break;
524 	}
525 
526 	relay_release_transaction(relay, txn);
527 	return ret;
528 }
529 
530 #ifdef CONFIG_PCI_IOV
531 /**
532  * xe_guc_relay_send_to_vf - Send a message to the VF.
533  * @relay: the &xe_guc_relay which will send the message
534  * @target: target VF number
535  * @msg: request message to be sent
536  * @len: length of the request message (in dwords, can't be 0)
537  * @buf: placeholder for the response message
538  * @buf_size: size of the response message placeholder (in dwords)
539  *
540  * This function can only be used by the driver running in the SR-IOV PF mode.
541  *
542  * Return: Non-negative response length (in dwords) or
543  *         a negative error code on failure.
544  */
545 int xe_guc_relay_send_to_vf(struct xe_guc_relay *relay, u32 target,
546 			    const u32 *msg, u32 len, u32 *buf, u32 buf_size)
547 {
548 	relay_assert(relay, IS_SRIOV_PF(relay_to_xe(relay)));
549 
550 	return relay_send_to(relay, target, msg, len, buf, buf_size);
551 }
552 #endif
553 
554 /**
555  * xe_guc_relay_send_to_pf - Send a message to the PF.
556  * @relay: the &xe_guc_relay which will send the message
557  * @msg: request message to be sent
558  * @len: length of the message (in dwords, can't be 0)
559  * @buf: placeholder for the response message
560  * @buf_size: size of the response message placeholder (in dwords)
561  *
562  * This function can only be used by the driver running in the SR-IOV VF mode.
563  *
564  * Return: Non-negative response length (in dwords) or
565  *         a negative error code on failure.
566  */
567 int xe_guc_relay_send_to_pf(struct xe_guc_relay *relay,
568 			    const u32 *msg, u32 len, u32 *buf, u32 buf_size)
569 {
570 	relay_assert(relay, IS_SRIOV_VF(relay_to_xe(relay)));
571 
572 	return relay_send_to(relay, PFID, msg, len, buf, buf_size);
573 }
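/*
 * Usage sketch (illustrative only, not part of the driver): a VF could use
 * the relay to issue the debug-only TESTLOOP action roughly as below. The
 * local variables are hypothetical; only the constants and the function
 * prototype come from this file and the relay ABI headers:
 *
 *	u32 request[] = {
 *		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
 *		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
 *		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
 *			   GUC_RELAY_ACTION_VFXPF_TESTLOOP) |
 *		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0,
 *			   VFXPF_TESTLOOP_OPCODE_NOP),
 *	};
 *	u32 response[GUC_RELAY_MSG_MAX_LEN];
 *	int ret;
 *
 *	ret = xe_guc_relay_send_to_pf(relay, request, ARRAY_SIZE(request),
 *				      response, ARRAY_SIZE(response));
 *
 * On success ret is the length of the inner response message in dwords.
 */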
574 
575 static int relay_handle_reply(struct xe_guc_relay *relay, u32 origin,
576 			      u32 rid, int reply, const u32 *msg, u32 len)
577 {
578 	struct relay_transaction *pending;
579 	int err = -ESRCH;
580 
581 	spin_lock(&relay->lock);
582 	list_for_each_entry(pending, &relay->pending_relays, link) {
583 		if (pending->remote != origin || pending->rid != rid) {
584 			relay_debug(relay, "%u.%u still awaits response\n",
585 				    pending->remote, pending->rid);
586 			continue;
587 		}
588 		err = 0; /* found! */
589 		if (reply == 0) {
590 			if (len > pending->response_len) {
591 				reply = -ENOBUFS;
592 				err = -ENOBUFS;
593 			} else {
594 				memcpy(pending->response, msg, 4 * len);
595 				pending->response_len = len;
596 			}
597 		}
598 		pending->reply = reply;
599 		complete_all(&pending->done);
600 		break;
601 	}
602 	spin_unlock(&relay->lock);
603 
604 	return err;
605 }
606 
607 static int relay_handle_failure(struct xe_guc_relay *relay, u32 origin,
608 				u32 rid, const u32 *msg, u32 len)
609 {
610 	int error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]);
611 	u32 hint __maybe_unused = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[0]);
612 
613 	relay_assert(relay, len);
614 	relay_debug(relay, "%u.%u error %#x (%pe) hint %u debug %*ph\n",
615 		    origin, rid, error, ERR_PTR(-error), hint, 4 * (len - 1), msg + 1);
616 
617 	return relay_handle_reply(relay, origin, rid, error ?: -EREMOTEIO, NULL, 0);
618 }
619 
620 static int relay_testloop_action_handler(struct xe_guc_relay *relay, u32 origin,
621 					 const u32 *msg, u32 len, u32 *response, u32 size)
622 {
623 	static ktime_t last_reply = 0;
624 	u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
625 	u32 action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]);
626 	u32 opcode = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]);
627 	ktime_t now = ktime_get();
628 	bool busy;
629 	int ret;
630 
631 	relay_assert(relay, guc_hxg_type_is_action(type));
632 	relay_assert(relay, action == GUC_RELAY_ACTION_VFXPF_TESTLOOP);
633 
634 	if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV))
635 		return -ECONNREFUSED;
636 
637 	if (!last_reply)
638 		last_reply = now;
639 	busy = ktime_before(now, ktime_add_ms(last_reply, 2 * RELAY_TIMEOUT_MSEC));
640 	if (!busy)
641 		last_reply = now;
642 
643 	switch (opcode) {
644 	case VFXPF_TESTLOOP_OPCODE_NOP:
645 		if (type == GUC_HXG_TYPE_EVENT)
646 			return 0;
647 		return guc_hxg_msg_encode_success(response, 0);
648 	case VFXPF_TESTLOOP_OPCODE_BUSY:
649 		if (type == GUC_HXG_TYPE_EVENT)
650 			return -EPROTO;
651 		msleep(RELAY_TIMEOUT_MSEC / 8);
652 		if (busy)
653 			return -EINPROGRESS;
654 		return guc_hxg_msg_encode_success(response, 0);
655 	case VFXPF_TESTLOOP_OPCODE_RETRY:
656 		if (type == GUC_HXG_TYPE_EVENT)
657 			return -EPROTO;
658 		msleep(RELAY_TIMEOUT_MSEC / 8);
659 		if (busy)
660 			return guc_hxg_msg_encode_retry(response, 0);
661 		return guc_hxg_msg_encode_success(response, 0);
662 	case VFXPF_TESTLOOP_OPCODE_ECHO:
663 		if (type == GUC_HXG_TYPE_EVENT)
664 			return -EPROTO;
665 		if (size < len)
666 			return -ENOBUFS;
667 		ret = guc_hxg_msg_encode_success(response, len);
668 		memcpy(response + ret, msg + ret, (len - ret) * sizeof(u32));
669 		return len;
670 	case VFXPF_TESTLOOP_OPCODE_FAIL:
671 		return -EHWPOISON;
672 	default:
673 		break;
674 	}
675 
676 	relay_notice(relay, "Unexpected action %#x opcode %#x\n", action, opcode);
677 	return -EBADRQC;
678 }
679 
680 static int relay_action_handler(struct xe_guc_relay *relay, u32 origin,
681 				const u32 *msg, u32 len, u32 *response, u32 size)
682 {
683 	struct xe_gt *gt = relay_to_gt(relay);
684 	u32 type;
685 	int ret;
686 
687 	relay_assert(relay, len >= GUC_HXG_MSG_MIN_LEN);
688 
689 	if (FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]) == GUC_RELAY_ACTION_VFXPF_TESTLOOP)
690 		return relay_testloop_action_handler(relay, origin, msg, len, response, size);
691 
692 	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
693 
694 	if (IS_SRIOV_PF(relay_to_xe(relay)))
695 		ret = xe_gt_sriov_pf_service_process_request(gt, origin, msg, len, response, size);
696 	else
697 		ret = -EOPNOTSUPP;
698 
699 	if (type == GUC_HXG_TYPE_EVENT)
700 		relay_assert(relay, ret <= 0);
701 
702 	return ret;
703 }
704 
705 static struct relay_transaction *relay_dequeue_transaction(struct xe_guc_relay *relay)
706 {
707 	struct relay_transaction *txn;
708 
709 	spin_lock(&relay->lock);
710 	txn = list_first_entry_or_null(&relay->incoming_actions, struct relay_transaction, link);
711 	if (txn)
712 		list_del_init(&txn->link);
713 	spin_unlock(&relay->lock);
714 
715 	return txn;
716 }
717 
718 static void relay_process_incoming_action(struct xe_guc_relay *relay)
719 {
720 	struct relay_transaction *txn;
721 	bool again = false;
722 	u32 type;
723 	int ret;
724 
725 	txn = relay_dequeue_transaction(relay);
726 	if (!txn)
727 		return;
728 
729 	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, txn->request_buf[txn->offset]);
730 
731 	ret = relay_action_handler(relay, txn->remote,
732 				   txn->request_buf + txn->offset, txn->request_len,
733 				   txn->response_buf + txn->offset,
734 				   ARRAY_SIZE(txn->response_buf) - txn->offset);
735 
736 	if (ret == -EINPROGRESS) {
737 		again = true;
738 		ret = guc_hxg_msg_encode_busy(txn->response_buf + txn->offset, 0);
739 	}
740 
741 	if (ret > 0) {
742 		txn->response_len = ret;
743 		ret = relay_send_transaction(relay, txn);
744 	}
745 
746 	if (ret < 0) {
747 		u32 error = to_relay_error(ret);
748 
749 		relay_notice(relay, "Failed to handle %s.%u from %u (%pe) %*ph\n",
750 			     guc_hxg_type_to_string(type), txn->rid, txn->remote,
751 			     ERR_PTR(ret), 4 * txn->request_len, txn->request_buf + txn->offset);
752 
753 		txn->response_len = prepare_error_reply(txn->response_buf + txn->offset,
754 							txn->remote ?
755 							sanitize_relay_error(error) : error,
756 							txn->remote ?
757 							sanitize_relay_error_hint(-ret) : -ret);
758 		ret = relay_send_transaction(relay, txn);
759 		again = false;
760 	}
761 
762 	if (again) {
763 		spin_lock(&relay->lock);
764 		list_add(&txn->link, &relay->incoming_actions);
765 		spin_unlock(&relay->lock);
766 		return;
767 	}
768 
769 	if (unlikely(ret < 0))
770 		relay_notice(relay, "Failed to process action.%u (%pe) %*ph\n",
771 			     txn->rid, ERR_PTR(ret), 4 * txn->request_len,
772 			     txn->request_buf + txn->offset);
773 
774 	relay_release_transaction(relay, txn);
775 }
776 
777 static bool relay_needs_worker(struct xe_guc_relay *relay)
778 {
779 	bool is_empty;
780 
781 	spin_lock(&relay->lock);
782 	is_empty = list_empty(&relay->incoming_actions);
783 	spin_unlock(&relay->lock);
784 
785 	return !is_empty;
786 
787 }
788 
789 static void relay_kick_worker(struct xe_guc_relay *relay)
790 {
791 	KUNIT_STATIC_STUB_REDIRECT(relay_kick_worker, relay);
792 	queue_work(relay_to_xe(relay)->sriov.wq, &relay->worker);
793 }
794 
795 static void relays_worker_fn(struct work_struct *w)
796 {
797 	struct xe_guc_relay *relay = container_of(w, struct xe_guc_relay, worker);
798 
799 	relay_process_incoming_action(relay);
800 
801 	if (relay_needs_worker(relay))
802 		relay_kick_worker(relay);
803 }
804 
805 static int relay_queue_action_msg(struct xe_guc_relay *relay, u32 origin, u32 rid,
806 				  const u32 *msg, u32 len)
807 {
808 	struct relay_transaction *txn;
809 
810 	txn = relay_new_incoming_transaction(relay, origin, rid, msg, len);
811 	if (IS_ERR(txn))
812 		return PTR_ERR(txn);
813 
814 	spin_lock(&relay->lock);
815 	list_add_tail(&txn->link, &relay->incoming_actions);
816 	spin_unlock(&relay->lock);
817 
818 	relay_kick_worker(relay);
819 	return 0;
820 }
821 
822 static int relay_process_msg(struct xe_guc_relay *relay, u32 origin, u32 rid,
823 			     const u32 *msg, u32 len)
824 {
825 	u32 type;
826 	int err;
827 
828 	if (unlikely(len < GUC_HXG_MSG_MIN_LEN))
829 		return -EPROTO;
830 
831 	if (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) != GUC_HXG_ORIGIN_HOST)
832 		return -EPROTO;
833 
834 	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
835 	relay_debug(relay, "received %s.%u from %u = %*ph\n",
836 		    guc_hxg_type_to_string(type), rid, origin, 4 * len, msg);
837 
838 	switch (type) {
839 	case GUC_HXG_TYPE_REQUEST:
840 	case GUC_HXG_TYPE_FAST_REQUEST:
841 	case GUC_HXG_TYPE_EVENT:
842 		err = relay_queue_action_msg(relay, origin, rid, msg, len);
843 		break;
844 	case GUC_HXG_TYPE_RESPONSE_SUCCESS:
845 		err = relay_handle_reply(relay, origin, rid, 0, msg, len);
846 		break;
847 	case GUC_HXG_TYPE_NO_RESPONSE_BUSY:
848 		err = relay_handle_reply(relay, origin, rid, -EBUSY, NULL, 0);
849 		break;
850 	case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
851 		err = relay_handle_reply(relay, origin, rid, -EAGAIN, NULL, 0);
852 		break;
853 	case GUC_HXG_TYPE_RESPONSE_FAILURE:
854 		err = relay_handle_failure(relay, origin, rid, msg, len);
855 		break;
856 	default:
857 		err = -EBADRQC;
858 	}
859 
860 	if (unlikely(err))
861 		relay_notice(relay, "Failed to process %s.%u from %u (%pe) %*ph\n",
862 			     guc_hxg_type_to_string(type), rid, origin,
863 			     ERR_PTR(err), 4 * len, msg);
864 
865 	return err;
866 }
867 
868 /**
869  * xe_guc_relay_process_guc2vf - Handle relay notification message from the GuC.
870  * @relay: the &xe_guc_relay which will handle the message
871  * @msg: message to be handled
872  * @len: length of the message (in dwords)
873  *
874  * This function will handle relay messages received from the GuC.
875  *
876  * This function can only be used if the driver is running in SR-IOV VF mode.
877  *
878  * Return: 0 on success or a negative error code on failure.
879  */
880 int xe_guc_relay_process_guc2vf(struct xe_guc_relay *relay, const u32 *msg, u32 len)
881 {
882 	u32 rid;
883 
884 	relay_assert(relay, len >= GUC_HXG_MSG_MIN_LEN);
885 	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
886 	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
887 	relay_assert(relay, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
888 		     XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF);
889 
890 	if (unlikely(!IS_SRIOV_VF(relay_to_xe(relay)) && !kunit_get_current_test()))
891 		return -EPERM;
892 
893 	if (unlikely(!relay_is_ready(relay)))
894 		return -ENODEV;
895 
896 	if (unlikely(len < GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN))
897 		return -EPROTO;
898 
899 	if (unlikely(len > GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN))
900 		return -EMSGSIZE;
901 
902 	if (unlikely(FIELD_GET(GUC_HXG_EVENT_MSG_0_DATA0, msg[0])))
903 		return -EPFNOSUPPORT;
904 
905 	rid = FIELD_GET(GUC2VF_RELAY_FROM_PF_EVENT_MSG_1_RELAY_ID, msg[1]);
906 
907 	return relay_process_msg(relay, PFID, rid,
908 				 msg + GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN,
909 				 len - GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN);
910 }
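/*
 * Expected GUC2VF_RELAY_FROM_PF event layout, as deduced from the parsing
 * above (the ABI header remains the authoritative definition):
 *
 *	msg[0]  HXG EVENT header (ORIGIN = GUC, ACTION = GUC2VF_RELAY_FROM_PF,
 *		DATA0 must be 0)
 *	msg[1]  RELAY_ID of the embedded relay message
 *	...     the remaining dwords, starting at
 *	        GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN, carry the embedded
 *	        VF/PF relay message and are forwarded to relay_process_msg()
 */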
911 
912 #ifdef CONFIG_PCI_IOV
913 /**
914  * xe_guc_relay_process_guc2pf - Handle relay notification message from the GuC.
915  * @relay: the &xe_guc_relay which will handle the message
916  * @msg: message to be handled
917  * @len: length of the message (in dwords)
918  *
919  * This function will handle relay messages received from the GuC.
920  *
921  * This function can only be used if the driver is running in SR-IOV PF mode.
922  *
923  * Return: 0 on success or a negative error code on failure.
924  */
925 int xe_guc_relay_process_guc2pf(struct xe_guc_relay *relay, const u32 *msg, u32 len)
926 {
927 	u32 origin, rid;
928 	int err;
929 
930 	relay_assert(relay, len >= GUC_HXG_EVENT_MSG_MIN_LEN);
931 	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
932 	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
933 	relay_assert(relay, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
934 		     XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF);
935 
936 	if (unlikely(!IS_SRIOV_PF(relay_to_xe(relay)) && !kunit_get_current_test()))
937 		return -EPERM;
938 
939 	if (unlikely(!relay_is_ready(relay)))
940 		return -ENODEV;
941 
942 	if (unlikely(len < GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN))
943 		return -EPROTO;
944 
945 	if (unlikely(len > GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN))
946 		return -EMSGSIZE;
947 
948 	if (unlikely(FIELD_GET(GUC_HXG_EVENT_MSG_0_DATA0, msg[0])))
949 		return -EPFNOSUPPORT;
950 
951 	origin = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID, msg[1]);
952 	rid = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID, msg[2]);
953 
954 	if (unlikely(origin > relay_get_totalvfs(relay)))
955 		return -ENOENT;
956 
957 	err = relay_process_msg(relay, origin, rid,
958 				msg + GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN,
959 				len - GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN);
960 
961 	return err;
962 }
963 #endif
964 
965 #if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
966 #include "tests/xe_guc_relay_test.c"
967 #endif
968