xref: /linux/drivers/gpu/drm/xe/xe_guc_relay.c (revision 79d2e1919a2728ef49d938eb20ebd5903c14dfb0)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2023 Intel Corporation
4  */
5 
6 #include <linux/bitfield.h>
7 #include <linux/delay.h>
8 #include <linux/fault-inject.h>
9 
10 #include <drm/drm_managed.h>
11 
12 #include <kunit/static_stub.h>
13 #include <kunit/test-bug.h>
14 
15 #include "abi/guc_actions_sriov_abi.h"
16 #include "abi/guc_relay_actions_abi.h"
17 #include "abi/guc_relay_communication_abi.h"
18 
19 #include "xe_assert.h"
20 #include "xe_device.h"
21 #include "xe_gt.h"
22 #include "xe_gt_sriov_printk.h"
23 #include "xe_gt_sriov_pf_service.h"
24 #include "xe_guc.h"
25 #include "xe_guc_ct.h"
26 #include "xe_guc_hxg_helpers.h"
27 #include "xe_guc_relay.h"
28 #include "xe_guc_relay_types.h"
29 #include "xe_sriov.h"
30 
/*
 * How long should we wait for the response?
 * XXX: this value is subject to profiling.
 */
35 #define RELAY_TIMEOUT_MSEC	(2500)
36 
37 static void relays_worker_fn(struct work_struct *w);
38 
39 static struct xe_guc *relay_to_guc(struct xe_guc_relay *relay)
40 {
41 	return container_of(relay, struct xe_guc, relay);
42 }
43 
44 static struct xe_guc_ct *relay_to_ct(struct xe_guc_relay *relay)
45 {
46 	return &relay_to_guc(relay)->ct;
47 }
48 
/* Return the GT obtained via guc_to_gt() for the GuC that owns @relay. */
static struct xe_gt *relay_to_gt(struct xe_guc_relay *relay)
{
	struct xe_guc *guc = relay_to_guc(relay);

	return guc_to_gt(guc);
}
53 
/* Return the device obtained via gt_to_xe() for the GT that owns @relay. */
static struct xe_device *relay_to_xe(struct xe_guc_relay *relay)
{
	struct xe_gt *gt = relay_to_gt(relay);

	return gt_to_xe(gt);
}
58 
/*
 * Relay-scoped convenience wrappers: each one resolves the GT that owns
 * @relay and forwards to the corresponding xe_gt_* helper. The two logging
 * variants additionally prefix every message with "relay: ".
 */
#define relay_assert(relay, condition)	xe_gt_assert(relay_to_gt(relay), condition)
#define relay_notice(relay, msg...)	xe_gt_sriov_notice(relay_to_gt(relay), "relay: " msg)
#define relay_debug(relay, msg...)	xe_gt_sriov_dbg_verbose(relay_to_gt(relay), "relay: " msg)
62 
63 static int relay_get_totalvfs(struct xe_guc_relay *relay)
64 {
65 	struct xe_device *xe = relay_to_xe(relay);
66 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
67 
68 	KUNIT_STATIC_STUB_REDIRECT(relay_get_totalvfs, relay);
69 	return IS_SRIOV_VF(xe) ? 0 : pci_sriov_get_totalvfs(pdev);
70 }
71 
72 static bool relay_is_ready(struct xe_guc_relay *relay)
73 {
74 	return mempool_initialized(&relay->pool);
75 }
76 
77 static u32 relay_get_next_rid(struct xe_guc_relay *relay)
78 {
79 	u32 rid;
80 
81 	spin_lock(&relay->lock);
82 	rid = ++relay->last_rid;
83 	spin_unlock(&relay->lock);
84 
85 	return rid;
86 }
87 
/**
 * struct relay_transaction - internal data used to handle transactions
 *
 * Relation between struct relay_transaction members::
 *
 *                 <-------------------- GUC_CTB_MAX_DWORDS -------------->
 *                                  <-------- GUC_RELAY_MSG_MAX_LEN --->
 *                 <--- offset ---> <--- request_len ------->
 *                +----------------+-------------------------+----------+--+
 *                |                |                         |          |  |
 *                +----------------+-------------------------+----------+--+
 *                ^                ^
 *               /                /
 *    request_buf          request
 *
 *                 <-------------------- GUC_CTB_MAX_DWORDS -------------->
 *                                  <-------- GUC_RELAY_MSG_MAX_LEN --->
 *                 <--- offset ---> <--- response_len --->
 *                +----------------+----------------------+-------------+--+
 *                |                |                      |             |  |
 *                +----------------+----------------------+-------------+--+
 *                ^                ^
 *               /                /
 *   response_buf         response
 */
struct relay_transaction {
	/**
	 * @incoming: indicates whether this transaction represents an incoming
	 *            request from the remote VF/PF or this transaction
	 *            represents an outgoing request to the remote VF/PF.
	 */
	bool incoming;

	/**
	 * @remote: PF/VF identifier of the origin (or target) of the relay
	 *          request message.
	 */
	u32 remote;

	/** @rid: identifier of the VF/PF relay message. */
	u32 rid;

	/**
	 * @request: points to the inner VF/PF request message, copied to the
	 *           #request_buf starting at #offset.
	 */
	u32 *request;

	/** @request_len: length of the inner VF/PF request message (in dwords). */
	u32 request_len;

	/**
	 * @response: points to the placeholder buffer where inner VF/PF
	 *            response will be located, for outgoing transaction
	 *            this could be caller's buffer (if provided) otherwise
	 *            it points to the #response_buf starting at #offset.
	 */
	u32 *response;

	/**
	 * @response_len: length of the inner VF/PF response message (only
	 *                if #reply is 0), initially set to the size of the
	 *                placeholder buffer where response message will be
	 *                copied.
	 */
	u32 response_len;

	/**
	 * @offset: offset to the start of the inner VF/PF relay message inside
	 *          buffers; this offset equals the length of the outer GuC
	 *          relay header message.
	 */
	u32 offset;

	/**
	 * @request_buf: buffer with VF/PF request message including outer
	 *               transport message.
	 */
	u32 request_buf[GUC_CTB_MAX_DWORDS];

	/**
	 * @response_buf: buffer with VF/PF response message including outer
	 *                transport message.
	 */
	u32 response_buf[GUC_CTB_MAX_DWORDS];

	/**
	 * @reply: status of the reply, 0 means that data pointed by the
	 *         #response is valid.
	 */
	int reply;

	/** @done: completion of the outgoing transaction. */
	struct completion done;

	/** @link: transaction list link */
	struct list_head link;
};
186 
187 static u32 prepare_pf2guc(u32 *msg, u32 target, u32 rid)
188 {
189 	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
190 		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
191 		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, XE_GUC_ACTION_PF2GUC_RELAY_TO_VF);
192 	msg[1] = FIELD_PREP(PF2GUC_RELAY_TO_VF_REQUEST_MSG_1_VFID, target);
193 	msg[2] = FIELD_PREP(PF2GUC_RELAY_TO_VF_REQUEST_MSG_2_RELAY_ID, rid);
194 
195 	return PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN;
196 }
197 
198 static u32 prepare_vf2guc(u32 *msg, u32 rid)
199 {
200 	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
201 		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
202 		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, XE_GUC_ACTION_VF2GUC_RELAY_TO_PF);
203 	msg[1] = FIELD_PREP(VF2GUC_RELAY_TO_PF_REQUEST_MSG_1_RELAY_ID, rid);
204 
205 	return VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN;
206 }
207 
208 static struct relay_transaction *
209 __relay_get_transaction(struct xe_guc_relay *relay, bool incoming, u32 remote, u32 rid,
210 			const u32 *action, u32 action_len, u32 *resp, u32 resp_size)
211 {
212 	struct relay_transaction *txn;
213 
214 	relay_assert(relay, action_len >= GUC_RELAY_MSG_MIN_LEN);
215 	relay_assert(relay, action_len <= GUC_RELAY_MSG_MAX_LEN);
216 	relay_assert(relay, !(!!resp ^ !!resp_size));
217 	relay_assert(relay, resp_size <= GUC_RELAY_MSG_MAX_LEN);
218 	relay_assert(relay, resp_size == 0 || resp_size >= GUC_RELAY_MSG_MIN_LEN);
219 
220 	if (unlikely(!relay_is_ready(relay)))
221 		return ERR_PTR(-ENODEV);
222 
223 	/*
224 	 * For incoming requests we can't use GFP_KERNEL as those are delivered
225 	 * with CTB lock held which is marked as used in the reclaim path.
226 	 * Btw, that's one of the reason why we use mempool here!
227 	 */
228 	txn = mempool_alloc(&relay->pool, incoming ? GFP_ATOMIC : GFP_KERNEL);
229 	if (!txn)
230 		return ERR_PTR(-ENOMEM);
231 
232 	txn->incoming = incoming;
233 	txn->remote = remote;
234 	txn->rid = rid;
235 	txn->offset = remote ?
236 		prepare_pf2guc(incoming ? txn->response_buf : txn->request_buf, remote, rid) :
237 		prepare_vf2guc(incoming ? txn->response_buf : txn->request_buf, rid);
238 
239 	relay_assert(relay, txn->offset);
240 	relay_assert(relay, txn->offset + GUC_RELAY_MSG_MAX_LEN <= ARRAY_SIZE(txn->request_buf));
241 	relay_assert(relay, txn->offset + GUC_RELAY_MSG_MAX_LEN <= ARRAY_SIZE(txn->response_buf));
242 
243 	txn->request = txn->request_buf + txn->offset;
244 	memcpy(&txn->request_buf[txn->offset], action, sizeof(u32) * action_len);
245 	txn->request_len = action_len;
246 
247 	txn->response = resp ?: txn->response_buf + txn->offset;
248 	txn->response_len = resp_size ?: GUC_RELAY_MSG_MAX_LEN;
249 	txn->reply = -ENOMSG;
250 	INIT_LIST_HEAD(&txn->link);
251 	init_completion(&txn->done);
252 
253 	return txn;
254 }
255 
256 static struct relay_transaction *
257 relay_new_transaction(struct xe_guc_relay *relay, u32 target, const u32 *action, u32 len,
258 		      u32 *resp, u32 resp_size)
259 {
260 	u32 rid = relay_get_next_rid(relay);
261 
262 	return __relay_get_transaction(relay, false, target, rid, action, len, resp, resp_size);
263 }
264 
265 static struct relay_transaction *
266 relay_new_incoming_transaction(struct xe_guc_relay *relay, u32 origin, u32 rid,
267 			       const u32 *action, u32 len)
268 {
269 	return __relay_get_transaction(relay, true, origin, rid, action, len, NULL, 0);
270 }
271 
272 static void relay_release_transaction(struct xe_guc_relay *relay, struct relay_transaction *txn)
273 {
274 	relay_assert(relay, list_empty(&txn->link));
275 
276 	txn->offset = 0;
277 	txn->response = NULL;
278 	txn->reply = -ESTALE;
279 	mempool_free(txn, &relay->pool);
280 }
281 
282 static int relay_send_transaction(struct xe_guc_relay *relay, struct relay_transaction *txn)
283 {
284 	u32 len = txn->incoming ? txn->response_len : txn->request_len;
285 	u32 *buf = txn->incoming ? txn->response_buf : txn->request_buf;
286 	u32 *msg = buf + txn->offset;
287 	int ret;
288 
289 	relay_assert(relay, txn->offset);
290 	relay_assert(relay, txn->offset + len <= GUC_CTB_MAX_DWORDS);
291 	relay_assert(relay, len >= GUC_RELAY_MSG_MIN_LEN);
292 	relay_assert(relay, len <= GUC_RELAY_MSG_MAX_LEN);
293 
294 	relay_debug(relay, "sending %s.%u to %u = %*ph\n",
295 		    guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])),
296 		    txn->rid, txn->remote, (int)sizeof(u32) * len, msg);
297 
298 	ret = xe_guc_ct_send_block(relay_to_ct(relay), buf, len + txn->offset);
299 
300 	if (unlikely(ret > 0)) {
301 		relay_notice(relay, "Unexpected data=%d from GuC, wrong ABI?\n", ret);
302 		ret = -EPROTO;
303 	}
304 	if (unlikely(ret < 0)) {
305 		relay_notice(relay, "Failed to send %s.%x to GuC (%pe) %*ph ...\n",
306 			     guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, buf[0])),
307 			     FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, buf[0]),
308 			     ERR_PTR(ret), (int)sizeof(u32) * txn->offset, buf);
309 		relay_notice(relay, "Failed to send %s.%u to %u (%pe) %*ph\n",
310 			     guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])),
311 			     txn->rid, txn->remote, ERR_PTR(ret), (int)sizeof(u32) * len, msg);
312 	}
313 
314 	return ret;
315 }
316 
317 static void __fini_relay(struct drm_device *drm, void *arg)
318 {
319 	struct xe_guc_relay *relay = arg;
320 
321 	mempool_exit(&relay->pool);
322 }
323 
324 /**
325  * xe_guc_relay_init - Initialize a &xe_guc_relay
326  * @relay: the &xe_guc_relay to initialize
327  *
328  * Initialize remaining members of &xe_guc_relay that may depend
329  * on the SR-IOV mode.
330  *
331  * Return: 0 on success or a negative error code on failure.
332  */
333 int xe_guc_relay_init(struct xe_guc_relay *relay)
334 {
335 	const int XE_RELAY_MEMPOOL_MIN_NUM = 1;
336 	struct xe_device *xe = relay_to_xe(relay);
337 	int err;
338 
339 	relay_assert(relay, !relay_is_ready(relay));
340 
341 	if (!IS_SRIOV(xe))
342 		return 0;
343 
344 	spin_lock_init(&relay->lock);
345 	INIT_WORK(&relay->worker, relays_worker_fn);
346 	INIT_LIST_HEAD(&relay->pending_relays);
347 	INIT_LIST_HEAD(&relay->incoming_actions);
348 
349 	err = mempool_init_kmalloc_pool(&relay->pool, XE_RELAY_MEMPOOL_MIN_NUM +
350 					relay_get_totalvfs(relay),
351 					sizeof(struct relay_transaction));
352 	if (err)
353 		return err;
354 
355 	relay_debug(relay, "using mempool with %d elements\n", relay->pool.min_nr);
356 
357 	return drmm_add_action_or_reset(&xe->drm, __fini_relay, relay);
358 }
359 ALLOW_ERROR_INJECTION(xe_guc_relay_init, ERRNO); /* See xe_pci_probe() */
360 
361 static u32 to_relay_error(int err)
362 {
363 	/* XXX: assume that relay errors match errno codes */
364 	return err < 0 ? -err : GUC_RELAY_ERROR_UNDISCLOSED;
365 }
366 
367 static int from_relay_error(u32 error)
368 {
369 	/* XXX: assume that relay errors match errno codes */
370 	return error ? -error : -ENODATA;
371 }
372 
373 static u32 sanitize_relay_error(u32 error)
374 {
375 	/* XXX TBD if generic error codes will be allowed */
376 	if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
377 		error = GUC_RELAY_ERROR_UNDISCLOSED;
378 	return error;
379 }
380 
381 static u32 sanitize_relay_error_hint(u32 hint)
382 {
383 	/* XXX TBD if generic error codes will be allowed */
384 	if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
385 		hint = 0;
386 	return hint;
387 }
388 
389 static u32 prepare_error_reply(u32 *msg, u32 error, u32 hint)
390 {
391 	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
392 		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_FAILURE) |
393 		 FIELD_PREP(GUC_HXG_FAILURE_MSG_0_HINT, hint) |
394 		 FIELD_PREP(GUC_HXG_FAILURE_MSG_0_ERROR, error);
395 
396 	XE_WARN_ON(!FIELD_FIT(GUC_HXG_FAILURE_MSG_0_ERROR, error));
397 	XE_WARN_ON(!FIELD_FIT(GUC_HXG_FAILURE_MSG_0_HINT, hint));
398 
399 	return GUC_HXG_FAILURE_MSG_LEN;
400 }
401 
/*
 * Intentionally empty apart from the KUnit static stub hook: it gives tests
 * a point to redirect. It is called from relay_send_message_and_wait() when
 * a BUSY reply was received, just before waiting again.
 */
static void relay_testonly_nop(struct xe_guc_relay *relay)
{
	KUNIT_STATIC_STUB_REDIRECT(relay_testonly_nop, relay);
}
406 
/*
 * Send the outgoing request held by @txn and wait for the final reply.
 *
 * The transaction is linked on relay->pending_relays for the duration of the
 * call so that relay_handle_reply() can find it, and is always unlinked again
 * before returning.
 *
 * @buf and @buf_size are not referenced by this function.
 *
 * Return: length of the response (in dwords) on success, -ETIME if no reply
 *         arrived within RELAY_TIMEOUT_MSEC, or another negative error code
 *         (send failure or an error reported in the reply).
 */
static int relay_send_message_and_wait(struct xe_guc_relay *relay,
				       struct relay_transaction *txn,
				       u32 *buf, u32 buf_size)
{
	unsigned long timeout = msecs_to_jiffies(RELAY_TIMEOUT_MSEC);
	u32 *msg = &txn->request_buf[txn->offset];
	u32 len = txn->request_len;
	u32 type, action, data0;
	int ret;
	long n;

	/* decoded up front; only used for the log messages below */
	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
	action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]);
	data0 = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]);

	relay_debug(relay, "%s.%u to %u action %#x:%u\n",
		    guc_hxg_type_to_string(type),
		    txn->rid, txn->remote, action, data0);

	/* list ordering does not need to match RID ordering */
	spin_lock(&relay->lock);
	list_add_tail(&txn->link, &relay->pending_relays);
	spin_unlock(&relay->lock);

resend:
	ret = relay_send_transaction(relay, txn);
	if (unlikely(ret < 0))
		goto unlink;

wait:
	n = wait_for_completion_timeout(&txn->done, timeout);
	/* timed out and the reply status is still non-zero: give up */
	if (unlikely(n == 0 && txn->reply)) {
		ret = -ETIME;
		goto unlink;
	}

	relay_debug(relay, "%u.%u reply %d after %u msec\n",
		    txn->remote, txn->rid, txn->reply, jiffies_to_msecs(timeout - n));
	if (unlikely(txn->reply)) {
		/* re-arm the completion before any further resend/wait round */
		reinit_completion(&txn->done);
		/* -EAGAIN is recorded for a NO_RESPONSE_RETRY reply: send again */
		if (txn->reply == -EAGAIN)
			goto resend;
		/* -EBUSY is recorded for a NO_RESPONSE_BUSY reply: keep waiting */
		if (txn->reply == -EBUSY) {
			relay_testonly_nop(relay);
			goto wait;
		}
		/* positive values are error codes taken from a failure reply */
		if (txn->reply > 0)
			ret = from_relay_error(txn->reply);
		else
			ret = txn->reply;
		goto unlink;
	}

	relay_debug(relay, "%u.%u response %*ph\n", txn->remote, txn->rid,
		    (int)sizeof(u32) * txn->response_len, txn->response);
	relay_assert(relay, txn->response_len >= GUC_RELAY_MSG_MIN_LEN);
	ret = txn->response_len;

unlink:
	spin_lock(&relay->lock);
	list_del_init(&txn->link);
	spin_unlock(&relay->lock);

	if (unlikely(ret < 0)) {
		relay_notice(relay, "Unsuccessful %s.%u %#x:%u to %u (%pe) %*ph\n",
			     guc_hxg_type_to_string(type), txn->rid,
			     action, data0, txn->remote, ERR_PTR(ret),
			     (int)sizeof(u32) * len, msg);
	}

	return ret;
}
479 
480 static int relay_send_to(struct xe_guc_relay *relay, u32 target,
481 			 const u32 *msg, u32 len, u32 *buf, u32 buf_size)
482 {
483 	struct relay_transaction *txn;
484 	int ret;
485 
486 	relay_assert(relay, len >= GUC_RELAY_MSG_MIN_LEN);
487 	relay_assert(relay, len <= GUC_RELAY_MSG_MAX_LEN);
488 	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_HOST);
489 	relay_assert(relay, guc_hxg_type_is_action(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])));
490 
491 	if (unlikely(!relay_is_ready(relay)))
492 		return -ENODEV;
493 
494 	txn = relay_new_transaction(relay, target, msg, len, buf, buf_size);
495 	if (IS_ERR(txn))
496 		return PTR_ERR(txn);
497 
498 	switch (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])) {
499 	case GUC_HXG_TYPE_REQUEST:
500 		ret = relay_send_message_and_wait(relay, txn, buf, buf_size);
501 		break;
502 	case GUC_HXG_TYPE_FAST_REQUEST:
503 		relay_assert(relay, !GUC_HXG_TYPE_FAST_REQUEST);
504 		fallthrough;
505 	case GUC_HXG_TYPE_EVENT:
506 		ret = relay_send_transaction(relay, txn);
507 		break;
508 	default:
509 		ret = -EINVAL;
510 		break;
511 	}
512 
513 	relay_release_transaction(relay, txn);
514 	return ret;
515 }
516 
517 #ifdef CONFIG_PCI_IOV
518 /**
519  * xe_guc_relay_send_to_vf - Send a message to the VF.
520  * @relay: the &xe_guc_relay which will send the message
521  * @target: target VF number
522  * @msg: request message to be sent
523  * @len: length of the request message (in dwords, can't be 0)
524  * @buf: placeholder for the response message
525  * @buf_size: size of the response message placeholder (in dwords)
526  *
527  * This function can only be used by the driver running in the SR-IOV PF mode.
528  *
529  * Return: Non-negative response length (in dwords) or
530  *         a negative error code on failure.
531  */
532 int xe_guc_relay_send_to_vf(struct xe_guc_relay *relay, u32 target,
533 			    const u32 *msg, u32 len, u32 *buf, u32 buf_size)
534 {
535 	relay_assert(relay, IS_SRIOV_PF(relay_to_xe(relay)));
536 
537 	return relay_send_to(relay, target, msg, len, buf, buf_size);
538 }
539 #endif
540 
541 /**
542  * xe_guc_relay_send_to_pf - Send a message to the PF.
543  * @relay: the &xe_guc_relay which will send the message
544  * @msg: request message to be sent
545  * @len: length of the message (in dwords, can't be 0)
546  * @buf: placeholder for the response message
547  * @buf_size: size of the response message placeholder (in dwords)
548  *
549  * This function can only be used by driver running in SR-IOV VF mode.
550  *
551  * Return: Non-negative response length (in dwords) or
552  *         a negative error code on failure.
553  */
554 int xe_guc_relay_send_to_pf(struct xe_guc_relay *relay,
555 			    const u32 *msg, u32 len, u32 *buf, u32 buf_size)
556 {
557 	relay_assert(relay, IS_SRIOV_VF(relay_to_xe(relay)));
558 
559 	return relay_send_to(relay, PFID, msg, len, buf, buf_size);
560 }
561 
562 static int relay_handle_reply(struct xe_guc_relay *relay, u32 origin,
563 			      u32 rid, int reply, const u32 *msg, u32 len)
564 {
565 	struct relay_transaction *pending;
566 	int err = -ESRCH;
567 
568 	spin_lock(&relay->lock);
569 	list_for_each_entry(pending, &relay->pending_relays, link) {
570 		if (pending->remote != origin || pending->rid != rid) {
571 			relay_debug(relay, "%u.%u still awaits response\n",
572 				    pending->remote, pending->rid);
573 			continue;
574 		}
575 		err = 0; /* found! */
576 		if (reply == 0) {
577 			if (len > pending->response_len) {
578 				reply = -ENOBUFS;
579 				err = -ENOBUFS;
580 			} else {
581 				memcpy(pending->response, msg, 4 * len);
582 				pending->response_len = len;
583 			}
584 		}
585 		pending->reply = reply;
586 		complete_all(&pending->done);
587 		break;
588 	}
589 	spin_unlock(&relay->lock);
590 
591 	return err;
592 }
593 
594 static int relay_handle_failure(struct xe_guc_relay *relay, u32 origin,
595 				u32 rid, const u32 *msg, u32 len)
596 {
597 	int error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]);
598 	u32 hint __maybe_unused = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[0]);
599 
600 	relay_assert(relay, len);
601 	relay_debug(relay, "%u.%u error %#x (%pe) hint %u debug %*ph\n",
602 		    origin, rid, error, ERR_PTR(-error), hint, 4 * (len - 1), msg + 1);
603 
604 	return relay_handle_reply(relay, origin, rid, error ?: -EREMOTEIO, NULL, 0);
605 }
606 
/*
 * Handler for the GUC_RELAY_ACTION_VFXPF_TESTLOOP action.
 *
 * Only functional when CONFIG_DRM_XE_DEBUG_SRIOV is enabled; otherwise it
 * returns -ECONNREFUSED. The opcode is taken from the DATA0 field of the
 * message header. @origin is not used by this handler.
 *
 * Return: length of the response written to @response (in dwords), 0 for a
 *         handled NOP event, -EINPROGRESS when the BUSY opcode is still
 *         "busy", or another negative error code.
 */
static int relay_testloop_action_handler(struct xe_guc_relay *relay, u32 origin,
					 const u32 *msg, u32 len, u32 *response, u32 size)
{
	/* function-static, hence shared by every caller of this handler */
	static ktime_t last_reply = 0;
	u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
	u32 action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]);
	u32 opcode = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]);
	ktime_t now = ktime_get();
	bool busy;
	int ret;

	relay_assert(relay, guc_hxg_type_is_action(type));
	relay_assert(relay, action == GUC_RELAY_ACTION_VFXPF_TESTLOOP);

	if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV))
		return -ECONNREFUSED;

	/*
	 * "busy" holds while less than 2 * RELAY_TIMEOUT_MSEC has elapsed since
	 * last_reply; once that window has passed, a new window is started.
	 */
	if (!last_reply)
		last_reply = now;
	busy = ktime_before(now, ktime_add_ms(last_reply, 2 * RELAY_TIMEOUT_MSEC));
	if (!busy)
		last_reply = now;

	switch (opcode) {
	case VFXPF_TESTLOOP_OPCODE_NOP:
		/* events get no response message */
		if (type == GUC_HXG_TYPE_EVENT)
			return 0;
		return guc_hxg_msg_encode_success(response, 0);
	case VFXPF_TESTLOOP_OPCODE_BUSY:
		if (type == GUC_HXG_TYPE_EVENT)
			return -EPROTO;
		msleep(RELAY_TIMEOUT_MSEC / 8);
		/* report "still working" while inside the busy window */
		if (busy)
			return -EINPROGRESS;
		return guc_hxg_msg_encode_success(response, 0);
	case VFXPF_TESTLOOP_OPCODE_RETRY:
		if (type == GUC_HXG_TYPE_EVENT)
			return -EPROTO;
		msleep(RELAY_TIMEOUT_MSEC / 8);
		/* ask the sender to retry while inside the busy window */
		if (busy)
			return guc_hxg_msg_encode_retry(response, 0);
		return guc_hxg_msg_encode_success(response, 0);
	case VFXPF_TESTLOOP_OPCODE_ECHO:
		if (type == GUC_HXG_TYPE_EVENT)
			return -EPROTO;
		if (size < len)
			return -ENOBUFS;
		/* success header first, then the request payload that follows it */
		ret = guc_hxg_msg_encode_success(response, len);
		memcpy(response + ret, msg + ret, (len - ret) * sizeof(u32));
		return len;
	case VFXPF_TESTLOOP_OPCODE_FAIL:
		return -EHWPOISON;
	default:
		break;
	}

	relay_notice(relay, "Unexpected action %#x opcode %#x\n", action, opcode);
	return -EBADRQC;
}
666 
667 static int relay_action_handler(struct xe_guc_relay *relay, u32 origin,
668 				const u32 *msg, u32 len, u32 *response, u32 size)
669 {
670 	struct xe_gt *gt = relay_to_gt(relay);
671 	u32 type;
672 	int ret;
673 
674 	relay_assert(relay, len >= GUC_HXG_MSG_MIN_LEN);
675 
676 	if (FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]) == GUC_RELAY_ACTION_VFXPF_TESTLOOP)
677 		return relay_testloop_action_handler(relay, origin, msg, len, response, size);
678 
679 	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
680 
681 	if (IS_SRIOV_PF(relay_to_xe(relay)))
682 		ret = xe_gt_sriov_pf_service_process_request(gt, origin, msg, len, response, size);
683 	else
684 		ret = -EOPNOTSUPP;
685 
686 	if (type == GUC_HXG_TYPE_EVENT)
687 		relay_assert(relay, ret <= 0);
688 
689 	return ret;
690 }
691 
692 static struct relay_transaction *relay_dequeue_transaction(struct xe_guc_relay *relay)
693 {
694 	struct relay_transaction *txn;
695 
696 	spin_lock(&relay->lock);
697 	txn = list_first_entry_or_null(&relay->incoming_actions, struct relay_transaction, link);
698 	if (txn)
699 		list_del_init(&txn->link);
700 	spin_unlock(&relay->lock);
701 
702 	return txn;
703 }
704 
/*
 * Process a single queued incoming action: run the action handler and send
 * back whatever reply it produced (response, BUSY or failure).
 *
 * If the handler reports -EINPROGRESS, a BUSY reply is sent and the
 * transaction is put back at the head of the incoming queue so it is handled
 * again on the next pass; otherwise the transaction is released.
 */
static void relay_process_incoming_action(struct xe_guc_relay *relay)
{
	struct relay_transaction *txn;
	bool again = false;
	u32 type;
	int ret;

	txn = relay_dequeue_transaction(relay);
	if (!txn)
		return;

	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, txn->request_buf[txn->offset]);

	/* the handler writes its response right after the outer header */
	ret = relay_action_handler(relay, txn->remote,
				   txn->request_buf + txn->offset, txn->request_len,
				   txn->response_buf + txn->offset,
				   ARRAY_SIZE(txn->response_buf) - txn->offset);

	/* handler is not done yet: reply BUSY now and retry the action later */
	if (ret == -EINPROGRESS) {
		again = true;
		ret = guc_hxg_msg_encode_busy(txn->response_buf + txn->offset, 0);
	}

	/* positive value is the length of the reply to be sent */
	if (ret > 0) {
		txn->response_len = ret;
		ret = relay_send_transaction(relay, txn);
	}

	/* handler (or sending its reply) failed: report a failure to the sender */
	if (ret < 0) {
		u32 error = to_relay_error(ret);

		relay_notice(relay, "Failed to handle %s.%u from %u (%pe) %*ph\n",
			     guc_hxg_type_to_string(type), txn->rid, txn->remote,
			     ERR_PTR(ret), 4 * txn->request_len, txn->request_buf + txn->offset);

		/* error and hint are sanitized only when txn->remote is non-zero */
		txn->response_len = prepare_error_reply(txn->response_buf + txn->offset,
							txn->remote ?
							sanitize_relay_error(error) : error,
							txn->remote ?
							sanitize_relay_error_hint(-ret) : -ret);
		ret = relay_send_transaction(relay, txn);
		/* no retry after an error, even if BUSY was attempted above */
		again = false;
	}

	if (again) {
		/* list_add() puts it back at the head of the queue */
		spin_lock(&relay->lock);
		list_add(&txn->link, &relay->incoming_actions);
		spin_unlock(&relay->lock);
		return;
	}

	if (unlikely(ret < 0))
		relay_notice(relay, "Failed to process action.%u (%pe) %*ph\n",
			     txn->rid, ERR_PTR(ret), 4 * txn->request_len,
			     txn->request_buf + txn->offset);

	relay_release_transaction(relay, txn);
}
763 
764 static bool relay_needs_worker(struct xe_guc_relay *relay)
765 {
766 	bool is_empty;
767 
768 	spin_lock(&relay->lock);
769 	is_empty = list_empty(&relay->incoming_actions);
770 	spin_unlock(&relay->lock);
771 
772 	return !is_empty;
773 
774 }
775 
776 static void relay_kick_worker(struct xe_guc_relay *relay)
777 {
778 	KUNIT_STATIC_STUB_REDIRECT(relay_kick_worker, relay);
779 	queue_work(relay_to_xe(relay)->sriov.wq, &relay->worker);
780 }
781 
782 static void relays_worker_fn(struct work_struct *w)
783 {
784 	struct xe_guc_relay *relay = container_of(w, struct xe_guc_relay, worker);
785 
786 	relay_process_incoming_action(relay);
787 
788 	if (relay_needs_worker(relay))
789 		relay_kick_worker(relay);
790 }
791 
792 static int relay_queue_action_msg(struct xe_guc_relay *relay, u32 origin, u32 rid,
793 				  const u32 *msg, u32 len)
794 {
795 	struct relay_transaction *txn;
796 
797 	txn = relay_new_incoming_transaction(relay, origin, rid, msg, len);
798 	if (IS_ERR(txn))
799 		return PTR_ERR(txn);
800 
801 	spin_lock(&relay->lock);
802 	list_add_tail(&txn->link, &relay->incoming_actions);
803 	spin_unlock(&relay->lock);
804 
805 	relay_kick_worker(relay);
806 	return 0;
807 }
808 
809 static int relay_process_msg(struct xe_guc_relay *relay, u32 origin, u32 rid,
810 			     const u32 *msg, u32 len)
811 {
812 	u32 type;
813 	int err;
814 
815 	if (unlikely(len < GUC_HXG_MSG_MIN_LEN))
816 		return -EPROTO;
817 
818 	if (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) != GUC_HXG_ORIGIN_HOST)
819 		return -EPROTO;
820 
821 	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
822 	relay_debug(relay, "received %s.%u from %u = %*ph\n",
823 		    guc_hxg_type_to_string(type), rid, origin, 4 * len, msg);
824 
825 	switch (type) {
826 	case GUC_HXG_TYPE_REQUEST:
827 	case GUC_HXG_TYPE_FAST_REQUEST:
828 	case GUC_HXG_TYPE_EVENT:
829 		err = relay_queue_action_msg(relay, origin, rid, msg, len);
830 		break;
831 	case GUC_HXG_TYPE_RESPONSE_SUCCESS:
832 		err = relay_handle_reply(relay, origin, rid, 0, msg, len);
833 		break;
834 	case GUC_HXG_TYPE_NO_RESPONSE_BUSY:
835 		err = relay_handle_reply(relay, origin, rid, -EBUSY, NULL, 0);
836 		break;
837 	case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
838 		err = relay_handle_reply(relay, origin, rid, -EAGAIN, NULL, 0);
839 		break;
840 	case GUC_HXG_TYPE_RESPONSE_FAILURE:
841 		err = relay_handle_failure(relay, origin, rid, msg, len);
842 		break;
843 	default:
844 		err = -EBADRQC;
845 	}
846 
847 	if (unlikely(err))
848 		relay_notice(relay, "Failed to process %s.%u from %u (%pe) %*ph\n",
849 			     guc_hxg_type_to_string(type), rid, origin,
850 			     ERR_PTR(err), 4 * len, msg);
851 
852 	return err;
853 }
854 
855 /**
856  * xe_guc_relay_process_guc2vf - Handle relay notification message from the GuC.
857  * @relay: the &xe_guc_relay which will handle the message
858  * @msg: message to be handled
859  * @len: length of the message (in dwords)
860  *
861  * This function will handle relay messages received from the GuC.
862  *
863  * This function is can only be used if driver is running in SR-IOV mode.
864  *
865  * Return: 0 on success or a negative error code on failure.
866  */
867 int xe_guc_relay_process_guc2vf(struct xe_guc_relay *relay, const u32 *msg, u32 len)
868 {
869 	u32 rid;
870 
871 	relay_assert(relay, len >= GUC_HXG_MSG_MIN_LEN);
872 	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
873 	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
874 	relay_assert(relay, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
875 		     XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF);
876 
877 	if (unlikely(!IS_SRIOV_VF(relay_to_xe(relay)) && !kunit_get_current_test()))
878 		return -EPERM;
879 
880 	if (unlikely(!relay_is_ready(relay)))
881 		return -ENODEV;
882 
883 	if (unlikely(len < GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN))
884 		return -EPROTO;
885 
886 	if (unlikely(len > GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN))
887 		return -EMSGSIZE;
888 
889 	if (unlikely(FIELD_GET(GUC_HXG_EVENT_MSG_0_DATA0, msg[0])))
890 		return -EPFNOSUPPORT;
891 
892 	rid = FIELD_GET(GUC2VF_RELAY_FROM_PF_EVENT_MSG_1_RELAY_ID, msg[1]);
893 
894 	return relay_process_msg(relay, PFID, rid,
895 				 msg + GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN,
896 				 len - GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN);
897 }
898 
899 #ifdef CONFIG_PCI_IOV
900 /**
901  * xe_guc_relay_process_guc2pf - Handle relay notification message from the GuC.
902  * @relay: the &xe_guc_relay which will handle the message
903  * @msg: message to be handled
904  * @len: length of the message (in dwords)
905  *
906  * This function will handle relay messages received from the GuC.
907  *
908  * This function can only be used if driver is running in SR-IOV PF mode.
909  *
910  * Return: 0 on success or a negative error code on failure.
911  */
912 int xe_guc_relay_process_guc2pf(struct xe_guc_relay *relay, const u32 *msg, u32 len)
913 {
914 	u32 origin, rid;
915 	int err;
916 
917 	relay_assert(relay, len >= GUC_HXG_EVENT_MSG_MIN_LEN);
918 	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
919 	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
920 	relay_assert(relay, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
921 		     XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF);
922 
923 	if (unlikely(!IS_SRIOV_PF(relay_to_xe(relay)) && !kunit_get_current_test()))
924 		return -EPERM;
925 
926 	if (unlikely(!relay_is_ready(relay)))
927 		return -ENODEV;
928 
929 	if (unlikely(len < GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN))
930 		return -EPROTO;
931 
932 	if (unlikely(len > GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN))
933 		return -EMSGSIZE;
934 
935 	if (unlikely(FIELD_GET(GUC_HXG_EVENT_MSG_0_DATA0, msg[0])))
936 		return -EPFNOSUPPORT;
937 
938 	origin = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID, msg[1]);
939 	rid = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID, msg[2]);
940 
941 	if (unlikely(origin > relay_get_totalvfs(relay)))
942 		return -ENOENT;
943 
944 	err = relay_process_msg(relay, origin, rid,
945 				msg + GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN,
946 				len - GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN);
947 
948 	return err;
949 }
950 #endif
951 
952 #if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
953 #include "tests/xe_guc_relay_test.c"
954 #endif
955