xref: /linux/drivers/gpu/drm/xe/xe_guc_ct.c (revision 90d32e92011eaae8e70a9169b4e7acf4ca8f9d3a)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_guc_ct.h"
7 
8 #include <linux/bitfield.h>
9 #include <linux/circ_buf.h>
10 #include <linux/delay.h>
11 
12 #include <kunit/static_stub.h>
13 
14 #include <drm/drm_managed.h>
15 
16 #include "abi/guc_actions_abi.h"
17 #include "abi/guc_actions_sriov_abi.h"
18 #include "abi/guc_klvs_abi.h"
19 #include "xe_bo.h"
20 #include "xe_device.h"
21 #include "xe_gt.h"
22 #include "xe_gt_pagefault.h"
23 #include "xe_gt_printk.h"
24 #include "xe_gt_sriov_pf_control.h"
25 #include "xe_gt_tlb_invalidation.h"
26 #include "xe_guc.h"
27 #include "xe_guc_relay.h"
28 #include "xe_guc_submit.h"
29 #include "xe_map.h"
30 #include "xe_pm.h"
31 #include "xe_trace.h"
32 
/*
 * Used when a CT send wants to block and / or receive data.
 *
 * One fence tracks one in-flight H2G request. The G2H response parser fills in
 * the result fields and finally sets @done.
 */
struct g2h_fence {
	/* Caller supplied buffer the HXG response is copied into, may be NULL */
	u32 *response_buffer;
	/* CT fence value, key into ct->fence_lookup; ~0x0 until one is assigned */
	u32 seqno;
	/* DATA0 of the response, recorded when no response_buffer was supplied */
	u32 response_data;
	/* Length in dwords of the response copied into response_buffer */
	u16 response_len;
	/* Error field of a RESPONSE_FAILURE message, meaningful when fail is set */
	u16 error;
	/* Hint field of a RESPONSE_FAILURE message, meaningful when fail is set */
	u16 hint;
	/* Reason field of a NO_RESPONSE_RETRY message, meaningful when retry is set */
	u16 reason;
	/* The GuC answered with NO_RESPONSE_RETRY */
	bool retry;
	/* The GuC answered with RESPONSE_FAILURE */
	bool fail;
	/* A response has been parsed, waiters on ct->g2h_fence_wq may proceed */
	bool done;
};
46 
47 static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer)
48 {
49 	g2h_fence->response_buffer = response_buffer;
50 	g2h_fence->response_data = 0;
51 	g2h_fence->response_len = 0;
52 	g2h_fence->fail = false;
53 	g2h_fence->retry = false;
54 	g2h_fence->done = false;
55 	g2h_fence->seqno = ~0x0;
56 }
57 
58 static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence)
59 {
60 	return g2h_fence->seqno == ~0x0;
61 }
62 
63 static struct xe_guc *
64 ct_to_guc(struct xe_guc_ct *ct)
65 {
66 	return container_of(ct, struct xe_guc, ct);
67 }
68 
69 static struct xe_gt *
70 ct_to_gt(struct xe_guc_ct *ct)
71 {
72 	return container_of(ct, struct xe_gt, uc.guc.ct);
73 }
74 
/* Resolve the &xe_device owning this CT channel, going through its GT */
static struct xe_device *ct_to_xe(struct xe_guc_ct *ct)
{
	struct xe_gt *gt = ct_to_gt(ct);

	return gt_to_xe(gt);
}
80 
81 /**
82  * DOC: GuC CTB Blob
83  *
 * We allocate a single blob to hold both CTB descriptors and buffers:
85  *
86  *      +--------+-----------------------------------------------+------+
87  *      | offset | contents                                      | size |
88  *      +========+===============================================+======+
89  *      | 0x0000 | H2G CTB Descriptor (send)                     |      |
90  *      +--------+-----------------------------------------------+  4K  |
91  *      | 0x0800 | G2H CTB Descriptor (g2h)                      |      |
92  *      +--------+-----------------------------------------------+------+
93  *      | 0x1000 | H2G CT Buffer (send)                          | n*4K |
94  *      |        |                                               |      |
95  *      +--------+-----------------------------------------------+------+
96  *      | 0x1000 | G2H CT Buffer (g2h)                           | m*4K |
97  *      | + n*4K |                                               |      |
98  *      +--------+-----------------------------------------------+------+
99  *
100  * Size of each ``CT Buffer`` must be multiple of 4K.
101  * We don't expect too many messages in flight at any time, unless we are
 * using the GuC submission. In that case each request requires a minimum
 * of 2 dwords which gives us a maximum of 256 queue'd requests. Hopefully
 * this is enough space to avoid backpressure on the driver. We increase the
 * size of the receive buffer (relative to the send) to ensure a G2H response
 * CTB has a landing spot.
107  */
108 
/* Space taken by one CTB descriptor in the blob, rounded up to a 2K boundary */
#define CTB_DESC_SIZE		ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K)
/* Size in bytes of the H2G (send) command buffer */
#define CTB_H2G_BUFFER_SIZE	(SZ_4K)
/* Size in bytes of the G2H (receive) command buffer: four times the H2G one */
#define CTB_G2H_BUFFER_SIZE	(4 * CTB_H2G_BUFFER_SIZE)
/* Part of the G2H buffer held back as reserved space (see g2h resv_space) */
#define G2H_ROOM_BUFFER_SIZE	(CTB_G2H_BUFFER_SIZE / 4)
113 
114 static size_t guc_ct_size(void)
115 {
116 	return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE +
117 		CTB_G2H_BUFFER_SIZE;
118 }
119 
120 static void guc_ct_fini(struct drm_device *drm, void *arg)
121 {
122 	struct xe_guc_ct *ct = arg;
123 
124 	destroy_workqueue(ct->g2h_wq);
125 	xa_destroy(&ct->fence_lookup);
126 }
127 
/* Forward declaration: handed to INIT_WORK() in xe_guc_ct_init() */
static void g2h_worker_func(struct work_struct *w);
129 
130 static void primelockdep(struct xe_guc_ct *ct)
131 {
132 	if (!IS_ENABLED(CONFIG_LOCKDEP))
133 		return;
134 
135 	fs_reclaim_acquire(GFP_KERNEL);
136 	might_lock(&ct->lock);
137 	fs_reclaim_release(GFP_KERNEL);
138 }
139 
140 int xe_guc_ct_init(struct xe_guc_ct *ct)
141 {
142 	struct xe_device *xe = ct_to_xe(ct);
143 	struct xe_gt *gt = ct_to_gt(ct);
144 	struct xe_tile *tile = gt_to_tile(gt);
145 	struct xe_bo *bo;
146 	int err;
147 
148 	xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE));
149 
150 	ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0);
151 	if (!ct->g2h_wq)
152 		return -ENOMEM;
153 
154 	spin_lock_init(&ct->fast_lock);
155 	xa_init(&ct->fence_lookup);
156 	INIT_WORK(&ct->g2h_worker, g2h_worker_func);
157 	init_waitqueue_head(&ct->wq);
158 	init_waitqueue_head(&ct->g2h_fence_wq);
159 
160 	err = drmm_mutex_init(&xe->drm, &ct->lock);
161 	if (err)
162 		return err;
163 
164 	primelockdep(ct);
165 
166 	bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(),
167 					  XE_BO_FLAG_SYSTEM |
168 					  XE_BO_FLAG_GGTT |
169 					  XE_BO_FLAG_GGTT_INVALIDATE);
170 	if (IS_ERR(bo))
171 		return PTR_ERR(bo);
172 
173 	ct->bo = bo;
174 
175 	err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct);
176 	if (err)
177 		return err;
178 
179 	xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED);
180 	ct->state = XE_GUC_CT_STATE_DISABLED;
181 	return 0;
182 }
183 
/* Read @field_ of the struct guc_ct_buffer_desc mapped at guc_ctb__->desc */
#define desc_read(xe_, guc_ctb__, field_)			\
	xe_map_rd_field(xe_, &guc_ctb__->desc, 0,		\
			struct guc_ct_buffer_desc, field_)

/* Write @val_ to @field_ of the struct guc_ct_buffer_desc at guc_ctb__->desc */
#define desc_write(xe_, guc_ctb__, field_, val_)		\
	xe_map_wr_field(xe_, &guc_ctb__->desc, 0,		\
			struct guc_ct_buffer_desc, field_, val_)
191 
192 static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g,
193 				struct iosys_map *map)
194 {
195 	h2g->info.size = CTB_H2G_BUFFER_SIZE / sizeof(u32);
196 	h2g->info.resv_space = 0;
197 	h2g->info.tail = 0;
198 	h2g->info.head = 0;
199 	h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
200 				     h2g->info.size) -
201 			  h2g->info.resv_space;
202 	h2g->info.broken = false;
203 
204 	h2g->desc = *map;
205 	xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
206 
207 	h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2);
208 }
209 
210 static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h,
211 				struct iosys_map *map)
212 {
213 	g2h->info.size = CTB_G2H_BUFFER_SIZE / sizeof(u32);
214 	g2h->info.resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32);
215 	g2h->info.head = 0;
216 	g2h->info.tail = 0;
217 	g2h->info.space = CIRC_SPACE(g2h->info.tail, g2h->info.head,
218 				     g2h->info.size) -
219 			  g2h->info.resv_space;
220 	g2h->info.broken = false;
221 
222 	g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE);
223 	xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
224 
225 	g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 +
226 					    CTB_H2G_BUFFER_SIZE);
227 }
228 
229 static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct)
230 {
231 	struct xe_guc *guc = ct_to_guc(ct);
232 	u32 desc_addr, ctb_addr, size;
233 	int err;
234 
235 	desc_addr = xe_bo_ggtt_addr(ct->bo);
236 	ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2;
237 	size = ct->ctbs.h2g.info.size * sizeof(u32);
238 
239 	err = xe_guc_self_cfg64(guc,
240 				GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY,
241 				desc_addr);
242 	if (err)
243 		return err;
244 
245 	err = xe_guc_self_cfg64(guc,
246 				GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY,
247 				ctb_addr);
248 	if (err)
249 		return err;
250 
251 	return xe_guc_self_cfg32(guc,
252 				 GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY,
253 				 size);
254 }
255 
256 static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct)
257 {
258 	struct xe_guc *guc = ct_to_guc(ct);
259 	u32 desc_addr, ctb_addr, size;
260 	int err;
261 
262 	desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE;
263 	ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 +
264 		CTB_H2G_BUFFER_SIZE;
265 	size = ct->ctbs.g2h.info.size * sizeof(u32);
266 
267 	err = xe_guc_self_cfg64(guc,
268 				GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY,
269 				desc_addr);
270 	if (err)
271 		return err;
272 
273 	err = xe_guc_self_cfg64(guc,
274 				GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY,
275 				ctb_addr);
276 	if (err)
277 		return err;
278 
279 	return xe_guc_self_cfg32(guc,
280 				 GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY,
281 				 size);
282 }
283 
284 static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable)
285 {
286 	u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = {
287 		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
288 		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
289 		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
290 			   GUC_ACTION_HOST2GUC_CONTROL_CTB),
291 		FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL,
292 			   enable ? GUC_CTB_CONTROL_ENABLE :
293 			   GUC_CTB_CONTROL_DISABLE),
294 	};
295 	int ret = xe_guc_mmio_send(ct_to_guc(ct), request, ARRAY_SIZE(request));
296 
297 	return ret > 0 ? -EPROTO : ret;
298 }
299 
/*
 * Move the CT to @state.
 *
 * The state change happens with both ct->lock and ct->fast_lock held. The
 * count of outstanding G2H is reset to 0 and every entry in the fence lookup
 * xarray is dropped. Per the assert, outstanding G2H are only expected when
 * the new state is XE_GUC_CT_STATE_STOPPED.
 */
static void xe_guc_ct_set_state(struct xe_guc_ct *ct,
				enum xe_guc_ct_state state)
{
	mutex_lock(&ct->lock);		/* Serialise dequeue_one_g2h() */
	spin_lock_irq(&ct->fast_lock);	/* Serialise CT fast-path */

	xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 ||
		     state == XE_GUC_CT_STATE_STOPPED);

	ct->g2h_outstanding = 0;
	ct->state = state;

	spin_unlock_irq(&ct->fast_lock);

	/*
	 * Lockdep doesn't like this under the fast lock and the destroy only
	 * needs to be serialized with the send path which ct lock provides.
	 */
	xa_destroy(&ct->fence_lookup);

	mutex_unlock(&ct->lock);
}
322 
323 int xe_guc_ct_enable(struct xe_guc_ct *ct)
324 {
325 	struct xe_device *xe = ct_to_xe(ct);
326 	struct xe_gt *gt = ct_to_gt(ct);
327 	int err;
328 
329 	xe_gt_assert(gt, !xe_guc_ct_enabled(ct));
330 
331 	guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
332 	guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
333 
334 	err = guc_ct_ctb_h2g_register(ct);
335 	if (err)
336 		goto err_out;
337 
338 	err = guc_ct_ctb_g2h_register(ct);
339 	if (err)
340 		goto err_out;
341 
342 	err = guc_ct_control_toggle(ct, true);
343 	if (err)
344 		goto err_out;
345 
346 	xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED);
347 
348 	smp_mb();
349 	wake_up_all(&ct->wq);
350 	xe_gt_dbg(gt, "GuC CT communication channel enabled\n");
351 
352 	return 0;
353 
354 err_out:
355 	xe_gt_err(gt, "Failed to enable GuC CT (%pe)\n", ERR_PTR(err));
356 
357 	return err;
358 }
359 
/* Cancel the G2H worker and wait for a running instance to finish */
static void stop_g2h_handler(struct xe_guc_ct *ct)
{
	cancel_work_sync(&ct->g2h_worker);
}
364 
/**
 * xe_guc_ct_disable - Set GuC CT to disabled state
 * @ct: the &xe_guc_ct
 *
 * Set GuC CT to disabled state and stop g2h handler. No outstanding g2h expected
 * in this transition.
 */
void xe_guc_ct_disable(struct xe_guc_ct *ct)
{
	/* Change state first, then stop the worker */
	xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED);
	stop_g2h_handler(ct);
}
377 
/**
 * xe_guc_ct_stop - Set GuC CT to stopped state
 * @ct: the &xe_guc_ct
 *
 * Set GuC CT to stopped state, stop g2h handler, and clear any outstanding g2h
 */
void xe_guc_ct_stop(struct xe_guc_ct *ct)
{
	/* Change state first, then stop the worker */
	xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED);
	stop_g2h_handler(ct);
}
389 
390 static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
391 {
392 	struct guc_ctb *h2g = &ct->ctbs.h2g;
393 
394 	lockdep_assert_held(&ct->lock);
395 
396 	if (cmd_len > h2g->info.space) {
397 		h2g->info.head = desc_read(ct_to_xe(ct), h2g, head);
398 		h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
399 					     h2g->info.size) -
400 				  h2g->info.resv_space;
401 		if (cmd_len > h2g->info.space)
402 			return false;
403 	}
404 
405 	return true;
406 }
407 
408 static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len)
409 {
410 	if (!g2h_len)
411 		return true;
412 
413 	lockdep_assert_held(&ct->fast_lock);
414 
415 	return ct->ctbs.g2h.info.space > g2h_len;
416 }
417 
418 static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len)
419 {
420 	lockdep_assert_held(&ct->lock);
421 
422 	if (!g2h_has_room(ct, g2h_len) || !h2g_has_room(ct, cmd_len))
423 		return -EBUSY;
424 
425 	return 0;
426 }
427 
428 static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
429 {
430 	lockdep_assert_held(&ct->lock);
431 	ct->ctbs.h2g.info.space -= cmd_len;
432 }
433 
434 static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
435 {
436 	xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space);
437 
438 	if (g2h_len) {
439 		lockdep_assert_held(&ct->fast_lock);
440 
441 		ct->ctbs.g2h.info.space -= g2h_len;
442 		ct->g2h_outstanding += num_g2h;
443 	}
444 }
445 
446 static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
447 {
448 	lockdep_assert_held(&ct->fast_lock);
449 	xe_gt_assert(ct_to_gt(ct), ct->ctbs.g2h.info.space + g2h_len <=
450 		     ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
451 
452 	ct->ctbs.g2h.info.space += g2h_len;
453 	--ct->g2h_outstanding;
454 }
455 
/* Locked wrapper: release @g2h_len dwords of G2H space under ct->fast_lock */
static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
{
	spin_lock_irq(&ct->fast_lock);
	__g2h_release_space(ct, g2h_len);
	spin_unlock_irq(&ct->fast_lock);
}
462 
#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */

/*
 * Write one H2G message into the H2G command buffer and publish the new tail
 * through the descriptor. Caller must hold ct->lock.
 * @ct: the CT channel
 * @action: message dwords; action[0] is folded into the HxG header dword
 * @len: number of dwords in @action
 * @ct_fence_value: value placed in the fence field of the CT header
 * @want_response: true sends a REQUEST, false sends a FAST_REQUEST
 *
 * Return: 0 once the message is written, or -EAGAIN when the message would run
 * past the end of the buffer. In the -EAGAIN case the rest of the buffer is
 * zero filled, the tail is wrapped to 0 and the caller has to check credits
 * again before retrying.
 */
static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
		     u32 ct_fence_value, bool want_response)
{
	struct xe_device *xe = ct_to_xe(ct);
	struct xe_gt *gt = ct_to_gt(ct);
	struct guc_ctb *h2g = &ct->ctbs.h2g;
	u32 cmd[H2G_CT_HEADERS];
	u32 tail = h2g->info.tail;
	u32 full_len;
	/* Mapping of the command buffer starting at the current tail */
	struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
							 tail * sizeof(u32));

	/* Dwords consumed in the buffer: the payload plus the CT header */
	full_len = len + GUC_CTB_HDR_LEN;

	lockdep_assert_held(&ct->lock);
	xe_gt_assert(gt, full_len <= GUC_CTB_MSG_MAX_LEN);
	xe_gt_assert(gt, tail <= h2g->info.size);

	/* Command will wrap, zero fill (NOPs), return and check credits again */
	if (tail + full_len > h2g->info.size) {
		xe_map_memset(xe, &map, 0, 0,
			      (h2g->info.size - tail) * sizeof(u32));
		h2g_reserve_space(ct, (h2g->info.size - tail));
		h2g->info.tail = 0;
		desc_write(xe, h2g, tail, h2g->info.tail);

		return -EAGAIN;
	}

	/*
	 * dw0: CT header (including fence)
	 * dw1: HXG header (including action code)
	 * dw2+: action data
	 */
	cmd[0] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
		FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
		FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value);
	if (want_response) {
		cmd[1] =
			FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
			FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
				   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
	} else {
		cmd[1] =
			FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) |
			FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
				   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
	}

	/* H2G header in cmd[1] replaces action[0] so: */
	--len;
	++action;

	/* Write H2G ensuring visible before descriptor update */
	xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32));
	xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32));
	xe_device_wmb(xe);

	/* Update local copies */
	h2g->info.tail = (tail + full_len) % h2g->info.size;
	h2g_reserve_space(ct, full_len);

	/* Update descriptor */
	desc_write(xe, h2g, tail, h2g->info.tail);

	/* action was advanced above, so *(action - 1) is the original action[0] */
	trace_xe_guc_ctb_h2g(gt->info.id, *(action - 1), full_len,
			     desc_read(xe, h2g, head), h2g->info.tail);

	return 0;
}
535 
536 /*
537  * The CT protocol accepts a 16 bits fence. This field is fully owned by the
538  * driver, the GuC will just copy it to the reply message. Since we need to
539  * be able to distinguish between replies to REQUEST and FAST_REQUEST messages,
540  * we use one bit of the seqno as an indicator for that and a rolling counter
541  * for the remaining 15 bits.
542  */
543 #define CT_SEQNO_MASK GENMASK(14, 0)
544 #define CT_SEQNO_UNTRACKED BIT(15)
545 static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence)
546 {
547 	u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK;
548 
549 	if (!is_g2h_fence)
550 		seqno |= CT_SEQNO_UNTRACKED;
551 
552 	return seqno;
553 }
554 
/*
 * Make a single attempt at queueing an H2G message. Caller must hold ct->lock.
 * @ct: the CT channel
 * @action: message dwords
 * @len: number of dwords in @action
 * @g2h_len: G2H space to reserve in dwords, must be 0 when @g2h_fence is given
 * @num_g2h: number of G2H expected, must be 0 when @g2h_fence is given
 * @g2h_fence: optional fence for a blocking send; when given, space for one
 *             maximum sized HXG message is reserved instead
 *
 * Return: 0 on success, -EPIPE if the H2G channel is marked broken, -ENODEV if
 * the CT is disabled, -ECANCELED if the CT is stopped, -EBUSY if there is no
 * room in either direction, or the xa_store() error when the fence could not
 * be stored.
 */
static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
				u32 len, u32 g2h_len, u32 num_g2h,
				struct g2h_fence *g2h_fence)
{
	struct xe_gt *gt __maybe_unused = ct_to_gt(ct);
	u16 seqno;
	int ret;

	xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
	xe_gt_assert(gt, !g2h_len || !g2h_fence);
	xe_gt_assert(gt, !num_g2h || !g2h_fence);
	xe_gt_assert(gt, !g2h_len || num_g2h);
	xe_gt_assert(gt, g2h_len || !num_g2h);
	lockdep_assert_held(&ct->lock);

	if (unlikely(ct->ctbs.h2g.info.broken)) {
		ret = -EPIPE;
		goto out;
	}

	if (ct->state == XE_GUC_CT_STATE_DISABLED) {
		ret = -ENODEV;
		goto out;
	}

	if (ct->state == XE_GUC_CT_STATE_STOPPED) {
		ret = -ECANCELED;
		goto out;
	}

	xe_gt_assert(gt, xe_guc_ct_enabled(ct));

	if (g2h_fence) {
		/* A blocking send always reserves one maximum sized response */
		g2h_len = GUC_CTB_HXG_MSG_MAX_LEN;
		num_g2h = 1;

		/* On a resend of the same fence the seqno is already assigned */
		if (g2h_fence_needs_alloc(g2h_fence)) {
			void *ptr;

			g2h_fence->seqno = next_ct_seqno(ct, true);
			ptr = xa_store(&ct->fence_lookup,
				       g2h_fence->seqno,
				       g2h_fence, GFP_ATOMIC);
			if (IS_ERR(ptr)) {
				ret = PTR_ERR(ptr);
				goto out;
			}
		}

		seqno = g2h_fence->seqno;
	} else {
		seqno = next_ct_seqno(ct, false);
	}

	/* The fast lock is only needed when G2H credits are involved */
	if (g2h_len)
		spin_lock_irq(&ct->fast_lock);
retry:
	ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len);
	if (unlikely(ret))
		goto out_unlock;

	ret = h2g_write(ct, action, len, seqno, !!g2h_fence);
	if (unlikely(ret)) {
		/* -EAGAIN: the tail wrapped, check credits again and rewrite */
		if (ret == -EAGAIN)
			goto retry;
		goto out_unlock;
	}

	__g2h_reserve_space(ct, g2h_len, num_g2h);
	xe_guc_notify(ct_to_guc(ct));
out_unlock:
	if (g2h_len)
		spin_unlock_irq(&ct->fast_lock);
out:
	return ret;
}
631 
/* Request an asynchronous reset of the GT this CT channel belongs to */
static void kick_reset(struct xe_guc_ct *ct)
{
	struct xe_gt *gt = ct_to_gt(ct);

	xe_gt_reset_async(gt);
}
636 
/* Forward declaration: used by guc_ct_send_locked() to free up G2H credits */
static int dequeue_one_g2h(struct xe_guc_ct *ct);
638 
/*
 * Queue an H2G message, waiting for credits when the channel is full. Caller
 * must hold ct->lock. Parameters are passed through to __guc_ct_send_locked().
 *
 * Return: the result of __guc_ct_send_locked() for anything but -EBUSY. When
 * credits cannot be restored in time, the H2G channel is marked broken and
 * -EDEADLK is returned.
 */
static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
			      u32 g2h_len, u32 num_g2h,
			      struct g2h_fence *g2h_fence)
{
	struct xe_gt *gt = ct_to_gt(ct);
	struct drm_printer p = xe_gt_info_printer(gt);
	unsigned int sleep_period_ms = 1;
	int ret;

	xe_gt_assert(gt, !g2h_len || !g2h_fence);
	lockdep_assert_held(&ct->lock);
	xe_device_assert_mem_access(ct_to_xe(ct));

try_again:
	ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h,
				   g2h_fence);

	/*
	 * We wait to try to restore credits for about 1 second before bailing.
	 * In the case of H2G credits we have no choice but just to wait for the
	 * GuC to consume H2Gs in the channel so we use a wait / sleep loop. In
	 * the case of G2H we process any G2H in the channel, hopefully freeing
	 * credits as we consume the G2H messages.
	 */
	if (unlikely(ret == -EBUSY &&
		     !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
		struct guc_ctb *h2g = &ct->ctbs.h2g;

		/* Sleeps double each round starting at 1ms; give up at 1024ms */
		if (sleep_period_ms == 1024)
			goto broken;

		trace_xe_guc_ct_h2g_flow_control(h2g->info.head, h2g->info.tail,
						 h2g->info.size,
						 h2g->info.space,
						 len + GUC_CTB_HDR_LEN);
		msleep(sleep_period_ms);
		sleep_period_ms <<= 1;

		goto try_again;
	} else if (unlikely(ret == -EBUSY)) {
		/* H2G has room, so it is the G2H credits that ran out */
		struct xe_device *xe = ct_to_xe(ct);
		struct guc_ctb *g2h = &ct->ctbs.g2h;

		trace_xe_guc_ct_g2h_flow_control(g2h->info.head,
						 desc_read(xe, g2h, tail),
						 g2h->info.size,
						 g2h->info.space,
						 g2h_fence ?
						 GUC_CTB_HXG_MSG_MAX_LEN :
						 g2h_len);

/* True when the descriptor tail has moved past our cached head */
#define g2h_avail(ct)	\
	(desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.info.head)
		if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding ||
					g2h_avail(ct), HZ))
			goto broken;
#undef g2h_avail

		if (dequeue_one_g2h(ct) < 0)
			goto broken;

		goto try_again;
	}

	return ret;

broken:
	xe_gt_err(gt, "No forward process on H2G, reset required\n");
	xe_guc_ct_print(ct, &p, true);
	ct->ctbs.h2g.info.broken = true;

	return -EDEADLK;
}
712 
/*
 * Take ct->lock around guc_ct_send_locked(). All parameters and the return
 * value are passed straight through.
 */
static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
		       u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence)
{
	int ret;

	xe_gt_assert(ct_to_gt(ct), !g2h_len || !g2h_fence);

	mutex_lock(&ct->lock);
	ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
	mutex_unlock(&ct->lock);

	return ret;
}
726 
727 int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
728 		   u32 g2h_len, u32 num_g2h)
729 {
730 	int ret;
731 
732 	ret = guc_ct_send(ct, action, len, g2h_len, num_g2h, NULL);
733 	if (ret == -EDEADLK)
734 		kick_reset(ct);
735 
736 	return ret;
737 }
738 
739 int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
740 			  u32 g2h_len, u32 num_g2h)
741 {
742 	int ret;
743 
744 	ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, NULL);
745 	if (ret == -EDEADLK)
746 		kick_reset(ct);
747 
748 	return ret;
749 }
750 
751 int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, u32 len)
752 {
753 	int ret;
754 
755 	lockdep_assert_held(&ct->lock);
756 
757 	ret = guc_ct_send_locked(ct, action, len, 0, 0, NULL);
758 	if (ret == -EDEADLK)
759 		kick_reset(ct);
760 
761 	return ret;
762 }
763 
764 /*
765  * Check if a GT reset is in progress or will occur and if GT reset brought the
766  * CT back up. Randomly picking 5 seconds for an upper limit to do a GT a reset.
767  */
768 static bool retry_failure(struct xe_guc_ct *ct, int ret)
769 {
770 	if (!(ret == -EDEADLK || ret == -EPIPE || ret == -ENODEV))
771 		return false;
772 
773 #define ct_alive(ct)	\
774 	(xe_guc_ct_enabled(ct) && !ct->ctbs.h2g.info.broken && \
775 	 !ct->ctbs.g2h.info.broken)
776 	if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct),  HZ * 5))
777 		return false;
778 #undef ct_alive
779 
780 	return true;
781 }
782 
783 static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
784 			    u32 *response_buffer, bool no_fail)
785 {
786 	struct xe_gt *gt = ct_to_gt(ct);
787 	struct g2h_fence g2h_fence;
788 	int ret = 0;
789 
790 	/*
791 	 * We use a fence to implement blocking sends / receiving response data.
792 	 * The seqno of the fence is sent in the H2G, returned in the G2H, and
793 	 * an xarray is used as storage media with the seqno being to key.
794 	 * Fields in the fence hold success, failure, retry status and the
795 	 * response data. Safe to allocate on the stack as the xarray is the
796 	 * only reference and it cannot be present after this function exits.
797 	 */
798 retry:
799 	g2h_fence_init(&g2h_fence, response_buffer);
800 retry_same_fence:
801 	ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence);
802 	if (unlikely(ret == -ENOMEM)) {
803 		void *ptr;
804 
805 		/* Retry allocation /w GFP_KERNEL */
806 		ptr = xa_store(&ct->fence_lookup,
807 			       g2h_fence.seqno,
808 			       &g2h_fence, GFP_KERNEL);
809 		if (IS_ERR(ptr))
810 			return PTR_ERR(ptr);
811 
812 		goto retry_same_fence;
813 	} else if (unlikely(ret)) {
814 		if (ret == -EDEADLK)
815 			kick_reset(ct);
816 
817 		if (no_fail && retry_failure(ct, ret))
818 			goto retry_same_fence;
819 
820 		if (!g2h_fence_needs_alloc(&g2h_fence))
821 			xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
822 
823 		return ret;
824 	}
825 
826 	ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
827 	if (!ret) {
828 		xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x",
829 			  g2h_fence.seqno, action[0]);
830 		xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
831 		return -ETIME;
832 	}
833 
834 	if (g2h_fence.retry) {
835 		xe_gt_warn(gt, "H2G retry, action 0x%04x, reason %u",
836 			   action[0], g2h_fence.reason);
837 		goto retry;
838 	}
839 	if (g2h_fence.fail) {
840 		xe_gt_err(gt, "H2G send failed, action 0x%04x, error %d, hint %u",
841 			  action[0], g2h_fence.error, g2h_fence.hint);
842 		ret = -EIO;
843 	}
844 
845 	return ret > 0 ? response_buffer ? g2h_fence.response_len : g2h_fence.response_data : ret;
846 }
847 
/**
 * xe_guc_ct_send_recv - Send and receive HXG to the GuC
 * @ct: the &xe_guc_ct
 * @action: the dword array with `HXG Request`_ message (can't be NULL)
 * @len: length of the `HXG Request`_ message (in dwords, can't be 0)
 * @response_buffer: placeholder for the `HXG Response`_ message (can be NULL)
 *
 * Send a `HXG Request`_ message to the GuC over CT communication channel and
 * blocks until GuC replies with a `HXG Response`_ message.
 *
 * For non-blocking communication with GuC use xe_guc_ct_send().
 *
 * Note: The size of &response_buffer must be at least GUC_CTB_MAX_DWORDS_.
 *
 * Return: response length (in dwords) if &response_buffer was not NULL, or
 *         DATA0 from `HXG Response`_ if &response_buffer was NULL, or
 *         a negative error code on failure.
 */
int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
			u32 *response_buffer)
{
	/* Lets KUnit tests substitute their own implementation of this call */
	KUNIT_STATIC_STUB_REDIRECT(xe_guc_ct_send_recv, ct, action, len, response_buffer);
	return guc_ct_send_recv(ct, action, len, response_buffer, false);
}
872 
/**
 * xe_guc_ct_send_recv_no_fail - Send and receive HXG to the GuC, retrying sends
 * @ct: the &xe_guc_ct
 * @action: the dword array with the request message
 * @len: length of the request message in dwords
 * @response_buffer: placeholder for the response message (can be NULL)
 *
 * Like xe_guc_ct_send_recv(), but a send that fails with -EDEADLK, -EPIPE or
 * -ENODEV is retried once the CT is alive again (see retry_failure()).
 *
 * Return: same as xe_guc_ct_send_recv().
 */
int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
				u32 len, u32 *response_buffer)
{
	return guc_ct_send_recv(ct, action, len, response_buffer, true);
}
878 
879 static u32 *msg_to_hxg(u32 *msg)
880 {
881 	return msg + GUC_CTB_MSG_MIN_LEN;
882 }
883 
884 static u32 msg_len_to_hxg_len(u32 len)
885 {
886 	return len - GUC_CTB_MSG_MIN_LEN;
887 }
888 
889 static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
890 {
891 	u32 *hxg = msg_to_hxg(msg);
892 	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
893 
894 	lockdep_assert_held(&ct->lock);
895 
896 	switch (action) {
897 	case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
898 	case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
899 	case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
900 	case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
901 		g2h_release_space(ct, len);
902 	}
903 
904 	return 0;
905 }
906 
/*
 * Handle a G2H response (success, failure or retry). Caller must hold
 * ct->lock.
 * @ct: the CT channel
 * @msg: the full CT message, CT header included
 * @len: length of @msg in dwords
 *
 * Looks up the fence matching the message, records the outcome in it, gives
 * back the reserved G2H space and wakes the sender waiting on g2h_fence_wq.
 *
 * Return: 0 on success or when no fence was found, -EPROTO for a response to
 * an untracked (FAST_REQUEST) fence.
 */
static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
{
	struct xe_gt *gt =  ct_to_gt(ct);
	u32 *hxg = msg_to_hxg(msg);
	u32 hxg_len = msg_len_to_hxg_len(len);
	u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]);
	u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]);
	struct g2h_fence *g2h_fence;

	lockdep_assert_held(&ct->lock);

	/*
	 * Fences for FAST_REQUEST messages are not tracked in ct->fence_lookup.
	 * Those messages should never fail, so if we do get an error back it
	 * means we're likely doing an illegal operation and the GuC is
	 * rejecting it. We have no way to inform the code that submitted the
	 * H2G that the message was rejected, so we need to escalate the
	 * failure to trigger a reset.
	 */
	if (fence & CT_SEQNO_UNTRACKED) {
		if (type == GUC_HXG_TYPE_RESPONSE_FAILURE)
			xe_gt_err(gt, "FAST_REQ H2G fence 0x%x failed! e=0x%x, h=%u\n",
				  fence,
				  FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]),
				  FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0]));
		else
			xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n",
				  type, fence);

		return -EPROTO;
	}

	/* Remove the fence from the lookup so it is only completed once */
	g2h_fence = xa_erase(&ct->fence_lookup, fence);
	if (unlikely(!g2h_fence)) {
		/* Don't tear down channel, as send could've timed out */
		xe_gt_warn(gt, "G2H fence (%u) not found!\n", fence);
		g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
		return 0;
	}

	xe_gt_assert(gt, fence == g2h_fence->seqno);

	if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) {
		g2h_fence->fail = true;
		g2h_fence->error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]);
		g2h_fence->hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0]);
	} else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
		g2h_fence->retry = true;
		g2h_fence->reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, hxg[0]);
	} else if (g2h_fence->response_buffer) {
		/* Success with a buffer: hand back the whole HXG message */
		g2h_fence->response_len = hxg_len;
		memcpy(g2h_fence->response_buffer, hxg, hxg_len * sizeof(u32));
	} else {
		/* Success without a buffer: only DATA0 is kept */
		g2h_fence->response_data = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, hxg[0]);
	}

	/* Matches the maximum sized reservation made when the request was sent */
	g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);

	/* Set done, then a full barrier, then wake the waiting sender */
	g2h_fence->done = true;
	smp_mb();

	wake_up_all(&ct->g2h_fence_wq);

	return 0;
}
972 
973 static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
974 {
975 	struct xe_gt *gt = ct_to_gt(ct);
976 	u32 *hxg = msg_to_hxg(msg);
977 	u32 origin, type;
978 	int ret;
979 
980 	lockdep_assert_held(&ct->lock);
981 
982 	origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]);
983 	if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) {
984 		xe_gt_err(gt, "G2H channel broken on read, origin=%u, reset required\n",
985 			  origin);
986 		ct->ctbs.g2h.info.broken = true;
987 
988 		return -EPROTO;
989 	}
990 
991 	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]);
992 	switch (type) {
993 	case GUC_HXG_TYPE_EVENT:
994 		ret = parse_g2h_event(ct, msg, len);
995 		break;
996 	case GUC_HXG_TYPE_RESPONSE_SUCCESS:
997 	case GUC_HXG_TYPE_RESPONSE_FAILURE:
998 	case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
999 		ret = parse_g2h_response(ct, msg, len);
1000 		break;
1001 	default:
1002 		xe_gt_err(gt, "G2H channel broken on read, type=%u, reset required\n",
1003 			  type);
1004 		ct->ctbs.g2h.info.broken = true;
1005 
1006 		ret = -EOPNOTSUPP;
1007 	}
1008 
1009 	return ret;
1010 }
1011 
/*
 * Dispatch a G2H event to the handler for its action code.
 * @ct: the CT channel
 * @msg: the full CT message, CT header included
 * @len: length of @msg in dwords
 *
 * Messages that are not events are ignored. Most handlers receive the payload
 * following the HXG event header; the relay and SR-IOV PF control handlers
 * receive the whole HXG message instead. A handler failure or an unknown
 * action is logged but not propagated.
 *
 * Return: always 0.
 */
static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
{
	struct xe_guc *guc = ct_to_guc(ct);
	struct xe_gt *gt = ct_to_gt(ct);
	u32 hxg_len = msg_len_to_hxg_len(len);
	u32 *hxg = msg_to_hxg(msg);
	u32 action, adj_len;
	u32 *payload;
	int ret = 0;

	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT)
		return 0;

	action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
	/* Payload and its length exclude the HXG event header */
	payload = hxg + GUC_HXG_EVENT_MSG_MIN_LEN;
	adj_len = hxg_len - GUC_HXG_EVENT_MSG_MIN_LEN;

	switch (action) {
	case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
		ret = xe_guc_sched_done_handler(guc, payload, adj_len);
		break;
	case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
		ret = xe_guc_deregister_done_handler(guc, payload, adj_len);
		break;
	case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
		ret = xe_guc_exec_queue_reset_handler(guc, payload, adj_len);
		break;
	case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
		ret = xe_guc_exec_queue_reset_failure_handler(guc, payload,
							      adj_len);
		break;
	case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
		/* Selftest only at the moment */
		break;
	case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
	case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE:
		/* FIXME: Handle this */
		break;
	case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR:
		ret = xe_guc_exec_queue_memory_cat_error_handler(guc, payload,
								 adj_len);
		break;
	case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
		ret = xe_guc_pagefault_handler(guc, payload, adj_len);
		break;
	case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
		ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
							   adj_len);
		break;
	case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY:
		ret = xe_guc_access_counter_notify_handler(guc, payload,
							   adj_len);
		break;
	case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF:
		ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len);
		break;
	case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF:
		ret = xe_guc_relay_process_guc2vf(&guc->relay, hxg, hxg_len);
		break;
	case GUC_ACTION_GUC2PF_VF_STATE_NOTIFY:
		ret = xe_gt_sriov_pf_control_process_guc2pf(gt, hxg, hxg_len);
		break;
	default:
		xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
	}

	/* Handler errors are reported here and deliberately not returned */
	if (ret)
		xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
			  action, ERR_PTR(ret));

	return 0;
}
1084 
1085 static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
1086 {
1087 	struct xe_device *xe = ct_to_xe(ct);
1088 	struct xe_gt *gt = ct_to_gt(ct);
1089 	struct guc_ctb *g2h = &ct->ctbs.g2h;
1090 	u32 tail, head, len;
1091 	s32 avail;
1092 	u32 action;
1093 	u32 *hxg;
1094 
1095 	xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
1096 	lockdep_assert_held(&ct->fast_lock);
1097 
1098 	if (ct->state == XE_GUC_CT_STATE_DISABLED)
1099 		return -ENODEV;
1100 
1101 	if (ct->state == XE_GUC_CT_STATE_STOPPED)
1102 		return -ECANCELED;
1103 
1104 	if (g2h->info.broken)
1105 		return -EPIPE;
1106 
1107 	xe_gt_assert(gt, xe_guc_ct_enabled(ct));
1108 
1109 	/* Calculate DW available to read */
1110 	tail = desc_read(xe, g2h, tail);
1111 	avail = tail - g2h->info.head;
1112 	if (unlikely(avail == 0))
1113 		return 0;
1114 
1115 	if (avail < 0)
1116 		avail += g2h->info.size;
1117 
1118 	/* Read header */
1119 	xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->info.head,
1120 			   sizeof(u32));
1121 	len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN;
1122 	if (len > avail) {
1123 		xe_gt_err(gt, "G2H channel broken on read, avail=%d, len=%d, reset required\n",
1124 			  avail, len);
1125 		g2h->info.broken = true;
1126 
1127 		return -EPROTO;
1128 	}
1129 
1130 	head = (g2h->info.head + 1) % g2h->info.size;
1131 	avail = len - 1;
1132 
1133 	/* Read G2H message */
1134 	if (avail + head > g2h->info.size) {
1135 		u32 avail_til_wrap = g2h->info.size - head;
1136 
1137 		xe_map_memcpy_from(xe, msg + 1,
1138 				   &g2h->cmds, sizeof(u32) * head,
1139 				   avail_til_wrap * sizeof(u32));
1140 		xe_map_memcpy_from(xe, msg + 1 + avail_til_wrap,
1141 				   &g2h->cmds, 0,
1142 				   (avail - avail_til_wrap) * sizeof(u32));
1143 	} else {
1144 		xe_map_memcpy_from(xe, msg + 1,
1145 				   &g2h->cmds, sizeof(u32) * head,
1146 				   avail * sizeof(u32));
1147 	}
1148 
1149 	hxg = msg_to_hxg(msg);
1150 	action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
1151 
1152 	if (fast_path) {
1153 		if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT)
1154 			return 0;
1155 
1156 		switch (action) {
1157 		case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
1158 		case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
1159 			break;	/* Process these in fast-path */
1160 		default:
1161 			return 0;
1162 		}
1163 	}
1164 
1165 	/* Update local / descriptor header */
1166 	g2h->info.head = (head + avail) % g2h->info.size;
1167 	desc_write(xe, g2h, head, g2h->info.head);
1168 
1169 	trace_xe_guc_ctb_g2h(ct_to_gt(ct)->info.id, action, len,
1170 			     g2h->info.head, tail);
1171 
1172 	return len;
1173 }
1174 
1175 static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
1176 {
1177 	struct xe_gt *gt = ct_to_gt(ct);
1178 	struct xe_guc *guc = ct_to_guc(ct);
1179 	u32 hxg_len = msg_len_to_hxg_len(len);
1180 	u32 *hxg = msg_to_hxg(msg);
1181 	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
1182 	u32 *payload = hxg + GUC_HXG_MSG_MIN_LEN;
1183 	u32 adj_len = hxg_len - GUC_HXG_MSG_MIN_LEN;
1184 	int ret = 0;
1185 
1186 	switch (action) {
1187 	case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
1188 		ret = xe_guc_pagefault_handler(guc, payload, adj_len);
1189 		break;
1190 	case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
1191 		__g2h_release_space(ct, len);
1192 		ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
1193 							   adj_len);
1194 		break;
1195 	default:
1196 		xe_gt_warn(gt, "NOT_POSSIBLE");
1197 	}
1198 
1199 	if (ret)
1200 		xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
1201 			  action, ERR_PTR(ret));
1202 }
1203 
1204 /**
1205  * xe_guc_ct_fast_path - process critical G2H in the IRQ handler
1206  * @ct: GuC CT object
1207  *
1208  * Anything related to page faults is critical for performance, process these
1209  * critical G2H in the IRQ. This is safe as these handlers either just wake up
1210  * waiters or queue another worker.
1211  */
1212 void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
1213 {
1214 	struct xe_device *xe = ct_to_xe(ct);
1215 	bool ongoing;
1216 	int len;
1217 
1218 	ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct));
1219 	if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
1220 		return;
1221 
1222 	spin_lock(&ct->fast_lock);
1223 	do {
1224 		len = g2h_read(ct, ct->fast_msg, true);
1225 		if (len > 0)
1226 			g2h_fast_path(ct, ct->fast_msg, len);
1227 	} while (len > 0);
1228 	spin_unlock(&ct->fast_lock);
1229 
1230 	if (ongoing)
1231 		xe_pm_runtime_put(xe);
1232 }
1233 
1234 /* Returns less than zero on error, 0 on done, 1 on more available */
1235 static int dequeue_one_g2h(struct xe_guc_ct *ct)
1236 {
1237 	int len;
1238 	int ret;
1239 
1240 	lockdep_assert_held(&ct->lock);
1241 
1242 	spin_lock_irq(&ct->fast_lock);
1243 	len = g2h_read(ct, ct->msg, false);
1244 	spin_unlock_irq(&ct->fast_lock);
1245 	if (len <= 0)
1246 		return len;
1247 
1248 	ret = parse_g2h_msg(ct, ct->msg, len);
1249 	if (unlikely(ret < 0))
1250 		return ret;
1251 
1252 	ret = process_g2h_msg(ct, ct->msg, len);
1253 	if (unlikely(ret < 0))
1254 		return ret;
1255 
1256 	return 1;
1257 }
1258 
1259 static void g2h_worker_func(struct work_struct *w)
1260 {
1261 	struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker);
1262 	struct xe_gt *gt = ct_to_gt(ct);
1263 	bool ongoing;
1264 	int ret;
1265 
1266 	/*
1267 	 * Normal users must always hold mem_access.ref around CT calls. However
1268 	 * during the runtime pm callbacks we rely on CT to talk to the GuC, but
1269 	 * at this stage we can't rely on mem_access.ref and even the
1270 	 * callback_task will be different than current.  For such cases we just
1271 	 * need to ensure we always process the responses from any blocking
1272 	 * ct_send requests or where we otherwise expect some response when
1273 	 * initiated from those callbacks (which will need to wait for the below
1274 	 * dequeue_one_g2h()).  The dequeue_one_g2h() will gracefully fail if
1275 	 * the device has suspended to the point that the CT communication has
1276 	 * been disabled.
1277 	 *
1278 	 * If we are inside the runtime pm callback, we can be the only task
1279 	 * still issuing CT requests (since that requires having the
1280 	 * mem_access.ref).  It seems like it might in theory be possible to
1281 	 * receive unsolicited events from the GuC just as we are
1282 	 * suspending-resuming, but those will currently anyway be lost when
1283 	 * eventually exiting from suspend, hence no need to wake up the device
1284 	 * here. If we ever need something stronger than get_if_ongoing() then
1285 	 * we need to be careful with blocking the pm callbacks from getting CT
1286 	 * responses, if the worker here is blocked on those callbacks
1287 	 * completing, creating a deadlock.
1288 	 */
1289 	ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct));
1290 	if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
1291 		return;
1292 
1293 	do {
1294 		mutex_lock(&ct->lock);
1295 		ret = dequeue_one_g2h(ct);
1296 		mutex_unlock(&ct->lock);
1297 
1298 		if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) {
1299 			struct drm_printer p = xe_gt_info_printer(gt);
1300 
1301 			xe_guc_ct_print(ct, &p, false);
1302 			kick_reset(ct);
1303 		}
1304 	} while (ret == 1);
1305 
1306 	if (ongoing)
1307 		xe_pm_runtime_put(ct_to_xe(ct));
1308 }
1309 
1310 static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
1311 				     struct guc_ctb_snapshot *snapshot,
1312 				     bool atomic)
1313 {
1314 	u32 head, tail;
1315 
1316 	xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
1317 			   sizeof(struct guc_ct_buffer_desc));
1318 	memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
1319 
1320 	snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32),
1321 				       atomic ? GFP_ATOMIC : GFP_KERNEL);
1322 
1323 	if (!snapshot->cmds) {
1324 		drm_err(&xe->drm, "Skipping CTB commands snapshot. Only CTB info will be available.\n");
1325 		return;
1326 	}
1327 
1328 	head = snapshot->desc.head;
1329 	tail = snapshot->desc.tail;
1330 
1331 	if (head != tail) {
1332 		struct iosys_map map =
1333 			IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32));
1334 
1335 		while (head != tail) {
1336 			snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32);
1337 			++head;
1338 			if (head == ctb->info.size) {
1339 				head = 0;
1340 				map = ctb->cmds;
1341 			} else {
1342 				iosys_map_incr(&map, sizeof(u32));
1343 			}
1344 		}
1345 	}
1346 }
1347 
1348 static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
1349 				   struct drm_printer *p)
1350 {
1351 	u32 head, tail;
1352 
1353 	drm_printf(p, "\tsize: %d\n", snapshot->info.size);
1354 	drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space);
1355 	drm_printf(p, "\thead: %d\n", snapshot->info.head);
1356 	drm_printf(p, "\ttail: %d\n", snapshot->info.tail);
1357 	drm_printf(p, "\tspace: %d\n", snapshot->info.space);
1358 	drm_printf(p, "\tbroken: %d\n", snapshot->info.broken);
1359 	drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
1360 	drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
1361 	drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
1362 
1363 	if (!snapshot->cmds)
1364 		return;
1365 
1366 	head = snapshot->desc.head;
1367 	tail = snapshot->desc.tail;
1368 
1369 	while (head != tail) {
1370 		drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
1371 			   snapshot->cmds[head]);
1372 		++head;
1373 		if (head == snapshot->info.size)
1374 			head = 0;
1375 	}
1376 }
1377 
1378 static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot)
1379 {
1380 	kfree(snapshot->cmds);
1381 }
1382 
1383 /**
1384  * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
1385  * @ct: GuC CT object.
1386  * @atomic: Boolean to indicate if this is called from atomic context like
1387  * reset or CTB handler or from some regular path like debugfs.
1388  *
1389  * This can be printed out in a later stage like during dev_coredump
1390  * analysis.
1391  *
1392  * Returns: a GuC CT snapshot object that must be freed by the caller
1393  * by using `xe_guc_ct_snapshot_free`.
1394  */
1395 struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
1396 						      bool atomic)
1397 {
1398 	struct xe_device *xe = ct_to_xe(ct);
1399 	struct xe_guc_ct_snapshot *snapshot;
1400 
1401 	snapshot = kzalloc(sizeof(*snapshot),
1402 			   atomic ? GFP_ATOMIC : GFP_KERNEL);
1403 
1404 	if (!snapshot) {
1405 		drm_err(&xe->drm, "Skipping CTB snapshot entirely.\n");
1406 		return NULL;
1407 	}
1408 
1409 	if (xe_guc_ct_enabled(ct) || ct->state == XE_GUC_CT_STATE_STOPPED) {
1410 		snapshot->ct_enabled = true;
1411 		snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
1412 		guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g,
1413 					 &snapshot->h2g, atomic);
1414 		guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h,
1415 					 &snapshot->g2h, atomic);
1416 	}
1417 
1418 	return snapshot;
1419 }
1420 
1421 /**
1422  * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot.
1423  * @snapshot: GuC CT snapshot object.
1424  * @p: drm_printer where it will be printed out.
1425  *
1426  * This function prints out a given GuC CT snapshot object.
1427  */
1428 void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
1429 			      struct drm_printer *p)
1430 {
1431 	if (!snapshot)
1432 		return;
1433 
1434 	if (snapshot->ct_enabled) {
1435 		drm_puts(p, "H2G CTB (all sizes in DW):\n");
1436 		guc_ctb_snapshot_print(&snapshot->h2g, p);
1437 
1438 		drm_puts(p, "\nG2H CTB (all sizes in DW):\n");
1439 		guc_ctb_snapshot_print(&snapshot->g2h, p);
1440 
1441 		drm_printf(p, "\tg2h outstanding: %d\n",
1442 			   snapshot->g2h_outstanding);
1443 	} else {
1444 		drm_puts(p, "CT disabled\n");
1445 	}
1446 }
1447 
1448 /**
1449  * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot.
1450  * @snapshot: GuC CT snapshot object.
1451  *
1452  * This function free all the memory that needed to be allocated at capture
1453  * time.
1454  */
1455 void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
1456 {
1457 	if (!snapshot)
1458 		return;
1459 
1460 	guc_ctb_snapshot_free(&snapshot->h2g);
1461 	guc_ctb_snapshot_free(&snapshot->g2h);
1462 	kfree(snapshot);
1463 }
1464 
1465 /**
1466  * xe_guc_ct_print - GuC CT Print.
1467  * @ct: GuC CT.
1468  * @p: drm_printer where it will be printed out.
1469  * @atomic: Boolean to indicate if this is called from atomic context like
1470  * reset or CTB handler or from some regular path like debugfs.
1471  *
1472  * This function quickly capture a snapshot and immediately print it out.
1473  */
1474 void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic)
1475 {
1476 	struct xe_guc_ct_snapshot *snapshot;
1477 
1478 	snapshot = xe_guc_ct_snapshot_capture(ct, atomic);
1479 	xe_guc_ct_snapshot_print(snapshot, p);
1480 	xe_guc_ct_snapshot_free(snapshot);
1481 }
1482