xref: /linux/drivers/gpu/drm/xe/xe_guc_capture.c (revision 643e2e259c2b25a2af0ae4c23c6e16586d9fd19c)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021-2024 Intel Corporation
4  */
5 
6 #include <linux/types.h>
7 
8 #include <drm/drm_managed.h>
9 #include <drm/drm_print.h>
10 
11 #include "abi/guc_actions_abi.h"
12 #include "abi/guc_capture_abi.h"
13 #include "abi/guc_log_abi.h"
14 #include "regs/xe_engine_regs.h"
15 #include "regs/xe_gt_regs.h"
16 #include "regs/xe_guc_regs.h"
17 #include "regs/xe_regs.h"
18 
19 #include "xe_bo.h"
20 #include "xe_device.h"
21 #include "xe_exec_queue_types.h"
22 #include "xe_gt.h"
23 #include "xe_gt_mcr.h"
24 #include "xe_gt_printk.h"
25 #include "xe_guc.h"
26 #include "xe_guc_ads.h"
27 #include "xe_guc_capture.h"
28 #include "xe_guc_capture_types.h"
29 #include "xe_guc_ct.h"
30 #include "xe_guc_exec_queue_types.h"
31 #include "xe_guc_log.h"
32 #include "xe_guc_submit_types.h"
33 #include "xe_guc_submit.h"
34 #include "xe_hw_engine_types.h"
35 #include "xe_hw_engine.h"
36 #include "xe_lrc.h"
37 #include "xe_macros.h"
38 #include "xe_map.h"
39 #include "xe_mmio.h"
40 #include "xe_sched_job.h"
41 
/*
 * struct __guc_capture_bufstate
 *
 * Book-keeping structure used to track read and write pointers
 * as we extract error capture data from the GuC-log-buffer's
 * error-capture region as a stream of dwords.
 */
struct __guc_capture_bufstate {
	u32 size;		/* size of the error-capture region */
	u32 data_offset;	/* start of the region within the GuC log buffer */
	u32 rd;			/* current read offset into the dword stream */
	u32 wr;			/* write offset (produced by GuC) */
};
55 
/*
 * struct __guc_capture_parsed_output - extracted error capture node
 *
 * A single unit of extracted error-capture output data grouped together
 * at an engine-instance level. We keep these nodes in a linked list.
 * See cachelist and outlist below.
 */
struct __guc_capture_parsed_output {
	/*
	 * A single set of 3 capture lists: a global-list,
	 * an engine-class-list and an engine-instance list.
	 * outlist in __guc_capture_parsed_output will keep
	 * a linked list of these nodes that will eventually
	 * be detached from outlist and attached into
	 * xe_codedump in response to a context reset
	 */
	struct list_head link;
	bool is_partial;	/* extraction ended before all lists were read */
	u32 eng_class;		/* GuC engine class of the failing engine */
	u32 eng_inst;		/* engine instance within the class */
	u32 guc_id;		/* GuC context id this capture belongs to */
	u32 lrca;		/* logical ring context address of the context */
	u32 type;
	bool locked;		/* node claimed by a coredump; don't recycle */
	enum xe_hw_engine_snapshot_source_id source;
	/* one register dump per capture type (global/class/instance) */
	struct gcap_reg_list_info {
		u32 vfid;
		u32 num_regs;
		struct guc_mmio_reg *regs;
	} reginfo[GUC_STATE_CAPTURE_TYPE_MAX];
#define GCAP_PARSED_REGLIST_INDEX_GLOBAL   BIT(GUC_STATE_CAPTURE_TYPE_GLOBAL)
#define GCAP_PARSED_REGLIST_INDEX_ENGCLASS BIT(GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS)
};
89 
90 /*
91  * Define all device tables of GuC error capture register lists
92  * NOTE:
93  *     For engine-registers, GuC only needs the register offsets
94  *     from the engine-mmio-base
95  *
96  *     64 bit registers need 2 entries for low 32 bit register and high 32 bit
97  *     register, for example:
98  *       Register           data_type       flags   mask    Register name
99  *     { XXX_REG_LO(0),  REG_64BIT_LOW_DW,    0,      0,      NULL},
 *     { XXX_REG_HI(0),  REG_64BIT_HI_DW,     0,      0,      "XXX_REG"},
101  *     1. data_type: Indicate is hi/low 32 bit for a 64 bit register
102  *                   A 64 bit register define requires 2 consecutive entries,
103  *                   with low dword first and hi dword the second.
 *     2. Register name: NULL for the incomplete (low-dword) entry of a 64 bit register
105  *     3. Incorrect order will trigger XE_WARN.
106  */
/* Global (non-engine) registers common to all XE_LP-era lists */
#define COMMON_XELP_BASE_GLOBAL \
	{ FORCEWAKE_GT,			REG_32BIT,	0,	0,	"FORCEWAKE_GT"}

/* Per-engine-instance registers shared by every engine class */
#define COMMON_BASE_ENGINE_INSTANCE \
	{ RING_HWSTAM(0),		REG_32BIT,	0,	0,	"HWSTAM"}, \
	{ RING_HWS_PGA(0),		REG_32BIT,	0,	0,	"RING_HWS_PGA"}, \
	{ RING_HEAD(0),			REG_32BIT,	0,	0,	"RING_HEAD"}, \
	{ RING_TAIL(0),			REG_32BIT,	0,	0,	"RING_TAIL"}, \
	{ RING_CTL(0),			REG_32BIT,	0,	0,	"RING_CTL"}, \
	{ RING_MI_MODE(0),		REG_32BIT,	0,	0,	"RING_MI_MODE"}, \
	{ RING_MODE(0),			REG_32BIT,	0,	0,	"RING_MODE"}, \
	{ RING_ESR(0),			REG_32BIT,	0,	0,	"RING_ESR"}, \
	{ RING_EMR(0),			REG_32BIT,	0,	0,	"RING_EMR"}, \
	{ RING_EIR(0),			REG_32BIT,	0,	0,	"RING_EIR"}, \
	{ RING_IMR(0),			REG_32BIT,	0,	0,	"RING_IMR"}, \
	{ RING_IPEHR(0),		REG_32BIT,	0,	0,	"IPEHR"}, \
	{ RING_INSTDONE(0),		REG_32BIT,	0,	0,	"RING_INSTDONE"}, \
	{ INDIRECT_RING_STATE(0),	REG_32BIT,	0,	0,	"INDIRECT_RING_STATE"}, \
	{ RING_ACTHD(0),		REG_64BIT_LOW_DW, 0,	0,	NULL}, \
	{ RING_ACTHD_UDW(0),		REG_64BIT_HI_DW, 0,	0,	"ACTHD"}, \
	{ RING_BBADDR(0),		REG_64BIT_LOW_DW, 0,	0,	NULL}, \
	{ RING_BBADDR_UDW(0),		REG_64BIT_HI_DW, 0,	0,	"RING_BBADDR"}, \
	{ RING_START(0),		REG_64BIT_LOW_DW, 0,	0,	NULL}, \
	{ RING_START_UDW(0),		REG_64BIT_HI_DW, 0,	0,	"RING_START"}, \
	{ RING_DMA_FADD(0),		REG_64BIT_LOW_DW, 0,	0,	NULL}, \
	{ RING_DMA_FADD_UDW(0),		REG_64BIT_HI_DW, 0,	0,	"RING_DMA_FADD"}, \
	{ RING_EXECLIST_STATUS_LO(0),	REG_64BIT_LOW_DW, 0,	0,	NULL}, \
	{ RING_EXECLIST_STATUS_HI(0),	REG_64BIT_HI_DW, 0,	0,	"RING_EXECLIST_STATUS"}, \
	{ RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0,	0,	NULL}, \
	{ RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0,	0,	"RING_EXECLIST_SQ_CONTENTS"}

/* Render/compute engine-class registers (all platforms) */
#define COMMON_XELP_RC_CLASS \
	{ RCU_MODE,			REG_32BIT,	0,	0,	"RCU_MODE"}

/* Render/compute INSTDONE class registers (pre-1255 only; steered on xehpg) */
#define COMMON_XELP_RC_CLASS_INSTDONE \
	{ SC_INSTDONE,			REG_32BIT,	0,	0,	"SC_INSTDONE"}, \
	{ SC_INSTDONE_EXTRA,		REG_32BIT,	0,	0,	"SC_INSTDONE_EXTRA"}, \
	{ SC_INSTDONE_EXTRA2,		REG_32BIT,	0,	0,	"SC_INSTDONE_EXTRA2"}

/* Video-enhance engine-class registers */
#define XELP_VEC_CLASS_REGS \
	{ SFC_DONE(0),			0,	0,	0,	"SFC_DONE[0]"}, \
	{ SFC_DONE(1),			0,	0,	0,	"SFC_DONE[1]"}, \
	{ SFC_DONE(2),			0,	0,	0,	"SFC_DONE[2]"}, \
	{ SFC_DONE(3),			0,	0,	0,	"SFC_DONE[3]"}
151 
/* XE_LP Global */
static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = {
	COMMON_XELP_BASE_GLOBAL,
};

/* Render / Compute Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_rc_inst_regs[] = {
	COMMON_BASE_ENGINE_INSTANCE,
};

/* Render / Compute Engine-Class */
static const struct __guc_mmio_reg_descr xe_rc_class_regs[] = {
	COMMON_XELP_RC_CLASS,
	COMMON_XELP_RC_CLASS_INSTDONE,
};

/*
 * Render / Compute Engine-Class for xehpg
 * (INSTDONE registers are steered on xehpg, so they are generated at
 * run-time into extlists instead of being listed statically here)
 */
static const struct __guc_mmio_reg_descr xe_hpg_rc_class_regs[] = {
	COMMON_XELP_RC_CLASS,
};

/* Media Decode/Encode Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_vd_inst_regs[] = {
	COMMON_BASE_ENGINE_INSTANCE,
};

/* Video Enhancement Engine-Class */
static const struct __guc_mmio_reg_descr xe_vec_class_regs[] = {
	XELP_VEC_CLASS_REGS,
};

/* Video Enhancement Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_vec_inst_regs[] = {
	COMMON_BASE_ENGINE_INSTANCE,
};

/* Blitter Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_blt_inst_regs[] = {
	COMMON_BASE_ENGINE_INSTANCE,
};

/* XE_LP - GSC Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_lp_gsc_inst_regs[] = {
	COMMON_BASE_ENGINE_INSTANCE,
};

/*
 * Empty list to prevent warnings about unknown class/instance types
 * as not all class/instance types have entries on all platforms.
 */
static const struct __guc_mmio_reg_descr empty_regs_list[] = {
};
204 
#define TO_GCAP_DEF_OWNER(x) (GUC_CAPTURE_LIST_INDEX_##x)
#define TO_GCAP_DEF_TYPE(x) (GUC_STATE_CAPTURE_TYPE_##x)
/*
 * Build one __guc_mmio_reg_descr_group entry:
 * { list, num_regs, owner, type, engine-class }
 */
#define MAKE_REGLIST(regslist, regsowner, regstype, class) \
	{ \
		regslist, \
		ARRAY_SIZE(regslist), \
		TO_GCAP_DEF_OWNER(regsowner), \
		TO_GCAP_DEF_TYPE(regstype), \
		class \
	}

/* List of lists for legacy graphic product version < 1255 */
static const struct __guc_mmio_reg_descr_group xe_lp_lists[] = {
	MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
	MAKE_REGLIST(xe_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
	MAKE_REGLIST(xe_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
	MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
	MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
	MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
	MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
	MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
	{}	/* NULL .list terminates the walk in guc_capture_get_one_list() */
};

/* List of lists for graphic product version >= 1255 */
static const struct __guc_mmio_reg_descr_group xe_hpg_lists[] = {
	MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
	MAKE_REGLIST(xe_hpg_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
	MAKE_REGLIST(xe_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
	MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
	MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
	MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
	MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
	MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
	{}
};
247 
/* Printable names, indexed by GUC_STATE_CAPTURE_TYPE_* */
static const char * const capture_list_type_names[] = {
	"Global",
	"Class",
	"Instance",
};

/* Printable names, indexed by GUC_CAPTURE_LIST_CLASS_* */
static const char * const capture_engine_class_names[] = {
	"Render/Compute",
	"Video",
	"VideoEnhance",
	"Blitter",
	"GSC-Other",
};
261 
/* Cached result of building one ADS capture list (including failures) */
struct __guc_capture_ads_cache {
	bool is_valid;	/* entry populated; ptr/size/status are meaningful */
	void *ptr;	/* cached list buffer (NULL when status is an error) */
	size_t size;	/* page-aligned size of the cached list */
	int status;	/* 0 or the error code from building the list */
};
268 
/* Top-level state for the GuC error-capture machinery */
struct xe_guc_state_capture {
	/* static per-platform register lists (selected at init) */
	const struct __guc_mmio_reg_descr_group *reglists;
	/**
	 * NOTE: steered registers have multiple instances depending on the HW configuration
	 * (slices or dual-sub-slices) and thus depends on HW fuses discovered
	 */
	struct __guc_mmio_reg_descr_group *extlists;
	/* per owner/type/class cache of lists handed to the GuC ADS */
	struct __guc_capture_ads_cache ads_cache[GUC_CAPTURE_LIST_INDEX_MAX]
						[GUC_STATE_CAPTURE_TYPE_MAX]
						[GUC_CAPTURE_LIST_CLASS_MAX];
	void *ads_null_cache;	/* cached empty-list header for unused slots */
	/* pool of blank capture nodes ready for reuse */
	struct list_head cachelist;
#define PREALLOC_NODES_MAX_COUNT (3 * GUC_MAX_ENGINE_CLASSES * GUC_MAX_INSTANCES_PER_CLASS)
#define PREALLOC_NODES_DEFAULT_NUMREGS 64

	/* largest register count any node's reginfo array must hold */
	int max_mmio_per_node;
	/* extracted capture nodes awaiting consumption by coredump */
	struct list_head outlist;
};
287 
288 static void
289 guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc,
290 					   struct __guc_capture_parsed_output *node);
291 
292 static const struct __guc_mmio_reg_descr_group *
293 guc_capture_get_device_reglist(struct xe_device *xe)
294 {
295 	if (GRAPHICS_VERx100(xe) >= 1255)
296 		return xe_hpg_lists;
297 	else
298 		return xe_lp_lists;
299 }
300 
301 static const struct __guc_mmio_reg_descr_group *
302 guc_capture_get_one_list(const struct __guc_mmio_reg_descr_group *reglists,
303 			 u32 owner, u32 type, enum guc_capture_list_class_type capture_class)
304 {
305 	int i;
306 
307 	if (!reglists)
308 		return NULL;
309 
310 	for (i = 0; reglists[i].list; ++i) {
311 		if (reglists[i].owner == owner && reglists[i].type == type &&
312 		    (reglists[i].engine == capture_class ||
313 		     reglists[i].type == GUC_STATE_CAPTURE_TYPE_GLOBAL))
314 			return &reglists[i];
315 	}
316 
317 	return NULL;
318 }
319 
320 const struct __guc_mmio_reg_descr_group *
321 xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type,
322 				 enum guc_capture_list_class_type capture_class, bool is_ext)
323 {
324 	const struct __guc_mmio_reg_descr_group *reglists;
325 
326 	if (is_ext) {
327 		struct xe_guc *guc = &gt->uc.guc;
328 
329 		reglists = guc->capture->extlists;
330 	} else {
331 		reglists = guc_capture_get_device_reglist(gt_to_xe(gt));
332 	}
333 	return guc_capture_get_one_list(reglists, owner, type, capture_class);
334 }
335 
/* Pairing of a steered (MCR) register with its printable name */
struct __ext_steer_reg {
	const char *name;
	struct xe_reg_mcr reg;
};

/* Steered registers captured on all platforms */
static const struct __ext_steer_reg xe_extregs[] = {
	{"SAMPLER_INSTDONE",		SAMPLER_INSTDONE},
	{"ROW_INSTDONE",		ROW_INSTDONE}
};

/* Additional steered registers for graphics version >= 1255 (xehpg) */
static const struct __ext_steer_reg xehpg_extregs[] = {
	{"SC_INSTDONE",			XEHPG_SC_INSTDONE},
	{"SC_INSTDONE_EXTRA",		XEHPG_SC_INSTDONE_EXTRA},
	{"SC_INSTDONE_EXTRA2",		XEHPG_SC_INSTDONE_EXTRA2},
	{"INSTDONE_GEOM_SVGUNIT",	XEHPG_INSTDONE_GEOM_SVGUNIT}
};
352 
353 static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
354 			   const struct __ext_steer_reg *extlist,
355 			   int slice_id, int subslice_id)
356 {
357 	if (!ext || !extlist)
358 		return;
359 
360 	ext->reg = XE_REG(extlist->reg.__reg.addr);
361 	ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1);
362 	ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
363 	ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
364 	ext->regname = extlist->name;
365 }
366 
367 static int
368 __alloc_ext_regs(struct drm_device *drm, struct __guc_mmio_reg_descr_group *newlist,
369 		 const struct __guc_mmio_reg_descr_group *rootlist, int num_regs)
370 {
371 	struct __guc_mmio_reg_descr *list;
372 
373 	list = drmm_kzalloc(drm, num_regs * sizeof(struct __guc_mmio_reg_descr), GFP_KERNEL);
374 	if (!list)
375 		return -ENOMEM;
376 
377 	newlist->list = list;
378 	newlist->num_regs = num_regs;
379 	newlist->owner = rootlist->owner;
380 	newlist->engine = rootlist->engine;
381 	newlist->type = rootlist->type;
382 
383 	return 0;
384 }
385 
386 static int guc_capture_get_steer_reg_num(struct xe_device *xe)
387 {
388 	int num = ARRAY_SIZE(xe_extregs);
389 
390 	if (GRAPHICS_VERx100(xe) >= 1255)
391 		num += ARRAY_SIZE(xehpg_extregs);
392 
393 	return num;
394 }
395 
/*
 * Build the run-time steered-register list (extlists) for this GT: one
 * descriptor per steered register per enabled DSS, sized from the fuse
 * topology. No-op if the GT has no render/compute engine, the platform
 * has no render-class reglist, or extlists was already built.
 */
static void guc_capture_alloc_steered_lists(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	u16 slice, subslice;
	int iter, i, total = 0;
	const struct __guc_mmio_reg_descr_group *lists = guc->capture->reglists;
	const struct __guc_mmio_reg_descr_group *list;
	struct __guc_mmio_reg_descr_group *extlists;
	struct __guc_mmio_reg_descr *extarray;
	bool has_xehpg_extregs = GRAPHICS_VERx100(gt_to_xe(gt)) >= 1255;
	struct drm_device *drm = &gt_to_xe(gt)->drm;
	bool has_rcs_ccs = false;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	/*
	 * If the GT has no rcs/ccs, no need to alloc a steered list.
	 * Currently only rcs/ccs have steered registers; if other engine
	 * types gain steered registers in the future, this condition check
	 * needs to be extended.
	 */
	for_each_hw_engine(hwe, gt, id) {
		if (xe_engine_class_to_guc_capture_class(hwe->class) ==
		    GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
			has_rcs_ccs = true;
			break;
		}
	}

	if (!has_rcs_ccs)
		return;

	/* steered registers currently only exist for the render-class */
	list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF,
					GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
					GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE);
	/*
	 * Skip if this platform has no engine class registers or if extlists
	 * was previously allocated
	 */
	if (!list || guc->capture->extlists)
		return;

	/* total = (#enabled DSS from the geometry fuse mask) x (regs per DSS) */
	total = bitmap_weight(gt->fuse_topo.g_dss_mask, sizeof(gt->fuse_topo.g_dss_mask) * 8) *
		guc_capture_get_steer_reg_num(guc_to_xe(guc));

	if (!total)
		return;

	/* allocate an extra for an end marker */
	extlists = drmm_kzalloc(drm, 2 * sizeof(struct __guc_mmio_reg_descr_group), GFP_KERNEL);
	if (!extlists)
		return;

	if (__alloc_ext_regs(drm, &extlists[0], list, total)) {
		drmm_kfree(drm, extlists);
		return;
	}

	/* For steering registers, the list is generated at run-time */
	extarray = (struct __guc_mmio_reg_descr *)extlists[0].list;
	for_each_dss_steering(iter, gt, slice, subslice) {
		for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) {
			__fill_ext_reg(extarray, &xe_extregs[i], slice, subslice);
			++extarray;
		}

		if (has_xehpg_extregs)
			for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) {
				__fill_ext_reg(extarray, &xehpg_extregs[i], slice, subslice);
				++extarray;
			}
	}

	extlists[0].num_regs = total;

	xe_gt_dbg(guc_to_gt(guc), "capture found %d ext-regs.\n", total);
	guc->capture->extlists = extlists;
}
475 
/*
 * Fill @ptr (the GuC-facing capture list) with up to @num_entries register
 * descriptors for owner/type/class: first the static reglist, then the
 * run-time steered list (extlists) when one exists.
 *
 * Returns 0 on success, -ENODEV when the device has no reglists, or
 * -ENODATA when no list matches the combination.
 */
static int
guc_capture_list_init(struct xe_guc *guc, u32 owner, u32 type,
		      enum guc_capture_list_class_type capture_class, struct guc_mmio_reg *ptr,
		      u16 num_entries)
{
	u32 ptr_idx = 0, list_idx = 0;
	const struct __guc_mmio_reg_descr_group *reglists = guc->capture->reglists;
	struct __guc_mmio_reg_descr_group *extlists = guc->capture->extlists;
	const struct __guc_mmio_reg_descr_group *match;
	u32 list_num;

	if (!reglists)
		return -ENODEV;

	match = guc_capture_get_one_list(reglists, owner, type, capture_class);
	if (!match)
		return -ENODATA;

	list_num = match->num_regs;
	for (list_idx = 0; ptr_idx < num_entries && list_idx < list_num; ++list_idx, ++ptr_idx) {
		ptr[ptr_idx].offset = match->list[list_idx].reg.addr;
		/* poison default value so never-captured entries stand out */
		ptr[ptr_idx].value = 0xDEADF00D;
		ptr[ptr_idx].flags = match->list[list_idx].flags;
		ptr[ptr_idx].mask = match->list[list_idx].mask;
	}

	/* steered registers (if generated) are appended after the static list */
	match = guc_capture_get_one_list(extlists, owner, type, capture_class);
	if (match)
		for (ptr_idx = list_num, list_idx = 0;
		     ptr_idx < num_entries && list_idx < match->num_regs;
		     ++ptr_idx, ++list_idx) {
			ptr[ptr_idx].offset = match->list[list_idx].reg.addr;
			ptr[ptr_idx].value = 0xDEADF00D;
			ptr[ptr_idx].flags = match->list[list_idx].flags;
			ptr[ptr_idx].mask = match->list[list_idx].mask;
		}

	if (ptr_idx < num_entries)
		xe_gt_dbg(guc_to_gt(guc), "Got short capture reglist init: %d out-of %d.\n",
			  ptr_idx, num_entries);

	return 0;
}
519 
/*
 * Count the registers for owner/type/class: static reglist plus the
 * run-time steered list. If the steered list has not been generated yet
 * (pre-hwconfig, indicated by max_mmio_per_node still being zero), use a
 * worst-case estimate for the PF render/compute class based on the
 * maximum possible DSS fuse bits.
 */
static int
guc_cap_list_num_regs(struct xe_guc *guc, u32 owner, u32 type,
		      enum guc_capture_list_class_type capture_class)
{
	const struct __guc_mmio_reg_descr_group *match;
	int num_regs = 0;

	match = guc_capture_get_one_list(guc->capture->reglists, owner, type, capture_class);
	if (match)
		num_regs = match->num_regs;

	match = guc_capture_get_one_list(guc->capture->extlists, owner, type, capture_class);
	if (match)
		num_regs += match->num_regs;
	else
		/*
		 * If a caller wants the full register dump size but we have
		 * not yet got the hw-config, which is before max_mmio_per_node
		 * is initialized, then provide a worst-case number for
		 * extlists based on max dss fuse bits, but only ever for
		 * render/compute
		 */
		if (owner == GUC_CAPTURE_LIST_INDEX_PF &&
		    type == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
		    capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE &&
		    !guc->capture->max_mmio_per_node)
			num_regs += guc_capture_get_steer_reg_num(guc_to_xe(guc)) *
				    XE_MAX_DSS_FUSE_BITS;

	return num_regs;
}
551 
/*
 * Compute the page-aligned size of the capture list for owner/type/class,
 * consulting (but not populating) the ADS cache. When @is_purpose_est is
 * set, missing-reglist warnings are suppressed since intentional gaps are
 * expected during size estimation.
 *
 * Returns 0 and writes *@size on success; -ENODEV when there is no
 * reglist (or a PF list is missing); -ENODATA for an intentionally empty
 * list.
 */
static int
guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type,
			enum guc_capture_list_class_type capture_class,
			size_t *size, bool is_purpose_est)
{
	struct xe_guc_state_capture *gc = guc->capture;
	struct xe_gt *gt = guc_to_gt(guc);
	struct __guc_capture_ads_cache *cache;
	int num_regs;

	xe_gt_assert(gt, type < GUC_STATE_CAPTURE_TYPE_MAX);
	xe_gt_assert(gt, capture_class < GUC_CAPTURE_LIST_CLASS_MAX);

	cache = &gc->ads_cache[owner][type][capture_class];
	if (!gc->reglists) {
		xe_gt_warn(gt, "No capture reglist for this device\n");
		return -ENODEV;
	}

	/* a valid cache entry answers both size and status immediately */
	if (cache->is_valid) {
		*size = cache->size;
		return cache->status;
	}

	if (!is_purpose_est && owner == GUC_CAPTURE_LIST_INDEX_PF &&
	    !guc_capture_get_one_list(gc->reglists, owner, type, capture_class)) {
		if (type == GUC_STATE_CAPTURE_TYPE_GLOBAL)
			xe_gt_warn(gt, "Missing capture reglist: global!\n");
		else
			xe_gt_warn(gt, "Missing capture reglist: %s(%u):%s(%u)!\n",
				   capture_list_type_names[type], type,
				   capture_engine_class_names[capture_class], capture_class);
		return -ENODEV;
	}

	num_regs = guc_cap_list_num_regs(guc, owner, type, capture_class);
	/* intentional empty lists can exist depending on hw config */
	if (!num_regs)
		return -ENODATA;

	if (size)
		*size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
				   (num_regs * sizeof(struct guc_mmio_reg)));

	return 0;
}
598 
599 /**
600  * xe_guc_capture_getlistsize - Get list size for owner/type/class combination
601  * @guc: The GuC object
602  * @owner: PF/VF owner
603  * @type: GuC capture register type
604  * @capture_class: GuC capture engine class id
605  * @size: Point to the size
606  *
607  * This function will get the list for the owner/type/class combination, and
608  * return the page aligned list size.
609  *
610  * Returns: 0 on success or a negative error code on failure.
611  */
612 int
613 xe_guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type,
614 			   enum guc_capture_list_class_type capture_class, size_t *size)
615 {
616 	return guc_capture_getlistsize(guc, owner, type, capture_class, size, false);
617 }
618 
/**
 * xe_guc_capture_getlist - Get register capture list for owner/type/class
 * combination
 * @guc:	The GuC object
 * @owner:	PF/VF owner
 * @type:	GuC capture register type
 * @capture_class:	GuC capture engine class id
 * @outptr:	Point to cached register capture list
 *
 * This function will get the register capture list for the owner/type/class
 * combination.
 *
 * Returns: 0 on success or a negative error code on failure.
 */
int
xe_guc_capture_getlist(struct xe_guc *guc, u32 owner, u32 type,
		       enum guc_capture_list_class_type capture_class, void **outptr)
{
	struct xe_guc_state_capture *gc = guc->capture;
	struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][capture_class];
	struct guc_debug_capture_list *listnode;
	int ret, num_regs;
	u8 *caplist, *tmp;
	size_t size = 0;

	if (!gc->reglists)
		return -ENODEV;

	if (cache->is_valid) {
		*outptr = cache->ptr;
		return cache->status;
	}

	ret = xe_guc_capture_getlistsize(guc, owner, type, capture_class, &size);
	if (ret) {
		/* cache the failure too, so future callers don't re-derive it */
		cache->is_valid = true;
		cache->ptr = NULL;
		cache->size = 0;
		cache->status = ret;
		return ret;
	}

	caplist = drmm_kzalloc(guc_to_drm(guc), size, GFP_KERNEL);
	if (!caplist)
		return -ENOMEM;

	/* populate capture list header */
	tmp = caplist;
	num_regs = guc_cap_list_num_regs(guc, owner, type, capture_class);
	listnode = (struct guc_debug_capture_list *)tmp;
	listnode->header.info = FIELD_PREP(GUC_CAPTURELISTHDR_NUMDESCR, (u32)num_regs);

	/* populate list of register descriptor immediately after the header */
	tmp += sizeof(struct guc_debug_capture_list);
	guc_capture_list_init(guc, owner, type, capture_class,
			      (struct guc_mmio_reg *)tmp, num_regs);

	/* cache this list */
	cache->is_valid = true;
	cache->ptr = caplist;
	cache->size = size;
	cache->status = 0;

	*outptr = caplist;

	return 0;
}
686 
687 /**
688  * xe_guc_capture_getnullheader - Get a null list for register capture
689  * @guc:	The GuC object
690  * @outptr:	Point to cached register capture list
691  * @size:	Point to the size
692  *
693  * This function will alloc for a null list for register capture.
694  *
695  * Returns: 0 on success or a negative error code on failure.
696  */
697 int
698 xe_guc_capture_getnullheader(struct xe_guc *guc, void **outptr, size_t *size)
699 {
700 	struct xe_guc_state_capture *gc = guc->capture;
701 	int tmp = sizeof(u32) * 4;
702 	void *null_header;
703 
704 	if (gc->ads_null_cache) {
705 		*outptr = gc->ads_null_cache;
706 		*size = tmp;
707 		return 0;
708 	}
709 
710 	null_header = drmm_kzalloc(guc_to_drm(guc), tmp, GFP_KERNEL);
711 	if (!null_header)
712 		return -ENOMEM;
713 
714 	gc->ads_null_cache = null_header;
715 	*outptr = null_header;
716 	*size = tmp;
717 
718 	return 0;
719 }
720 
/**
 * xe_guc_capture_ads_input_worst_size - Calculate the worst size for GuC register capture
 * @guc: point to xe_guc structure
 *
 * Calculate the worst size for GuC register capture by including all possible engines classes.
 *
 * Returns: Calculated size
 */
size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc)
{
	size_t total_size, class_size, instance_size, global_size;
	int i, j;

	/*
	 * This function calculates the worst case register lists size by
	 * including all possible engines classes. It is called during the
	 * first of a two-phase GuC (and ADS-population) initialization
	 * sequence, that is, during the pre-hwconfig phase before we have
	 * the exact engine fusing info.
	 */
	total_size = PAGE_SIZE;	/* Pad a page in front for empty lists */
	/* i iterates owners (PF/VF), j iterates engine classes */
	for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
		for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) {
			/* a failed lookup contributes zero, not an error */
			if (xe_guc_capture_getlistsize(guc, i,
						       GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
						       j, &class_size) < 0)
				class_size = 0;
			if (xe_guc_capture_getlistsize(guc, i,
						       GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
						       j, &instance_size) < 0)
				instance_size = 0;
			total_size += class_size + instance_size;
		}
		/* one global list per owner, independent of engine class */
		if (xe_guc_capture_getlistsize(guc, i,
					       GUC_STATE_CAPTURE_TYPE_GLOBAL,
					       0, &global_size) < 0)
			global_size = 0;
		total_size += global_size;
	}

	return PAGE_ALIGN(total_size);
}
763 
/*
 * Estimate the worst-case size of the GuC error-capture output stream:
 * every engine instance failing at once, each producing a group header
 * plus its global, class and instance register dumps.
 *
 * Returns the estimated byte count, or -ENODEV if capture is not set up.
 */
static int guc_capture_output_size_est(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	int capture_size = 0;
	size_t tmp = 0;

	if (!guc->capture)
		return -ENODEV;

	/*
	 * If every single engine-instance suffered a failure in quick succession but
	 * were all unrelated, then a burst of multiple error-capture events would dump
	 * registers for every one engine instance, one at a time. In this case, GuC
	 * would even dump the global-registers repeatedly.
	 *
	 * For each engine instance, there would be 1 x guc_state_capture_group_t output
	 * followed by 3 x guc_state_capture_t lists. The latter is how the register
	 * dumps are split across different register types (where the '3' are global vs class
	 * vs instance).
	 */
	for_each_hw_engine(hwe, gt, id) {
		enum guc_capture_list_class_type capture_class;

		capture_class = xe_engine_class_to_guc_capture_class(hwe->class);
		capture_size += sizeof(struct guc_state_capture_group_header_t) +
					 (3 * sizeof(struct guc_state_capture_header_t));

		/* is_purpose_est = true: missing lists are expected, no warning */
		if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_GLOBAL,
					     0, &tmp, true))
			capture_size += tmp;
		if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
					     capture_class, &tmp, true))
			capture_size += tmp;
		if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
					     capture_class, &tmp, true))
			capture_size += tmp;
	}

	return capture_size;
}
807 
808 /*
809  * Add on a 3x multiplier to allow for multiple back-to-back captures occurring
810  * before the Xe can read the data out and process it
811  */
812 #define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
813 
814 static void check_guc_capture_size(struct xe_guc *guc)
815 {
816 	int capture_size = guc_capture_output_size_est(guc);
817 	int spare_size = capture_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
818 	u32 buffer_size = xe_guc_log_section_size_capture(&guc->log);
819 
820 	/*
821 	 * NOTE: capture_size is much smaller than the capture region
822 	 * allocation (DG2: <80K vs 1MB).
823 	 * Additionally, its based on space needed to fit all engines getting
824 	 * reset at once within the same G2H handler task slot. This is very
825 	 * unlikely. However, if GuC really does run out of space for whatever
826 	 * reason, we will see an separate warning message when processing the
827 	 * G2H event capture-notification, search for:
828 	 * xe_guc_STATE_CAPTURE_EVENT_STATUS_NOSPACE.
829 	 */
830 	if (capture_size < 0)
831 		xe_gt_dbg(guc_to_gt(guc),
832 			  "Failed to calculate error state capture buffer minimum size: %d!\n",
833 			  capture_size);
834 	if (capture_size > buffer_size)
835 		xe_gt_dbg(guc_to_gt(guc), "Error state capture buffer maybe small: %d < %d\n",
836 			  buffer_size, capture_size);
837 	else if (spare_size > buffer_size)
838 		xe_gt_dbg(guc_to_gt(guc),
839 			  "Error state capture buffer lacks spare size: %d < %d (min = %d)\n",
840 			  buffer_size, spare_size, capture_size);
841 }
842 
/* Link @node at the head of @list (cachelist or outlist) */
static void
guc_capture_add_node_to_list(struct __guc_capture_parsed_output *node,
			     struct list_head *list)
{
	list_add(&node->link, list);
}
849 
/*
 * Publish @node on the outlist (captures awaiting consumption), first
 * evicting any stale unlocked nodes carrying the same guc_id.
 */
static void
guc_capture_add_node_to_outlist(struct xe_guc_state_capture *gc,
				struct __guc_capture_parsed_output *node)
{
	guc_capture_remove_stale_matches_from_list(gc, node);
	guc_capture_add_node_to_list(node, &gc->outlist);
}
857 
/* Return @node to the cachelist pool of reusable blank nodes */
static void
guc_capture_add_node_to_cachelist(struct xe_guc_state_capture *gc,
				  struct __guc_capture_parsed_output *node)
{
	guc_capture_add_node_to_list(node, &gc->cachelist);
}
864 
865 static void
866 guc_capture_free_outlist_node(struct xe_guc_state_capture *gc,
867 			      struct __guc_capture_parsed_output *n)
868 {
869 	if (n) {
870 		n->locked = 0;
871 		list_del(&n->link);
872 		/* put node back to cache list */
873 		guc_capture_add_node_to_cachelist(gc, n);
874 	}
875 }
876 
/*
 * Evict every unlocked outlist node (other than @node itself) that shares
 * @node's guc_id - those captures are stale and superseded by @node.
 */
static void
guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc,
					   struct __guc_capture_parsed_output *node)
{
	struct __guc_capture_parsed_output *n, *ntmp;
	int guc_id = node->guc_id;

	/* _safe iteration: guc_capture_free_outlist_node unlinks entries */
	list_for_each_entry_safe(n, ntmp, &gc->outlist, link) {
		if (n != node && !n->locked && n->guc_id == guc_id)
			guc_capture_free_outlist_node(gc, n);
	}
}
889 
/*
 * Reset a capture node for reuse: zero the node and its register arrays
 * while preserving the pre-allocated register-array pointers themselves.
 */
static void
guc_capture_init_node(struct xe_guc *guc, struct __guc_capture_parsed_output *node)
{
	struct guc_mmio_reg *tmp[GUC_STATE_CAPTURE_TYPE_MAX];
	int i;

	/* stash the reg-array pointers so the node memset can't lose them */
	for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
		tmp[i] = node->reginfo[i].regs;
		memset(tmp[i], 0, sizeof(struct guc_mmio_reg) *
		       guc->capture->max_mmio_per_node);
	}
	memset(node, 0, sizeof(*node));
	for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i)
		node->reginfo[i].regs = tmp[i];

	INIT_LIST_HEAD(&node->link);
}
907 
908 /**
909  * DOC: Init, G2H-event and reporting flows for GuC-error-capture
910  *
911  * KMD Init time flows:
912  * --------------------
913  *     --> alloc A: GuC input capture regs lists (registered to GuC via ADS).
914  *                  xe_guc_ads acquires the register lists by calling
915  *                  xe_guc_capture_getlistsize and xe_guc_capture_getlist 'n' times,
916  *                  where n = 1 for global-reg-list +
917  *                            num_engine_classes for class-reg-list +
918  *                            num_engine_classes for instance-reg-list
919  *                               (since all instances of the same engine-class type
920  *                                have an identical engine-instance register-list).
921  *                  ADS module also calls separately for PF vs VF.
922  *
923  *     --> alloc B: GuC output capture buf (registered via guc_init_params(log_param))
 *                  Size = #define CAPTURE_BUFFER_SIZE (a warning is printed if it is too small)
 *                  Note: the buffer is sized 'x 3' to hold multiple capture groups
926  *
927  * GUC Runtime notify capture:
928  * --------------------------
929  *     --> G2H STATE_CAPTURE_NOTIFICATION
930  *                   L--> xe_guc_capture_process
931  *                           L--> Loop through B (head..tail) and for each engine instance's
932  *                                err-state-captured register-list we find, we alloc 'C':
933  *      --> alloc C: A capture-output-node structure that includes misc capture info along
934  *                   with 3 register list dumps (global, engine-class and engine-instance)
935  *                   This node is created from a pre-allocated list of blank nodes in
936  *                   guc->capture->cachelist and populated with the error-capture
937  *                   data from GuC and then it's added into guc->capture->outlist linked
938  *                   list. This list is used for matchup and printout by xe_devcoredump_read
939  *                   and xe_engine_snapshot_print, (when user invokes the devcoredump sysfs).
940  *
941  * GUC --> notify context reset:
942  * -----------------------------
943  *     --> guc_exec_queue_timedout_job
944  *                   L--> xe_devcoredump
945  *                          L--> devcoredump_snapshot
946  *                               --> xe_hw_engine_snapshot_capture
947  *                               --> xe_engine_manual_capture(For manual capture)
948  *
949  * User Sysfs / Debugfs
950  * --------------------
951  *      --> xe_devcoredump_read->
952  *             L--> xxx_snapshot_print
953  *                    L--> xe_engine_snapshot_print
954  *                         Print register lists values saved at
955  *                         guc->capture->outlist
956  *
957  */
958 
959 static int guc_capture_buf_cnt(struct __guc_capture_bufstate *buf)
960 {
961 	if (buf->wr >= buf->rd)
962 		return (buf->wr - buf->rd);
963 	return (buf->size - buf->rd) + buf->wr;
964 }
965 
966 static int guc_capture_buf_cnt_to_end(struct __guc_capture_bufstate *buf)
967 {
968 	if (buf->rd > buf->wr)
969 		return (buf->size - buf->rd);
970 	return (buf->wr - buf->rd);
971 }
972 
973 /*
974  * GuC's error-capture output is a ring buffer populated in a byte-stream fashion:
975  *
976  * The GuC Log buffer region for error-capture is managed like a ring buffer.
977  * The GuC firmware dumps error capture logs into this ring in a byte-stream flow.
978  * Additionally, as per the current and foreseeable future, all packed error-
979  * capture output structures are dword aligned.
980  *
981  * That said, if the GuC firmware is in the midst of writing a structure that is larger
 * than one dword but the tail end of the err-capture buffer-region has less space left,
983  * we would need to extract that structure one dword at a time straddled across the end,
984  * onto the start of the ring.
985  *
986  * Below function, guc_capture_log_remove_bytes is a helper for that. All callers of this
987  * function would typically do a straight-up memcpy from the ring contents and will only
988  * call this helper if their structure-extraction is straddling across the end of the
989  * ring. GuC firmware does not add any padding. The reason for the no-padding is to ease
990  * scalability for future expansion of output data types without requiring a redesign
991  * of the flow controls.
992  */
/*
 * Drain @bytes_needed bytes (must be dword-aligned) from the error-capture
 * ring into @out, wrapping across the end of the ring if required.
 * Returns the number of bytes actually copied, or -1 if fewer than
 * @bytes_needed bytes are available in the ring.
 */
static int
guc_capture_log_remove_bytes(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
			     void *out, int bytes_needed)
{
#define GUC_CAPTURE_LOG_BUF_COPY_RETRY_MAX	3

	int fill_size = 0, tries = GUC_CAPTURE_LOG_BUF_COPY_RETRY_MAX;
	int copy_size, avail;

	xe_assert(guc_to_xe(guc), bytes_needed % sizeof(u32) == 0);

	if (bytes_needed > guc_capture_buf_cnt(buf))
		return -1;

	/* bounded retries guard against a pathological rd/wr state looping forever */
	while (bytes_needed > 0 && tries--) {
		int misaligned;

		avail = guc_capture_buf_cnt_to_end(buf);
		misaligned = avail % sizeof(u32);
		/* wrap if at end */
		if (!avail) {
			/* output stream clipped */
			if (!buf->rd)
				return fill_size;
			buf->rd = 0;
			continue;
		}

		/* Only copy to u32 aligned data */
		copy_size = avail < bytes_needed ? avail - misaligned : bytes_needed;
		xe_map_memcpy_from(guc_to_xe(guc), out + fill_size, &guc->log.bo->vmap,
				   buf->data_offset + buf->rd, copy_size);
		buf->rd += copy_size;
		fill_size += copy_size;
		bytes_needed -= copy_size;

		if (misaligned)
			xe_gt_warn(guc_to_gt(guc),
				   "Bytes extraction not dword aligned, clipping.\n");
	}

	return fill_size;
}
1036 
1037 static int
1038 guc_capture_log_get_group_hdr(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
1039 			      struct guc_state_capture_group_header_t *ghdr)
1040 {
1041 	int fullsize = sizeof(struct guc_state_capture_group_header_t);
1042 
1043 	if (guc_capture_log_remove_bytes(guc, buf, ghdr, fullsize) != fullsize)
1044 		return -1;
1045 	return 0;
1046 }
1047 
1048 static int
1049 guc_capture_log_get_data_hdr(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
1050 			     struct guc_state_capture_header_t *hdr)
1051 {
1052 	int fullsize = sizeof(struct guc_state_capture_header_t);
1053 
1054 	if (guc_capture_log_remove_bytes(guc, buf, hdr, fullsize) != fullsize)
1055 		return -1;
1056 	return 0;
1057 }
1058 
1059 static int
1060 guc_capture_log_get_register(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
1061 			     struct guc_mmio_reg *reg)
1062 {
1063 	int fullsize = sizeof(struct guc_mmio_reg);
1064 
1065 	if (guc_capture_log_remove_bytes(guc, buf, reg, fullsize) != fullsize)
1066 		return -1;
1067 	return 0;
1068 }
1069 
1070 static struct __guc_capture_parsed_output *
1071 guc_capture_get_prealloc_node(struct xe_guc *guc)
1072 {
1073 	struct __guc_capture_parsed_output *found = NULL;
1074 
1075 	if (!list_empty(&guc->capture->cachelist)) {
1076 		struct __guc_capture_parsed_output *n, *ntmp;
1077 
1078 		/* get first avail node from the cache list */
1079 		list_for_each_entry_safe(n, ntmp, &guc->capture->cachelist, link) {
1080 			found = n;
1081 			break;
1082 		}
1083 	} else {
1084 		struct __guc_capture_parsed_output *n, *ntmp;
1085 
1086 		/*
1087 		 * traverse reversed and steal back the oldest node already
1088 		 * allocated
1089 		 */
1090 		list_for_each_entry_safe_reverse(n, ntmp, &guc->capture->outlist, link) {
1091 			if (!n->locked)
1092 				found = n;
1093 		}
1094 	}
1095 	if (found) {
1096 		list_del(&found->link);
1097 		guc_capture_init_node(guc, found);
1098 	}
1099 
1100 	return found;
1101 }
1102 
/*
 * Grab a fresh prealloc node and copy into it the register lists selected
 * by @keep_reglist_mask (plus the identity fields tied to each list) from
 * @original. Used when dependent engines reset together and must share the
 * already-parsed global and/or class captures. Returns NULL if no blank
 * node is available.
 */
static struct __guc_capture_parsed_output *
guc_capture_clone_node(struct xe_guc *guc, struct __guc_capture_parsed_output *original,
		       u32 keep_reglist_mask)
{
	struct __guc_capture_parsed_output *new;
	int i;

	new = guc_capture_get_prealloc_node(guc);
	if (!new)
		return NULL;
	if (!original)
		return new;

	new->is_partial = original->is_partial;

	/* copy reg-lists that we want to clone */
	for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
		if (keep_reglist_mask & BIT(i)) {
			XE_WARN_ON(original->reginfo[i].num_regs  >
				   guc->capture->max_mmio_per_node);

			memcpy(new->reginfo[i].regs, original->reginfo[i].regs,
			       original->reginfo[i].num_regs * sizeof(struct guc_mmio_reg));

			new->reginfo[i].num_regs = original->reginfo[i].num_regs;
			new->reginfo[i].vfid  = original->reginfo[i].vfid;

			if (i == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS) {
				new->eng_class = original->eng_class;
			} else if (i == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
				new->eng_inst = original->eng_inst;
				new->guc_id = original->guc_id;
				new->lrca = original->lrca;
			}
		}
	}

	return new;
}
1142 
/*
 * Parse one capture group from the error-capture ring: read the group
 * header, then each per-type (global/class/instance) capture list,
 * populating preallocated nodes that are published to guc->capture->outlist
 * for later devcoredump match-up. Returns 0 on success, -ENODATA when the
 * ring is empty, -EIO on a truncated/corrupt stream, -ENOMEM when no blank
 * node could be obtained.
 */
static int
guc_capture_extract_reglists(struct xe_guc *guc, struct __guc_capture_bufstate *buf)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct guc_state_capture_group_header_t ghdr = {0};
	struct guc_state_capture_header_t hdr = {0};
	struct __guc_capture_parsed_output *node = NULL;
	struct guc_mmio_reg *regs = NULL;
	int i, numlists, numregs, ret = 0;
	enum guc_state_capture_type datatype;
	struct guc_mmio_reg tmp;
	bool is_partial = false;

	i = guc_capture_buf_cnt(buf);
	if (!i)
		return -ENODATA;

	if (i % sizeof(u32)) {
		xe_gt_warn(gt, "Got mis-aligned register capture entries\n");
		ret = -EIO;
		goto bailout;
	}

	/* first get the capture group header */
	if (guc_capture_log_get_group_hdr(guc, buf, &ghdr)) {
		ret = -EIO;
		goto bailout;
	}
	/*
	 * we would typically expect a layout as below where n would be expected to be
	 * anywhere between 3 to n where n > 3 if we are seeing multiple dependent engine
	 * instances being reset together.
	 * ____________________________________________
	 * | Capture Group                            |
	 * | ________________________________________ |
	 * | | Capture Group Header:                | |
	 * | |  - num_captures = 5                  | |
	 * | |______________________________________| |
	 * | ________________________________________ |
	 * | | Capture1:                            | |
	 * | |  Hdr: GLOBAL, numregs=a              | |
	 * | | ____________________________________ | |
	 * | | | Reglist                          | | |
	 * | | | - reg1, reg2, ... rega           | | |
	 * | | |__________________________________| | |
	 * | |______________________________________| |
	 * | ________________________________________ |
	 * | | Capture2:                            | |
	 * | |  Hdr: CLASS=RENDER/COMPUTE, numregs=b| |
	 * | | ____________________________________ | |
	 * | | | Reglist                          | | |
	 * | | | - reg1, reg2, ... regb           | | |
	 * | | |__________________________________| | |
	 * | |______________________________________| |
	 * | ________________________________________ |
	 * | | Capture3:                            | |
	 * | |  Hdr: INSTANCE=RCS, numregs=c        | |
	 * | | ____________________________________ | |
	 * | | | Reglist                          | | |
	 * | | | - reg1, reg2, ... regc           | | |
	 * | | |__________________________________| | |
	 * | |______________________________________| |
	 * | ________________________________________ |
	 * | | Capture4:                            | |
	 * | |  Hdr: CLASS=RENDER/COMPUTE, numregs=d| |
	 * | | ____________________________________ | |
	 * | | | Reglist                          | | |
	 * | | | - reg1, reg2, ... regd           | | |
	 * | | |__________________________________| | |
	 * | |______________________________________| |
	 * | ________________________________________ |
	 * | | Capture5:                            | |
	 * | |  Hdr: INSTANCE=CCS0, numregs=e       | |
	 * | | ____________________________________ | |
	 * | | | Reglist                          | | |
	 * | | | - reg1, reg2, ... rege           | | |
	 * | | |__________________________________| | |
	 * | |______________________________________| |
	 * |__________________________________________|
	 */
	is_partial = FIELD_GET(GUC_STATE_CAPTURE_GROUP_HEADER_CAPTURE_GROUP_TYPE, ghdr.info);
	numlists = FIELD_GET(GUC_STATE_CAPTURE_GROUP_HEADER_NUM_CAPTURES, ghdr.info);

	while (numlists--) {
		if (guc_capture_log_get_data_hdr(guc, buf, &hdr)) {
			ret = -EIO;
			break;
		}

		datatype = FIELD_GET(GUC_STATE_CAPTURE_HEADER_CAPTURE_TYPE, hdr.info);
		if (datatype > GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
			/* unknown capture type - skip over to next capture set */
			numregs = FIELD_GET(GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES,
					    hdr.num_mmio_entries);
			while (numregs--) {
				if (guc_capture_log_get_register(guc, buf, &tmp)) {
					ret = -EIO;
					break;
				}
			}
			continue;
		} else if (node) {
			/*
			 * Based on the current capture type and what we have so far,
			 * decide if we should add the current node into the internal
			 * linked list for match-up when xe_devcoredump calls later
			 * (and alloc a blank node for the next set of reglists)
			 * or continue with the same node or clone the current node
			 * but only retain the global or class registers (such as the
			 * case of dependent engine resets).
			 */
			if (datatype == GUC_STATE_CAPTURE_TYPE_GLOBAL) {
				guc_capture_add_node_to_outlist(guc->capture, node);
				node = NULL;
			} else if (datatype == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
				   node->reginfo[GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS].num_regs) {
				/* Add to list, clone node and duplicate global list */
				guc_capture_add_node_to_outlist(guc->capture, node);
				node = guc_capture_clone_node(guc, node,
							      GCAP_PARSED_REGLIST_INDEX_GLOBAL);
			} else if (datatype == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE &&
				   node->reginfo[GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE].num_regs) {
				/* Add to list, clone node and duplicate global + class lists */
				guc_capture_add_node_to_outlist(guc->capture, node);
				node = guc_capture_clone_node(guc, node,
							      (GCAP_PARSED_REGLIST_INDEX_GLOBAL |
							      GCAP_PARSED_REGLIST_INDEX_ENGCLASS));
			}
		}

		if (!node) {
			node = guc_capture_get_prealloc_node(guc);
			if (!node) {
				ret = -ENOMEM;
				break;
			}
			if (datatype != GUC_STATE_CAPTURE_TYPE_GLOBAL)
				xe_gt_dbg(gt, "Register capture missing global dump: %08x!\n",
					  datatype);
		}
		node->is_partial = is_partial;
		node->reginfo[datatype].vfid = FIELD_GET(GUC_STATE_CAPTURE_HEADER_VFID, hdr.owner);
		node->source = XE_ENGINE_CAPTURE_SOURCE_GUC;
		node->type = datatype;

		switch (datatype) {
		case GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE:
			node->eng_class = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS,
						    hdr.info);
			node->eng_inst = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_INSTANCE,
						   hdr.info);
			node->lrca = hdr.lrca;
			node->guc_id = hdr.guc_id;
			break;
		case GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS:
			node->eng_class = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS,
						    hdr.info);
			break;
		default:
			break;
		}

		/* clamp to the preallocated array size; excess entries are dropped */
		numregs = FIELD_GET(GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES,
				    hdr.num_mmio_entries);
		if (numregs > guc->capture->max_mmio_per_node) {
			xe_gt_dbg(gt, "Register capture list extraction clipped by prealloc!\n");
			numregs = guc->capture->max_mmio_per_node;
		}
		node->reginfo[datatype].num_regs = numregs;
		regs = node->reginfo[datatype].regs;
		i = 0;
		while (numregs--) {
			if (guc_capture_log_get_register(guc, buf, &regs[i++])) {
				ret = -EIO;
				break;
			}
		}
	}

bailout:
	if (node) {
		/* If we have data, add to linked list for match-up when xe_devcoredump calls */
		for (i = GUC_STATE_CAPTURE_TYPE_GLOBAL; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
			if (node->reginfo[i].regs) {
				guc_capture_add_node_to_outlist(guc->capture, node);
				node = NULL;
				break;
			}
		}
		if (node) /* else return it back to cache list */
			guc_capture_add_node_to_cachelist(guc->capture, node);
	}
	return ret;
}
1337 
static int __guc_capture_flushlog_complete(struct xe_guc *guc)
{
	u32 action[] = {
		XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE,
		GUC_LOG_BUFFER_CAPTURE
	};

	/* Ack to GuC that the capture log region has been drained */
	return xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
}
1347 
/*
 * Drain the GuC log's error-capture region: snapshot the ring state from
 * the log buffer, extract every complete capture group into parsed nodes,
 * then advance the ring's read pointer and ack the flush back to GuC.
 */
static void __guc_capture_process_output(struct xe_guc *guc)
{
	unsigned int buffer_size, read_offset, write_offset, full_count;
	/* NOTE(review): uc is only referenced by the typeof() in its own
	 * initializer below - looks like a leftover; confirm before removing.
	 */
	struct xe_uc *uc = container_of(guc, typeof(*uc), guc);
	struct guc_log_buffer_state log_buf_state_local;
	struct __guc_capture_bufstate buf;
	bool new_overflow;
	int ret, tmp;
	u32 log_buf_state_offset;
	u32 src_data_offset;

	log_buf_state_offset = sizeof(struct guc_log_buffer_state) * GUC_LOG_BUFFER_CAPTURE;
	src_data_offset = xe_guc_get_log_buffer_offset(&guc->log, GUC_LOG_BUFFER_CAPTURE);

	/*
	 * Make a copy of the state structure, inside GuC log buffer
	 * (which is uncached mapped), on the stack to avoid reading
	 * from it multiple times.
	 */
	xe_map_memcpy_from(guc_to_xe(guc), &log_buf_state_local, &guc->log.bo->vmap,
			   log_buf_state_offset, sizeof(struct guc_log_buffer_state));

	buffer_size = xe_guc_get_log_buffer_size(&guc->log, GUC_LOG_BUFFER_CAPTURE);
	read_offset = log_buf_state_local.read_ptr;
	write_offset = log_buf_state_local.sampled_write_ptr;
	full_count = FIELD_GET(GUC_LOG_BUFFER_STATE_BUFFER_FULL_CNT, log_buf_state_local.flags);

	/* Bookkeeping stuff */
	tmp = FIELD_GET(GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE, log_buf_state_local.flags);
	guc->log.stats[GUC_LOG_BUFFER_CAPTURE].flush += tmp;
	new_overflow = xe_guc_check_log_buf_overflow(&guc->log, GUC_LOG_BUFFER_CAPTURE,
						     full_count);

	/* Now copy the actual logs. */
	if (unlikely(new_overflow)) {
		/* copy the whole buffer in case of overflow */
		read_offset = 0;
		write_offset = buffer_size;
	} else if (unlikely((read_offset > buffer_size) ||
			(write_offset > buffer_size))) {
		xe_gt_err(guc_to_gt(guc),
			  "Register capture buffer in invalid state: read = 0x%X, size = 0x%X!\n",
			  read_offset, buffer_size);
		/* copy whole buffer as offsets are unreliable */
		read_offset = 0;
		write_offset = buffer_size;
	}

	buf.size = buffer_size;
	buf.rd = read_offset;
	buf.wr = write_offset;
	buf.data_offset = src_data_offset;

	/* skip extraction while submission is stopped (e.g. during GT reset) */
	if (!xe_guc_read_stopped(guc)) {
		do {
			ret = guc_capture_extract_reglists(guc, &buf);
			if (ret && ret != -ENODATA)
				xe_gt_dbg(guc_to_gt(guc), "Capture extraction failed:%d\n", ret);
		} while (ret >= 0);
	}

	/* Update the state of log buffer err-cap state */
	xe_map_wr(guc_to_xe(guc), &guc->log.bo->vmap,
		  log_buf_state_offset + offsetof(struct guc_log_buffer_state, read_ptr), u32,
		  write_offset);

	/*
	 * Clear the flush_to_file from local first, the local was loaded by above
	 * xe_map_memcpy_from, then write out the "updated local" through
	 * xe_map_wr()
	 */
	log_buf_state_local.flags &= ~GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE;
	xe_map_wr(guc_to_xe(guc), &guc->log.bo->vmap,
		  log_buf_state_offset + offsetof(struct guc_log_buffer_state, flags), u32,
		  log_buf_state_local.flags);
	__guc_capture_flushlog_complete(guc);
}
1425 
1426 /*
1427  * xe_guc_capture_process - Process GuC register captured data
1428  * @guc: The GuC object
1429  *
1430  * When GuC captured data is ready, GuC will send message
1431  * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host, this function will be
1432  * called to process the data comes with the message.
1433  *
1434  * Returns: None
1435  */
1436 void xe_guc_capture_process(struct xe_guc *guc)
1437 {
1438 	if (guc->capture)
1439 		__guc_capture_process_output(guc);
1440 }
1441 
/*
 * Allocate one blank capture node (drm-managed) together with one register
 * array per capture type, each sized for max_mmio_per_node entries.
 * Returns NULL on allocation failure.
 */
static struct __guc_capture_parsed_output *
guc_capture_alloc_one_node(struct xe_guc *guc)
{
	struct drm_device *drm = guc_to_drm(guc);
	struct __guc_capture_parsed_output *new;
	int i;

	new = drmm_kzalloc(drm, sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;

	for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
		new->reginfo[i].regs = drmm_kzalloc(drm, guc->capture->max_mmio_per_node *
						    sizeof(struct guc_mmio_reg), GFP_KERNEL);
		if (!new->reginfo[i].regs) {
			/* unwind the register arrays allocated so far */
			while (i)
				drmm_kfree(drm, new->reginfo[--i].regs);
			drmm_kfree(drm, new);
			return NULL;
		}
	}
	guc_capture_init_node(guc, new);

	return new;
}
1467 
/*
 * Fill the cachelist with blank nodes so error-capture parsing can draw
 * from it without allocating. A partial fill is tolerated.
 */
static void
__guc_capture_create_prealloc_nodes(struct xe_guc *guc)
{
	struct __guc_capture_parsed_output *node = NULL;
	int i;

	for (i = 0; i < PREALLOC_NODES_MAX_COUNT; ++i) {
		node = guc_capture_alloc_one_node(guc);
		if (!node) {
			xe_gt_warn(guc_to_gt(guc), "Register capture pre-alloc-cache failure\n");
			/* don't free the priors, use what we got and cleanup at shutdown */
			return;
		}
		guc_capture_add_node_to_cachelist(guc->capture, node);
	}
}
1484 
/*
 * Walk every (owner-index, capture-type, engine-class) combination of both
 * the static reglists and the steered extlists and return the largest
 * combined register count found - this sizes each preallocated node's
 * register arrays. Falls back to PREALLOC_NODES_DEFAULT_NUMREGS if all
 * lists are empty.
 */
static int
guc_get_max_reglist_count(struct xe_guc *guc)
{
	int i, j, k, tmp, maxregcount = 0;

	for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; ++i) {
		for (j = 0; j < GUC_STATE_CAPTURE_TYPE_MAX; ++j) {
			for (k = 0; k < GUC_CAPTURE_LIST_CLASS_MAX; ++k) {
				const struct __guc_mmio_reg_descr_group *match;

				/* the global type is class-agnostic: only slot 0 is meaningful */
				if (j == GUC_STATE_CAPTURE_TYPE_GLOBAL && k > 0)
					continue;

				tmp = 0;
				match = guc_capture_get_one_list(guc->capture->reglists, i, j, k);
				if (match)
					tmp = match->num_regs;

				match = guc_capture_get_one_list(guc->capture->extlists, i, j, k);
				if (match)
					tmp += match->num_regs;

				if (tmp > maxregcount)
					maxregcount = tmp;
			}
		}
	}
	if (!maxregcount)
		maxregcount = PREALLOC_NODES_DEFAULT_NUMREGS;

	return maxregcount;
}
1517 
static void
guc_capture_create_prealloc_nodes(struct xe_guc *guc)
{
	/* skip if we've already done the pre-alloc */
	if (guc->capture->max_mmio_per_node)
		return;

	/* size each node's register arrays to the largest descriptor list */
	guc->capture->max_mmio_per_node = guc_get_max_reglist_count(guc);
	__guc_capture_create_prealloc_nodes(guc);
}
1528 
1529 static void
1530 read_reg_to_node(struct xe_hw_engine *hwe, const struct __guc_mmio_reg_descr_group *list,
1531 		 struct guc_mmio_reg *regs)
1532 {
1533 	int i;
1534 
1535 	if (!list || !list->list || list->num_regs == 0)
1536 		return;
1537 
1538 	if (!regs)
1539 		return;
1540 
1541 	for (i = 0; i < list->num_regs; i++) {
1542 		struct __guc_mmio_reg_descr desc = list->list[i];
1543 		u32 value;
1544 
1545 		if (list->type == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
1546 			value = xe_hw_engine_mmio_read32(hwe, desc.reg);
1547 		} else {
1548 			if (list->type == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
1549 			    FIELD_GET(GUC_REGSET_STEERING_NEEDED, desc.flags)) {
1550 				int group, instance;
1551 
1552 				group = FIELD_GET(GUC_REGSET_STEERING_GROUP, desc.flags);
1553 				instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, desc.flags);
1554 				value = xe_gt_mcr_unicast_read(hwe->gt, XE_REG_MCR(desc.reg.addr),
1555 							       group, instance);
1556 			} else {
1557 				value = xe_mmio_read32(&hwe->gt->mmio, desc.reg);
1558 			}
1559 		}
1560 
1561 		regs[i].value = value;
1562 		regs[i].offset = desc.reg.addr;
1563 		regs[i].flags = desc.flags;
1564 		regs[i].mask = desc.mask;
1565 	}
1566 }
1567 
1568 /**
1569  * xe_engine_manual_capture - Take a manual engine snapshot from engine.
1570  * @hwe: Xe HW Engine.
1571  * @snapshot: The engine snapshot
1572  *
1573  * Take engine snapshot from engine read.
1574  *
1575  * Returns: None
1576  */
1577 void
1578 xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot)
1579 {
1580 	struct xe_gt *gt = hwe->gt;
1581 	struct xe_device *xe = gt_to_xe(gt);
1582 	struct xe_guc *guc = &gt->uc.guc;
1583 	struct xe_devcoredump *devcoredump = &xe->devcoredump;
1584 	enum guc_capture_list_class_type capture_class;
1585 	const struct __guc_mmio_reg_descr_group *list;
1586 	struct __guc_capture_parsed_output *new;
1587 	enum guc_state_capture_type type;
1588 	u16 guc_id = 0;
1589 	u32 lrca = 0;
1590 
1591 	if (IS_SRIOV_VF(xe))
1592 		return;
1593 
1594 	new = guc_capture_get_prealloc_node(guc);
1595 	if (!new)
1596 		return;
1597 
1598 	capture_class = xe_engine_class_to_guc_capture_class(hwe->class);
1599 	for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
1600 		struct gcap_reg_list_info *reginfo = &new->reginfo[type];
1601 		/*
1602 		 * regsinfo->regs is allocated based on guc->capture->max_mmio_per_node
1603 		 * which is based on the descriptor list driving the population so
1604 		 * should not overflow
1605 		 */
1606 
1607 		/* Get register list for the type/class */
1608 		list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
1609 							capture_class, false);
1610 		if (!list) {
1611 			xe_gt_dbg(gt, "Empty GuC capture register descriptor for %s",
1612 				  hwe->name);
1613 			continue;
1614 		}
1615 
1616 		read_reg_to_node(hwe, list, reginfo->regs);
1617 		reginfo->num_regs = list->num_regs;
1618 
1619 		/* Capture steering registers for rcs/ccs */
1620 		if (capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
1621 			list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF,
1622 								type, capture_class, true);
1623 			if (list) {
1624 				read_reg_to_node(hwe, list, &reginfo->regs[reginfo->num_regs]);
1625 				reginfo->num_regs += list->num_regs;
1626 			}
1627 		}
1628 	}
1629 
1630 	if (devcoredump && devcoredump->captured) {
1631 		struct xe_guc_submit_exec_queue_snapshot *ge = devcoredump->snapshot.ge;
1632 
1633 		if (ge) {
1634 			guc_id = ge->guc.id;
1635 			if (ge->lrc[0])
1636 				lrca = ge->lrc[0]->context_desc;
1637 		}
1638 	}
1639 
1640 	new->eng_class = xe_engine_class_to_guc_class(hwe->class);
1641 	new->eng_inst = hwe->instance;
1642 	new->guc_id = guc_id;
1643 	new->lrca = lrca;
1644 	new->is_partial = 0;
1645 	new->locked = 1;
1646 	new->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL;
1647 
1648 	guc_capture_add_node_to_outlist(guc->capture, new);
1649 	devcoredump->snapshot.matched_node = new;
1650 }
1651 
1652 static struct guc_mmio_reg *
1653 guc_capture_find_reg(struct gcap_reg_list_info *reginfo, u32 addr, u32 flags)
1654 {
1655 	int i;
1656 
1657 	if (reginfo && reginfo->num_regs > 0) {
1658 		struct guc_mmio_reg *regs = reginfo->regs;
1659 
1660 		if (regs)
1661 			for (i = 0; i < reginfo->num_regs; i++)
1662 				if (regs[i].offset == addr && regs[i].flags == flags)
1663 					return &regs[i];
1664 	}
1665 
1666 	return NULL;
1667 }
1668 
/*
 * Print one captured register list in static-descriptor order: iterate the
 * descriptor table and look each entry up in the matched capture node, so
 * the printout ordering is stable regardless of how GuC emitted the data.
 * Adjacent LOW_DW/HI_DW descriptor pairs are combined into one 64-bit value.
 */
static void
snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p,
			     u32 type, const struct __guc_mmio_reg_descr_group *list)
{
	struct xe_gt *gt = snapshot->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_guc *guc = &gt->uc.guc;
	struct xe_devcoredump *devcoredump = &xe->devcoredump;
	struct xe_devcoredump_snapshot *devcore_snapshot = &devcoredump->snapshot;
	struct gcap_reg_list_info *reginfo = NULL;
	u32 i, last_value = 0;
	bool is_ext, low32_ready = false;

	if (!list || !list->list || list->num_regs == 0)
		return;
	XE_WARN_ON(!devcore_snapshot->matched_node);

	is_ext = list == guc->capture->extlists;
	reginfo = &devcore_snapshot->matched_node->reginfo[type];

	/*
	 * loop through descriptor first and find the register in the node
	 * this is more scalable for developer maintenance as it will ensure
	 * the printout matched the ordering of the static descriptor
	 * table-of-lists
	 */
	for (i = 0; i < list->num_regs; i++) {
		const struct __guc_mmio_reg_descr *reg_desc = &list->list[i];
		struct guc_mmio_reg *reg;
		u32 value;

		reg = guc_capture_find_reg(reginfo, reg_desc->reg.addr, reg_desc->flags);
		if (!reg)
			continue;

		value = reg->value;
		switch (reg_desc->data_type) {
		case REG_64BIT_LOW_DW:
			last_value = value;

			/*
			 * A 64 bit register define requires 2 consecutive
			 * entries in register list, with low dword first
			 * and hi dword the second, like:
			 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
			 *  { XXX_REG_HI(0), REG_64BIT_HI_DW,  0, 0, "XXX_REG"},
			 *
			 * Incorrect order will trigger XE_WARN.
			 *
			 * Possible double low here, for example:
			 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
			 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
			 */
			XE_WARN_ON(low32_ready);
			low32_ready = true;
			/* Low 32 bit dword saved, continue for high 32 bit */
			break;

		case REG_64BIT_HI_DW: {
			u64 value_qw = ((u64)value << 32) | last_value;

			/*
			 * Incorrect 64bit register order. Possible missing low.
			 * for example:
			 *  { XXX_REG(0), REG_32BIT, 0, 0, NULL},
			 *  { XXX_REG_HI(0), REG_64BIT_HI_DW, 0, 0, NULL},
			 */
			XE_WARN_ON(!low32_ready);
			low32_ready = false;

			drm_printf(p, "\t%s: 0x%016llx\n", reg_desc->regname, value_qw);
			break;
		}

		case REG_32BIT:
			/*
			 * Incorrect 64bit register order. Possible missing high.
			 * for example:
			 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
			 *  { XXX_REG(0), REG_32BIT, 0, 0, "XXX_REG"},
			 */
			XE_WARN_ON(low32_ready);

			if (is_ext) {
				/* extlist entries carry steering info: print the dss index too */
				int dss, group, instance;

				group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags);
				instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags);
				dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance);

				drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, dss, value);
			} else {
				drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value);
			}
			break;
		}
	}

	/*
	 * Incorrect 64bit register order. Possible missing high.
	 * for example:
	 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
	 *  } // <- Register list end
	 */
	XE_WARN_ON(low32_ready);
}
1775 
1776 /**
1777  * xe_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
1778  * @snapshot: Xe HW Engine snapshot object.
1779  * @p: drm_printer where it will be printed out.
1780  *
1781  * This function prints out a given Xe HW Engine snapshot object.
1782  */
1783 void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
1784 {
1785 	const char *grptype[GUC_STATE_CAPTURE_GROUP_TYPE_MAX] = {
1786 		"full-capture",
1787 		"partial-capture"
1788 	};
1789 	int type;
1790 	const struct __guc_mmio_reg_descr_group *list;
1791 	enum guc_capture_list_class_type capture_class;
1792 
1793 	struct xe_gt *gt;
1794 	struct xe_device *xe;
1795 	struct xe_devcoredump *devcoredump;
1796 	struct xe_devcoredump_snapshot *devcore_snapshot;
1797 
1798 	if (!snapshot)
1799 		return;
1800 
1801 	gt = snapshot->hwe->gt;
1802 	xe = gt_to_xe(gt);
1803 	devcoredump = &xe->devcoredump;
1804 	devcore_snapshot = &devcoredump->snapshot;
1805 
1806 	if (!devcore_snapshot->matched_node)
1807 		return;
1808 
1809 	xe_gt_assert(gt, snapshot->source <= XE_ENGINE_CAPTURE_SOURCE_GUC);
1810 	xe_gt_assert(gt, snapshot->hwe);
1811 
1812 	capture_class = xe_engine_class_to_guc_capture_class(snapshot->hwe->class);
1813 
1814 	drm_printf(p, "%s (physical), logical instance=%d\n",
1815 		   snapshot->name ? snapshot->name : "",
1816 		   snapshot->logical_instance);
1817 	drm_printf(p, "\tCapture_source: %s\n",
1818 		   snapshot->source == XE_ENGINE_CAPTURE_SOURCE_GUC ? "GuC" : "Manual");
1819 	drm_printf(p, "\tCoverage: %s\n", grptype[devcore_snapshot->matched_node->is_partial]);
1820 	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
1821 		   snapshot->forcewake.domain, snapshot->forcewake.ref);
1822 	drm_printf(p, "\tReserved: %s\n",
1823 		   str_yes_no(snapshot->kernel_reserved));
1824 
1825 	for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
1826 		list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
1827 							capture_class, false);
1828 		snapshot_print_by_list_order(snapshot, p, type, list);
1829 	}
1830 
1831 	if (capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
1832 		list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF,
1833 							GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
1834 							capture_class, true);
1835 		snapshot_print_by_list_order(snapshot, p, GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
1836 					     list);
1837 	}
1838 
1839 	drm_puts(p, "\n");
1840 }
1841 
1842 /**
1843  * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the job.
1844  * @job: The job object.
1845  *
1846  * Search within the capture outlist for the job, could be used for check if
1847  * GuC capture is ready for the job.
1848  * If found, the locked boolean of the node will be flagged.
1849  *
1850  * Returns: found guc-capture node ptr else NULL
1851  */
1852 struct __guc_capture_parsed_output *
1853 xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job)
1854 {
1855 	struct xe_hw_engine *hwe;
1856 	enum xe_hw_engine_id id;
1857 	struct xe_exec_queue *q;
1858 	struct xe_device *xe;
1859 	u16 guc_class = GUC_LAST_ENGINE_CLASS + 1;
1860 	struct xe_devcoredump_snapshot *ss;
1861 
1862 	if (!job)
1863 		return NULL;
1864 
1865 	q = job->q;
1866 	if (!q || !q->gt)
1867 		return NULL;
1868 
1869 	xe = gt_to_xe(q->gt);
1870 	if (xe->wedged.mode >= 2 || !xe_device_uc_enabled(xe) || IS_SRIOV_VF(xe))
1871 		return NULL;
1872 
1873 	ss = &xe->devcoredump.snapshot;
1874 	if (ss->matched_node && ss->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC)
1875 		return ss->matched_node;
1876 
1877 	/* Find hwe for the job */
1878 	for_each_hw_engine(hwe, q->gt, id) {
1879 		if (hwe != q->hwe)
1880 			continue;
1881 		guc_class = xe_engine_class_to_guc_class(hwe->class);
1882 		break;
1883 	}
1884 
1885 	if (guc_class <= GUC_LAST_ENGINE_CLASS) {
1886 		struct __guc_capture_parsed_output *n, *ntmp;
1887 		struct xe_guc *guc =  &q->gt->uc.guc;
1888 		u16 guc_id = q->guc->id;
1889 		u32 lrca = xe_lrc_ggtt_addr(q->lrc[0]);
1890 
1891 		/*
1892 		 * Look for a matching GuC reported error capture node from
1893 		 * the internal output link-list based on engine, guc id and
1894 		 * lrca info.
1895 		 */
1896 		list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) {
1897 			if (n->eng_class == guc_class && n->eng_inst == hwe->instance &&
1898 			    n->guc_id == guc_id && n->lrca == lrca &&
1899 			    n->source == XE_ENGINE_CAPTURE_SOURCE_GUC) {
1900 				n->locked = 1;
1901 				return n;
1902 			}
1903 		}
1904 	}
1905 	return NULL;
1906 }
1907 
1908 /**
1909  * xe_engine_snapshot_capture_for_job - Take snapshot of associated engine
1910  * @job: The job object
1911  *
1912  * Take snapshot of associated HW Engine
1913  *
1914  * Returns: None.
1915  */
1916 void
1917 xe_engine_snapshot_capture_for_job(struct xe_sched_job *job)
1918 {
1919 	struct xe_exec_queue *q = job->q;
1920 	struct xe_device *xe = gt_to_xe(q->gt);
1921 	struct xe_devcoredump *coredump = &xe->devcoredump;
1922 	struct xe_hw_engine *hwe;
1923 	enum xe_hw_engine_id id;
1924 	u32 adj_logical_mask = q->logical_mask;
1925 
1926 	if (IS_SRIOV_VF(xe))
1927 		return;
1928 
1929 	for_each_hw_engine(hwe, q->gt, id) {
1930 		if (hwe->class != q->hwe->class ||
1931 		    !(BIT(hwe->logical_instance) & adj_logical_mask)) {
1932 			coredump->snapshot.hwe[id] = NULL;
1933 			continue;
1934 		}
1935 
1936 		if (!coredump->snapshot.hwe[id]) {
1937 			coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe, job);
1938 		} else {
1939 			struct __guc_capture_parsed_output *new;
1940 
1941 			new = xe_guc_capture_get_matching_and_lock(job);
1942 			if (new) {
1943 				struct xe_guc *guc =  &q->gt->uc.guc;
1944 
1945 				/*
1946 				 * If we are in here, it means we found a fresh
1947 				 * GuC-err-capture node for this engine after
1948 				 * previously failing to find a match in the
1949 				 * early part of guc_exec_queue_timedout_job.
1950 				 * Thus we must free the manually captured node
1951 				 */
1952 				guc_capture_free_outlist_node(guc->capture,
1953 							      coredump->snapshot.matched_node);
1954 				coredump->snapshot.matched_node = new;
1955 			}
1956 		}
1957 
1958 		break;
1959 	}
1960 }
1961 
1962 /*
 * xe_guc_capture_put_matched_nodes - Cleanup matched nodes
1964  * @guc: The GuC object
1965  *
1966  * Free matched node and all nodes with the equal guc_id from
1967  * GuC captured outlist
1968  */
1969 void xe_guc_capture_put_matched_nodes(struct xe_guc *guc)
1970 {
1971 	struct xe_device *xe = guc_to_xe(guc);
1972 	struct xe_devcoredump *devcoredump = &xe->devcoredump;
1973 	struct __guc_capture_parsed_output *n = devcoredump->snapshot.matched_node;
1974 
1975 	if (n) {
1976 		guc_capture_remove_stale_matches_from_list(guc->capture, n);
1977 		guc_capture_free_outlist_node(guc->capture, n);
1978 		devcoredump->snapshot.matched_node = NULL;
1979 	}
1980 }
1981 
1982 /*
1983  * xe_guc_capture_steered_list_init - Init steering register list
1984  * @guc: The GuC object
1985  *
1986  * Init steering register list for GuC register capture, create pre-alloc node
1987  */
1988 void xe_guc_capture_steered_list_init(struct xe_guc *guc)
1989 {
1990 	/*
1991 	 * For certain engine classes, there are slice and subslice
1992 	 * level registers requiring steering. We allocate and populate
1993 	 * these based on hw config and add it as an extension list at
1994 	 * the end of the pre-populated render list.
1995 	 */
1996 	guc_capture_alloc_steered_lists(guc);
1997 	check_guc_capture_size(guc);
1998 	guc_capture_create_prealloc_nodes(guc);
1999 }
2000 
2001 /*
2002  * xe_guc_capture_init - Init for GuC register capture
2003  * @guc: The GuC object
2004  *
2005  * Init for GuC register capture, alloc memory for capture data structure.
2006  *
2007  * Returns: 0 if success.
2008  *	    -ENOMEM if out of memory
2009  */
2010 int xe_guc_capture_init(struct xe_guc *guc)
2011 {
2012 	guc->capture = drmm_kzalloc(guc_to_drm(guc), sizeof(*guc->capture), GFP_KERNEL);
2013 	if (!guc->capture)
2014 		return -ENOMEM;
2015 
2016 	guc->capture->reglists = guc_capture_get_device_reglist(guc_to_xe(guc));
2017 
2018 	INIT_LIST_HEAD(&guc->capture->outlist);
2019 	INIT_LIST_HEAD(&guc->capture->cachelist);
2020 
2021 	return 0;
2022 }
2023