xref: /linux/drivers/gpu/drm/xe/xe_guc_capture.c (revision ca220141fa8ebae09765a242076b2b77338106b0)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021-2024 Intel Corporation
4  */
5 
6 #include <linux/types.h>
7 
8 #include <drm/drm_managed.h>
9 #include <drm/drm_print.h>
10 
11 #include "abi/guc_actions_abi.h"
12 #include "abi/guc_capture_abi.h"
13 #include "abi/guc_log_abi.h"
14 #include "regs/xe_engine_regs.h"
15 #include "regs/xe_gt_regs.h"
16 
17 #include "xe_bo_types.h"
18 #include "xe_device.h"
19 #include "xe_exec_queue_types.h"
20 #include "xe_gt.h"
21 #include "xe_gt_mcr.h"
22 #include "xe_gt_printk.h"
23 #include "xe_guc.h"
24 #include "xe_guc_capture.h"
25 #include "xe_guc_capture_types.h"
26 #include "xe_guc_ct.h"
27 #include "xe_guc_exec_queue_types.h"
28 #include "xe_guc_log.h"
29 #include "xe_guc_submit_types.h"
30 #include "xe_guc_submit.h"
31 #include "xe_hw_engine_types.h"
32 #include "xe_hw_engine.h"
33 #include "xe_lrc.h"
34 #include "xe_macros.h"
35 #include "xe_map.h"
36 #include "xe_mmio.h"
37 #include "xe_sched_job.h"
38 
39 /*
40  * struct __guc_capture_bufstate
41  *
42  * Book-keeping structure used to track read and write pointers
43  * as we extract error capture data from the GuC-log-buffer's
44  * error-capture region as a stream of dwords.
45  */
struct __guc_capture_bufstate {
	u32 size;		/* size of the error-capture region in the GuC log */
	u32 data_offset;	/* start offset of the region within the log buffer */
	u32 rd;			/* current read offset into the region */
	u32 wr;			/* last-known write offset (owned by GuC) */
};
52 
53 /*
54  * struct __guc_capture_parsed_output - extracted error capture node
55  *
56  * A single unit of extracted error-capture output data grouped together
57  * at an engine-instance level. We keep these nodes in a linked list.
58  * See cachelist and outlist below.
59  */
struct __guc_capture_parsed_output {
	/*
	 * A single set of 3 capture lists: a global-list,
	 * an engine-class-list and an engine-instance list.
	 * outlist in xe_guc_state_capture keeps a linked
	 * list of these nodes that will eventually be
	 * detached from outlist and attached to xe_codedump
	 * in response to a context reset.
	 */
	struct list_head link;
	/* set when the extracted capture data for this node was incomplete */
	bool is_partial;
	u32 eng_class;	/* GuC engine class id */
	u32 eng_inst;	/* engine instance within the class */
	u32 guc_id;	/* GuC context id the capture belongs to */
	u32 lrca;	/* logical ring context address of that context */
	u32 type;
	/* locked nodes are skipped by stale-node reclamation */
	bool locked;
	enum xe_hw_engine_snapshot_source_id source;
	/* one register array per capture type (global/class/instance) */
	struct gcap_reg_list_info {
		u32 vfid;
		u32 num_regs;
		struct guc_mmio_reg *regs;
	} reginfo[GUC_STATE_CAPTURE_TYPE_MAX];
#define GCAP_PARSED_REGLIST_INDEX_GLOBAL   BIT(GUC_STATE_CAPTURE_TYPE_GLOBAL)
#define GCAP_PARSED_REGLIST_INDEX_ENGCLASS BIT(GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS)
};
86 
87 /*
88  * Define all device tables of GuC error capture register lists
89  * NOTE:
90  *     For engine-registers, GuC only needs the register offsets
91  *     from the engine-mmio-base
92  *
93  *     64 bit registers need 2 entries for low 32 bit register and high 32 bit
94  *     register, for example:
95  *       Register           data_type       flags   mask    Register name
96  *     { XXX_REG_LO(0),  REG_64BIT_LOW_DW,    0,      0,      NULL},
 *     { XXX_REG_HI(0),  REG_64BIT_HI_DW,     0,      0,      "XXX_REG"},
98  *     1. data_type: Indicate is hi/low 32 bit for a 64 bit register
99  *                   A 64 bit register define requires 2 consecutive entries,
100  *                   with low dword first and hi dword the second.
 *     2. Register name: NULL for the incomplete (low-dword) entry of a define
102  *     3. Incorrect order will trigger XE_WARN.
103  */
/* Global (engine-agnostic) registers common to XeLP and later */
#define COMMON_XELP_BASE_GLOBAL \
	{ FORCEWAKE_GT,			REG_32BIT,	0,	0,	0,	"FORCEWAKE_GT"}

/* Per-engine-instance registers common to all engine classes (offsets from mmio base) */
#define COMMON_BASE_ENGINE_INSTANCE \
	{ RING_HWSTAM(0),		REG_32BIT,	0,	0,	0,	"HWSTAM"}, \
	{ RING_HWS_PGA(0),		REG_32BIT,	0,	0,	0,	"RING_HWS_PGA"}, \
	{ RING_HEAD(0),			REG_32BIT,	0,	0,	0,	"RING_HEAD"}, \
	{ RING_TAIL(0),			REG_32BIT,	0,	0,	0,	"RING_TAIL"}, \
	{ RING_CTL(0),			REG_32BIT,	0,	0,	0,	"RING_CTL"}, \
	{ RING_MI_MODE(0),		REG_32BIT,	0,	0,	0,	"RING_MI_MODE"}, \
	{ RING_MODE(0),			REG_32BIT,	0,	0,	0,	"RING_MODE"}, \
	{ RING_ESR(0),			REG_32BIT,	0,	0,	0,	"RING_ESR"}, \
	{ RING_EMR(0),			REG_32BIT,	0,	0,	0,	"RING_EMR"}, \
	{ RING_EIR(0),			REG_32BIT,	0,	0,	0,	"RING_EIR"}, \
	{ RING_IMR(0),			REG_32BIT,	0,	0,	0,	"RING_IMR"}, \
	{ RING_IPEHR(0),		REG_32BIT,	0,	0,	0,	"IPEHR"}, \
	{ RING_INSTDONE(0),		REG_32BIT,	0,	0,	0,	"RING_INSTDONE"}, \
	{ INDIRECT_RING_STATE(0),	REG_32BIT,	0,	0,	0,	"INDIRECT_RING_STATE"}, \
	{ RING_CURRENT_LRCA(0),		REG_32BIT,	0,	0,	0,	"CURRENT_LRCA"}, \
	{ RING_ACTHD(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
	{ RING_ACTHD_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"ACTHD"}, \
	{ RING_BBADDR(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
	{ RING_BBADDR_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"RING_BBADDR"}, \
	{ RING_START(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
	{ RING_START_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"RING_START"}, \
	{ RING_DMA_FADD(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
	{ RING_DMA_FADD_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"RING_DMA_FADD"}, \
	{ RING_EXECLIST_STATUS_LO(0),	REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
	{ RING_EXECLIST_STATUS_HI(0),	REG_64BIT_HI_DW, 0,	0,	0,	"RING_EXECLIST_STATUS"}, \
	{ RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
	{ RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0,	0,	0,	"RING_EXECLIST_SQ_CONTENTS"}

/* Render/compute engine-class register common to all platforms */
#define COMMON_XELP_RC_CLASS \
	{ RCU_MODE,			REG_32BIT,	0,	0,	0,	"RCU_MODE"}

/* Non-steered INSTDONE variants — only valid on pre-XeHPG render/compute */
#define COMMON_XELP_RC_CLASS_INSTDONE \
	{ SC_INSTDONE,			REG_32BIT,	0,	0,	0,	"SC_INSTDONE"}, \
	{ SC_INSTDONE_EXTRA,		REG_32BIT,	0,	0,	0,	"SC_INSTDONE_EXTRA"}, \
	{ SC_INSTDONE_EXTRA2,		REG_32BIT,	0,	0,	0,	"SC_INSTDONE_EXTRA2"}

/* Video-enhance engine-class registers (SFC units 0-3) */
#define XELP_VEC_CLASS_REGS \
	{ SFC_DONE(0),			0,	0,	0,	0,	"SFC_DONE[0]"}, \
	{ SFC_DONE(1),			0,	0,	0,	0,	"SFC_DONE[1]"}, \
	{ SFC_DONE(2),			0,	0,	0,	0,	"SFC_DONE[2]"}, \
	{ SFC_DONE(3),			0,	0,	0,	0,	"SFC_DONE[3]"}

/* Extra per-engine-instance register added on Xe3p render/compute */
#define XE3P_BASE_ENGINE_INSTANCE \
	{ RING_CSMQDEBUG(0),		REG_32BIT,	0,	0,	0,	"CSMQDEBUG"}
152 
/* XE_LP Global */
static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = {
	COMMON_XELP_BASE_GLOBAL,
};

/* Render / Compute Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_rc_inst_regs[] = {
	COMMON_BASE_ENGINE_INSTANCE,
};

/* Render / Compute Engine-Class (pre-XeHPG: includes non-steered INSTDONE) */
static const struct __guc_mmio_reg_descr xe_rc_class_regs[] = {
	COMMON_XELP_RC_CLASS,
	COMMON_XELP_RC_CLASS_INSTDONE,
};

/* Render / Compute Engine-Class for xehpg (INSTDONE moved to steered extlists) */
static const struct __guc_mmio_reg_descr xe_hpg_rc_class_regs[] = {
	COMMON_XELP_RC_CLASS,
};

/* Media Decode/Encode Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_vd_inst_regs[] = {
	COMMON_BASE_ENGINE_INSTANCE,
};

/* Video Enhancement Engine-Class */
static const struct __guc_mmio_reg_descr xe_vec_class_regs[] = {
	XELP_VEC_CLASS_REGS,
};

/* Video Enhancement Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_vec_inst_regs[] = {
	COMMON_BASE_ENGINE_INSTANCE,
};

/* Blitter Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_blt_inst_regs[] = {
	COMMON_BASE_ENGINE_INSTANCE,
};

/* XE_LP - GSC Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_lp_gsc_inst_regs[] = {
	COMMON_BASE_ENGINE_INSTANCE,
};

/* Render / Compute Per-Engine-Instance for Xe3p (adds CSMQDEBUG) */
static const struct __guc_mmio_reg_descr xe3p_rc_inst_regs[] = {
	COMMON_BASE_ENGINE_INSTANCE,
	XE3P_BASE_ENGINE_INSTANCE,
};

/*
 * Empty list to prevent warnings about unknown class/instance types
 * as not all class/instance types have entries on all platforms.
 */
static const struct __guc_mmio_reg_descr empty_regs_list[] = {
};
211 
#define TO_GCAP_DEF_OWNER(x) (GUC_CAPTURE_LIST_INDEX_##x)
#define TO_GCAP_DEF_TYPE(x) (GUC_STATE_CAPTURE_TYPE_##x)
/*
 * Build a __guc_mmio_reg_descr_group initializer:
 * { list, num_regs, owner, type, engine-class }
 */
#define MAKE_REGLIST(regslist, regsowner, regstype, class) \
	{ \
		regslist, \
		ARRAY_SIZE(regslist), \
		TO_GCAP_DEF_OWNER(regsowner), \
		TO_GCAP_DEF_TYPE(regstype), \
		class \
	}

/* List of lists for legacy graphic product version < 1255 */
static const struct __guc_mmio_reg_descr_group xe_lp_lists[] = {
	MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
	MAKE_REGLIST(xe_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
	MAKE_REGLIST(xe_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
	MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
	MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
	MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
	MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
	MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
	{}
};

/* List of lists for graphic product version >= 1255 */
static const struct __guc_mmio_reg_descr_group xe_hpg_lists[] = {
	MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
	MAKE_REGLIST(xe_hpg_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
	MAKE_REGLIST(xe_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
	MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
	MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
	MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
	MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
	MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
	{}
};

/* List of lists for Xe3p and beyond */
static const struct __guc_mmio_reg_descr_group xe3p_lists[] = {
	MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
	MAKE_REGLIST(xe_hpg_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
	MAKE_REGLIST(xe3p_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
	MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
	MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
	MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
	MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
	MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
	{}
};

/* Printable names indexed by GUC_STATE_CAPTURE_TYPE_* */
static const char * const capture_list_type_names[] = {
	"Global",
	"Class",
	"Instance",
};

/* Printable names indexed by GUC_CAPTURE_LIST_CLASS_* */
static const char * const capture_engine_class_names[] = {
	"Render/Compute",
	"Video",
	"VideoEnhance",
	"Blitter",
	"GSC-Other",
};
283 
/* Cached result of one ADS capture-list build attempt (success or failure) */
struct __guc_capture_ads_cache {
	bool is_valid;	/* set once a build has been attempted and cached */
	void *ptr;	/* the built list, or NULL when the build failed */
	size_t size;	/* allocation size of @ptr in bytes */
	int status;	/* 0 or the negative error code of the attempt */
};
290 
struct xe_guc_state_capture {
	/* static register-descriptor tables selected for this device */
	const struct __guc_mmio_reg_descr_group *reglists;
	/*
	 * NOTE: steered registers have multiple instances depending on the HW configuration
	 * (slices or dual-sub-slices) and thus depends on HW fuses discovered
	 */
	struct __guc_mmio_reg_descr_group *extlists;
	/* cached ADS list builds, indexed by [owner][type][engine-class] */
	struct __guc_capture_ads_cache ads_cache[GUC_CAPTURE_LIST_INDEX_MAX]
						[GUC_STATE_CAPTURE_TYPE_MAX]
						[GUC_CAPTURE_LIST_CLASS_MAX];
	/* cached empty/null capture-list header handed to the GuC */
	void *ads_null_cache;
	/* pool of pre-allocated, currently-unused parsed-output nodes */
	struct list_head cachelist;
#define PREALLOC_NODES_MAX_COUNT (3 * GUC_MAX_ENGINE_CLASSES * GUC_MAX_INSTANCES_PER_CLASS)
#define PREALLOC_NODES_DEFAULT_NUMREGS 64

	/* size (in regs) of each node's per-type register array */
	int max_mmio_per_node;
	/* parsed-output nodes extracted from the GuC log, awaiting consumption */
	struct list_head outlist;
};
309 
310 static void
311 guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc,
312 					   struct __guc_capture_parsed_output *node);
313 
314 static const struct __guc_mmio_reg_descr_group *
315 guc_capture_get_device_reglist(struct xe_device *xe)
316 {
317 	if (GRAPHICS_VER(xe) >= 35)
318 		return xe3p_lists;
319 	else if (GRAPHICS_VERx100(xe) >= 1255)
320 		return xe_hpg_lists;
321 	else
322 		return xe_lp_lists;
323 }
324 
325 static const struct __guc_mmio_reg_descr_group *
326 guc_capture_get_one_list(const struct __guc_mmio_reg_descr_group *reglists,
327 			 u32 owner, u32 type, enum guc_capture_list_class_type capture_class)
328 {
329 	int i;
330 
331 	if (!reglists)
332 		return NULL;
333 
334 	for (i = 0; reglists[i].list; ++i) {
335 		if (reglists[i].owner == owner && reglists[i].type == type &&
336 		    (reglists[i].engine == capture_class ||
337 		     reglists[i].type == GUC_STATE_CAPTURE_TYPE_GLOBAL))
338 			return &reglists[i];
339 	}
340 
341 	return NULL;
342 }
343 
344 const struct __guc_mmio_reg_descr_group *
345 xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type,
346 				 enum guc_capture_list_class_type capture_class, bool is_ext)
347 {
348 	const struct __guc_mmio_reg_descr_group *reglists;
349 
350 	if (is_ext) {
351 		struct xe_guc *guc = &gt->uc.guc;
352 
353 		reglists = guc->capture->extlists;
354 	} else {
355 		reglists = guc_capture_get_device_reglist(gt_to_xe(gt));
356 	}
357 	return guc_capture_get_one_list(reglists, owner, type, capture_class);
358 }
359 
/* A steered (multicast) register that must be read per-DSS with steering set */
struct __ext_steer_reg {
	const char *name;
	struct xe_reg_mcr reg;
};

/* Steered registers present on all supported platforms (render/compute only) */
static const struct __ext_steer_reg xe_extregs[] = {
	{"SAMPLER_INSTDONE",		SAMPLER_INSTDONE},
	{"ROW_INSTDONE",		ROW_INSTDONE}
};

/* Additional steered registers for graphics version >= 12.55 (XeHPG+) */
static const struct __ext_steer_reg xehpg_extregs[] = {
	{"SC_INSTDONE",			XEHPG_SC_INSTDONE},
	{"SC_INSTDONE_EXTRA",		XEHPG_SC_INSTDONE_EXTRA},
	{"SC_INSTDONE_EXTRA2",		XEHPG_SC_INSTDONE_EXTRA2},
	{"INSTDONE_GEOM_SVGUNIT",	XEHPG_INSTDONE_GEOM_SVGUNIT}
};
376 
377 static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
378 			   const struct __ext_steer_reg *extlist,
379 			   u32 dss_id, u16 slice_id, u16 subslice_id)
380 {
381 	if (!ext || !extlist)
382 		return;
383 
384 	ext->reg = XE_REG(extlist->reg.__reg.addr);
385 	ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1);
386 	ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
387 	ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
388 	ext->dss_id = dss_id;
389 	ext->regname = extlist->name;
390 }
391 
392 static int
393 __alloc_ext_regs(struct drm_device *drm, struct __guc_mmio_reg_descr_group *newlist,
394 		 const struct __guc_mmio_reg_descr_group *rootlist, int num_regs)
395 {
396 	struct __guc_mmio_reg_descr *list;
397 
398 	list = drmm_kzalloc(drm, num_regs * sizeof(struct __guc_mmio_reg_descr), GFP_KERNEL);
399 	if (!list)
400 		return -ENOMEM;
401 
402 	newlist->list = list;
403 	newlist->num_regs = num_regs;
404 	newlist->owner = rootlist->owner;
405 	newlist->engine = rootlist->engine;
406 	newlist->type = rootlist->type;
407 
408 	return 0;
409 }
410 
411 static int guc_capture_get_steer_reg_num(struct xe_device *xe)
412 {
413 	int num = ARRAY_SIZE(xe_extregs);
414 
415 	if (GRAPHICS_VERx100(xe) >= 1255)
416 		num += ARRAY_SIZE(xehpg_extregs);
417 
418 	return num;
419 }
420 
/*
 * Build the run-time steered register list (extlists) for this GT, sized from
 * the discovered DSS fuse topology. No-op if the GT has no render/compute
 * engine, if the platform has no engine-class list, or if extlists was
 * already allocated. Failures are silent: capture then simply proceeds
 * without steered registers.
 */
static void guc_capture_alloc_steered_lists(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	u16 slice, subslice;
	int dss, i, total = 0;
	const struct __guc_mmio_reg_descr_group *lists = guc->capture->reglists;
	const struct __guc_mmio_reg_descr_group *list;
	struct __guc_mmio_reg_descr_group *extlists;
	struct __guc_mmio_reg_descr *extarray;
	bool has_xehpg_extregs = GRAPHICS_VERx100(gt_to_xe(gt)) >= 1255;
	struct drm_device *drm = &gt_to_xe(gt)->drm;
	bool has_rcs_ccs = false;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	/*
	 * If the GT has no rcs/ccs, there is no need to alloc a steered list.
	 * Currently only rcs/ccs have steering registers; if other engine
	 * types gain steering registers in the future, this condition check
	 * needs to be extended.
	 */
	for_each_hw_engine(hwe, gt, id) {
		if (xe_engine_class_to_guc_capture_class(hwe->class) ==
		    GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
			has_rcs_ccs = true;
			break;
		}
	}

	if (!has_rcs_ccs)
		return;

	/* steered registers currently only exist for the render-class */
	list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF,
					GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
					GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE);
	/*
	 * Skip if this platform has no engine class registers or if extlists
	 * was previously allocated
	 */
	if (!list || guc->capture->extlists)
		return;

	/* total = (enabled DSS units) x (steered regs per DSS) */
	total = bitmap_weight(gt->fuse_topo.g_dss_mask, sizeof(gt->fuse_topo.g_dss_mask) * 8) *
		guc_capture_get_steer_reg_num(guc_to_xe(guc));

	if (!total)
		return;

	/* allocate an extra for an end marker */
	extlists = drmm_kzalloc(drm, 2 * sizeof(struct __guc_mmio_reg_descr_group), GFP_KERNEL);
	if (!extlists)
		return;

	if (__alloc_ext_regs(drm, &extlists[0], list, total)) {
		drmm_kfree(drm, extlists);
		return;
	}

	/* For steering registers, the list is generated at run-time */
	extarray = (struct __guc_mmio_reg_descr *)extlists[0].list;
	for_each_dss_steering(dss, gt, slice, subslice) {
		for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) {
			__fill_ext_reg(extarray, &xe_extregs[i], dss, slice, subslice);
			++extarray;
		}

		if (has_xehpg_extregs)
			for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) {
				__fill_ext_reg(extarray, &xehpg_extregs[i], dss, slice, subslice);
				++extarray;
			}
	}

	extlists[0].num_regs = total;

	xe_gt_dbg(guc_to_gt(guc), "capture found %d ext-regs.\n", total);
	guc->capture->extlists = extlists;
}
500 
/*
 * Fill @ptr (an array of @num_entries guc_mmio_reg slots) with the static
 * register list for owner/type/class, followed by the run-time steered
 * extlist if one exists. Register values are poisoned with 0xDEADF00D so
 * unwritten entries are recognizable; GuC overwrites them at capture time.
 *
 * Return: 0 on success, -ENODEV if no reglists, -ENODATA if no match.
 */
static int
guc_capture_list_init(struct xe_guc *guc, u32 owner, u32 type,
		      enum guc_capture_list_class_type capture_class, struct guc_mmio_reg *ptr,
		      u16 num_entries)
{
	u32 ptr_idx = 0, list_idx = 0;
	const struct __guc_mmio_reg_descr_group *reglists = guc->capture->reglists;
	struct __guc_mmio_reg_descr_group *extlists = guc->capture->extlists;
	const struct __guc_mmio_reg_descr_group *match;
	u32 list_num;

	if (!reglists)
		return -ENODEV;

	match = guc_capture_get_one_list(reglists, owner, type, capture_class);
	if (!match)
		return -ENODATA;

	/* first, copy the static list (clamped to num_entries) */
	list_num = match->num_regs;
	for (list_idx = 0; ptr_idx < num_entries && list_idx < list_num; ++list_idx, ++ptr_idx) {
		ptr[ptr_idx].offset = match->list[list_idx].reg.addr;
		ptr[ptr_idx].value = 0xDEADF00D;
		ptr[ptr_idx].flags = match->list[list_idx].flags;
		ptr[ptr_idx].mask = match->list[list_idx].mask;
	}

	/*
	 * Then append the steered extlist starting right after the static
	 * list. If the first loop was truncated, ptr_idx >= num_entries here
	 * and this loop is skipped entirely.
	 */
	match = guc_capture_get_one_list(extlists, owner, type, capture_class);
	if (match)
		for (ptr_idx = list_num, list_idx = 0;
		     ptr_idx < num_entries && list_idx < match->num_regs;
		     ++ptr_idx, ++list_idx) {
			ptr[ptr_idx].offset = match->list[list_idx].reg.addr;
			ptr[ptr_idx].value = 0xDEADF00D;
			ptr[ptr_idx].flags = match->list[list_idx].flags;
			ptr[ptr_idx].mask = match->list[list_idx].mask;
		}

	if (ptr_idx < num_entries)
		xe_gt_dbg(guc_to_gt(guc), "Got short capture reglist init: %d out-of %d.\n",
			  ptr_idx, num_entries);

	return 0;
}
544 
/*
 * Total register count for owner/type/class: static list plus steered
 * extlist. Before the hw-config is known (extlists not yet built), a
 * worst-case estimate based on max DSS fuse bits is used for the
 * render/compute engine-class list.
 */
static int
guc_cap_list_num_regs(struct xe_guc *guc, u32 owner, u32 type,
		      enum guc_capture_list_class_type capture_class)
{
	const struct __guc_mmio_reg_descr_group *match;
	int num_regs = 0;

	match = guc_capture_get_one_list(guc->capture->reglists, owner, type, capture_class);
	if (match)
		num_regs = match->num_regs;

	match = guc_capture_get_one_list(guc->capture->extlists, owner, type, capture_class);
	if (match)
		num_regs += match->num_regs;
	else
		/*
		 * If a caller wants the full register dump size but we have
		 * not yet got the hw-config, which is before max_mmio_per_node
		 * is initialized, then provide a worst-case number for
		 * extlists based on max dss fuse bits, but only ever for
		 * render/compute
		 */
		if (owner == GUC_CAPTURE_LIST_INDEX_PF &&
		    type == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
		    capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE &&
		    !guc->capture->max_mmio_per_node)
			num_regs += guc_capture_get_steer_reg_num(guc_to_xe(guc)) *
				    XE_MAX_DSS_FUSE_BITS;

	return num_regs;
}
576 
/*
 * Compute the page-aligned byte size of the capture list for
 * owner/type/class. Cached results (including cached failures) are returned
 * directly. @is_purpose_est suppresses the missing-reglist warning for
 * callers that are only estimating sizes.
 *
 * Return: 0 on success (with *@size filled in), -ENODEV if no reglists,
 * -ENODATA for an intentionally empty list.
 */
static int
guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type,
			enum guc_capture_list_class_type capture_class,
			size_t *size, bool is_purpose_est)
{
	struct xe_guc_state_capture *gc = guc->capture;
	struct xe_gt *gt = guc_to_gt(guc);
	struct __guc_capture_ads_cache *cache;
	int num_regs;

	xe_gt_assert(gt, type < GUC_STATE_CAPTURE_TYPE_MAX);
	xe_gt_assert(gt, capture_class < GUC_CAPTURE_LIST_CLASS_MAX);

	cache = &gc->ads_cache[owner][type][capture_class];
	if (!gc->reglists) {
		xe_gt_warn(gt, "No capture reglist for this device\n");
		return -ENODEV;
	}

	/* fast path: a previous build (or failure) was cached */
	if (cache->is_valid) {
		*size = cache->size;
		return cache->status;
	}

	if (!is_purpose_est && owner == GUC_CAPTURE_LIST_INDEX_PF &&
	    !guc_capture_get_one_list(gc->reglists, owner, type, capture_class)) {
		if (type == GUC_STATE_CAPTURE_TYPE_GLOBAL)
			xe_gt_warn(gt, "Missing capture reglist: global!\n");
		else
			xe_gt_warn(gt, "Missing capture reglist: %s(%u):%s(%u)!\n",
				   capture_list_type_names[type], type,
				   capture_engine_class_names[capture_class], capture_class);
		return -ENODEV;
	}

	num_regs = guc_cap_list_num_regs(guc, owner, type, capture_class);
	/* intentional empty lists can exist depending on hw config */
	if (!num_regs)
		return -ENODATA;

	/* header plus one guc_mmio_reg per register, page aligned */
	if (size)
		*size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
				   (num_regs * sizeof(struct guc_mmio_reg)));

	return 0;
}
623 
/**
 * xe_guc_capture_getlistsize - Get list size for owner/type/class combination
 * @guc: The GuC object
 * @owner: PF/VF owner
 * @type: GuC capture register type
 * @capture_class: GuC capture engine class id
 * @size: Location where the page-aligned list size is written on success
 *
 * This function will get the list for the owner/type/class combination, and
 * return the page aligned list size.
 *
 * Returns: 0 on success or a negative error code on failure.
 */
int
xe_guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type,
			   enum guc_capture_list_class_type capture_class, size_t *size)
{
	return guc_capture_getlistsize(guc, owner, type, capture_class, size, false);
}
643 
644 /**
645  * xe_guc_capture_getlist - Get register capture list for owner/type/class
646  * combination
647  * @guc:	The GuC object
648  * @owner:	PF/VF owner
649  * @type:	GuC capture register type
650  * @capture_class:	GuC capture engine class id
651  * @outptr:	Point to cached register capture list
652  *
653  * This function will get the register capture list for the owner/type/class
654  * combination.
655  *
656  * Returns: 0 on success or a negative error code on failure.
657  */
658 int
659 xe_guc_capture_getlist(struct xe_guc *guc, u32 owner, u32 type,
660 		       enum guc_capture_list_class_type capture_class, void **outptr)
661 {
662 	struct xe_guc_state_capture *gc = guc->capture;
663 	struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][capture_class];
664 	struct guc_debug_capture_list *listnode;
665 	int ret, num_regs;
666 	u8 *caplist, *tmp;
667 	size_t size = 0;
668 
669 	if (!gc->reglists)
670 		return -ENODEV;
671 
672 	if (cache->is_valid) {
673 		*outptr = cache->ptr;
674 		return cache->status;
675 	}
676 
677 	ret = xe_guc_capture_getlistsize(guc, owner, type, capture_class, &size);
678 	if (ret) {
679 		cache->is_valid = true;
680 		cache->ptr = NULL;
681 		cache->size = 0;
682 		cache->status = ret;
683 		return ret;
684 	}
685 
686 	caplist = drmm_kzalloc(guc_to_drm(guc), size, GFP_KERNEL);
687 	if (!caplist)
688 		return -ENOMEM;
689 
690 	/* populate capture list header */
691 	tmp = caplist;
692 	num_regs = guc_cap_list_num_regs(guc, owner, type, capture_class);
693 	listnode = (struct guc_debug_capture_list *)tmp;
694 	listnode->header.info = FIELD_PREP(GUC_CAPTURELISTHDR_NUMDESCR, (u32)num_regs);
695 
696 	/* populate list of register descriptor */
697 	tmp += sizeof(struct guc_debug_capture_list);
698 	guc_capture_list_init(guc, owner, type, capture_class,
699 			      (struct guc_mmio_reg *)tmp, num_regs);
700 
701 	/* cache this list */
702 	cache->is_valid = true;
703 	cache->ptr = caplist;
704 	cache->size = size;
705 	cache->status = 0;
706 
707 	*outptr = caplist;
708 
709 	return 0;
710 }
711 
712 /**
713  * xe_guc_capture_getnullheader - Get a null list for register capture
714  * @guc:	The GuC object
715  * @outptr:	Point to cached register capture list
716  * @size:	Point to the size
717  *
718  * This function will alloc for a null list for register capture.
719  *
720  * Returns: 0 on success or a negative error code on failure.
721  */
722 int
723 xe_guc_capture_getnullheader(struct xe_guc *guc, void **outptr, size_t *size)
724 {
725 	struct xe_guc_state_capture *gc = guc->capture;
726 	int tmp = sizeof(u32) * 4;
727 	void *null_header;
728 
729 	if (gc->ads_null_cache) {
730 		*outptr = gc->ads_null_cache;
731 		*size = tmp;
732 		return 0;
733 	}
734 
735 	null_header = drmm_kzalloc(guc_to_drm(guc), tmp, GFP_KERNEL);
736 	if (!null_header)
737 		return -ENOMEM;
738 
739 	gc->ads_null_cache = null_header;
740 	*outptr = null_header;
741 	*size = tmp;
742 
743 	return 0;
744 }
745 
746 /**
747  * xe_guc_capture_ads_input_worst_size - Calculate the worst size for GuC register capture
748  * @guc: point to xe_guc structure
749  *
750  * Calculate the worst size for GuC register capture by including all possible engines classes.
751  *
752  * Returns: Calculated size
753  */
754 size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc)
755 {
756 	size_t total_size, class_size, instance_size, global_size;
757 	int i, j;
758 
759 	/*
760 	 * This function calculates the worst case register lists size by
761 	 * including all possible engines classes. It is called during the
762 	 * first of a two-phase GuC (and ADS-population) initialization
763 	 * sequence, that is, during the pre-hwconfig phase before we have
764 	 * the exact engine fusing info.
765 	 */
766 	total_size = PAGE_SIZE;	/* Pad a page in front for empty lists */
767 	for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
768 		for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) {
769 			if (xe_guc_capture_getlistsize(guc, i,
770 						       GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
771 						       j, &class_size) < 0)
772 				class_size = 0;
773 			if (xe_guc_capture_getlistsize(guc, i,
774 						       GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
775 						       j, &instance_size) < 0)
776 				instance_size = 0;
777 			total_size += class_size + instance_size;
778 		}
779 		if (xe_guc_capture_getlistsize(guc, i,
780 					       GUC_STATE_CAPTURE_TYPE_GLOBAL,
781 					       0, &global_size) < 0)
782 			global_size = 0;
783 		total_size += global_size;
784 	}
785 
786 	return PAGE_ALIGN(total_size);
787 }
788 
/*
 * Estimate the minimum log-buffer space needed to hold one capture dump for
 * every engine instance on this GT (headers plus the three per-type lists).
 *
 * Return: estimated byte count, or -ENODEV if capture is not initialized.
 */
static int guc_capture_output_size_est(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	int capture_size = 0;
	size_t tmp = 0;

	if (!guc->capture)
		return -ENODEV;

	/*
	 * If every single engine-instance suffered a failure in quick succession but
	 * were all unrelated, then a burst of multiple error-capture events would dump
	 * registers for every one engine instance, one at a time. In this case, GuC
	 * would even dump the global-registers repeatedly.
	 *
	 * For each engine instance, there would be 1 x guc_state_capture_group_t output
	 * followed by 3 x guc_state_capture_t lists. The latter is how the register
	 * dumps are split across different register types (where the '3' are global vs class
	 * vs instance).
	 */
	for_each_hw_engine(hwe, gt, id) {
		enum guc_capture_list_class_type capture_class;

		capture_class = xe_engine_class_to_guc_capture_class(hwe->class);
		capture_size += sizeof(struct guc_state_capture_group_header_t) +
					 (3 * sizeof(struct guc_state_capture_header_t));

		/* is_purpose_est = true: suppress missing-list warnings while estimating */
		if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_GLOBAL,
					     0, &tmp, true))
			capture_size += tmp;
		if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
					     capture_class, &tmp, true))
			capture_size += tmp;
		if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
					     capture_class, &tmp, true))
			capture_size += tmp;
	}

	return capture_size;
}
832 
/*
 * Add on a 3x multiplier to allow for multiple back-to-back captures occurring
 * before the Xe can read the data out and process it
 */
#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3

/*
 * Sanity-check the configured capture-region size against the estimated
 * worst-case output. Only emits debug messages; never fails.
 */
static void check_guc_capture_size(struct xe_guc *guc)
{
	int capture_size = guc_capture_output_size_est(guc);
	int spare_size = capture_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
	u32 buffer_size = XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE;

	/*
	 * NOTE: capture_size is much smaller than the capture region
	 * allocation (DG2: <80K vs 1MB).
	 * Additionally, its based on space needed to fit all engines getting
	 * reset at once within the same G2H handler task slot. This is very
	 * unlikely. However, if GuC really does run out of space for whatever
	 * reason, we will see an separate warning message when processing the
	 * G2H event capture-notification, search for:
	 * xe_guc_STATE_CAPTURE_EVENT_STATUS_NOSPACE.
	 */
	if (capture_size < 0)
		xe_gt_dbg(guc_to_gt(guc),
			  "Failed to calculate error state capture buffer minimum size: %d!\n",
			  capture_size);
	if (capture_size > buffer_size)
		xe_gt_dbg(guc_to_gt(guc), "Error state capture buffer maybe small: %d < %d\n",
			  buffer_size, capture_size);
	else if (spare_size > buffer_size)
		xe_gt_dbg(guc_to_gt(guc),
			  "Error state capture buffer lacks spare size: %d < %d (min = %d)\n",
			  buffer_size, spare_size, capture_size);
}
867 
/* Attach @node at the head of @list */
static void
guc_capture_add_node_to_list(struct __guc_capture_parsed_output *node,
			     struct list_head *list)
{
	list_add(&node->link, list);
}
874 
/*
 * Publish @node on the outlist for later match-up by devcoredump,
 * first retiring any stale unlocked outlist nodes with the same guc_id.
 */
static void
guc_capture_add_node_to_outlist(struct xe_guc_state_capture *gc,
				struct __guc_capture_parsed_output *node)
{
	guc_capture_remove_stale_matches_from_list(gc, node);
	guc_capture_add_node_to_list(node, &gc->outlist);
}
882 
/* Return a blank (or recycled) node to the cache list for reuse. */
static void
guc_capture_add_node_to_cachelist(struct xe_guc_state_capture *gc,
				  struct __guc_capture_parsed_output *node)
{
	guc_capture_add_node_to_list(node, &gc->cachelist);
}
889 
890 static void
891 guc_capture_free_outlist_node(struct xe_guc_state_capture *gc,
892 			      struct __guc_capture_parsed_output *n)
893 {
894 	if (n) {
895 		n->locked = 0;
896 		list_del(&n->link);
897 		/* put node back to cache list */
898 		guc_capture_add_node_to_cachelist(gc, n);
899 	}
900 }
901 
/*
 * Drop every unlocked outlist entry (other than @node itself) that carries
 * the same guc_id as @node, so only the freshest capture for that context
 * remains available for match-up. Locked nodes are in use and left alone.
 */
static void
guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc,
					   struct __guc_capture_parsed_output *node)
{
	struct __guc_capture_parsed_output *n, *ntmp;
	int guc_id = node->guc_id;

	list_for_each_entry_safe(n, ntmp, &gc->outlist, link) {
		if (n != node && !n->locked && n->guc_id == guc_id)
			guc_capture_free_outlist_node(gc, n);
	}
}
914 
915 static void
916 guc_capture_init_node(struct xe_guc *guc, struct __guc_capture_parsed_output *node)
917 {
918 	struct guc_mmio_reg *tmp[GUC_STATE_CAPTURE_TYPE_MAX];
919 	int i;
920 
921 	for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
922 		tmp[i] = node->reginfo[i].regs;
923 		memset(tmp[i], 0, sizeof(struct guc_mmio_reg) *
924 		       guc->capture->max_mmio_per_node);
925 	}
926 	memset(node, 0, sizeof(*node));
927 	for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i)
928 		node->reginfo[i].regs = tmp[i];
929 
930 	INIT_LIST_HEAD(&node->link);
931 }
932 
933 /**
934  * DOC: Init, G2H-event and reporting flows for GuC-error-capture
935  *
936  * KMD Init time flows:
937  * --------------------
938  *     --> alloc A: GuC input capture regs lists (registered to GuC via ADS).
939  *                  xe_guc_ads acquires the register lists by calling
940  *                  xe_guc_capture_getlistsize and xe_guc_capture_getlist 'n' times,
941  *                  where n = 1 for global-reg-list +
942  *                            num_engine_classes for class-reg-list +
943  *                            num_engine_classes for instance-reg-list
944  *                               (since all instances of the same engine-class type
945  *                                have an identical engine-instance register-list).
946  *                  ADS module also calls separately for PF vs VF.
947  *
948  *     --> alloc B: GuC output capture buf (registered via guc_init_params(log_param))
 *                  Size = XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE (warns if too small)
950  *                  Note2: 'x 3' to hold multiple capture groups
951  *
952  * GUC Runtime notify capture:
953  * --------------------------
954  *     --> G2H STATE_CAPTURE_NOTIFICATION
955  *                   L--> xe_guc_capture_process
956  *                           L--> Loop through B (head..tail) and for each engine instance's
957  *                                err-state-captured register-list we find, we alloc 'C':
958  *      --> alloc C: A capture-output-node structure that includes misc capture info along
959  *                   with 3 register list dumps (global, engine-class and engine-instance)
960  *                   This node is created from a pre-allocated list of blank nodes in
961  *                   guc->capture->cachelist and populated with the error-capture
962  *                   data from GuC and then it's added into guc->capture->outlist linked
963  *                   list. This list is used for matchup and printout by xe_devcoredump_read
964  *                   and xe_engine_snapshot_print, (when user invokes the devcoredump sysfs).
965  *
966  * GUC --> notify context reset:
967  * -----------------------------
968  *     --> guc_exec_queue_timedout_job
969  *                   L--> xe_devcoredump
970  *                          L--> devcoredump_snapshot
971  *                               --> xe_hw_engine_snapshot_capture
972  *                               --> xe_engine_manual_capture(For manual capture)
973  *
974  * User Sysfs / Debugfs
975  * --------------------
976  *      --> xe_devcoredump_read->
977  *             L--> xxx_snapshot_print
978  *                    L--> xe_engine_snapshot_print
979  *                         Print register lists values saved at
980  *                         guc->capture->outlist
981  *
982  */
983 
984 static int guc_capture_buf_cnt(struct __guc_capture_bufstate *buf)
985 {
986 	if (buf->wr >= buf->rd)
987 		return (buf->wr - buf->rd);
988 	return (buf->size - buf->rd) + buf->wr;
989 }
990 
991 static int guc_capture_buf_cnt_to_end(struct __guc_capture_bufstate *buf)
992 {
993 	if (buf->rd > buf->wr)
994 		return (buf->size - buf->rd);
995 	return (buf->wr - buf->rd);
996 }
997 
998 /*
999  * GuC's error-capture output is a ring buffer populated in a byte-stream fashion:
1000  *
1001  * The GuC Log buffer region for error-capture is managed like a ring buffer.
1002  * The GuC firmware dumps error capture logs into this ring in a byte-stream flow.
1003  * Additionally, as per the current and foreseeable future, all packed error-
1004  * capture output structures are dword aligned.
1005  *
1006  * That said, if the GuC firmware is in the midst of writing a structure that is larger
 * than one dword but the tail end of the err-capture buffer-region has less space left,
1008  * we would need to extract that structure one dword at a time straddled across the end,
1009  * onto the start of the ring.
1010  *
1011  * Below function, guc_capture_log_remove_bytes is a helper for that. All callers of this
1012  * function would typically do a straight-up memcpy from the ring contents and will only
1013  * call this helper if their structure-extraction is straddling across the end of the
1014  * ring. GuC firmware does not add any padding. The reason for the no-padding is to ease
1015  * scalability for future expansion of output data types without requiring a redesign
1016  * of the flow controls.
1017  */
/*
 * Copy @bytes_needed bytes out of the error-capture ring into @out,
 * handling wrap-around at the end of the ring region.
 *
 * Returns the number of bytes actually copied, or -1 when the ring does
 * not hold @bytes_needed bytes. Callers treat any result != bytes_needed
 * as a failed extraction.
 */
static int
guc_capture_log_remove_bytes(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
			     void *out, int bytes_needed)
{
#define GUC_CAPTURE_LOG_BUF_COPY_RETRY_MAX	3

	int fill_size = 0, tries = GUC_CAPTURE_LOG_BUF_COPY_RETRY_MAX;
	int copy_size, avail;

	/* extraction granularity is dwords; anything else is a caller bug */
	xe_assert(guc_to_xe(guc), bytes_needed % sizeof(u32) == 0);

	if (bytes_needed > guc_capture_buf_cnt(buf))
		return -1;

	/* bounded retries: at most one wrap plus the two straddled segments */
	while (bytes_needed > 0 && tries--) {
		int misaligned;

		avail = guc_capture_buf_cnt_to_end(buf);
		misaligned = avail % sizeof(u32);
		/* wrap if at end */
		if (!avail) {
			/* output stream clipped */
			if (!buf->rd)
				return fill_size;
			buf->rd = 0;
			continue;
		}

		/* Only copy to u32 aligned data */
		copy_size = avail < bytes_needed ? avail - misaligned : bytes_needed;
		xe_map_memcpy_from(guc_to_xe(guc), out + fill_size, &guc->log.bo->vmap,
				   buf->data_offset + buf->rd, copy_size);
		buf->rd += copy_size;
		fill_size += copy_size;
		bytes_needed -= copy_size;

		if (misaligned)
			xe_gt_warn(guc_to_gt(guc),
				   "Bytes extraction not dword aligned, clipping.\n");
	}

	return fill_size;
}
1061 
1062 static int
1063 guc_capture_log_get_group_hdr(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
1064 			      struct guc_state_capture_group_header_t *ghdr)
1065 {
1066 	int fullsize = sizeof(struct guc_state_capture_group_header_t);
1067 
1068 	if (guc_capture_log_remove_bytes(guc, buf, ghdr, fullsize) != fullsize)
1069 		return -1;
1070 	return 0;
1071 }
1072 
1073 static int
1074 guc_capture_log_get_data_hdr(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
1075 			     struct guc_state_capture_header_t *hdr)
1076 {
1077 	int fullsize = sizeof(struct guc_state_capture_header_t);
1078 
1079 	if (guc_capture_log_remove_bytes(guc, buf, hdr, fullsize) != fullsize)
1080 		return -1;
1081 	return 0;
1082 }
1083 
1084 static int
1085 guc_capture_log_get_register(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
1086 			     struct guc_mmio_reg *reg)
1087 {
1088 	int fullsize = sizeof(struct guc_mmio_reg);
1089 
1090 	if (guc_capture_log_remove_bytes(guc, buf, reg, fullsize) != fullsize)
1091 		return -1;
1092 	return 0;
1093 }
1094 
/*
 * Obtain a blank node: preferentially from the cachelist, otherwise by
 * stealing an unlocked node back from the outlist. The chosen node is
 * unlinked and re-initialized. Returns NULL if every node is locked.
 */
static struct __guc_capture_parsed_output *
guc_capture_get_prealloc_node(struct xe_guc *guc)
{
	struct __guc_capture_parsed_output *found = NULL;

	if (!list_empty(&guc->capture->cachelist)) {
		struct __guc_capture_parsed_output *n, *ntmp;

		/* get first avail node from the cache list */
		list_for_each_entry_safe(n, ntmp, &guc->capture->cachelist, link) {
			found = n;
			break;
		}
	} else {
		struct __guc_capture_parsed_output *n, *ntmp;

		/*
		 * traverse reversed and steal back the oldest node already
		 * allocated
		 *
		 * NOTE(review): there is no break below, so @found ends up
		 * as the last unlocked entry visited, i.e. the one nearest
		 * the list head (most recently list_add'ed) — this looks
		 * inconsistent with the "oldest" intent above; confirm
		 * before changing, as the same pattern exists elsewhere.
		 */
		list_for_each_entry_safe_reverse(n, ntmp, &guc->capture->outlist, link) {
			if (!n->locked)
				found = n;
		}
	}
	if (found) {
		list_del(&found->link);
		guc_capture_init_node(guc, found);
	}

	return found;
}
1127 
1128 static struct __guc_capture_parsed_output *
1129 guc_capture_clone_node(struct xe_guc *guc, struct __guc_capture_parsed_output *original,
1130 		       u32 keep_reglist_mask)
1131 {
1132 	struct __guc_capture_parsed_output *new;
1133 	int i;
1134 
1135 	new = guc_capture_get_prealloc_node(guc);
1136 	if (!new)
1137 		return NULL;
1138 	if (!original)
1139 		return new;
1140 
1141 	new->is_partial = original->is_partial;
1142 
1143 	/* copy reg-lists that we want to clone */
1144 	for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
1145 		if (keep_reglist_mask & BIT(i)) {
1146 			XE_WARN_ON(original->reginfo[i].num_regs  >
1147 				   guc->capture->max_mmio_per_node);
1148 
1149 			memcpy(new->reginfo[i].regs, original->reginfo[i].regs,
1150 			       original->reginfo[i].num_regs * sizeof(struct guc_mmio_reg));
1151 
1152 			new->reginfo[i].num_regs = original->reginfo[i].num_regs;
1153 			new->reginfo[i].vfid  = original->reginfo[i].vfid;
1154 
1155 			if (i == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS) {
1156 				new->eng_class = original->eng_class;
1157 			} else if (i == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
1158 				new->eng_inst = original->eng_inst;
1159 				new->guc_id = original->guc_id;
1160 				new->lrca = original->lrca;
1161 			}
1162 		}
1163 	}
1164 
1165 	return new;
1166 }
1167 
/*
 * Parse one capture group out of the GuC error-capture ring and convert
 * it into parsed-output node(s) published on guc->capture->outlist.
 *
 * Returns 0 on success, -ENODATA when the ring is empty, -EIO on a
 * malformed/short stream and -ENOMEM when no blank node is available.
 */
static int
guc_capture_extract_reglists(struct xe_guc *guc, struct __guc_capture_bufstate *buf)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct guc_state_capture_group_header_t ghdr = {0};
	struct guc_state_capture_header_t hdr = {0};
	struct __guc_capture_parsed_output *node = NULL;
	struct guc_mmio_reg *regs = NULL;
	int i, numlists, numregs, ret = 0;
	enum guc_state_capture_type datatype;
	struct guc_mmio_reg tmp;
	bool is_partial = false;

	i = guc_capture_buf_cnt(buf);
	if (!i)
		return -ENODATA;

	if (i % sizeof(u32)) {
		xe_gt_warn(gt, "Got mis-aligned register capture entries\n");
		ret = -EIO;
		goto bailout;
	}

	/* first get the capture group header */
	if (guc_capture_log_get_group_hdr(guc, buf, &ghdr)) {
		ret = -EIO;
		goto bailout;
	}
	/*
	 * we would typically expect a layout as below where n would be expected to be
	 * anywhere between 3 to n where n > 3 if we are seeing multiple dependent engine
	 * instances being reset together.
	 * ____________________________________________
	 * | Capture Group                            |
	 * | ________________________________________ |
	 * | | Capture Group Header:                | |
	 * | |  - num_captures = 5                  | |
	 * | |______________________________________| |
	 * | ________________________________________ |
	 * | | Capture1:                            | |
	 * | |  Hdr: GLOBAL, numregs=a              | |
	 * | | ____________________________________ | |
	 * | | | Reglist                          | | |
	 * | | | - reg1, reg2, ... rega           | | |
	 * | | |__________________________________| | |
	 * | |______________________________________| |
	 * | ________________________________________ |
	 * | | Capture2:                            | |
	 * | |  Hdr: CLASS=RENDER/COMPUTE, numregs=b| |
	 * | | ____________________________________ | |
	 * | | | Reglist                          | | |
	 * | | | - reg1, reg2, ... regb           | | |
	 * | | |__________________________________| | |
	 * | |______________________________________| |
	 * | ________________________________________ |
	 * | | Capture3:                            | |
	 * | |  Hdr: INSTANCE=RCS, numregs=c        | |
	 * | | ____________________________________ | |
	 * | | | Reglist                          | | |
	 * | | | - reg1, reg2, ... regc           | | |
	 * | | |__________________________________| | |
	 * | |______________________________________| |
	 * | ________________________________________ |
	 * | | Capture4:                            | |
	 * | |  Hdr: CLASS=RENDER/COMPUTE, numregs=d| |
	 * | | ____________________________________ | |
	 * | | | Reglist                          | | |
	 * | | | - reg1, reg2, ... regd           | | |
	 * | | |__________________________________| | |
	 * | |______________________________________| |
	 * | ________________________________________ |
	 * | | Capture5:                            | |
	 * | |  Hdr: INSTANCE=CCS0, numregs=e       | |
	 * | | ____________________________________ | |
	 * | | | Reglist                          | | |
	 * | | | - reg1, reg2, ... rege           | | |
	 * | | |__________________________________| | |
	 * |__________________________________________|
	 */
	is_partial = FIELD_GET(GUC_STATE_CAPTURE_GROUP_HEADER_CAPTURE_GROUP_TYPE, ghdr.info);
	numlists = FIELD_GET(GUC_STATE_CAPTURE_GROUP_HEADER_NUM_CAPTURES, ghdr.info);

	while (numlists--) {
		if (guc_capture_log_get_data_hdr(guc, buf, &hdr)) {
			ret = -EIO;
			break;
		}

		datatype = FIELD_GET(GUC_STATE_CAPTURE_HEADER_CAPTURE_TYPE, hdr.info);
		if (datatype > GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
			/* unknown capture type - skip over to next capture set */
			numregs = FIELD_GET(GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES,
					    hdr.num_mmio_entries);
			/* drain the register entries into a scratch struct */
			while (numregs--) {
				if (guc_capture_log_get_register(guc, buf, &tmp)) {
					ret = -EIO;
					break;
				}
			}
			continue;
		} else if (node) {
			/*
			 * Based on the current capture type and what we have so far,
			 * decide if we should add the current node into the internal
			 * linked list for match-up when xe_devcoredump calls later
			 * (and alloc a blank node for the next set of reglists)
			 * or continue with the same node or clone the current node
			 * but only retain the global or class registers (such as the
			 * case of dependent engine resets).
			 */
			if (datatype == GUC_STATE_CAPTURE_TYPE_GLOBAL) {
				guc_capture_add_node_to_outlist(guc->capture, node);
				node = NULL;
			} else if (datatype == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
				   node->reginfo[GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS].num_regs) {
				/* Add to list, clone node and duplicate global list */
				guc_capture_add_node_to_outlist(guc->capture, node);
				node = guc_capture_clone_node(guc, node,
							      GCAP_PARSED_REGLIST_INDEX_GLOBAL);
			} else if (datatype == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE &&
				   node->reginfo[GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE].num_regs) {
				/* Add to list, clone node and duplicate global + class lists */
				guc_capture_add_node_to_outlist(guc->capture, node);
				node = guc_capture_clone_node(guc, node,
							      (GCAP_PARSED_REGLIST_INDEX_GLOBAL |
							      GCAP_PARSED_REGLIST_INDEX_ENGCLASS));
			}
		}

		if (!node) {
			node = guc_capture_get_prealloc_node(guc);
			if (!node) {
				ret = -ENOMEM;
				break;
			}
			if (datatype != GUC_STATE_CAPTURE_TYPE_GLOBAL)
				xe_gt_dbg(gt, "Register capture missing global dump: %08x!\n",
					  datatype);
		}
		node->is_partial = is_partial;
		node->reginfo[datatype].vfid = FIELD_GET(GUC_STATE_CAPTURE_HEADER_VFID, hdr.owner);
		node->source = XE_ENGINE_CAPTURE_SOURCE_GUC;
		node->type = datatype;

		/* record identity fields carried by this header type */
		switch (datatype) {
		case GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE:
			node->eng_class = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS,
						    hdr.info);
			node->eng_inst = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_INSTANCE,
						   hdr.info);
			node->lrca = hdr.lrca;
			node->guc_id = hdr.guc_id;
			break;
		case GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS:
			node->eng_class = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS,
						    hdr.info);
			break;
		default:
			break;
		}

		numregs = FIELD_GET(GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES,
				    hdr.num_mmio_entries);
		if (numregs > guc->capture->max_mmio_per_node) {
			xe_gt_dbg(gt, "Register capture list extraction clipped by prealloc!\n");
			numregs = guc->capture->max_mmio_per_node;
		}
		node->reginfo[datatype].num_regs = numregs;
		regs = node->reginfo[datatype].regs;
		i = 0;
		while (numregs--) {
			if (guc_capture_log_get_register(guc, buf, &regs[i++])) {
				ret = -EIO;
				break;
			}
		}
	}

bailout:
	if (node) {
		/* If we have data, add to linked list for match-up when xe_devcoredump calls */
		for (i = GUC_STATE_CAPTURE_TYPE_GLOBAL; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
			if (node->reginfo[i].regs) {
				guc_capture_add_node_to_outlist(guc->capture, node);
				node = NULL;
				break;
			}
		}
		if (node) /* else return it back to cache list */
			guc_capture_add_node_to_cachelist(guc->capture, node);
	}
	return ret;
}
1362 
/*
 * Notify the GuC firmware that the host has finished draining the
 * error-capture log region, so the firmware may reuse the space.
 */
static int __guc_capture_flushlog_complete(struct xe_guc *guc)
{
	u32 action[] = {
		XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE,
		GUC_LOG_TYPE_STATE_CAPTURE
	};

	/* sent via the G2H-handler send path */
	return xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
}
1372 
1373 static void __guc_capture_process_output(struct xe_guc *guc)
1374 {
1375 	unsigned int buffer_size, read_offset, write_offset, full_count;
1376 	struct xe_uc *uc = container_of(guc, typeof(*uc), guc);
1377 	struct guc_log_buffer_state log_buf_state_local;
1378 	struct __guc_capture_bufstate buf;
1379 	bool new_overflow;
1380 	int ret, tmp;
1381 	u32 log_buf_state_offset;
1382 	u32 src_data_offset;
1383 
1384 	log_buf_state_offset = sizeof(struct guc_log_buffer_state) * GUC_LOG_TYPE_STATE_CAPTURE;
1385 	src_data_offset = XE_GUC_LOG_STATE_CAPTURE_OFFSET;
1386 
1387 	/*
1388 	 * Make a copy of the state structure, inside GuC log buffer
1389 	 * (which is uncached mapped), on the stack to avoid reading
1390 	 * from it multiple times.
1391 	 */
1392 	xe_map_memcpy_from(guc_to_xe(guc), &log_buf_state_local, &guc->log.bo->vmap,
1393 			   log_buf_state_offset, sizeof(struct guc_log_buffer_state));
1394 
1395 	buffer_size = XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE;
1396 	read_offset = log_buf_state_local.read_ptr;
1397 	write_offset = log_buf_state_local.sampled_write_ptr;
1398 	full_count = FIELD_GET(GUC_LOG_BUFFER_STATE_BUFFER_FULL_CNT, log_buf_state_local.flags);
1399 
1400 	/* Bookkeeping stuff */
1401 	tmp = FIELD_GET(GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE, log_buf_state_local.flags);
1402 	guc->log.stats[GUC_LOG_TYPE_STATE_CAPTURE].flush += tmp;
1403 	new_overflow = xe_guc_check_log_buf_overflow(&guc->log, GUC_LOG_TYPE_STATE_CAPTURE,
1404 						     full_count);
1405 
1406 	/* Now copy the actual logs. */
1407 	if (unlikely(new_overflow)) {
1408 		/* copy the whole buffer in case of overflow */
1409 		read_offset = 0;
1410 		write_offset = buffer_size;
1411 	} else if (unlikely((read_offset > buffer_size) ||
1412 			(write_offset > buffer_size))) {
1413 		xe_gt_err(guc_to_gt(guc),
1414 			  "Register capture buffer in invalid state: read = 0x%X, size = 0x%X!\n",
1415 			  read_offset, buffer_size);
1416 		/* copy whole buffer as offsets are unreliable */
1417 		read_offset = 0;
1418 		write_offset = buffer_size;
1419 	}
1420 
1421 	buf.size = buffer_size;
1422 	buf.rd = read_offset;
1423 	buf.wr = write_offset;
1424 	buf.data_offset = src_data_offset;
1425 
1426 	if (!xe_guc_read_stopped(guc)) {
1427 		do {
1428 			ret = guc_capture_extract_reglists(guc, &buf);
1429 			if (ret && ret != -ENODATA)
1430 				xe_gt_dbg(guc_to_gt(guc), "Capture extraction failed:%d\n", ret);
1431 		} while (ret >= 0);
1432 	}
1433 
1434 	/* Update the state of log buffer err-cap state */
1435 	xe_map_wr(guc_to_xe(guc), &guc->log.bo->vmap,
1436 		  log_buf_state_offset + offsetof(struct guc_log_buffer_state, read_ptr), u32,
1437 		  write_offset);
1438 
1439 	/*
1440 	 * Clear the flush_to_file from local first, the local was loaded by above
1441 	 * xe_map_memcpy_from, then write out the "updated local" through
1442 	 * xe_map_wr()
1443 	 */
1444 	log_buf_state_local.flags &= ~GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE;
1445 	xe_map_wr(guc_to_xe(guc), &guc->log.bo->vmap,
1446 		  log_buf_state_offset + offsetof(struct guc_log_buffer_state, flags), u32,
1447 		  log_buf_state_local.flags);
1448 	__guc_capture_flushlog_complete(guc);
1449 }
1450 
1451 /*
1452  * xe_guc_capture_process - Process GuC register captured data
1453  * @guc: The GuC object
1454  *
1455  * When GuC captured data is ready, GuC will send message
1456  * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host, this function will be
1457  * called to process the data comes with the message.
1458  *
1459  * Returns: None
1460  */
1461 void xe_guc_capture_process(struct xe_guc *guc)
1462 {
1463 	if (guc->capture)
1464 		__guc_capture_process_output(guc);
1465 }
1466 
1467 static struct __guc_capture_parsed_output *
1468 guc_capture_alloc_one_node(struct xe_guc *guc)
1469 {
1470 	struct drm_device *drm = guc_to_drm(guc);
1471 	struct __guc_capture_parsed_output *new;
1472 	int i;
1473 
1474 	new = drmm_kzalloc(drm, sizeof(*new), GFP_KERNEL);
1475 	if (!new)
1476 		return NULL;
1477 
1478 	for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
1479 		new->reginfo[i].regs = drmm_kzalloc(drm, guc->capture->max_mmio_per_node *
1480 						    sizeof(struct guc_mmio_reg), GFP_KERNEL);
1481 		if (!new->reginfo[i].regs) {
1482 			while (i)
1483 				drmm_kfree(drm, new->reginfo[--i].regs);
1484 			drmm_kfree(drm, new);
1485 			return NULL;
1486 		}
1487 	}
1488 	guc_capture_init_node(guc, new);
1489 
1490 	return new;
1491 }
1492 
1493 static void
1494 __guc_capture_create_prealloc_nodes(struct xe_guc *guc)
1495 {
1496 	struct __guc_capture_parsed_output *node = NULL;
1497 	int i;
1498 
1499 	for (i = 0; i < PREALLOC_NODES_MAX_COUNT; ++i) {
1500 		node = guc_capture_alloc_one_node(guc);
1501 		if (!node) {
1502 			xe_gt_warn(guc_to_gt(guc), "Register capture pre-alloc-cache failure\n");
1503 			/* dont free the priors, use what we got and cleanup at shutdown */
1504 			return;
1505 		}
1506 		guc_capture_add_node_to_cachelist(guc->capture, node);
1507 	}
1508 }
1509 
1510 static int
1511 guc_get_max_reglist_count(struct xe_guc *guc)
1512 {
1513 	int i, j, k, tmp, maxregcount = 0;
1514 
1515 	for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; ++i) {
1516 		for (j = 0; j < GUC_STATE_CAPTURE_TYPE_MAX; ++j) {
1517 			for (k = 0; k < GUC_CAPTURE_LIST_CLASS_MAX; ++k) {
1518 				const struct __guc_mmio_reg_descr_group *match;
1519 
1520 				if (j == GUC_STATE_CAPTURE_TYPE_GLOBAL && k > 0)
1521 					continue;
1522 
1523 				tmp = 0;
1524 				match = guc_capture_get_one_list(guc->capture->reglists, i, j, k);
1525 				if (match)
1526 					tmp = match->num_regs;
1527 
1528 				match = guc_capture_get_one_list(guc->capture->extlists, i, j, k);
1529 				if (match)
1530 					tmp += match->num_regs;
1531 
1532 				if (tmp > maxregcount)
1533 					maxregcount = tmp;
1534 			}
1535 		}
1536 	}
1537 	if (!maxregcount)
1538 		maxregcount = PREALLOC_NODES_DEFAULT_NUMREGS;
1539 
1540 	return maxregcount;
1541 }
1542 
/*
 * One-time setup of the blank-node cache: size each node's register
 * arrays from the largest static register list, then pre-allocate the
 * nodes. max_mmio_per_node doubles as the "already done" flag, making
 * subsequent calls no-ops.
 */
static void
guc_capture_create_prealloc_nodes(struct xe_guc *guc)
{
	/* skip if we've already done the pre-alloc */
	if (guc->capture->max_mmio_per_node)
		return;

	guc->capture->max_mmio_per_node = guc_get_max_reglist_count(guc);
	__guc_capture_create_prealloc_nodes(guc);
}
1553 
1554 static void
1555 read_reg_to_node(struct xe_hw_engine *hwe, const struct __guc_mmio_reg_descr_group *list,
1556 		 struct guc_mmio_reg *regs)
1557 {
1558 	int i;
1559 
1560 	if (!list || !list->list || list->num_regs == 0)
1561 		return;
1562 
1563 	if (!regs)
1564 		return;
1565 
1566 	for (i = 0; i < list->num_regs; i++) {
1567 		struct __guc_mmio_reg_descr desc = list->list[i];
1568 		u32 value;
1569 
1570 		if (list->type == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
1571 			value = xe_hw_engine_mmio_read32(hwe, desc.reg);
1572 		} else {
1573 			if (list->type == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
1574 			    FIELD_GET(GUC_REGSET_STEERING_NEEDED, desc.flags)) {
1575 				int group, instance;
1576 
1577 				group = FIELD_GET(GUC_REGSET_STEERING_GROUP, desc.flags);
1578 				instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, desc.flags);
1579 				value = xe_gt_mcr_unicast_read(hwe->gt, XE_REG_MCR(desc.reg.addr),
1580 							       group, instance);
1581 			} else {
1582 				value = xe_mmio_read32(&hwe->gt->mmio, desc.reg);
1583 			}
1584 		}
1585 
1586 		regs[i].value = value;
1587 		regs[i].offset = desc.reg.addr;
1588 		regs[i].flags = desc.flags;
1589 		regs[i].mask = desc.mask;
1590 	}
1591 }
1592 
1593 /**
1594  * xe_engine_manual_capture - Take a manual engine snapshot from engine.
1595  * @hwe: Xe HW Engine.
1596  * @snapshot: The engine snapshot
1597  *
1598  * Take engine snapshot from engine read.
1599  *
1600  * Returns: None
1601  */
1602 void
1603 xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot)
1604 {
1605 	struct xe_gt *gt = hwe->gt;
1606 	struct xe_device *xe = gt_to_xe(gt);
1607 	struct xe_guc *guc = &gt->uc.guc;
1608 	struct xe_devcoredump *devcoredump = &xe->devcoredump;
1609 	enum guc_capture_list_class_type capture_class;
1610 	const struct __guc_mmio_reg_descr_group *list;
1611 	struct __guc_capture_parsed_output *new;
1612 	enum guc_state_capture_type type;
1613 	u16 guc_id = 0;
1614 	u32 lrca = 0;
1615 
1616 	if (IS_SRIOV_VF(xe))
1617 		return;
1618 
1619 	new = guc_capture_get_prealloc_node(guc);
1620 	if (!new)
1621 		return;
1622 
1623 	capture_class = xe_engine_class_to_guc_capture_class(hwe->class);
1624 	for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
1625 		struct gcap_reg_list_info *reginfo = &new->reginfo[type];
1626 		/*
1627 		 * regsinfo->regs is allocated based on guc->capture->max_mmio_per_node
1628 		 * which is based on the descriptor list driving the population so
1629 		 * should not overflow
1630 		 */
1631 
1632 		/* Get register list for the type/class */
1633 		list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
1634 							capture_class, false);
1635 		if (!list) {
1636 			xe_gt_dbg(gt, "Empty GuC capture register descriptor for %s",
1637 				  hwe->name);
1638 			continue;
1639 		}
1640 
1641 		read_reg_to_node(hwe, list, reginfo->regs);
1642 		reginfo->num_regs = list->num_regs;
1643 
1644 		/* Capture steering registers for rcs/ccs */
1645 		if (capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
1646 			list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF,
1647 								type, capture_class, true);
1648 			if (list) {
1649 				read_reg_to_node(hwe, list, &reginfo->regs[reginfo->num_regs]);
1650 				reginfo->num_regs += list->num_regs;
1651 			}
1652 		}
1653 	}
1654 
1655 	if (devcoredump && devcoredump->captured) {
1656 		struct xe_guc_submit_exec_queue_snapshot *ge = devcoredump->snapshot.ge;
1657 
1658 		if (ge) {
1659 			guc_id = ge->guc.id;
1660 			if (ge->lrc[0])
1661 				lrca = ge->lrc[0]->context_desc;
1662 		}
1663 	}
1664 
1665 	new->eng_class = xe_engine_class_to_guc_class(hwe->class);
1666 	new->eng_inst = hwe->instance;
1667 	new->guc_id = guc_id;
1668 	new->lrca = lrca;
1669 	new->is_partial = 0;
1670 	new->locked = 1;
1671 	new->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL;
1672 
1673 	guc_capture_add_node_to_outlist(guc->capture, new);
1674 	devcoredump->snapshot.matched_node = new;
1675 }
1676 
1677 static struct guc_mmio_reg *
1678 guc_capture_find_reg(struct gcap_reg_list_info *reginfo, u32 addr, u32 flags)
1679 {
1680 	int i;
1681 
1682 	if (reginfo && reginfo->num_regs > 0) {
1683 		struct guc_mmio_reg *regs = reginfo->regs;
1684 
1685 		if (regs)
1686 			for (i = 0; i < reginfo->num_regs; i++)
1687 				if (regs[i].offset == addr && regs[i].flags == flags)
1688 					return &regs[i];
1689 	}
1690 
1691 	return NULL;
1692 }
1693 
/*
 * Print the captured values for one list @type in the exact order of the
 * static descriptor @list, pairing REG_64BIT_LOW_DW/REG_64BIT_HI_DW
 * descriptor entries into single 64-bit printouts. Mis-ordered 64-bit
 * pairs in the descriptor table trigger XE_WARN.
 */
static void
snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p,
			     u32 type, const struct __guc_mmio_reg_descr_group *list)
{
	struct xe_gt *gt = snapshot->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_devcoredump *devcoredump = &xe->devcoredump;
	struct xe_devcoredump_snapshot *devcore_snapshot = &devcoredump->snapshot;
	struct gcap_reg_list_info *reginfo = NULL;
	u32 i, last_value = 0;
	bool low32_ready = false;

	if (!list || !list->list || list->num_regs == 0)
		return;
	XE_WARN_ON(!devcore_snapshot->matched_node);

	reginfo = &devcore_snapshot->matched_node->reginfo[type];

	/*
	 * loop through descriptor first and find the register in the node
	 * this is more scalable for developer maintenance as it will ensure
	 * the printout matched the ordering of the static descriptor
	 * table-of-lists
	 */
	for (i = 0; i < list->num_regs; i++) {
		const struct __guc_mmio_reg_descr *reg_desc = &list->list[i];
		struct guc_mmio_reg *reg;
		u32 value;

		reg = guc_capture_find_reg(reginfo, reg_desc->reg.addr, reg_desc->flags);
		if (!reg)
			continue;

		value = reg->value;
		switch (reg_desc->data_type) {
		case REG_64BIT_LOW_DW:
			last_value = value;

			/*
			 * A 64 bit register define requires 2 consecutive
			 * entries in register list, with low dword first
			 * and hi dword the second, like:
			 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
			 *  { XXX_REG_HI(0), REG_64BIT_HI_DW,  0, 0, "XXX_REG"},
			 *
			 * Incorrect order will trigger XE_WARN.
			 *
			 * Possible double low here, for example:
			 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
			 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
			 */
			XE_WARN_ON(low32_ready);
			low32_ready = true;
			/* Low 32 bit dword saved, continue for high 32 bit */
			break;

		case REG_64BIT_HI_DW: {
			u64 value_qw = ((u64)value << 32) | last_value;

			/*
			 * Incorrect 64bit register order. Possible missing low.
			 * for example:
			 *  { XXX_REG(0), REG_32BIT, 0, 0, NULL},
			 *  { XXX_REG_HI(0), REG_64BIT_HI_DW, 0, 0, NULL},
			 */
			XE_WARN_ON(!low32_ready);
			low32_ready = false;

			drm_printf(p, "\t%s: 0x%016llx\n", reg_desc->regname, value_qw);
			break;
		}

		case REG_32BIT:
			/*
			 * Incorrect 64bit register order. Possible missing high.
			 * for example:
			 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
			 *  { XXX_REG(0), REG_32BIT, 0, 0, "XXX_REG"},
			 */
			XE_WARN_ON(low32_ready);

			/* steered registers are printed with their DSS index */
			if (FIELD_GET(GUC_REGSET_STEERING_NEEDED, reg_desc->flags))
				drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname,
					   reg_desc->dss_id, value);
			else
				drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value);

			break;
		}
	}

	/*
	 * Incorrect 64bit register order. Possible missing high.
	 * for example:
	 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
	 *  } // <- Register list end
	 */
	XE_WARN_ON(low32_ready);
}
1793 
1794 /**
1795  * xe_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
1796  * @snapshot: Xe HW Engine snapshot object.
1797  * @p: drm_printer where it will be printed out.
1798  *
1799  * This function prints out a given Xe HW Engine snapshot object.
1800  */
1801 void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
1802 {
1803 	const char *grptype[GUC_STATE_CAPTURE_GROUP_TYPE_MAX] = {
1804 		"full-capture",
1805 		"partial-capture"
1806 	};
1807 	int type;
1808 	const struct __guc_mmio_reg_descr_group *list;
1809 	enum guc_capture_list_class_type capture_class;
1810 
1811 	struct xe_gt *gt;
1812 	struct xe_device *xe;
1813 	struct xe_devcoredump *devcoredump;
1814 	struct xe_devcoredump_snapshot *devcore_snapshot;
1815 
1816 	if (!snapshot)
1817 		return;
1818 
1819 	gt = snapshot->hwe->gt;
1820 	xe = gt_to_xe(gt);
1821 	devcoredump = &xe->devcoredump;
1822 	devcore_snapshot = &devcoredump->snapshot;
1823 
1824 	if (!devcore_snapshot->matched_node)
1825 		return;
1826 
1827 	xe_gt_assert(gt, snapshot->hwe);
1828 
1829 	capture_class = xe_engine_class_to_guc_capture_class(snapshot->hwe->class);
1830 
1831 	drm_printf(p, "%s (physical), logical instance=%d\n",
1832 		   snapshot->name ? snapshot->name : "",
1833 		   snapshot->logical_instance);
1834 	drm_printf(p, "\tCapture_source: %s\n",
1835 		   devcore_snapshot->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC ?
1836 		   "GuC" : "Manual");
1837 	drm_printf(p, "\tCoverage: %s\n", grptype[devcore_snapshot->matched_node->is_partial]);
1838 	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
1839 		   snapshot->forcewake.domain, snapshot->forcewake.ref);
1840 	drm_printf(p, "\tReserved: %s\n",
1841 		   str_yes_no(snapshot->kernel_reserved));
1842 
1843 	for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
1844 		/*
1845 		 * FIXME: During devcoredump print we should avoid accessing the
1846 		 * driver pointers for gt or engine. Printing should be done only
1847 		 * using the snapshot captured. Here we are accessing the gt
1848 		 * pointer. It should be fixed.
1849 		 */
1850 		list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
1851 							capture_class, false);
1852 		snapshot_print_by_list_order(snapshot, p, type, list);
1853 	}
1854 
1855 	if (capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
1856 		list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF,
1857 							GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
1858 							capture_class, true);
1859 		snapshot_print_by_list_order(snapshot, p, GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
1860 					     list);
1861 	}
1862 
1863 	drm_puts(p, "\n");
1864 }
1865 
1866 /**
1867  * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the queue.
1868  * @q: The exec queue object
1869  *
1870  * Search within the capture outlist for the queue, could be used for check if
1871  * GuC capture is ready for the queue.
1872  * If found, the locked boolean of the node will be flagged.
1873  *
1874  * Returns: found guc-capture node ptr else NULL
1875  */
1876 struct __guc_capture_parsed_output *
1877 xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q)
1878 {
1879 	struct xe_hw_engine *hwe;
1880 	enum xe_hw_engine_id id;
1881 	struct xe_device *xe;
1882 	u16 guc_class = GUC_LAST_ENGINE_CLASS + 1;
1883 	struct xe_devcoredump_snapshot *ss;
1884 
1885 	if (!q || !q->gt)
1886 		return NULL;
1887 
1888 	xe = gt_to_xe(q->gt);
1889 
1890 	if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)
1891 		return NULL;
1892 
1893 	if (!xe_device_uc_enabled(xe))
1894 		return NULL;
1895 
1896 	if (IS_SRIOV_VF(xe))
1897 		return NULL;
1898 
1899 	ss = &xe->devcoredump.snapshot;
1900 	if (ss->matched_node && ss->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC)
1901 		return ss->matched_node;
1902 
1903 	/* Find hwe for the queue */
1904 	for_each_hw_engine(hwe, q->gt, id) {
1905 		if (hwe != q->hwe)
1906 			continue;
1907 		guc_class = xe_engine_class_to_guc_class(hwe->class);
1908 		break;
1909 	}
1910 
1911 	if (guc_class <= GUC_LAST_ENGINE_CLASS) {
1912 		struct __guc_capture_parsed_output *n, *ntmp;
1913 		struct xe_guc *guc =  &q->gt->uc.guc;
1914 		u16 guc_id = q->guc->id;
1915 		u32 lrca = xe_lrc_ggtt_addr(q->lrc[0]);
1916 
1917 		/*
1918 		 * Look for a matching GuC reported error capture node from
1919 		 * the internal output link-list based on engine, guc id and
1920 		 * lrca info.
1921 		 */
1922 		list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) {
1923 			if (n->eng_class == guc_class && n->eng_inst == hwe->instance &&
1924 			    n->guc_id == guc_id && n->lrca == lrca &&
1925 			    n->source == XE_ENGINE_CAPTURE_SOURCE_GUC) {
1926 				n->locked = 1;
1927 				return n;
1928 			}
1929 		}
1930 	}
1931 	return NULL;
1932 }
1933 
1934 /**
1935  * xe_engine_snapshot_capture_for_queue - Take snapshot of associated engine
1936  * @q: The exec queue object
1937  *
1938  * Take snapshot of associated HW Engine
1939  *
1940  * Returns: None.
1941  */
1942 void
1943 xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q)
1944 {
1945 	struct xe_device *xe = gt_to_xe(q->gt);
1946 	struct xe_devcoredump *coredump = &xe->devcoredump;
1947 	struct xe_hw_engine *hwe;
1948 	enum xe_hw_engine_id id;
1949 	u32 adj_logical_mask = q->logical_mask;
1950 
1951 	if (IS_SRIOV_VF(xe))
1952 		return;
1953 
1954 	for_each_hw_engine(hwe, q->gt, id) {
1955 		if (hwe->class != q->hwe->class ||
1956 		    !(BIT(hwe->logical_instance) & adj_logical_mask)) {
1957 			coredump->snapshot.hwe[id] = NULL;
1958 			continue;
1959 		}
1960 
1961 		if (!coredump->snapshot.hwe[id]) {
1962 			coredump->snapshot.hwe[id] =
1963 				xe_hw_engine_snapshot_capture(hwe, q);
1964 		} else {
1965 			struct __guc_capture_parsed_output *new;
1966 
1967 			new = xe_guc_capture_get_matching_and_lock(q);
1968 			if (new) {
1969 				struct xe_guc *guc =  &q->gt->uc.guc;
1970 
1971 				/*
1972 				 * If we are in here, it means we found a fresh
1973 				 * GuC-err-capture node for this engine after
1974 				 * previously failing to find a match in the
1975 				 * early part of guc_exec_queue_timedout_job.
1976 				 * Thus we must free the manually captured node
1977 				 */
1978 				guc_capture_free_outlist_node(guc->capture,
1979 							      coredump->snapshot.matched_node);
1980 				coredump->snapshot.matched_node = new;
1981 			}
1982 		}
1983 
1984 		break;
1985 	}
1986 }
1987 
1988 /*
1989  * xe_guc_capture_put_matched_nodes - Cleanup matched nodes
1990  * @guc: The GuC object
1991  *
1992  * Free matched node and all nodes with the equal guc_id from
1993  * GuC captured outlist
1994  */
1995 void xe_guc_capture_put_matched_nodes(struct xe_guc *guc)
1996 {
1997 	struct xe_device *xe = guc_to_xe(guc);
1998 	struct xe_devcoredump *devcoredump = &xe->devcoredump;
1999 	struct __guc_capture_parsed_output *n = devcoredump->snapshot.matched_node;
2000 
2001 	if (n) {
2002 		guc_capture_remove_stale_matches_from_list(guc->capture, n);
2003 		guc_capture_free_outlist_node(guc->capture, n);
2004 		devcoredump->snapshot.matched_node = NULL;
2005 	}
2006 }
2007 
2008 /*
2009  * xe_guc_capture_steered_list_init - Init steering register list
2010  * @guc: The GuC object
2011  *
2012  * Init steering register list for GuC register capture, create pre-alloc node
2013  */
2014 void xe_guc_capture_steered_list_init(struct xe_guc *guc)
2015 {
2016 	/*
2017 	 * For certain engine classes, there are slice and subslice
2018 	 * level registers requiring steering. We allocate and populate
2019 	 * these based on hw config and add it as an extension list at
2020 	 * the end of the pre-populated render list.
2021 	 */
2022 	guc_capture_alloc_steered_lists(guc);
2023 	check_guc_capture_size(guc);
2024 	guc_capture_create_prealloc_nodes(guc);
2025 }
2026 
2027 /*
2028  * xe_guc_capture_init - Init for GuC register capture
2029  * @guc: The GuC object
2030  *
2031  * Init for GuC register capture, alloc memory for capture data structure.
2032  *
2033  * Returns: 0 if success.
2034  *	    -ENOMEM if out of memory
2035  */
2036 int xe_guc_capture_init(struct xe_guc *guc)
2037 {
2038 	guc->capture = drmm_kzalloc(guc_to_drm(guc), sizeof(*guc->capture), GFP_KERNEL);
2039 	if (!guc->capture)
2040 		return -ENOMEM;
2041 
2042 	guc->capture->reglists = guc_capture_get_device_reglist(guc_to_xe(guc));
2043 
2044 	INIT_LIST_HEAD(&guc->capture->outlist);
2045 	INIT_LIST_HEAD(&guc->capture->cachelist);
2046 
2047 	return 0;
2048 }
2049