xref: /linux/drivers/gpu/drm/xe/xe_guc_capture.c (revision 6dfafbd0299a60bfb5d5e277fdf100037c7ded07)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021-2024 Intel Corporation
4  */
5 
6 #include <linux/types.h>
7 
8 #include <drm/drm_managed.h>
9 #include <drm/drm_print.h>
10 
11 #include "abi/guc_actions_abi.h"
12 #include "abi/guc_capture_abi.h"
13 #include "abi/guc_log_abi.h"
14 #include "regs/xe_engine_regs.h"
15 #include "regs/xe_gt_regs.h"
16 #include "regs/xe_guc_regs.h"
17 #include "regs/xe_regs.h"
18 
19 #include "xe_bo.h"
20 #include "xe_device.h"
21 #include "xe_exec_queue_types.h"
22 #include "xe_gt.h"
23 #include "xe_gt_mcr.h"
24 #include "xe_gt_printk.h"
25 #include "xe_guc.h"
26 #include "xe_guc_ads.h"
27 #include "xe_guc_capture.h"
28 #include "xe_guc_capture_types.h"
29 #include "xe_guc_ct.h"
30 #include "xe_guc_exec_queue_types.h"
31 #include "xe_guc_log.h"
32 #include "xe_guc_submit_types.h"
33 #include "xe_guc_submit.h"
34 #include "xe_hw_engine_types.h"
35 #include "xe_hw_engine.h"
36 #include "xe_lrc.h"
37 #include "xe_macros.h"
38 #include "xe_map.h"
39 #include "xe_mmio.h"
40 #include "xe_sched_job.h"
41 
42 /*
43  * struct __guc_capture_bufstate
44  *
45  * Book-keeping structure used to track read and write pointers
46  * as we extract error capture data from the GuC-log-buffer's
47  * error-capture region as a stream of dwords.
48  */
49 struct __guc_capture_bufstate {
50 	u32 size;
51 	u32 data_offset;
52 	u32 rd;
53 	u32 wr;
54 };
55 
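/*
 * Note: rd and wr above are byte offsets into the error-capture region,
 * which starts at data_offset within the GuC log BO. See the worked
 * example next to guc_capture_buf_cnt() below for wrap-around accounting.
 */
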
56 /*
57  * struct __guc_capture_parsed_output - extracted error capture node
58  *
59  * A single unit of extracted error-capture output data grouped together
60  * at an engine-instance level. We keep these nodes in a linked list.
61  * See cachelist and outlist below.
62  */
63 struct __guc_capture_parsed_output {
64 	/*
65 	 * A single set of 3 capture lists: a global-list,
66 	 * an engine-class-list and an engine-instance list.
67 	 * outlist in __guc_capture_parsed_output will keep
68 	 * a linked list of these nodes that will eventually
69 	 * be detached from outlist and attached to
70 	 * xe_devcoredump in response to a context reset
71 	 */
72 	struct list_head link;
73 	bool is_partial;
74 	u32 eng_class;
75 	u32 eng_inst;
76 	u32 guc_id;
77 	u32 lrca;
78 	u32 type;
79 	bool locked;
80 	enum xe_hw_engine_snapshot_source_id source;
81 	struct gcap_reg_list_info {
82 		u32 vfid;
83 		u32 num_regs;
84 		struct guc_mmio_reg *regs;
85 	} reginfo[GUC_STATE_CAPTURE_TYPE_MAX];
86 #define GCAP_PARSED_REGLIST_INDEX_GLOBAL   BIT(GUC_STATE_CAPTURE_TYPE_GLOBAL)
87 #define GCAP_PARSED_REGLIST_INDEX_ENGCLASS BIT(GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS)
88 };
89 
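/*
 * For illustration: guc_capture_clone_node() below takes a bitmask built
 * from the GCAP_PARSED_REGLIST_INDEX_* defines above, e.g. passing
 * (GCAP_PARSED_REGLIST_INDEX_GLOBAL | GCAP_PARSED_REGLIST_INDEX_ENGCLASS)
 * retains the global and engine-class register lists in the clone while
 * the engine-instance list starts out empty.
 */
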
90 /*
91  * Define all device tables of GuC error capture register lists
92  * NOTE:
93  *     For engine-registers, GuC only needs the register offsets
94  *     from the engine-mmio-base
95  *
96  *     64 bit registers need 2 entries, one for the low and one for the
97  *     high 32 bit register, for example:
98  *       Register           data_type        flags   mask  dss_id  Register name
99  *     { XXX_REG_LO(0),  REG_64BIT_LOW_DW,    0,      0,     0,     NULL},
100  *     { XXX_REG_HI(0),  REG_64BIT_HI_DW,     0,      0,     0,     "XXX_REG"},
101  *     1. data_type: Indicates the low/high 32 bits of a 64 bit register.
102  *                   A 64 bit register definition requires 2 consecutive
103  *                   entries, with the low dword first and the high dword second.
104  *     2. Register name: NULL for the incomplete (low dword) entry.
105  *     3. Incorrect ordering will trigger XE_WARN.
106  */
107 #define COMMON_XELP_BASE_GLOBAL \
108 	{ FORCEWAKE_GT,			REG_32BIT,	0,	0,	0,	"FORCEWAKE_GT"}
109 
110 #define COMMON_BASE_ENGINE_INSTANCE \
111 	{ RING_HWSTAM(0),		REG_32BIT,	0,	0,	0,	"HWSTAM"}, \
112 	{ RING_HWS_PGA(0),		REG_32BIT,	0,	0,	0,	"RING_HWS_PGA"}, \
113 	{ RING_HEAD(0),			REG_32BIT,	0,	0,	0,	"RING_HEAD"}, \
114 	{ RING_TAIL(0),			REG_32BIT,	0,	0,	0,	"RING_TAIL"}, \
115 	{ RING_CTL(0),			REG_32BIT,	0,	0,	0,	"RING_CTL"}, \
116 	{ RING_MI_MODE(0),		REG_32BIT,	0,	0,	0,	"RING_MI_MODE"}, \
117 	{ RING_MODE(0),			REG_32BIT,	0,	0,	0,	"RING_MODE"}, \
118 	{ RING_ESR(0),			REG_32BIT,	0,	0,	0,	"RING_ESR"}, \
119 	{ RING_EMR(0),			REG_32BIT,	0,	0,	0,	"RING_EMR"}, \
120 	{ RING_EIR(0),			REG_32BIT,	0,	0,	0,	"RING_EIR"}, \
121 	{ RING_IMR(0),			REG_32BIT,	0,	0,	0,	"RING_IMR"}, \
122 	{ RING_IPEHR(0),		REG_32BIT,	0,	0,	0,	"IPEHR"}, \
123 	{ RING_INSTDONE(0),		REG_32BIT,	0,	0,	0,	"RING_INSTDONE"}, \
124 	{ INDIRECT_RING_STATE(0),	REG_32BIT,	0,	0,	0,	"INDIRECT_RING_STATE"}, \
125 	{ RING_CURRENT_LRCA(0),		REG_32BIT,	0,	0,	0,	"CURRENT_LRCA"}, \
126 	{ RING_ACTHD(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
127 	{ RING_ACTHD_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"ACTHD"}, \
128 	{ RING_BBADDR(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
129 	{ RING_BBADDR_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"RING_BBADDR"}, \
130 	{ RING_START(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
131 	{ RING_START_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"RING_START"}, \
132 	{ RING_DMA_FADD(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
133 	{ RING_DMA_FADD_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"RING_DMA_FADD"}, \
134 	{ RING_EXECLIST_STATUS_LO(0),	REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
135 	{ RING_EXECLIST_STATUS_HI(0),	REG_64BIT_HI_DW, 0,	0,	0,	"RING_EXECLIST_STATUS"}, \
136 	{ RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
137 	{ RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0,	0,	0,	"RING_EXECLIST_SQ_CONTENTS"}
138 
139 #define COMMON_XELP_RC_CLASS \
140 	{ RCU_MODE,			REG_32BIT,	0,	0,	0,	"RCU_MODE"}
141 
142 #define COMMON_XELP_RC_CLASS_INSTDONE \
143 	{ SC_INSTDONE,			REG_32BIT,	0,	0,	0,	"SC_INSTDONE"}, \
144 	{ SC_INSTDONE_EXTRA,		REG_32BIT,	0,	0,	0,	"SC_INSTDONE_EXTRA"}, \
145 	{ SC_INSTDONE_EXTRA2,		REG_32BIT,	0,	0,	0,	"SC_INSTDONE_EXTRA2"}
146 
147 #define XELP_VEC_CLASS_REGS \
148 	{ SFC_DONE(0),			0,	0,	0,	0,	"SFC_DONE[0]"}, \
149 	{ SFC_DONE(1),			0,	0,	0,	0,	"SFC_DONE[1]"}, \
150 	{ SFC_DONE(2),			0,	0,	0,	0,	"SFC_DONE[2]"}, \
151 	{ SFC_DONE(3),			0,	0,	0,	0,	"SFC_DONE[3]"}
152 
153 #define XE3P_BASE_ENGINE_INSTANCE \
154 	{ RING_CSMQDEBUG(0),		REG_32BIT,	0,	0,	0,	"CSMQDEBUG"}
155 
156 /* XE_LP Global */
157 static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = {
158 	COMMON_XELP_BASE_GLOBAL,
159 };
160 
161 /* Render / Compute Per-Engine-Instance */
162 static const struct __guc_mmio_reg_descr xe_rc_inst_regs[] = {
163 	COMMON_BASE_ENGINE_INSTANCE,
164 };
165 
166 /* Render / Compute Engine-Class */
167 static const struct __guc_mmio_reg_descr xe_rc_class_regs[] = {
168 	COMMON_XELP_RC_CLASS,
169 	COMMON_XELP_RC_CLASS_INSTDONE,
170 };
171 
172 /* Render / Compute Engine-Class for xehpg */
173 static const struct __guc_mmio_reg_descr xe_hpg_rc_class_regs[] = {
174 	COMMON_XELP_RC_CLASS,
175 };
176 
177 /* Media Decode/Encode Per-Engine-Instance */
178 static const struct __guc_mmio_reg_descr xe_vd_inst_regs[] = {
179 	COMMON_BASE_ENGINE_INSTANCE,
180 };
181 
182 /* Video Enhancement Engine-Class */
183 static const struct __guc_mmio_reg_descr xe_vec_class_regs[] = {
184 	XELP_VEC_CLASS_REGS,
185 };
186 
187 /* Video Enhancement Per-Engine-Instance */
188 static const struct __guc_mmio_reg_descr xe_vec_inst_regs[] = {
189 	COMMON_BASE_ENGINE_INSTANCE,
190 };
191 
192 /* Blitter Per-Engine-Instance */
193 static const struct __guc_mmio_reg_descr xe_blt_inst_regs[] = {
194 	COMMON_BASE_ENGINE_INSTANCE,
195 };
196 
197 /* XE_LP - GSC Per-Engine-Instance */
198 static const struct __guc_mmio_reg_descr xe_lp_gsc_inst_regs[] = {
199 	COMMON_BASE_ENGINE_INSTANCE,
200 };
201 
202 /* Xe3p Render / Compute Per-Engine-Instance */
203 static const struct __guc_mmio_reg_descr xe3p_rc_inst_regs[] = {
204 	COMMON_BASE_ENGINE_INSTANCE,
205 	XE3P_BASE_ENGINE_INSTANCE,
206 };
207 
208 /*
209  * Empty list to prevent warnings about unknown class/instance types
210  * as not all class/instance types have entries on all platforms.
211  */
212 static const struct __guc_mmio_reg_descr empty_regs_list[] = {
213 };
214 
215 #define TO_GCAP_DEF_OWNER(x) (GUC_CAPTURE_LIST_INDEX_##x)
216 #define TO_GCAP_DEF_TYPE(x) (GUC_STATE_CAPTURE_TYPE_##x)
217 #define MAKE_REGLIST(regslist, regsowner, regstype, class) \
218 	{ \
219 		regslist, \
220 		ARRAY_SIZE(regslist), \
221 		TO_GCAP_DEF_OWNER(regsowner), \
222 		TO_GCAP_DEF_TYPE(regstype), \
223 		class \
224 	}
225 
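/*
 * For illustration, MAKE_REGLIST(xe_rc_inst_regs, PF, ENGINE_INSTANCE,
 * GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) expands to the initializer:
 *
 *   { xe_rc_inst_regs, ARRAY_SIZE(xe_rc_inst_regs),
 *     GUC_CAPTURE_LIST_INDEX_PF, GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
 *     GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE }
 */
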
226 /* List of lists for legacy graphics product version < 1255 */
227 static const struct __guc_mmio_reg_descr_group xe_lp_lists[] = {
228 	MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
229 	MAKE_REGLIST(xe_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
230 	MAKE_REGLIST(xe_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
231 	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
232 	MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
233 	MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
234 	MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
235 	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
236 	MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
237 	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
238 	MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
239 	{}
240 };
241 
242 /* List of lists for graphics product version >= 1255 */
243 static const struct __guc_mmio_reg_descr_group xe_hpg_lists[] = {
244 	MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
245 	MAKE_REGLIST(xe_hpg_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
246 	MAKE_REGLIST(xe_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
247 	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
248 	MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
249 	MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
250 	MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
251 	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
252 	MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
253 	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
254 	MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
255 	{}
256 };
257 
258 /* List of lists for Xe3p and beyond */
259 static const struct __guc_mmio_reg_descr_group xe3p_lists[] = {
260 	MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
261 	MAKE_REGLIST(xe_hpg_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
262 	MAKE_REGLIST(xe3p_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
263 	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
264 	MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
265 	MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
266 	MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
267 	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
268 	MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
269 	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
270 	MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
271 	{}
272 };
273 static const char * const capture_list_type_names[] = {
274 	"Global",
275 	"Class",
276 	"Instance",
277 };
278 
279 static const char * const capture_engine_class_names[] = {
280 	"Render/Compute",
281 	"Video",
282 	"VideoEnhance",
283 	"Blitter",
284 	"GSC-Other",
285 };
286 
287 struct __guc_capture_ads_cache {
288 	bool is_valid;
289 	void *ptr;
290 	size_t size;
291 	int status;
292 };
293 
294 struct xe_guc_state_capture {
295 	const struct __guc_mmio_reg_descr_group *reglists;
296 	/**
297 	 * NOTE: steered registers have multiple instances depending on the HW configuration
298 	 * (slices or dual-sub-slices) and thus depend on the HW fuses discovered
299 	 */
300 	struct __guc_mmio_reg_descr_group *extlists;
301 	struct __guc_capture_ads_cache ads_cache[GUC_CAPTURE_LIST_INDEX_MAX]
302 						[GUC_STATE_CAPTURE_TYPE_MAX]
303 						[GUC_CAPTURE_LIST_CLASS_MAX];
304 	void *ads_null_cache;
305 	struct list_head cachelist;
306 #define PREALLOC_NODES_MAX_COUNT (3 * GUC_MAX_ENGINE_CLASSES * GUC_MAX_INSTANCES_PER_CLASS)
307 #define PREALLOC_NODES_DEFAULT_NUMREGS 64
308 
309 	int max_mmio_per_node;
310 	struct list_head outlist;
311 };
312 
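/*
 * Node lifecycle: cachelist holds blank pre-allocated capture nodes while
 * outlist holds nodes populated from GuC error-capture output awaiting
 * match-up by devcoredump. Nodes migrate between the two lists; see
 * guc_capture_get_prealloc_node() and guc_capture_free_outlist_node().
 */
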
313 static void
314 guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc,
315 					   struct __guc_capture_parsed_output *node);
316 
317 static const struct __guc_mmio_reg_descr_group *
318 guc_capture_get_device_reglist(struct xe_device *xe)
319 {
320 	if (GRAPHICS_VER(xe) >= 35)
321 		return xe3p_lists;
322 	else if (GRAPHICS_VERx100(xe) >= 1255)
323 		return xe_hpg_lists;
324 	else
325 		return xe_lp_lists;
326 }
327 
328 static const struct __guc_mmio_reg_descr_group *
329 guc_capture_get_one_list(const struct __guc_mmio_reg_descr_group *reglists,
330 			 u32 owner, u32 type, enum guc_capture_list_class_type capture_class)
331 {
332 	int i;
333 
334 	if (!reglists)
335 		return NULL;
336 
337 	for (i = 0; reglists[i].list; ++i) {
338 		if (reglists[i].owner == owner && reglists[i].type == type &&
339 		    (reglists[i].engine == capture_class ||
340 		     reglists[i].type == GUC_STATE_CAPTURE_TYPE_GLOBAL))
341 			return &reglists[i];
342 	}
343 
344 	return NULL;
345 }
346 
347 const struct __guc_mmio_reg_descr_group *
348 xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type,
349 				 enum guc_capture_list_class_type capture_class, bool is_ext)
350 {
351 	const struct __guc_mmio_reg_descr_group *reglists;
352 
353 	if (is_ext) {
354 		struct xe_guc *guc = &gt->uc.guc;
355 
356 		reglists = guc->capture->extlists;
357 	} else {
358 		reglists = guc_capture_get_device_reglist(gt_to_xe(gt));
359 	}
360 	return guc_capture_get_one_list(reglists, owner, type, capture_class);
361 }
362 
363 struct __ext_steer_reg {
364 	const char *name;
365 	struct xe_reg_mcr reg;
366 };
367 
368 static const struct __ext_steer_reg xe_extregs[] = {
369 	{"SAMPLER_INSTDONE",		SAMPLER_INSTDONE},
370 	{"ROW_INSTDONE",		ROW_INSTDONE}
371 };
372 
373 static const struct __ext_steer_reg xehpg_extregs[] = {
374 	{"SC_INSTDONE",			XEHPG_SC_INSTDONE},
375 	{"SC_INSTDONE_EXTRA",		XEHPG_SC_INSTDONE_EXTRA},
376 	{"SC_INSTDONE_EXTRA2",		XEHPG_SC_INSTDONE_EXTRA2},
377 	{"INSTDONE_GEOM_SVGUNIT",	XEHPG_INSTDONE_GEOM_SVGUNIT}
378 };
379 
380 static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
381 			   const struct __ext_steer_reg *extlist,
382 			   u32 dss_id, u16 slice_id, u16 subslice_id)
383 {
384 	if (!ext || !extlist)
385 		return;
386 
387 	ext->reg = XE_REG(extlist->reg.__reg.addr);
388 	ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1);
389 	ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
390 	ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
391 	ext->dss_id = dss_id;
392 	ext->regname = extlist->name;
393 }
394 
395 static int
396 __alloc_ext_regs(struct drm_device *drm, struct __guc_mmio_reg_descr_group *newlist,
397 		 const struct __guc_mmio_reg_descr_group *rootlist, int num_regs)
398 {
399 	struct __guc_mmio_reg_descr *list;
400 
401 	list = drmm_kzalloc(drm, num_regs * sizeof(struct __guc_mmio_reg_descr), GFP_KERNEL);
402 	if (!list)
403 		return -ENOMEM;
404 
405 	newlist->list = list;
406 	newlist->num_regs = num_regs;
407 	newlist->owner = rootlist->owner;
408 	newlist->engine = rootlist->engine;
409 	newlist->type = rootlist->type;
410 
411 	return 0;
412 }
413 
414 static int guc_capture_get_steer_reg_num(struct xe_device *xe)
415 {
416 	int num = ARRAY_SIZE(xe_extregs);
417 
418 	if (GRAPHICS_VERx100(xe) >= 1255)
419 		num += ARRAY_SIZE(xehpg_extregs);
420 
421 	return num;
422 }
423 
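/*
 * Worked example: xe_extregs has 2 entries and xehpg_extregs has 4, so on
 * a platform with GRAPHICS_VERx100 >= 1255 this returns 2 + 4 = 6 steered
 * registers per DSS; older platforms get just the 2 common entries.
 */
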
424 static void guc_capture_alloc_steered_lists(struct xe_guc *guc)
425 {
426 	struct xe_gt *gt = guc_to_gt(guc);
427 	u16 slice, subslice;
428 	int dss, i, total = 0;
429 	const struct __guc_mmio_reg_descr_group *lists = guc->capture->reglists;
430 	const struct __guc_mmio_reg_descr_group *list;
431 	struct __guc_mmio_reg_descr_group *extlists;
432 	struct __guc_mmio_reg_descr *extarray;
433 	bool has_xehpg_extregs = GRAPHICS_VERx100(gt_to_xe(gt)) >= 1255;
434 	struct drm_device *drm = &gt_to_xe(gt)->drm;
435 	bool has_rcs_ccs = false;
436 	struct xe_hw_engine *hwe;
437 	enum xe_hw_engine_id id;
438 
439 	/*
440 	 * If GT has no rcs/ccs, no need to alloc steered list.
441 	 * Currently, only rcs/ccs have steering registers; if other engine
442 	 * types gain steering registers in the future, this condition check
443 	 * will need to be extended
444 	 */
445 	for_each_hw_engine(hwe, gt, id) {
446 		if (xe_engine_class_to_guc_capture_class(hwe->class) ==
447 		    GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
448 			has_rcs_ccs = true;
449 			break;
450 		}
451 	}
452 
453 	if (!has_rcs_ccs)
454 		return;
455 
456 	/* steered registers currently only exist for the render-class */
457 	list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF,
458 					GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
459 					GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE);
460 	/*
461 	 * Skip if this platform has no engine class registers or if extlists
462 	 * was previously allocated
463 	 */
464 	if (!list || guc->capture->extlists)
465 		return;
466 
467 	total = bitmap_weight(gt->fuse_topo.g_dss_mask, sizeof(gt->fuse_topo.g_dss_mask) * 8) *
468 		guc_capture_get_steer_reg_num(guc_to_xe(guc));
469 
470 	if (!total)
471 		return;
472 
473 	/* allocate an extra for an end marker */
474 	extlists = drmm_kzalloc(drm, 2 * sizeof(struct __guc_mmio_reg_descr_group), GFP_KERNEL);
475 	if (!extlists)
476 		return;
477 
478 	if (__alloc_ext_regs(drm, &extlists[0], list, total)) {
479 		drmm_kfree(drm, extlists);
480 		return;
481 	}
482 
483 	/* For steering registers, the list is generated at run-time */
484 	extarray = (struct __guc_mmio_reg_descr *)extlists[0].list;
485 	for_each_dss_steering(dss, gt, slice, subslice) {
486 		for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) {
487 			__fill_ext_reg(extarray, &xe_extregs[i], dss, slice, subslice);
488 			++extarray;
489 		}
490 
491 		if (has_xehpg_extregs)
492 			for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) {
493 				__fill_ext_reg(extarray, &xehpg_extregs[i], dss, slice, subslice);
494 				++extarray;
495 			}
496 	}
497 
498 	extlists[0].num_regs = total;
499 
500 	xe_gt_dbg(guc_to_gt(guc), "capture found %d ext-regs.\n", total);
501 	guc->capture->extlists = extlists;
502 }
503 
504 static int
505 guc_capture_list_init(struct xe_guc *guc, u32 owner, u32 type,
506 		      enum guc_capture_list_class_type capture_class, struct guc_mmio_reg *ptr,
507 		      u16 num_entries)
508 {
509 	u32 ptr_idx = 0, list_idx = 0;
510 	const struct __guc_mmio_reg_descr_group *reglists = guc->capture->reglists;
511 	struct __guc_mmio_reg_descr_group *extlists = guc->capture->extlists;
512 	const struct __guc_mmio_reg_descr_group *match;
513 	u32 list_num;
514 
515 	if (!reglists)
516 		return -ENODEV;
517 
518 	match = guc_capture_get_one_list(reglists, owner, type, capture_class);
519 	if (!match)
520 		return -ENODATA;
521 
522 	list_num = match->num_regs;
523 	for (list_idx = 0; ptr_idx < num_entries && list_idx < list_num; ++list_idx, ++ptr_idx) {
524 		ptr[ptr_idx].offset = match->list[list_idx].reg.addr;
525 		ptr[ptr_idx].value = 0xDEADF00D;
526 		ptr[ptr_idx].flags = match->list[list_idx].flags;
527 		ptr[ptr_idx].mask = match->list[list_idx].mask;
528 	}
529 
530 	match = guc_capture_get_one_list(extlists, owner, type, capture_class);
531 	if (match)
532 		for (ptr_idx = list_num, list_idx = 0;
533 		     ptr_idx < num_entries && list_idx < match->num_regs;
534 		     ++ptr_idx, ++list_idx) {
535 			ptr[ptr_idx].offset = match->list[list_idx].reg.addr;
536 			ptr[ptr_idx].value = 0xDEADF00D;
537 			ptr[ptr_idx].flags = match->list[list_idx].flags;
538 			ptr[ptr_idx].mask = match->list[list_idx].mask;
539 		}
540 
541 	if (ptr_idx < num_entries)
542 		xe_gt_dbg(guc_to_gt(guc), "Got short capture reglist init: %d out-of %d.\n",
543 			  ptr_idx, num_entries);
544 
545 	return 0;
546 }
547 
548 static int
549 guc_cap_list_num_regs(struct xe_guc *guc, u32 owner, u32 type,
550 		      enum guc_capture_list_class_type capture_class)
551 {
552 	const struct __guc_mmio_reg_descr_group *match;
553 	int num_regs = 0;
554 
555 	match = guc_capture_get_one_list(guc->capture->reglists, owner, type, capture_class);
556 	if (match)
557 		num_regs = match->num_regs;
558 
559 	match = guc_capture_get_one_list(guc->capture->extlists, owner, type, capture_class);
560 	if (match)
561 		num_regs += match->num_regs;
562 	else
563 		/*
564 		 * If a caller wants the full register dump size but we have
565 		 * not yet received the hw-config, which is before max_mmio_per_node
566 		 * is initialized, then provide a worst-case number for
567 		 * extlists based on max dss fuse bits, but only ever for
568 		 * render/compute
569 		 */
570 		if (owner == GUC_CAPTURE_LIST_INDEX_PF &&
571 		    type == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
572 		    capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE &&
573 		    !guc->capture->max_mmio_per_node)
574 			num_regs += guc_capture_get_steer_reg_num(guc_to_xe(guc)) *
575 				    XE_MAX_DSS_FUSE_BITS;
576 
577 	return num_regs;
578 }
579 
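/*
 * For illustration of the worst-case padding above: a render/compute
 * engine-class query made before the hw-config arrives is padded by
 * guc_capture_get_steer_reg_num() * XE_MAX_DSS_FUSE_BITS extra entries,
 * i.e. one full set of steered registers per possible DSS fuse bit.
 */
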
580 static int
581 guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type,
582 			enum guc_capture_list_class_type capture_class,
583 			size_t *size, bool is_purpose_est)
584 {
585 	struct xe_guc_state_capture *gc = guc->capture;
586 	struct xe_gt *gt = guc_to_gt(guc);
587 	struct __guc_capture_ads_cache *cache;
588 	int num_regs;
589 
590 	xe_gt_assert(gt, type < GUC_STATE_CAPTURE_TYPE_MAX);
591 	xe_gt_assert(gt, capture_class < GUC_CAPTURE_LIST_CLASS_MAX);
592 
593 	cache = &gc->ads_cache[owner][type][capture_class];
594 	if (!gc->reglists) {
595 		xe_gt_warn(gt, "No capture reglist for this device\n");
596 		return -ENODEV;
597 	}
598 
599 	if (cache->is_valid) {
600 		*size = cache->size;
601 		return cache->status;
602 	}
603 
604 	if (!is_purpose_est && owner == GUC_CAPTURE_LIST_INDEX_PF &&
605 	    !guc_capture_get_one_list(gc->reglists, owner, type, capture_class)) {
606 		if (type == GUC_STATE_CAPTURE_TYPE_GLOBAL)
607 			xe_gt_warn(gt, "Missing capture reglist: global!\n");
608 		else
609 			xe_gt_warn(gt, "Missing capture reglist: %s(%u):%s(%u)!\n",
610 				   capture_list_type_names[type], type,
611 				   capture_engine_class_names[capture_class], capture_class);
612 		return -ENODEV;
613 	}
614 
615 	num_regs = guc_cap_list_num_regs(guc, owner, type, capture_class);
616 	/* intentionally empty lists can exist depending on hw config */
617 	if (!num_regs)
618 		return -ENODATA;
619 
620 	if (size)
621 		*size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
622 				   (num_regs * sizeof(struct guc_mmio_reg)));
623 
624 	return 0;
625 }
626 
627 /**
628  * xe_guc_capture_getlistsize - Get list size for owner/type/class combination
629  * @guc: The GuC object
630  * @owner: PF/VF owner
631  * @type: GuC capture register type
632  * @capture_class: GuC capture engine class id
633  * @size: Pointer to the returned size
634  *
635  * This function will get the list for the owner/type/class combination, and
636  * return the page aligned list size.
637  *
638  * Returns: 0 on success or a negative error code on failure.
639  */
640 int
641 xe_guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type,
642 			   enum guc_capture_list_class_type capture_class, size_t *size)
643 {
644 	return guc_capture_getlistsize(guc, owner, type, capture_class, size, false);
645 }
646 
647 /**
648  * xe_guc_capture_getlist - Get register capture list for owner/type/class
649  * combination
650  * @guc:	The GuC object
651  * @owner:	PF/VF owner
652  * @type:	GuC capture register type
653  * @capture_class:	GuC capture engine class id
654  * @outptr:	Pointer to the cached register capture list
655  *
656  * This function will get the register capture list for the owner/type/class
657  * combination.
658  *
659  * Returns: 0 on success or a negative error code on failure.
660  */
661 int
662 xe_guc_capture_getlist(struct xe_guc *guc, u32 owner, u32 type,
663 		       enum guc_capture_list_class_type capture_class, void **outptr)
664 {
665 	struct xe_guc_state_capture *gc = guc->capture;
666 	struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][capture_class];
667 	struct guc_debug_capture_list *listnode;
668 	int ret, num_regs;
669 	u8 *caplist, *tmp;
670 	size_t size = 0;
671 
672 	if (!gc->reglists)
673 		return -ENODEV;
674 
675 	if (cache->is_valid) {
676 		*outptr = cache->ptr;
677 		return cache->status;
678 	}
679 
680 	ret = xe_guc_capture_getlistsize(guc, owner, type, capture_class, &size);
681 	if (ret) {
682 		cache->is_valid = true;
683 		cache->ptr = NULL;
684 		cache->size = 0;
685 		cache->status = ret;
686 		return ret;
687 	}
688 
689 	caplist = drmm_kzalloc(guc_to_drm(guc), size, GFP_KERNEL);
690 	if (!caplist)
691 		return -ENOMEM;
692 
693 	/* populate capture list header */
694 	tmp = caplist;
695 	num_regs = guc_cap_list_num_regs(guc, owner, type, capture_class);
696 	listnode = (struct guc_debug_capture_list *)tmp;
697 	listnode->header.info = FIELD_PREP(GUC_CAPTURELISTHDR_NUMDESCR, (u32)num_regs);
698 
699 	/* populate list of register descriptors */
700 	tmp += sizeof(struct guc_debug_capture_list);
701 	guc_capture_list_init(guc, owner, type, capture_class,
702 			      (struct guc_mmio_reg *)tmp, num_regs);
703 
704 	/* cache this list */
705 	cache->is_valid = true;
706 	cache->ptr = caplist;
707 	cache->size = size;
708 	cache->status = 0;
709 
710 	*outptr = caplist;
711 
712 	return 0;
713 }
714 
715 /**
716  * xe_guc_capture_getnullheader - Get a null list for register capture
717  * @guc:	The GuC object
718  * @outptr:	Pointer to the cached null register capture list
719  * @size:	Pointer to the returned size
720  *
721  * This function allocates a null list for register capture.
722  *
723  * Returns: 0 on success or a negative error code on failure.
724  */
725 int
726 xe_guc_capture_getnullheader(struct xe_guc *guc, void **outptr, size_t *size)
727 {
728 	struct xe_guc_state_capture *gc = guc->capture;
729 	int tmp = sizeof(u32) * 4;
730 	void *null_header;
731 
732 	if (gc->ads_null_cache) {
733 		*outptr = gc->ads_null_cache;
734 		*size = tmp;
735 		return 0;
736 	}
737 
738 	null_header = drmm_kzalloc(guc_to_drm(guc), tmp, GFP_KERNEL);
739 	if (!null_header)
740 		return -ENOMEM;
741 
742 	gc->ads_null_cache = null_header;
743 	*outptr = null_header;
744 	*size = tmp;
745 
746 	return 0;
747 }
748 
749 /**
750  * xe_guc_capture_ads_input_worst_size - Calculate worst-case size for GuC register capture
751  * @guc: Pointer to the xe_guc structure
752  *
753  * Calculate the worst-case size for GuC register capture by including all possible engine classes.
754  *
755  * Returns: Calculated size
756  */
757 size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc)
758 {
759 	size_t total_size, class_size, instance_size, global_size;
760 	int i, j;
761 
762 	/*
763 	 * This function calculates the worst case register lists size by
764 	 * including all possible engines classes. It is called during the
765 	 * first of a two-phase GuC (and ADS-population) initialization
766 	 * sequence, that is, during the pre-hwconfig phase before we have
767 	 * the exact engine fusing info.
768 	 */
769 	total_size = PAGE_SIZE;	/* Pad a page in front for empty lists */
770 	for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
771 		for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) {
772 			if (xe_guc_capture_getlistsize(guc, i,
773 						       GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
774 						       j, &class_size) < 0)
775 				class_size = 0;
776 			if (xe_guc_capture_getlistsize(guc, i,
777 						       GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
778 						       j, &instance_size) < 0)
779 				instance_size = 0;
780 			total_size += class_size + instance_size;
781 		}
782 		if (xe_guc_capture_getlistsize(guc, i,
783 					       GUC_STATE_CAPTURE_TYPE_GLOBAL,
784 					       0, &global_size) < 0)
785 			global_size = 0;
786 		total_size += global_size;
787 	}
788 
789 	return PAGE_ALIGN(total_size);
790 }
791 
792 static int guc_capture_output_size_est(struct xe_guc *guc)
793 {
794 	struct xe_gt *gt = guc_to_gt(guc);
795 	struct xe_hw_engine *hwe;
796 	enum xe_hw_engine_id id;
797 
798 	int capture_size = 0;
799 	size_t tmp = 0;
800 
801 	if (!guc->capture)
802 		return -ENODEV;
803 
804 	/*
805 	 * If every single engine-instance suffered a failure in quick succession but
806 	 * the failures were all unrelated, then a burst of multiple error-capture events
807 	 * would dump registers for every engine instance, one at a time. In this case,
808 	 * GuC would even dump the global-registers repeatedly.
809 	 *
810 	 * For each engine instance, there would be 1 x guc_state_capture_group_t output
811 	 * followed by 3 x guc_state_capture_t lists. The latter is how the register
812 	 * dumps are split across different register types (where the '3' are global vs class
813 	 * vs instance).
814 	 */
815 	for_each_hw_engine(hwe, gt, id) {
816 		enum guc_capture_list_class_type capture_class;
817 
818 		capture_class = xe_engine_class_to_guc_capture_class(hwe->class);
819 		capture_size += sizeof(struct guc_state_capture_group_header_t) +
820 					 (3 * sizeof(struct guc_state_capture_header_t));
821 
822 		if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_GLOBAL,
823 					     0, &tmp, true))
824 			capture_size += tmp;
825 		if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
826 					     capture_class, &tmp, true))
827 			capture_size += tmp;
828 		if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
829 					     capture_class, &tmp, true))
830 			capture_size += tmp;
831 	}
832 
833 	return capture_size;
834 }
835 
836 /*
837  * Add on a 3x multiplier to allow for multiple back-to-back captures occurring
838  * before the Xe can read the data out and process it
839  */
840 #define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
841 
842 static void check_guc_capture_size(struct xe_guc *guc)
843 {
844 	int capture_size = guc_capture_output_size_est(guc);
845 	int spare_size = capture_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
846 	u32 buffer_size = xe_guc_log_section_size_capture(&guc->log);
847 
848 	/*
849 	 * NOTE: capture_size is much smaller than the capture region
850 	 * allocation (DG2: <80K vs 1MB).
851 	 * Additionally, it's based on the space needed to fit all engines getting
852 	 * reset at once within the same G2H handler task slot. This is very
853 	 * unlikely. However, if GuC really does run out of space for whatever
854 	 * reason, we will see a separate warning message when processing the
855 	 * G2H event capture-notification, search for:
856 	 * xe_guc_STATE_CAPTURE_EVENT_STATUS_NOSPACE.
857 	 */
858 	if (capture_size < 0)
859 		xe_gt_dbg(guc_to_gt(guc),
860 			  "Failed to calculate error state capture buffer minimum size: %d!\n",
861 			  capture_size);
862 	if (capture_size > buffer_size)
863 		xe_gt_dbg(guc_to_gt(guc), "Error state capture buffer may be too small: %d < %d\n",
864 			  buffer_size, capture_size);
865 	else if (spare_size > buffer_size)
866 		xe_gt_dbg(guc_to_gt(guc),
867 			  "Error state capture buffer lacks spare size: %d < %d (min = %d)\n",
868 			  buffer_size, spare_size, capture_size);
869 }
870 
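/*
 * Worked example using the DG2 note above: with capture_size just under
 * 80K, spare_size is ~240K, which fits well within the 1MB capture
 * region, so neither of the size warnings would fire.
 */
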
871 static void
872 guc_capture_add_node_to_list(struct __guc_capture_parsed_output *node,
873 			     struct list_head *list)
874 {
875 	list_add(&node->link, list);
876 }
877 
878 static void
879 guc_capture_add_node_to_outlist(struct xe_guc_state_capture *gc,
880 				struct __guc_capture_parsed_output *node)
881 {
882 	guc_capture_remove_stale_matches_from_list(gc, node);
883 	guc_capture_add_node_to_list(node, &gc->outlist);
884 }
885 
886 static void
887 guc_capture_add_node_to_cachelist(struct xe_guc_state_capture *gc,
888 				  struct __guc_capture_parsed_output *node)
889 {
890 	guc_capture_add_node_to_list(node, &gc->cachelist);
891 }
892 
893 static void
894 guc_capture_free_outlist_node(struct xe_guc_state_capture *gc,
895 			      struct __guc_capture_parsed_output *n)
896 {
897 	if (n) {
898 		n->locked = false;
899 		list_del(&n->link);
900 		/* put node back to cache list */
901 		guc_capture_add_node_to_cachelist(gc, n);
902 	}
903 }
904 
905 static void
906 guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc,
907 					   struct __guc_capture_parsed_output *node)
908 {
909 	struct __guc_capture_parsed_output *n, *ntmp;
910 	int guc_id = node->guc_id;
911 
912 	list_for_each_entry_safe(n, ntmp, &gc->outlist, link) {
913 		if (n != node && !n->locked && n->guc_id == guc_id)
914 			guc_capture_free_outlist_node(gc, n);
915 	}
916 }
917 
918 static void
919 guc_capture_init_node(struct xe_guc *guc, struct __guc_capture_parsed_output *node)
920 {
921 	struct guc_mmio_reg *tmp[GUC_STATE_CAPTURE_TYPE_MAX];
922 	int i;
923 
924 	for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
925 		tmp[i] = node->reginfo[i].regs;
926 		memset(tmp[i], 0, sizeof(struct guc_mmio_reg) *
927 		       guc->capture->max_mmio_per_node);
928 	}
929 	memset(node, 0, sizeof(*node));
930 	for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i)
931 		node->reginfo[i].regs = tmp[i];
932 
933 	INIT_LIST_HEAD(&node->link);
934 }
935 
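/*
 * Note: guc_capture_init_node() deliberately saves and restores the
 * reginfo[].regs pointers around the memset so that the pre-allocated
 * register arrays are reused across the node's lifetimes.
 */
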
936 /**
937  * DOC: Init, G2H-event and reporting flows for GuC-error-capture
938  *
939  * KMD Init time flows:
940  * --------------------
941  *     --> alloc A: GuC input capture regs lists (registered to GuC via ADS).
942  *                  xe_guc_ads acquires the register lists by calling
943  *                  xe_guc_capture_getlistsize and xe_guc_capture_getlist 'n' times,
944  *                  where n = 1 for global-reg-list +
945  *                            num_engine_classes for class-reg-list +
946  *                            num_engine_classes for instance-reg-list
947  *                               (since all instances of the same engine-class type
948  *                                have an identical engine-instance register-list).
949  *                  The ADS module also calls this separately for PF vs VF.
950  *
951  *     --> alloc B: GuC output capture buf (registered via guc_init_params(log_param))
952  *                  Size = #define CAPTURE_BUFFER_SIZE (warns if too small)
953  *                  Note: 'x 3' to hold multiple capture groups
954  *
955  * GUC Runtime notify capture:
956  * --------------------------
957  *     --> G2H STATE_CAPTURE_NOTIFICATION
958  *                   L--> xe_guc_capture_process
959  *                           L--> Loop through B (head..tail) and for each engine instance's
960  *                                err-state-captured register-list we find, we alloc 'C':
961  *      --> alloc C: A capture-output-node structure that includes misc capture info along
962  *                   with 3 register list dumps (global, engine-class and engine-instance)
963  *                   This node is created from a pre-allocated list of blank nodes in
964  *                   guc->capture->cachelist and populated with the error-capture
965  *                   data from GuC and then it's added into guc->capture->outlist linked
966  *                   list. This list is used for matchup and printout by xe_devcoredump_read
967  *                   and xe_engine_snapshot_print, (when user invokes the devcoredump sysfs).
968  *
969  * GUC --> notify context reset:
970  * -----------------------------
971  *     --> guc_exec_queue_timedout_job
972  *                   L--> xe_devcoredump
973  *                          L--> devcoredump_snapshot
974  *                               --> xe_hw_engine_snapshot_capture
975  *                               --> xe_engine_manual_capture(For manual capture)
976  *
977  * User Sysfs / Debugfs
978  * --------------------
979  *      --> xe_devcoredump_read->
980  *             L--> xxx_snapshot_print
981  *                    L--> xe_engine_snapshot_print
982  *                         Print register lists values saved at
983  *                         guc->capture->outlist
984  *
985  */
986 
987 static int guc_capture_buf_cnt(struct __guc_capture_bufstate *buf)
988 {
989 	if (buf->wr >= buf->rd)
990 		return (buf->wr - buf->rd);
991 	return (buf->size - buf->rd) + buf->wr;
992 }
993 
994 static int guc_capture_buf_cnt_to_end(struct __guc_capture_bufstate *buf)
995 {
996 	if (buf->rd > buf->wr)
997 		return (buf->size - buf->rd);
998 	return (buf->wr - buf->rd);
999 }
1000 
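/*
 * Worked example (values assumed): with size = 0x1000, rd = 0xF00 and
 * wr = 0x100, the write pointer has wrapped, so guc_capture_buf_cnt()
 * returns (0x1000 - 0xF00) + 0x100 = 0x200 bytes in total, while
 * guc_capture_buf_cnt_to_end() returns only the 0x100 contiguous bytes
 * up to the end of the ring.
 */
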
1001 /*
1002  * GuC's error-capture output is a ring buffer populated in a byte-stream fashion:
1003  *
1004  * The GuC Log buffer region for error-capture is managed like a ring buffer.
1005  * The GuC firmware dumps error capture logs into this ring in a byte-stream flow.
1006  * Additionally, both currently and for the foreseeable future, all packed error-
1007  * capture output structures are dword aligned.
1008  *
1009  * That said, if the GuC firmware is in the midst of writing a structure that is larger
1010  * than one dword but the tail end of the err-capture buffer-region has less space left,
1011  * we would need to extract that structure one dword at a time straddled across the end,
1012  * onto the start of the ring.
1013  *
1014  * Below function, guc_capture_log_remove_bytes is a helper for that. All callers of this
1015  * function would typically do a straight-up memcpy from the ring contents and will only
1016  * call this helper if their structure-extraction is straddling across the end of the
1017  * ring. GuC firmware does not add any padding. The reason for the no-padding is to ease
1018  * scalability for future expansion of output data types without requiring a redesign
1019  * of the flow controls.
1020  */
1021 static int
1022 guc_capture_log_remove_bytes(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
1023 			     void *out, int bytes_needed)
1024 {
1025 #define GUC_CAPTURE_LOG_BUF_COPY_RETRY_MAX	3
1026 
1027 	int fill_size = 0, tries = GUC_CAPTURE_LOG_BUF_COPY_RETRY_MAX;
1028 	int copy_size, avail;
1029 
1030 	xe_assert(guc_to_xe(guc), bytes_needed % sizeof(u32) == 0);
1031 
1032 	if (bytes_needed > guc_capture_buf_cnt(buf))
1033 		return -1;
1034 
1035 	while (bytes_needed > 0 && tries--) {
1036 		int misaligned;
1037 
1038 		avail = guc_capture_buf_cnt_to_end(buf);
1039 		misaligned = avail % sizeof(u32);
1040 		/* wrap if at end */
1041 		if (!avail) {
1042 			/* output stream clipped */
1043 			if (!buf->rd)
1044 				return fill_size;
1045 			buf->rd = 0;
1046 			continue;
1047 		}
1048 
1049 		/* Only copy to u32 aligned data */
1050 		copy_size = avail < bytes_needed ? avail - misaligned : bytes_needed;
1051 		xe_map_memcpy_from(guc_to_xe(guc), out + fill_size, &guc->log.bo->vmap,
1052 				   buf->data_offset + buf->rd, copy_size);
1053 		buf->rd += copy_size;
1054 		fill_size += copy_size;
1055 		bytes_needed -= copy_size;
1056 
1057 		if (misaligned)
1058 			xe_gt_warn(guc_to_gt(guc),
1059 				   "Bytes extraction not dword aligned, clipping.\n");
1060 	}
1061 
1062 	return fill_size;
1063 }
1064 
1065 static int
1066 guc_capture_log_get_group_hdr(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
1067 			      struct guc_state_capture_group_header_t *ghdr)
1068 {
1069 	int fullsize = sizeof(struct guc_state_capture_group_header_t);
1070 
1071 	if (guc_capture_log_remove_bytes(guc, buf, ghdr, fullsize) != fullsize)
1072 		return -1;
1073 	return 0;
1074 }
1075 
1076 static int
1077 guc_capture_log_get_data_hdr(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
1078 			     struct guc_state_capture_header_t *hdr)
1079 {
1080 	int fullsize = sizeof(struct guc_state_capture_header_t);
1081 
1082 	if (guc_capture_log_remove_bytes(guc, buf, hdr, fullsize) != fullsize)
1083 		return -1;
1084 	return 0;
1085 }
1086 
1087 static int
1088 guc_capture_log_get_register(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
1089 			     struct guc_mmio_reg *reg)
1090 {
1091 	int fullsize = sizeof(struct guc_mmio_reg);
1092 
1093 	if (guc_capture_log_remove_bytes(guc, buf, reg, fullsize) != fullsize)
1094 		return -1;
1095 	return 0;
1096 }
1097 
1098 static struct __guc_capture_parsed_output *
1099 guc_capture_get_prealloc_node(struct xe_guc *guc)
1100 {
1101 	struct __guc_capture_parsed_output *found = NULL;
1102 
1103 	if (!list_empty(&guc->capture->cachelist)) {
1104 		struct __guc_capture_parsed_output *n, *ntmp;
1105 
1106 		/* get first avail node from the cache list */
1107 		list_for_each_entry_safe(n, ntmp, &guc->capture->cachelist, link) {
1108 			found = n;
1109 			break;
1110 		}
1111 	} else {
1112 		struct __guc_capture_parsed_output *n, *ntmp;
1113 
1114 		/*
1115 		 * traverse reversed and steal back the oldest node already
1116 		 * allocated
1117 		 */
1118 		list_for_each_entry_safe_reverse(n, ntmp, &guc->capture->outlist, link) {
1119 			if (!n->locked)
1120 				found = n;
1121 		}
1122 	}
1123 	if (found) {
1124 		list_del(&found->link);
1125 		guc_capture_init_node(guc, found);
1126 	}
1127 
1128 	return found;
1129 }
1130 
1131 static struct __guc_capture_parsed_output *
1132 guc_capture_clone_node(struct xe_guc *guc, struct __guc_capture_parsed_output *original,
1133 		       u32 keep_reglist_mask)
1134 {
1135 	struct __guc_capture_parsed_output *new;
1136 	int i;
1137 
1138 	new = guc_capture_get_prealloc_node(guc);
1139 	if (!new)
1140 		return NULL;
1141 	if (!original)
1142 		return new;
1143 
1144 	new->is_partial = original->is_partial;
1145 
1146 	/* copy reg-lists that we want to clone */
1147 	for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
1148 		if (keep_reglist_mask & BIT(i)) {
1149 			XE_WARN_ON(original->reginfo[i].num_regs  >
1150 				   guc->capture->max_mmio_per_node);
1151 
1152 			memcpy(new->reginfo[i].regs, original->reginfo[i].regs,
1153 			       original->reginfo[i].num_regs * sizeof(struct guc_mmio_reg));
1154 
1155 			new->reginfo[i].num_regs = original->reginfo[i].num_regs;
1156 			new->reginfo[i].vfid  = original->reginfo[i].vfid;
1157 
1158 			if (i == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS) {
1159 				new->eng_class = original->eng_class;
1160 			} else if (i == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
1161 				new->eng_inst = original->eng_inst;
1162 				new->guc_id = original->guc_id;
1163 				new->lrca = original->lrca;
1164 			}
1165 		}
1166 	}
1167 
1168 	return new;
1169 }
1170 
1171 static int
1172 guc_capture_extract_reglists(struct xe_guc *guc, struct __guc_capture_bufstate *buf)
1173 {
1174 	struct xe_gt *gt = guc_to_gt(guc);
1175 	struct guc_state_capture_group_header_t ghdr = {0};
1176 	struct guc_state_capture_header_t hdr = {0};
1177 	struct __guc_capture_parsed_output *node = NULL;
1178 	struct guc_mmio_reg *regs = NULL;
1179 	int i, numlists, numregs, ret = 0;
1180 	enum guc_state_capture_type datatype;
1181 	struct guc_mmio_reg tmp;
1182 	bool is_partial = false;
1183 
1184 	i = guc_capture_buf_cnt(buf);
1185 	if (!i)
1186 		return -ENODATA;
1187 
1188 	if (i % sizeof(u32)) {
1189 		xe_gt_warn(gt, "Got misaligned register capture entries\n");
1190 		ret = -EIO;
1191 		goto bailout;
1192 	}
1193 
1194 	/* first get the capture group header */
1195 	if (guc_capture_log_get_group_hdr(guc, buf, &ghdr)) {
1196 		ret = -EIO;
1197 		goto bailout;
1198 	}
1199 	/*
1200 	 * we would typically expect a layout as below, with the number of captures
1201 	 * being at least 3, and more than that if we are seeing multiple dependent
1202 	 * engine instances being reset together.
1203 	 * ____________________________________________
1204 	 * | Capture Group                            |
1205 	 * | ________________________________________ |
1206 	 * | | Capture Group Header:                | |
1207 	 * | |  - num_captures = 5                  | |
1208 	 * | |______________________________________| |
1209 	 * | ________________________________________ |
1210 	 * | | Capture1:                            | |
1211 	 * | |  Hdr: GLOBAL, numregs=a              | |
1212 	 * | | ____________________________________ | |
1213 	 * | | | Reglist                          | | |
1214 	 * | | | - reg1, reg2, ... rega           | | |
1215 	 * | | |__________________________________| | |
1216 	 * | |______________________________________| |
1217 	 * | ________________________________________ |
1218 	 * | | Capture2:                            | |
1219 	 * | |  Hdr: CLASS=RENDER/COMPUTE, numregs=b| |
1220 	 * | | ____________________________________ | |
1221 	 * | | | Reglist                          | | |
1222 	 * | | | - reg1, reg2, ... regb           | | |
1223 	 * | | |__________________________________| | |
1224 	 * | |______________________________________| |
1225 	 * | ________________________________________ |
1226 	 * | | Capture3:                            | |
1227 	 * | |  Hdr: INSTANCE=RCS, numregs=c        | |
1228 	 * | | ____________________________________ | |
1229 	 * | | | Reglist                          | | |
1230 	 * | | | - reg1, reg2, ... regc           | | |
1231 	 * | | |__________________________________| | |
1232 	 * | |______________________________________| |
1233 	 * | ________________________________________ |
1234 	 * | | Capture4:                            | |
1235 	 * | |  Hdr: CLASS=RENDER/COMPUTE, numregs=d| |
1236 	 * | | ____________________________________ | |
1237 	 * | | | Reglist                          | | |
1238 	 * | | | - reg1, reg2, ... regd           | | |
1239 	 * | | |__________________________________| | |
1240 	 * | |______________________________________| |
1241 	 * | ________________________________________ |
1242 	 * | | Capture5:                            | |
1243 	 * | |  Hdr: INSTANCE=CCS0, numregs=e       | |
1244 	 * | | ____________________________________ | |
1245 	 * | | | Reglist                          | | |
1246 	 * | | | - reg1, reg2, ... rege           | | |
1247 	 * | | |__________________________________| | |
1248 	 * | |______________________________________| |
1249 	 * |__________________________________________|
1250 	 */
1251 	is_partial = FIELD_GET(GUC_STATE_CAPTURE_GROUP_HEADER_CAPTURE_GROUP_TYPE, ghdr.info);
1252 	numlists = FIELD_GET(GUC_STATE_CAPTURE_GROUP_HEADER_NUM_CAPTURES, ghdr.info);
1253 
1254 	while (numlists--) {
1255 		if (guc_capture_log_get_data_hdr(guc, buf, &hdr)) {
1256 			ret = -EIO;
1257 			break;
1258 		}
1259 
1260 		datatype = FIELD_GET(GUC_STATE_CAPTURE_HEADER_CAPTURE_TYPE, hdr.info);
1261 		if (datatype > GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
1262 			/* unknown capture type - skip over to next capture set */
1263 			numregs = FIELD_GET(GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES,
1264 					    hdr.num_mmio_entries);
1265 			while (numregs--) {
1266 				if (guc_capture_log_get_register(guc, buf, &tmp)) {
1267 					ret = -EIO;
1268 					break;
1269 				}
1270 			}
1271 			continue;
1272 		} else if (node) {
1273 			/*
1274 			 * Based on the current capture type and what we have so far,
1275 			 * decide if we should add the current node into the internal
1276 			 * linked list for match-up when xe_devcoredump calls later
1277 			 * (and alloc a blank node for the next set of reglists)
1278 			 * or continue with the same node or clone the current node
1279 			 * but only retain the global or class registers (such as the
1280 			 * case of dependent engine resets).
1281 			 */
1282 			if (datatype == GUC_STATE_CAPTURE_TYPE_GLOBAL) {
1283 				guc_capture_add_node_to_outlist(guc->capture, node);
1284 				node = NULL;
1285 			} else if (datatype == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
1286 				   node->reginfo[GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS].num_regs) {
1287 				/* Add to list, clone node and duplicate global list */
1288 				guc_capture_add_node_to_outlist(guc->capture, node);
1289 				node = guc_capture_clone_node(guc, node,
1290 							      GCAP_PARSED_REGLIST_INDEX_GLOBAL);
1291 			} else if (datatype == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE &&
1292 				   node->reginfo[GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE].num_regs) {
1293 				/* Add to list, clone node and duplicate global + class lists */
1294 				guc_capture_add_node_to_outlist(guc->capture, node);
1295 				node = guc_capture_clone_node(guc, node,
1296 							      (GCAP_PARSED_REGLIST_INDEX_GLOBAL |
1297 							      GCAP_PARSED_REGLIST_INDEX_ENGCLASS));
1298 			}
1299 		}
1300 
1301 		if (!node) {
1302 			node = guc_capture_get_prealloc_node(guc);
1303 			if (!node) {
1304 				ret = -ENOMEM;
1305 				break;
1306 			}
1307 			if (datatype != GUC_STATE_CAPTURE_TYPE_GLOBAL)
1308 				xe_gt_dbg(gt, "Register capture missing global dump: %08x!\n",
1309 					  datatype);
1310 		}
1311 		node->is_partial = is_partial;
1312 		node->reginfo[datatype].vfid = FIELD_GET(GUC_STATE_CAPTURE_HEADER_VFID, hdr.owner);
1313 		node->source = XE_ENGINE_CAPTURE_SOURCE_GUC;
1314 		node->type = datatype;
1315 
1316 		switch (datatype) {
1317 		case GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE:
1318 			node->eng_class = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS,
1319 						    hdr.info);
1320 			node->eng_inst = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_INSTANCE,
1321 						   hdr.info);
1322 			node->lrca = hdr.lrca;
1323 			node->guc_id = hdr.guc_id;
1324 			break;
1325 		case GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS:
1326 			node->eng_class = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS,
1327 						    hdr.info);
1328 			break;
1329 		default:
1330 			break;
1331 		}
1332 
1333 		numregs = FIELD_GET(GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES,
1334 				    hdr.num_mmio_entries);
1335 		if (numregs > guc->capture->max_mmio_per_node) {
1336 			xe_gt_dbg(gt, "Register capture list extraction clipped by prealloc!\n");
1337 			numregs = guc->capture->max_mmio_per_node;
1338 		}
1339 		node->reginfo[datatype].num_regs = numregs;
1340 		regs = node->reginfo[datatype].regs;
1341 		i = 0;
1342 		while (numregs--) {
1343 			if (guc_capture_log_get_register(guc, buf, &regs[i++])) {
1344 				ret = -EIO;
1345 				break;
1346 			}
1347 		}
1348 	}
1349 
1350 bailout:
1351 	if (node) {
1352 		/* If we have data, add to linked list for match-up when xe_devcoredump calls */
1353 		for (i = GUC_STATE_CAPTURE_TYPE_GLOBAL; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
1354 			if (node->reginfo[i].regs) {
1355 				guc_capture_add_node_to_outlist(guc->capture, node);
1356 				node = NULL;
1357 				break;
1358 			}
1359 		}
1360 		if (node) /* else return it back to cache list */
1361 			guc_capture_add_node_to_cachelist(guc->capture, node);
1362 	}
1363 	return ret;
1364 }
1365 
1366 static int __guc_capture_flushlog_complete(struct xe_guc *guc)
1367 {
1368 	u32 action[] = {
1369 		XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE,
1370 		GUC_LOG_BUFFER_CAPTURE
1371 	};
1372 
1373 	return xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
1374 }
1375 
1376 static void __guc_capture_process_output(struct xe_guc *guc)
1377 {
1378 	unsigned int buffer_size, read_offset, write_offset, full_count;
1379 	struct xe_uc *uc = container_of(guc, typeof(*uc), guc);
1380 	struct guc_log_buffer_state log_buf_state_local;
1381 	struct __guc_capture_bufstate buf;
1382 	bool new_overflow;
1383 	int ret, tmp;
1384 	u32 log_buf_state_offset;
1385 	u32 src_data_offset;
1386 
1387 	log_buf_state_offset = sizeof(struct guc_log_buffer_state) * GUC_LOG_BUFFER_CAPTURE;
1388 	src_data_offset = xe_guc_get_log_buffer_offset(&guc->log, GUC_LOG_BUFFER_CAPTURE);
1389 
1390 	/*
1391 	 * Make a copy of the state structure, which lives inside the GuC log
1392 	 * buffer (an uncached mapping), on the stack to avoid reading
1393 	 * from it multiple times.
1394 	 */
1395 	xe_map_memcpy_from(guc_to_xe(guc), &log_buf_state_local, &guc->log.bo->vmap,
1396 			   log_buf_state_offset, sizeof(struct guc_log_buffer_state));
1397 
1398 	buffer_size = xe_guc_get_log_buffer_size(&guc->log, GUC_LOG_BUFFER_CAPTURE);
1399 	read_offset = log_buf_state_local.read_ptr;
1400 	write_offset = log_buf_state_local.sampled_write_ptr;
1401 	full_count = FIELD_GET(GUC_LOG_BUFFER_STATE_BUFFER_FULL_CNT, log_buf_state_local.flags);
1402 
1403 	/* Bookkeeping stuff */
1404 	tmp = FIELD_GET(GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE, log_buf_state_local.flags);
1405 	guc->log.stats[GUC_LOG_BUFFER_CAPTURE].flush += tmp;
1406 	new_overflow = xe_guc_check_log_buf_overflow(&guc->log, GUC_LOG_BUFFER_CAPTURE,
1407 						     full_count);
1408 
1409 	/* Now copy the actual logs. */
1410 	if (unlikely(new_overflow)) {
1411 		/* copy the whole buffer in case of overflow */
1412 		read_offset = 0;
1413 		write_offset = buffer_size;
1414 	} else if (unlikely((read_offset > buffer_size) ||
1415 			(write_offset > buffer_size))) {
1416 		xe_gt_err(guc_to_gt(guc),
1417 			  "Register capture buffer in invalid state: read = 0x%X, size = 0x%X!\n",
1418 			  read_offset, buffer_size);
1419 		/* copy whole buffer as offsets are unreliable */
1420 		read_offset = 0;
1421 		write_offset = buffer_size;
1422 	}
1423 
1424 	buf.size = buffer_size;
1425 	buf.rd = read_offset;
1426 	buf.wr = write_offset;
1427 	buf.data_offset = src_data_offset;
1428 
1429 	if (!xe_guc_read_stopped(guc)) {
1430 		do {
1431 			ret = guc_capture_extract_reglists(guc, &buf);
1432 			if (ret && ret != -ENODATA)
1433 				xe_gt_dbg(guc_to_gt(guc), "Capture extraction failed:%d\n", ret);
1434 		} while (ret >= 0);
1435 	}
1436 
1437 	/* Update the state of log buffer err-cap state */
1438 	xe_map_wr(guc_to_xe(guc), &guc->log.bo->vmap,
1439 		  log_buf_state_offset + offsetof(struct guc_log_buffer_state, read_ptr), u32,
1440 		  write_offset);
1441 
1442 	/*
1443 	 * Clear flush_to_file in the local copy first (the local copy was loaded
1444 	 * by the xe_map_memcpy_from() above), then write the updated local copy
1445 	 * back out through xe_map_wr()
1446 	 */
1447 	log_buf_state_local.flags &= ~GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE;
1448 	xe_map_wr(guc_to_xe(guc), &guc->log.bo->vmap,
1449 		  log_buf_state_offset + offsetof(struct guc_log_buffer_state, flags), u32,
1450 		  log_buf_state_local.flags);
1451 	__guc_capture_flushlog_complete(guc);
1452 }
1453 
1454 /**
1455  * xe_guc_capture_process - Process GuC register captured data
1456  * @guc: The GuC object
1457  *
1458  * When GuC captured data is ready, GuC will send message
1459  * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to the host; this function is
1460  * called to process the data that comes with the message.
1461  *
1462  * Returns: None
1463  */
1464 void xe_guc_capture_process(struct xe_guc *guc)
1465 {
1466 	if (guc->capture)
1467 		__guc_capture_process_output(guc);
1468 }
1469 
1470 static struct __guc_capture_parsed_output *
1471 guc_capture_alloc_one_node(struct xe_guc *guc)
1472 {
1473 	struct drm_device *drm = guc_to_drm(guc);
1474 	struct __guc_capture_parsed_output *new;
1475 	int i;
1476 
1477 	new = drmm_kzalloc(drm, sizeof(*new), GFP_KERNEL);
1478 	if (!new)
1479 		return NULL;
1480 
1481 	for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
1482 		new->reginfo[i].regs = drmm_kzalloc(drm, guc->capture->max_mmio_per_node *
1483 						    sizeof(struct guc_mmio_reg), GFP_KERNEL);
1484 		if (!new->reginfo[i].regs) {
1485 			while (i)
1486 				drmm_kfree(drm, new->reginfo[--i].regs);
1487 			drmm_kfree(drm, new);
1488 			return NULL;
1489 		}
1490 	}
1491 	guc_capture_init_node(guc, new);
1492 
1493 	return new;
1494 }
1495 
1496 static void
1497 __guc_capture_create_prealloc_nodes(struct xe_guc *guc)
1498 {
1499 	struct __guc_capture_parsed_output *node = NULL;
1500 	int i;
1501 
1502 	for (i = 0; i < PREALLOC_NODES_MAX_COUNT; ++i) {
1503 		node = guc_capture_alloc_one_node(guc);
1504 		if (!node) {
1505 			xe_gt_warn(guc_to_gt(guc), "Register capture pre-alloc-cache failure\n");
1506 			/* don't free the priors, use what we got and clean up at shutdown */
1507 			return;
1508 		}
1509 		guc_capture_add_node_to_cachelist(guc->capture, node);
1510 	}
1511 }
1512 
1513 static int
1514 guc_get_max_reglist_count(struct xe_guc *guc)
1515 {
1516 	int i, j, k, tmp, maxregcount = 0;
1517 
1518 	for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; ++i) {
1519 		for (j = 0; j < GUC_STATE_CAPTURE_TYPE_MAX; ++j) {
1520 			for (k = 0; k < GUC_CAPTURE_LIST_CLASS_MAX; ++k) {
1521 				const struct __guc_mmio_reg_descr_group *match;
1522 
1523 				if (j == GUC_STATE_CAPTURE_TYPE_GLOBAL && k > 0)
1524 					continue;
1525 
1526 				tmp = 0;
1527 				match = guc_capture_get_one_list(guc->capture->reglists, i, j, k);
1528 				if (match)
1529 					tmp = match->num_regs;
1530 
1531 				match = guc_capture_get_one_list(guc->capture->extlists, i, j, k);
1532 				if (match)
1533 					tmp += match->num_regs;
1534 
1535 				if (tmp > maxregcount)
1536 					maxregcount = tmp;
1537 			}
1538 		}
1539 	}
1540 	if (!maxregcount)
1541 		maxregcount = PREALLOC_NODES_DEFAULT_NUMREGS;
1542 
1543 	return maxregcount;
1544 }
1545 
1546 static void
1547 guc_capture_create_prealloc_nodes(struct xe_guc *guc)
1548 {
1549 	/* skip if we've already done the pre-alloc */
1550 	if (guc->capture->max_mmio_per_node)
1551 		return;
1552 
1553 	guc->capture->max_mmio_per_node = guc_get_max_reglist_count(guc);
1554 	__guc_capture_create_prealloc_nodes(guc);
1555 }
1556 
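/*
 * Read the registers described by @list straight from the hardware into
 * @regs: engine-instance registers through the engine's MMIO base, steered
 * engine-class registers through a unicast MCR read (steering group/instance
 * taken from the descriptor flags), and everything else through a plain
 * GT MMIO read.
 */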
1557 static void
1558 read_reg_to_node(struct xe_hw_engine *hwe, const struct __guc_mmio_reg_descr_group *list,
1559 		 struct guc_mmio_reg *regs)
1560 {
1561 	int i;
1562 
1563 	if (!list || !list->list || list->num_regs == 0)
1564 		return;
1565 
1566 	if (!regs)
1567 		return;
1568 
1569 	for (i = 0; i < list->num_regs; i++) {
1570 		struct __guc_mmio_reg_descr desc = list->list[i];
1571 		u32 value;
1572 
1573 		if (list->type == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
1574 			value = xe_hw_engine_mmio_read32(hwe, desc.reg);
1575 		} else {
1576 			if (list->type == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
1577 			    FIELD_GET(GUC_REGSET_STEERING_NEEDED, desc.flags)) {
1578 				int group, instance;
1579 
1580 				group = FIELD_GET(GUC_REGSET_STEERING_GROUP, desc.flags);
1581 				instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, desc.flags);
1582 				value = xe_gt_mcr_unicast_read(hwe->gt, XE_REG_MCR(desc.reg.addr),
1583 							       group, instance);
1584 			} else {
1585 				value = xe_mmio_read32(&hwe->gt->mmio, desc.reg);
1586 			}
1587 		}
1588 
1589 		regs[i].value = value;
1590 		regs[i].offset = desc.reg.addr;
1591 		regs[i].flags = desc.flags;
1592 		regs[i].mask = desc.mask;
1593 	}
1594 }
1595 
1596 /**
1597  * xe_engine_manual_capture - Take a manual engine snapshot.
1598  * @hwe: Xe HW Engine.
1599  * @snapshot: The engine snapshot
1600  *
1601  * Take an engine snapshot by reading its registers directly via MMIO.
1602  *
1603  * Returns: None
1604  */
1605 void
1606 xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot)
1607 {
1608 	struct xe_gt *gt = hwe->gt;
1609 	struct xe_device *xe = gt_to_xe(gt);
1610 	struct xe_guc *guc = &gt->uc.guc;
1611 	struct xe_devcoredump *devcoredump = &xe->devcoredump;
1612 	enum guc_capture_list_class_type capture_class;
1613 	const struct __guc_mmio_reg_descr_group *list;
1614 	struct __guc_capture_parsed_output *new;
1615 	enum guc_state_capture_type type;
1616 	u16 guc_id = 0;
1617 	u32 lrca = 0;
1618 
1619 	if (IS_SRIOV_VF(xe))
1620 		return;
1621 
1622 	new = guc_capture_get_prealloc_node(guc);
1623 	if (!new)
1624 		return;
1625 
1626 	capture_class = xe_engine_class_to_guc_capture_class(hwe->class);
1627 	for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
1628 		struct gcap_reg_list_info *reginfo = &new->reginfo[type];
1629 		/*
1630 		 * reginfo->regs is allocated based on guc->capture->max_mmio_per_node,
1631 		 * which is derived from the same descriptor lists that drive the
1632 		 * population here, so it should not overflow.
1633 		 */
1634 
1635 		/* Get register list for the type/class */
1636 		list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
1637 							capture_class, false);
1638 		if (!list) {
1639 			xe_gt_dbg(gt, "Empty GuC capture register descriptor for %s\n",
1640 				  hwe->name);
1641 			continue;
1642 		}
1643 
1644 		read_reg_to_node(hwe, list, reginfo->regs);
1645 		reginfo->num_regs = list->num_regs;
1646 
1647 		/* Capture steering registers for rcs/ccs */
1648 		if (capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
1649 			list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF,
1650 								type, capture_class, true);
1651 			if (list) {
1652 				read_reg_to_node(hwe, list, &reginfo->regs[reginfo->num_regs]);
1653 				reginfo->num_regs += list->num_regs;
1654 			}
1655 		}
1656 	}
1657 
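	/*
	 * If a devcoredump is already in flight, reuse the guc_id and the
	 * first LRC's context descriptor from its exec-queue snapshot so
	 * that this manually captured node can be matched to the same
	 * context later.
	 */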
1658 	if (devcoredump && devcoredump->captured) {
1659 		struct xe_guc_submit_exec_queue_snapshot *ge = devcoredump->snapshot.ge;
1660 
1661 		if (ge) {
1662 			guc_id = ge->guc.id;
1663 			if (ge->lrc[0])
1664 				lrca = ge->lrc[0]->context_desc;
1665 		}
1666 	}
1667 
1668 	new->eng_class = xe_engine_class_to_guc_class(hwe->class);
1669 	new->eng_inst = hwe->instance;
1670 	new->guc_id = guc_id;
1671 	new->lrca = lrca;
1672 	new->is_partial = false;
1673 	new->locked = true;
1674 	new->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL;
1675 
1676 	guc_capture_add_node_to_outlist(guc->capture, new);
1677 	devcoredump->snapshot.matched_node = new;
1678 }
1679 
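/*
 * A linear scan is acceptable here: the capture register lists are small
 * (bounded by guc->capture->max_mmio_per_node) and this lookup only runs
 * on the devcoredump printing path.
 */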
1680 static struct guc_mmio_reg *
1681 guc_capture_find_reg(struct gcap_reg_list_info *reginfo, u32 addr, u32 flags)
1682 {
1683 	int i;
1684 
1685 	if (reginfo && reginfo->num_regs > 0) {
1686 		struct guc_mmio_reg *regs = reginfo->regs;
1687 
1688 		if (regs)
1689 			for (i = 0; i < reginfo->num_regs; i++)
1690 				if (regs[i].offset == addr && regs[i].flags == flags)
1691 					return &regs[i];
1692 	}
1693 
1694 	return NULL;
1695 }
1696 
1697 static void
1698 snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p,
1699 			     u32 type, const struct __guc_mmio_reg_descr_group *list)
1700 {
1701 	struct xe_gt *gt = snapshot->hwe->gt;
1702 	struct xe_device *xe = gt_to_xe(gt);
1703 	struct xe_devcoredump *devcoredump = &xe->devcoredump;
1704 	struct xe_devcoredump_snapshot *devcore_snapshot = &devcoredump->snapshot;
1705 	struct gcap_reg_list_info *reginfo = NULL;
1706 	u32 i, last_value = 0;
1707 	bool low32_ready = false;
1708 
1709 	if (!list || !list->list || list->num_regs == 0)
1710 		return;
1711 	XE_WARN_ON(!devcore_snapshot->matched_node);
1712 
1713 	reginfo = &devcore_snapshot->matched_node->reginfo[type];
1714 
1715 	/*
1716 	 * Loop through the descriptor list first and look each register up
1717 	 * in the node. This is more maintainable for developers, as it
1718 	 * ensures the printout matches the ordering of the static
1719 	 * descriptor table-of-lists.
1720 	 */
1721 	for (i = 0; i < list->num_regs; i++) {
1722 		const struct __guc_mmio_reg_descr *reg_desc = &list->list[i];
1723 		struct guc_mmio_reg *reg;
1724 		u32 value;
1725 
1726 		reg = guc_capture_find_reg(reginfo, reg_desc->reg.addr, reg_desc->flags);
1727 		if (!reg)
1728 			continue;
1729 
1730 		value = reg->value;
1731 		switch (reg_desc->data_type) {
1732 		case REG_64BIT_LOW_DW:
1733 			last_value = value;
1734 
1735 			/*
1736 			 * A 64 bit register define requires 2 consecutive
1737 			 * entries in register list, with low dword first
1738 			 * and hi dword the second, like:
1739 			 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
1740 			 *  { XXX_REG_HI(0), REG_64BIT_HI_DW,  0, 0, "XXX_REG"},
1741 			 *
1742 			 * Incorrect order will trigger XE_WARN.
1743 			 *
1744 			 * Possible double low here, for example:
1745 			 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
1746 			 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
1747 			 */
1748 			XE_WARN_ON(low32_ready);
1749 			low32_ready = true;
1750 			/* Low 32 bit dword saved, continue for high 32 bit */
1751 			break;
1752 
1753 		case REG_64BIT_HI_DW: {
1754 			u64 value_qw = ((u64)value << 32) | last_value;
1755 
1756 			/*
1757 			 * Incorrect 64bit register order. Possible missing low.
1758 			 * For example:
1759 			 *  { XXX_REG(0), REG_32BIT, 0, 0, NULL},
1760 			 *  { XXX_REG_HI(0), REG_64BIT_HI_DW, 0, 0, NULL},
1761 			 */
1762 			XE_WARN_ON(!low32_ready);
1763 			low32_ready = false;
1764 
1765 			drm_printf(p, "\t%s: 0x%016llx\n", reg_desc->regname, value_qw);
1766 			break;
1767 		}
1768 
1769 		case REG_32BIT:
1770 			/*
1771 			 * Incorrect 64bit register order. Possible missing high.
1772 			 * For example:
1773 			 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
1774 			 *  { XXX_REG(0), REG_32BIT, 0, 0, "XXX_REG"},
1775 			 */
1776 			XE_WARN_ON(low32_ready);
1777 
1778 			if (FIELD_GET(GUC_REGSET_STEERING_NEEDED, reg_desc->flags))
1779 				drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname,
1780 					   reg_desc->dss_id, value);
1781 			else
1782 				drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value);
1783 
1784 			break;
1785 		}
1786 	}
1787 
1788 	/*
1789 	 * Incorrect 64bit register order. Possible missing high.
1790 	 * For example:
1791 	 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
1792 	 *  } // <- Register list end
1793 	 */
1794 	XE_WARN_ON(low32_ready);
1795 }
1796 
1797 /**
1798  * xe_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
1799  * @snapshot: Xe HW Engine snapshot object.
1800  * @p: drm_printer where it will be printed out.
1801  *
1802  * This function prints out a given Xe HW Engine snapshot object.
1803  */
1804 void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
1805 {
1806 	const char *grptype[GUC_STATE_CAPTURE_GROUP_TYPE_MAX] = {
1807 		"full-capture",
1808 		"partial-capture"
1809 	};
1810 	int type;
1811 	const struct __guc_mmio_reg_descr_group *list;
1812 	enum guc_capture_list_class_type capture_class;
1813 
1814 	struct xe_gt *gt;
1815 	struct xe_device *xe;
1816 	struct xe_devcoredump *devcoredump;
1817 	struct xe_devcoredump_snapshot *devcore_snapshot;
1818 
1819 	if (!snapshot)
1820 		return;
1821 
1822 	gt = snapshot->hwe->gt;
1823 	xe = gt_to_xe(gt);
1824 	devcoredump = &xe->devcoredump;
1825 	devcore_snapshot = &devcoredump->snapshot;
1826 
1827 	if (!devcore_snapshot->matched_node)
1828 		return;
1829 
1830 	xe_gt_assert(gt, snapshot->hwe);
1831 
1832 	capture_class = xe_engine_class_to_guc_capture_class(snapshot->hwe->class);
1833 
1834 	drm_printf(p, "%s (physical), logical instance=%d\n",
1835 		   snapshot->name ? snapshot->name : "",
1836 		   snapshot->logical_instance);
1837 	drm_printf(p, "\tCapture_source: %s\n",
1838 		   devcore_snapshot->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC ?
1839 		   "GuC" : "Manual");
1840 	drm_printf(p, "\tCoverage: %s\n", grptype[devcore_snapshot->matched_node->is_partial]);
1841 	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
1842 		   snapshot->forcewake.domain, snapshot->forcewake.ref);
1843 	drm_printf(p, "\tReserved: %s\n",
1844 		   str_yes_no(snapshot->kernel_reserved));
1845 
1846 	for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
1847 		/*
1848 		 * FIXME: During devcoredump print we should avoid accessing the
1849 		 * driver pointers for gt or engine. Printing should be done only
1850 		 * using the snapshot captured. Here we are accessing the gt
1851 		 * pointer. It should be fixed.
1852 		 */
1853 		list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
1854 							capture_class, false);
1855 		snapshot_print_by_list_order(snapshot, p, type, list);
1856 	}
1857 
1858 	if (capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
1859 		list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF,
1860 							GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
1861 							capture_class, true);
1862 		snapshot_print_by_list_order(snapshot, p, GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
1863 					     list);
1864 	}
1865 
1866 	drm_puts(p, "\n");
1867 }
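
/*
 * Illustrative devcoredump output from xe_engine_snapshot_print(); the
 * engine name and all register names/values below are made-up examples:
 *
 *	rcs0 (physical), logical instance=0
 *		Capture_source: GuC
 *		Coverage: full-capture
 *		Forcewake: domain 0x7, ref 1
 *		Reserved: no
 *		SOME_REG: 0x00001234
 *		SOME_REG64: 0x0000000000005678
 *		...
 */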
1868 
1869 /**
1870  * xe_guc_capture_get_matching_and_lock - Find and lock a matching capture node
1871  * @q: The exec queue object
1872  *
1873  * Search the capture outlist for a node matching the queue; this can also be
1874  * used to check whether a GuC capture is ready for the queue.
1875  * If a node is found, its locked flag is set.
1876  *
1877  * Returns: the matching guc-capture node pointer, or NULL if none is found
1878  */
1879 struct __guc_capture_parsed_output *
1880 xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q)
1881 {
1882 	struct xe_hw_engine *hwe;
1883 	enum xe_hw_engine_id id;
1884 	struct xe_device *xe;
1885 	u16 guc_class = GUC_LAST_ENGINE_CLASS + 1;
1886 	struct xe_devcoredump_snapshot *ss;
1887 
1888 	if (!q || !q->gt)
1889 		return NULL;
1890 
1891 	xe = gt_to_xe(q->gt);
1892 	if (xe->wedged.mode >= 2 || !xe_device_uc_enabled(xe) || IS_SRIOV_VF(xe))
1893 		return NULL;
1894 
1895 	ss = &xe->devcoredump.snapshot;
1896 	if (ss->matched_node && ss->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC)
1897 		return ss->matched_node;
1898 
1899 	/* Find hwe for the queue */
1900 	for_each_hw_engine(hwe, q->gt, id) {
1901 		if (hwe != q->hwe)
1902 			continue;
1903 		guc_class = xe_engine_class_to_guc_class(hwe->class);
1904 		break;
1905 	}
1906 
1907 	if (guc_class <= GUC_LAST_ENGINE_CLASS) {
1908 		struct __guc_capture_parsed_output *n, *ntmp;
1909 		struct xe_guc *guc = &q->gt->uc.guc;
1910 		u16 guc_id = q->guc->id;
1911 		u32 lrca = xe_lrc_ggtt_addr(q->lrc[0]);
1912 
1913 		/*
1914 		 * Look for a matching GuC reported error capture node from
1915 		 * the internal output link-list based on engine, guc id and
1916 		 * lrca info.
1917 		 */
1918 		list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) {
1919 			if (n->eng_class == guc_class && n->eng_inst == hwe->instance &&
1920 			    n->guc_id == guc_id && n->lrca == lrca &&
1921 			    n->source == XE_ENGINE_CAPTURE_SOURCE_GUC) {
1922 				n->locked = true;
1923 				return n;
1924 			}
1925 		}
1926 	}
1927 	return NULL;
1928 }
1929 
1930 /**
1931  * xe_engine_snapshot_capture_for_queue - Take a snapshot of the queue's engine
1932  * @q: The exec queue object
1933  *
1934  * Take a snapshot of the HW engine associated with the exec queue.
1935  *
1936  * Returns: None.
1937  */
1938 void
1939 xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q)
1940 {
1941 	struct xe_device *xe = gt_to_xe(q->gt);
1942 	struct xe_devcoredump *coredump = &xe->devcoredump;
1943 	struct xe_hw_engine *hwe;
1944 	enum xe_hw_engine_id id;
1945 	u32 adj_logical_mask = q->logical_mask;
1946 
1947 	if (IS_SRIOV_VF(xe))
1948 		return;
1949 
1950 	for_each_hw_engine(hwe, q->gt, id) {
1951 		if (hwe->class != q->hwe->class ||
1952 		    !(BIT(hwe->logical_instance) & adj_logical_mask)) {
1953 			coredump->snapshot.hwe[id] = NULL;
1954 			continue;
1955 		}
1956 
1957 		if (!coredump->snapshot.hwe[id]) {
1958 			coredump->snapshot.hwe[id] =
1959 				xe_hw_engine_snapshot_capture(hwe, q);
1960 		} else {
1961 			struct __guc_capture_parsed_output *new;
1962 
1963 			new = xe_guc_capture_get_matching_and_lock(q);
1964 			if (new) {
1965 				struct xe_guc *guc = &q->gt->uc.guc;
1966 
1967 				/*
1968 				 * If we get here, we found a fresh
1969 				 * GuC-err-capture node for this engine after
1970 				 * previously failing to find a match in the
1971 				 * early part of guc_exec_queue_timedout_job.
1972 				 * Thus we must free the manually captured node.
1973 				 */
1974 				guc_capture_free_outlist_node(guc->capture,
1975 							      coredump->snapshot.matched_node);
1976 				coredump->snapshot.matched_node = new;
1977 			}
1978 		}
1979 
1980 		break;
1981 	}
1982 }
1983 
1984 /**
1985  * xe_guc_capture_put_matched_nodes - Cleanup matched nodes
1986  * @guc: The GuC object
1987  *
1988  * Free the matched node, and all nodes with the same guc_id, from the
1989  * GuC capture outlist.
1990  */
1991 void xe_guc_capture_put_matched_nodes(struct xe_guc *guc)
1992 {
1993 	struct xe_device *xe = guc_to_xe(guc);
1994 	struct xe_devcoredump *devcoredump = &xe->devcoredump;
1995 	struct __guc_capture_parsed_output *n = devcoredump->snapshot.matched_node;
1996 
1997 	if (n) {
1998 		guc_capture_remove_stale_matches_from_list(guc->capture, n);
1999 		guc_capture_free_outlist_node(guc->capture, n);
2000 		devcoredump->snapshot.matched_node = NULL;
2001 	}
2002 }
2003 
2004 /**
2005  * xe_guc_capture_steered_list_init - Init steered register lists
2006  * @guc: The GuC object
2007  *
2008  * Init the steered register lists for GuC capture and create pre-allocated nodes.
2009  */
2010 void xe_guc_capture_steered_list_init(struct xe_guc *guc)
2011 {
2012 	/*
2013 	 * For certain engine classes, there are slice and subslice
2014 	 * level registers requiring steering. We allocate and populate
2015 	 * these based on hw config and add it as an extension list at
2016 	 * the end of the pre-populated render list.
2017 	 */
2018 	guc_capture_alloc_steered_lists(guc);
2019 	check_guc_capture_size(guc);
2020 	guc_capture_create_prealloc_nodes(guc);
2021 }
2022 
2023 /**
2024  * xe_guc_capture_init - Init for GuC register capture
2025  * @guc: The GuC object
2026  *
2027  * Init for GuC register capture: allocate memory for the capture data structure.
2028  *
2029  * Returns: 0 on success,
2030  *	    -ENOMEM if out of memory.
2031  */
2032 int xe_guc_capture_init(struct xe_guc *guc)
2033 {
2034 	guc->capture = drmm_kzalloc(guc_to_drm(guc), sizeof(*guc->capture), GFP_KERNEL);
2035 	if (!guc->capture)
2036 		return -ENOMEM;
2037 
2038 	guc->capture->reglists = guc_capture_get_device_reglist(guc_to_xe(guc));
2039 
2040 	INIT_LIST_HEAD(&guc->capture->outlist);
2041 	INIT_LIST_HEAD(&guc->capture->cachelist);
2042 
2043 	return 0;
2044 }
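
/*
 * Typical call flow through this file (a simplified sketch; the exact call
 * sites live in the driver probe, G2H-notification and devcoredump paths):
 *
 *	xe_guc_capture_init()                  - alloc guc->capture, pick reglists
 *	xe_guc_capture_steered_list_init()     - add steered lists, prealloc nodes
 *	xe_guc_capture_process()               - parse GuC capture notifications
 *	xe_guc_capture_get_matching_and_lock() - match a node to an exec queue
 *	xe_engine_manual_capture()             - fallback when GuC has no match
 *	xe_engine_snapshot_print()             - emit the devcoredump text
 *	xe_guc_capture_put_matched_nodes()     - release nodes once dumped
 */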
2045