xref: /linux/drivers/gpu/drm/xe/xe_guc_capture.c (revision 85502b2214d50ba0ddf2a5fb454e4d28a160d175)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021-2024 Intel Corporation
4  */
5 
6 #include <linux/types.h>
7 
8 #include <drm/drm_managed.h>
9 #include <drm/drm_print.h>
10 
11 #include "abi/guc_actions_abi.h"
12 #include "abi/guc_capture_abi.h"
13 #include "abi/guc_log_abi.h"
14 #include "regs/xe_engine_regs.h"
15 #include "regs/xe_gt_regs.h"
16 #include "regs/xe_guc_regs.h"
17 #include "regs/xe_regs.h"
18 
19 #include "xe_bo.h"
20 #include "xe_device.h"
21 #include "xe_exec_queue_types.h"
22 #include "xe_gt.h"
23 #include "xe_gt_mcr.h"
24 #include "xe_gt_printk.h"
25 #include "xe_guc.h"
26 #include "xe_guc_ads.h"
27 #include "xe_guc_capture.h"
28 #include "xe_guc_capture_types.h"
29 #include "xe_guc_ct.h"
30 #include "xe_guc_exec_queue_types.h"
31 #include "xe_guc_log.h"
32 #include "xe_guc_submit_types.h"
33 #include "xe_guc_submit.h"
34 #include "xe_hw_engine_types.h"
35 #include "xe_hw_engine.h"
36 #include "xe_lrc.h"
37 #include "xe_macros.h"
38 #include "xe_map.h"
39 #include "xe_mmio.h"
40 #include "xe_sched_job.h"
41 
/*
 * struct __guc_capture_bufstate
 *
 * Book-keeping structure used to track read and write pointers
 * as we extract error capture data from the GuC-log-buffer's
 * error-capture region as a stream of dwords.
 */
struct __guc_capture_bufstate {
	u32 size;		/* total size of the error-capture region */
	u32 data_offset;	/* offset of the region within the GuC log buffer */
	u32 rd;			/* current read pointer into the region */
	u32 wr;			/* current write pointer into the region */
};
55 
/*
 * struct __guc_capture_parsed_output - extracted error capture node
 *
 * A single unit of extracted error-capture output data grouped together
 * at an engine-instance level. We keep these nodes in a linked list.
 * See cachelist and outlist below.
 */
struct __guc_capture_parsed_output {
	/*
	 * A single set of 3 capture lists: a global-list
	 * an engine-class-list and an engine-instance list.
	 * outlist in __guc_capture_parsed_output will keep
	 * a linked list of these nodes that will eventually
	 * be detached from outlist and attached into
	 * xe_codedump in response to a context reset
	 */
	struct list_head link;
	bool is_partial;	/* extraction ended before the node was complete */
	u32 eng_class;		/* GuC engine class id this node belongs to */
	u32 eng_inst;		/* engine instance within the class */
	u32 guc_id;		/* GuC context id the capture was taken for */
	u32 lrca;		/* logical ring context address of that context */
	u32 type;
	bool locked;		/* node claimed by a coredump; exempt from stale-reaping */
	enum xe_hw_engine_snapshot_source_id source;
	struct gcap_reg_list_info {
		u32 vfid;
		u32 num_regs;
		struct guc_mmio_reg *regs;
	} reginfo[GUC_STATE_CAPTURE_TYPE_MAX];
#define GCAP_PARSED_REGLIST_INDEX_GLOBAL   BIT(GUC_STATE_CAPTURE_TYPE_GLOBAL)
#define GCAP_PARSED_REGLIST_INDEX_ENGCLASS BIT(GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS)
};
89 
/*
 * Define all device tables of GuC error capture register lists
 * NOTE:
 *     For engine-registers, GuC only needs the register offsets
 *     from the engine-mmio-base
 *
 *     64 bit registers need 2 entries for low 32 bit register and high 32 bit
 *     register, for example:
 *       Register           data_type       flags   mask    Register name
 *     { XXX_REG_LO(0),  REG_64BIT_LOW_DW,    0,      0,      NULL},
 *     { XXX_REG_HI(0),  REG_64BIT_HI_DW,     0,      0,      "XXX_REG"},
 *     1. data_type: Indicates hi/low 32 bit for a 64 bit register
 *                   A 64 bit register define requires 2 consecutive entries,
 *                   with low dword first and hi dword the second.
 *     2. Register name: NULL for an incomplete (low-half) define
 *     3. Incorrect order will trigger XE_WARN.
 */
#define COMMON_XELP_BASE_GLOBAL \
	{ FORCEWAKE_GT,			REG_32BIT,	0,	0,	0,	"FORCEWAKE_GT"}

#define COMMON_BASE_ENGINE_INSTANCE \
	{ RING_HWSTAM(0),		REG_32BIT,	0,	0,	0,	"HWSTAM"}, \
	{ RING_HWS_PGA(0),		REG_32BIT,	0,	0,	0,	"RING_HWS_PGA"}, \
	{ RING_HEAD(0),			REG_32BIT,	0,	0,	0,	"RING_HEAD"}, \
	{ RING_TAIL(0),			REG_32BIT,	0,	0,	0,	"RING_TAIL"}, \
	{ RING_CTL(0),			REG_32BIT,	0,	0,	0,	"RING_CTL"}, \
	{ RING_MI_MODE(0),		REG_32BIT,	0,	0,	0,	"RING_MI_MODE"}, \
	{ RING_MODE(0),			REG_32BIT,	0,	0,	0,	"RING_MODE"}, \
	{ RING_ESR(0),			REG_32BIT,	0,	0,	0,	"RING_ESR"}, \
	{ RING_EMR(0),			REG_32BIT,	0,	0,	0,	"RING_EMR"}, \
	{ RING_EIR(0),			REG_32BIT,	0,	0,	0,	"RING_EIR"}, \
	{ RING_IMR(0),			REG_32BIT,	0,	0,	0,	"RING_IMR"}, \
	{ RING_IPEHR(0),		REG_32BIT,	0,	0,	0,	"IPEHR"}, \
	{ RING_INSTDONE(0),		REG_32BIT,	0,	0,	0,	"RING_INSTDONE"}, \
	{ INDIRECT_RING_STATE(0),	REG_32BIT,	0,	0,	0,	"INDIRECT_RING_STATE"}, \
	{ RING_ACTHD(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
	{ RING_ACTHD_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"ACTHD"}, \
	{ RING_BBADDR(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
	{ RING_BBADDR_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"RING_BBADDR"}, \
	{ RING_START(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
	{ RING_START_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"RING_START"}, \
	{ RING_DMA_FADD(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
	{ RING_DMA_FADD_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"RING_DMA_FADD"}, \
	{ RING_EXECLIST_STATUS_LO(0),	REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
	{ RING_EXECLIST_STATUS_HI(0),	REG_64BIT_HI_DW, 0,	0,	0,	"RING_EXECLIST_STATUS"}, \
	{ RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
	{ RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0,	0,	0,	"RING_EXECLIST_SQ_CONTENTS"}

#define COMMON_XELP_RC_CLASS \
	{ RCU_MODE,			REG_32BIT,	0,	0,	0,	"RCU_MODE"}

#define COMMON_XELP_RC_CLASS_INSTDONE \
	{ SC_INSTDONE,			REG_32BIT,	0,	0,	0,	"SC_INSTDONE"}, \
	{ SC_INSTDONE_EXTRA,		REG_32BIT,	0,	0,	0,	"SC_INSTDONE_EXTRA"}, \
	{ SC_INSTDONE_EXTRA2,		REG_32BIT,	0,	0,	0,	"SC_INSTDONE_EXTRA2"}

#define XELP_VEC_CLASS_REGS \
	{ SFC_DONE(0),			0,	0,	0,	0,	"SFC_DONE[0]"}, \
	{ SFC_DONE(1),			0,	0,	0,	0,	"SFC_DONE[1]"}, \
	{ SFC_DONE(2),			0,	0,	0,	0,	"SFC_DONE[2]"}, \
	{ SFC_DONE(3),			0,	0,	0,	0,	"SFC_DONE[3]"}
151 
/* XE_LP Global */
static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = {
	COMMON_XELP_BASE_GLOBAL,
};

/* Render / Compute Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_rc_inst_regs[] = {
	COMMON_BASE_ENGINE_INSTANCE,
};

/* Render / Compute Engine-Class */
static const struct __guc_mmio_reg_descr xe_rc_class_regs[] = {
	COMMON_XELP_RC_CLASS,
	COMMON_XELP_RC_CLASS_INSTDONE,
};

/*
 * Render / Compute Engine-Class for xehpg: the INSTDONE registers are
 * steered on these platforms and therefore live in the runtime-built
 * extlists instead (see guc_capture_alloc_steered_lists()).
 */
static const struct __guc_mmio_reg_descr xe_hpg_rc_class_regs[] = {
	COMMON_XELP_RC_CLASS,
};

/* Media Decode/Encode Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_vd_inst_regs[] = {
	COMMON_BASE_ENGINE_INSTANCE,
};

/* Video Enhancement Engine-Class */
static const struct __guc_mmio_reg_descr xe_vec_class_regs[] = {
	XELP_VEC_CLASS_REGS,
};

/* Video Enhancement Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_vec_inst_regs[] = {
	COMMON_BASE_ENGINE_INSTANCE,
};

/* Blitter Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_blt_inst_regs[] = {
	COMMON_BASE_ENGINE_INSTANCE,
};

/* XE_LP - GSC Per-Engine-Instance */
static const struct __guc_mmio_reg_descr xe_lp_gsc_inst_regs[] = {
	COMMON_BASE_ENGINE_INSTANCE,
};

/*
 * Empty list to prevent warnings about unknown class/instance types
 * as not all class/instance types have entries on all platforms.
 */
static const struct __guc_mmio_reg_descr empty_regs_list[] = {
};
204 
#define TO_GCAP_DEF_OWNER(x) (GUC_CAPTURE_LIST_INDEX_##x)
#define TO_GCAP_DEF_TYPE(x) (GUC_STATE_CAPTURE_TYPE_##x)
/*
 * MAKE_REGLIST - build one __guc_mmio_reg_descr_group entry:
 * { list, num_regs, owner (PF/VF index), capture type, engine class }
 */
#define MAKE_REGLIST(regslist, regsowner, regstype, class) \
	{ \
		regslist, \
		ARRAY_SIZE(regslist), \
		TO_GCAP_DEF_OWNER(regsowner), \
		TO_GCAP_DEF_TYPE(regstype), \
		class \
	}

/* List of lists for legacy graphic product version < 1255 */
static const struct __guc_mmio_reg_descr_group xe_lp_lists[] = {
	MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
	MAKE_REGLIST(xe_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
	MAKE_REGLIST(xe_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
	MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
	MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
	MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
	MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
	MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
	{}
};

/* List of lists for graphic product version >= 1255 */
static const struct __guc_mmio_reg_descr_group xe_hpg_lists[] = {
	MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
	MAKE_REGLIST(xe_hpg_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
	MAKE_REGLIST(xe_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
	MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
	MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
	MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
	MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
	MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
	MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
	{}
};
247 
/* Printable names indexed by GUC_STATE_CAPTURE_TYPE_* */
static const char * const capture_list_type_names[] = {
	"Global",
	"Class",
	"Instance",
};

/* Printable names indexed by GUC_CAPTURE_LIST_CLASS_* */
static const char * const capture_engine_class_names[] = {
	"Render/Compute",
	"Video",
	"VideoEnhance",
	"Blitter",
	"GSC-Other",
};
261 
/* Cached, pre-formatted ADS register list for one owner/type/class combo */
struct __guc_capture_ads_cache {
	bool is_valid;	/* entry populated; ptr/size/status are meaningful */
	void *ptr;	/* formatted capture list handed to the GuC ADS */
	size_t size;	/* page-aligned size of ptr */
	int status;	/* 0 or the negative errno recorded at build time */
};

/* Per-GuC error-capture bookkeeping (register lists + parsed output nodes) */
struct xe_guc_state_capture {
	const struct __guc_mmio_reg_descr_group *reglists;
	/**
	 * NOTE: steered registers have multiple instances depending on the HW configuration
	 * (slices or dual-sub-slices) and thus depends on HW fuses discovered
	 */
	struct __guc_mmio_reg_descr_group *extlists;
	struct __guc_capture_ads_cache ads_cache[GUC_CAPTURE_LIST_INDEX_MAX]
						[GUC_STATE_CAPTURE_TYPE_MAX]
						[GUC_CAPTURE_LIST_CLASS_MAX];
	void *ads_null_cache;	/* shared empty-list header for unused ADS slots */
	/* pool of pre-allocated, currently unused parsed-output nodes */
	struct list_head cachelist;
#define PREALLOC_NODES_MAX_COUNT (3 * GUC_MAX_ENGINE_CLASSES * GUC_MAX_INSTANCES_PER_CLASS)
#define PREALLOC_NODES_DEFAULT_NUMREGS 64

	int max_mmio_per_node;
	/* nodes holding extracted capture data awaiting a coredump */
	struct list_head outlist;
};
287 
288 static void
289 guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc,
290 					   struct __guc_capture_parsed_output *node);
291 
292 static const struct __guc_mmio_reg_descr_group *
guc_capture_get_device_reglist(struct xe_device * xe)293 guc_capture_get_device_reglist(struct xe_device *xe)
294 {
295 	if (GRAPHICS_VERx100(xe) >= 1255)
296 		return xe_hpg_lists;
297 	else
298 		return xe_lp_lists;
299 }
300 
301 static const struct __guc_mmio_reg_descr_group *
guc_capture_get_one_list(const struct __guc_mmio_reg_descr_group * reglists,u32 owner,u32 type,enum guc_capture_list_class_type capture_class)302 guc_capture_get_one_list(const struct __guc_mmio_reg_descr_group *reglists,
303 			 u32 owner, u32 type, enum guc_capture_list_class_type capture_class)
304 {
305 	int i;
306 
307 	if (!reglists)
308 		return NULL;
309 
310 	for (i = 0; reglists[i].list; ++i) {
311 		if (reglists[i].owner == owner && reglists[i].type == type &&
312 		    (reglists[i].engine == capture_class ||
313 		     reglists[i].type == GUC_STATE_CAPTURE_TYPE_GLOBAL))
314 			return &reglists[i];
315 	}
316 
317 	return NULL;
318 }
319 
320 const struct __guc_mmio_reg_descr_group *
xe_guc_capture_get_reg_desc_list(struct xe_gt * gt,u32 owner,u32 type,enum guc_capture_list_class_type capture_class,bool is_ext)321 xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type,
322 				 enum guc_capture_list_class_type capture_class, bool is_ext)
323 {
324 	const struct __guc_mmio_reg_descr_group *reglists;
325 
326 	if (is_ext) {
327 		struct xe_guc *guc = &gt->uc.guc;
328 
329 		reglists = guc->capture->extlists;
330 	} else {
331 		reglists = guc_capture_get_device_reglist(gt_to_xe(gt));
332 	}
333 	return guc_capture_get_one_list(reglists, owner, type, capture_class);
334 }
335 
/* A steered (MCR) register plus its printable name */
struct __ext_steer_reg {
	const char *name;
	struct xe_reg_mcr reg;
};

/* Steered registers captured on all supported platforms */
static const struct __ext_steer_reg xe_extregs[] = {
	{"SAMPLER_INSTDONE",		SAMPLER_INSTDONE},
	{"ROW_INSTDONE",		ROW_INSTDONE}
};

/* Additional steered registers for graphics version >= 1255 (xehpg) */
static const struct __ext_steer_reg xehpg_extregs[] = {
	{"SC_INSTDONE",			XEHPG_SC_INSTDONE},
	{"SC_INSTDONE_EXTRA",		XEHPG_SC_INSTDONE_EXTRA},
	{"SC_INSTDONE_EXTRA2",		XEHPG_SC_INSTDONE_EXTRA2},
	{"INSTDONE_GEOM_SVGUNIT",	XEHPG_INSTDONE_GEOM_SVGUNIT}
};
352 
/*
 * __fill_ext_reg - populate one runtime descriptor for a steered register,
 * encoding the slice/subslice steering target into the GuC regset flags.
 */
static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
			   const struct __ext_steer_reg *extlist,
			   u32 dss_id, u16 slice_id, u16 subslice_id)
{
	if (!ext || !extlist)
		return;

	ext->reg = XE_REG(extlist->reg.__reg.addr);
	ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1) |
		     FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id) |
		     FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
	ext->dss_id = dss_id;
	ext->regname = extlist->name;
}
367 
368 static int
__alloc_ext_regs(struct drm_device * drm,struct __guc_mmio_reg_descr_group * newlist,const struct __guc_mmio_reg_descr_group * rootlist,int num_regs)369 __alloc_ext_regs(struct drm_device *drm, struct __guc_mmio_reg_descr_group *newlist,
370 		 const struct __guc_mmio_reg_descr_group *rootlist, int num_regs)
371 {
372 	struct __guc_mmio_reg_descr *list;
373 
374 	list = drmm_kzalloc(drm, num_regs * sizeof(struct __guc_mmio_reg_descr), GFP_KERNEL);
375 	if (!list)
376 		return -ENOMEM;
377 
378 	newlist->list = list;
379 	newlist->num_regs = num_regs;
380 	newlist->owner = rootlist->owner;
381 	newlist->engine = rootlist->engine;
382 	newlist->type = rootlist->type;
383 
384 	return 0;
385 }
386 
guc_capture_get_steer_reg_num(struct xe_device * xe)387 static int guc_capture_get_steer_reg_num(struct xe_device *xe)
388 {
389 	int num = ARRAY_SIZE(xe_extregs);
390 
391 	if (GRAPHICS_VERx100(xe) >= 1255)
392 		num += ARRAY_SIZE(xehpg_extregs);
393 
394 	return num;
395 }
396 
/*
 * guc_capture_alloc_steered_lists - build the runtime list of steered
 * (per-DSS) registers for the render/compute class and publish it as
 * guc->capture->extlists. No-op if the GT has no render/compute engine,
 * the platform has no engine-class list, or extlists already exists.
 * All allocations are drm-managed, so there is no explicit teardown.
 */
static void guc_capture_alloc_steered_lists(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	u16 slice, subslice;
	int dss, i, total = 0;
	const struct __guc_mmio_reg_descr_group *lists = guc->capture->reglists;
	const struct __guc_mmio_reg_descr_group *list;
	struct __guc_mmio_reg_descr_group *extlists;
	struct __guc_mmio_reg_descr *extarray;
	bool has_xehpg_extregs = GRAPHICS_VERx100(gt_to_xe(gt)) >= 1255;
	struct drm_device *drm = &gt_to_xe(gt)->drm;
	bool has_rcs_ccs = false;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	/*
	 * If GT has no rcs/ccs, no need to alloc steered list.
	 * Currently, only rcs/ccs has steering register, if in the future,
	 * other engine types has steering register, this condition check need
	 * to be extended
	 */
	for_each_hw_engine(hwe, gt, id) {
		if (xe_engine_class_to_guc_capture_class(hwe->class) ==
		    GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
			has_rcs_ccs = true;
			break;
		}
	}

	if (!has_rcs_ccs)
		return;

	/* steered registers currently only exist for the render-class */
	list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF,
					GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
					GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE);
	/*
	 * Skip if this platform has no engine class registers or if extlists
	 * was previously allocated
	 */
	if (!list || guc->capture->extlists)
		return;

	/* one entry per steered register per enabled DSS */
	total = bitmap_weight(gt->fuse_topo.g_dss_mask, sizeof(gt->fuse_topo.g_dss_mask) * 8) *
		guc_capture_get_steer_reg_num(guc_to_xe(guc));

	if (!total)
		return;

	/* allocate an extra for an end marker */
	extlists = drmm_kzalloc(drm, 2 * sizeof(struct __guc_mmio_reg_descr_group), GFP_KERNEL);
	if (!extlists)
		return;

	if (__alloc_ext_regs(drm, &extlists[0], list, total)) {
		drmm_kfree(drm, extlists);
		return;
	}

	/* For steering registers, the list is generated at run-time */
	extarray = (struct __guc_mmio_reg_descr *)extlists[0].list;
	for_each_dss_steering(dss, gt, slice, subslice) {
		for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) {
			__fill_ext_reg(extarray, &xe_extregs[i], dss, slice, subslice);
			++extarray;
		}

		if (has_xehpg_extregs)
			for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) {
				__fill_ext_reg(extarray, &xehpg_extregs[i], dss, slice, subslice);
				++extarray;
			}
	}

	extlists[0].num_regs = total;

	xe_gt_dbg(guc_to_gt(guc), "capture found %d ext-regs.\n", total);
	guc->capture->extlists = extlists;
}
476 
/*
 * guc_capture_list_init - fill @ptr (capacity @num_entries) with the
 * guc_mmio_reg entries for the owner/type/class combination: first the
 * static device list, then any runtime-built steered extlist. Values are
 * pre-poisoned with 0xDEADF00D so unwritten slots are recognizable.
 *
 * Returns: 0 on success, -ENODEV if no device reglists exist, -ENODATA if
 * no list matches the combination.
 */
static int
guc_capture_list_init(struct xe_guc *guc, u32 owner, u32 type,
		      enum guc_capture_list_class_type capture_class, struct guc_mmio_reg *ptr,
		      u16 num_entries)
{
	u32 ptr_idx = 0, list_idx = 0;
	const struct __guc_mmio_reg_descr_group *reglists = guc->capture->reglists;
	struct __guc_mmio_reg_descr_group *extlists = guc->capture->extlists;
	const struct __guc_mmio_reg_descr_group *match;
	u32 list_num;

	if (!reglists)
		return -ENODEV;

	match = guc_capture_get_one_list(reglists, owner, type, capture_class);
	if (!match)
		return -ENODATA;

	list_num = match->num_regs;
	for (list_idx = 0; ptr_idx < num_entries && list_idx < list_num; ++list_idx, ++ptr_idx) {
		ptr[ptr_idx].offset = match->list[list_idx].reg.addr;
		ptr[ptr_idx].value = 0xDEADF00D;
		ptr[ptr_idx].flags = match->list[list_idx].flags;
		ptr[ptr_idx].mask = match->list[list_idx].mask;
	}

	/* steered registers, if any, are appended after the static list */
	match = guc_capture_get_one_list(extlists, owner, type, capture_class);
	if (match)
		for (ptr_idx = list_num, list_idx = 0;
		     ptr_idx < num_entries && list_idx < match->num_regs;
		     ++ptr_idx, ++list_idx) {
			ptr[ptr_idx].offset = match->list[list_idx].reg.addr;
			ptr[ptr_idx].value = 0xDEADF00D;
			ptr[ptr_idx].flags = match->list[list_idx].flags;
			ptr[ptr_idx].mask = match->list[list_idx].mask;
		}

	if (ptr_idx < num_entries)
		xe_gt_dbg(guc_to_gt(guc), "Got short capture reglist init: %d out-of %d.\n",
			  ptr_idx, num_entries);

	return 0;
}
520 
/*
 * guc_cap_list_num_regs - count registers (static + steered) for the
 * owner/type/class combination. If the steered extlists have not been
 * built yet (pre-hwconfig, max_mmio_per_node still 0), a worst-case count
 * based on the maximum possible DSS fuse bits is used for PF
 * render/compute engine-class lists so the ADS sizing never undershoots.
 */
static int
guc_cap_list_num_regs(struct xe_guc *guc, u32 owner, u32 type,
		      enum guc_capture_list_class_type capture_class)
{
	const struct __guc_mmio_reg_descr_group *match;
	int num_regs = 0;

	match = guc_capture_get_one_list(guc->capture->reglists, owner, type, capture_class);
	if (match)
		num_regs = match->num_regs;

	match = guc_capture_get_one_list(guc->capture->extlists, owner, type, capture_class);
	if (match)
		num_regs += match->num_regs;
	else
		/*
		 * If a caller wants the full register dump size but we have
		 * not yet got the hw-config, which is before max_mmio_per_node
		 * is initialized, then provide a worst-case number for
		 * extlists based on max dss fuse bits, but only ever for
		 * render/compute
		 */
		if (owner == GUC_CAPTURE_LIST_INDEX_PF &&
		    type == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
		    capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE &&
		    !guc->capture->max_mmio_per_node)
			num_regs += guc_capture_get_steer_reg_num(guc_to_xe(guc)) *
				    XE_MAX_DSS_FUSE_BITS;

	return num_regs;
}
552 
553 static int
guc_capture_getlistsize(struct xe_guc * guc,u32 owner,u32 type,enum guc_capture_list_class_type capture_class,size_t * size,bool is_purpose_est)554 guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type,
555 			enum guc_capture_list_class_type capture_class,
556 			size_t *size, bool is_purpose_est)
557 {
558 	struct xe_guc_state_capture *gc = guc->capture;
559 	struct xe_gt *gt = guc_to_gt(guc);
560 	struct __guc_capture_ads_cache *cache;
561 	int num_regs;
562 
563 	xe_gt_assert(gt, type < GUC_STATE_CAPTURE_TYPE_MAX);
564 	xe_gt_assert(gt, capture_class < GUC_CAPTURE_LIST_CLASS_MAX);
565 
566 	cache = &gc->ads_cache[owner][type][capture_class];
567 	if (!gc->reglists) {
568 		xe_gt_warn(gt, "No capture reglist for this device\n");
569 		return -ENODEV;
570 	}
571 
572 	if (cache->is_valid) {
573 		*size = cache->size;
574 		return cache->status;
575 	}
576 
577 	if (!is_purpose_est && owner == GUC_CAPTURE_LIST_INDEX_PF &&
578 	    !guc_capture_get_one_list(gc->reglists, owner, type, capture_class)) {
579 		if (type == GUC_STATE_CAPTURE_TYPE_GLOBAL)
580 			xe_gt_warn(gt, "Missing capture reglist: global!\n");
581 		else
582 			xe_gt_warn(gt, "Missing capture reglist: %s(%u):%s(%u)!\n",
583 				   capture_list_type_names[type], type,
584 				   capture_engine_class_names[capture_class], capture_class);
585 		return -ENODEV;
586 	}
587 
588 	num_regs = guc_cap_list_num_regs(guc, owner, type, capture_class);
589 	/* intentional empty lists can exist depending on hw config */
590 	if (!num_regs)
591 		return -ENODATA;
592 
593 	if (size)
594 		*size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
595 				   (num_regs * sizeof(struct guc_mmio_reg)));
596 
597 	return 0;
598 }
599 
/**
 * xe_guc_capture_getlistsize - Get list size for owner/type/class combination
 * @guc: The GuC object
 * @owner: PF/VF owner
 * @type: GuC capture register type
 * @capture_class: GuC capture engine class id
 * @size: Point to the size
 *
 * This function will get the list for the owner/type/class combination, and
 * return the page aligned list size. Results are served from (and recorded
 * in) the per-combination ADS cache by the underlying helper.
 *
 * Returns: 0 on success or a negative error code on failure.
 */
int
xe_guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type,
			   enum guc_capture_list_class_type capture_class, size_t *size)
{
	return guc_capture_getlistsize(guc, owner, type, capture_class, size, false);
}
619 
/**
 * xe_guc_capture_getlist - Get register capture list for owner/type/class
 * combination
 * @guc:	The GuC object
 * @owner:	PF/VF owner
 * @type:	GuC capture register type
 * @capture_class:	GuC capture engine class id
 * @outptr:	Point to cached register capture list
 *
 * This function will get the register capture list for the owner/type/class
 * combination. The formatted list (header + register descriptors) is built
 * once, cached in the ADS cache, and returned from the cache thereafter;
 * failures from the size lookup are cached too, so repeated queries for an
 * unavailable list stay cheap.
 *
 * Returns: 0 on success or a negative error code on failure.
 */
int
xe_guc_capture_getlist(struct xe_guc *guc, u32 owner, u32 type,
		       enum guc_capture_list_class_type capture_class, void **outptr)
{
	struct xe_guc_state_capture *gc = guc->capture;
	struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][capture_class];
	struct guc_debug_capture_list *listnode;
	int ret, num_regs;
	u8 *caplist, *tmp;
	size_t size = 0;

	if (!gc->reglists)
		return -ENODEV;

	if (cache->is_valid) {
		*outptr = cache->ptr;
		return cache->status;
	}

	ret = xe_guc_capture_getlistsize(guc, owner, type, capture_class, &size);
	if (ret) {
		/* cache the failure so the lookup is not retried every call */
		cache->is_valid = true;
		cache->ptr = NULL;
		cache->size = 0;
		cache->status = ret;
		return ret;
	}

	/* drm-managed: freed automatically with the device */
	caplist = drmm_kzalloc(guc_to_drm(guc), size, GFP_KERNEL);
	if (!caplist)
		return -ENOMEM;

	/* populate capture list header */
	tmp = caplist;
	num_regs = guc_cap_list_num_regs(guc, owner, type, capture_class);
	listnode = (struct guc_debug_capture_list *)tmp;
	listnode->header.info = FIELD_PREP(GUC_CAPTURELISTHDR_NUMDESCR, (u32)num_regs);

	/* populate list of register descriptor */
	tmp += sizeof(struct guc_debug_capture_list);
	guc_capture_list_init(guc, owner, type, capture_class,
			      (struct guc_mmio_reg *)tmp, num_regs);

	/* cache this list */
	cache->is_valid = true;
	cache->ptr = caplist;
	cache->size = size;
	cache->status = 0;

	*outptr = caplist;

	return 0;
}
687 
688 /**
689  * xe_guc_capture_getnullheader - Get a null list for register capture
690  * @guc:	The GuC object
691  * @outptr:	Point to cached register capture list
692  * @size:	Point to the size
693  *
694  * This function will alloc for a null list for register capture.
695  *
696  * Returns: 0 on success or a negative error code on failure.
697  */
698 int
xe_guc_capture_getnullheader(struct xe_guc * guc,void ** outptr,size_t * size)699 xe_guc_capture_getnullheader(struct xe_guc *guc, void **outptr, size_t *size)
700 {
701 	struct xe_guc_state_capture *gc = guc->capture;
702 	int tmp = sizeof(u32) * 4;
703 	void *null_header;
704 
705 	if (gc->ads_null_cache) {
706 		*outptr = gc->ads_null_cache;
707 		*size = tmp;
708 		return 0;
709 	}
710 
711 	null_header = drmm_kzalloc(guc_to_drm(guc), tmp, GFP_KERNEL);
712 	if (!null_header)
713 		return -ENOMEM;
714 
715 	gc->ads_null_cache = null_header;
716 	*outptr = null_header;
717 	*size = tmp;
718 
719 	return 0;
720 }
721 
722 /**
723  * xe_guc_capture_ads_input_worst_size - Calculate the worst size for GuC register capture
724  * @guc: point to xe_guc structure
725  *
726  * Calculate the worst size for GuC register capture by including all possible engines classes.
727  *
728  * Returns: Calculated size
729  */
xe_guc_capture_ads_input_worst_size(struct xe_guc * guc)730 size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc)
731 {
732 	size_t total_size, class_size, instance_size, global_size;
733 	int i, j;
734 
735 	/*
736 	 * This function calculates the worst case register lists size by
737 	 * including all possible engines classes. It is called during the
738 	 * first of a two-phase GuC (and ADS-population) initialization
739 	 * sequence, that is, during the pre-hwconfig phase before we have
740 	 * the exact engine fusing info.
741 	 */
742 	total_size = PAGE_SIZE;	/* Pad a page in front for empty lists */
743 	for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
744 		for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) {
745 			if (xe_guc_capture_getlistsize(guc, i,
746 						       GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
747 						       j, &class_size) < 0)
748 				class_size = 0;
749 			if (xe_guc_capture_getlistsize(guc, i,
750 						       GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
751 						       j, &instance_size) < 0)
752 				instance_size = 0;
753 			total_size += class_size + instance_size;
754 		}
755 		if (xe_guc_capture_getlistsize(guc, i,
756 					       GUC_STATE_CAPTURE_TYPE_GLOBAL,
757 					       0, &global_size) < 0)
758 			global_size = 0;
759 		total_size += global_size;
760 	}
761 
762 	return PAGE_ALIGN(total_size);
763 }
764 
/*
 * guc_capture_output_size_est - estimate the bytes of capture output the
 * GuC could emit if every engine instance were reset back-to-back: one
 * group header plus three list headers per instance, plus the global,
 * class and instance register lists that apply to it.
 *
 * Returns the estimated byte count, or -ENODEV if capture is not set up.
 */
static int guc_capture_output_size_est(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	int capture_size = 0;
	size_t tmp = 0;

	if (!guc->capture)
		return -ENODEV;

	/*
	 * If every single engine-instance suffered a failure in quick succession but
	 * were all unrelated, then a burst of multiple error-capture events would dump
	 * registers for every one engine instance, one at a time. In this case, GuC
	 * would even dump the global-registers repeatedly.
	 *
	 * For each engine instance, there would be 1 x guc_state_capture_group_t output
	 * followed by 3 x guc_state_capture_t lists. The latter is how the register
	 * dumps are split across different register types (where the '3' are global vs class
	 * vs instance).
	 */
	for_each_hw_engine(hwe, gt, id) {
		enum guc_capture_list_class_type capture_class;

		capture_class = xe_engine_class_to_guc_capture_class(hwe->class);
		capture_size += sizeof(struct guc_state_capture_group_header_t) +
					 (3 * sizeof(struct guc_state_capture_header_t));

		/* is_purpose_est=true: suppress missing-list warnings for estimates */
		if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_GLOBAL,
					     0, &tmp, true))
			capture_size += tmp;
		if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
					     capture_class, &tmp, true))
			capture_size += tmp;
		if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
					     capture_class, &tmp, true))
			capture_size += tmp;
	}

	return capture_size;
}
808 
809 /*
810  * Add on a 3x multiplier to allow for multiple back-to-back captures occurring
811  * before the Xe can read the data out and process it
812  */
813 #define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
814 
check_guc_capture_size(struct xe_guc * guc)815 static void check_guc_capture_size(struct xe_guc *guc)
816 {
817 	int capture_size = guc_capture_output_size_est(guc);
818 	int spare_size = capture_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
819 	u32 buffer_size = xe_guc_log_section_size_capture(&guc->log);
820 
821 	/*
822 	 * NOTE: capture_size is much smaller than the capture region
823 	 * allocation (DG2: <80K vs 1MB).
824 	 * Additionally, its based on space needed to fit all engines getting
825 	 * reset at once within the same G2H handler task slot. This is very
826 	 * unlikely. However, if GuC really does run out of space for whatever
827 	 * reason, we will see an separate warning message when processing the
828 	 * G2H event capture-notification, search for:
829 	 * xe_guc_STATE_CAPTURE_EVENT_STATUS_NOSPACE.
830 	 */
831 	if (capture_size < 0)
832 		xe_gt_dbg(guc_to_gt(guc),
833 			  "Failed to calculate error state capture buffer minimum size: %d!\n",
834 			  capture_size);
835 	if (capture_size > buffer_size)
836 		xe_gt_dbg(guc_to_gt(guc), "Error state capture buffer maybe small: %d < %d\n",
837 			  buffer_size, capture_size);
838 	else if (spare_size > buffer_size)
839 		xe_gt_dbg(guc_to_gt(guc),
840 			  "Error state capture buffer lacks spare size: %d < %d (min = %d)\n",
841 			  buffer_size, spare_size, capture_size);
842 }
843 
844 static void
guc_capture_add_node_to_list(struct __guc_capture_parsed_output * node,struct list_head * list)845 guc_capture_add_node_to_list(struct __guc_capture_parsed_output *node,
846 			     struct list_head *list)
847 {
848 	list_add(&node->link, list);
849 }
850 
851 static void
guc_capture_add_node_to_outlist(struct xe_guc_state_capture * gc,struct __guc_capture_parsed_output * node)852 guc_capture_add_node_to_outlist(struct xe_guc_state_capture *gc,
853 				struct __guc_capture_parsed_output *node)
854 {
855 	guc_capture_remove_stale_matches_from_list(gc, node);
856 	guc_capture_add_node_to_list(node, &gc->outlist);
857 }
858 
859 static void
guc_capture_add_node_to_cachelist(struct xe_guc_state_capture * gc,struct __guc_capture_parsed_output * node)860 guc_capture_add_node_to_cachelist(struct xe_guc_state_capture *gc,
861 				  struct __guc_capture_parsed_output *node)
862 {
863 	guc_capture_add_node_to_list(node, &gc->cachelist);
864 }
865 
866 static void
guc_capture_free_outlist_node(struct xe_guc_state_capture * gc,struct __guc_capture_parsed_output * n)867 guc_capture_free_outlist_node(struct xe_guc_state_capture *gc,
868 			      struct __guc_capture_parsed_output *n)
869 {
870 	if (n) {
871 		n->locked = 0;
872 		list_del(&n->link);
873 		/* put node back to cache list */
874 		guc_capture_add_node_to_cachelist(gc, n);
875 	}
876 }
877 
878 static void
guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture * gc,struct __guc_capture_parsed_output * node)879 guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc,
880 					   struct __guc_capture_parsed_output *node)
881 {
882 	struct __guc_capture_parsed_output *n, *ntmp;
883 	int guc_id = node->guc_id;
884 
885 	list_for_each_entry_safe(n, ntmp, &gc->outlist, link) {
886 		if (n != node && !n->locked && n->guc_id == guc_id)
887 			guc_capture_free_outlist_node(gc, n);
888 	}
889 }
890 
891 static void
guc_capture_init_node(struct xe_guc * guc,struct __guc_capture_parsed_output * node)892 guc_capture_init_node(struct xe_guc *guc, struct __guc_capture_parsed_output *node)
893 {
894 	struct guc_mmio_reg *tmp[GUC_STATE_CAPTURE_TYPE_MAX];
895 	int i;
896 
897 	for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
898 		tmp[i] = node->reginfo[i].regs;
899 		memset(tmp[i], 0, sizeof(struct guc_mmio_reg) *
900 		       guc->capture->max_mmio_per_node);
901 	}
902 	memset(node, 0, sizeof(*node));
903 	for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i)
904 		node->reginfo[i].regs = tmp[i];
905 
906 	INIT_LIST_HEAD(&node->link);
907 }
908 
909 /**
910  * DOC: Init, G2H-event and reporting flows for GuC-error-capture
911  *
912  * KMD Init time flows:
913  * --------------------
914  *     --> alloc A: GuC input capture regs lists (registered to GuC via ADS).
915  *                  xe_guc_ads acquires the register lists by calling
916  *                  xe_guc_capture_getlistsize and xe_guc_capture_getlist 'n' times,
917  *                  where n = 1 for global-reg-list +
918  *                            num_engine_classes for class-reg-list +
919  *                            num_engine_classes for instance-reg-list
920  *                               (since all instances of the same engine-class type
921  *                                have an identical engine-instance register-list).
922  *                  ADS module also calls separately for PF vs VF.
923  *
924  *     --> alloc B: GuC output capture buf (registered via guc_init_params(log_param))
 *                  Size = #define CAPTURE_BUFFER_SIZE (warns if too small)
 *                  Note: 'x 3' to hold multiple capture groups
927  *
928  * GUC Runtime notify capture:
929  * --------------------------
930  *     --> G2H STATE_CAPTURE_NOTIFICATION
931  *                   L--> xe_guc_capture_process
932  *                           L--> Loop through B (head..tail) and for each engine instance's
933  *                                err-state-captured register-list we find, we alloc 'C':
934  *      --> alloc C: A capture-output-node structure that includes misc capture info along
935  *                   with 3 register list dumps (global, engine-class and engine-instance)
936  *                   This node is created from a pre-allocated list of blank nodes in
937  *                   guc->capture->cachelist and populated with the error-capture
938  *                   data from GuC and then it's added into guc->capture->outlist linked
939  *                   list. This list is used for matchup and printout by xe_devcoredump_read
940  *                   and xe_engine_snapshot_print, (when user invokes the devcoredump sysfs).
941  *
942  * GUC --> notify context reset:
943  * -----------------------------
944  *     --> guc_exec_queue_timedout_job
945  *                   L--> xe_devcoredump
946  *                          L--> devcoredump_snapshot
947  *                               --> xe_hw_engine_snapshot_capture
948  *                               --> xe_engine_manual_capture(For manual capture)
949  *
950  * User Sysfs / Debugfs
951  * --------------------
952  *      --> xe_devcoredump_read->
953  *             L--> xxx_snapshot_print
954  *                    L--> xe_engine_snapshot_print
955  *                         Print register lists values saved at
956  *                         guc->capture->outlist
957  *
958  */
959 
guc_capture_buf_cnt(struct __guc_capture_bufstate * buf)960 static int guc_capture_buf_cnt(struct __guc_capture_bufstate *buf)
961 {
962 	if (buf->wr >= buf->rd)
963 		return (buf->wr - buf->rd);
964 	return (buf->size - buf->rd) + buf->wr;
965 }
966 
guc_capture_buf_cnt_to_end(struct __guc_capture_bufstate * buf)967 static int guc_capture_buf_cnt_to_end(struct __guc_capture_bufstate *buf)
968 {
969 	if (buf->rd > buf->wr)
970 		return (buf->size - buf->rd);
971 	return (buf->wr - buf->rd);
972 }
973 
974 /*
975  * GuC's error-capture output is a ring buffer populated in a byte-stream fashion:
976  *
977  * The GuC Log buffer region for error-capture is managed like a ring buffer.
978  * The GuC firmware dumps error capture logs into this ring in a byte-stream flow.
979  * Additionally, as per the current and foreseeable future, all packed error-
980  * capture output structures are dword aligned.
981  *
982  * That said, if the GuC firmware is in the midst of writing a structure that is larger
983  * than one dword but the tail end of the err-capture buffer-region has lesser space left,
984  * we would need to extract that structure one dword at a time straddled across the end,
985  * onto the start of the ring.
986  *
987  * Below function, guc_capture_log_remove_bytes is a helper for that. All callers of this
988  * function would typically do a straight-up memcpy from the ring contents and will only
989  * call this helper if their structure-extraction is straddling across the end of the
990  * ring. GuC firmware does not add any padding. The reason for the no-padding is to ease
991  * scalability for future expansion of output data types without requiring a redesign
992  * of the flow controls.
993  */
/*
 * Pull @bytes_needed bytes out of the error-capture ring into @out,
 * handling the case where the data straddles the end of the ring.
 * @bytes_needed must be dword-aligned (asserted below).
 *
 * Returns the number of bytes actually copied (may be less than requested
 * if the stream was clipped by misalignment), or -1 if the ring does not
 * hold @bytes_needed bytes at all.
 */
static int
guc_capture_log_remove_bytes(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
			     void *out, int bytes_needed)
{
#define GUC_CAPTURE_LOG_BUF_COPY_RETRY_MAX	3

	int fill_size = 0, tries = GUC_CAPTURE_LOG_BUF_COPY_RETRY_MAX;
	int copy_size, avail;

	xe_assert(guc_to_xe(guc), bytes_needed % sizeof(u32) == 0);

	if (bytes_needed > guc_capture_buf_cnt(buf))
		return -1;

	/* bounded retry loop: at most one wrap plus the two resulting copies */
	while (bytes_needed > 0 && tries--) {
		int misaligned;

		avail = guc_capture_buf_cnt_to_end(buf);
		misaligned = avail % sizeof(u32);
		/* wrap if at end */
		if (!avail) {
			/* output stream clipped */
			if (!buf->rd)
				return fill_size;
			buf->rd = 0;
			continue;
		}

		/* Only copy to u32 aligned data */
		copy_size = avail < bytes_needed ? avail - misaligned : bytes_needed;
		xe_map_memcpy_from(guc_to_xe(guc), out + fill_size, &guc->log.bo->vmap,
				   buf->data_offset + buf->rd, copy_size);
		buf->rd += copy_size;
		fill_size += copy_size;
		bytes_needed -= copy_size;

		if (misaligned)
			xe_gt_warn(guc_to_gt(guc),
				   "Bytes extraction not dword aligned, clipping.\n");
	}

	return fill_size;
}
1037 
1038 static int
guc_capture_log_get_group_hdr(struct xe_guc * guc,struct __guc_capture_bufstate * buf,struct guc_state_capture_group_header_t * ghdr)1039 guc_capture_log_get_group_hdr(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
1040 			      struct guc_state_capture_group_header_t *ghdr)
1041 {
1042 	int fullsize = sizeof(struct guc_state_capture_group_header_t);
1043 
1044 	if (guc_capture_log_remove_bytes(guc, buf, ghdr, fullsize) != fullsize)
1045 		return -1;
1046 	return 0;
1047 }
1048 
1049 static int
guc_capture_log_get_data_hdr(struct xe_guc * guc,struct __guc_capture_bufstate * buf,struct guc_state_capture_header_t * hdr)1050 guc_capture_log_get_data_hdr(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
1051 			     struct guc_state_capture_header_t *hdr)
1052 {
1053 	int fullsize = sizeof(struct guc_state_capture_header_t);
1054 
1055 	if (guc_capture_log_remove_bytes(guc, buf, hdr, fullsize) != fullsize)
1056 		return -1;
1057 	return 0;
1058 }
1059 
1060 static int
guc_capture_log_get_register(struct xe_guc * guc,struct __guc_capture_bufstate * buf,struct guc_mmio_reg * reg)1061 guc_capture_log_get_register(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
1062 			     struct guc_mmio_reg *reg)
1063 {
1064 	int fullsize = sizeof(struct guc_mmio_reg);
1065 
1066 	if (guc_capture_log_remove_bytes(guc, buf, reg, fullsize) != fullsize)
1067 		return -1;
1068 	return 0;
1069 }
1070 
/*
 * Get a blank node for a new capture: prefer the cache list of unused
 * pre-allocated nodes; if that is empty, steal an unlocked node back from
 * the outlist. The returned node is unlinked and re-initialized, or NULL
 * if nothing is available.
 */
static struct __guc_capture_parsed_output *
guc_capture_get_prealloc_node(struct xe_guc *guc)
{
	struct __guc_capture_parsed_output *found = NULL;

	if (!list_empty(&guc->capture->cachelist)) {
		struct __guc_capture_parsed_output *n, *ntmp;

		/* get first avail node from the cache list */
		list_for_each_entry_safe(n, ntmp, &guc->capture->cachelist, link) {
			found = n;
			break;
		}
	} else {
		struct __guc_capture_parsed_output *n, *ntmp;

		/*
		 * traverse reversed and steal back the oldest node already
		 * allocated
		 */
		/*
		 * NOTE(review): there is no break in this loop, so 'found'
		 * ends up as the last unlocked entry visited (closest to the
		 * list head, i.e. the most recently added) rather than the
		 * first one found walking from the tail — confirm this
		 * matches the "oldest" intent stated above.
		 */
		list_for_each_entry_safe_reverse(n, ntmp, &guc->capture->outlist, link) {
			if (!n->locked)
				found = n;
		}
	}
	if (found) {
		list_del(&found->link);
		guc_capture_init_node(guc, found);
	}

	return found;
}
1103 
1104 static struct __guc_capture_parsed_output *
guc_capture_clone_node(struct xe_guc * guc,struct __guc_capture_parsed_output * original,u32 keep_reglist_mask)1105 guc_capture_clone_node(struct xe_guc *guc, struct __guc_capture_parsed_output *original,
1106 		       u32 keep_reglist_mask)
1107 {
1108 	struct __guc_capture_parsed_output *new;
1109 	int i;
1110 
1111 	new = guc_capture_get_prealloc_node(guc);
1112 	if (!new)
1113 		return NULL;
1114 	if (!original)
1115 		return new;
1116 
1117 	new->is_partial = original->is_partial;
1118 
1119 	/* copy reg-lists that we want to clone */
1120 	for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
1121 		if (keep_reglist_mask & BIT(i)) {
1122 			XE_WARN_ON(original->reginfo[i].num_regs  >
1123 				   guc->capture->max_mmio_per_node);
1124 
1125 			memcpy(new->reginfo[i].regs, original->reginfo[i].regs,
1126 			       original->reginfo[i].num_regs * sizeof(struct guc_mmio_reg));
1127 
1128 			new->reginfo[i].num_regs = original->reginfo[i].num_regs;
1129 			new->reginfo[i].vfid  = original->reginfo[i].vfid;
1130 
1131 			if (i == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS) {
1132 				new->eng_class = original->eng_class;
1133 			} else if (i == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
1134 				new->eng_inst = original->eng_inst;
1135 				new->guc_id = original->guc_id;
1136 				new->lrca = original->lrca;
1137 			}
1138 		}
1139 	}
1140 
1141 	return new;
1142 }
1143 
/*
 * Parse one capture group out of the error-capture ring @buf into parsed
 * output nodes on guc->capture->outlist (pulled from / returned to the
 * pre-allocated cachelist).
 *
 * Returns 0 on success, -ENODATA when the ring is empty, -EIO on a
 * malformed/short stream, -ENOMEM when no pre-allocated node is free.
 */
static int
guc_capture_extract_reglists(struct xe_guc *guc, struct __guc_capture_bufstate *buf)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct guc_state_capture_group_header_t ghdr = {0};
	struct guc_state_capture_header_t hdr = {0};
	struct __guc_capture_parsed_output *node = NULL;
	struct guc_mmio_reg *regs = NULL;
	int i, numlists, numregs, ret = 0;
	enum guc_state_capture_type datatype;
	struct guc_mmio_reg tmp;
	bool is_partial = false;

	i = guc_capture_buf_cnt(buf);
	if (!i)
		return -ENODATA;

	/* the stream must be dword-granular; anything else is corruption */
	if (i % sizeof(u32)) {
		xe_gt_warn(gt, "Got mis-aligned register capture entries\n");
		ret = -EIO;
		goto bailout;
	}

	/* first get the capture group header */
	if (guc_capture_log_get_group_hdr(guc, buf, &ghdr)) {
		ret = -EIO;
		goto bailout;
	}
	/*
	 * we would typically expect a layout as below where n would be expected to be
	 * anywhere between 3 to n where n > 3 if we are seeing multiple dependent engine
	 * instances being reset together.
	 * ____________________________________________
	 * | Capture Group                            |
	 * | ________________________________________ |
	 * | | Capture Group Header:                | |
	 * | |  - num_captures = 5                  | |
	 * | |______________________________________| |
	 * | ________________________________________ |
	 * | | Capture1:                            | |
	 * | |  Hdr: GLOBAL, numregs=a              | |
	 * | | ____________________________________ | |
	 * | | | Reglist                          | | |
	 * | | | - reg1, reg2, ... rega           | | |
	 * | | |__________________________________| | |
	 * | |______________________________________| |
	 * | ________________________________________ |
	 * | | Capture2:                            | |
	 * | |  Hdr: CLASS=RENDER/COMPUTE, numregs=b| |
	 * | | ____________________________________ | |
	 * | | | Reglist                          | | |
	 * | | | - reg1, reg2, ... regb           | | |
	 * | | |__________________________________| | |
	 * | |______________________________________| |
	 * | ________________________________________ |
	 * | | Capture3:                            | |
	 * | |  Hdr: INSTANCE=RCS, numregs=c        | |
	 * | | ____________________________________ | |
	 * | | | Reglist                          | | |
	 * | | | - reg1, reg2, ... regc           | | |
	 * | | |__________________________________| | |
	 * | |______________________________________| |
	 * | ________________________________________ |
	 * | | Capture4:                            | |
	 * | |  Hdr: CLASS=RENDER/COMPUTE, numregs=d| |
	 * | | ____________________________________ | |
	 * | | | Reglist                          | | |
	 * | | | - reg1, reg2, ... regd           | | |
	 * | | |__________________________________| | |
	 * | |______________________________________| |
	 * | ________________________________________ |
	 * | | Capture5:                            | |
	 * | |  Hdr: INSTANCE=CCS0, numregs=e       | |
	 * | | ____________________________________ | |
	 * | | | Reglist                          | | |
	 * | | | - reg1, reg2, ... rege           | | |
	 * | | |__________________________________| | |
	 * |__________________________________________|
	 */
	is_partial = FIELD_GET(GUC_STATE_CAPTURE_GROUP_HEADER_CAPTURE_GROUP_TYPE, ghdr.info);
	numlists = FIELD_GET(GUC_STATE_CAPTURE_GROUP_HEADER_NUM_CAPTURES, ghdr.info);

	while (numlists--) {
		if (guc_capture_log_get_data_hdr(guc, buf, &hdr)) {
			ret = -EIO;
			break;
		}

		datatype = FIELD_GET(GUC_STATE_CAPTURE_HEADER_CAPTURE_TYPE, hdr.info);
		if (datatype > GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
			/* unknown capture type - skip over to next capture set */
			numregs = FIELD_GET(GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES,
					    hdr.num_mmio_entries);
			/* drain the unknown list's entries to stay in sync */
			while (numregs--) {
				if (guc_capture_log_get_register(guc, buf, &tmp)) {
					ret = -EIO;
					break;
				}
			}
			continue;
		} else if (node) {
			/*
			 * Based on the current capture type and what we have so far,
			 * decide if we should add the current node into the internal
			 * linked list for match-up when xe_devcoredump calls later
			 * (and alloc a blank node for the next set of reglists)
			 * or continue with the same node or clone the current node
			 * but only retain the global or class registers (such as the
			 * case of dependent engine resets).
			 */
			if (datatype == GUC_STATE_CAPTURE_TYPE_GLOBAL) {
				guc_capture_add_node_to_outlist(guc->capture, node);
				node = NULL;
			} else if (datatype == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
				   node->reginfo[GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS].num_regs) {
				/* Add to list, clone node and duplicate global list */
				guc_capture_add_node_to_outlist(guc->capture, node);
				node = guc_capture_clone_node(guc, node,
							      GCAP_PARSED_REGLIST_INDEX_GLOBAL);
			} else if (datatype == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE &&
				   node->reginfo[GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE].num_regs) {
				/* Add to list, clone node and duplicate global + class lists */
				guc_capture_add_node_to_outlist(guc->capture, node);
				node = guc_capture_clone_node(guc, node,
							      (GCAP_PARSED_REGLIST_INDEX_GLOBAL |
							      GCAP_PARSED_REGLIST_INDEX_ENGCLASS));
			}
		}

		if (!node) {
			node = guc_capture_get_prealloc_node(guc);
			if (!node) {
				ret = -ENOMEM;
				break;
			}
			if (datatype != GUC_STATE_CAPTURE_TYPE_GLOBAL)
				xe_gt_dbg(gt, "Register capture missing global dump: %08x!\n",
					  datatype);
		}
		node->is_partial = is_partial;
		node->reginfo[datatype].vfid = FIELD_GET(GUC_STATE_CAPTURE_HEADER_VFID, hdr.owner);
		node->source = XE_ENGINE_CAPTURE_SOURCE_GUC;
		node->type = datatype;

		/* record identity fields carried by this list type's header */
		switch (datatype) {
		case GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE:
			node->eng_class = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS,
						    hdr.info);
			node->eng_inst = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_INSTANCE,
						   hdr.info);
			node->lrca = hdr.lrca;
			node->guc_id = hdr.guc_id;
			break;
		case GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS:
			node->eng_class = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS,
						    hdr.info);
			break;
		default:
			break;
		}

		numregs = FIELD_GET(GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES,
				    hdr.num_mmio_entries);
		if (numregs > guc->capture->max_mmio_per_node) {
			xe_gt_dbg(gt, "Register capture list extraction clipped by prealloc!\n");
			numregs = guc->capture->max_mmio_per_node;
		}
		node->reginfo[datatype].num_regs = numregs;
		regs = node->reginfo[datatype].regs;
		i = 0;
		while (numregs--) {
			if (guc_capture_log_get_register(guc, buf, &regs[i++])) {
				ret = -EIO;
				break;
			}
		}
	}

bailout:
	if (node) {
		/* If we have data, add to linked list for match-up when xe_devcoredump calls */
		for (i = GUC_STATE_CAPTURE_TYPE_GLOBAL; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
			if (node->reginfo[i].regs) {
				guc_capture_add_node_to_outlist(guc->capture, node);
				node = NULL;
				break;
			}
		}
		if (node) /* else return it back to cache list */
			guc_capture_add_node_to_cachelist(guc->capture, node);
	}
	return ret;
}
1338 
__guc_capture_flushlog_complete(struct xe_guc * guc)1339 static int __guc_capture_flushlog_complete(struct xe_guc *guc)
1340 {
1341 	u32 action[] = {
1342 		XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE,
1343 		GUC_LOG_BUFFER_CAPTURE
1344 	};
1345 
1346 	return xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
1347 }
1348 
/*
 * Drain the capture section of the GuC log: snapshot the ring state,
 * extract every complete capture group into parsed nodes, then update the
 * ring's read pointer / flags and ack the flush back to GuC.
 */
static void __guc_capture_process_output(struct xe_guc *guc)
{
	unsigned int buffer_size, read_offset, write_offset, full_count;
	struct xe_uc *uc = container_of(guc, typeof(*uc), guc);
	struct guc_log_buffer_state log_buf_state_local;
	struct __guc_capture_bufstate buf;
	bool new_overflow;
	int ret, tmp;
	u32 log_buf_state_offset;
	u32 src_data_offset;

	/* state structs for all log sections are packed at the log's start */
	log_buf_state_offset = sizeof(struct guc_log_buffer_state) * GUC_LOG_BUFFER_CAPTURE;
	src_data_offset = xe_guc_get_log_buffer_offset(&guc->log, GUC_LOG_BUFFER_CAPTURE);

	/*
	 * Make a copy of the state structure, inside GuC log buffer
	 * (which is uncached mapped), on the stack to avoid reading
	 * from it multiple times.
	 */
	xe_map_memcpy_from(guc_to_xe(guc), &log_buf_state_local, &guc->log.bo->vmap,
			   log_buf_state_offset, sizeof(struct guc_log_buffer_state));

	buffer_size = xe_guc_get_log_buffer_size(&guc->log, GUC_LOG_BUFFER_CAPTURE);
	read_offset = log_buf_state_local.read_ptr;
	write_offset = log_buf_state_local.sampled_write_ptr;
	full_count = FIELD_GET(GUC_LOG_BUFFER_STATE_BUFFER_FULL_CNT, log_buf_state_local.flags);

	/* Bookkeeping stuff */
	tmp = FIELD_GET(GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE, log_buf_state_local.flags);
	guc->log.stats[GUC_LOG_BUFFER_CAPTURE].flush += tmp;
	new_overflow = xe_guc_check_log_buf_overflow(&guc->log, GUC_LOG_BUFFER_CAPTURE,
						     full_count);

	/* Now copy the actual logs. */
	if (unlikely(new_overflow)) {
		/* copy the whole buffer in case of overflow */
		read_offset = 0;
		write_offset = buffer_size;
	} else if (unlikely((read_offset > buffer_size) ||
			(write_offset > buffer_size))) {
		xe_gt_err(guc_to_gt(guc),
			  "Register capture buffer in invalid state: read = 0x%X, size = 0x%X!\n",
			  read_offset, buffer_size);
		/* copy whole buffer as offsets are unreliable */
		read_offset = 0;
		write_offset = buffer_size;
	}

	buf.size = buffer_size;
	buf.rd = read_offset;
	buf.wr = write_offset;
	buf.data_offset = src_data_offset;

	/* skip extraction while GuC submission is stopped (e.g. mid-reset) */
	if (!xe_guc_read_stopped(guc)) {
		do {
			ret = guc_capture_extract_reglists(guc, &buf);
			if (ret && ret != -ENODATA)
				xe_gt_dbg(guc_to_gt(guc), "Capture extraction failed:%d\n", ret);
		} while (ret >= 0);
	}

	/* Update the state of log buffer err-cap state */
	xe_map_wr(guc_to_xe(guc), &guc->log.bo->vmap,
		  log_buf_state_offset + offsetof(struct guc_log_buffer_state, read_ptr), u32,
		  write_offset);

	/*
	 * Clear the flush_to_file from local first, the local was loaded by above
	 * xe_map_memcpy_from, then write out the "updated local" through
	 * xe_map_wr()
	 */
	log_buf_state_local.flags &= ~GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE;
	xe_map_wr(guc_to_xe(guc), &guc->log.bo->vmap,
		  log_buf_state_offset + offsetof(struct guc_log_buffer_state, flags), u32,
		  log_buf_state_local.flags);
	__guc_capture_flushlog_complete(guc);
}
1426 
1427 /*
1428  * xe_guc_capture_process - Process GuC register captured data
1429  * @guc: The GuC object
1430  *
1431  * When GuC captured data is ready, GuC will send message
1432  * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host, this function will be
1433  * called to process the data comes with the message.
1434  *
1435  * Returns: None
1436  */
xe_guc_capture_process(struct xe_guc * guc)1437 void xe_guc_capture_process(struct xe_guc *guc)
1438 {
1439 	if (guc->capture)
1440 		__guc_capture_process_output(guc);
1441 }
1442 
1443 static struct __guc_capture_parsed_output *
guc_capture_alloc_one_node(struct xe_guc * guc)1444 guc_capture_alloc_one_node(struct xe_guc *guc)
1445 {
1446 	struct drm_device *drm = guc_to_drm(guc);
1447 	struct __guc_capture_parsed_output *new;
1448 	int i;
1449 
1450 	new = drmm_kzalloc(drm, sizeof(*new), GFP_KERNEL);
1451 	if (!new)
1452 		return NULL;
1453 
1454 	for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
1455 		new->reginfo[i].regs = drmm_kzalloc(drm, guc->capture->max_mmio_per_node *
1456 						    sizeof(struct guc_mmio_reg), GFP_KERNEL);
1457 		if (!new->reginfo[i].regs) {
1458 			while (i)
1459 				drmm_kfree(drm, new->reginfo[--i].regs);
1460 			drmm_kfree(drm, new);
1461 			return NULL;
1462 		}
1463 	}
1464 	guc_capture_init_node(guc, new);
1465 
1466 	return new;
1467 }
1468 
1469 static void
__guc_capture_create_prealloc_nodes(struct xe_guc * guc)1470 __guc_capture_create_prealloc_nodes(struct xe_guc *guc)
1471 {
1472 	struct __guc_capture_parsed_output *node = NULL;
1473 	int i;
1474 
1475 	for (i = 0; i < PREALLOC_NODES_MAX_COUNT; ++i) {
1476 		node = guc_capture_alloc_one_node(guc);
1477 		if (!node) {
1478 			xe_gt_warn(guc_to_gt(guc), "Register capture pre-alloc-cache failure\n");
1479 			/* dont free the priors, use what we got and cleanup at shutdown */
1480 			return;
1481 		}
1482 		guc_capture_add_node_to_cachelist(guc->capture, node);
1483 	}
1484 }
1485 
1486 static int
guc_get_max_reglist_count(struct xe_guc * guc)1487 guc_get_max_reglist_count(struct xe_guc *guc)
1488 {
1489 	int i, j, k, tmp, maxregcount = 0;
1490 
1491 	for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; ++i) {
1492 		for (j = 0; j < GUC_STATE_CAPTURE_TYPE_MAX; ++j) {
1493 			for (k = 0; k < GUC_CAPTURE_LIST_CLASS_MAX; ++k) {
1494 				const struct __guc_mmio_reg_descr_group *match;
1495 
1496 				if (j == GUC_STATE_CAPTURE_TYPE_GLOBAL && k > 0)
1497 					continue;
1498 
1499 				tmp = 0;
1500 				match = guc_capture_get_one_list(guc->capture->reglists, i, j, k);
1501 				if (match)
1502 					tmp = match->num_regs;
1503 
1504 				match = guc_capture_get_one_list(guc->capture->extlists, i, j, k);
1505 				if (match)
1506 					tmp += match->num_regs;
1507 
1508 				if (tmp > maxregcount)
1509 					maxregcount = tmp;
1510 			}
1511 		}
1512 	}
1513 	if (!maxregcount)
1514 		maxregcount = PREALLOC_NODES_DEFAULT_NUMREGS;
1515 
1516 	return maxregcount;
1517 }
1518 
1519 static void
guc_capture_create_prealloc_nodes(struct xe_guc * guc)1520 guc_capture_create_prealloc_nodes(struct xe_guc *guc)
1521 {
1522 	/* skip if we've already done the pre-alloc */
1523 	if (guc->capture->max_mmio_per_node)
1524 		return;
1525 
1526 	guc->capture->max_mmio_per_node = guc_get_max_reglist_count(guc);
1527 	__guc_capture_create_prealloc_nodes(guc);
1528 }
1529 
1530 static void
read_reg_to_node(struct xe_hw_engine * hwe,const struct __guc_mmio_reg_descr_group * list,struct guc_mmio_reg * regs)1531 read_reg_to_node(struct xe_hw_engine *hwe, const struct __guc_mmio_reg_descr_group *list,
1532 		 struct guc_mmio_reg *regs)
1533 {
1534 	int i;
1535 
1536 	if (!list || !list->list || list->num_regs == 0)
1537 		return;
1538 
1539 	if (!regs)
1540 		return;
1541 
1542 	for (i = 0; i < list->num_regs; i++) {
1543 		struct __guc_mmio_reg_descr desc = list->list[i];
1544 		u32 value;
1545 
1546 		if (list->type == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
1547 			value = xe_hw_engine_mmio_read32(hwe, desc.reg);
1548 		} else {
1549 			if (list->type == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
1550 			    FIELD_GET(GUC_REGSET_STEERING_NEEDED, desc.flags)) {
1551 				int group, instance;
1552 
1553 				group = FIELD_GET(GUC_REGSET_STEERING_GROUP, desc.flags);
1554 				instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, desc.flags);
1555 				value = xe_gt_mcr_unicast_read(hwe->gt, XE_REG_MCR(desc.reg.addr),
1556 							       group, instance);
1557 			} else {
1558 				value = xe_mmio_read32(&hwe->gt->mmio, desc.reg);
1559 			}
1560 		}
1561 
1562 		regs[i].value = value;
1563 		regs[i].offset = desc.reg.addr;
1564 		regs[i].flags = desc.flags;
1565 		regs[i].mask = desc.mask;
1566 	}
1567 }
1568 
1569 /**
1570  * xe_engine_manual_capture - Take a manual engine snapshot from engine.
1571  * @hwe: Xe HW Engine.
1572  * @snapshot: The engine snapshot
1573  *
1574  * Take engine snapshot from engine read.
1575  *
1576  * Returns: None
1577  */
1578 void
xe_engine_manual_capture(struct xe_hw_engine * hwe,struct xe_hw_engine_snapshot * snapshot)1579 xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot)
1580 {
1581 	struct xe_gt *gt = hwe->gt;
1582 	struct xe_device *xe = gt_to_xe(gt);
1583 	struct xe_guc *guc = &gt->uc.guc;
1584 	struct xe_devcoredump *devcoredump = &xe->devcoredump;
1585 	enum guc_capture_list_class_type capture_class;
1586 	const struct __guc_mmio_reg_descr_group *list;
1587 	struct __guc_capture_parsed_output *new;
1588 	enum guc_state_capture_type type;
1589 	u16 guc_id = 0;
1590 	u32 lrca = 0;
1591 
1592 	if (IS_SRIOV_VF(xe))
1593 		return;
1594 
1595 	new = guc_capture_get_prealloc_node(guc);
1596 	if (!new)
1597 		return;
1598 
1599 	capture_class = xe_engine_class_to_guc_capture_class(hwe->class);
1600 	for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
1601 		struct gcap_reg_list_info *reginfo = &new->reginfo[type];
1602 		/*
1603 		 * regsinfo->regs is allocated based on guc->capture->max_mmio_per_node
1604 		 * which is based on the descriptor list driving the population so
1605 		 * should not overflow
1606 		 */
1607 
1608 		/* Get register list for the type/class */
1609 		list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
1610 							capture_class, false);
1611 		if (!list) {
1612 			xe_gt_dbg(gt, "Empty GuC capture register descriptor for %s",
1613 				  hwe->name);
1614 			continue;
1615 		}
1616 
1617 		read_reg_to_node(hwe, list, reginfo->regs);
1618 		reginfo->num_regs = list->num_regs;
1619 
1620 		/* Capture steering registers for rcs/ccs */
1621 		if (capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
1622 			list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF,
1623 								type, capture_class, true);
1624 			if (list) {
1625 				read_reg_to_node(hwe, list, &reginfo->regs[reginfo->num_regs]);
1626 				reginfo->num_regs += list->num_regs;
1627 			}
1628 		}
1629 	}
1630 
1631 	if (devcoredump && devcoredump->captured) {
1632 		struct xe_guc_submit_exec_queue_snapshot *ge = devcoredump->snapshot.ge;
1633 
1634 		if (ge) {
1635 			guc_id = ge->guc.id;
1636 			if (ge->lrc[0])
1637 				lrca = ge->lrc[0]->context_desc;
1638 		}
1639 	}
1640 
1641 	new->eng_class = xe_engine_class_to_guc_class(hwe->class);
1642 	new->eng_inst = hwe->instance;
1643 	new->guc_id = guc_id;
1644 	new->lrca = lrca;
1645 	new->is_partial = 0;
1646 	new->locked = 1;
1647 	new->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL;
1648 
1649 	guc_capture_add_node_to_outlist(guc->capture, new);
1650 	devcoredump->snapshot.matched_node = new;
1651 }
1652 
1653 static struct guc_mmio_reg *
guc_capture_find_reg(struct gcap_reg_list_info * reginfo,u32 addr,u32 flags)1654 guc_capture_find_reg(struct gcap_reg_list_info *reginfo, u32 addr, u32 flags)
1655 {
1656 	int i;
1657 
1658 	if (reginfo && reginfo->num_regs > 0) {
1659 		struct guc_mmio_reg *regs = reginfo->regs;
1660 
1661 		if (regs)
1662 			for (i = 0; i < reginfo->num_regs; i++)
1663 				if (regs[i].offset == addr && regs[i].flags == flags)
1664 					return &regs[i];
1665 	}
1666 
1667 	return NULL;
1668 }
1669 
/*
 * Print one capture-type's registers for @snapshot in static-descriptor order.
 *
 * Walks the descriptor @list (not the captured node) so the printout order
 * always matches the static table-of-lists, and looks each descriptor up in
 * the devcoredump's matched capture node. 64-bit registers are described as
 * two consecutive dwords (low then high) and are printed as a single
 * quad-word once the high half is seen; ordering violations in the
 * descriptor table trigger XE_WARN_ON. Returns silently when @list is
 * empty or absent.
 */
static void
snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p,
			     u32 type, const struct __guc_mmio_reg_descr_group *list)
{
	struct xe_gt *gt = snapshot->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_devcoredump *devcoredump = &xe->devcoredump;
	struct xe_devcoredump_snapshot *devcore_snapshot = &devcoredump->snapshot;
	struct gcap_reg_list_info *reginfo = NULL;
	u32 i, last_value = 0;	/* last_value holds the pending low dword of a 64-bit pair */
	bool low32_ready = false;

	if (!list || !list->list || list->num_regs == 0)
		return;
	XE_WARN_ON(!devcore_snapshot->matched_node);

	reginfo = &devcore_snapshot->matched_node->reginfo[type];

	/*
	 * loop through descriptor first and find the register in the node
	 * this is more scalable for developer maintenance as it will ensure
	 * the printout matched the ordering of the static descriptor
	 * table-of-lists
	 */
	for (i = 0; i < list->num_regs; i++) {
		const struct __guc_mmio_reg_descr *reg_desc = &list->list[i];
		struct guc_mmio_reg *reg;
		u32 value;

		/* Descriptors with no captured counterpart are simply skipped */
		reg = guc_capture_find_reg(reginfo, reg_desc->reg.addr, reg_desc->flags);
		if (!reg)
			continue;

		value = reg->value;
		switch (reg_desc->data_type) {
		case REG_64BIT_LOW_DW:
			last_value = value;

			/*
			 * A 64 bit register define requires 2 consecutive
			 * entries in register list, with low dword first
			 * and hi dword the second, like:
			 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
			 *  { XXX_REG_HI(0), REG_64BIT_HI_DW,  0, 0, "XXX_REG"},
			 *
			 * Incorrect order will trigger XE_WARN.
			 *
			 * Possible double low here, for example:
			 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
			 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
			 */
			XE_WARN_ON(low32_ready);
			low32_ready = true;
			/* Low 32 bit dword saved, continue for high 32 bit */
			break;

		case REG_64BIT_HI_DW: {
			u64 value_qw = ((u64)value << 32) | last_value;

			/*
			 * Incorrect 64bit register order. Possible missing low.
			 * for example:
			 *  { XXX_REG(0), REG_32BIT, 0, 0, NULL},
			 *  { XXX_REG_HI(0), REG_64BIT_HI_DW, 0, 0, NULL},
			 */
			XE_WARN_ON(!low32_ready);
			low32_ready = false;

			drm_printf(p, "\t%s: 0x%016llx\n", reg_desc->regname, value_qw);
			break;
		}

		case REG_32BIT:
			/*
			 * Incorrect 64bit register order. Possible missing high.
			 * for example:
			 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
			 *  { XXX_REG(0), REG_32BIT, 0, 0, "XXX_REG"},
			 */
			XE_WARN_ON(low32_ready);

			/* Steered registers print with their dss index appended */
			if (FIELD_GET(GUC_REGSET_STEERING_NEEDED, reg_desc->flags))
				drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname,
					   reg_desc->dss_id, value);
			else
				drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value);

			break;
		}
	}

	/*
	 * Incorrect 64bit register order. Possible missing high.
	 * for example:
	 *  { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
	 *  } // <- Register list end
	 */
	XE_WARN_ON(low32_ready);
}
1769 
1770 /**
1771  * xe_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
1772  * @snapshot: Xe HW Engine snapshot object.
1773  * @p: drm_printer where it will be printed out.
1774  *
1775  * This function prints out a given Xe HW Engine snapshot object.
1776  */
xe_engine_snapshot_print(struct xe_hw_engine_snapshot * snapshot,struct drm_printer * p)1777 void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
1778 {
1779 	const char *grptype[GUC_STATE_CAPTURE_GROUP_TYPE_MAX] = {
1780 		"full-capture",
1781 		"partial-capture"
1782 	};
1783 	int type;
1784 	const struct __guc_mmio_reg_descr_group *list;
1785 	enum guc_capture_list_class_type capture_class;
1786 
1787 	struct xe_gt *gt;
1788 	struct xe_device *xe;
1789 	struct xe_devcoredump *devcoredump;
1790 	struct xe_devcoredump_snapshot *devcore_snapshot;
1791 
1792 	if (!snapshot)
1793 		return;
1794 
1795 	gt = snapshot->hwe->gt;
1796 	xe = gt_to_xe(gt);
1797 	devcoredump = &xe->devcoredump;
1798 	devcore_snapshot = &devcoredump->snapshot;
1799 
1800 	if (!devcore_snapshot->matched_node)
1801 		return;
1802 
1803 	xe_gt_assert(gt, snapshot->hwe);
1804 
1805 	capture_class = xe_engine_class_to_guc_capture_class(snapshot->hwe->class);
1806 
1807 	drm_printf(p, "%s (physical), logical instance=%d\n",
1808 		   snapshot->name ? snapshot->name : "",
1809 		   snapshot->logical_instance);
1810 	drm_printf(p, "\tCapture_source: %s\n",
1811 		   devcore_snapshot->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC ?
1812 		   "GuC" : "Manual");
1813 	drm_printf(p, "\tCoverage: %s\n", grptype[devcore_snapshot->matched_node->is_partial]);
1814 	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
1815 		   snapshot->forcewake.domain, snapshot->forcewake.ref);
1816 	drm_printf(p, "\tReserved: %s\n",
1817 		   str_yes_no(snapshot->kernel_reserved));
1818 
1819 	for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
1820 		list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
1821 							capture_class, false);
1822 		snapshot_print_by_list_order(snapshot, p, type, list);
1823 	}
1824 
1825 	if (capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
1826 		list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF,
1827 							GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
1828 							capture_class, true);
1829 		snapshot_print_by_list_order(snapshot, p, GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
1830 					     list);
1831 	}
1832 
1833 	drm_puts(p, "\n");
1834 }
1835 
1836 /**
1837  * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the queue.
1838  * @q: The exec queue object
1839  *
1840  * Search within the capture outlist for the queue, could be used for check if
1841  * GuC capture is ready for the queue.
1842  * If found, the locked boolean of the node will be flagged.
1843  *
1844  * Returns: found guc-capture node ptr else NULL
1845  */
1846 struct __guc_capture_parsed_output *
xe_guc_capture_get_matching_and_lock(struct xe_exec_queue * q)1847 xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q)
1848 {
1849 	struct xe_hw_engine *hwe;
1850 	enum xe_hw_engine_id id;
1851 	struct xe_device *xe;
1852 	u16 guc_class = GUC_LAST_ENGINE_CLASS + 1;
1853 	struct xe_devcoredump_snapshot *ss;
1854 
1855 	if (!q || !q->gt)
1856 		return NULL;
1857 
1858 	xe = gt_to_xe(q->gt);
1859 	if (xe->wedged.mode >= 2 || !xe_device_uc_enabled(xe) || IS_SRIOV_VF(xe))
1860 		return NULL;
1861 
1862 	ss = &xe->devcoredump.snapshot;
1863 	if (ss->matched_node && ss->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC)
1864 		return ss->matched_node;
1865 
1866 	/* Find hwe for the queue */
1867 	for_each_hw_engine(hwe, q->gt, id) {
1868 		if (hwe != q->hwe)
1869 			continue;
1870 		guc_class = xe_engine_class_to_guc_class(hwe->class);
1871 		break;
1872 	}
1873 
1874 	if (guc_class <= GUC_LAST_ENGINE_CLASS) {
1875 		struct __guc_capture_parsed_output *n, *ntmp;
1876 		struct xe_guc *guc =  &q->gt->uc.guc;
1877 		u16 guc_id = q->guc->id;
1878 		u32 lrca = xe_lrc_ggtt_addr(q->lrc[0]);
1879 
1880 		/*
1881 		 * Look for a matching GuC reported error capture node from
1882 		 * the internal output link-list based on engine, guc id and
1883 		 * lrca info.
1884 		 */
1885 		list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) {
1886 			if (n->eng_class == guc_class && n->eng_inst == hwe->instance &&
1887 			    n->guc_id == guc_id && n->lrca == lrca &&
1888 			    n->source == XE_ENGINE_CAPTURE_SOURCE_GUC) {
1889 				n->locked = 1;
1890 				return n;
1891 			}
1892 		}
1893 	}
1894 	return NULL;
1895 }
1896 
1897 /**
1898  * xe_engine_snapshot_capture_for_queue - Take snapshot of associated engine
1899  * @q: The exec queue object
1900  *
1901  * Take snapshot of associated HW Engine
1902  *
1903  * Returns: None.
1904  */
1905 void
xe_engine_snapshot_capture_for_queue(struct xe_exec_queue * q)1906 xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q)
1907 {
1908 	struct xe_device *xe = gt_to_xe(q->gt);
1909 	struct xe_devcoredump *coredump = &xe->devcoredump;
1910 	struct xe_hw_engine *hwe;
1911 	enum xe_hw_engine_id id;
1912 	u32 adj_logical_mask = q->logical_mask;
1913 
1914 	if (IS_SRIOV_VF(xe))
1915 		return;
1916 
1917 	for_each_hw_engine(hwe, q->gt, id) {
1918 		if (hwe->class != q->hwe->class ||
1919 		    !(BIT(hwe->logical_instance) & adj_logical_mask)) {
1920 			coredump->snapshot.hwe[id] = NULL;
1921 			continue;
1922 		}
1923 
1924 		if (!coredump->snapshot.hwe[id]) {
1925 			coredump->snapshot.hwe[id] =
1926 				xe_hw_engine_snapshot_capture(hwe, q);
1927 		} else {
1928 			struct __guc_capture_parsed_output *new;
1929 
1930 			new = xe_guc_capture_get_matching_and_lock(q);
1931 			if (new) {
1932 				struct xe_guc *guc =  &q->gt->uc.guc;
1933 
1934 				/*
1935 				 * If we are in here, it means we found a fresh
1936 				 * GuC-err-capture node for this engine after
1937 				 * previously failing to find a match in the
1938 				 * early part of guc_exec_queue_timedout_job.
1939 				 * Thus we must free the manually captured node
1940 				 */
1941 				guc_capture_free_outlist_node(guc->capture,
1942 							      coredump->snapshot.matched_node);
1943 				coredump->snapshot.matched_node = new;
1944 			}
1945 		}
1946 
1947 		break;
1948 	}
1949 }
1950 
1951 /*
1952  * xe_guc_capture_put_matched_nodes - Cleanup matched nodes
1953  * @guc: The GuC object
1954  *
1955  * Free matched node and all nodes with the equal guc_id from
1956  * GuC captured outlist
1957  */
xe_guc_capture_put_matched_nodes(struct xe_guc * guc)1958 void xe_guc_capture_put_matched_nodes(struct xe_guc *guc)
1959 {
1960 	struct xe_device *xe = guc_to_xe(guc);
1961 	struct xe_devcoredump *devcoredump = &xe->devcoredump;
1962 	struct __guc_capture_parsed_output *n = devcoredump->snapshot.matched_node;
1963 
1964 	if (n) {
1965 		guc_capture_remove_stale_matches_from_list(guc->capture, n);
1966 		guc_capture_free_outlist_node(guc->capture, n);
1967 		devcoredump->snapshot.matched_node = NULL;
1968 	}
1969 }
1970 
1971 /*
1972  * xe_guc_capture_steered_list_init - Init steering register list
1973  * @guc: The GuC object
1974  *
1975  * Init steering register list for GuC register capture, create pre-alloc node
1976  */
xe_guc_capture_steered_list_init(struct xe_guc * guc)1977 void xe_guc_capture_steered_list_init(struct xe_guc *guc)
1978 {
1979 	/*
1980 	 * For certain engine classes, there are slice and subslice
1981 	 * level registers requiring steering. We allocate and populate
1982 	 * these based on hw config and add it as an extension list at
1983 	 * the end of the pre-populated render list.
1984 	 */
1985 	guc_capture_alloc_steered_lists(guc);
1986 	check_guc_capture_size(guc);
1987 	guc_capture_create_prealloc_nodes(guc);
1988 }
1989 
1990 /*
1991  * xe_guc_capture_init - Init for GuC register capture
1992  * @guc: The GuC object
1993  *
1994  * Init for GuC register capture, alloc memory for capture data structure.
1995  *
1996  * Returns: 0 if success.
1997  *	    -ENOMEM if out of memory
1998  */
xe_guc_capture_init(struct xe_guc * guc)1999 int xe_guc_capture_init(struct xe_guc *guc)
2000 {
2001 	guc->capture = drmm_kzalloc(guc_to_drm(guc), sizeof(*guc->capture), GFP_KERNEL);
2002 	if (!guc->capture)
2003 		return -ENOMEM;
2004 
2005 	guc->capture->reglists = guc_capture_get_device_reglist(guc_to_xe(guc));
2006 
2007 	INIT_LIST_HEAD(&guc->capture->outlist);
2008 	INIT_LIST_HEAD(&guc->capture->cachelist);
2009 
2010 	return 0;
2011 }
2012