xref: /linux/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c (revision fce96cf0443083e37455eff8f78fd240c621dae3)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
3 
4 #include <linux/ascii85.h>
5 #include "msm_gem.h"
6 #include "a6xx_gpu.h"
7 #include "a6xx_gmu.h"
8 #include "a6xx_gpu_state.h"
9 #include "a6xx_gmu.xml.h"
10 
/*
 * One captured section of GPU state.
 *
 * @handle: the descriptor the section was captured from (debugbus block,
 *          cluster, register list, ...); the capture helpers leave it NULL
 *          when there is no descriptor or, in several paths, when the
 *          capture was skipped.
 * @data:   the captured dwords, allocated through state_kcalloc() /
 *          state_kmemdup(); NULL when nothing was captured.
 */
struct a6xx_gpu_state_obj {
	const void *handle;
	u32 *data;
};
15 
/*
 * Complete a6xx crash/devcoredump state.  Each "objects + nr_*" pair is an
 * array of captured sections together with the number of valid entries.
 */
struct a6xx_gpu_state {
	/* Generic state, filled in by adreno_gpu_state_get() */
	struct msm_gpu_state base;

	struct a6xx_gpu_state_obj *gmu_registers;
	int nr_gmu_registers;

	struct a6xx_gpu_state_obj *registers;
	int nr_registers;

	struct a6xx_gpu_state_obj *shaders;
	int nr_shaders;

	struct a6xx_gpu_state_obj *clusters;
	int nr_clusters;

	struct a6xx_gpu_state_obj *dbgahb_clusters;
	int nr_dbgahb_clusters;

	struct a6xx_gpu_state_obj *indexed_regs;
	int nr_indexed_regs;

	struct a6xx_gpu_state_obj *debugbus;
	int nr_debugbus;

	/* Single object; only captured on targets without GBIF */
	struct a6xx_gpu_state_obj *vbif_debugbus;

	struct a6xx_gpu_state_obj *cx_debugbus;
	int nr_cx_debugbus;

	/* Copies of the GMU log, HFI and debug buffers */
	struct msm_gpu_state_bo *gmu_log;
	struct msm_gpu_state_bo *gmu_hfi;
	struct msm_gpu_state_bo *gmu_debug;

	/* Per-queue HFI history, rotated to start at each queue's history_idx */
	s32 hfi_queue_history[2][HFI_HISTORY_SZ];

	/* Every allocation made through state_kcalloc(), freed on destroy */
	struct list_head objs;

	/* Snapshot of !gpu->needs_hw_init taken at capture time */
	bool gpu_initialized;
};
55 
/*
 * Emit a crashdumper script entry that writes @val to register @reg.
 *
 * The entry is two u64 slots: the value, then a command word holding the
 * register in bits 44 and up with bits 21 and 0 set.
 *
 * Returns the number of u64 slots written so the caller can advance @in.
 */
static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
	u64 cmd = (u64) reg << 44;

	cmd |= (1 << 21) | 1;

	in[0] = val;
	in[1] = cmd;

	return 2;
}
63 
/*
 * Emit a crashdumper script entry that reads @dwords dwords starting at
 * register @reg into the GPU address @target.
 *
 * The entry is two u64 slots: the target address, then a command word
 * holding the register in bits 44 and up OR'ed with the dword count.
 *
 * Returns the number of u64 slots written so the caller can advance @in.
 */
static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
	u64 cmd = dwords;

	cmd |= (u64) reg << 44;

	in[0] = target;
	in[1] = cmd;

	return 2;
}
71 
/*
 * Terminate a crashdumper script with an all-zero two-slot entry.
 *
 * Returns the number of u64 slots written.
 */
static inline int CRASHDUMP_FINI(u64 *in)
{
	int slot;

	for (slot = 0; slot < 2; slot++)
		in[slot] = 0;

	return 2;
}
79 
/*
 * Scratch buffer shared with the crashdumper.
 *
 * @ptr:  kernel mapping of the buffer (or an ERR_PTR if allocation failed);
 *        the script is written at offset 0 and captured data is read back
 *        from offset A6XX_CD_DATA_OFFSET.
 * @bo:   the backing GEM object.
 * @iova: GPU address of the buffer, programmed as the script base.
 */
struct a6xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};
85 
/*
 * Header placed in front of every state_kcalloc() allocation so the buffer
 * can be linked into a6xx_gpu_state.objs and freed with the state.
 *
 * @node: link in the owning state's objs list.
 * @data: start of the caller-visible payload.
 */
struct a6xx_state_memobj {
	struct list_head node;
	unsigned long long data[];
};
90 
91 static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
92 {
93 	struct a6xx_state_memobj *obj =
94 		kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
95 
96 	if (!obj)
97 		return NULL;
98 
99 	list_add_tail(&obj->node, &a6xx_state->objs);
100 	return &obj->data;
101 }
102 
103 static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
104 		size_t size)
105 {
106 	void *dst = state_kcalloc(a6xx_state, 1, size);
107 
108 	if (dst)
109 		memcpy(dst, src, size);
110 	return dst;
111 }
112 
113 /*
114  * Allocate 1MB for the crashdumper scratch region - 8k for the script and
115  * the rest for the data
116  */
117 #define A6XX_CD_DATA_OFFSET 8192
118 #define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
119 
120 static int a6xx_crashdumper_init(struct msm_gpu *gpu,
121 		struct a6xx_crashdumper *dumper)
122 {
123 	dumper->ptr = msm_gem_kernel_new(gpu->dev,
124 		SZ_1M, MSM_BO_WC, gpu->aspace,
125 		&dumper->bo, &dumper->iova);
126 
127 	if (!IS_ERR(dumper->ptr))
128 		msm_gem_object_set_name(dumper->bo, "crashdump");
129 
130 	return PTR_ERR_OR_ZERO(dumper->ptr);
131 }
132 
133 static int a6xx_crashdumper_run(struct msm_gpu *gpu,
134 		struct a6xx_crashdumper *dumper)
135 {
136 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
137 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
138 	u32 val;
139 	int ret;
140 
141 	if (IS_ERR_OR_NULL(dumper->ptr))
142 		return -EINVAL;
143 
144 	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
145 		return -EINVAL;
146 
147 	/* Make sure all pending memory writes are posted */
148 	wmb();
149 
150 	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
151 		REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
152 
153 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
154 
155 	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
156 		val & 0x02, 100, 10000);
157 
158 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
159 
160 	return ret;
161 }
162 
163 /* read a value from the GX debug bus */
164 static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
165 		u32 *data)
166 {
167 	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
168 		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
169 
170 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
171 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
172 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
173 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
174 
175 	/* Wait 1 us to make sure the data is flowing */
176 	udelay(1);
177 
178 	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
179 	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
180 
181 	return 2;
182 }
183 
184 #define cxdbg_write(ptr, offset, val) \
185 	msm_writel((val), (ptr) + ((offset) << 2))
186 
187 #define cxdbg_read(ptr, offset) \
188 	msm_readl((ptr) + ((offset) << 2))
189 
190 /* read a value from the CX debug bus */
191 static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
192 		u32 *data)
193 {
194 	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
195 		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
196 
197 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
198 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
199 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
200 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
201 
202 	/* Wait 1 us to make sure the data is flowing */
203 	udelay(1);
204 
205 	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
206 	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
207 
208 	return 2;
209 }
210 
211 /* Read a chunk of data from the VBIF debug bus */
212 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
213 		u32 reg, int count, u32 *data)
214 {
215 	int i;
216 
217 	gpu_write(gpu, ctrl0, reg);
218 
219 	for (i = 0; i < count; i++) {
220 		gpu_write(gpu, ctrl1, i);
221 		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
222 	}
223 
224 	return count;
225 }
226 
227 #define AXI_ARB_BLOCKS 2
228 #define XIN_AXI_BLOCKS 5
229 #define XIN_CORE_BLOCKS 4
230 
231 #define VBIF_DEBUGBUS_BLOCK_SIZE \
232 	((16 * AXI_ARB_BLOCKS) + \
233 	 (18 * XIN_AXI_BLOCKS) + \
234 	 (12 * XIN_CORE_BLOCKS))
235 
236 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
237 		struct a6xx_gpu_state *a6xx_state,
238 		struct a6xx_gpu_state_obj *obj)
239 {
240 	u32 clk, *ptr;
241 	int i;
242 
243 	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
244 		sizeof(u32));
245 	if (!obj->data)
246 		return;
247 
248 	obj->handle = NULL;
249 
250 	/* Get the current clock setting */
251 	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
252 
253 	/* Force on the bus so we can read it */
254 	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
255 		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
256 
257 	/* We will read from BUS2 first, so disable BUS1 */
258 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
259 
260 	/* Enable the VBIF bus for reading */
261 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
262 
263 	ptr = obj->data;
264 
265 	for (i = 0; i < AXI_ARB_BLOCKS; i++)
266 		ptr += vbif_debugbus_read(gpu,
267 			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
268 			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
269 			1 << (i + 16), 16, ptr);
270 
271 	for (i = 0; i < XIN_AXI_BLOCKS; i++)
272 		ptr += vbif_debugbus_read(gpu,
273 			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
274 			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
275 			1 << i, 18, ptr);
276 
277 	/* Stop BUS2 so we can turn on BUS1 */
278 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
279 
280 	for (i = 0; i < XIN_CORE_BLOCKS; i++)
281 		ptr += vbif_debugbus_read(gpu,
282 			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
283 			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
284 			1 << i, 12, ptr);
285 
286 	/* Restore the VBIF clock setting */
287 	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
288 }
289 
290 static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
291 		struct a6xx_gpu_state *a6xx_state,
292 		const struct a6xx_debugbus_block *block,
293 		struct a6xx_gpu_state_obj *obj)
294 {
295 	int i;
296 	u32 *ptr;
297 
298 	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
299 	if (!obj->data)
300 		return;
301 
302 	obj->handle = block;
303 
304 	for (ptr = obj->data, i = 0; i < block->count; i++)
305 		ptr += debugbus_read(gpu, block->id, i, ptr);
306 }
307 
308 static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
309 		struct a6xx_gpu_state *a6xx_state,
310 		const struct a6xx_debugbus_block *block,
311 		struct a6xx_gpu_state_obj *obj)
312 {
313 	int i;
314 	u32 *ptr;
315 
316 	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
317 	if (!obj->data)
318 		return;
319 
320 	obj->handle = block;
321 
322 	for (ptr = obj->data, i = 0; i < block->count; i++)
323 		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
324 }
325 
326 static void a6xx_get_debugbus(struct msm_gpu *gpu,
327 		struct a6xx_gpu_state *a6xx_state)
328 {
329 	struct resource *res;
330 	void __iomem *cxdbg = NULL;
331 	int nr_debugbus_blocks;
332 
333 	/* Set up the GX debug bus */
334 
335 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
336 		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
337 
338 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
339 		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
340 
341 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
342 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
343 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
344 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
345 
346 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
347 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
348 
349 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
350 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
351 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
352 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
353 
354 	/* Set up the CX debug bus - it lives elsewhere in the system so do a
355 	 * temporary ioremap for the registers
356 	 */
357 	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
358 			"cx_dbgc");
359 
360 	if (res)
361 		cxdbg = ioremap(res->start, resource_size(res));
362 
363 	if (cxdbg) {
364 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
365 			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
366 
367 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
368 			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
369 
370 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
371 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
372 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
373 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
374 
375 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
376 			0x76543210);
377 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
378 			0xFEDCBA98);
379 
380 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
381 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
382 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
383 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
384 	}
385 
386 	nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
387 		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
388 
389 	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
390 			sizeof(*a6xx_state->debugbus));
391 
392 	if (a6xx_state->debugbus) {
393 		int i;
394 
395 		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
396 			a6xx_get_debugbus_block(gpu,
397 				a6xx_state,
398 				&a6xx_debugbus_blocks[i],
399 				&a6xx_state->debugbus[i]);
400 
401 		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
402 
403 		/*
404 		 * GBIF has same debugbus as of other GPU blocks, fall back to
405 		 * default path if GPU uses GBIF, also GBIF uses exactly same
406 		 * ID as of VBIF.
407 		 */
408 		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
409 			a6xx_get_debugbus_block(gpu, a6xx_state,
410 				&a6xx_gbif_debugbus_block,
411 				&a6xx_state->debugbus[i]);
412 
413 			a6xx_state->nr_debugbus += 1;
414 		}
415 	}
416 
417 	/*  Dump the VBIF debugbus on applicable targets */
418 	if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
419 		a6xx_state->vbif_debugbus =
420 			state_kcalloc(a6xx_state, 1,
421 					sizeof(*a6xx_state->vbif_debugbus));
422 
423 		if (a6xx_state->vbif_debugbus)
424 			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
425 					a6xx_state->vbif_debugbus);
426 	}
427 
428 	if (cxdbg) {
429 		a6xx_state->cx_debugbus =
430 			state_kcalloc(a6xx_state,
431 			ARRAY_SIZE(a6xx_cx_debugbus_blocks),
432 			sizeof(*a6xx_state->cx_debugbus));
433 
434 		if (a6xx_state->cx_debugbus) {
435 			int i;
436 
437 			for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
438 				a6xx_get_cx_debugbus_block(cxdbg,
439 					a6xx_state,
440 					&a6xx_cx_debugbus_blocks[i],
441 					&a6xx_state->cx_debugbus[i]);
442 
443 			a6xx_state->nr_cx_debugbus =
444 				ARRAY_SIZE(a6xx_cx_debugbus_blocks);
445 		}
446 
447 		iounmap(cxdbg);
448 	}
449 }
450 
451 #define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
452 
453 /* Read a data cluster from behind the AHB aperture */
454 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
455 		struct a6xx_gpu_state *a6xx_state,
456 		const struct a6xx_dbgahb_cluster *dbgahb,
457 		struct a6xx_gpu_state_obj *obj,
458 		struct a6xx_crashdumper *dumper)
459 {
460 	u64 *in = dumper->ptr;
461 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
462 	size_t datasize;
463 	int i, regcount = 0;
464 
465 	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
466 		int j;
467 
468 		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
469 			(dbgahb->statetype + i * 2) << 8);
470 
471 		for (j = 0; j < dbgahb->count; j += 2) {
472 			int count = RANGE(dbgahb->registers, j);
473 			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
474 				dbgahb->registers[j] - (dbgahb->base >> 2);
475 
476 			in += CRASHDUMP_READ(in, offset, count, out);
477 
478 			out += count * sizeof(u32);
479 
480 			if (i == 0)
481 				regcount += count;
482 		}
483 	}
484 
485 	CRASHDUMP_FINI(in);
486 
487 	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
488 
489 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
490 		return;
491 
492 	if (a6xx_crashdumper_run(gpu, dumper))
493 		return;
494 
495 	obj->handle = dbgahb;
496 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
497 		datasize);
498 }
499 
500 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
501 		struct a6xx_gpu_state *a6xx_state,
502 		struct a6xx_crashdumper *dumper)
503 {
504 	int i;
505 
506 	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
507 		ARRAY_SIZE(a6xx_dbgahb_clusters),
508 		sizeof(*a6xx_state->dbgahb_clusters));
509 
510 	if (!a6xx_state->dbgahb_clusters)
511 		return;
512 
513 	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
514 
515 	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
516 		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
517 			&a6xx_dbgahb_clusters[i],
518 			&a6xx_state->dbgahb_clusters[i], dumper);
519 }
520 
521 /* Read a data cluster from the CP aperture with the crashdumper */
522 static void a6xx_get_cluster(struct msm_gpu *gpu,
523 		struct a6xx_gpu_state *a6xx_state,
524 		const struct a6xx_cluster *cluster,
525 		struct a6xx_gpu_state_obj *obj,
526 		struct a6xx_crashdumper *dumper)
527 {
528 	u64 *in = dumper->ptr;
529 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
530 	size_t datasize;
531 	int i, regcount = 0;
532 
533 	/* Some clusters need a selector register to be programmed too */
534 	if (cluster->sel_reg)
535 		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
536 
537 	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
538 		int j;
539 
540 		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
541 			(cluster->id << 8) | (i << 4) | i);
542 
543 		for (j = 0; j < cluster->count; j += 2) {
544 			int count = RANGE(cluster->registers, j);
545 
546 			in += CRASHDUMP_READ(in, cluster->registers[j],
547 				count, out);
548 
549 			out += count * sizeof(u32);
550 
551 			if (i == 0)
552 				regcount += count;
553 		}
554 	}
555 
556 	CRASHDUMP_FINI(in);
557 
558 	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
559 
560 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
561 		return;
562 
563 	if (a6xx_crashdumper_run(gpu, dumper))
564 		return;
565 
566 	obj->handle = cluster;
567 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
568 		datasize);
569 }
570 
571 static void a6xx_get_clusters(struct msm_gpu *gpu,
572 		struct a6xx_gpu_state *a6xx_state,
573 		struct a6xx_crashdumper *dumper)
574 {
575 	int i;
576 
577 	a6xx_state->clusters = state_kcalloc(a6xx_state,
578 		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
579 
580 	if (!a6xx_state->clusters)
581 		return;
582 
583 	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
584 
585 	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
586 		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
587 			&a6xx_state->clusters[i], dumper);
588 }
589 
590 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */
591 static void a6xx_get_shader_block(struct msm_gpu *gpu,
592 		struct a6xx_gpu_state *a6xx_state,
593 		const struct a6xx_shader_block *block,
594 		struct a6xx_gpu_state_obj *obj,
595 		struct a6xx_crashdumper *dumper)
596 {
597 	u64 *in = dumper->ptr;
598 	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
599 	int i;
600 
601 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
602 		return;
603 
604 	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
605 		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
606 			(block->type << 8) | i);
607 
608 		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
609 			block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
610 	}
611 
612 	CRASHDUMP_FINI(in);
613 
614 	if (a6xx_crashdumper_run(gpu, dumper))
615 		return;
616 
617 	obj->handle = block;
618 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
619 		datasize);
620 }
621 
622 static void a6xx_get_shaders(struct msm_gpu *gpu,
623 		struct a6xx_gpu_state *a6xx_state,
624 		struct a6xx_crashdumper *dumper)
625 {
626 	int i;
627 
628 	a6xx_state->shaders = state_kcalloc(a6xx_state,
629 		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
630 
631 	if (!a6xx_state->shaders)
632 		return;
633 
634 	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
635 
636 	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
637 		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
638 			&a6xx_state->shaders[i], dumper);
639 }
640 
641 /* Read registers from behind the HLSQ aperture with the crashdumper */
642 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
643 		struct a6xx_gpu_state *a6xx_state,
644 		const struct a6xx_registers *regs,
645 		struct a6xx_gpu_state_obj *obj,
646 		struct a6xx_crashdumper *dumper)
647 
648 {
649 	u64 *in = dumper->ptr;
650 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
651 	int i, regcount = 0;
652 
653 	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
654 
655 	for (i = 0; i < regs->count; i += 2) {
656 		u32 count = RANGE(regs->registers, i);
657 		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
658 			regs->registers[i] - (regs->val0 >> 2);
659 
660 		in += CRASHDUMP_READ(in, offset, count, out);
661 
662 		out += count * sizeof(u32);
663 		regcount += count;
664 	}
665 
666 	CRASHDUMP_FINI(in);
667 
668 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
669 		return;
670 
671 	if (a6xx_crashdumper_run(gpu, dumper))
672 		return;
673 
674 	obj->handle = regs;
675 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
676 		regcount * sizeof(u32));
677 }
678 
679 /* Read a block of registers using the crashdumper */
680 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
681 		struct a6xx_gpu_state *a6xx_state,
682 		const struct a6xx_registers *regs,
683 		struct a6xx_gpu_state_obj *obj,
684 		struct a6xx_crashdumper *dumper)
685 
686 {
687 	u64 *in = dumper->ptr;
688 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
689 	int i, regcount = 0;
690 
691 	/* Some blocks might need to program a selector register first */
692 	if (regs->val0)
693 		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
694 
695 	for (i = 0; i < regs->count; i += 2) {
696 		u32 count = RANGE(regs->registers, i);
697 
698 		in += CRASHDUMP_READ(in, regs->registers[i], count, out);
699 
700 		out += count * sizeof(u32);
701 		regcount += count;
702 	}
703 
704 	CRASHDUMP_FINI(in);
705 
706 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
707 		return;
708 
709 	if (a6xx_crashdumper_run(gpu, dumper))
710 		return;
711 
712 	obj->handle = regs;
713 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
714 		regcount * sizeof(u32));
715 }
716 
717 /* Read a block of registers via AHB */
718 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
719 		struct a6xx_gpu_state *a6xx_state,
720 		const struct a6xx_registers *regs,
721 		struct a6xx_gpu_state_obj *obj)
722 {
723 	int i, regcount = 0, index = 0;
724 
725 	for (i = 0; i < regs->count; i += 2)
726 		regcount += RANGE(regs->registers, i);
727 
728 	obj->handle = (const void *) regs;
729 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
730 	if (!obj->data)
731 		return;
732 
733 	for (i = 0; i < regs->count; i += 2) {
734 		u32 count = RANGE(regs->registers, i);
735 		int j;
736 
737 		for (j = 0; j < count; j++)
738 			obj->data[index++] = gpu_read(gpu,
739 				regs->registers[i] + j);
740 	}
741 }
742 
743 /* Read a block of GMU registers */
744 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
745 		struct a6xx_gpu_state *a6xx_state,
746 		const struct a6xx_registers *regs,
747 		struct a6xx_gpu_state_obj *obj,
748 		bool rscc)
749 {
750 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
751 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
752 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
753 	int i, regcount = 0, index = 0;
754 
755 	for (i = 0; i < regs->count; i += 2)
756 		regcount += RANGE(regs->registers, i);
757 
758 	obj->handle = (const void *) regs;
759 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
760 	if (!obj->data)
761 		return;
762 
763 	for (i = 0; i < regs->count; i += 2) {
764 		u32 count = RANGE(regs->registers, i);
765 		int j;
766 
767 		for (j = 0; j < count; j++) {
768 			u32 offset = regs->registers[i] + j;
769 			u32 val;
770 
771 			if (rscc)
772 				val = gmu_read_rscc(gmu, offset);
773 			else
774 				val = gmu_read(gmu, offset);
775 
776 			obj->data[index++] = val;
777 		}
778 	}
779 }
780 
781 static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
782 		struct a6xx_gpu_state *a6xx_state)
783 {
784 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
785 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
786 
787 	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
788 		3, sizeof(*a6xx_state->gmu_registers));
789 
790 	if (!a6xx_state->gmu_registers)
791 		return;
792 
793 	a6xx_state->nr_gmu_registers = 3;
794 
795 	/* Get the CX GMU registers from AHB */
796 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
797 		&a6xx_state->gmu_registers[0], false);
798 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
799 		&a6xx_state->gmu_registers[1], true);
800 
801 	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
802 		return;
803 
804 	/* Set the fence to ALLOW mode so we can access the registers */
805 	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
806 
807 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
808 		&a6xx_state->gmu_registers[2], false);
809 }
810 
811 static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
812 		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
813 {
814 	struct msm_gpu_state_bo *snapshot;
815 
816 	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
817 	if (!snapshot)
818 		return NULL;
819 
820 	snapshot->iova = bo->iova;
821 	snapshot->size = bo->size;
822 	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
823 	if (!snapshot->data)
824 		return NULL;
825 
826 	memcpy(snapshot->data, bo->virt, bo->size);
827 
828 	return snapshot;
829 }
830 
831 static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
832 					  struct a6xx_gpu_state *a6xx_state)
833 {
834 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
835 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
836 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
837 	unsigned i, j;
838 
839 	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
840 
841 	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
842 		struct a6xx_hfi_queue *queue = &gmu->queues[i];
843 		for (j = 0; j < HFI_HISTORY_SZ; j++) {
844 			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
845 			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
846 		}
847 	}
848 }
849 
850 #define A6XX_GBIF_REGLIST_SIZE   1
851 static void a6xx_get_registers(struct msm_gpu *gpu,
852 		struct a6xx_gpu_state *a6xx_state,
853 		struct a6xx_crashdumper *dumper)
854 {
855 	int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
856 		ARRAY_SIZE(a6xx_reglist) +
857 		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
858 	int index = 0;
859 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
860 
861 	a6xx_state->registers = state_kcalloc(a6xx_state,
862 		count, sizeof(*a6xx_state->registers));
863 
864 	if (!a6xx_state->registers)
865 		return;
866 
867 	a6xx_state->nr_registers = count;
868 
869 	for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
870 		a6xx_get_ahb_gpu_registers(gpu,
871 			a6xx_state, &a6xx_ahb_reglist[i],
872 			&a6xx_state->registers[index++]);
873 
874 	if (a6xx_has_gbif(adreno_gpu))
875 		a6xx_get_ahb_gpu_registers(gpu,
876 				a6xx_state, &a6xx_gbif_reglist,
877 				&a6xx_state->registers[index++]);
878 	else
879 		a6xx_get_ahb_gpu_registers(gpu,
880 				a6xx_state, &a6xx_vbif_reglist,
881 				&a6xx_state->registers[index++]);
882 	if (!dumper) {
883 		/*
884 		 * We can't use the crashdumper when the SMMU is stalled,
885 		 * because the GPU has no memory access until we resume
886 		 * translation (but we don't want to do that until after
887 		 * we have captured as much useful GPU state as possible).
888 		 * So instead collect registers via the CPU:
889 		 */
890 		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
891 			a6xx_get_ahb_gpu_registers(gpu,
892 				a6xx_state, &a6xx_reglist[i],
893 				&a6xx_state->registers[index++]);
894 		return;
895 	}
896 
897 	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
898 		a6xx_get_crashdumper_registers(gpu,
899 			a6xx_state, &a6xx_reglist[i],
900 			&a6xx_state->registers[index++],
901 			dumper);
902 
903 	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
904 		a6xx_get_crashdumper_hlsq_registers(gpu,
905 			a6xx_state, &a6xx_hlsq_reglist[i],
906 			&a6xx_state->registers[index++],
907 			dumper);
908 }
909 
910 /* Read a block of data from an indexed register pair */
911 static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
912 		struct a6xx_gpu_state *a6xx_state,
913 		const struct a6xx_indexed_registers *indexed,
914 		struct a6xx_gpu_state_obj *obj)
915 {
916 	int i;
917 
918 	obj->handle = (const void *) indexed;
919 	obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
920 	if (!obj->data)
921 		return;
922 
923 	/* All the indexed banks start at address 0 */
924 	gpu_write(gpu, indexed->addr, 0);
925 
926 	/* Read the data - each read increments the internal address by 1 */
927 	for (i = 0; i < indexed->count; i++)
928 		obj->data[i] = gpu_read(gpu, indexed->data);
929 }
930 
931 static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
932 		struct a6xx_gpu_state *a6xx_state)
933 {
934 	u32 mempool_size;
935 	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
936 	int i;
937 
938 	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
939 		sizeof(*a6xx_state->indexed_regs));
940 	if (!a6xx_state->indexed_regs)
941 		return;
942 
943 	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
944 		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
945 			&a6xx_state->indexed_regs[i]);
946 
947 	/* Set the CP mempool size to 0 to stabilize it while dumping */
948 	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
949 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
950 
951 	/* Get the contents of the CP mempool */
952 	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
953 		&a6xx_state->indexed_regs[i]);
954 
955 	/*
956 	 * Offset 0x2000 in the mempool is the size - copy the saved size over
957 	 * so the data is consistent
958 	 */
959 	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
960 
961 	/* Restore the size in the hardware */
962 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
963 
964 	a6xx_state->nr_indexed_regs = count;
965 }
966 
967 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
968 {
969 	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
970 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
971 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
972 	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
973 		GFP_KERNEL);
974 	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
975 			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
976 
977 	if (!a6xx_state)
978 		return ERR_PTR(-ENOMEM);
979 
980 	INIT_LIST_HEAD(&a6xx_state->objs);
981 
982 	/* Get the generic state from the adreno core */
983 	adreno_gpu_state_get(gpu, &a6xx_state->base);
984 
985 	a6xx_get_gmu_registers(gpu, a6xx_state);
986 
987 	a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
988 	a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
989 	a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);
990 
991 	a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
992 
993 	/* If GX isn't on the rest of the data isn't going to be accessible */
994 	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
995 		return &a6xx_state->base;
996 
997 	/* Get the banks of indexed registers */
998 	a6xx_get_indexed_registers(gpu, a6xx_state);
999 
1000 	/*
1001 	 * Try to initialize the crashdumper, if we are not dumping state
1002 	 * with the SMMU stalled.  The crashdumper needs memory access to
1003 	 * write out GPU state, so we need to skip this when the SMMU is
1004 	 * stalled in response to an iova fault
1005 	 */
1006 	if (!stalled && !gpu->needs_hw_init &&
1007 	    !a6xx_crashdumper_init(gpu, &_dumper)) {
1008 		dumper = &_dumper;
1009 	}
1010 
1011 	a6xx_get_registers(gpu, a6xx_state, dumper);
1012 
1013 	if (dumper) {
1014 		a6xx_get_shaders(gpu, a6xx_state, dumper);
1015 		a6xx_get_clusters(gpu, a6xx_state, dumper);
1016 		a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1017 
1018 		msm_gem_kernel_put(dumper->bo, gpu->aspace);
1019 	}
1020 
1021 	if (snapshot_debugbus)
1022 		a6xx_get_debugbus(gpu, a6xx_state);
1023 
1024 	a6xx_state->gpu_initialized = !gpu->needs_hw_init;
1025 
1026 	return  &a6xx_state->base;
1027 }
1028 
1029 static void a6xx_gpu_state_destroy(struct kref *kref)
1030 {
1031 	struct a6xx_state_memobj *obj, *tmp;
1032 	struct msm_gpu_state *state = container_of(kref,
1033 			struct msm_gpu_state, ref);
1034 	struct a6xx_gpu_state *a6xx_state = container_of(state,
1035 			struct a6xx_gpu_state, base);
1036 
1037 	if (a6xx_state->gmu_log)
1038 		kvfree(a6xx_state->gmu_log->data);
1039 
1040 	if (a6xx_state->gmu_hfi)
1041 		kvfree(a6xx_state->gmu_hfi->data);
1042 
1043 	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
1044 		kfree(obj);
1045 
1046 	adreno_gpu_state_destroy(state);
1047 	kfree(a6xx_state);
1048 }
1049 
1050 int a6xx_gpu_state_put(struct msm_gpu_state *state)
1051 {
1052 	if (IS_ERR_OR_NULL(state))
1053 		return 1;
1054 
1055 	return kref_put(&state->ref, a6xx_gpu_state_destroy);
1056 }
1057 
1058 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
1059 		struct drm_printer *p)
1060 {
1061 	int i, index = 0;
1062 
1063 	if (!data)
1064 		return;
1065 
1066 	for (i = 0; i < count; i += 2) {
1067 		u32 count = RANGE(registers, i);
1068 		u32 offset = registers[i];
1069 		int j;
1070 
1071 		for (j = 0; j < count; index++, offset++, j++) {
1072 			if (data[index] == 0xdeafbead)
1073 				continue;
1074 
1075 			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
1076 				offset << 2, data[index]);
1077 		}
1078 	}
1079 }
1080 
1081 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1082 {
1083 	char out[ASCII85_BUFSZ];
1084 	long i, l, datalen = 0;
1085 
1086 	for (i = 0; i < len >> 2; i++) {
1087 		if (data[i])
1088 			datalen = (i + 1) << 2;
1089 	}
1090 
1091 	if (datalen == 0)
1092 		return;
1093 
1094 	drm_puts(p, "    data: !!ascii85 |\n");
1095 	drm_puts(p, "      ");
1096 
1097 
1098 	l = ascii85_encode_len(datalen);
1099 
1100 	for (i = 0; i < l; i++)
1101 		drm_puts(p, ascii85_encode(data[i], out));
1102 
1103 	drm_puts(p, "\n");
1104 }
1105 
/*
 * Write @fmt, then @name, then a newline to @p.  All three pieces go
 * through drm_puts(), so @fmt is emitted verbatim and never interpreted
 * as a format string.
 */
static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	const char *pieces[] = { fmt, name, "\n" };
	unsigned int n;

	for (n = 0; n < ARRAY_SIZE(pieces); n++)
		drm_puts(p, pieces[n]);
}
1112 
1113 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1114 		struct drm_printer *p)
1115 {
1116 	const struct a6xx_shader_block *block = obj->handle;
1117 	int i;
1118 
1119 	if (!obj->handle)
1120 		return;
1121 
1122 	print_name(p, "  - type: ", block->name);
1123 
1124 	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1125 		drm_printf(p, "    - bank: %d\n", i);
1126 		drm_printf(p, "      size: %d\n", block->size);
1127 
1128 		if (!obj->data)
1129 			continue;
1130 
1131 		print_ascii85(p, block->size << 2,
1132 			obj->data + (block->size * i));
1133 	}
1134 }
1135 
1136 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1137 		struct drm_printer *p)
1138 {
1139 	int ctx, index = 0;
1140 
1141 	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1142 		int j;
1143 
1144 		drm_printf(p, "    - context: %d\n", ctx);
1145 
1146 		for (j = 0; j < size; j += 2) {
1147 			u32 count = RANGE(registers, j);
1148 			u32 offset = registers[j];
1149 			int k;
1150 
1151 			for (k = 0; k < count; index++, offset++, k++) {
1152 				if (data[index] == 0xdeafbead)
1153 					continue;
1154 
1155 				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
1156 					offset << 2, data[index]);
1157 			}
1158 		}
1159 	}
1160 }
1161 
1162 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1163 		struct drm_printer *p)
1164 {
1165 	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1166 
1167 	if (dbgahb) {
1168 		print_name(p, "  - cluster-name: ", dbgahb->name);
1169 		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1170 			obj->data, p);
1171 	}
1172 }
1173 
1174 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1175 		struct drm_printer *p)
1176 {
1177 	const struct a6xx_cluster *cluster = obj->handle;
1178 
1179 	if (cluster) {
1180 		print_name(p, "  - cluster-name: ", cluster->name);
1181 		a6xx_show_cluster_data(cluster->registers, cluster->count,
1182 			obj->data, p);
1183 	}
1184 }
1185 
1186 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1187 		struct drm_printer *p)
1188 {
1189 	const struct a6xx_indexed_registers *indexed = obj->handle;
1190 
1191 	if (!indexed)
1192 		return;
1193 
1194 	print_name(p, "  - regs-name: ", indexed->name);
1195 	drm_printf(p, "    dwords: %d\n", indexed->count);
1196 
1197 	print_ascii85(p, indexed->count << 2, obj->data);
1198 }
1199 
1200 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1201 		u32 *data, struct drm_printer *p)
1202 {
1203 	if (block) {
1204 		print_name(p, "  - debugbus-block: ", block->name);
1205 
1206 		/*
1207 		 * count for regular debugbus data is in quadwords,
1208 		 * but print the size in dwords for consistency
1209 		 */
1210 		drm_printf(p, "    count: %d\n", block->count << 1);
1211 
1212 		print_ascii85(p, block->count << 3, data);
1213 	}
1214 }
1215 
1216 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1217 		struct drm_printer *p)
1218 {
1219 	int i;
1220 
1221 	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1222 		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1223 
1224 		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1225 	}
1226 
1227 	if (a6xx_state->vbif_debugbus) {
1228 		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1229 
1230 		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
1231 		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1232 
1233 		/* vbif debugbus data is in dwords.  Confusing, huh? */
1234 		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1235 	}
1236 
1237 	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1238 		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1239 
1240 		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1241 	}
1242 }
1243 
1244 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1245 		struct drm_printer *p)
1246 {
1247 	struct a6xx_gpu_state *a6xx_state = container_of(state,
1248 			struct a6xx_gpu_state, base);
1249 	int i;
1250 
1251 	if (IS_ERR_OR_NULL(state))
1252 		return;
1253 
1254 	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);
1255 
1256 	adreno_show(gpu, state, p);
1257 
1258 	drm_puts(p, "gmu-log:\n");
1259 	if (a6xx_state->gmu_log) {
1260 		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
1261 
1262 		drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
1263 		drm_printf(p, "    size: %zu\n", gmu_log->size);
1264 		adreno_show_object(p, &gmu_log->data, gmu_log->size,
1265 				&gmu_log->encoded);
1266 	}
1267 
1268 	drm_puts(p, "gmu-hfi:\n");
1269 	if (a6xx_state->gmu_hfi) {
1270 		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
1271 		unsigned i, j;
1272 
1273 		drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
1274 		drm_printf(p, "    size: %zu\n", gmu_hfi->size);
1275 		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
1276 			drm_printf(p, "    queue-history[%u]:", i);
1277 			for (j = 0; j < HFI_HISTORY_SZ; j++) {
1278 				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
1279 			}
1280 			drm_printf(p, "\n");
1281 		}
1282 		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
1283 				&gmu_hfi->encoded);
1284 	}
1285 
1286 	drm_puts(p, "gmu-debug:\n");
1287 	if (a6xx_state->gmu_debug) {
1288 		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;
1289 
1290 		drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
1291 		drm_printf(p, "    size: %zu\n", gmu_debug->size);
1292 		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
1293 				&gmu_debug->encoded);
1294 	}
1295 
1296 	drm_puts(p, "registers:\n");
1297 	for (i = 0; i < a6xx_state->nr_registers; i++) {
1298 		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
1299 		const struct a6xx_registers *regs = obj->handle;
1300 
1301 		if (!obj->handle)
1302 			continue;
1303 
1304 		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1305 	}
1306 
1307 	drm_puts(p, "registers-gmu:\n");
1308 	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
1309 		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
1310 		const struct a6xx_registers *regs = obj->handle;
1311 
1312 		if (!obj->handle)
1313 			continue;
1314 
1315 		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1316 	}
1317 
1318 	drm_puts(p, "indexed-registers:\n");
1319 	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
1320 		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
1321 
1322 	drm_puts(p, "shader-blocks:\n");
1323 	for (i = 0; i < a6xx_state->nr_shaders; i++)
1324 		a6xx_show_shader(&a6xx_state->shaders[i], p);
1325 
1326 	drm_puts(p, "clusters:\n");
1327 	for (i = 0; i < a6xx_state->nr_clusters; i++)
1328 		a6xx_show_cluster(&a6xx_state->clusters[i], p);
1329 
1330 	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
1331 		a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
1332 
1333 	drm_puts(p, "debugbus:\n");
1334 	a6xx_show_debugbus(a6xx_state, p);
1335 }
1336