xref: /linux/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c (revision 086099893fcebeae50f9020588080de43c82e4c0)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
3 
4 #include <linux/ascii85.h>
5 #include "msm_gem.h"
6 #include "a6xx_gpu.h"
7 #include "a6xx_gmu.h"
8 #include "a6xx_gpu_state.h"
9 #include "a6xx_gmu.xml.h"
10 
11 struct a6xx_gpu_state_obj {
12 	const void *handle;
13 	u32 *data;
14 };
15 
16 struct a6xx_gpu_state {
17 	struct msm_gpu_state base;
18 
19 	struct a6xx_gpu_state_obj *gmu_registers;
20 	int nr_gmu_registers;
21 
22 	struct a6xx_gpu_state_obj *registers;
23 	int nr_registers;
24 
25 	struct a6xx_gpu_state_obj *shaders;
26 	int nr_shaders;
27 
28 	struct a6xx_gpu_state_obj *clusters;
29 	int nr_clusters;
30 
31 	struct a6xx_gpu_state_obj *dbgahb_clusters;
32 	int nr_dbgahb_clusters;
33 
34 	struct a6xx_gpu_state_obj *indexed_regs;
35 	int nr_indexed_regs;
36 
37 	struct a6xx_gpu_state_obj *debugbus;
38 	int nr_debugbus;
39 
40 	struct a6xx_gpu_state_obj *vbif_debugbus;
41 
42 	struct a6xx_gpu_state_obj *cx_debugbus;
43 	int nr_cx_debugbus;
44 
45 	struct list_head objs;
46 };
47 
48 static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
49 {
50 	in[0] = val;
51 	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
52 
53 	return 2;
54 }
55 
56 static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
57 {
58 	in[0] = target;
59 	in[1] = (((u64) reg) << 44 | dwords);
60 
61 	return 2;
62 }
63 
64 static inline int CRASHDUMP_FINI(u64 *in)
65 {
66 	in[0] = 0;
67 	in[1] = 0;
68 
69 	return 2;
70 }
71 
72 struct a6xx_crashdumper {
73 	void *ptr;
74 	struct drm_gem_object *bo;
75 	u64 iova;
76 };
77 
78 struct a6xx_state_memobj {
79 	struct list_head node;
80 	unsigned long long data[];
81 };
82 
83 static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
84 {
85 	struct a6xx_state_memobj *obj =
86 		kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
87 
88 	if (!obj)
89 		return NULL;
90 
91 	list_add_tail(&obj->node, &a6xx_state->objs);
92 	return &obj->data;
93 }
94 
95 static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
96 		size_t size)
97 {
98 	void *dst = state_kcalloc(a6xx_state, 1, size);
99 
100 	if (dst)
101 		memcpy(dst, src, size);
102 	return dst;
103 }
104 
105 /*
106  * Allocate 1MB for the crashdumper scratch region - 8k for the script and
107  * the rest for the data
108  */
109 #define A6XX_CD_DATA_OFFSET 8192
110 #define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
111 
112 static int a6xx_crashdumper_init(struct msm_gpu *gpu,
113 		struct a6xx_crashdumper *dumper)
114 {
115 	dumper->ptr = msm_gem_kernel_new(gpu->dev,
116 		SZ_1M, MSM_BO_WC, gpu->aspace,
117 		&dumper->bo, &dumper->iova);
118 
119 	if (!IS_ERR(dumper->ptr))
120 		msm_gem_object_set_name(dumper->bo, "crashdump");
121 
122 	return PTR_ERR_OR_ZERO(dumper->ptr);
123 }
124 
125 static int a6xx_crashdumper_run(struct msm_gpu *gpu,
126 		struct a6xx_crashdumper *dumper)
127 {
128 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
129 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
130 	u32 val;
131 	int ret;
132 
133 	if (IS_ERR_OR_NULL(dumper->ptr))
134 		return -EINVAL;
135 
136 	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
137 		return -EINVAL;
138 
139 	/* Make sure all pending memory writes are posted */
140 	wmb();
141 
142 	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
143 		REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
144 
145 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
146 
147 	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
148 		val & 0x02, 100, 10000);
149 
150 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
151 
152 	return ret;
153 }
154 
155 /* read a value from the GX debug bus */
156 static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
157 		u32 *data)
158 {
159 	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
160 		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
161 
162 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
163 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
164 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
165 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
166 
167 	/* Wait 1 us to make sure the data is flowing */
168 	udelay(1);
169 
170 	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
171 	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
172 
173 	return 2;
174 }
175 
176 #define cxdbg_write(ptr, offset, val) \
177 	msm_writel((val), (ptr) + ((offset) << 2))
178 
179 #define cxdbg_read(ptr, offset) \
180 	msm_readl((ptr) + ((offset) << 2))
181 
182 /* read a value from the CX debug bus */
183 static int cx_debugbus_read(void *__iomem cxdbg, u32 block, u32 offset,
184 		u32 *data)
185 {
186 	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
187 		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
188 
189 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
190 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
191 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
192 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
193 
194 	/* Wait 1 us to make sure the data is flowing */
195 	udelay(1);
196 
197 	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
198 	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
199 
200 	return 2;
201 }
202 
203 /* Read a chunk of data from the VBIF debug bus */
204 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
205 		u32 reg, int count, u32 *data)
206 {
207 	int i;
208 
209 	gpu_write(gpu, ctrl0, reg);
210 
211 	for (i = 0; i < count; i++) {
212 		gpu_write(gpu, ctrl1, i);
213 		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
214 	}
215 
216 	return count;
217 }
218 
219 #define AXI_ARB_BLOCKS 2
220 #define XIN_AXI_BLOCKS 5
221 #define XIN_CORE_BLOCKS 4
222 
223 #define VBIF_DEBUGBUS_BLOCK_SIZE \
224 	((16 * AXI_ARB_BLOCKS) + \
225 	 (18 * XIN_AXI_BLOCKS) + \
226 	 (12 * XIN_CORE_BLOCKS))
227 
228 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
229 		struct a6xx_gpu_state *a6xx_state,
230 		struct a6xx_gpu_state_obj *obj)
231 {
232 	u32 clk, *ptr;
233 	int i;
234 
235 	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
236 		sizeof(u32));
237 	if (!obj->data)
238 		return;
239 
240 	obj->handle = NULL;
241 
242 	/* Get the current clock setting */
243 	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
244 
245 	/* Force on the bus so we can read it */
246 	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
247 		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
248 
249 	/* We will read from BUS2 first, so disable BUS1 */
250 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
251 
252 	/* Enable the VBIF bus for reading */
253 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
254 
255 	ptr = obj->data;
256 
257 	for (i = 0; i < AXI_ARB_BLOCKS; i++)
258 		ptr += vbif_debugbus_read(gpu,
259 			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
260 			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
261 			1 << (i + 16), 16, ptr);
262 
263 	for (i = 0; i < XIN_AXI_BLOCKS; i++)
264 		ptr += vbif_debugbus_read(gpu,
265 			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
266 			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
267 			1 << i, 18, ptr);
268 
269 	/* Stop BUS2 so we can turn on BUS1 */
270 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
271 
272 	for (i = 0; i < XIN_CORE_BLOCKS; i++)
273 		ptr += vbif_debugbus_read(gpu,
274 			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
275 			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
276 			1 << i, 12, ptr);
277 
278 	/* Restore the VBIF clock setting */
279 	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
280 }
281 
282 static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
283 		struct a6xx_gpu_state *a6xx_state,
284 		const struct a6xx_debugbus_block *block,
285 		struct a6xx_gpu_state_obj *obj)
286 {
287 	int i;
288 	u32 *ptr;
289 
290 	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
291 	if (!obj->data)
292 		return;
293 
294 	obj->handle = block;
295 
296 	for (ptr = obj->data, i = 0; i < block->count; i++)
297 		ptr += debugbus_read(gpu, block->id, i, ptr);
298 }
299 
300 static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
301 		struct a6xx_gpu_state *a6xx_state,
302 		const struct a6xx_debugbus_block *block,
303 		struct a6xx_gpu_state_obj *obj)
304 {
305 	int i;
306 	u32 *ptr;
307 
308 	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
309 	if (!obj->data)
310 		return;
311 
312 	obj->handle = block;
313 
314 	for (ptr = obj->data, i = 0; i < block->count; i++)
315 		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
316 }
317 
318 static void a6xx_get_debugbus(struct msm_gpu *gpu,
319 		struct a6xx_gpu_state *a6xx_state)
320 {
321 	struct resource *res;
322 	void __iomem *cxdbg = NULL;
323 	int nr_debugbus_blocks;
324 
325 	/* Set up the GX debug bus */
326 
327 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
328 		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
329 
330 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
331 		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
332 
333 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
334 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
335 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
336 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
337 
338 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
339 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
340 
341 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
342 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
343 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
344 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
345 
346 	/* Set up the CX debug bus - it lives elsewhere in the system so do a
347 	 * temporary ioremap for the registers
348 	 */
349 	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
350 			"cx_dbgc");
351 
352 	if (res)
353 		cxdbg = ioremap(res->start, resource_size(res));
354 
355 	if (cxdbg) {
356 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
357 			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
358 
359 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
360 			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
361 
362 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
363 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
364 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
365 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
366 
367 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
368 			0x76543210);
369 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
370 			0xFEDCBA98);
371 
372 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
373 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
374 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
375 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
376 	}
377 
378 	nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
379 		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
380 
381 	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
382 			sizeof(*a6xx_state->debugbus));
383 
384 	if (a6xx_state->debugbus) {
385 		int i;
386 
387 		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
388 			a6xx_get_debugbus_block(gpu,
389 				a6xx_state,
390 				&a6xx_debugbus_blocks[i],
391 				&a6xx_state->debugbus[i]);
392 
393 		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
394 
395 		/*
396 		 * GBIF has same debugbus as of other GPU blocks, fall back to
397 		 * default path if GPU uses GBIF, also GBIF uses exactly same
398 		 * ID as of VBIF.
399 		 */
400 		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
401 			a6xx_get_debugbus_block(gpu, a6xx_state,
402 				&a6xx_gbif_debugbus_block,
403 				&a6xx_state->debugbus[i]);
404 
405 			a6xx_state->nr_debugbus += 1;
406 		}
407 	}
408 
409 	/*  Dump the VBIF debugbus on applicable targets */
410 	if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
411 		a6xx_state->vbif_debugbus =
412 			state_kcalloc(a6xx_state, 1,
413 					sizeof(*a6xx_state->vbif_debugbus));
414 
415 		if (a6xx_state->vbif_debugbus)
416 			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
417 					a6xx_state->vbif_debugbus);
418 	}
419 
420 	if (cxdbg) {
421 		a6xx_state->cx_debugbus =
422 			state_kcalloc(a6xx_state,
423 			ARRAY_SIZE(a6xx_cx_debugbus_blocks),
424 			sizeof(*a6xx_state->cx_debugbus));
425 
426 		if (a6xx_state->cx_debugbus) {
427 			int i;
428 
429 			for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
430 				a6xx_get_cx_debugbus_block(cxdbg,
431 					a6xx_state,
432 					&a6xx_cx_debugbus_blocks[i],
433 					&a6xx_state->cx_debugbus[i]);
434 
435 			a6xx_state->nr_cx_debugbus =
436 				ARRAY_SIZE(a6xx_cx_debugbus_blocks);
437 		}
438 
439 		iounmap(cxdbg);
440 	}
441 }
442 
443 #define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
444 
445 /* Read a data cluster from behind the AHB aperture */
446 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
447 		struct a6xx_gpu_state *a6xx_state,
448 		const struct a6xx_dbgahb_cluster *dbgahb,
449 		struct a6xx_gpu_state_obj *obj,
450 		struct a6xx_crashdumper *dumper)
451 {
452 	u64 *in = dumper->ptr;
453 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
454 	size_t datasize;
455 	int i, regcount = 0;
456 
457 	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
458 		int j;
459 
460 		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
461 			(dbgahb->statetype + i * 2) << 8);
462 
463 		for (j = 0; j < dbgahb->count; j += 2) {
464 			int count = RANGE(dbgahb->registers, j);
465 			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
466 				dbgahb->registers[j] - (dbgahb->base >> 2);
467 
468 			in += CRASHDUMP_READ(in, offset, count, out);
469 
470 			out += count * sizeof(u32);
471 
472 			if (i == 0)
473 				regcount += count;
474 		}
475 	}
476 
477 	CRASHDUMP_FINI(in);
478 
479 	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
480 
481 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
482 		return;
483 
484 	if (a6xx_crashdumper_run(gpu, dumper))
485 		return;
486 
487 	obj->handle = dbgahb;
488 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
489 		datasize);
490 }
491 
492 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
493 		struct a6xx_gpu_state *a6xx_state,
494 		struct a6xx_crashdumper *dumper)
495 {
496 	int i;
497 
498 	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
499 		ARRAY_SIZE(a6xx_dbgahb_clusters),
500 		sizeof(*a6xx_state->dbgahb_clusters));
501 
502 	if (!a6xx_state->dbgahb_clusters)
503 		return;
504 
505 	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
506 
507 	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
508 		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
509 			&a6xx_dbgahb_clusters[i],
510 			&a6xx_state->dbgahb_clusters[i], dumper);
511 }
512 
513 /* Read a data cluster from the CP aperture with the crashdumper */
514 static void a6xx_get_cluster(struct msm_gpu *gpu,
515 		struct a6xx_gpu_state *a6xx_state,
516 		const struct a6xx_cluster *cluster,
517 		struct a6xx_gpu_state_obj *obj,
518 		struct a6xx_crashdumper *dumper)
519 {
520 	u64 *in = dumper->ptr;
521 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
522 	size_t datasize;
523 	int i, regcount = 0;
524 
525 	/* Some clusters need a selector register to be programmed too */
526 	if (cluster->sel_reg)
527 		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
528 
529 	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
530 		int j;
531 
532 		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
533 			(cluster->id << 8) | (i << 4) | i);
534 
535 		for (j = 0; j < cluster->count; j += 2) {
536 			int count = RANGE(cluster->registers, j);
537 
538 			in += CRASHDUMP_READ(in, cluster->registers[j],
539 				count, out);
540 
541 			out += count * sizeof(u32);
542 
543 			if (i == 0)
544 				regcount += count;
545 		}
546 	}
547 
548 	CRASHDUMP_FINI(in);
549 
550 	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
551 
552 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
553 		return;
554 
555 	if (a6xx_crashdumper_run(gpu, dumper))
556 		return;
557 
558 	obj->handle = cluster;
559 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
560 		datasize);
561 }
562 
563 static void a6xx_get_clusters(struct msm_gpu *gpu,
564 		struct a6xx_gpu_state *a6xx_state,
565 		struct a6xx_crashdumper *dumper)
566 {
567 	int i;
568 
569 	a6xx_state->clusters = state_kcalloc(a6xx_state,
570 		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
571 
572 	if (!a6xx_state->clusters)
573 		return;
574 
575 	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
576 
577 	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
578 		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
579 			&a6xx_state->clusters[i], dumper);
580 }
581 
582 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */
583 static void a6xx_get_shader_block(struct msm_gpu *gpu,
584 		struct a6xx_gpu_state *a6xx_state,
585 		const struct a6xx_shader_block *block,
586 		struct a6xx_gpu_state_obj *obj,
587 		struct a6xx_crashdumper *dumper)
588 {
589 	u64 *in = dumper->ptr;
590 	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
591 	int i;
592 
593 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
594 		return;
595 
596 	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
597 		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
598 			(block->type << 8) | i);
599 
600 		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
601 			block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
602 	}
603 
604 	CRASHDUMP_FINI(in);
605 
606 	if (a6xx_crashdumper_run(gpu, dumper))
607 		return;
608 
609 	obj->handle = block;
610 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
611 		datasize);
612 }
613 
614 static void a6xx_get_shaders(struct msm_gpu *gpu,
615 		struct a6xx_gpu_state *a6xx_state,
616 		struct a6xx_crashdumper *dumper)
617 {
618 	int i;
619 
620 	a6xx_state->shaders = state_kcalloc(a6xx_state,
621 		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
622 
623 	if (!a6xx_state->shaders)
624 		return;
625 
626 	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
627 
628 	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
629 		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
630 			&a6xx_state->shaders[i], dumper);
631 }
632 
633 /* Read registers from behind the HLSQ aperture with the crashdumper */
634 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
635 		struct a6xx_gpu_state *a6xx_state,
636 		const struct a6xx_registers *regs,
637 		struct a6xx_gpu_state_obj *obj,
638 		struct a6xx_crashdumper *dumper)
639 
640 {
641 	u64 *in = dumper->ptr;
642 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
643 	int i, regcount = 0;
644 
645 	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
646 
647 	for (i = 0; i < regs->count; i += 2) {
648 		u32 count = RANGE(regs->registers, i);
649 		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
650 			regs->registers[i] - (regs->val0 >> 2);
651 
652 		in += CRASHDUMP_READ(in, offset, count, out);
653 
654 		out += count * sizeof(u32);
655 		regcount += count;
656 	}
657 
658 	CRASHDUMP_FINI(in);
659 
660 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
661 		return;
662 
663 	if (a6xx_crashdumper_run(gpu, dumper))
664 		return;
665 
666 	obj->handle = regs;
667 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
668 		regcount * sizeof(u32));
669 }
670 
671 /* Read a block of registers using the crashdumper */
672 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
673 		struct a6xx_gpu_state *a6xx_state,
674 		const struct a6xx_registers *regs,
675 		struct a6xx_gpu_state_obj *obj,
676 		struct a6xx_crashdumper *dumper)
677 
678 {
679 	u64 *in = dumper->ptr;
680 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
681 	int i, regcount = 0;
682 
683 	/* Some blocks might need to program a selector register first */
684 	if (regs->val0)
685 		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
686 
687 	for (i = 0; i < regs->count; i += 2) {
688 		u32 count = RANGE(regs->registers, i);
689 
690 		in += CRASHDUMP_READ(in, regs->registers[i], count, out);
691 
692 		out += count * sizeof(u32);
693 		regcount += count;
694 	}
695 
696 	CRASHDUMP_FINI(in);
697 
698 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
699 		return;
700 
701 	if (a6xx_crashdumper_run(gpu, dumper))
702 		return;
703 
704 	obj->handle = regs;
705 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
706 		regcount * sizeof(u32));
707 }
708 
709 /* Read a block of registers via AHB */
710 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
711 		struct a6xx_gpu_state *a6xx_state,
712 		const struct a6xx_registers *regs,
713 		struct a6xx_gpu_state_obj *obj)
714 {
715 	int i, regcount = 0, index = 0;
716 
717 	for (i = 0; i < regs->count; i += 2)
718 		regcount += RANGE(regs->registers, i);
719 
720 	obj->handle = (const void *) regs;
721 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
722 	if (!obj->data)
723 		return;
724 
725 	for (i = 0; i < regs->count; i += 2) {
726 		u32 count = RANGE(regs->registers, i);
727 		int j;
728 
729 		for (j = 0; j < count; j++)
730 			obj->data[index++] = gpu_read(gpu,
731 				regs->registers[i] + j);
732 	}
733 }
734 
735 /* Read a block of GMU registers */
736 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
737 		struct a6xx_gpu_state *a6xx_state,
738 		const struct a6xx_registers *regs,
739 		struct a6xx_gpu_state_obj *obj,
740 		bool rscc)
741 {
742 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
743 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
744 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
745 	int i, regcount = 0, index = 0;
746 
747 	for (i = 0; i < regs->count; i += 2)
748 		regcount += RANGE(regs->registers, i);
749 
750 	obj->handle = (const void *) regs;
751 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
752 	if (!obj->data)
753 		return;
754 
755 	for (i = 0; i < regs->count; i += 2) {
756 		u32 count = RANGE(regs->registers, i);
757 		int j;
758 
759 		for (j = 0; j < count; j++) {
760 			u32 offset = regs->registers[i] + j;
761 			u32 val;
762 
763 			if (rscc)
764 				val = gmu_read_rscc(gmu, offset);
765 			else
766 				val = gmu_read(gmu, offset);
767 
768 			obj->data[index++] = val;
769 		}
770 	}
771 }
772 
773 static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
774 		struct a6xx_gpu_state *a6xx_state)
775 {
776 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
777 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
778 
779 	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
780 		2, sizeof(*a6xx_state->gmu_registers));
781 
782 	if (!a6xx_state->gmu_registers)
783 		return;
784 
785 	a6xx_state->nr_gmu_registers = 2;
786 
787 	/* Get the CX GMU registers from AHB */
788 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
789 		&a6xx_state->gmu_registers[0], false);
790 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
791 		&a6xx_state->gmu_registers[1], true);
792 
793 	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
794 		return;
795 
796 	/* Set the fence to ALLOW mode so we can access the registers */
797 	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
798 
799 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
800 		&a6xx_state->gmu_registers[2], false);
801 }
802 
803 #define A6XX_GBIF_REGLIST_SIZE   1
804 static void a6xx_get_registers(struct msm_gpu *gpu,
805 		struct a6xx_gpu_state *a6xx_state,
806 		struct a6xx_crashdumper *dumper)
807 {
808 	int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
809 		ARRAY_SIZE(a6xx_reglist) +
810 		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
811 	int index = 0;
812 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
813 
814 	a6xx_state->registers = state_kcalloc(a6xx_state,
815 		count, sizeof(*a6xx_state->registers));
816 
817 	if (!a6xx_state->registers)
818 		return;
819 
820 	a6xx_state->nr_registers = count;
821 
822 	for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
823 		a6xx_get_ahb_gpu_registers(gpu,
824 			a6xx_state, &a6xx_ahb_reglist[i],
825 			&a6xx_state->registers[index++]);
826 
827 	if (a6xx_has_gbif(adreno_gpu))
828 		a6xx_get_ahb_gpu_registers(gpu,
829 				a6xx_state, &a6xx_gbif_reglist,
830 				&a6xx_state->registers[index++]);
831 	else
832 		a6xx_get_ahb_gpu_registers(gpu,
833 				a6xx_state, &a6xx_vbif_reglist,
834 				&a6xx_state->registers[index++]);
835 	if (!dumper) {
836 		/*
837 		 * We can't use the crashdumper when the SMMU is stalled,
838 		 * because the GPU has no memory access until we resume
839 		 * translation (but we don't want to do that until after
840 		 * we have captured as much useful GPU state as possible).
841 		 * So instead collect registers via the CPU:
842 		 */
843 		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
844 			a6xx_get_ahb_gpu_registers(gpu,
845 				a6xx_state, &a6xx_reglist[i],
846 				&a6xx_state->registers[index++]);
847 		return;
848 	}
849 
850 	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
851 		a6xx_get_crashdumper_registers(gpu,
852 			a6xx_state, &a6xx_reglist[i],
853 			&a6xx_state->registers[index++],
854 			dumper);
855 
856 	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
857 		a6xx_get_crashdumper_hlsq_registers(gpu,
858 			a6xx_state, &a6xx_hlsq_reglist[i],
859 			&a6xx_state->registers[index++],
860 			dumper);
861 }
862 
863 /* Read a block of data from an indexed register pair */
864 static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
865 		struct a6xx_gpu_state *a6xx_state,
866 		const struct a6xx_indexed_registers *indexed,
867 		struct a6xx_gpu_state_obj *obj)
868 {
869 	int i;
870 
871 	obj->handle = (const void *) indexed;
872 	obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
873 	if (!obj->data)
874 		return;
875 
876 	/* All the indexed banks start at address 0 */
877 	gpu_write(gpu, indexed->addr, 0);
878 
879 	/* Read the data - each read increments the internal address by 1 */
880 	for (i = 0; i < indexed->count; i++)
881 		obj->data[i] = gpu_read(gpu, indexed->data);
882 }
883 
884 static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
885 		struct a6xx_gpu_state *a6xx_state)
886 {
887 	u32 mempool_size;
888 	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
889 	int i;
890 
891 	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
892 		sizeof(*a6xx_state->indexed_regs));
893 	if (!a6xx_state->indexed_regs)
894 		return;
895 
896 	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
897 		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
898 			&a6xx_state->indexed_regs[i]);
899 
900 	/* Set the CP mempool size to 0 to stabilize it while dumping */
901 	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
902 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
903 
904 	/* Get the contents of the CP mempool */
905 	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
906 		&a6xx_state->indexed_regs[i]);
907 
908 	/*
909 	 * Offset 0x2000 in the mempool is the size - copy the saved size over
910 	 * so the data is consistent
911 	 */
912 	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
913 
914 	/* Restore the size in the hardware */
915 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
916 
917 	a6xx_state->nr_indexed_regs = count;
918 }
919 
920 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
921 {
922 	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
923 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
924 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
925 	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
926 		GFP_KERNEL);
927 	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
928 			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
929 
930 	if (!a6xx_state)
931 		return ERR_PTR(-ENOMEM);
932 
933 	INIT_LIST_HEAD(&a6xx_state->objs);
934 
935 	/* Get the generic state from the adreno core */
936 	adreno_gpu_state_get(gpu, &a6xx_state->base);
937 
938 	a6xx_get_gmu_registers(gpu, a6xx_state);
939 
940 	/* If GX isn't on the rest of the data isn't going to be accessible */
941 	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
942 		return &a6xx_state->base;
943 
944 	/* Get the banks of indexed registers */
945 	a6xx_get_indexed_registers(gpu, a6xx_state);
946 
947 	/*
948 	 * Try to initialize the crashdumper, if we are not dumping state
949 	 * with the SMMU stalled.  The crashdumper needs memory access to
950 	 * write out GPU state, so we need to skip this when the SMMU is
951 	 * stalled in response to an iova fault
952 	 */
953 	if (!stalled && !a6xx_crashdumper_init(gpu, &_dumper)) {
954 		dumper = &_dumper;
955 	}
956 
957 	a6xx_get_registers(gpu, a6xx_state, dumper);
958 
959 	if (dumper) {
960 		a6xx_get_shaders(gpu, a6xx_state, dumper);
961 		a6xx_get_clusters(gpu, a6xx_state, dumper);
962 		a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
963 
964 		msm_gem_kernel_put(dumper->bo, gpu->aspace);
965 	}
966 
967 	if (snapshot_debugbus)
968 		a6xx_get_debugbus(gpu, a6xx_state);
969 
970 	return  &a6xx_state->base;
971 }
972 
973 static void a6xx_gpu_state_destroy(struct kref *kref)
974 {
975 	struct a6xx_state_memobj *obj, *tmp;
976 	struct msm_gpu_state *state = container_of(kref,
977 			struct msm_gpu_state, ref);
978 	struct a6xx_gpu_state *a6xx_state = container_of(state,
979 			struct a6xx_gpu_state, base);
980 
981 	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
982 		kfree(obj);
983 
984 	adreno_gpu_state_destroy(state);
985 	kfree(a6xx_state);
986 }
987 
988 int a6xx_gpu_state_put(struct msm_gpu_state *state)
989 {
990 	if (IS_ERR_OR_NULL(state))
991 		return 1;
992 
993 	return kref_put(&state->ref, a6xx_gpu_state_destroy);
994 }
995 
996 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
997 		struct drm_printer *p)
998 {
999 	int i, index = 0;
1000 
1001 	if (!data)
1002 		return;
1003 
1004 	for (i = 0; i < count; i += 2) {
1005 		u32 count = RANGE(registers, i);
1006 		u32 offset = registers[i];
1007 		int j;
1008 
1009 		for (j = 0; j < count; index++, offset++, j++) {
1010 			if (data[index] == 0xdeafbead)
1011 				continue;
1012 
1013 			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
1014 				offset << 2, data[index]);
1015 		}
1016 	}
1017 }
1018 
1019 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1020 {
1021 	char out[ASCII85_BUFSZ];
1022 	long i, l, datalen = 0;
1023 
1024 	for (i = 0; i < len >> 2; i++) {
1025 		if (data[i])
1026 			datalen = (i + 1) << 2;
1027 	}
1028 
1029 	if (datalen == 0)
1030 		return;
1031 
1032 	drm_puts(p, "    data: !!ascii85 |\n");
1033 	drm_puts(p, "      ");
1034 
1035 
1036 	l = ascii85_encode_len(datalen);
1037 
1038 	for (i = 0; i < l; i++)
1039 		drm_puts(p, ascii85_encode(data[i], out));
1040 
1041 	drm_puts(p, "\n");
1042 }
1043 
/*
 * Print @fmt followed by @name and a newline.  Both strings are emitted
 * with drm_puts(), so neither is interpreted as a format string.
 */
static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	const char *parts[] = { fmt, name, "\n" };
	int n;

	for (n = 0; n < ARRAY_SIZE(parts); n++)
		drm_puts(p, parts[n]);
}
1050 
1051 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1052 		struct drm_printer *p)
1053 {
1054 	const struct a6xx_shader_block *block = obj->handle;
1055 	int i;
1056 
1057 	if (!obj->handle)
1058 		return;
1059 
1060 	print_name(p, "  - type: ", block->name);
1061 
1062 	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1063 		drm_printf(p, "    - bank: %d\n", i);
1064 		drm_printf(p, "      size: %d\n", block->size);
1065 
1066 		if (!obj->data)
1067 			continue;
1068 
1069 		print_ascii85(p, block->size << 2,
1070 			obj->data + (block->size * i));
1071 	}
1072 }
1073 
1074 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1075 		struct drm_printer *p)
1076 {
1077 	int ctx, index = 0;
1078 
1079 	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1080 		int j;
1081 
1082 		drm_printf(p, "    - context: %d\n", ctx);
1083 
1084 		for (j = 0; j < size; j += 2) {
1085 			u32 count = RANGE(registers, j);
1086 			u32 offset = registers[j];
1087 			int k;
1088 
1089 			for (k = 0; k < count; index++, offset++, k++) {
1090 				if (data[index] == 0xdeafbead)
1091 					continue;
1092 
1093 				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
1094 					offset << 2, data[index]);
1095 			}
1096 		}
1097 	}
1098 }
1099 
1100 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1101 		struct drm_printer *p)
1102 {
1103 	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1104 
1105 	if (dbgahb) {
1106 		print_name(p, "  - cluster-name: ", dbgahb->name);
1107 		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1108 			obj->data, p);
1109 	}
1110 }
1111 
1112 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1113 		struct drm_printer *p)
1114 {
1115 	const struct a6xx_cluster *cluster = obj->handle;
1116 
1117 	if (cluster) {
1118 		print_name(p, "  - cluster-name: ", cluster->name);
1119 		a6xx_show_cluster_data(cluster->registers, cluster->count,
1120 			obj->data, p);
1121 	}
1122 }
1123 
1124 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1125 		struct drm_printer *p)
1126 {
1127 	const struct a6xx_indexed_registers *indexed = obj->handle;
1128 
1129 	if (!indexed)
1130 		return;
1131 
1132 	print_name(p, "  - regs-name: ", indexed->name);
1133 	drm_printf(p, "    dwords: %d\n", indexed->count);
1134 
1135 	print_ascii85(p, indexed->count << 2, obj->data);
1136 }
1137 
1138 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1139 		u32 *data, struct drm_printer *p)
1140 {
1141 	if (block) {
1142 		print_name(p, "  - debugbus-block: ", block->name);
1143 
1144 		/*
1145 		 * count for regular debugbus data is in quadwords,
1146 		 * but print the size in dwords for consistency
1147 		 */
1148 		drm_printf(p, "    count: %d\n", block->count << 1);
1149 
1150 		print_ascii85(p, block->count << 3, data);
1151 	}
1152 }
1153 
1154 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1155 		struct drm_printer *p)
1156 {
1157 	int i;
1158 
1159 	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1160 		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1161 
1162 		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1163 	}
1164 
1165 	if (a6xx_state->vbif_debugbus) {
1166 		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1167 
1168 		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
1169 		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1170 
1171 		/* vbif debugbus data is in dwords.  Confusing, huh? */
1172 		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1173 	}
1174 
1175 	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1176 		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1177 
1178 		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1179 	}
1180 }
1181 
1182 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1183 		struct drm_printer *p)
1184 {
1185 	struct a6xx_gpu_state *a6xx_state = container_of(state,
1186 			struct a6xx_gpu_state, base);
1187 	int i;
1188 
1189 	if (IS_ERR_OR_NULL(state))
1190 		return;
1191 
1192 	adreno_show(gpu, state, p);
1193 
1194 	drm_puts(p, "registers:\n");
1195 	for (i = 0; i < a6xx_state->nr_registers; i++) {
1196 		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
1197 		const struct a6xx_registers *regs = obj->handle;
1198 
1199 		if (!obj->handle)
1200 			continue;
1201 
1202 		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1203 	}
1204 
1205 	drm_puts(p, "registers-gmu:\n");
1206 	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
1207 		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
1208 		const struct a6xx_registers *regs = obj->handle;
1209 
1210 		if (!obj->handle)
1211 			continue;
1212 
1213 		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1214 	}
1215 
1216 	drm_puts(p, "indexed-registers:\n");
1217 	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
1218 		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
1219 
1220 	drm_puts(p, "shader-blocks:\n");
1221 	for (i = 0; i < a6xx_state->nr_shaders; i++)
1222 		a6xx_show_shader(&a6xx_state->shaders[i], p);
1223 
1224 	drm_puts(p, "clusters:\n");
1225 	for (i = 0; i < a6xx_state->nr_clusters; i++)
1226 		a6xx_show_cluster(&a6xx_state->clusters[i], p);
1227 
1228 	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
1229 		a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
1230 
1231 	drm_puts(p, "debugbus:\n");
1232 	a6xx_show_debugbus(a6xx_state, p);
1233 }
1234