xref: /linux/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c (revision 173b0b5b0e865348684c02bd9cb1d22b5d46e458)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
3 
4 #include <linux/ascii85.h>
5 #include "msm_gem.h"
6 #include "a6xx_gpu.h"
7 #include "a6xx_gmu.h"
8 #include "a6xx_gpu_state.h"
9 #include "a6xx_gmu.xml.h"
10 
11 /* Ignore diagnostics about register tables that we aren't using yet. We don't
12  * want to modify these headers too much from their original source.
13  */
14 #pragma GCC diagnostic push
15 #pragma GCC diagnostic ignored "-Wunused-variable"
16 
17 #include "adreno_gen7_0_0_snapshot.h"
18 #include "adreno_gen7_2_0_snapshot.h"
19 
20 #pragma GCC diagnostic pop
21 
22 struct a6xx_gpu_state_obj {
23 	const void *handle;
24 	u32 *data;
25 };
26 
27 struct a6xx_gpu_state {
28 	struct msm_gpu_state base;
29 
30 	struct a6xx_gpu_state_obj *gmu_registers;
31 	int nr_gmu_registers;
32 
33 	struct a6xx_gpu_state_obj *registers;
34 	int nr_registers;
35 
36 	struct a6xx_gpu_state_obj *shaders;
37 	int nr_shaders;
38 
39 	struct a6xx_gpu_state_obj *clusters;
40 	int nr_clusters;
41 
42 	struct a6xx_gpu_state_obj *dbgahb_clusters;
43 	int nr_dbgahb_clusters;
44 
45 	struct a6xx_gpu_state_obj *indexed_regs;
46 	int nr_indexed_regs;
47 
48 	struct a6xx_gpu_state_obj *debugbus;
49 	int nr_debugbus;
50 
51 	struct a6xx_gpu_state_obj *vbif_debugbus;
52 
53 	struct a6xx_gpu_state_obj *cx_debugbus;
54 	int nr_cx_debugbus;
55 
56 	struct msm_gpu_state_bo *gmu_log;
57 	struct msm_gpu_state_bo *gmu_hfi;
58 	struct msm_gpu_state_bo *gmu_debug;
59 
60 	s32 hfi_queue_history[2][HFI_HISTORY_SZ];
61 
62 	struct list_head objs;
63 
64 	bool gpu_initialized;
65 };
66 
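/*
 * Helpers for building a crashdumper "script": an array of two-qword
 * entries that the CP crashdumper walks through.  The second qword packs
 * the register offset into the upper bits and a dword count into the
 * lower bits (bit 21 marks the entry as a register write); two zero
 * qwords terminate the script.  For example, a script that saves a single
 * register could be built roughly like this:
 *
 *	u64 *ptr = dumper->ptr;
 *
 *	ptr += CRASHDUMP_READ(ptr, REG_A6XX_RBBM_STATUS, 1,
 *		dumper->iova + A6XX_CD_DATA_OFFSET);
 *	CRASHDUMP_FINI(ptr);
 */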
67 static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
68 {
69 	in[0] = val;
70 	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
71 
72 	return 2;
73 }
74 
75 static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
76 {
77 	in[0] = target;
78 	in[1] = (((u64) reg) << 44 | dwords);
79 
80 	return 2;
81 }
82 
83 static inline int CRASHDUMP_FINI(u64 *in)
84 {
85 	in[0] = 0;
86 	in[1] = 0;
87 
88 	return 2;
89 }
90 
91 struct a6xx_crashdumper {
92 	void *ptr;
93 	struct drm_gem_object *bo;
94 	u64 iova;
95 };
96 
97 struct a6xx_state_memobj {
98 	struct list_head node;
99 	unsigned long long data[];
100 };
101 
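/*
 * Every chunk allocated for the snapshot is linked into a6xx_state->objs
 * so that a6xx_gpu_state_destroy() can free it all with kvfree() later.
 */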
102 static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
103 {
104 	struct a6xx_state_memobj *obj =
105 		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
106 
107 	if (!obj)
108 		return NULL;
109 
110 	list_add_tail(&obj->node, &a6xx_state->objs);
111 	return &obj->data;
112 }
113 
114 static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
115 		size_t size)
116 {
117 	void *dst = state_kcalloc(a6xx_state, 1, size);
118 
119 	if (dst)
120 		memcpy(dst, src, size);
121 	return dst;
122 }
123 
124 /*
125  * Allocate 1MB for the crashdumper scratch region - 8k for the script and
126  * the rest for the data
127  */
128 #define A6XX_CD_DATA_OFFSET 8192
129 #define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
130 
131 static int a6xx_crashdumper_init(struct msm_gpu *gpu,
132 		struct a6xx_crashdumper *dumper)
133 {
134 	dumper->ptr = msm_gem_kernel_new(gpu->dev,
135 		SZ_1M, MSM_BO_WC, gpu->aspace,
136 		&dumper->bo, &dumper->iova);
137 
138 	if (!IS_ERR(dumper->ptr))
139 		msm_gem_object_set_name(dumper->bo, "crashdump");
140 
141 	return PTR_ERR_OR_ZERO(dumper->ptr);
142 }
143 
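/*
 * Kick off a prepared crashdump script: point the CP at it, set the
 * control bit and poll CP_CRASH_DUMP_STATUS until bit 1 signals that the
 * dump has finished (or the poll times out).
 */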
144 static int a6xx_crashdumper_run(struct msm_gpu *gpu,
145 		struct a6xx_crashdumper *dumper)
146 {
147 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
148 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
149 	u32 val;
150 	int ret;
151 
152 	if (IS_ERR_OR_NULL(dumper->ptr))
153 		return -EINVAL;
154 
155 	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
156 		return -EINVAL;
157 
158 	/* Make sure all pending memory writes are posted */
159 	wmb();
160 
161 	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE, dumper->iova);
162 
163 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
164 
165 	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
166 		val & 0x02, 100, 10000);
167 
168 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
169 
170 	return ret;
171 }
172 
173 /* read a value from the GX debug bus */
174 static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
175 		u32 *data)
176 {
177 	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
178 		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
179 
180 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
181 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
182 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
183 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
184 
185 	/* Wait 1 us to make sure the data is flowing */
186 	udelay(1);
187 
188 	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
189 	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
190 
191 	return 2;
192 }
193 
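/* The CX debugbus offsets are dword-based; convert to bytes for MMIO */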
194 #define cxdbg_write(ptr, offset, val) \
195 	msm_writel((val), (ptr) + ((offset) << 2))
196 
197 #define cxdbg_read(ptr, offset) \
198 	msm_readl((ptr) + ((offset) << 2))
199 
200 /* read a value from the CX debug bus */
201 static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
202 		u32 *data)
203 {
204 	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
205 		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
206 
207 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
208 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
209 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
210 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
211 
212 	/* Wait 1 us to make sure the data is flowing */
213 	udelay(1);
214 
215 	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
216 	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
217 
218 	return 2;
219 }
220 
221 /* Read a chunk of data from the VBIF debug bus */
222 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
223 		u32 reg, int count, u32 *data)
224 {
225 	int i;
226 
227 	gpu_write(gpu, ctrl0, reg);
228 
229 	for (i = 0; i < count; i++) {
230 		gpu_write(gpu, ctrl1, i);
231 		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
232 	}
233 
234 	return count;
235 }
236 
237 #define AXI_ARB_BLOCKS 2
238 #define XIN_AXI_BLOCKS 5
239 #define XIN_CORE_BLOCKS 4
240 
241 #define VBIF_DEBUGBUS_BLOCK_SIZE \
242 	((16 * AXI_ARB_BLOCKS) + \
243 	 (18 * XIN_AXI_BLOCKS) + \
244 	 (12 * XIN_CORE_BLOCKS))
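/* 2 * 16 + 5 * 18 + 4 * 12 = 170 dwords of VBIF debugbus data */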
245 
246 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
247 		struct a6xx_gpu_state *a6xx_state,
248 		struct a6xx_gpu_state_obj *obj)
249 {
250 	u32 clk, *ptr;
251 	int i;
252 
253 	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
254 		sizeof(u32));
255 	if (!obj->data)
256 		return;
257 
258 	obj->handle = NULL;
259 
260 	/* Get the current clock setting */
261 	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
262 
263 	/* Force on the bus so we can read it */
264 	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
265 		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
266 
267 	/* We will read from BUS2 first, so disable BUS1 */
268 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
269 
270 	/* Enable the VBIF bus for reading */
271 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
272 
273 	ptr = obj->data;
274 
275 	for (i = 0; i < AXI_ARB_BLOCKS; i++)
276 		ptr += vbif_debugbus_read(gpu,
277 			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
278 			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
279 			1 << (i + 16), 16, ptr);
280 
281 	for (i = 0; i < XIN_AXI_BLOCKS; i++)
282 		ptr += vbif_debugbus_read(gpu,
283 			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
284 			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
285 			1 << i, 18, ptr);
286 
287 	/* Stop BUS2 so we can turn on BUS1 */
288 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
289 
290 	for (i = 0; i < XIN_CORE_BLOCKS; i++)
291 		ptr += vbif_debugbus_read(gpu,
292 			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
293 			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
294 			1 << i, 12, ptr);
295 
296 	/* Restore the VBIF clock setting */
297 	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
298 }
299 
300 static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
301 		struct a6xx_gpu_state *a6xx_state,
302 		const struct a6xx_debugbus_block *block,
303 		struct a6xx_gpu_state_obj *obj)
304 {
305 	int i;
306 	u32 *ptr;
307 
308 	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
309 	if (!obj->data)
310 		return;
311 
312 	obj->handle = block;
313 
314 	for (ptr = obj->data, i = 0; i < block->count; i++)
315 		ptr += debugbus_read(gpu, block->id, i, ptr);
316 }
317 
318 static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
319 		struct a6xx_gpu_state *a6xx_state,
320 		const struct a6xx_debugbus_block *block,
321 		struct a6xx_gpu_state_obj *obj)
322 {
323 	int i;
324 	u32 *ptr;
325 
326 	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
327 	if (!obj->data)
328 		return;
329 
330 	obj->handle = block;
331 
332 	for (ptr = obj->data, i = 0; i < block->count; i++)
333 		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
334 }
335 
336 static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu,
337 		struct a6xx_gpu_state *a6xx_state)
338 {
339 	int nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
340 		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
341 
342 	if (adreno_is_a650_family(to_adreno_gpu(gpu)))
343 		nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks);
344 
345 	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
346 			sizeof(*a6xx_state->debugbus));
347 
348 	if (a6xx_state->debugbus) {
349 		int i;
350 
351 		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
352 			a6xx_get_debugbus_block(gpu,
353 				a6xx_state,
354 				&a6xx_debugbus_blocks[i],
355 				&a6xx_state->debugbus[i]);
356 
357 		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
358 
		/*
		 * GBIF has the same debugbus interface as the other GPU
		 * blocks and reuses the VBIF block ID, so take the default
		 * path here when the GPU uses GBIF.
		 */
364 		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
365 			a6xx_get_debugbus_block(gpu, a6xx_state,
366 				&a6xx_gbif_debugbus_block,
367 				&a6xx_state->debugbus[i]);
368 
369 			a6xx_state->nr_debugbus += 1;
370 		}
371 
		if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
			for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++)
				a6xx_get_debugbus_block(gpu,
					a6xx_state,
					&a650_debugbus_blocks[i],
					&a6xx_state->debugbus[a6xx_state->nr_debugbus + i]);

			a6xx_state->nr_debugbus += ARRAY_SIZE(a650_debugbus_blocks);
		}
380 	}
381 }
382 
383 static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu,
384 		struct a6xx_gpu_state *a6xx_state)
385 {
386 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
387 	int debugbus_blocks_count, total_debugbus_blocks;
388 	const u32 *debugbus_blocks;
389 	int i;
390 
391 	if (adreno_is_a730(adreno_gpu)) {
392 		debugbus_blocks = gen7_0_0_debugbus_blocks;
393 		debugbus_blocks_count = ARRAY_SIZE(gen7_0_0_debugbus_blocks);
394 	} else {
395 		BUG_ON(!adreno_is_a740_family(adreno_gpu));
396 		debugbus_blocks = gen7_2_0_debugbus_blocks;
397 		debugbus_blocks_count = ARRAY_SIZE(gen7_2_0_debugbus_blocks);
398 	}
399 
400 	total_debugbus_blocks = debugbus_blocks_count +
401 		ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
402 
403 	a6xx_state->debugbus = state_kcalloc(a6xx_state, total_debugbus_blocks,
404 			sizeof(*a6xx_state->debugbus));
405 
406 	if (a6xx_state->debugbus) {
407 		for (i = 0; i < debugbus_blocks_count; i++) {
408 			a6xx_get_debugbus_block(gpu,
409 				a6xx_state, &a7xx_debugbus_blocks[debugbus_blocks[i]],
410 				&a6xx_state->debugbus[i]);
411 		}
412 
413 		for (i = 0; i < ARRAY_SIZE(a7xx_gbif_debugbus_blocks); i++) {
414 			a6xx_get_debugbus_block(gpu,
415 				a6xx_state, &a7xx_gbif_debugbus_blocks[i],
416 				&a6xx_state->debugbus[i + debugbus_blocks_count]);
417 		}
418 	}
419 
420 }
421 
422 static void a6xx_get_debugbus(struct msm_gpu *gpu,
423 		struct a6xx_gpu_state *a6xx_state)
424 {
425 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
426 	struct resource *res;
427 	void __iomem *cxdbg = NULL;
428 
429 	/* Set up the GX debug bus */
430 
431 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
432 		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
433 
434 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
435 		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
436 
437 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
438 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
439 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
440 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
441 
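	/* Route the 16 debug bus byte lanes through in their natural order */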
442 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
443 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
444 
445 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
446 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
447 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
448 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
449 
450 	/* Set up the CX debug bus - it lives elsewhere in the system so do a
451 	 * temporary ioremap for the registers
452 	 */
453 	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
454 			"cx_dbgc");
455 
456 	if (res)
457 		cxdbg = ioremap(res->start, resource_size(res));
458 
459 	if (cxdbg) {
460 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
461 			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
462 
463 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
464 			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
465 
466 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
467 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
468 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
469 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
470 
471 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
472 			0x76543210);
473 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
474 			0xFEDCBA98);
475 
476 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
477 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
478 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
479 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
480 	}
481 
482 	if (adreno_is_a7xx(adreno_gpu)) {
483 		a7xx_get_debugbus_blocks(gpu, a6xx_state);
484 	} else {
485 		a6xx_get_debugbus_blocks(gpu, a6xx_state);
486 	}
487 
488 	/*  Dump the VBIF debugbus on applicable targets */
489 	if (!a6xx_has_gbif(adreno_gpu)) {
490 		a6xx_state->vbif_debugbus =
491 			state_kcalloc(a6xx_state, 1,
492 					sizeof(*a6xx_state->vbif_debugbus));
493 
494 		if (a6xx_state->vbif_debugbus)
495 			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
496 					a6xx_state->vbif_debugbus);
497 	}
498 
499 	if (cxdbg) {
500 		unsigned nr_cx_debugbus_blocks;
501 		const struct a6xx_debugbus_block *cx_debugbus_blocks;
502 
503 		if (adreno_is_a7xx(adreno_gpu)) {
504 			BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
505 			cx_debugbus_blocks = a7xx_cx_debugbus_blocks;
506 			nr_cx_debugbus_blocks = ARRAY_SIZE(a7xx_cx_debugbus_blocks);
507 		} else {
508 			cx_debugbus_blocks = a6xx_cx_debugbus_blocks;
509 			nr_cx_debugbus_blocks = ARRAY_SIZE(a6xx_cx_debugbus_blocks);
510 		}
511 
512 		a6xx_state->cx_debugbus =
513 			state_kcalloc(a6xx_state,
514 			nr_cx_debugbus_blocks,
515 			sizeof(*a6xx_state->cx_debugbus));
516 
517 		if (a6xx_state->cx_debugbus) {
518 			int i;
519 
520 			for (i = 0; i < nr_cx_debugbus_blocks; i++)
521 				a6xx_get_cx_debugbus_block(cxdbg,
522 					a6xx_state,
523 					&cx_debugbus_blocks[i],
524 					&a6xx_state->cx_debugbus[i]);
525 
526 			a6xx_state->nr_cx_debugbus =
527 				nr_cx_debugbus_blocks;
528 		}
529 
530 		iounmap(cxdbg);
531 	}
532 }
533 
534 #define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
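/* Register lists are { first, last } pairs; RANGE() is the inclusive count */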
535 
536 /* Read a data cluster from behind the AHB aperture */
537 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
538 		struct a6xx_gpu_state *a6xx_state,
539 		const struct a6xx_dbgahb_cluster *dbgahb,
540 		struct a6xx_gpu_state_obj *obj,
541 		struct a6xx_crashdumper *dumper)
542 {
543 	u64 *in = dumper->ptr;
544 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
545 	size_t datasize;
546 	int i, regcount = 0;
547 
548 	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
549 		int j;
550 
551 		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
552 			(dbgahb->statetype + i * 2) << 8);
553 
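		/*
		 * dbgahb->base is a byte address; shift it down to dwords so
		 * it can be subtracted from the register offsets to index
		 * into the AHB read aperture.
		 */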
554 		for (j = 0; j < dbgahb->count; j += 2) {
555 			int count = RANGE(dbgahb->registers, j);
556 			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
557 				dbgahb->registers[j] - (dbgahb->base >> 2);
558 
559 			in += CRASHDUMP_READ(in, offset, count, out);
560 
561 			out += count * sizeof(u32);
562 
563 			if (i == 0)
564 				regcount += count;
565 		}
566 	}
567 
568 	CRASHDUMP_FINI(in);
569 
570 	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
571 
572 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
573 		return;
574 
575 	if (a6xx_crashdumper_run(gpu, dumper))
576 		return;
577 
578 	obj->handle = dbgahb;
579 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
580 		datasize);
581 }
582 
583 static void a7xx_get_dbgahb_cluster(struct msm_gpu *gpu,
584 		struct a6xx_gpu_state *a6xx_state,
585 		const struct gen7_sptp_cluster_registers *dbgahb,
586 		struct a6xx_gpu_state_obj *obj,
587 		struct a6xx_crashdumper *dumper)
588 {
589 	u64 *in = dumper->ptr;
590 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
591 	size_t datasize;
592 	int i, regcount = 0;
593 
594 	in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
595 		A7XX_SP_READ_SEL_LOCATION(dbgahb->location_id) |
596 		A7XX_SP_READ_SEL_PIPE(dbgahb->pipe_id) |
597 		A7XX_SP_READ_SEL_STATETYPE(dbgahb->statetype));
598 
599 	for (i = 0; dbgahb->regs[i] != UINT_MAX; i += 2) {
600 		int count = RANGE(dbgahb->regs, i);
601 		u32 offset = REG_A7XX_SP_AHB_READ_APERTURE +
602 			dbgahb->regs[i] - dbgahb->regbase;
603 
604 		in += CRASHDUMP_READ(in, offset, count, out);
605 
606 		out += count * sizeof(u32);
607 		regcount += count;
608 	}
609 
610 	CRASHDUMP_FINI(in);
611 
612 	datasize = regcount * sizeof(u32);
613 
614 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
615 		return;
616 
617 	if (a6xx_crashdumper_run(gpu, dumper))
618 		return;
619 
620 	obj->handle = dbgahb;
621 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
622 		datasize);
623 }
624 
625 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
626 		struct a6xx_gpu_state *a6xx_state,
627 		struct a6xx_crashdumper *dumper)
628 {
629 	int i;
630 
631 	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
632 		ARRAY_SIZE(a6xx_dbgahb_clusters),
633 		sizeof(*a6xx_state->dbgahb_clusters));
634 
635 	if (!a6xx_state->dbgahb_clusters)
636 		return;
637 
638 	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
639 
640 	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
641 		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
642 			&a6xx_dbgahb_clusters[i],
643 			&a6xx_state->dbgahb_clusters[i], dumper);
644 }
645 
646 static void a7xx_get_dbgahb_clusters(struct msm_gpu *gpu,
647 		struct a6xx_gpu_state *a6xx_state,
648 		struct a6xx_crashdumper *dumper)
649 {
650 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
651 	int i;
652 	const struct gen7_sptp_cluster_registers *dbgahb_clusters;
653 	unsigned dbgahb_clusters_size;
654 
655 	if (adreno_is_a730(adreno_gpu)) {
656 		dbgahb_clusters = gen7_0_0_sptp_clusters;
657 		dbgahb_clusters_size = ARRAY_SIZE(gen7_0_0_sptp_clusters);
658 	} else {
659 		BUG_ON(!adreno_is_a740_family(adreno_gpu));
660 		dbgahb_clusters = gen7_2_0_sptp_clusters;
661 		dbgahb_clusters_size = ARRAY_SIZE(gen7_2_0_sptp_clusters);
662 	}
663 
664 	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
665 		dbgahb_clusters_size,
666 		sizeof(*a6xx_state->dbgahb_clusters));
667 
668 	if (!a6xx_state->dbgahb_clusters)
669 		return;
670 
671 	a6xx_state->nr_dbgahb_clusters = dbgahb_clusters_size;
672 
673 	for (i = 0; i < dbgahb_clusters_size; i++)
674 		a7xx_get_dbgahb_cluster(gpu, a6xx_state,
675 			&dbgahb_clusters[i],
676 			&a6xx_state->dbgahb_clusters[i], dumper);
677 }
678 
679 /* Read a data cluster from the CP aperture with the crashdumper */
680 static void a6xx_get_cluster(struct msm_gpu *gpu,
681 		struct a6xx_gpu_state *a6xx_state,
682 		const struct a6xx_cluster *cluster,
683 		struct a6xx_gpu_state_obj *obj,
684 		struct a6xx_crashdumper *dumper)
685 {
686 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
687 	u64 *in = dumper->ptr;
688 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
689 	size_t datasize;
690 	int i, regcount = 0;
691 	u32 id = cluster->id;
692 
	/* Skip registers that are not present on older generations */
694 	if (!adreno_is_a660_family(adreno_gpu) &&
695 			cluster->registers == a660_fe_cluster)
696 		return;
697 
698 	if (adreno_is_a650_family(adreno_gpu) &&
699 			cluster->registers == a6xx_ps_cluster)
700 		id = CLUSTER_VPC_PS;
701 
702 	/* Some clusters need a selector register to be programmed too */
703 	if (cluster->sel_reg)
704 		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
705 
706 	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
707 		int j;
708 
709 		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
710 			(id << 8) | (i << 4) | i);
711 
712 		for (j = 0; j < cluster->count; j += 2) {
713 			int count = RANGE(cluster->registers, j);
714 
715 			in += CRASHDUMP_READ(in, cluster->registers[j],
716 				count, out);
717 
718 			out += count * sizeof(u32);
719 
720 			if (i == 0)
721 				regcount += count;
722 		}
723 	}
724 
725 	CRASHDUMP_FINI(in);
726 
727 	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
728 
729 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
730 		return;
731 
732 	if (a6xx_crashdumper_run(gpu, dumper))
733 		return;
734 
735 	obj->handle = cluster;
736 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
737 		datasize);
738 }
739 
740 static void a7xx_get_cluster(struct msm_gpu *gpu,
741 		struct a6xx_gpu_state *a6xx_state,
742 		const struct gen7_cluster_registers *cluster,
743 		struct a6xx_gpu_state_obj *obj,
744 		struct a6xx_crashdumper *dumper)
745 {
746 	u64 *in = dumper->ptr;
747 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
748 	size_t datasize;
749 	int i, regcount = 0;
750 
751 	/* Some clusters need a selector register to be programmed too */
752 	if (cluster->sel)
753 		in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val);
754 
755 	in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD,
756 		A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) |
757 		A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) |
758 		A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id));
759 
760 	for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) {
761 		int count = RANGE(cluster->regs, i);
762 
763 		in += CRASHDUMP_READ(in, cluster->regs[i],
764 			count, out);
765 
766 		out += count * sizeof(u32);
767 		regcount += count;
768 	}
769 
770 	CRASHDUMP_FINI(in);
771 
772 	datasize = regcount * sizeof(u32);
773 
774 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
775 		return;
776 
777 	if (a6xx_crashdumper_run(gpu, dumper))
778 		return;
779 
780 	obj->handle = cluster;
781 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
782 		datasize);
783 }
784 
785 static void a6xx_get_clusters(struct msm_gpu *gpu,
786 		struct a6xx_gpu_state *a6xx_state,
787 		struct a6xx_crashdumper *dumper)
788 {
789 	int i;
790 
791 	a6xx_state->clusters = state_kcalloc(a6xx_state,
792 		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
793 
794 	if (!a6xx_state->clusters)
795 		return;
796 
797 	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
798 
799 	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
800 		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
801 			&a6xx_state->clusters[i], dumper);
802 }
803 
804 static void a7xx_get_clusters(struct msm_gpu *gpu,
805 		struct a6xx_gpu_state *a6xx_state,
806 		struct a6xx_crashdumper *dumper)
807 {
808 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
809 	int i;
810 	const struct gen7_cluster_registers *clusters;
811 	unsigned clusters_size;
812 
813 	if (adreno_is_a730(adreno_gpu)) {
814 		clusters = gen7_0_0_clusters;
815 		clusters_size = ARRAY_SIZE(gen7_0_0_clusters);
816 	} else {
817 		BUG_ON(!adreno_is_a740_family(adreno_gpu));
818 		clusters = gen7_2_0_clusters;
819 		clusters_size = ARRAY_SIZE(gen7_2_0_clusters);
820 	}
821 
822 	a6xx_state->clusters = state_kcalloc(a6xx_state,
823 		clusters_size, sizeof(*a6xx_state->clusters));
824 
825 	if (!a6xx_state->clusters)
826 		return;
827 
828 	a6xx_state->nr_clusters = clusters_size;
829 
830 	for (i = 0; i < clusters_size; i++)
831 		a7xx_get_cluster(gpu, a6xx_state, &clusters[i],
832 			&a6xx_state->clusters[i], dumper);
833 }
834 
835 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */
836 static void a6xx_get_shader_block(struct msm_gpu *gpu,
837 		struct a6xx_gpu_state *a6xx_state,
838 		const struct a6xx_shader_block *block,
839 		struct a6xx_gpu_state_obj *obj,
840 		struct a6xx_crashdumper *dumper)
841 {
842 	u64 *in = dumper->ptr;
843 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
844 	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
845 	int i;
846 
847 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
848 		return;
849 
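	/*
	 * For each shader bank: select it through HLSQ_DBG_READ_SEL, then
	 * let the crashdumper stream the block out of the AHB read aperture.
	 */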
850 	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
851 		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
852 			(block->type << 8) | i);
853 
854 		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
855 			block->size, out);
856 
857 		out += block->size * sizeof(u32);
858 	}
859 
860 	CRASHDUMP_FINI(in);
861 
862 	if (a6xx_crashdumper_run(gpu, dumper))
863 		return;
864 
865 	obj->handle = block;
866 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
867 		datasize);
868 }
869 
870 static void a7xx_get_shader_block(struct msm_gpu *gpu,
871 		struct a6xx_gpu_state *a6xx_state,
872 		const struct gen7_shader_block *block,
873 		struct a6xx_gpu_state_obj *obj,
874 		struct a6xx_crashdumper *dumper)
875 {
876 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
877 	u64 *in = dumper->ptr;
878 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
879 	size_t datasize = block->size * block->num_sps * block->num_usptps * sizeof(u32);
880 	int i, j;
881 
882 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
883 		return;
884 
885 	if (adreno_is_a730(adreno_gpu)) {
886 		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 3);
887 	}
888 
889 	for (i = 0; i < block->num_sps; i++) {
890 		for (j = 0; j < block->num_usptps; j++) {
891 			in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
892 				A7XX_SP_READ_SEL_LOCATION(block->location) |
893 				A7XX_SP_READ_SEL_PIPE(block->pipeid) |
894 				A7XX_SP_READ_SEL_STATETYPE(block->statetype) |
895 				A7XX_SP_READ_SEL_USPTP(j) |
896 				A7XX_SP_READ_SEL_SPTP(i));
897 
898 			in += CRASHDUMP_READ(in, REG_A7XX_SP_AHB_READ_APERTURE,
899 				block->size, out);
900 
901 			out += block->size * sizeof(u32);
902 		}
903 	}
904 
905 	CRASHDUMP_FINI(in);
906 
907 	if (a6xx_crashdumper_run(gpu, dumper))
908 		goto out;
909 
910 	obj->handle = block;
911 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
912 		datasize);
913 
914 out:
915 	if (adreno_is_a730(adreno_gpu)) {
916 		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 0);
917 	}
918 }
919 
920 static void a6xx_get_shaders(struct msm_gpu *gpu,
921 		struct a6xx_gpu_state *a6xx_state,
922 		struct a6xx_crashdumper *dumper)
923 {
924 	int i;
925 
926 	a6xx_state->shaders = state_kcalloc(a6xx_state,
927 		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
928 
929 	if (!a6xx_state->shaders)
930 		return;
931 
932 	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
933 
934 	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
935 		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
936 			&a6xx_state->shaders[i], dumper);
937 }
938 
939 static void a7xx_get_shaders(struct msm_gpu *gpu,
940 		struct a6xx_gpu_state *a6xx_state,
941 		struct a6xx_crashdumper *dumper)
942 {
943 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
944 	const struct gen7_shader_block *shader_blocks;
945 	unsigned num_shader_blocks;
946 	int i;
947 
948 	if (adreno_is_a730(adreno_gpu)) {
949 		shader_blocks = gen7_0_0_shader_blocks;
950 		num_shader_blocks = ARRAY_SIZE(gen7_0_0_shader_blocks);
951 	} else {
952 		BUG_ON(!adreno_is_a740_family(adreno_gpu));
953 		shader_blocks = gen7_2_0_shader_blocks;
954 		num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks);
955 	}
956 
957 	a6xx_state->shaders = state_kcalloc(a6xx_state,
958 		num_shader_blocks, sizeof(*a6xx_state->shaders));
959 
960 	if (!a6xx_state->shaders)
961 		return;
962 
963 	a6xx_state->nr_shaders = num_shader_blocks;
964 
965 	for (i = 0; i < num_shader_blocks; i++)
966 		a7xx_get_shader_block(gpu, a6xx_state, &shader_blocks[i],
967 			&a6xx_state->shaders[i], dumper);
968 }
969 
970 /* Read registers from behind the HLSQ aperture with the crashdumper */
971 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
972 		struct a6xx_gpu_state *a6xx_state,
973 		const struct a6xx_registers *regs,
974 		struct a6xx_gpu_state_obj *obj,
975 		struct a6xx_crashdumper *dumper)
976 
977 {
978 	u64 *in = dumper->ptr;
979 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
980 	int i, regcount = 0;
981 
982 	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
983 
984 	for (i = 0; i < regs->count; i += 2) {
985 		u32 count = RANGE(regs->registers, i);
986 		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
987 			regs->registers[i] - (regs->val0 >> 2);
988 
989 		in += CRASHDUMP_READ(in, offset, count, out);
990 
991 		out += count * sizeof(u32);
992 		regcount += count;
993 	}
994 
995 	CRASHDUMP_FINI(in);
996 
997 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
998 		return;
999 
1000 	if (a6xx_crashdumper_run(gpu, dumper))
1001 		return;
1002 
1003 	obj->handle = regs;
1004 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1005 		regcount * sizeof(u32));
1006 }
1007 
1008 /* Read a block of registers using the crashdumper */
1009 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
1010 		struct a6xx_gpu_state *a6xx_state,
1011 		const struct a6xx_registers *regs,
1012 		struct a6xx_gpu_state_obj *obj,
1013 		struct a6xx_crashdumper *dumper)
1014 
1015 {
1016 	u64 *in = dumper->ptr;
1017 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1018 	int i, regcount = 0;
1019 
1020 	/* Skip unsupported registers on older generations */
1021 	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1022 			(regs->registers == a660_registers))
1023 		return;
1024 
1025 	/* Some blocks might need to program a selector register first */
1026 	if (regs->val0)
1027 		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
1028 
1029 	for (i = 0; i < regs->count; i += 2) {
1030 		u32 count = RANGE(regs->registers, i);
1031 
1032 		in += CRASHDUMP_READ(in, regs->registers[i], count, out);
1033 
1034 		out += count * sizeof(u32);
1035 		regcount += count;
1036 	}
1037 
1038 	CRASHDUMP_FINI(in);
1039 
1040 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1041 		return;
1042 
1043 	if (a6xx_crashdumper_run(gpu, dumper))
1044 		return;
1045 
1046 	obj->handle = regs;
1047 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1048 		regcount * sizeof(u32));
1049 }
1050 
1051 static void a7xx_get_crashdumper_registers(struct msm_gpu *gpu,
1052 		struct a6xx_gpu_state *a6xx_state,
1053 		const struct gen7_reg_list *regs,
1054 		struct a6xx_gpu_state_obj *obj,
1055 		struct a6xx_crashdumper *dumper)
1056 
1057 {
1058 	u64 *in = dumper->ptr;
1059 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1060 	int i, regcount = 0;
1061 
1062 	/* Some blocks might need to program a selector register first */
1063 	if (regs->sel)
1064 		in += CRASHDUMP_WRITE(in, regs->sel->cd_reg, regs->sel->val);
1065 
1066 	for (i = 0; regs->regs[i] != UINT_MAX; i += 2) {
1067 		u32 count = RANGE(regs->regs, i);
1068 
1069 		in += CRASHDUMP_READ(in, regs->regs[i], count, out);
1070 
1071 		out += count * sizeof(u32);
1072 		regcount += count;
1073 	}
1074 
1075 	CRASHDUMP_FINI(in);
1076 
1077 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1078 		return;
1079 
1080 	if (a6xx_crashdumper_run(gpu, dumper))
1081 		return;
1082 
1083 	obj->handle = regs->regs;
1084 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1085 		regcount * sizeof(u32));
1086 }
1087 
1088 
1089 /* Read a block of registers via AHB */
1090 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1091 		struct a6xx_gpu_state *a6xx_state,
1092 		const struct a6xx_registers *regs,
1093 		struct a6xx_gpu_state_obj *obj)
1094 {
1095 	int i, regcount = 0, index = 0;
1096 
1097 	/* Skip unsupported registers on older generations */
1098 	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1099 			(regs->registers == a660_registers))
1100 		return;
1101 
1102 	for (i = 0; i < regs->count; i += 2)
1103 		regcount += RANGE(regs->registers, i);
1104 
1105 	obj->handle = (const void *) regs;
1106 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1107 	if (!obj->data)
1108 		return;
1109 
1110 	for (i = 0; i < regs->count; i += 2) {
1111 		u32 count = RANGE(regs->registers, i);
1112 		int j;
1113 
1114 		for (j = 0; j < count; j++)
1115 			obj->data[index++] = gpu_read(gpu,
1116 				regs->registers[i] + j);
1117 	}
1118 }
1119 
1120 static void a7xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1121 		struct a6xx_gpu_state *a6xx_state,
1122 		const u32 *regs,
1123 		struct a6xx_gpu_state_obj *obj)
1124 {
1125 	int i, regcount = 0, index = 0;
1126 
1127 	for (i = 0; regs[i] != UINT_MAX; i += 2)
1128 		regcount += RANGE(regs, i);
1129 
1130 	obj->handle = (const void *) regs;
1131 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1132 	if (!obj->data)
1133 		return;
1134 
1135 	for (i = 0; regs[i] != UINT_MAX; i += 2) {
1136 		u32 count = RANGE(regs, i);
1137 		int j;
1138 
1139 		for (j = 0; j < count; j++)
1140 			obj->data[index++] = gpu_read(gpu, regs[i] + j);
1141 	}
1142 }
1143 
1144 static void a7xx_get_ahb_gpu_reglist(struct msm_gpu *gpu,
1145 		struct a6xx_gpu_state *a6xx_state,
1146 		const struct gen7_reg_list *regs,
1147 		struct a6xx_gpu_state_obj *obj)
1148 {
1149 	if (regs->sel)
1150 		gpu_write(gpu, regs->sel->host_reg, regs->sel->val);
1151 
1152 	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, regs->regs, obj);
1153 }
1154 
1155 /* Read a block of GMU registers */
1156 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
1157 		struct a6xx_gpu_state *a6xx_state,
1158 		const struct a6xx_registers *regs,
1159 		struct a6xx_gpu_state_obj *obj,
1160 		bool rscc)
1161 {
1162 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1163 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1164 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1165 	int i, regcount = 0, index = 0;
1166 
1167 	for (i = 0; i < regs->count; i += 2)
1168 		regcount += RANGE(regs->registers, i);
1169 
1170 	obj->handle = (const void *) regs;
1171 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1172 	if (!obj->data)
1173 		return;
1174 
1175 	for (i = 0; i < regs->count; i += 2) {
1176 		u32 count = RANGE(regs->registers, i);
1177 		int j;
1178 
1179 		for (j = 0; j < count; j++) {
1180 			u32 offset = regs->registers[i] + j;
1181 			u32 val;
1182 
1183 			if (rscc)
1184 				val = gmu_read_rscc(gmu, offset);
1185 			else
1186 				val = gmu_read(gmu, offset);
1187 
1188 			obj->data[index++] = val;
1189 		}
1190 	}
1191 }
1192 
1193 static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
1194 		struct a6xx_gpu_state *a6xx_state)
1195 {
1196 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1197 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1198 
1199 	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
1200 		3, sizeof(*a6xx_state->gmu_registers));
1201 
1202 	if (!a6xx_state->gmu_registers)
1203 		return;
1204 
1205 	a6xx_state->nr_gmu_registers = 3;
1206 
1207 	/* Get the CX GMU registers from AHB */
1208 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
1209 		&a6xx_state->gmu_registers[0], false);
1210 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
1211 		&a6xx_state->gmu_registers[1], true);
1212 
1213 	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1214 		return;
1215 
1216 	/* Set the fence to ALLOW mode so we can access the registers */
1217 	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
1218 
1219 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
1220 		&a6xx_state->gmu_registers[2], false);
1221 }
1222 
1223 static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
1224 		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
1225 {
1226 	struct msm_gpu_state_bo *snapshot;
1227 
1228 	if (!bo->size)
1229 		return NULL;
1230 
1231 	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
1232 	if (!snapshot)
1233 		return NULL;
1234 
1235 	snapshot->iova = bo->iova;
1236 	snapshot->size = bo->size;
1237 	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
1238 	if (!snapshot->data)
1239 		return NULL;
1240 
1241 	memcpy(snapshot->data, bo->virt, bo->size);
1242 
1243 	return snapshot;
1244 }
1245 
1246 static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
1247 					  struct a6xx_gpu_state *a6xx_state)
1248 {
1249 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1250 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1251 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1252 	unsigned i, j;
1253 
1254 	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
1255 
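	/*
	 * Copy each queue's history ring starting at history_idx so the
	 * entries land in the snapshot in ring order.
	 */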
1256 	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
1257 		struct a6xx_hfi_queue *queue = &gmu->queues[i];
1258 		for (j = 0; j < HFI_HISTORY_SZ; j++) {
1259 			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
1260 			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
1261 		}
1262 	}
1263 }
1264 
1265 #define A6XX_REGLIST_SIZE        1
1266 #define A6XX_GBIF_REGLIST_SIZE   1
1267 static void a6xx_get_registers(struct msm_gpu *gpu,
1268 		struct a6xx_gpu_state *a6xx_state,
1269 		struct a6xx_crashdumper *dumper)
1270 {
1271 	int i, count = A6XX_REGLIST_SIZE +
1272 		ARRAY_SIZE(a6xx_reglist) +
1273 		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
1274 	int index = 0;
1275 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1276 
1277 	a6xx_state->registers = state_kcalloc(a6xx_state,
1278 		count, sizeof(*a6xx_state->registers));
1279 
1280 	if (!a6xx_state->registers)
1281 		return;
1282 
1283 	a6xx_state->nr_registers = count;
1284 
1285 	a6xx_get_ahb_gpu_registers(gpu,
1286 		a6xx_state, &a6xx_ahb_reglist,
1287 		&a6xx_state->registers[index++]);
1288 
1289 	if (a6xx_has_gbif(adreno_gpu))
1290 		a6xx_get_ahb_gpu_registers(gpu,
1291 				a6xx_state, &a6xx_gbif_reglist,
1292 				&a6xx_state->registers[index++]);
1293 	else
1294 		a6xx_get_ahb_gpu_registers(gpu,
1295 				a6xx_state, &a6xx_vbif_reglist,
1296 				&a6xx_state->registers[index++]);
1297 	if (!dumper) {
1298 		/*
1299 		 * We can't use the crashdumper when the SMMU is stalled,
1300 		 * because the GPU has no memory access until we resume
1301 		 * translation (but we don't want to do that until after
1302 		 * we have captured as much useful GPU state as possible).
1303 		 * So instead collect registers via the CPU:
1304 		 */
1305 		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1306 			a6xx_get_ahb_gpu_registers(gpu,
1307 				a6xx_state, &a6xx_reglist[i],
1308 				&a6xx_state->registers[index++]);
1309 		return;
1310 	}
1311 
1312 	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1313 		a6xx_get_crashdumper_registers(gpu,
1314 			a6xx_state, &a6xx_reglist[i],
1315 			&a6xx_state->registers[index++],
1316 			dumper);
1317 
1318 	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
1319 		a6xx_get_crashdumper_hlsq_registers(gpu,
1320 			a6xx_state, &a6xx_hlsq_reglist[i],
1321 			&a6xx_state->registers[index++],
1322 			dumper);
1323 }
1324 
1325 #define A7XX_PRE_CRASHDUMPER_SIZE    1
1326 #define A7XX_POST_CRASHDUMPER_SIZE   1
1327 static void a7xx_get_registers(struct msm_gpu *gpu,
1328 		struct a6xx_gpu_state *a6xx_state,
1329 		struct a6xx_crashdumper *dumper)
1330 {
1331 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1332 	int i, count;
1333 	int index = 0;
1334 	const u32 *pre_crashdumper_regs;
1335 	const struct gen7_reg_list *reglist;
1336 
1337 	if (adreno_is_a730(adreno_gpu)) {
1338 		reglist = gen7_0_0_reg_list;
1339 		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1340 	} else {
1341 		BUG_ON(!adreno_is_a740_family(adreno_gpu));
1342 		reglist = gen7_2_0_reg_list;
1343 		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1344 	}
1345 
1346 	count = A7XX_PRE_CRASHDUMPER_SIZE + A7XX_POST_CRASHDUMPER_SIZE;
1347 
	/* The downstream reglist contains registers in other memory regions
	 * (cx_misc/cx_mem and cx_dbgc); we would need to plumb through their
	 * offsets and map them in order to read them from the CPU. For now,
	 * only read the first region, which is the main one.
	 */
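	/* The reg list is terminated by an entry with a NULL regs pointer */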
1353 	if (dumper) {
1354 		for (i = 0; reglist[i].regs; i++)
1355 			count++;
1356 	} else {
1357 		count++;
1358 	}
1359 
1360 	a6xx_state->registers = state_kcalloc(a6xx_state,
1361 		count, sizeof(*a6xx_state->registers));
1362 
1363 	if (!a6xx_state->registers)
1364 		return;
1365 
1366 	a6xx_state->nr_registers = count;
1367 
1368 	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, pre_crashdumper_regs,
1369 		&a6xx_state->registers[index++]);
1370 
1371 	if (!dumper) {
1372 		a7xx_get_ahb_gpu_reglist(gpu,
1373 			a6xx_state, &reglist[0],
1374 			&a6xx_state->registers[index++]);
1375 		return;
1376 	}
1377 
1378 	for (i = 0; reglist[i].regs; i++)
1379 		a7xx_get_crashdumper_registers(gpu,
1380 			a6xx_state, &reglist[i],
1381 			&a6xx_state->registers[index++],
1382 			dumper);
1383 }
1384 
1385 static void a7xx_get_post_crashdumper_registers(struct msm_gpu *gpu,
1386 		struct a6xx_gpu_state *a6xx_state)
1387 {
1388 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1389 	const u32 *regs;
1390 
1391 	BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
1392 	regs = gen7_0_0_post_crashdumper_registers;
1393 
1394 	a7xx_get_ahb_gpu_registers(gpu,
1395 		a6xx_state, regs,
1396 		&a6xx_state->registers[a6xx_state->nr_registers - 1]);
1397 }
1398 
1399 static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu)
1400 {
	/*
	 * The value at [16:31] is in 4dword units. Convert it to dwords:
	 * the single shift by 14 both extracts the field (>> 16) and
	 * multiplies it by 4 (<< 2).
	 */
1402 	return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14;
1403 }
1404 
1405 static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu)
1406 {
1407 	/*
1408 	 * The value at CP_ROQ_THRESHOLDS_2[20:31] is in 4dword units.
1409 	 * That register however is not directly accessible from APSS on A7xx.
1410 	 * Program the SQE_UCODE_DBG_ADDR with offset=0x70d3 and read the value.
1411 	 */
1412 	gpu_write(gpu, REG_A6XX_CP_SQE_UCODE_DBG_ADDR, 0x70d3);
1413 
1414 	return 4 * (gpu_read(gpu, REG_A6XX_CP_SQE_UCODE_DBG_DATA) >> 20);
1415 }
1416 
1417 /* Read a block of data from an indexed register pair */
1418 static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
1419 		struct a6xx_gpu_state *a6xx_state,
1420 		struct a6xx_indexed_registers *indexed,
1421 		struct a6xx_gpu_state_obj *obj)
1422 {
1423 	int i;
1424 
1425 	obj->handle = (const void *) indexed;
1426 	if (indexed->count_fn)
1427 		indexed->count = indexed->count_fn(gpu);
1428 
1429 	obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
1430 	if (!obj->data)
1431 		return;
1432 
1433 	/* All the indexed banks start at address 0 */
1434 	gpu_write(gpu, indexed->addr, 0);
1435 
1436 	/* Read the data - each read increments the internal address by 1 */
1437 	for (i = 0; i < indexed->count; i++)
1438 		obj->data[i] = gpu_read(gpu, indexed->data);
1439 }
1440 
1441 static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
1442 		struct a6xx_gpu_state *a6xx_state)
1443 {
1444 	u32 mempool_size;
1445 	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
1446 	int i;
1447 
1448 	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
1449 		sizeof(*a6xx_state->indexed_regs));
1450 	if (!a6xx_state->indexed_regs)
1451 		return;
1452 
1453 	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
1454 		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
1455 			&a6xx_state->indexed_regs[i]);
1456 
1457 	if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
1458 		u32 val;
1459 
1460 		val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG);
1461 		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4);
1462 
1463 		/* Get the contents of the CP mempool */
1464 		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1465 			&a6xx_state->indexed_regs[i]);
1466 
1467 		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val);
1468 		a6xx_state->nr_indexed_regs = count;
1469 		return;
1470 	}
1471 
1472 	/* Set the CP mempool size to 0 to stabilize it while dumping */
1473 	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
1474 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
1475 
1476 	/* Get the contents of the CP mempool */
1477 	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1478 		&a6xx_state->indexed_regs[i]);
1479 
1480 	/*
1481 	 * Offset 0x2000 in the mempool is the size - copy the saved size over
1482 	 * so the data is consistent
1483 	 */
1484 	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
1485 
1486 	/* Restore the size in the hardware */
1487 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
1488 }
1489 
1490 static void a7xx_get_indexed_registers(struct msm_gpu *gpu,
1491 		struct a6xx_gpu_state *a6xx_state)
1492 {
1493 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1494 	int i, indexed_count, mempool_count;
1495 
1496 	BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
1497 	indexed_count = ARRAY_SIZE(a7xx_indexed_reglist);
1498 	mempool_count = ARRAY_SIZE(a7xx_cp_bv_mempool_indexed);
1499 
1500 	a6xx_state->indexed_regs = state_kcalloc(a6xx_state,
1501 					indexed_count + mempool_count,
1502 					sizeof(*a6xx_state->indexed_regs));
1503 	if (!a6xx_state->indexed_regs)
1504 		return;
1505 
1506 	a6xx_state->nr_indexed_regs = indexed_count + mempool_count;
1507 
1508 	/* First read the common regs */
1509 	for (i = 0; i < indexed_count; i++)
1510 		a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_indexed_reglist[i],
1511 			&a6xx_state->indexed_regs[i]);
1512 
1513 	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, 0, BIT(2));
1514 	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, 0, BIT(2));
1515 
1516 	/* Get the contents of the CP_BV mempool */
1517 	for (i = 0; i < mempool_count; i++)
1518 		a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_cp_bv_mempool_indexed[i],
1519 			&a6xx_state->indexed_regs[indexed_count + i]);
1520 
1521 	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(2), 0);
1522 	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, BIT(2), 0);
1524 }
1525 
1526 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
1527 {
1528 	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
1529 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1530 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1531 	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
1532 		GFP_KERNEL);
1533 	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
1534 			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
1535 
1536 	if (!a6xx_state)
1537 		return ERR_PTR(-ENOMEM);
1538 
1539 	INIT_LIST_HEAD(&a6xx_state->objs);
1540 
1541 	/* Get the generic state from the adreno core */
1542 	adreno_gpu_state_get(gpu, &a6xx_state->base);
1543 
1544 	if (!adreno_has_gmu_wrapper(adreno_gpu)) {
1545 		a6xx_get_gmu_registers(gpu, a6xx_state);
1546 
1547 		a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
1548 		a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
1549 		a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);
1550 
1551 		a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
1552 	}
1553 
	/* If GX isn't on, the rest of the data isn't going to be accessible */
1555 	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1556 		return &a6xx_state->base;
1557 
1558 	/* Get the banks of indexed registers */
1559 	if (adreno_is_a7xx(adreno_gpu))
1560 		a7xx_get_indexed_registers(gpu, a6xx_state);
1561 	else
1562 		a6xx_get_indexed_registers(gpu, a6xx_state);
1563 
1564 	/*
1565 	 * Try to initialize the crashdumper, if we are not dumping state
1566 	 * with the SMMU stalled.  The crashdumper needs memory access to
1567 	 * write out GPU state, so we need to skip this when the SMMU is
1568 	 * stalled in response to an iova fault
1569 	 */
1570 	if (!stalled && !gpu->needs_hw_init &&
1571 	    !a6xx_crashdumper_init(gpu, &_dumper)) {
1572 		dumper = &_dumper;
1573 	}
1574 
1575 	if (adreno_is_a7xx(adreno_gpu)) {
1576 		a7xx_get_registers(gpu, a6xx_state, dumper);
1577 
1578 		if (dumper) {
1579 			a7xx_get_shaders(gpu, a6xx_state, dumper);
1580 			a7xx_get_clusters(gpu, a6xx_state, dumper);
1581 			a7xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1582 
1583 			msm_gem_kernel_put(dumper->bo, gpu->aspace);
1584 		}
1585 
1586 		a7xx_get_post_crashdumper_registers(gpu, a6xx_state);
1587 	} else {
1588 		a6xx_get_registers(gpu, a6xx_state, dumper);
1589 
1590 		if (dumper) {
1591 			a6xx_get_shaders(gpu, a6xx_state, dumper);
1592 			a6xx_get_clusters(gpu, a6xx_state, dumper);
1593 			a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1594 
1595 			msm_gem_kernel_put(dumper->bo, gpu->aspace);
1596 		}
1597 	}
1598 
1599 	if (snapshot_debugbus)
1600 		a6xx_get_debugbus(gpu, a6xx_state);
1601 
1602 	a6xx_state->gpu_initialized = !gpu->needs_hw_init;
1603 
	return &a6xx_state->base;
1605 }
1606 
1607 static void a6xx_gpu_state_destroy(struct kref *kref)
1608 {
1609 	struct a6xx_state_memobj *obj, *tmp;
1610 	struct msm_gpu_state *state = container_of(kref,
1611 			struct msm_gpu_state, ref);
1612 	struct a6xx_gpu_state *a6xx_state = container_of(state,
1613 			struct a6xx_gpu_state, base);
1614 
1615 	if (a6xx_state->gmu_log)
1616 		kvfree(a6xx_state->gmu_log->data);
1617 
1618 	if (a6xx_state->gmu_hfi)
1619 		kvfree(a6xx_state->gmu_hfi->data);
1620 
1621 	if (a6xx_state->gmu_debug)
1622 		kvfree(a6xx_state->gmu_debug->data);
1623 
1624 	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
1625 		list_del(&obj->node);
1626 		kvfree(obj);
1627 	}
1628 
1629 	adreno_gpu_state_destroy(state);
1630 	kfree(a6xx_state);
1631 }
1632 
1633 int a6xx_gpu_state_put(struct msm_gpu_state *state)
1634 {
1635 	if (IS_ERR_OR_NULL(state))
1636 		return 1;
1637 
1638 	return kref_put(&state->ref, a6xx_gpu_state_destroy);
1639 }
1640 
1641 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
1642 		struct drm_printer *p)
1643 {
1644 	int i, index = 0;
1645 
1646 	if (!data)
1647 		return;
1648 
1649 	for (i = 0; i < count; i += 2) {
1650 		u32 count = RANGE(registers, i);
1651 		u32 offset = registers[i];
1652 		int j;
1653 
1654 		for (j = 0; j < count; index++, offset++, j++) {
1655 			if (data[index] == 0xdeafbead)
1656 				continue;
1657 
1658 			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
1659 				offset << 2, data[index]);
1660 		}
1661 	}
1662 }
1663 
1664 static void a7xx_show_registers_indented(const u32 *registers, u32 *data,
1665 		struct drm_printer *p, unsigned indent)
1666 {
1667 	int i, index = 0;
1668 
1669 	for (i = 0; registers[i] != UINT_MAX; i += 2) {
1670 		u32 count = RANGE(registers, i);
1671 		u32 offset = registers[i];
1672 		int j;
1673 
1674 		for (j = 0; j < count; index++, offset++, j++) {
1675 			int k;
1676 
1677 			if (data[index] == 0xdeafbead)
1678 				continue;
1679 
1680 			for (k = 0; k < indent; k++)
1681 				drm_printf(p, "  ");
1682 			drm_printf(p, "- { offset: 0x%06x, value: 0x%08x }\n",
1683 				offset << 2, data[index]);
1684 		}
1685 	}
1686 }
1687 
1688 static void a7xx_show_registers(const u32 *registers, u32 *data, struct drm_printer *p)
1689 {
1690 	a7xx_show_registers_indented(registers, data, p, 1);
1691 }
1692 
1693 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1694 {
1695 	char out[ASCII85_BUFSZ];
1696 	long i, l, datalen = 0;
1697 
1698 	for (i = 0; i < len >> 2; i++) {
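	/* Find the last non-zero dword so trailing zeroes aren't encoded */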
1699 		if (data[i])
1700 			datalen = (i + 1) << 2;
1701 	}
1702 
1703 	if (datalen == 0)
1704 		return;
1705 
1706 	drm_puts(p, "    data: !!ascii85 |\n");
1707 	drm_puts(p, "      ");
1708 
1709 
1710 	l = ascii85_encode_len(datalen);
1711 
1712 	for (i = 0; i < l; i++)
1713 		drm_puts(p, ascii85_encode(data[i], out));
1714 
1715 	drm_puts(p, "\n");
1716 }
1717 
1718 static void print_name(struct drm_printer *p, const char *fmt, const char *name)
1719 {
1720 	drm_puts(p, fmt);
1721 	drm_puts(p, name);
1722 	drm_puts(p, "\n");
1723 }
1724 
1725 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1726 		struct drm_printer *p)
1727 {
1728 	const struct a6xx_shader_block *block = obj->handle;
1729 	int i;
1730 
1731 	if (!obj->handle)
1732 		return;
1733 
1734 	print_name(p, "  - type: ", block->name);
1735 
1736 	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1737 		drm_printf(p, "    - bank: %d\n", i);
1738 		drm_printf(p, "      size: %d\n", block->size);
1739 
1740 		if (!obj->data)
1741 			continue;
1742 
1743 		print_ascii85(p, block->size << 2,
1744 			obj->data + (block->size * i));
1745 	}
1746 }
1747 
1748 static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj,
1749 		struct drm_printer *p)
1750 {
1751 	const struct gen7_shader_block *block = obj->handle;
1752 	int i, j;
1753 	u32 *data = obj->data;
1754 
1755 	if (!obj->handle)
1756 		return;
1757 
1758 	print_name(p, "  - type: ", a7xx_statetype_names[block->statetype]);
1759 	print_name(p, "    - pipe: ", a7xx_pipe_names[block->pipeid]);
1760 
1761 	for (i = 0; i < block->num_sps; i++) {
1762 		drm_printf(p, "      - sp: %d\n", i);
1763 
1764 		for (j = 0; j < block->num_usptps; j++) {
1765 			drm_printf(p, "        - usptp: %d\n", j);
1766 			drm_printf(p, "          size: %d\n", block->size);
1767 
1768 			if (!obj->data)
1769 				continue;
1770 
1771 			print_ascii85(p, block->size << 2, data);
1772 
1773 			data += block->size;
1774 		}
1775 	}
1776 }
1777 
1778 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1779 		struct drm_printer *p)
1780 {
1781 	int ctx, index = 0;
1782 
1783 	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1784 		int j;
1785 
1786 		drm_printf(p, "    - context: %d\n", ctx);
1787 
1788 		for (j = 0; j < size; j += 2) {
1789 			u32 count = RANGE(registers, j);
1790 			u32 offset = registers[j];
1791 			int k;
1792 
1793 			for (k = 0; k < count; index++, offset++, k++) {
1794 				if (data[index] == 0xdeafbead)
1795 					continue;
1796 
1797 				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
1798 					offset << 2, data[index]);
1799 			}
1800 		}
1801 	}
1802 }
1803 
1804 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1805 		struct drm_printer *p)
1806 {
1807 	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1808 
1809 	if (dbgahb) {
1810 		print_name(p, "  - cluster-name: ", dbgahb->name);
1811 		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1812 			obj->data, p);
1813 	}
1814 }
1815 
1816 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1817 		struct drm_printer *p)
1818 {
1819 	const struct a6xx_cluster *cluster = obj->handle;
1820 
1821 	if (cluster) {
1822 		print_name(p, "  - cluster-name: ", cluster->name);
1823 		a6xx_show_cluster_data(cluster->registers, cluster->count,
1824 			obj->data, p);
1825 	}
1826 }
1827 
1828 static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1829 		struct drm_printer *p)
1830 {
1831 	const struct gen7_sptp_cluster_registers *dbgahb = obj->handle;
1832 
1833 	if (dbgahb) {
1834 		print_name(p, "  - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]);
1835 		print_name(p, "    - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]);
1836 		drm_printf(p, "      - context: %d\n", dbgahb->context_id);
1837 		a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4);
1838 	}
1839 }
1840 
1841 static void a7xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1842 		struct drm_printer *p)
1843 {
1844 	const struct gen7_cluster_registers *cluster = obj->handle;
1845 
1846 	if (cluster) {
1847 		int context = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;
1848 
1849 		print_name(p, "  - pipe: ", a7xx_pipe_names[cluster->pipe_id]);
1850 		print_name(p, "    - cluster-name: ", a7xx_cluster_names[cluster->cluster_id]);
1851 		drm_printf(p, "      - context: %d\n", context);
1852 		a7xx_show_registers_indented(cluster->regs, obj->data, p, 4);
1853 	}
1854 }
1855 
1856 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1857 		struct drm_printer *p)
1858 {
1859 	const struct a6xx_indexed_registers *indexed = obj->handle;
1860 
1861 	if (!indexed)
1862 		return;
1863 
1864 	print_name(p, "  - regs-name: ", indexed->name);
1865 	drm_printf(p, "    dwords: %d\n", indexed->count);
1866 
1867 	print_ascii85(p, indexed->count << 2, obj->data);
1868 }
1869 
1870 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1871 		u32 *data, struct drm_printer *p)
1872 {
1873 	if (block) {
1874 		print_name(p, "  - debugbus-block: ", block->name);
1875 
1876 		/*
1877 		 * count for regular debugbus data is in quadwords,
1878 		 * but print the size in dwords for consistency
1879 		 */
1880 		drm_printf(p, "    count: %d\n", block->count << 1);
1881 
1882 		print_ascii85(p, block->count << 3, data);
1883 	}
1884 }
1885 
1886 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1887 		struct drm_printer *p)
1888 {
1889 	int i;
1890 
1891 	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1892 		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1893 
1894 		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1895 	}
1896 
1897 	if (a6xx_state->vbif_debugbus) {
1898 		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1899 
1900 		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
1901 		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1902 
1903 		/* vbif debugbus data is in dwords.  Confusing, huh? */
1904 		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1905 	}
1906 
1907 	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1908 		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1909 
1910 		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1911 	}
1912 }
1913 
1914 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1915 		struct drm_printer *p)
1916 {
1917 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1918 	struct a6xx_gpu_state *a6xx_state = container_of(state,
1919 			struct a6xx_gpu_state, base);
1920 	int i;
1921 
1922 	if (IS_ERR_OR_NULL(state))
1923 		return;
1924 
1925 	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);
1926 
1927 	adreno_show(gpu, state, p);
1928 
1929 	drm_puts(p, "gmu-log:\n");
1930 	if (a6xx_state->gmu_log) {
1931 		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
1932 
1933 		drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
1934 		drm_printf(p, "    size: %zu\n", gmu_log->size);
1935 		adreno_show_object(p, &gmu_log->data, gmu_log->size,
1936 				&gmu_log->encoded);
1937 	}
1938 
1939 	drm_puts(p, "gmu-hfi:\n");
1940 	if (a6xx_state->gmu_hfi) {
1941 		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
1942 		unsigned i, j;
1943 
1944 		drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
1945 		drm_printf(p, "    size: %zu\n", gmu_hfi->size);
1946 		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
1947 			drm_printf(p, "    queue-history[%u]:", i);
1948 			for (j = 0; j < HFI_HISTORY_SZ; j++) {
1949 				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
1950 			}
1951 			drm_printf(p, "\n");
1952 		}
1953 		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
1954 				&gmu_hfi->encoded);
1955 	}
1956 
1957 	drm_puts(p, "gmu-debug:\n");
1958 	if (a6xx_state->gmu_debug) {
1959 		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;
1960 
1961 		drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
1962 		drm_printf(p, "    size: %zu\n", gmu_debug->size);
1963 		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
1964 				&gmu_debug->encoded);
1965 	}
1966 
1967 	drm_puts(p, "registers:\n");
1968 	for (i = 0; i < a6xx_state->nr_registers; i++) {
1969 		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
1970 
1971 		if (!obj->handle)
1972 			continue;
1973 
1974 		if (adreno_is_a7xx(adreno_gpu)) {
1975 			a7xx_show_registers(obj->handle, obj->data, p);
1976 		} else {
1977 			const struct a6xx_registers *regs = obj->handle;
1978 
1979 			a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1980 		}
1981 	}
1982 
1983 	drm_puts(p, "registers-gmu:\n");
1984 	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
1985 		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
1986 		const struct a6xx_registers *regs = obj->handle;
1987 
1988 		if (!obj->handle)
1989 			continue;
1990 
1991 		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1992 	}
1993 
1994 	drm_puts(p, "indexed-registers:\n");
1995 	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
1996 		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
1997 
1998 	drm_puts(p, "shader-blocks:\n");
1999 	for (i = 0; i < a6xx_state->nr_shaders; i++) {
2000 		if (adreno_is_a7xx(adreno_gpu))
2001 			a7xx_show_shader(&a6xx_state->shaders[i], p);
2002 		else
2003 			a6xx_show_shader(&a6xx_state->shaders[i], p);
2004 	}
2005 
2006 	drm_puts(p, "clusters:\n");
2007 	for (i = 0; i < a6xx_state->nr_clusters; i++) {
2008 		if (adreno_is_a7xx(adreno_gpu))
2009 			a7xx_show_cluster(&a6xx_state->clusters[i], p);
2010 		else
2011 			a6xx_show_cluster(&a6xx_state->clusters[i], p);
2012 	}
2013 
2014 	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) {
2015 		if (adreno_is_a7xx(adreno_gpu))
2016 			a7xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2017 		else
2018 			a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2019 	}
2020 
2021 	drm_puts(p, "debugbus:\n");
2022 	a6xx_show_debugbus(a6xx_state, p);
2023 }
2024