xref: /linux/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c (revision 170aafe35cb98e0f3fbacb446ea86389fbce22ea)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
3 
4 #include <linux/ascii85.h>
5 #include "msm_gem.h"
6 #include "a6xx_gpu.h"
7 #include "a6xx_gmu.h"
8 #include "a6xx_gpu_state.h"
9 #include "a6xx_gmu.xml.h"
10 
11 static const unsigned int *gen7_0_0_external_core_regs[] __always_unused;
12 static const unsigned int *gen7_2_0_external_core_regs[] __always_unused;
13 static const unsigned int *gen7_9_0_external_core_regs[] __always_unused;
14 static struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] __always_unused;
15 static const u32 gen7_9_0_cx_debugbus_blocks[] __always_unused;
16 
17 #include "adreno_gen7_0_0_snapshot.h"
18 #include "adreno_gen7_2_0_snapshot.h"
19 #include "adreno_gen7_9_0_snapshot.h"
20 
21 struct a6xx_gpu_state_obj {
22 	const void *handle;
23 	u32 *data;
24 	u32 count;	/* optional, used when the count is potentially read from hw */
25 };
26 
27 struct a6xx_gpu_state {
28 	struct msm_gpu_state base;
29 
30 	struct a6xx_gpu_state_obj *gmu_registers;
31 	int nr_gmu_registers;
32 
33 	struct a6xx_gpu_state_obj *registers;
34 	int nr_registers;
35 
36 	struct a6xx_gpu_state_obj *shaders;
37 	int nr_shaders;
38 
39 	struct a6xx_gpu_state_obj *clusters;
40 	int nr_clusters;
41 
42 	struct a6xx_gpu_state_obj *dbgahb_clusters;
43 	int nr_dbgahb_clusters;
44 
45 	struct a6xx_gpu_state_obj *indexed_regs;
46 	int nr_indexed_regs;
47 
48 	struct a6xx_gpu_state_obj *debugbus;
49 	int nr_debugbus;
50 
51 	struct a6xx_gpu_state_obj *vbif_debugbus;
52 
53 	struct a6xx_gpu_state_obj *cx_debugbus;
54 	int nr_cx_debugbus;
55 
56 	struct msm_gpu_state_bo *gmu_log;
57 	struct msm_gpu_state_bo *gmu_hfi;
58 	struct msm_gpu_state_bo *gmu_debug;
59 
60 	s32 hfi_queue_history[2][HFI_HISTORY_SZ];
61 
62 	struct list_head objs;
63 
64 	bool gpu_initialized;
65 };
66 
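/*
 * The CP crashdumper executes a script of 64-bit word pairs.  Each helper
 * below emits one entry and returns the number of u64 words written so the
 * caller can advance its script pointer: CRASHDUMP_WRITE stores a value into
 * a register (bit 21 plus a count of 1 in the second word), CRASHDUMP_READ
 * copies 'dwords' registers starting at 'reg' into the target iova, and the
 * zeroed pair from CRASHDUMP_FINI terminates the script.
 */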
67 static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
68 {
69 	in[0] = val;
70 	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
71 
72 	return 2;
73 }
74 
75 static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
76 {
77 	in[0] = target;
78 	in[1] = (((u64) reg) << 44 | dwords);
79 
80 	return 2;
81 }
82 
83 static inline int CRASHDUMP_FINI(u64 *in)
84 {
85 	in[0] = 0;
86 	in[1] = 0;
87 
88 	return 2;
89 }
90 
91 struct a6xx_crashdumper {
92 	void *ptr;
93 	struct drm_gem_object *bo;
94 	u64 iova;
95 };
96 
97 struct a6xx_state_memobj {
98 	struct list_head node;
99 	unsigned long long data[];
100 };
101 
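/*
 * Scratch allocations for the snapshot are chained on a6xx_state->objs so
 * that a6xx_gpu_state_destroy() can free them all in one pass when the last
 * reference to the state is dropped.
 */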
102 static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
103 {
104 	struct a6xx_state_memobj *obj =
105 		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
106 
107 	if (!obj)
108 		return NULL;
109 
110 	list_add_tail(&obj->node, &a6xx_state->objs);
111 	return &obj->data;
112 }
113 
114 static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
115 		size_t size)
116 {
117 	void *dst = state_kcalloc(a6xx_state, 1, size);
118 
119 	if (dst)
120 		memcpy(dst, src, size);
121 	return dst;
122 }
123 
124 /*
125  * Allocate 1MB for the crashdumper scratch region - 8k for the script and
126  * the rest for the data
127  */
128 #define A6XX_CD_DATA_OFFSET 8192
129 #define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
130 
131 static int a6xx_crashdumper_init(struct msm_gpu *gpu,
132 		struct a6xx_crashdumper *dumper)
133 {
134 	dumper->ptr = msm_gem_kernel_new(gpu->dev,
135 		SZ_1M, MSM_BO_WC, gpu->aspace,
136 		&dumper->bo, &dumper->iova);
137 
138 	if (!IS_ERR(dumper->ptr))
139 		msm_gem_object_set_name(dumper->bo, "crashdump");
140 
141 	return PTR_ERR_OR_ZERO(dumper->ptr);
142 }
143 
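/*
 * Point the CP at the script, kick off the dump and wait for bit 1 of
 * CP_CRASH_DUMP_STATUS, which flags completion.  The dump can only make
 * progress while SPTPRAC is powered, hence the a6xx_gmu_sptprac_is_on()
 * check.
 */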
144 static int a6xx_crashdumper_run(struct msm_gpu *gpu,
145 		struct a6xx_crashdumper *dumper)
146 {
147 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
148 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
149 	u32 val;
150 	int ret;
151 
152 	if (IS_ERR_OR_NULL(dumper->ptr))
153 		return -EINVAL;
154 
155 	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
156 		return -EINVAL;
157 
158 	/* Make sure all pending memory writes are posted */
159 	wmb();
160 
161 	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE, dumper->iova);
162 
163 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
164 
165 	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
166 		val & 0x02, 100, 10000);
167 
168 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
169 
170 	return ret;
171 }
172 
173 /* read a value from the GX debug bus */
174 static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
175 		u32 *data)
176 {
177 	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
178 		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
179 
180 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
181 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
182 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
183 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
184 
185 	/* Wait 1 us to make sure the data is flowing */
186 	udelay(1);
187 
188 	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
189 	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
190 
191 	return 2;
192 }
193 
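/*
 * The CX debug bus lives behind its own ioremap rather than the GPU MMIO
 * accessors, so these helpers convert a dword register offset into the byte
 * offset that readl()/writel() expect.
 */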
194 #define cxdbg_write(ptr, offset, val) \
195 	writel((val), (ptr) + ((offset) << 2))
196 
197 #define cxdbg_read(ptr, offset) \
198 	readl((ptr) + ((offset) << 2))
199 
200 /* read a value from the CX debug bus */
201 static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
202 		u32 *data)
203 {
204 	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
205 		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
206 
207 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
208 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
209 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
210 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
211 
212 	/* Wait 1 us to make sure the data is flowing */
213 	udelay(1);
214 
215 	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
216 	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
217 
218 	return 2;
219 }
220 
221 /* Read a chunk of data from the VBIF debug bus */
222 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
223 		u32 reg, int count, u32 *data)
224 {
225 	int i;
226 
227 	gpu_write(gpu, ctrl0, reg);
228 
229 	for (i = 0; i < count; i++) {
230 		gpu_write(gpu, ctrl1, i);
231 		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
232 	}
233 
234 	return count;
235 }
236 
237 #define AXI_ARB_BLOCKS 2
238 #define XIN_AXI_BLOCKS 5
239 #define XIN_CORE_BLOCKS 4
240 
241 #define VBIF_DEBUGBUS_BLOCK_SIZE \
242 	((16 * AXI_ARB_BLOCKS) + \
243 	 (18 * XIN_AXI_BLOCKS) + \
244 	 (12 * XIN_CORE_BLOCKS))
245 
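/*
 * One dword is captured per sample: 16 samples for each AXI arbiter block,
 * 18 for each XIN AXI block and 12 for each XIN core block, matching the
 * counts passed to vbif_debugbus_read() below.
 */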
246 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
247 		struct a6xx_gpu_state *a6xx_state,
248 		struct a6xx_gpu_state_obj *obj)
249 {
250 	u32 clk, *ptr;
251 	int i;
252 
253 	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
254 		sizeof(u32));
255 	if (!obj->data)
256 		return;
257 
258 	obj->handle = NULL;
259 
260 	/* Get the current clock setting */
261 	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
262 
263 	/* Force on the bus so we can read it */
264 	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
265 		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
266 
267 	/* We will read from BUS2 first, so disable BUS1 */
268 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
269 
270 	/* Enable the VBIF bus for reading */
271 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
272 
273 	ptr = obj->data;
274 
275 	for (i = 0; i < AXI_ARB_BLOCKS; i++)
276 		ptr += vbif_debugbus_read(gpu,
277 			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
278 			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
279 			1 << (i + 16), 16, ptr);
280 
281 	for (i = 0; i < XIN_AXI_BLOCKS; i++)
282 		ptr += vbif_debugbus_read(gpu,
283 			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
284 			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
285 			1 << i, 18, ptr);
286 
287 	/* Stop BUS2 so we can turn on BUS1 */
288 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
289 
290 	for (i = 0; i < XIN_CORE_BLOCKS; i++)
291 		ptr += vbif_debugbus_read(gpu,
292 			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
293 			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
294 			1 << i, 12, ptr);
295 
296 	/* Restore the VBIF clock setting */
297 	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
298 }
299 
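/*
 * Each debugbus sample is a pair of dwords (TRACE_BUF2 then TRACE_BUF1), so
 * the capture buffer is sized as block->count 64-bit entries.
 */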
300 static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
301 		struct a6xx_gpu_state *a6xx_state,
302 		const struct a6xx_debugbus_block *block,
303 		struct a6xx_gpu_state_obj *obj)
304 {
305 	int i;
306 	u32 *ptr;
307 
308 	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
309 	if (!obj->data)
310 		return;
311 
312 	obj->handle = block;
313 
314 	for (ptr = obj->data, i = 0; i < block->count; i++)
315 		ptr += debugbus_read(gpu, block->id, i, ptr);
316 }
317 
318 static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
319 		struct a6xx_gpu_state *a6xx_state,
320 		const struct a6xx_debugbus_block *block,
321 		struct a6xx_gpu_state_obj *obj)
322 {
323 	int i;
324 	u32 *ptr;
325 
326 	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
327 	if (!obj->data)
328 		return;
329 
330 	obj->handle = block;
331 
332 	for (ptr = obj->data, i = 0; i < block->count; i++)
333 		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
334 }
335 
336 static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu,
337 		struct a6xx_gpu_state *a6xx_state)
338 {
339 	int nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
340 		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
341 
342 	if (adreno_is_a650_family(to_adreno_gpu(gpu)))
343 		nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks);
344 
345 	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
346 			sizeof(*a6xx_state->debugbus));
347 
348 	if (a6xx_state->debugbus) {
349 		int i;
350 
351 		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
352 			a6xx_get_debugbus_block(gpu,
353 				a6xx_state,
354 				&a6xx_debugbus_blocks[i],
355 				&a6xx_state->debugbus[i]);
356 
357 		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
358 
359 		/*
360 		 * GBIF has the same debugbus interface as the other GPU blocks,
361 		 * so when the GPU uses GBIF it is read through the same default
362 		 * path. GBIF also uses exactly the same block ID as VBIF.
363 		 */
364 		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
365 			a6xx_get_debugbus_block(gpu, a6xx_state,
366 				&a6xx_gbif_debugbus_block,
367 				&a6xx_state->debugbus[i]);
368 
369 			a6xx_state->nr_debugbus += 1;
370 		}
371 
373 		if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
374 			for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++)
375 				a6xx_get_debugbus_block(gpu,
376 					a6xx_state,
377 					&a650_debugbus_blocks[i],
378 					&a6xx_state->debugbus[a6xx_state->nr_debugbus + i]);

			a6xx_state->nr_debugbus += ARRAY_SIZE(a650_debugbus_blocks);
379 		}
380 	}
381 }
382 
383 static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu,
384 		struct a6xx_gpu_state *a6xx_state)
385 {
386 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
387 	int debugbus_blocks_count, gbif_debugbus_blocks_count, total_debugbus_blocks;
388 	const u32 *debugbus_blocks, *gbif_debugbus_blocks;
389 	int i;
390 
391 	if (adreno_is_a730(adreno_gpu)) {
392 		debugbus_blocks = gen7_0_0_debugbus_blocks;
393 		debugbus_blocks_count = ARRAY_SIZE(gen7_0_0_debugbus_blocks);
394 		gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks;
395 		gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
396 	} else if (adreno_is_a740_family(adreno_gpu)) {
397 		debugbus_blocks = gen7_2_0_debugbus_blocks;
398 		debugbus_blocks_count = ARRAY_SIZE(gen7_2_0_debugbus_blocks);
399 		gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks;
400 		gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
401 	} else {
402 		BUG_ON(!adreno_is_a750(adreno_gpu));
403 		debugbus_blocks = gen7_9_0_debugbus_blocks;
404 		debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_debugbus_blocks);
405 		gbif_debugbus_blocks = gen7_9_0_gbif_debugbus_blocks;
406 		gbif_debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_gbif_debugbus_blocks);
407 	}
408 
409 	total_debugbus_blocks = debugbus_blocks_count + gbif_debugbus_blocks_count;
410 
411 	a6xx_state->debugbus = state_kcalloc(a6xx_state, total_debugbus_blocks,
412 			sizeof(*a6xx_state->debugbus));
413 
414 	if (a6xx_state->debugbus) {
415 		for (i = 0; i < debugbus_blocks_count; i++) {
416 			a6xx_get_debugbus_block(gpu,
417 				a6xx_state, &a7xx_debugbus_blocks[debugbus_blocks[i]],
418 				&a6xx_state->debugbus[i]);
419 		}
420 
421 		for (i = 0; i < gbif_debugbus_blocks_count; i++) {
422 			a6xx_get_debugbus_block(gpu,
423 				a6xx_state, &a7xx_debugbus_blocks[gbif_debugbus_blocks[i]],
424 				&a6xx_state->debugbus[i + debugbus_blocks_count]);
425 		}
426 	}
428 }
429 
430 static void a6xx_get_debugbus(struct msm_gpu *gpu,
431 		struct a6xx_gpu_state *a6xx_state)
432 {
433 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
434 	struct resource *res;
435 	void __iomem *cxdbg = NULL;
436 
437 	/* Set up the GX debug bus */
438 
439 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
440 		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
441 
442 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
443 		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
444 
445 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
446 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
447 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
448 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
449 
450 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
451 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
452 
453 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
454 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
455 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
456 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
457 
458 	/* Set up the CX debug bus - it lives elsewhere in the system so do a
459 	 * temporary ioremap for the registers
460 	 */
461 	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
462 			"cx_dbgc");
463 
464 	if (res)
465 		cxdbg = ioremap(res->start, resource_size(res));
466 
467 	if (cxdbg) {
468 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
469 			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
470 
471 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
472 			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
473 
474 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
475 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
476 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
477 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
478 
479 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
480 			0x76543210);
481 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
482 			0xFEDCBA98);
483 
484 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
485 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
486 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
487 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
488 	}
489 
490 	if (adreno_is_a7xx(adreno_gpu)) {
491 		a7xx_get_debugbus_blocks(gpu, a6xx_state);
492 	} else {
493 		a6xx_get_debugbus_blocks(gpu, a6xx_state);
494 	}
495 
496 	/* Dump the VBIF debugbus on applicable targets */
497 	if (!a6xx_has_gbif(adreno_gpu)) {
498 		a6xx_state->vbif_debugbus =
499 			state_kcalloc(a6xx_state, 1,
500 					sizeof(*a6xx_state->vbif_debugbus));
501 
502 		if (a6xx_state->vbif_debugbus)
503 			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
504 					a6xx_state->vbif_debugbus);
505 	}
506 
507 	if (cxdbg) {
508 		unsigned nr_cx_debugbus_blocks;
509 		const struct a6xx_debugbus_block *cx_debugbus_blocks;
510 
511 		if (adreno_is_a7xx(adreno_gpu)) {
512 			BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
513 			cx_debugbus_blocks = a7xx_cx_debugbus_blocks;
514 			nr_cx_debugbus_blocks = ARRAY_SIZE(a7xx_cx_debugbus_blocks);
515 		} else {
516 			cx_debugbus_blocks = a6xx_cx_debugbus_blocks;
517 			nr_cx_debugbus_blocks = ARRAY_SIZE(a6xx_cx_debugbus_blocks);
518 		}
519 
520 		a6xx_state->cx_debugbus =
521 			state_kcalloc(a6xx_state,
522 			nr_cx_debugbus_blocks,
523 			sizeof(*a6xx_state->cx_debugbus));
524 
525 		if (a6xx_state->cx_debugbus) {
526 			int i;
527 
528 			for (i = 0; i < nr_cx_debugbus_blocks; i++)
529 				a6xx_get_cx_debugbus_block(cxdbg,
530 					a6xx_state,
531 					&cx_debugbus_blocks[i],
532 					&a6xx_state->cx_debugbus[i]);
533 
534 			a6xx_state->nr_cx_debugbus =
535 				nr_cx_debugbus_blocks;
536 		}
537 
538 		iounmap(cxdbg);
539 	}
540 }
541 
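/*
 * Static register lists are stored as pairs of inclusive (start, end)
 * offsets; RANGE() returns the number of registers covered by the pair
 * starting at index 'a'.
 */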
542 #define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
543 
544 /* Read a data cluster from behind the AHB aperture */
545 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
546 		struct a6xx_gpu_state *a6xx_state,
547 		const struct a6xx_dbgahb_cluster *dbgahb,
548 		struct a6xx_gpu_state_obj *obj,
549 		struct a6xx_crashdumper *dumper)
550 {
551 	u64 *in = dumper->ptr;
552 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
553 	size_t datasize;
554 	int i, regcount = 0;
555 
556 	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
557 		int j;
558 
559 		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
560 			(dbgahb->statetype + i * 2) << 8);
561 
562 		for (j = 0; j < dbgahb->count; j += 2) {
563 			int count = RANGE(dbgahb->registers, j);
564 			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
565 				dbgahb->registers[j] - (dbgahb->base >> 2);
566 
567 			in += CRASHDUMP_READ(in, offset, count, out);
568 
569 			out += count * sizeof(u32);
570 
571 			if (i == 0)
572 				regcount += count;
573 		}
574 	}
575 
576 	CRASHDUMP_FINI(in);
577 
578 	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
579 
580 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
581 		return;
582 
583 	if (a6xx_crashdumper_run(gpu, dumper))
584 		return;
585 
586 	obj->handle = dbgahb;
587 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
588 		datasize);
589 }
590 
591 static void a7xx_get_dbgahb_cluster(struct msm_gpu *gpu,
592 		struct a6xx_gpu_state *a6xx_state,
593 		const struct gen7_sptp_cluster_registers *dbgahb,
594 		struct a6xx_gpu_state_obj *obj,
595 		struct a6xx_crashdumper *dumper)
596 {
597 	u64 *in = dumper->ptr;
598 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
599 	size_t datasize;
600 	int i, regcount = 0;
601 
602 	in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
603 		A7XX_SP_READ_SEL_LOCATION(dbgahb->location_id) |
604 		A7XX_SP_READ_SEL_PIPE(dbgahb->pipe_id) |
605 		A7XX_SP_READ_SEL_STATETYPE(dbgahb->statetype));
606 
607 	for (i = 0; dbgahb->regs[i] != UINT_MAX; i += 2) {
608 		int count = RANGE(dbgahb->regs, i);
609 		u32 offset = REG_A7XX_SP_AHB_READ_APERTURE +
610 			dbgahb->regs[i] - dbgahb->regbase;
611 
612 		in += CRASHDUMP_READ(in, offset, count, out);
613 
614 		out += count * sizeof(u32);
615 		regcount += count;
616 	}
617 
618 	CRASHDUMP_FINI(in);
619 
620 	datasize = regcount * sizeof(u32);
621 
622 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
623 		return;
624 
625 	if (a6xx_crashdumper_run(gpu, dumper))
626 		return;
627 
628 	obj->handle = dbgahb;
629 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
630 		datasize);
631 }
632 
633 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
634 		struct a6xx_gpu_state *a6xx_state,
635 		struct a6xx_crashdumper *dumper)
636 {
637 	int i;
638 
639 	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
640 		ARRAY_SIZE(a6xx_dbgahb_clusters),
641 		sizeof(*a6xx_state->dbgahb_clusters));
642 
643 	if (!a6xx_state->dbgahb_clusters)
644 		return;
645 
646 	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
647 
648 	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
649 		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
650 			&a6xx_dbgahb_clusters[i],
651 			&a6xx_state->dbgahb_clusters[i], dumper);
652 }
653 
654 static void a7xx_get_dbgahb_clusters(struct msm_gpu *gpu,
655 		struct a6xx_gpu_state *a6xx_state,
656 		struct a6xx_crashdumper *dumper)
657 {
658 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
659 	int i;
660 	const struct gen7_sptp_cluster_registers *dbgahb_clusters;
661 	unsigned dbgahb_clusters_size;
662 
663 	if (adreno_is_a730(adreno_gpu)) {
664 		dbgahb_clusters = gen7_0_0_sptp_clusters;
665 		dbgahb_clusters_size = ARRAY_SIZE(gen7_0_0_sptp_clusters);
666 	} else {
667 		BUG_ON(!adreno_is_a740_family(adreno_gpu));
668 		dbgahb_clusters = gen7_2_0_sptp_clusters;
669 		dbgahb_clusters_size = ARRAY_SIZE(gen7_2_0_sptp_clusters);
670 	}
671 
672 	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
673 		dbgahb_clusters_size,
674 		sizeof(*a6xx_state->dbgahb_clusters));
675 
676 	if (!a6xx_state->dbgahb_clusters)
677 		return;
678 
679 	a6xx_state->nr_dbgahb_clusters = dbgahb_clusters_size;
680 
681 	for (i = 0; i < dbgahb_clusters_size; i++)
682 		a7xx_get_dbgahb_cluster(gpu, a6xx_state,
683 			&dbgahb_clusters[i],
684 			&a6xx_state->dbgahb_clusters[i], dumper);
685 }
686 
687 /* Read a data cluster from the CP aperture with the crashdumper */
688 static void a6xx_get_cluster(struct msm_gpu *gpu,
689 		struct a6xx_gpu_state *a6xx_state,
690 		const struct a6xx_cluster *cluster,
691 		struct a6xx_gpu_state_obj *obj,
692 		struct a6xx_crashdumper *dumper)
693 {
694 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
695 	u64 *in = dumper->ptr;
696 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
697 	size_t datasize;
698 	int i, regcount = 0;
699 	u32 id = cluster->id;
700 
701 	/* Skip registers that are not present on older generation */
702 	if (!adreno_is_a660_family(adreno_gpu) &&
703 			cluster->registers == a660_fe_cluster)
704 		return;
705 
706 	if (adreno_is_a650_family(adreno_gpu) &&
707 			cluster->registers == a6xx_ps_cluster)
708 		id = CLUSTER_VPC_PS;
709 
710 	/* Some clusters need a selector register to be programmed too */
711 	if (cluster->sel_reg)
712 		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
713 
714 	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
715 		int j;
716 
717 		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
718 			(id << 8) | (i << 4) | i);
719 
720 		for (j = 0; j < cluster->count; j += 2) {
721 			int count = RANGE(cluster->registers, j);
722 
723 			in += CRASHDUMP_READ(in, cluster->registers[j],
724 				count, out);
725 
726 			out += count * sizeof(u32);
727 
728 			if (i == 0)
729 				regcount += count;
730 		}
731 	}
732 
733 	CRASHDUMP_FINI(in);
734 
735 	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
736 
737 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
738 		return;
739 
740 	if (a6xx_crashdumper_run(gpu, dumper))
741 		return;
742 
743 	obj->handle = cluster;
744 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
745 		datasize);
746 }
747 
748 static void a7xx_get_cluster(struct msm_gpu *gpu,
749 		struct a6xx_gpu_state *a6xx_state,
750 		const struct gen7_cluster_registers *cluster,
751 		struct a6xx_gpu_state_obj *obj,
752 		struct a6xx_crashdumper *dumper)
753 {
754 	u64 *in = dumper->ptr;
755 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
756 	size_t datasize;
757 	int i, regcount = 0;
758 
759 	/* Some clusters need a selector register to be programmed too */
760 	if (cluster->sel)
761 		in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val);
762 
763 	in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD,
764 		A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) |
765 		A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) |
766 		A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id));
767 
768 	for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) {
769 		int count = RANGE(cluster->regs, i);
770 
771 		in += CRASHDUMP_READ(in, cluster->regs[i],
772 			count, out);
773 
774 		out += count * sizeof(u32);
775 		regcount += count;
776 	}
777 
778 	CRASHDUMP_FINI(in);
779 
780 	datasize = regcount * sizeof(u32);
781 
782 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
783 		return;
784 
785 	if (a6xx_crashdumper_run(gpu, dumper))
786 		return;
787 
788 	obj->handle = cluster;
789 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
790 		datasize);
791 }
792 
793 static void a6xx_get_clusters(struct msm_gpu *gpu,
794 		struct a6xx_gpu_state *a6xx_state,
795 		struct a6xx_crashdumper *dumper)
796 {
797 	int i;
798 
799 	a6xx_state->clusters = state_kcalloc(a6xx_state,
800 		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
801 
802 	if (!a6xx_state->clusters)
803 		return;
804 
805 	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
806 
807 	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
808 		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
809 			&a6xx_state->clusters[i], dumper);
810 }
811 
812 static void a7xx_get_clusters(struct msm_gpu *gpu,
813 		struct a6xx_gpu_state *a6xx_state,
814 		struct a6xx_crashdumper *dumper)
815 {
816 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
817 	int i;
818 	const struct gen7_cluster_registers *clusters;
819 	unsigned clusters_size;
820 
821 	if (adreno_is_a730(adreno_gpu)) {
822 		clusters = gen7_0_0_clusters;
823 		clusters_size = ARRAY_SIZE(gen7_0_0_clusters);
824 	} else if (adreno_is_a740_family(adreno_gpu)) {
825 		clusters = gen7_2_0_clusters;
826 		clusters_size = ARRAY_SIZE(gen7_2_0_clusters);
827 	} else {
828 		BUG_ON(!adreno_is_a750(adreno_gpu));
829 		clusters = gen7_9_0_clusters;
830 		clusters_size = ARRAY_SIZE(gen7_9_0_clusters);
831 	}
832 
833 	a6xx_state->clusters = state_kcalloc(a6xx_state,
834 		clusters_size, sizeof(*a6xx_state->clusters));
835 
836 	if (!a6xx_state->clusters)
837 		return;
838 
839 	a6xx_state->nr_clusters = clusters_size;
840 
841 	for (i = 0; i < clusters_size; i++)
842 		a7xx_get_cluster(gpu, a6xx_state, &clusters[i],
843 			&a6xx_state->clusters[i], dumper);
844 }
845 
846 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */
847 static void a6xx_get_shader_block(struct msm_gpu *gpu,
848 		struct a6xx_gpu_state *a6xx_state,
849 		const struct a6xx_shader_block *block,
850 		struct a6xx_gpu_state_obj *obj,
851 		struct a6xx_crashdumper *dumper)
852 {
853 	u64 *in = dumper->ptr;
854 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
855 	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
856 	int i;
857 
858 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
859 		return;
860 
861 	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
862 		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
863 			(block->type << 8) | i);
864 
865 		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
866 			block->size, out);
867 
868 		out += block->size * sizeof(u32);
869 	}
870 
871 	CRASHDUMP_FINI(in);
872 
873 	if (a6xx_crashdumper_run(gpu, dumper))
874 		return;
875 
876 	obj->handle = block;
877 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
878 		datasize);
879 }
880 
881 static void a7xx_get_shader_block(struct msm_gpu *gpu,
882 		struct a6xx_gpu_state *a6xx_state,
883 		const struct gen7_shader_block *block,
884 		struct a6xx_gpu_state_obj *obj,
885 		struct a6xx_crashdumper *dumper)
886 {
887 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
888 	u64 *in = dumper->ptr;
889 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
890 	size_t datasize = block->size * block->num_sps * block->num_usptps * sizeof(u32);
891 	int i, j;
892 
893 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
894 		return;
895 
896 	if (adreno_is_a730(adreno_gpu)) {
897 		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 3);
898 	}
899 
900 	for (i = 0; i < block->num_sps; i++) {
901 		for (j = 0; j < block->num_usptps; j++) {
902 			in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
903 				A7XX_SP_READ_SEL_LOCATION(block->location) |
904 				A7XX_SP_READ_SEL_PIPE(block->pipeid) |
905 				A7XX_SP_READ_SEL_STATETYPE(block->statetype) |
906 				A7XX_SP_READ_SEL_USPTP(j) |
907 				A7XX_SP_READ_SEL_SPTP(i));
908 
909 			in += CRASHDUMP_READ(in, REG_A7XX_SP_AHB_READ_APERTURE,
910 				block->size, out);
911 
912 			out += block->size * sizeof(u32);
913 		}
914 	}
915 
916 	CRASHDUMP_FINI(in);
917 
918 	if (a6xx_crashdumper_run(gpu, dumper))
919 		goto out;
920 
921 	obj->handle = block;
922 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
923 		datasize);
924 
925 out:
926 	if (adreno_is_a730(adreno_gpu)) {
927 		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 0);
928 	}
929 }
930 
931 static void a6xx_get_shaders(struct msm_gpu *gpu,
932 		struct a6xx_gpu_state *a6xx_state,
933 		struct a6xx_crashdumper *dumper)
934 {
935 	int i;
936 
937 	a6xx_state->shaders = state_kcalloc(a6xx_state,
938 		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
939 
940 	if (!a6xx_state->shaders)
941 		return;
942 
943 	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
944 
945 	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
946 		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
947 			&a6xx_state->shaders[i], dumper);
948 }
949 
950 static void a7xx_get_shaders(struct msm_gpu *gpu,
951 		struct a6xx_gpu_state *a6xx_state,
952 		struct a6xx_crashdumper *dumper)
953 {
954 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
955 	const struct gen7_shader_block *shader_blocks;
956 	unsigned num_shader_blocks;
957 	int i;
958 
959 	if (adreno_is_a730(adreno_gpu)) {
960 		shader_blocks = gen7_0_0_shader_blocks;
961 		num_shader_blocks = ARRAY_SIZE(gen7_0_0_shader_blocks);
962 	} else if (adreno_is_a740_family(adreno_gpu)) {
963 		shader_blocks = gen7_2_0_shader_blocks;
964 		num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks);
965 	} else {
966 		BUG_ON(!adreno_is_a750(adreno_gpu));
967 		shader_blocks = gen7_9_0_shader_blocks;
968 		num_shader_blocks = ARRAY_SIZE(gen7_9_0_shader_blocks);
969 	}
970 
971 	a6xx_state->shaders = state_kcalloc(a6xx_state,
972 		num_shader_blocks, sizeof(*a6xx_state->shaders));
973 
974 	if (!a6xx_state->shaders)
975 		return;
976 
977 	a6xx_state->nr_shaders = num_shader_blocks;
978 
979 	for (i = 0; i < num_shader_blocks; i++)
980 		a7xx_get_shader_block(gpu, a6xx_state, &shader_blocks[i],
981 			&a6xx_state->shaders[i], dumper);
982 }
983 
984 /* Read registers from behind the HLSQ aperture with the crashdumper */
985 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
986 		struct a6xx_gpu_state *a6xx_state,
987 		const struct a6xx_registers *regs,
988 		struct a6xx_gpu_state_obj *obj,
989 		struct a6xx_crashdumper *dumper)
990 
991 {
992 	u64 *in = dumper->ptr;
993 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
994 	int i, regcount = 0;
995 
996 	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
997 
998 	for (i = 0; i < regs->count; i += 2) {
999 		u32 count = RANGE(regs->registers, i);
1000 		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
1001 			regs->registers[i] - (regs->val0 >> 2);
1002 
1003 		in += CRASHDUMP_READ(in, offset, count, out);
1004 
1005 		out += count * sizeof(u32);
1006 		regcount += count;
1007 	}
1008 
1009 	CRASHDUMP_FINI(in);
1010 
1011 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1012 		return;
1013 
1014 	if (a6xx_crashdumper_run(gpu, dumper))
1015 		return;
1016 
1017 	obj->handle = regs;
1018 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1019 		regcount * sizeof(u32));
1020 }
1021 
1022 /* Read a block of registers using the crashdumper */
1023 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
1024 		struct a6xx_gpu_state *a6xx_state,
1025 		const struct a6xx_registers *regs,
1026 		struct a6xx_gpu_state_obj *obj,
1027 		struct a6xx_crashdumper *dumper)
1028 
1029 {
1030 	u64 *in = dumper->ptr;
1031 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1032 	int i, regcount = 0;
1033 
1034 	/* Skip unsupported registers on older generations */
1035 	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1036 			(regs->registers == a660_registers))
1037 		return;
1038 
1039 	/* Some blocks might need to program a selector register first */
1040 	if (regs->val0)
1041 		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
1042 
1043 	for (i = 0; i < regs->count; i += 2) {
1044 		u32 count = RANGE(regs->registers, i);
1045 
1046 		in += CRASHDUMP_READ(in, regs->registers[i], count, out);
1047 
1048 		out += count * sizeof(u32);
1049 		regcount += count;
1050 	}
1051 
1052 	CRASHDUMP_FINI(in);
1053 
1054 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1055 		return;
1056 
1057 	if (a6xx_crashdumper_run(gpu, dumper))
1058 		return;
1059 
1060 	obj->handle = regs;
1061 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1062 		regcount * sizeof(u32));
1063 }
1064 
1065 static void a7xx_get_crashdumper_registers(struct msm_gpu *gpu,
1066 		struct a6xx_gpu_state *a6xx_state,
1067 		const struct gen7_reg_list *regs,
1068 		struct a6xx_gpu_state_obj *obj,
1069 		struct a6xx_crashdumper *dumper)
1070 
1071 {
1072 	u64 *in = dumper->ptr;
1073 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1074 	int i, regcount = 0;
1075 
1076 	/* Some blocks might need to program a selector register first */
1077 	if (regs->sel)
1078 		in += CRASHDUMP_WRITE(in, regs->sel->cd_reg, regs->sel->val);
1079 
1080 	for (i = 0; regs->regs[i] != UINT_MAX; i += 2) {
1081 		u32 count = RANGE(regs->regs, i);
1082 
1083 		in += CRASHDUMP_READ(in, regs->regs[i], count, out);
1084 
1085 		out += count * sizeof(u32);
1086 		regcount += count;
1087 	}
1088 
1089 	CRASHDUMP_FINI(in);
1090 
1091 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1092 		return;
1093 
1094 	if (a6xx_crashdumper_run(gpu, dumper))
1095 		return;
1096 
1097 	obj->handle = regs->regs;
1098 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1099 		regcount * sizeof(u32));
1100 }
1101 
1103 /* Read a block of registers via AHB */
1104 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1105 		struct a6xx_gpu_state *a6xx_state,
1106 		const struct a6xx_registers *regs,
1107 		struct a6xx_gpu_state_obj *obj)
1108 {
1109 	int i, regcount = 0, index = 0;
1110 
1111 	/* Skip unsupported registers on older generations */
1112 	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1113 			(regs->registers == a660_registers))
1114 		return;
1115 
1116 	for (i = 0; i < regs->count; i += 2)
1117 		regcount += RANGE(regs->registers, i);
1118 
1119 	obj->handle = (const void *) regs;
1120 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1121 	if (!obj->data)
1122 		return;
1123 
1124 	for (i = 0; i < regs->count; i += 2) {
1125 		u32 count = RANGE(regs->registers, i);
1126 		int j;
1127 
1128 		for (j = 0; j < count; j++)
1129 			obj->data[index++] = gpu_read(gpu,
1130 				regs->registers[i] + j);
1131 	}
1132 }
1133 
1134 static void a7xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1135 		struct a6xx_gpu_state *a6xx_state,
1136 		const u32 *regs,
1137 		struct a6xx_gpu_state_obj *obj)
1138 {
1139 	int i, regcount = 0, index = 0;
1140 
1141 	for (i = 0; regs[i] != UINT_MAX; i += 2)
1142 		regcount += RANGE(regs, i);
1143 
1144 	obj->handle = (const void *) regs;
1145 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1146 	if (!obj->data)
1147 		return;
1148 
1149 	for (i = 0; regs[i] != UINT_MAX; i += 2) {
1150 		u32 count = RANGE(regs, i);
1151 		int j;
1152 
1153 		for (j = 0; j < count; j++)
1154 			obj->data[index++] = gpu_read(gpu, regs[i] + j);
1155 	}
1156 }
1157 
1158 static void a7xx_get_ahb_gpu_reglist(struct msm_gpu *gpu,
1159 		struct a6xx_gpu_state *a6xx_state,
1160 		const struct gen7_reg_list *regs,
1161 		struct a6xx_gpu_state_obj *obj)
1162 {
1163 	if (regs->sel)
1164 		gpu_write(gpu, regs->sel->host_reg, regs->sel->val);
1165 
1166 	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, regs->regs, obj);
1167 }
1168 
1169 /* Read a block of GMU registers */
1170 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
1171 		struct a6xx_gpu_state *a6xx_state,
1172 		const struct a6xx_registers *regs,
1173 		struct a6xx_gpu_state_obj *obj,
1174 		bool rscc)
1175 {
1176 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1177 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1178 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1179 	int i, regcount = 0, index = 0;
1180 
1181 	for (i = 0; i < regs->count; i += 2)
1182 		regcount += RANGE(regs->registers, i);
1183 
1184 	obj->handle = (const void *) regs;
1185 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1186 	if (!obj->data)
1187 		return;
1188 
1189 	for (i = 0; i < regs->count; i += 2) {
1190 		u32 count = RANGE(regs->registers, i);
1191 		int j;
1192 
1193 		for (j = 0; j < count; j++) {
1194 			u32 offset = regs->registers[i] + j;
1195 			u32 val;
1196 
1197 			if (rscc)
1198 				val = gmu_read_rscc(gmu, offset);
1199 			else
1200 				val = gmu_read(gmu, offset);
1201 
1202 			obj->data[index++] = val;
1203 		}
1204 	}
1205 }
1206 
1207 static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
1208 		struct a6xx_gpu_state *a6xx_state)
1209 {
1210 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1211 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1212 
1213 	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
1214 		3, sizeof(*a6xx_state->gmu_registers));
1215 
1216 	if (!a6xx_state->gmu_registers)
1217 		return;
1218 
1219 	a6xx_state->nr_gmu_registers = 3;
1220 
1221 	/* Get the CX GMU registers from AHB */
1222 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
1223 		&a6xx_state->gmu_registers[0], false);
1224 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
1225 		&a6xx_state->gmu_registers[1], true);
1226 
1227 	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1228 		return;
1229 
1230 	/* Set the fence to ALLOW mode so we can access the registers */
1231 	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
1232 
1233 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
1234 		&a6xx_state->gmu_registers[2], false);
1235 }
1236 
1237 static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
1238 		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
1239 {
1240 	struct msm_gpu_state_bo *snapshot;
1241 
1242 	if (!bo->size)
1243 		return NULL;
1244 
1245 	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
1246 	if (!snapshot)
1247 		return NULL;
1248 
1249 	snapshot->iova = bo->iova;
1250 	snapshot->size = bo->size;
1251 	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
1252 	if (!snapshot->data)
1253 		return NULL;
1254 
1255 	memcpy(snapshot->data, bo->virt, bo->size);
1256 
1257 	return snapshot;
1258 }
1259 
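/*
 * Copy out each HFI queue's command history, rotating by history_idx so the
 * entries land in the snapshot in ring order.
 */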
1260 static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
1261 					  struct a6xx_gpu_state *a6xx_state)
1262 {
1263 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1264 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1265 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1266 	unsigned i, j;
1267 
1268 	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
1269 
1270 	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
1271 		struct a6xx_hfi_queue *queue = &gmu->queues[i];
1272 		for (j = 0; j < HFI_HISTORY_SZ; j++) {
1273 			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
1274 			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
1275 		}
1276 	}
1277 }
1278 
1279 #define A6XX_REGLIST_SIZE        1
1280 #define A6XX_GBIF_REGLIST_SIZE   1
1281 static void a6xx_get_registers(struct msm_gpu *gpu,
1282 		struct a6xx_gpu_state *a6xx_state,
1283 		struct a6xx_crashdumper *dumper)
1284 {
1285 	int i, count = A6XX_REGLIST_SIZE +
1286 		ARRAY_SIZE(a6xx_reglist) +
1287 		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
1288 	int index = 0;
1289 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1290 
1291 	a6xx_state->registers = state_kcalloc(a6xx_state,
1292 		count, sizeof(*a6xx_state->registers));
1293 
1294 	if (!a6xx_state->registers)
1295 		return;
1296 
1297 	a6xx_state->nr_registers = count;
1298 
1299 	a6xx_get_ahb_gpu_registers(gpu,
1300 		a6xx_state, &a6xx_ahb_reglist,
1301 		&a6xx_state->registers[index++]);
1302 
1303 	if (a6xx_has_gbif(adreno_gpu))
1304 		a6xx_get_ahb_gpu_registers(gpu,
1305 				a6xx_state, &a6xx_gbif_reglist,
1306 				&a6xx_state->registers[index++]);
1307 	else
1308 		a6xx_get_ahb_gpu_registers(gpu,
1309 				a6xx_state, &a6xx_vbif_reglist,
1310 				&a6xx_state->registers[index++]);
1311 	if (!dumper) {
1312 		/*
1313 		 * We can't use the crashdumper when the SMMU is stalled,
1314 		 * because the GPU has no memory access until we resume
1315 		 * translation (but we don't want to do that until after
1316 		 * we have captured as much useful GPU state as possible).
1317 		 * So instead collect registers via the CPU:
1318 		 */
1319 		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1320 			a6xx_get_ahb_gpu_registers(gpu,
1321 				a6xx_state, &a6xx_reglist[i],
1322 				&a6xx_state->registers[index++]);
1323 		return;
1324 	}
1325 
1326 	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1327 		a6xx_get_crashdumper_registers(gpu,
1328 			a6xx_state, &a6xx_reglist[i],
1329 			&a6xx_state->registers[index++],
1330 			dumper);
1331 
1332 	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
1333 		a6xx_get_crashdumper_hlsq_registers(gpu,
1334 			a6xx_state, &a6xx_hlsq_reglist[i],
1335 			&a6xx_state->registers[index++],
1336 			dumper);
1337 }
1338 
1339 #define A7XX_PRE_CRASHDUMPER_SIZE    1
1340 #define A7XX_POST_CRASHDUMPER_SIZE   1
1341 static void a7xx_get_registers(struct msm_gpu *gpu,
1342 		struct a6xx_gpu_state *a6xx_state,
1343 		struct a6xx_crashdumper *dumper)
1344 {
1345 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1346 	int i, count;
1347 	int index = 0;
1348 	const u32 *pre_crashdumper_regs;
1349 	const struct gen7_reg_list *reglist;
1350 
1351 	if (adreno_is_a730(adreno_gpu)) {
1352 		reglist = gen7_0_0_reg_list;
1353 		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1354 	} else if (adreno_is_a740_family(adreno_gpu)) {
1355 		reglist = gen7_2_0_reg_list;
1356 		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1357 	} else {
1358 		BUG_ON(!adreno_is_a750(adreno_gpu));
1359 		reglist = gen7_9_0_reg_list;
1360 		pre_crashdumper_regs = gen7_9_0_pre_crashdumper_gpu_registers;
1361 	}
1362 
1363 	count = A7XX_PRE_CRASHDUMPER_SIZE + A7XX_POST_CRASHDUMPER_SIZE;
1364 
1365 	/* The downstream reglist contains registers in other memory regions
1366 	 * (cx_misc/cx_mem and cx_dbgc) and we need to plumb through their
1367 	 * offsets and map them to read them on the CPU. For now only read the
1368 	 * first region which is the main one.
1369 	 */
1370 	if (dumper) {
1371 		for (i = 0; reglist[i].regs; i++)
1372 			count++;
1373 	} else {
1374 		count++;
1375 	}
1376 
1377 	a6xx_state->registers = state_kcalloc(a6xx_state,
1378 		count, sizeof(*a6xx_state->registers));
1379 
1380 	if (!a6xx_state->registers)
1381 		return;
1382 
1383 	a6xx_state->nr_registers = count;
1384 
1385 	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, pre_crashdumper_regs,
1386 		&a6xx_state->registers[index++]);
1387 
1388 	if (!dumper) {
1389 		a7xx_get_ahb_gpu_reglist(gpu,
1390 			a6xx_state, &reglist[0],
1391 			&a6xx_state->registers[index++]);
1392 		return;
1393 	}
1394 
1395 	for (i = 0; reglist[i].regs; i++)
1396 		a7xx_get_crashdumper_registers(gpu,
1397 			a6xx_state, &reglist[i],
1398 			&a6xx_state->registers[index++],
1399 			dumper);
1400 }
1401 
1402 static void a7xx_get_post_crashdumper_registers(struct msm_gpu *gpu,
1403 		struct a6xx_gpu_state *a6xx_state)
1404 {
1405 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1406 	const u32 *regs;
1407 
1408 	BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu) ||
1409 		 adreno_is_a750(adreno_gpu)));
1410 	regs = gen7_0_0_post_crashdumper_registers;
1411 
1412 	a7xx_get_ahb_gpu_registers(gpu,
1413 		a6xx_state, regs,
1414 		&a6xx_state->registers[a6xx_state->nr_registers - 1]);
1415 }
1416 
1417 static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu)
1418 {
1419 	/* The value at [16:31] is in 4dword units. Convert it to dwords */
1420 	return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14;
1421 }
1422 
1423 static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu)
1424 {
1425 	/*
1426 	 * The value at CP_ROQ_THRESHOLDS_2[20:31] is in 4dword units.
1427 	 * That register however is not directly accessible from APSS on A7xx.
1428 	 * Program the SQE_UCODE_DBG_ADDR with offset=0x70d3 and read the value.
1429 	 */
1430 	gpu_write(gpu, REG_A6XX_CP_SQE_UCODE_DBG_ADDR, 0x70d3);
1431 
1432 	return 4 * (gpu_read(gpu, REG_A6XX_CP_SQE_UCODE_DBG_DATA) >> 20);
1433 }
1434 
1435 /* Read a block of data from an indexed register pair */
1436 static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
1437 		struct a6xx_gpu_state *a6xx_state,
1438 		const struct a6xx_indexed_registers *indexed,
1439 		struct a6xx_gpu_state_obj *obj)
1440 {
1441 	u32 count = indexed->count;
1442 	int i;
1443 
1444 	obj->handle = (const void *) indexed;
1445 	if (indexed->count_fn)
1446 		count = indexed->count_fn(gpu);
1447 
1448 	obj->data = state_kcalloc(a6xx_state, count, sizeof(u32));
1449 	obj->count = count;
1450 	if (!obj->data)
1451 		return;
1452 
1453 	/* All the indexed banks start at address 0 */
1454 	gpu_write(gpu, indexed->addr, 0);
1455 
1456 	/* Read the data - each read increments the internal address by 1 */
1457 	for (i = 0; i < count; i++)
1458 		obj->data[i] = gpu_read(gpu, indexed->data);
1459 }
1460 
1461 static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
1462 		struct a6xx_gpu_state *a6xx_state)
1463 {
1464 	u32 mempool_size;
1465 	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
1466 	int i;
1467 
1468 	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
1469 		sizeof(*a6xx_state->indexed_regs));
1470 	if (!a6xx_state->indexed_regs)
1471 		return;
1472 
1473 	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
1474 		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
1475 			&a6xx_state->indexed_regs[i]);
1476 
1477 	if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
1478 		u32 val;
1479 
1480 		val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG);
1481 		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4);
1482 
1483 		/* Get the contents of the CP mempool */
1484 		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1485 			&a6xx_state->indexed_regs[i]);
1486 
1487 		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val);
1488 		a6xx_state->nr_indexed_regs = count;
1489 		return;
1490 	}
1491 
1492 	/* Set the CP mempool size to 0 to stabilize it while dumping */
1493 	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
1494 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
1495 
1496 	/* Get the contents of the CP mempool */
1497 	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1498 		&a6xx_state->indexed_regs[i]);
1499 
1500 	/*
1501 	 * Offset 0x2000 in the mempool is the size - copy the saved size over
1502 	 * so the data is consistent
1503 	 */
1504 	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
1505 
1506 	/* Restore the size in the hardware */
1507 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);

	a6xx_state->nr_indexed_regs = count;
1508 }
1509 
1510 static void a7xx_get_indexed_registers(struct msm_gpu *gpu,
1511 		struct a6xx_gpu_state *a6xx_state)
1512 {
1513 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1514 	const struct a6xx_indexed_registers *indexed_regs;
1515 	int i, indexed_count, mempool_count;
1516 
1517 	if (adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)) {
1518 		indexed_regs = a7xx_indexed_reglist;
1519 		indexed_count = ARRAY_SIZE(a7xx_indexed_reglist);
1520 	} else {
1521 		BUG_ON(!adreno_is_a750(adreno_gpu));
1522 		indexed_regs = gen7_9_0_cp_indexed_reg_list;
1523 		indexed_count = ARRAY_SIZE(gen7_9_0_cp_indexed_reg_list);
1524 	}
1525 
1526 	mempool_count = ARRAY_SIZE(a7xx_cp_bv_mempool_indexed);
1527 
1528 	a6xx_state->indexed_regs = state_kcalloc(a6xx_state,
1529 					indexed_count + mempool_count,
1530 					sizeof(*a6xx_state->indexed_regs));
1531 	if (!a6xx_state->indexed_regs)
1532 		return;
1533 
1534 	a6xx_state->nr_indexed_regs = indexed_count + mempool_count;
1535 
1536 	/* First read the common regs */
1537 	for (i = 0; i < indexed_count; i++)
1538 		a6xx_get_indexed_regs(gpu, a6xx_state, &indexed_regs[i],
1539 			&a6xx_state->indexed_regs[i]);
1540 
1541 	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, 0, BIT(2));
1542 	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, 0, BIT(2));
1543 
1544 	/* Get the contents of the CP_BV mempool */
1545 	for (i = 0; i < mempool_count; i++)
1546 		a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_cp_bv_mempool_indexed[i],
1547 			&a6xx_state->indexed_regs[indexed_count + i]);
1548 
1549 	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(2), 0);
1550 	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, BIT(2), 0);
1552 }
1553 
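/*
 * Top-level snapshot entry point: grab the GMU state first, then (if GX is
 * powered) the indexed registers, the AHB/crashdumper register sets, shaders
 * and clusters, and finally the debugbus if snapshot_debugbus is enabled.
 */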
1554 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
1555 {
1556 	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
1557 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1558 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1559 	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
1560 		GFP_KERNEL);
1561 	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
1562 			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
1563 
1564 	if (!a6xx_state)
1565 		return ERR_PTR(-ENOMEM);
1566 
1567 	INIT_LIST_HEAD(&a6xx_state->objs);
1568 
1569 	/* Get the generic state from the adreno core */
1570 	adreno_gpu_state_get(gpu, &a6xx_state->base);
1571 
1572 	if (!adreno_has_gmu_wrapper(adreno_gpu)) {
1573 		a6xx_get_gmu_registers(gpu, a6xx_state);
1574 
1575 		a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
1576 		a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
1577 		a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);
1578 
1579 		a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
1580 	}
1581 
1582 	/* If GX isn't on the rest of the data isn't going to be accessible */
1583 	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1584 		return &a6xx_state->base;
1585 
1586 	/* Get the banks of indexed registers */
1587 	if (adreno_is_a7xx(adreno_gpu))
1588 		a7xx_get_indexed_registers(gpu, a6xx_state);
1589 	else
1590 		a6xx_get_indexed_registers(gpu, a6xx_state);
1591 
1592 	/*
1593 	 * Try to initialize the crashdumper, if we are not dumping state
1594 	 * with the SMMU stalled.  The crashdumper needs memory access to
1595 	 * write out GPU state, so we need to skip this when the SMMU is
1596 	 * stalled in response to an iova fault
1597 	 */
1598 	if (!stalled && !gpu->needs_hw_init &&
1599 	    !a6xx_crashdumper_init(gpu, &_dumper)) {
1600 		dumper = &_dumper;
1601 	}
1602 
1603 	if (adreno_is_a7xx(adreno_gpu)) {
1604 		a7xx_get_registers(gpu, a6xx_state, dumper);
1605 
1606 		if (dumper) {
1607 			a7xx_get_shaders(gpu, a6xx_state, dumper);
1608 			a7xx_get_clusters(gpu, a6xx_state, dumper);
1609 			a7xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1610 
1611 			msm_gem_kernel_put(dumper->bo, gpu->aspace);
1612 		}
1613 
1614 		a7xx_get_post_crashdumper_registers(gpu, a6xx_state);
1615 	} else {
1616 		a6xx_get_registers(gpu, a6xx_state, dumper);
1617 
1618 		if (dumper) {
1619 			a6xx_get_shaders(gpu, a6xx_state, dumper);
1620 			a6xx_get_clusters(gpu, a6xx_state, dumper);
1621 			a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1622 
1623 			msm_gem_kernel_put(dumper->bo, gpu->aspace);
1624 		}
1625 	}
1626 
1627 	if (snapshot_debugbus)
1628 		a6xx_get_debugbus(gpu, a6xx_state);
1629 
1630 	a6xx_state->gpu_initialized = !gpu->needs_hw_init;
1631 
1632 	return &a6xx_state->base;
1633 }
1634 
1635 static void a6xx_gpu_state_destroy(struct kref *kref)
1636 {
1637 	struct a6xx_state_memobj *obj, *tmp;
1638 	struct msm_gpu_state *state = container_of(kref,
1639 			struct msm_gpu_state, ref);
1640 	struct a6xx_gpu_state *a6xx_state = container_of(state,
1641 			struct a6xx_gpu_state, base);
1642 
1643 	if (a6xx_state->gmu_log)
1644 		kvfree(a6xx_state->gmu_log->data);
1645 
1646 	if (a6xx_state->gmu_hfi)
1647 		kvfree(a6xx_state->gmu_hfi->data);
1648 
1649 	if (a6xx_state->gmu_debug)
1650 		kvfree(a6xx_state->gmu_debug->data);
1651 
1652 	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
1653 		list_del(&obj->node);
1654 		kvfree(obj);
1655 	}
1656 
1657 	adreno_gpu_state_destroy(state);
1658 	kfree(a6xx_state);
1659 }
1660 
1661 int a6xx_gpu_state_put(struct msm_gpu_state *state)
1662 {
1663 	if (IS_ERR_OR_NULL(state))
1664 		return 1;
1665 
1666 	return kref_put(&state->ref, a6xx_gpu_state_destroy);
1667 }
1668 
1669 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
1670 		struct drm_printer *p)
1671 {
1672 	int i, index = 0;
1673 
1674 	if (!data)
1675 		return;
1676 
1677 	for (i = 0; i < count; i += 2) {
1678 		u32 count = RANGE(registers, i);
1679 		u32 offset = registers[i];
1680 		int j;
1681 
1682 		for (j = 0; j < count; index++, offset++, j++) {
1683 			if (data[index] == 0xdeafbead)
1684 				continue;
1685 
1686 			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
1687 				offset << 2, data[index]);
1688 		}
1689 	}
1690 }
1691 
1692 static void a7xx_show_registers_indented(const u32 *registers, u32 *data,
1693 		struct drm_printer *p, unsigned indent)
1694 {
1695 	int i, index = 0;
1696 
	if (!data)
		return;

1697 	for (i = 0; registers[i] != UINT_MAX; i += 2) {
1698 		u32 count = RANGE(registers, i);
1699 		u32 offset = registers[i];
1700 		int j;
1701 
1702 		for (j = 0; j < count; index++, offset++, j++) {
1703 			int k;
1704 
1705 			if (data[index] == 0xdeafbead)
1706 				continue;
1707 
1708 			for (k = 0; k < indent; k++)
1709 				drm_printf(p, "  ");
1710 			drm_printf(p, "- { offset: 0x%06x, value: 0x%08x }\n",
1711 				offset << 2, data[index]);
1712 		}
1713 	}
1714 }
1715 
1716 static void a7xx_show_registers(const u32 *registers, u32 *data, struct drm_printer *p)
1717 {
1718 	a7xx_show_registers_indented(registers, data, p, 1);
1719 }
1720 
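/*
 * Trim trailing zero dwords so only the interesting part of the buffer is
 * encoded, then emit it as a single ascii85 blob.
 */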
1721 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1722 {
1723 	char out[ASCII85_BUFSZ];
1724 	long i, l, datalen = 0;
1725 
1726 	for (i = 0; i < len >> 2; i++) {
1727 		if (data[i])
1728 			datalen = (i + 1) << 2;
1729 	}
1730 
1731 	if (datalen == 0)
1732 		return;
1733 
1734 	drm_puts(p, "    data: !!ascii85 |\n");
1735 	drm_puts(p, "      ");
1736 
1738 	l = ascii85_encode_len(datalen);
1739 
1740 	for (i = 0; i < l; i++)
1741 		drm_puts(p, ascii85_encode(data[i], out));
1742 
1743 	drm_puts(p, "\n");
1744 }
1745 
1746 static void print_name(struct drm_printer *p, const char *fmt, const char *name)
1747 {
1748 	drm_puts(p, fmt);
1749 	drm_puts(p, name);
1750 	drm_puts(p, "\n");
1751 }
1752 
1753 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1754 		struct drm_printer *p)
1755 {
1756 	const struct a6xx_shader_block *block = obj->handle;
1757 	int i;
1758 
1759 	if (!obj->handle)
1760 		return;
1761 
1762 	print_name(p, "  - type: ", block->name);
1763 
1764 	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1765 		drm_printf(p, "    - bank: %d\n", i);
1766 		drm_printf(p, "      size: %d\n", block->size);
1767 
1768 		if (!obj->data)
1769 			continue;
1770 
1771 		print_ascii85(p, block->size << 2,
1772 			obj->data + (block->size * i));
1773 	}
1774 }
1775 
1776 static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj,
1777 		struct drm_printer *p)
1778 {
1779 	const struct gen7_shader_block *block = obj->handle;
1780 	int i, j;
1781 	u32 *data = obj->data;
1782 
1783 	if (!obj->handle)
1784 		return;
1785 
1786 	print_name(p, "  - type: ", a7xx_statetype_names[block->statetype]);
1787 	print_name(p, "    - pipe: ", a7xx_pipe_names[block->pipeid]);
1788 
1789 	for (i = 0; i < block->num_sps; i++) {
1790 		drm_printf(p, "      - sp: %d\n", i);
1791 
1792 		for (j = 0; j < block->num_usptps; j++) {
1793 			drm_printf(p, "        - usptp: %d\n", j);
1794 			drm_printf(p, "          size: %d\n", block->size);
1795 
1796 			if (!obj->data)
1797 				continue;
1798 
1799 			print_ascii85(p, block->size << 2, data);
1800 
1801 			data += block->size;
1802 		}
1803 	}
1804 }
1805 
1806 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1807 		struct drm_printer *p)
1808 {
1809 	int ctx, index = 0;
1810 
1811 	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1812 		int j;
1813 
1814 		drm_printf(p, "    - context: %d\n", ctx);
1815 
1816 		for (j = 0; j < size; j += 2) {
1817 			u32 count = RANGE(registers, j);
1818 			u32 offset = registers[j];
1819 			int k;
1820 
1821 			for (k = 0; k < count; index++, offset++, k++) {
1822 				if (data[index] == 0xdeafbead)
1823 					continue;
1824 
1825 				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
1826 					offset << 2, data[index]);
1827 			}
1828 		}
1829 	}
1830 }
1831 
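/*
 * DBGAHB clusters and regular clusters share the same per-context layout;
 * only the register list and count attached to the handle differ.
 */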
1832 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1833 		struct drm_printer *p)
1834 {
1835 	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1836 
1837 	if (dbgahb) {
1838 		print_name(p, "  - cluster-name: ", dbgahb->name);
1839 		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1840 			obj->data, p);
1841 	}
1842 }
1843 
1844 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1845 		struct drm_printer *p)
1846 {
1847 	const struct a6xx_cluster *cluster = obj->handle;
1848 
1849 	if (cluster) {
1850 		print_name(p, "  - cluster-name: ", cluster->name);
1851 		a6xx_show_cluster_data(cluster->registers, cluster->count,
1852 			obj->data, p);
1853 	}
1854 }
1855 
1856 static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1857 		struct drm_printer *p)
1858 {
1859 	const struct gen7_sptp_cluster_registers *dbgahb = obj->handle;
1860 
1861 	if (dbgahb) {
1862 		print_name(p, "  - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]);
1863 		print_name(p, "    - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]);
1864 		drm_printf(p, "      - context: %d\n", dbgahb->context_id);
1865 		a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4);
1866 	}
1867 }
1868 
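/*
 * A7xx clusters record which context was selected at capture time;
 * STATE_FORCE_CTXT_1 maps to context 1, everything else to context 0.
 */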
1869 static void a7xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1870 		struct drm_printer *p)
1871 {
1872 	const struct gen7_cluster_registers *cluster = obj->handle;
1873 
1874 	if (cluster) {
1875 		int context = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;
1876 
1877 		print_name(p, "  - pipe: ", a7xx_pipe_names[cluster->pipe_id]);
1878 		print_name(p, "    - cluster-name: ", a7xx_cluster_names[cluster->cluster_id]);
1879 		drm_printf(p, "      - context: %d\n", context);
1880 		a7xx_show_registers_indented(cluster->regs, obj->data, p, 4);
1881 	}
1882 }
1883 
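/*
 * Indexed registers are captured as a raw dword stream; print the block
 * name, the dword count and the ascii85 encoded contents.
 */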
1884 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1885 		struct drm_printer *p)
1886 {
1887 	const struct a6xx_indexed_registers *indexed = obj->handle;
1888 
1889 	if (!indexed)
1890 		return;
1891 
1892 	print_name(p, "  - regs-name: ", indexed->name);
1893 	drm_printf(p, "    dwords: %d\n", obj->count);
1894 
1895 	print_ascii85(p, obj->count << 2, obj->data);
1896 }
1897 
1898 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1899 		u32 *data, struct drm_printer *p)
1900 {
1901 	if (block) {
1902 		print_name(p, "  - debugbus-block: ", block->name);
1903 
1904 		/*
1905 		 * count for regular debugbus data is in quadwords,
1906 		 * but print the size in dwords for consistency
1907 		 */
1908 		drm_printf(p, "    count: %d\n", block->count << 1);
1909 
1910 		print_ascii85(p, block->count << 3, data);
1911 	}
1912 }
1913 
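/*
 * Print every captured debugbus section: the regular GPU debugbus blocks,
 * the VBIF debugbus (if it was captured) and the CX debugbus blocks.
 */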
1914 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1915 		struct drm_printer *p)
1916 {
1917 	int i;
1918 
1919 	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1920 		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1921 
1922 		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1923 	}
1924 
1925 	if (a6xx_state->vbif_debugbus) {
1926 		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1927 
1928 		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
1929 		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1930 
1931 		/* vbif debugbus data is in dwords.  Confusing, huh? */
1932 		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1933 	}
1934 
1935 	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1936 		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1937 
1938 		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1939 	}
1940 }
1941 
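/*
 * Top level crash state printer.  Walks each captured section and prints it
 * in the same YAML-style layout as the rest of the GPU state, e.g. (example
 * values only):
 *
 * registers:
 *   - { offset: 0x000040, value: 0x00000001 }
 *
 * Returns quietly if state capture failed.
 */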
1942 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1943 		struct drm_printer *p)
1944 {
1945 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1946 	struct a6xx_gpu_state *a6xx_state = container_of(state,
1947 			struct a6xx_gpu_state, base);
1948 	int i;
1949 
1950 	if (IS_ERR_OR_NULL(state))
1951 		return;
1952 
1953 	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);
1954 
1955 	adreno_show(gpu, state, p);
1956 
1957 	drm_puts(p, "gmu-log:\n");
1958 	if (a6xx_state->gmu_log) {
1959 		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
1960 
1961 		drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
1962 		drm_printf(p, "    size: %zu\n", gmu_log->size);
1963 		adreno_show_object(p, &gmu_log->data, gmu_log->size,
1964 				&gmu_log->encoded);
1965 	}
1966 
1967 	drm_puts(p, "gmu-hfi:\n");
1968 	if (a6xx_state->gmu_hfi) {
1969 		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
1970 		unsigned i, j;
1971 
1972 		drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
1973 		drm_printf(p, "    size: %zu\n", gmu_hfi->size);
1974 		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
1975 			drm_printf(p, "    queue-history[%u]:", i);
1976 			for (j = 0; j < HFI_HISTORY_SZ; j++) {
1977 				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
1978 			}
1979 			drm_printf(p, "\n");
1980 		}
1981 		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
1982 				&gmu_hfi->encoded);
1983 	}
1984 
1985 	drm_puts(p, "gmu-debug:\n");
1986 	if (a6xx_state->gmu_debug) {
1987 		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;
1988 
1989 		drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
1990 		drm_printf(p, "    size: %zu\n", gmu_debug->size);
1991 		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
1992 				&gmu_debug->encoded);
1993 	}
1994 
1995 	drm_puts(p, "registers:\n");
1996 	for (i = 0; i < a6xx_state->nr_registers; i++) {
1997 		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
1998 
1999 		if (!obj->handle)
2000 			continue;
2001 
2002 		if (adreno_is_a7xx(adreno_gpu)) {
2003 			a7xx_show_registers(obj->handle, obj->data, p);
2004 		} else {
2005 			const struct a6xx_registers *regs = obj->handle;
2006 
2007 			a6xx_show_registers(regs->registers, obj->data, regs->count, p);
2008 		}
2009 	}
2010 
2011 	drm_puts(p, "registers-gmu:\n");
2012 	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
2013 		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
2014 		const struct a6xx_registers *regs = obj->handle;
2015 
2016 		if (!obj->handle)
2017 			continue;
2018 
2019 		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
2020 	}
2021 
2022 	drm_puts(p, "indexed-registers:\n");
2023 	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
2024 		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
2025 
2026 	drm_puts(p, "shader-blocks:\n");
2027 	for (i = 0; i < a6xx_state->nr_shaders; i++) {
2028 		if (adreno_is_a7xx(adreno_gpu))
2029 			a7xx_show_shader(&a6xx_state->shaders[i], p);
2030 		else
2031 			a6xx_show_shader(&a6xx_state->shaders[i], p);
2032 	}
2033 
2034 	drm_puts(p, "clusters:\n");
2035 	for (i = 0; i < a6xx_state->nr_clusters; i++) {
2036 		if (adreno_is_a7xx(adreno_gpu))
2037 			a7xx_show_cluster(&a6xx_state->clusters[i], p);
2038 		else
2039 			a6xx_show_cluster(&a6xx_state->clusters[i], p);
2040 	}
2041 
2042 	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) {
2043 		if (adreno_is_a7xx(adreno_gpu))
2044 			a7xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2045 		else
2046 			a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2047 	}
2048 
2049 	drm_puts(p, "debugbus:\n");
2050 	a6xx_show_debugbus(a6xx_state, p);
2051 }
2052