xref: /linux/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c (revision 001821b0e79716c4e17c71d8e053a23599a7a508)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
3 
4 #include <linux/ascii85.h>
5 #include "msm_gem.h"
6 #include "a6xx_gpu.h"
7 #include "a6xx_gmu.h"
8 #include "a6xx_gpu_state.h"
9 #include "a6xx_gmu.xml.h"
10 
11 /* Ignore diagnostics about register tables that we aren't using yet. We don't
12  * want to modify these headers too much from their original source.
13  */
14 #pragma GCC diagnostic push
15 #pragma GCC diagnostic ignored "-Wunused-variable"
16 #pragma GCC diagnostic ignored "-Wunused-const-variable"
17 
18 #include "adreno_gen7_0_0_snapshot.h"
19 #include "adreno_gen7_2_0_snapshot.h"
20 #include "adreno_gen7_9_0_snapshot.h"
21 
22 #pragma GCC diagnostic pop
23 
24 struct a6xx_gpu_state_obj {
25 	const void *handle;
26 	u32 *data;
27 	u32 count;	/* optional, used when the count is read back from the hardware */
28 };
29 
30 struct a6xx_gpu_state {
31 	struct msm_gpu_state base;
32 
33 	struct a6xx_gpu_state_obj *gmu_registers;
34 	int nr_gmu_registers;
35 
36 	struct a6xx_gpu_state_obj *registers;
37 	int nr_registers;
38 
39 	struct a6xx_gpu_state_obj *shaders;
40 	int nr_shaders;
41 
42 	struct a6xx_gpu_state_obj *clusters;
43 	int nr_clusters;
44 
45 	struct a6xx_gpu_state_obj *dbgahb_clusters;
46 	int nr_dbgahb_clusters;
47 
48 	struct a6xx_gpu_state_obj *indexed_regs;
49 	int nr_indexed_regs;
50 
51 	struct a6xx_gpu_state_obj *debugbus;
52 	int nr_debugbus;
53 
54 	struct a6xx_gpu_state_obj *vbif_debugbus;
55 
56 	struct a6xx_gpu_state_obj *cx_debugbus;
57 	int nr_cx_debugbus;
58 
59 	struct msm_gpu_state_bo *gmu_log;
60 	struct msm_gpu_state_bo *gmu_hfi;
61 	struct msm_gpu_state_bo *gmu_debug;
62 
63 	s32 hfi_queue_history[2][HFI_HISTORY_SZ];
64 
65 	struct list_head objs;
66 
67 	bool gpu_initialized;
68 };
69 
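/*
 * The crashdumper consumes a script of two-qword entries built in GPU-visible
 * memory by the helpers below: CRASHDUMP_WRITE() packs the value to write in
 * the first qword and the register offset (bits [63:44]) plus a write flag
 * and a count of one in the second; CRASHDUMP_READ() packs the destination
 * iova in the first qword and the register offset plus a dword count in the
 * second; a pair of zero qwords from CRASHDUMP_FINI() terminates the script.
 * A minimal, purely illustrative script that pulls four dwords starting at
 * register offset 0x400 into 'iova' would be built roughly like this:
 *
 *	u64 script[4], *ptr = script;
 *
 *	ptr += CRASHDUMP_READ(ptr, 0x400, 4, iova);
 *	CRASHDUMP_FINI(ptr);
 */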
70 static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
71 {
72 	in[0] = val;
73 	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
74 
75 	return 2;
76 }
77 
78 static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
79 {
80 	in[0] = target;
81 	in[1] = (((u64) reg) << 44 | dwords);
82 
83 	return 2;
84 }
85 
86 static inline int CRASHDUMP_FINI(u64 *in)
87 {
88 	in[0] = 0;
89 	in[1] = 0;
90 
91 	return 2;
92 }
93 
94 struct a6xx_crashdumper {
95 	void *ptr;
96 	struct drm_gem_object *bo;
97 	u64 iova;
98 };
99 
100 struct a6xx_state_memobj {
101 	struct list_head node;
102 	unsigned long long data[];
103 };
104 
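/*
 * All captured data is allocated through state_kcalloc()/state_kmemdup(),
 * which prepend a list node so that a6xx_gpu_state_destroy() can free every
 * allocation in one pass when the devcoredump is released.
 */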
105 static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
106 {
107 	struct a6xx_state_memobj *obj =
108 		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
109 
110 	if (!obj)
111 		return NULL;
112 
113 	list_add_tail(&obj->node, &a6xx_state->objs);
114 	return &obj->data;
115 }
116 
117 static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
118 		size_t size)
119 {
120 	void *dst = state_kcalloc(a6xx_state, 1, size);
121 
122 	if (dst)
123 		memcpy(dst, src, size);
124 	return dst;
125 }
126 
127 /*
128  * Allocate 1MB for the crashdumper scratch region - 8k for the script and
129  * the rest for the data
130  */
131 #define A6XX_CD_DATA_OFFSET 8192
132 #define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
133 
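/*
 * Each capture routine below builds its script at dumper->ptr, points the
 * reads at dumper->iova + A6XX_CD_DATA_OFFSET and sanity-checks the expected
 * payload against A6XX_CD_DATA_SIZE before kicking the dumper.
 */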
134 static int a6xx_crashdumper_init(struct msm_gpu *gpu,
135 		struct a6xx_crashdumper *dumper)
136 {
137 	dumper->ptr = msm_gem_kernel_new(gpu->dev,
138 		SZ_1M, MSM_BO_WC, gpu->aspace,
139 		&dumper->bo, &dumper->iova);
140 
141 	if (!IS_ERR(dumper->ptr))
142 		msm_gem_object_set_name(dumper->bo, "crashdump");
143 
144 	return PTR_ERR_OR_ZERO(dumper->ptr);
145 }
146 
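/*
 * Kick the crashdumper: point CP_CRASH_SCRIPT_BASE at the script, write 1 to
 * CP_CRASH_DUMP_CNTL and poll CP_CRASH_DUMP_STATUS bit 1 with a 10ms timeout.
 * This only works while SPTPRAC/GX is powered, hence the sptprac_is_on()
 * check.
 */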
147 static int a6xx_crashdumper_run(struct msm_gpu *gpu,
148 		struct a6xx_crashdumper *dumper)
149 {
150 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
151 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
152 	u32 val;
153 	int ret;
154 
155 	if (IS_ERR_OR_NULL(dumper->ptr))
156 		return -EINVAL;
157 
158 	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
159 		return -EINVAL;
160 
161 	/* Make sure all pending memory writes are posted */
162 	wmb();
163 
164 	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE, dumper->iova);
165 
166 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
167 
168 	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
169 		val & 0x02, 100, 10000);
170 
171 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
172 
173 	return ret;
174 }
175 
176 /* read a value from the GX debug bus */
177 static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
178 		u32 *data)
179 {
180 	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
181 		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
182 
183 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
184 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
185 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
186 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
187 
188 	/* Wait 1 us to make sure the data is flowing */
189 	udelay(1);
190 
191 	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
192 	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
193 
194 	return 2;
195 }
196 
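/*
 * The CX debug controller is ioremapped separately (see a6xx_get_debugbus())
 * and the register defines are dword offsets, so these helpers scale by 4 to
 * get a byte offset for readl()/writel().
 */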
197 #define cxdbg_write(ptr, offset, val) \
198 	writel((val), (ptr) + ((offset) << 2))
199 
200 #define cxdbg_read(ptr, offset) \
201 	readl((ptr) + ((offset) << 2))
202 
203 /* read a value from the CX debug bus */
204 static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
205 		u32 *data)
206 {
207 	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
208 		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
209 
210 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
211 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
212 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
213 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
214 
215 	/* Wait 1 us to make sure the data is flowing */
216 	udelay(1);
217 
218 	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
219 	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
220 
221 	return 2;
222 }
223 
224 /* Read a chunk of data from the VBIF debug bus */
225 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
226 		u32 reg, int count, u32 *data)
227 {
228 	int i;
229 
230 	gpu_write(gpu, ctrl0, reg);
231 
232 	for (i = 0; i < count; i++) {
233 		gpu_write(gpu, ctrl1, i);
234 		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
235 	}
236 
237 	return count;
238 }
239 
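/*
 * Dword counts per block as read by the loops below: 16 for each AXI arbiter
 * block, 18 for each XIN AXI block and 12 for each XIN core block, so the
 * capture buffer holds 2*16 + 5*18 + 4*12 = 170 dwords.
 */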
240 #define AXI_ARB_BLOCKS 2
241 #define XIN_AXI_BLOCKS 5
242 #define XIN_CORE_BLOCKS 4
243 
244 #define VBIF_DEBUGBUS_BLOCK_SIZE \
245 	((16 * AXI_ARB_BLOCKS) + \
246 	 (18 * XIN_AXI_BLOCKS) + \
247 	 (12 * XIN_CORE_BLOCKS))
248 
249 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
250 		struct a6xx_gpu_state *a6xx_state,
251 		struct a6xx_gpu_state_obj *obj)
252 {
253 	u32 clk, *ptr;
254 	int i;
255 
256 	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
257 		sizeof(u32));
258 	if (!obj->data)
259 		return;
260 
261 	obj->handle = NULL;
262 
263 	/* Get the current clock setting */
264 	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
265 
266 	/* Force on the bus so we can read it */
267 	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
268 		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
269 
270 	/* We will read from BUS2 first, so disable BUS1 */
271 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
272 
273 	/* Enable the VBIF bus for reading */
274 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
275 
276 	ptr = obj->data;
277 
278 	for (i = 0; i < AXI_ARB_BLOCKS; i++)
279 		ptr += vbif_debugbus_read(gpu,
280 			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
281 			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
282 			1 << (i + 16), 16, ptr);
283 
284 	for (i = 0; i < XIN_AXI_BLOCKS; i++)
285 		ptr += vbif_debugbus_read(gpu,
286 			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
287 			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
288 			1 << i, 18, ptr);
289 
290 	/* Stop BUS2 so we can turn on BUS1 */
291 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
292 
293 	for (i = 0; i < XIN_CORE_BLOCKS; i++)
294 		ptr += vbif_debugbus_read(gpu,
295 			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
296 			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
297 			1 << i, 12, ptr);
298 
299 	/* Restore the VBIF clock setting */
300 	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
301 }
302 
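/*
 * Each debugbus index yields two dwords (TRACE_BUF2/TRACE_BUF1), which is why
 * the capture buffers below are sized as block->count * sizeof(u64).
 */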
303 static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
304 		struct a6xx_gpu_state *a6xx_state,
305 		const struct a6xx_debugbus_block *block,
306 		struct a6xx_gpu_state_obj *obj)
307 {
308 	int i;
309 	u32 *ptr;
310 
311 	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
312 	if (!obj->data)
313 		return;
314 
315 	obj->handle = block;
316 
317 	for (ptr = obj->data, i = 0; i < block->count; i++)
318 		ptr += debugbus_read(gpu, block->id, i, ptr);
319 }
320 
321 static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
322 		struct a6xx_gpu_state *a6xx_state,
323 		const struct a6xx_debugbus_block *block,
324 		struct a6xx_gpu_state_obj *obj)
325 {
326 	int i;
327 	u32 *ptr;
328 
329 	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
330 	if (!obj->data)
331 		return;
332 
333 	obj->handle = block;
334 
335 	for (ptr = obj->data, i = 0; i < block->count; i++)
336 		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
337 }
338 
339 static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu,
340 		struct a6xx_gpu_state *a6xx_state)
341 {
342 	int nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
343 		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
344 
345 	if (adreno_is_a650_family(to_adreno_gpu(gpu)))
346 		nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks);
347 
348 	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
349 			sizeof(*a6xx_state->debugbus));
350 
351 	if (a6xx_state->debugbus) {
352 		int i;
353 
354 		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
355 			a6xx_get_debugbus_block(gpu,
356 				a6xx_state,
357 				&a6xx_debugbus_blocks[i],
358 				&a6xx_state->debugbus[i]);
359 
360 		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
361 
362 		/*
363 		 * GBIF exposes the same debugbus interface as the other GPU
364 		 * blocks, so reuse the default path when the GPU has a GBIF.
365 		 * GBIF also uses exactly the same block ID as the VBIF.
366 		 */
367 		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
368 			a6xx_get_debugbus_block(gpu, a6xx_state,
369 				&a6xx_gbif_debugbus_block,
370 				&a6xx_state->debugbus[i]);
371 
372 			a6xx_state->nr_debugbus += 1;
373 		}
374 
375 
376 		if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
377 			for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++)
378 				a6xx_get_debugbus_block(gpu, a6xx_state,
379 					&a650_debugbus_blocks[i],
380 					&a6xx_state->debugbus[a6xx_state->nr_debugbus + i]);
381 			a6xx_state->nr_debugbus += ARRAY_SIZE(a650_debugbus_blocks);
382 		}
383 	}
384 }
385 
386 static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu,
387 		struct a6xx_gpu_state *a6xx_state)
388 {
389 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
390 	int debugbus_blocks_count, gbif_debugbus_blocks_count, total_debugbus_blocks;
391 	const u32 *debugbus_blocks, *gbif_debugbus_blocks;
392 	int i;
393 
394 	if (adreno_is_a730(adreno_gpu)) {
395 		debugbus_blocks = gen7_0_0_debugbus_blocks;
396 		debugbus_blocks_count = ARRAY_SIZE(gen7_0_0_debugbus_blocks);
397 		gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks;
398 		gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
399 	} else if (adreno_is_a740_family(adreno_gpu)) {
400 		debugbus_blocks = gen7_2_0_debugbus_blocks;
401 		debugbus_blocks_count = ARRAY_SIZE(gen7_2_0_debugbus_blocks);
402 		gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks;
403 		gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
404 	} else {
405 		BUG_ON(!adreno_is_a750(adreno_gpu));
406 		debugbus_blocks = gen7_9_0_debugbus_blocks;
407 		debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_debugbus_blocks);
408 		gbif_debugbus_blocks = gen7_9_0_gbif_debugbus_blocks;
409 		gbif_debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_gbif_debugbus_blocks);
410 	}
411 
412 	total_debugbus_blocks = debugbus_blocks_count + gbif_debugbus_blocks_count;
413 
414 	a6xx_state->debugbus = state_kcalloc(a6xx_state, total_debugbus_blocks,
415 			sizeof(*a6xx_state->debugbus));
416 
417 	if (a6xx_state->debugbus) {
418 		for (i = 0; i < debugbus_blocks_count; i++) {
419 			a6xx_get_debugbus_block(gpu,
420 				a6xx_state, &a7xx_debugbus_blocks[debugbus_blocks[i]],
421 				&a6xx_state->debugbus[i]);
422 		}
423 
424 		for (i = 0; i < gbif_debugbus_blocks_count; i++) {
425 			a6xx_get_debugbus_block(gpu,
426 				a6xx_state, &a7xx_debugbus_blocks[gbif_debugbus_blocks[i]],
427 				&a6xx_state->debugbus[i + debugbus_blocks_count]);
428 		}
429 		}

		a6xx_state->nr_debugbus = total_debugbus_blocks;
430 	}
431 }
432 
433 static void a6xx_get_debugbus(struct msm_gpu *gpu,
434 		struct a6xx_gpu_state *a6xx_state)
435 {
436 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
437 	struct resource *res;
438 	void __iomem *cxdbg = NULL;
439 
440 	/* Set up the GX debug bus */
441 
442 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
443 		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
444 
445 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
446 		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
447 
448 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
449 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
450 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
451 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
452 
453 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
454 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
455 
456 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
457 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
458 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
459 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
460 
461 	/* Set up the CX debug bus - it lives elsewhere in the system so do a
462 	 * temporary ioremap for the registers
463 	 */
464 	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
465 			"cx_dbgc");
466 
467 	if (res)
468 		cxdbg = ioremap(res->start, resource_size(res));
469 
470 	if (cxdbg) {
471 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
472 			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
473 
474 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
475 			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
476 
477 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
478 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
479 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
480 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
481 
482 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
483 			0x76543210);
484 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
485 			0xFEDCBA98);
486 
487 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
488 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
489 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
490 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
491 	}
492 
493 	if (adreno_is_a7xx(adreno_gpu)) {
494 		a7xx_get_debugbus_blocks(gpu, a6xx_state);
495 	} else {
496 		a6xx_get_debugbus_blocks(gpu, a6xx_state);
497 	}
498 
499 	/* Dump the VBIF debugbus on applicable targets */
500 	if (!a6xx_has_gbif(adreno_gpu)) {
501 		a6xx_state->vbif_debugbus =
502 			state_kcalloc(a6xx_state, 1,
503 					sizeof(*a6xx_state->vbif_debugbus));
504 
505 		if (a6xx_state->vbif_debugbus)
506 			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
507 					a6xx_state->vbif_debugbus);
508 	}
509 
510 	if (cxdbg) {
511 		unsigned nr_cx_debugbus_blocks;
512 		const struct a6xx_debugbus_block *cx_debugbus_blocks;
513 
514 		if (adreno_is_a7xx(adreno_gpu)) {
515 			BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
516 			cx_debugbus_blocks = a7xx_cx_debugbus_blocks;
517 			nr_cx_debugbus_blocks = ARRAY_SIZE(a7xx_cx_debugbus_blocks);
518 		} else {
519 			cx_debugbus_blocks = a6xx_cx_debugbus_blocks;
520 			nr_cx_debugbus_blocks = ARRAY_SIZE(a6xx_cx_debugbus_blocks);
521 		}
522 
523 		a6xx_state->cx_debugbus =
524 			state_kcalloc(a6xx_state,
525 			nr_cx_debugbus_blocks,
526 			sizeof(*a6xx_state->cx_debugbus));
527 
528 		if (a6xx_state->cx_debugbus) {
529 			int i;
530 
531 			for (i = 0; i < nr_cx_debugbus_blocks; i++)
532 				a6xx_get_cx_debugbus_block(cxdbg,
533 					a6xx_state,
534 					&cx_debugbus_blocks[i],
535 					&a6xx_state->cx_debugbus[i]);
536 
537 			a6xx_state->nr_cx_debugbus =
538 				nr_cx_debugbus_blocks;
539 		}
540 
541 		iounmap(cxdbg);
542 	}
543 }
544 
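/*
 * Register tables in the snapshot headers are flat lists of inclusive
 * (first, last) dword-offset pairs; RANGE() turns one pair into a register
 * count, e.g. a hypothetical pair { 0x0100, 0x0103 } covers four registers.
 */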
545 #define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
546 
547 /* Read a data cluster from behind the AHB aperture */
548 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
549 		struct a6xx_gpu_state *a6xx_state,
550 		const struct a6xx_dbgahb_cluster *dbgahb,
551 		struct a6xx_gpu_state_obj *obj,
552 		struct a6xx_crashdumper *dumper)
553 {
554 	u64 *in = dumper->ptr;
555 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
556 	size_t datasize;
557 	int i, regcount = 0;
558 
559 	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
560 		int j;
561 
562 		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
563 			(dbgahb->statetype + i * 2) << 8);
564 
565 		for (j = 0; j < dbgahb->count; j += 2) {
566 			int count = RANGE(dbgahb->registers, j);
567 			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
568 				dbgahb->registers[j] - (dbgahb->base >> 2);
569 
570 			in += CRASHDUMP_READ(in, offset, count, out);
571 
572 			out += count * sizeof(u32);
573 
574 			if (i == 0)
575 				regcount += count;
576 		}
577 	}
578 
579 	CRASHDUMP_FINI(in);
580 
581 	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
582 
583 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
584 		return;
585 
586 	if (a6xx_crashdumper_run(gpu, dumper))
587 		return;
588 
589 	obj->handle = dbgahb;
590 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
591 		datasize);
592 }
593 
594 static void a7xx_get_dbgahb_cluster(struct msm_gpu *gpu,
595 		struct a6xx_gpu_state *a6xx_state,
596 		const struct gen7_sptp_cluster_registers *dbgahb,
597 		struct a6xx_gpu_state_obj *obj,
598 		struct a6xx_crashdumper *dumper)
599 {
600 	u64 *in = dumper->ptr;
601 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
602 	size_t datasize;
603 	int i, regcount = 0;
604 
605 	in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
606 		A7XX_SP_READ_SEL_LOCATION(dbgahb->location_id) |
607 		A7XX_SP_READ_SEL_PIPE(dbgahb->pipe_id) |
608 		A7XX_SP_READ_SEL_STATETYPE(dbgahb->statetype));
609 
610 	for (i = 0; dbgahb->regs[i] != UINT_MAX; i += 2) {
611 		int count = RANGE(dbgahb->regs, i);
612 		u32 offset = REG_A7XX_SP_AHB_READ_APERTURE +
613 			dbgahb->regs[i] - dbgahb->regbase;
614 
615 		in += CRASHDUMP_READ(in, offset, count, out);
616 
617 		out += count * sizeof(u32);
618 		regcount += count;
619 	}
620 
621 	CRASHDUMP_FINI(in);
622 
623 	datasize = regcount * sizeof(u32);
624 
625 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
626 		return;
627 
628 	if (a6xx_crashdumper_run(gpu, dumper))
629 		return;
630 
631 	obj->handle = dbgahb;
632 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
633 		datasize);
634 }
635 
636 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
637 		struct a6xx_gpu_state *a6xx_state,
638 		struct a6xx_crashdumper *dumper)
639 {
640 	int i;
641 
642 	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
643 		ARRAY_SIZE(a6xx_dbgahb_clusters),
644 		sizeof(*a6xx_state->dbgahb_clusters));
645 
646 	if (!a6xx_state->dbgahb_clusters)
647 		return;
648 
649 	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
650 
651 	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
652 		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
653 			&a6xx_dbgahb_clusters[i],
654 			&a6xx_state->dbgahb_clusters[i], dumper);
655 }
656 
657 static void a7xx_get_dbgahb_clusters(struct msm_gpu *gpu,
658 		struct a6xx_gpu_state *a6xx_state,
659 		struct a6xx_crashdumper *dumper)
660 {
661 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
662 	int i;
663 	const struct gen7_sptp_cluster_registers *dbgahb_clusters;
664 	unsigned dbgahb_clusters_size;
665 
666 	if (adreno_is_a730(adreno_gpu)) {
667 		dbgahb_clusters = gen7_0_0_sptp_clusters;
668 		dbgahb_clusters_size = ARRAY_SIZE(gen7_0_0_sptp_clusters);
669 	} else {
670 		BUG_ON(!adreno_is_a740_family(adreno_gpu));
671 		dbgahb_clusters = gen7_2_0_sptp_clusters;
672 		dbgahb_clusters_size = ARRAY_SIZE(gen7_2_0_sptp_clusters);
673 	}
674 
675 	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
676 		dbgahb_clusters_size,
677 		sizeof(*a6xx_state->dbgahb_clusters));
678 
679 	if (!a6xx_state->dbgahb_clusters)
680 		return;
681 
682 	a6xx_state->nr_dbgahb_clusters = dbgahb_clusters_size;
683 
684 	for (i = 0; i < dbgahb_clusters_size; i++)
685 		a7xx_get_dbgahb_cluster(gpu, a6xx_state,
686 			&dbgahb_clusters[i],
687 			&a6xx_state->dbgahb_clusters[i], dumper);
688 }
689 
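/*
 * Cluster registers sit behind the CP aperture: the script programs
 * CP_APERTURE_CNTL_CD with the cluster id and context before each group of
 * reads, so each a6xx cluster is captured once per context (the a7xx variant
 * encodes pipe/cluster/context explicitly instead).
 */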
690 /* Read a data cluster from the CP aperture with the crashdumper */
691 static void a6xx_get_cluster(struct msm_gpu *gpu,
692 		struct a6xx_gpu_state *a6xx_state,
693 		const struct a6xx_cluster *cluster,
694 		struct a6xx_gpu_state_obj *obj,
695 		struct a6xx_crashdumper *dumper)
696 {
697 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
698 	u64 *in = dumper->ptr;
699 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
700 	size_t datasize;
701 	int i, regcount = 0;
702 	u32 id = cluster->id;
703 
704 	/* Skip registers that are not present on older generations */
705 	if (!adreno_is_a660_family(adreno_gpu) &&
706 			cluster->registers == a660_fe_cluster)
707 		return;
708 
709 	if (adreno_is_a650_family(adreno_gpu) &&
710 			cluster->registers == a6xx_ps_cluster)
711 		id = CLUSTER_VPC_PS;
712 
713 	/* Some clusters need a selector register to be programmed too */
714 	if (cluster->sel_reg)
715 		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
716 
717 	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
718 		int j;
719 
720 		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
721 			(id << 8) | (i << 4) | i);
722 
723 		for (j = 0; j < cluster->count; j += 2) {
724 			int count = RANGE(cluster->registers, j);
725 
726 			in += CRASHDUMP_READ(in, cluster->registers[j],
727 				count, out);
728 
729 			out += count * sizeof(u32);
730 
731 			if (i == 0)
732 				regcount += count;
733 		}
734 	}
735 
736 	CRASHDUMP_FINI(in);
737 
738 	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
739 
740 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
741 		return;
742 
743 	if (a6xx_crashdumper_run(gpu, dumper))
744 		return;
745 
746 	obj->handle = cluster;
747 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
748 		datasize);
749 }
750 
751 static void a7xx_get_cluster(struct msm_gpu *gpu,
752 		struct a6xx_gpu_state *a6xx_state,
753 		const struct gen7_cluster_registers *cluster,
754 		struct a6xx_gpu_state_obj *obj,
755 		struct a6xx_crashdumper *dumper)
756 {
757 	u64 *in = dumper->ptr;
758 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
759 	size_t datasize;
760 	int i, regcount = 0;
761 
762 	/* Some clusters need a selector register to be programmed too */
763 	if (cluster->sel)
764 		in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val);
765 
766 	in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD,
767 		A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) |
768 		A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) |
769 		A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id));
770 
771 	for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) {
772 		int count = RANGE(cluster->regs, i);
773 
774 		in += CRASHDUMP_READ(in, cluster->regs[i],
775 			count, out);
776 
777 		out += count * sizeof(u32);
778 		regcount += count;
779 	}
780 
781 	CRASHDUMP_FINI(in);
782 
783 	datasize = regcount * sizeof(u32);
784 
785 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
786 		return;
787 
788 	if (a6xx_crashdumper_run(gpu, dumper))
789 		return;
790 
791 	obj->handle = cluster;
792 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
793 		datasize);
794 }
795 
796 static void a6xx_get_clusters(struct msm_gpu *gpu,
797 		struct a6xx_gpu_state *a6xx_state,
798 		struct a6xx_crashdumper *dumper)
799 {
800 	int i;
801 
802 	a6xx_state->clusters = state_kcalloc(a6xx_state,
803 		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
804 
805 	if (!a6xx_state->clusters)
806 		return;
807 
808 	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
809 
810 	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
811 		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
812 			&a6xx_state->clusters[i], dumper);
813 }
814 
815 static void a7xx_get_clusters(struct msm_gpu *gpu,
816 		struct a6xx_gpu_state *a6xx_state,
817 		struct a6xx_crashdumper *dumper)
818 {
819 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
820 	int i;
821 	const struct gen7_cluster_registers *clusters;
822 	unsigned clusters_size;
823 
824 	if (adreno_is_a730(adreno_gpu)) {
825 		clusters = gen7_0_0_clusters;
826 		clusters_size = ARRAY_SIZE(gen7_0_0_clusters);
827 	} else if (adreno_is_a740_family(adreno_gpu)) {
828 		clusters = gen7_2_0_clusters;
829 		clusters_size = ARRAY_SIZE(gen7_2_0_clusters);
830 	} else {
831 		BUG_ON(!adreno_is_a750(adreno_gpu));
832 		clusters = gen7_9_0_clusters;
833 		clusters_size = ARRAY_SIZE(gen7_9_0_clusters);
834 	}
835 
836 	a6xx_state->clusters = state_kcalloc(a6xx_state,
837 		clusters_size, sizeof(*a6xx_state->clusters));
838 
839 	if (!a6xx_state->clusters)
840 		return;
841 
842 	a6xx_state->nr_clusters = clusters_size;
843 
844 	for (i = 0; i < clusters_size; i++)
845 		a7xx_get_cluster(gpu, a6xx_state, &clusters[i],
846 			&a6xx_state->clusters[i], dumper);
847 }
848 
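/*
 * Shader/debug RAMs are read through the HLSQ aperture: select
 * (type << 8) | bank in HLSQ_DBG_READ_SEL and stream block->size dwords from
 * the AHB read aperture, once per shader bank (the a7xx variant selects
 * through SP_READ_SEL per SP/USPTP instead).
 */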
849 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */
850 static void a6xx_get_shader_block(struct msm_gpu *gpu,
851 		struct a6xx_gpu_state *a6xx_state,
852 		const struct a6xx_shader_block *block,
853 		struct a6xx_gpu_state_obj *obj,
854 		struct a6xx_crashdumper *dumper)
855 {
856 	u64 *in = dumper->ptr;
857 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
858 	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
859 	int i;
860 
861 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
862 		return;
863 
864 	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
865 		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
866 			(block->type << 8) | i);
867 
868 		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
869 			block->size, out);
870 
871 		out += block->size * sizeof(u32);
872 	}
873 
874 	CRASHDUMP_FINI(in);
875 
876 	if (a6xx_crashdumper_run(gpu, dumper))
877 		return;
878 
879 	obj->handle = block;
880 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
881 		datasize);
882 }
883 
884 static void a7xx_get_shader_block(struct msm_gpu *gpu,
885 		struct a6xx_gpu_state *a6xx_state,
886 		const struct gen7_shader_block *block,
887 		struct a6xx_gpu_state_obj *obj,
888 		struct a6xx_crashdumper *dumper)
889 {
890 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
891 	u64 *in = dumper->ptr;
892 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
893 	size_t datasize = block->size * block->num_sps * block->num_usptps * sizeof(u32);
894 	int i, j;
895 
896 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
897 		return;
898 
899 	if (adreno_is_a730(adreno_gpu)) {
900 		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 3);
901 	}
902 
903 	for (i = 0; i < block->num_sps; i++) {
904 		for (j = 0; j < block->num_usptps; j++) {
905 			in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
906 				A7XX_SP_READ_SEL_LOCATION(block->location) |
907 				A7XX_SP_READ_SEL_PIPE(block->pipeid) |
908 				A7XX_SP_READ_SEL_STATETYPE(block->statetype) |
909 				A7XX_SP_READ_SEL_USPTP(j) |
910 				A7XX_SP_READ_SEL_SPTP(i));
911 
912 			in += CRASHDUMP_READ(in, REG_A7XX_SP_AHB_READ_APERTURE,
913 				block->size, out);
914 
915 			out += block->size * sizeof(u32);
916 		}
917 	}
918 
919 	CRASHDUMP_FINI(in);
920 
921 	if (a6xx_crashdumper_run(gpu, dumper))
922 		goto out;
923 
924 	obj->handle = block;
925 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
926 		datasize);
927 
928 out:
929 	if (adreno_is_a730(adreno_gpu)) {
930 		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 0);
931 	}
932 }
933 
934 static void a6xx_get_shaders(struct msm_gpu *gpu,
935 		struct a6xx_gpu_state *a6xx_state,
936 		struct a6xx_crashdumper *dumper)
937 {
938 	int i;
939 
940 	a6xx_state->shaders = state_kcalloc(a6xx_state,
941 		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
942 
943 	if (!a6xx_state->shaders)
944 		return;
945 
946 	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
947 
948 	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
949 		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
950 			&a6xx_state->shaders[i], dumper);
951 }
952 
953 static void a7xx_get_shaders(struct msm_gpu *gpu,
954 		struct a6xx_gpu_state *a6xx_state,
955 		struct a6xx_crashdumper *dumper)
956 {
957 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
958 	const struct gen7_shader_block *shader_blocks;
959 	unsigned num_shader_blocks;
960 	int i;
961 
962 	if (adreno_is_a730(adreno_gpu)) {
963 		shader_blocks = gen7_0_0_shader_blocks;
964 		num_shader_blocks = ARRAY_SIZE(gen7_0_0_shader_blocks);
965 	} else if (adreno_is_a740_family(adreno_gpu)) {
966 		shader_blocks = gen7_2_0_shader_blocks;
967 		num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks);
968 	} else {
969 		BUG_ON(!adreno_is_a750(adreno_gpu));
970 		shader_blocks = gen7_9_0_shader_blocks;
971 		num_shader_blocks = ARRAY_SIZE(gen7_9_0_shader_blocks);
972 	}
973 
974 	a6xx_state->shaders = state_kcalloc(a6xx_state,
975 		num_shader_blocks, sizeof(*a6xx_state->shaders));
976 
977 	if (!a6xx_state->shaders)
978 		return;
979 
980 	a6xx_state->nr_shaders = num_shader_blocks;
981 
982 	for (i = 0; i < num_shader_blocks; i++)
983 		a7xx_get_shader_block(gpu, a6xx_state, &shader_blocks[i],
984 			&a6xx_state->shaders[i], dumper);
985 }
986 
987 /* Read registers from behind the HLSQ aperture with the crashdumper */
988 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
989 		struct a6xx_gpu_state *a6xx_state,
990 		const struct a6xx_registers *regs,
991 		struct a6xx_gpu_state_obj *obj,
992 		struct a6xx_crashdumper *dumper)
993 
994 {
995 	u64 *in = dumper->ptr;
996 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
997 	int i, regcount = 0;
998 
999 	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
1000 
1001 	for (i = 0; i < regs->count; i += 2) {
1002 		u32 count = RANGE(regs->registers, i);
1003 		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
1004 			regs->registers[i] - (regs->val0 >> 2);
1005 
1006 		in += CRASHDUMP_READ(in, offset, count, out);
1007 
1008 		out += count * sizeof(u32);
1009 		regcount += count;
1010 	}
1011 
1012 	CRASHDUMP_FINI(in);
1013 
1014 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1015 		return;
1016 
1017 	if (a6xx_crashdumper_run(gpu, dumper))
1018 		return;
1019 
1020 	obj->handle = regs;
1021 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1022 		regcount * sizeof(u32));
1023 }
1024 
1025 /* Read a block of registers using the crashdumper */
1026 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
1027 		struct a6xx_gpu_state *a6xx_state,
1028 		const struct a6xx_registers *regs,
1029 		struct a6xx_gpu_state_obj *obj,
1030 		struct a6xx_crashdumper *dumper)
1031 
1032 {
1033 	u64 *in = dumper->ptr;
1034 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1035 	int i, regcount = 0;
1036 
1037 	/* Skip unsupported registers on older generations */
1038 	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1039 			(regs->registers == a660_registers))
1040 		return;
1041 
1042 	/* Some blocks might need to program a selector register first */
1043 	if (regs->val0)
1044 		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
1045 
1046 	for (i = 0; i < regs->count; i += 2) {
1047 		u32 count = RANGE(regs->registers, i);
1048 
1049 		in += CRASHDUMP_READ(in, regs->registers[i], count, out);
1050 
1051 		out += count * sizeof(u32);
1052 		regcount += count;
1053 	}
1054 
1055 	CRASHDUMP_FINI(in);
1056 
1057 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1058 		return;
1059 
1060 	if (a6xx_crashdumper_run(gpu, dumper))
1061 		return;
1062 
1063 	obj->handle = regs;
1064 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1065 		regcount * sizeof(u32));
1066 }
1067 
1068 static void a7xx_get_crashdumper_registers(struct msm_gpu *gpu,
1069 		struct a6xx_gpu_state *a6xx_state,
1070 		const struct gen7_reg_list *regs,
1071 		struct a6xx_gpu_state_obj *obj,
1072 		struct a6xx_crashdumper *dumper)
1073 
1074 {
1075 	u64 *in = dumper->ptr;
1076 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1077 	int i, regcount = 0;
1078 
1079 	/* Some blocks might need to program a selector register first */
1080 	if (regs->sel)
1081 		in += CRASHDUMP_WRITE(in, regs->sel->cd_reg, regs->sel->val);
1082 
1083 	for (i = 0; regs->regs[i] != UINT_MAX; i += 2) {
1084 		u32 count = RANGE(regs->regs, i);
1085 
1086 		in += CRASHDUMP_READ(in, regs->regs[i], count, out);
1087 
1088 		out += count * sizeof(u32);
1089 		regcount += count;
1090 	}
1091 
1092 	CRASHDUMP_FINI(in);
1093 
1094 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1095 		return;
1096 
1097 	if (a6xx_crashdumper_run(gpu, dumper))
1098 		return;
1099 
1100 	obj->handle = regs->regs;
1101 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1102 		regcount * sizeof(u32));
1103 }
1104 
1105 
1106 /* Read a block of registers via AHB */
1107 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1108 		struct a6xx_gpu_state *a6xx_state,
1109 		const struct a6xx_registers *regs,
1110 		struct a6xx_gpu_state_obj *obj)
1111 {
1112 	int i, regcount = 0, index = 0;
1113 
1114 	/* Skip unsupported registers on older generations */
1115 	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1116 			(regs->registers == a660_registers))
1117 		return;
1118 
1119 	for (i = 0; i < regs->count; i += 2)
1120 		regcount += RANGE(regs->registers, i);
1121 
1122 	obj->handle = (const void *) regs;
1123 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1124 	if (!obj->data)
1125 		return;
1126 
1127 	for (i = 0; i < regs->count; i += 2) {
1128 		u32 count = RANGE(regs->registers, i);
1129 		int j;
1130 
1131 		for (j = 0; j < count; j++)
1132 			obj->data[index++] = gpu_read(gpu,
1133 				regs->registers[i] + j);
1134 	}
1135 }
1136 
1137 static void a7xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1138 		struct a6xx_gpu_state *a6xx_state,
1139 		const u32 *regs,
1140 		struct a6xx_gpu_state_obj *obj)
1141 {
1142 	int i, regcount = 0, index = 0;
1143 
1144 	for (i = 0; regs[i] != UINT_MAX; i += 2)
1145 		regcount += RANGE(regs, i);
1146 
1147 	obj->handle = (const void *) regs;
1148 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1149 	if (!obj->data)
1150 		return;
1151 
1152 	for (i = 0; regs[i] != UINT_MAX; i += 2) {
1153 		u32 count = RANGE(regs, i);
1154 		int j;
1155 
1156 		for (j = 0; j < count; j++)
1157 			obj->data[index++] = gpu_read(gpu, regs[i] + j);
1158 	}
1159 }
1160 
1161 static void a7xx_get_ahb_gpu_reglist(struct msm_gpu *gpu,
1162 		struct a6xx_gpu_state *a6xx_state,
1163 		const struct gen7_reg_list *regs,
1164 		struct a6xx_gpu_state_obj *obj)
1165 {
1166 	if (regs->sel)
1167 		gpu_write(gpu, regs->sel->host_reg, regs->sel->val);
1168 
1169 	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, regs->regs, obj);
1170 }
1171 
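/*
 * GMU and RSCC registers are read directly on the CPU via gmu_read() and
 * gmu_read_rscc() rather than with the crashdumper, since they live in the
 * GMU's own register spaces. The third list in a6xx_gmu_reglist[] additionally
 * requires GX to be up and the AHB fence dropped to ALLOW mode.
 */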
1172 /* Read a block of GMU registers */
1173 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
1174 		struct a6xx_gpu_state *a6xx_state,
1175 		const struct a6xx_registers *regs,
1176 		struct a6xx_gpu_state_obj *obj,
1177 		bool rscc)
1178 {
1179 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1180 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1181 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1182 	int i, regcount = 0, index = 0;
1183 
1184 	for (i = 0; i < regs->count; i += 2)
1185 		regcount += RANGE(regs->registers, i);
1186 
1187 	obj->handle = (const void *) regs;
1188 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1189 	if (!obj->data)
1190 		return;
1191 
1192 	for (i = 0; i < regs->count; i += 2) {
1193 		u32 count = RANGE(regs->registers, i);
1194 		int j;
1195 
1196 		for (j = 0; j < count; j++) {
1197 			u32 offset = regs->registers[i] + j;
1198 			u32 val;
1199 
1200 			if (rscc)
1201 				val = gmu_read_rscc(gmu, offset);
1202 			else
1203 				val = gmu_read(gmu, offset);
1204 
1205 			obj->data[index++] = val;
1206 		}
1207 	}
1208 }
1209 
1210 static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
1211 		struct a6xx_gpu_state *a6xx_state)
1212 {
1213 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1214 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1215 
1216 	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
1217 		3, sizeof(*a6xx_state->gmu_registers));
1218 
1219 	if (!a6xx_state->gmu_registers)
1220 		return;
1221 
1222 	a6xx_state->nr_gmu_registers = 3;
1223 
1224 	/* Get the CX GMU registers from AHB */
1225 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
1226 		&a6xx_state->gmu_registers[0], false);
1227 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
1228 		&a6xx_state->gmu_registers[1], true);
1229 
1230 	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1231 		return;
1232 
1233 	/* Set the fence to ALLOW mode so we can access the registers */
1234 	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
1235 
1236 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
1237 		&a6xx_state->gmu_registers[2], false);
1238 }
1239 
1240 static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
1241 		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
1242 {
1243 	struct msm_gpu_state_bo *snapshot;
1244 
1245 	if (!bo->size)
1246 		return NULL;
1247 
1248 	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
1249 	if (!snapshot)
1250 		return NULL;
1251 
1252 	snapshot->iova = bo->iova;
1253 	snapshot->size = bo->size;
1254 	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
1255 	if (!snapshot->data)
1256 		return NULL;
1257 
1258 	memcpy(snapshot->data, bo->virt, bo->size);
1259 
1260 	return snapshot;
1261 }
1262 
1263 static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
1264 					  struct a6xx_gpu_state *a6xx_state)
1265 {
1266 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1267 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1268 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1269 	unsigned i, j;
1270 
1271 	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
1272 
1273 	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
1274 		struct a6xx_hfi_queue *queue = &gmu->queues[i];
1275 		for (j = 0; j < HFI_HISTORY_SZ; j++) {
1276 			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
1277 			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
1278 		}
1279 	}
1280 }
1281 
1282 #define A6XX_REGLIST_SIZE        1
1283 #define A6XX_GBIF_REGLIST_SIZE   1
1284 static void a6xx_get_registers(struct msm_gpu *gpu,
1285 		struct a6xx_gpu_state *a6xx_state,
1286 		struct a6xx_crashdumper *dumper)
1287 {
1288 	int i, count = A6XX_REGLIST_SIZE +
1289 		ARRAY_SIZE(a6xx_reglist) +
1290 		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
1291 	int index = 0;
1292 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1293 
1294 	a6xx_state->registers = state_kcalloc(a6xx_state,
1295 		count, sizeof(*a6xx_state->registers));
1296 
1297 	if (!a6xx_state->registers)
1298 		return;
1299 
1300 	a6xx_state->nr_registers = count;
1301 
1302 	a6xx_get_ahb_gpu_registers(gpu,
1303 		a6xx_state, &a6xx_ahb_reglist,
1304 		&a6xx_state->registers[index++]);
1305 
1306 	if (a6xx_has_gbif(adreno_gpu))
1307 		a6xx_get_ahb_gpu_registers(gpu,
1308 				a6xx_state, &a6xx_gbif_reglist,
1309 				&a6xx_state->registers[index++]);
1310 	else
1311 		a6xx_get_ahb_gpu_registers(gpu,
1312 				a6xx_state, &a6xx_vbif_reglist,
1313 				&a6xx_state->registers[index++]);
1314 	if (!dumper) {
1315 		/*
1316 		 * We can't use the crashdumper when the SMMU is stalled,
1317 		 * because the GPU has no memory access until we resume
1318 		 * translation (but we don't want to do that until after
1319 		 * we have captured as much useful GPU state as possible).
1320 		 * So instead collect registers via the CPU:
1321 		 */
1322 		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1323 			a6xx_get_ahb_gpu_registers(gpu,
1324 				a6xx_state, &a6xx_reglist[i],
1325 				&a6xx_state->registers[index++]);
1326 		return;
1327 	}
1328 
1329 	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1330 		a6xx_get_crashdumper_registers(gpu,
1331 			a6xx_state, &a6xx_reglist[i],
1332 			&a6xx_state->registers[index++],
1333 			dumper);
1334 
1335 	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
1336 		a6xx_get_crashdumper_hlsq_registers(gpu,
1337 			a6xx_state, &a6xx_hlsq_reglist[i],
1338 			&a6xx_state->registers[index++],
1339 			dumper);
1340 }
1341 
1342 #define A7XX_PRE_CRASHDUMPER_SIZE    1
1343 #define A7XX_POST_CRASHDUMPER_SIZE   1
1344 static void a7xx_get_registers(struct msm_gpu *gpu,
1345 		struct a6xx_gpu_state *a6xx_state,
1346 		struct a6xx_crashdumper *dumper)
1347 {
1348 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1349 	int i, count;
1350 	int index = 0;
1351 	const u32 *pre_crashdumper_regs;
1352 	const struct gen7_reg_list *reglist;
1353 
1354 	if (adreno_is_a730(adreno_gpu)) {
1355 		reglist = gen7_0_0_reg_list;
1356 		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1357 	} else if (adreno_is_a740_family(adreno_gpu)) {
1358 		reglist = gen7_2_0_reg_list;
1359 		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1360 	} else {
1361 		BUG_ON(!adreno_is_a750(adreno_gpu));
1362 		reglist = gen7_9_0_reg_list;
1363 		pre_crashdumper_regs = gen7_9_0_pre_crashdumper_gpu_registers;
1364 	}
1365 
1366 	count = A7XX_PRE_CRASHDUMPER_SIZE + A7XX_POST_CRASHDUMPER_SIZE;
1367 
1368 	/* The downstream reglist contains registers in other memory regions
1369 	 * (cx_misc/cx_mem and cx_dbgc); reading those on the CPU would require
1370 	 * plumbing through their offsets and mapping the regions. For now only
1371 	 * read the first region, which is the main one.
1372 	 */
1373 	if (dumper) {
1374 		for (i = 0; reglist[i].regs; i++)
1375 			count++;
1376 	} else {
1377 		count++;
1378 	}
1379 
1380 	a6xx_state->registers = state_kcalloc(a6xx_state,
1381 		count, sizeof(*a6xx_state->registers));
1382 
1383 	if (!a6xx_state->registers)
1384 		return;
1385 
1386 	a6xx_state->nr_registers = count;
1387 
1388 	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, pre_crashdumper_regs,
1389 		&a6xx_state->registers[index++]);
1390 
1391 	if (!dumper) {
1392 		a7xx_get_ahb_gpu_reglist(gpu,
1393 			a6xx_state, &reglist[0],
1394 			&a6xx_state->registers[index++]);
1395 		return;
1396 	}
1397 
1398 	for (i = 0; reglist[i].regs; i++)
1399 		a7xx_get_crashdumper_registers(gpu,
1400 			a6xx_state, &reglist[i],
1401 			&a6xx_state->registers[index++],
1402 			dumper);
1403 }
1404 
1405 static void a7xx_get_post_crashdumper_registers(struct msm_gpu *gpu,
1406 		struct a6xx_gpu_state *a6xx_state)
1407 {
1408 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1409 	const u32 *regs;
1410 
1411 	BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu) ||
1412 		 adreno_is_a750(adreno_gpu)));
1413 	regs = gen7_0_0_post_crashdumper_registers;
1414 
1415 	a7xx_get_ahb_gpu_registers(gpu,
1416 		a6xx_state, regs,
1417 		&a6xx_state->registers[a6xx_state->nr_registers - 1]);
1418 }
1419 
1420 static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu)
1421 {
1422 	/* The value at [16:31] is in 4dword units. Convert it to dwords */
1423 	return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14;
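	/* >> 14 == (>> 16) * 4, assuming the bits below the field read as 0 */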
1424 }
1425 
1426 static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu)
1427 {
1428 	/*
1429 	 * The value at CP_ROQ_THRESHOLDS_2[20:31] is in 4dword units.
1430 	 * That register however is not directly accessible from APSS on A7xx.
1431 	 * Program the SQE_UCODE_DBG_ADDR with offset=0x70d3 and read the value.
1432 	 */
1433 	gpu_write(gpu, REG_A6XX_CP_SQE_UCODE_DBG_ADDR, 0x70d3);
1434 
1435 	return 4 * (gpu_read(gpu, REG_A6XX_CP_SQE_UCODE_DBG_DATA) >> 20);
1436 }
1437 
1438 /* Read a block of data from an indexed register pair */
1439 static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
1440 		struct a6xx_gpu_state *a6xx_state,
1441 		const struct a6xx_indexed_registers *indexed,
1442 		struct a6xx_gpu_state_obj *obj)
1443 {
1444 	u32 count = indexed->count;
1445 	int i;
1446 
1447 	obj->handle = (const void *) indexed;
1448 	if (indexed->count_fn)
1449 		count = indexed->count_fn(gpu);
1450 
1451 	obj->data = state_kcalloc(a6xx_state, count, sizeof(u32));
1452 	obj->count = count;
1453 	if (!obj->data)
1454 		return;
1455 
1456 	/* All the indexed banks start at address 0 */
1457 	gpu_write(gpu, indexed->addr, 0);
1458 
1459 	/* Read the data - each read increments the internal address by 1 */
1460 	for (i = 0; i < count; i++)
1461 		obj->data[i] = gpu_read(gpu, indexed->data);
1462 }
1463 
1464 static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
1465 		struct a6xx_gpu_state *a6xx_state)
1466 {
1467 	u32 mempool_size;
1468 	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
1469 	int i;
1470 
1471 	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
1472 		sizeof(*a6xx_state->indexed_regs));
1473 	if (!a6xx_state->indexed_regs)
1474 		return;
1475 
1476 	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
1477 		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
1478 			&a6xx_state->indexed_regs[i]);
1479 
1480 	if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
1481 		u32 val;
1482 
1483 		val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG);
1484 		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4);
1485 
1486 		/* Get the contents of the CP mempool */
1487 		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1488 			&a6xx_state->indexed_regs[i]);
1489 
1490 		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val);
1491 		a6xx_state->nr_indexed_regs = count;
1492 		return;
1493 	}
1494 
1495 	/* Set the CP mempool size to 0 to stabilize it while dumping */
1496 	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
1497 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
1498 
1499 	/* Get the contents of the CP mempool */
1500 	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1501 		&a6xx_state->indexed_regs[i]);
1502 
1503 	/*
1504 	 * Offset 0x2000 in the mempool is the size - copy the saved size over
1505 	 * so the data is consistent
1506 	 */
1507 	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
1508 
1509 	/* Restore the size in the hardware */
1510 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);

	a6xx_state->nr_indexed_regs = count;
1511 }
1512 
1513 static void a7xx_get_indexed_registers(struct msm_gpu *gpu,
1514 		struct a6xx_gpu_state *a6xx_state)
1515 {
1516 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1517 	const struct a6xx_indexed_registers *indexed_regs;
1518 	int i, indexed_count, mempool_count;
1519 
1520 	if (adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)) {
1521 		indexed_regs = a7xx_indexed_reglist;
1522 		indexed_count = ARRAY_SIZE(a7xx_indexed_reglist);
1523 	} else {
1524 		BUG_ON(!adreno_is_a750(adreno_gpu));
1525 		indexed_regs = gen7_9_0_cp_indexed_reg_list;
1526 		indexed_count = ARRAY_SIZE(gen7_9_0_cp_indexed_reg_list);
1527 	}
1528 
1529 	mempool_count = ARRAY_SIZE(a7xx_cp_bv_mempool_indexed);
1530 
1531 	a6xx_state->indexed_regs = state_kcalloc(a6xx_state,
1532 					indexed_count + mempool_count,
1533 					sizeof(*a6xx_state->indexed_regs));
1534 	if (!a6xx_state->indexed_regs)
1535 		return;
1536 
1537 	a6xx_state->nr_indexed_regs = indexed_count + mempool_count;
1538 
1539 	/* First read the common regs */
1540 	for (i = 0; i < indexed_count; i++)
1541 		a6xx_get_indexed_regs(gpu, a6xx_state, &indexed_regs[i],
1542 			&a6xx_state->indexed_regs[i]);
1543 
1544 	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, 0, BIT(2));
1545 	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, 0, BIT(2));
1546 
1547 	/* Get the contents of the CP_BV mempool */
1548 	for (i = 0; i < mempool_count; i++)
1549 		a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_cp_bv_mempool_indexed[i],
1550 			&a6xx_state->indexed_regs[indexed_count + i]);
1551 
1552 	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(2), 0);
1553 	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, BIT(2), 0);
1555 }
1556 
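/*
 * Top-level capture flow: grab the generic adreno state, then (on targets
 * with a real GMU) the GMU registers, log/hfi/debug buffers and HFI queue
 * history, bailing out early if GX is down. Otherwise snapshot the indexed
 * register banks and use the crashdumper for registers, shaders and clusters,
 * unless the SMMU is stalled on a fault or the GPU still needs hw_init, in
 * which case registers are read back over AHB on the CPU instead. The
 * debugbus is captured last, gated by the snapshot_debugbus flag.
 */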
1557 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
1558 {
1559 	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
1560 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1561 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1562 	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
1563 		GFP_KERNEL);
1564 	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
1565 			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
1566 
1567 	if (!a6xx_state)
1568 		return ERR_PTR(-ENOMEM);
1569 
1570 	INIT_LIST_HEAD(&a6xx_state->objs);
1571 
1572 	/* Get the generic state from the adreno core */
1573 	adreno_gpu_state_get(gpu, &a6xx_state->base);
1574 
1575 	if (!adreno_has_gmu_wrapper(adreno_gpu)) {
1576 		a6xx_get_gmu_registers(gpu, a6xx_state);
1577 
1578 		a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
1579 		a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
1580 		a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);
1581 
1582 		a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
1583 	}
1584 
1585 	/* If GX isn't on, the rest of the data isn't going to be accessible */
1586 	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1587 		return &a6xx_state->base;
1588 
1589 	/* Get the banks of indexed registers */
1590 	if (adreno_is_a7xx(adreno_gpu))
1591 		a7xx_get_indexed_registers(gpu, a6xx_state);
1592 	else
1593 		a6xx_get_indexed_registers(gpu, a6xx_state);
1594 
1595 	/*
1596 	 * Try to initialize the crashdumper, if we are not dumping state
1597 	 * with the SMMU stalled.  The crashdumper needs memory access to
1598 	 * write out GPU state, so we need to skip this when the SMMU is
1599 	 * stalled in response to an iova fault
1600 	 */
1601 	if (!stalled && !gpu->needs_hw_init &&
1602 	    !a6xx_crashdumper_init(gpu, &_dumper)) {
1603 		dumper = &_dumper;
1604 	}
1605 
1606 	if (adreno_is_a7xx(adreno_gpu)) {
1607 		a7xx_get_registers(gpu, a6xx_state, dumper);
1608 
1609 		if (dumper) {
1610 			a7xx_get_shaders(gpu, a6xx_state, dumper);
1611 			a7xx_get_clusters(gpu, a6xx_state, dumper);
1612 			a7xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1613 
1614 			msm_gem_kernel_put(dumper->bo, gpu->aspace);
1615 		}
1616 
1617 		a7xx_get_post_crashdumper_registers(gpu, a6xx_state);
1618 	} else {
1619 		a6xx_get_registers(gpu, a6xx_state, dumper);
1620 
1621 		if (dumper) {
1622 			a6xx_get_shaders(gpu, a6xx_state, dumper);
1623 			a6xx_get_clusters(gpu, a6xx_state, dumper);
1624 			a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1625 
1626 			msm_gem_kernel_put(dumper->bo, gpu->aspace);
1627 		}
1628 	}
1629 
1630 	if (snapshot_debugbus)
1631 		a6xx_get_debugbus(gpu, a6xx_state);
1632 
1633 	a6xx_state->gpu_initialized = !gpu->needs_hw_init;
1634 
1635 	return &a6xx_state->base;
1636 }
1637 
1638 static void a6xx_gpu_state_destroy(struct kref *kref)
1639 {
1640 	struct a6xx_state_memobj *obj, *tmp;
1641 	struct msm_gpu_state *state = container_of(kref,
1642 			struct msm_gpu_state, ref);
1643 	struct a6xx_gpu_state *a6xx_state = container_of(state,
1644 			struct a6xx_gpu_state, base);
1645 
1646 	if (a6xx_state->gmu_log)
1647 		kvfree(a6xx_state->gmu_log->data);
1648 
1649 	if (a6xx_state->gmu_hfi)
1650 		kvfree(a6xx_state->gmu_hfi->data);
1651 
1652 	if (a6xx_state->gmu_debug)
1653 		kvfree(a6xx_state->gmu_debug->data);
1654 
1655 	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
1656 		list_del(&obj->node);
1657 		kvfree(obj);
1658 	}
1659 
1660 	adreno_gpu_state_destroy(state);
1661 	kfree(a6xx_state);
1662 }
1663 
1664 int a6xx_gpu_state_put(struct msm_gpu_state *state)
1665 {
1666 	if (IS_ERR_OR_NULL(state))
1667 		return 1;
1668 
1669 	return kref_put(&state->ref, a6xx_gpu_state_destroy);
1670 }
1671 
1672 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
1673 		struct drm_printer *p)
1674 {
1675 	int i, index = 0;
1676 
1677 	if (!data)
1678 		return;
1679 
1680 	for (i = 0; i < count; i += 2) {
1681 		u32 count = RANGE(registers, i);
1682 		u32 offset = registers[i];
1683 		int j;
1684 
1685 		for (j = 0; j < count; index++, offset++, j++) {
1686 			if (data[index] == 0xdeafbead)
1687 				continue;
1688 
1689 			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
1690 				offset << 2, data[index]);
1691 		}
1692 	}
1693 }
1694 
1695 static void a7xx_show_registers_indented(const u32 *registers, u32 *data,
1696 		struct drm_printer *p, unsigned indent)
1697 {
1698 	int i, index = 0;
1699 
1700 	for (i = 0; registers[i] != UINT_MAX; i += 2) {
1701 		u32 count = RANGE(registers, i);
1702 		u32 offset = registers[i];
1703 		int j;
1704 
1705 		for (j = 0; j < count; index++, offset++, j++) {
1706 			int k;
1707 
1708 			if (data[index] == 0xdeafbead)
1709 				continue;
1710 
1711 			for (k = 0; k < indent; k++)
1712 				drm_printf(p, "  ");
1713 			drm_printf(p, "- { offset: 0x%06x, value: 0x%08x }\n",
1714 				offset << 2, data[index]);
1715 		}
1716 	}
1717 }
1718 
1719 static void a7xx_show_registers(const u32 *registers, u32 *data, struct drm_printer *p)
1720 {
1721 	a7xx_show_registers_indented(registers, data, p, 1);
1722 }
1723 
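/*
 * Buffers are trimmed to the last non-zero dword and emitted as an
 * "!!ascii85" block so that empty captures don't bloat the devcoredump
 * output.
 */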
1724 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1725 {
1726 	char out[ASCII85_BUFSZ];
1727 	long i, l, datalen = 0;
1728 
1729 	for (i = 0; i < len >> 2; i++) {
1730 		if (data[i])
1731 			datalen = (i + 1) << 2;
1732 	}
1733 
1734 	if (datalen == 0)
1735 		return;
1736 
1737 	drm_puts(p, "    data: !!ascii85 |\n");
1738 	drm_puts(p, "      ");
1739 
1740 
1741 	l = ascii85_encode_len(datalen);
1742 
1743 	for (i = 0; i < l; i++)
1744 		drm_puts(p, ascii85_encode(data[i], out));
1745 
1746 	drm_puts(p, "\n");
1747 }
1748 
1749 static void print_name(struct drm_printer *p, const char *fmt, const char *name)
1750 {
1751 	drm_puts(p, fmt);
1752 	drm_puts(p, name);
1753 	drm_puts(p, "\n");
1754 }
1755 
1756 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1757 		struct drm_printer *p)
1758 {
1759 	const struct a6xx_shader_block *block = obj->handle;
1760 	int i;
1761 
1762 	if (!obj->handle)
1763 		return;
1764 
1765 	print_name(p, "  - type: ", block->name);
1766 
1767 	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1768 		drm_printf(p, "    - bank: %d\n", i);
1769 		drm_printf(p, "      size: %d\n", block->size);
1770 
1771 		if (!obj->data)
1772 			continue;
1773 
1774 		print_ascii85(p, block->size << 2,
1775 			obj->data + (block->size * i));
1776 	}
1777 }
1778 
1779 static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj,
1780 		struct drm_printer *p)
1781 {
1782 	const struct gen7_shader_block *block = obj->handle;
1783 	int i, j;
1784 	u32 *data = obj->data;
1785 
1786 	if (!obj->handle)
1787 		return;
1788 
1789 	print_name(p, "  - type: ", a7xx_statetype_names[block->statetype]);
1790 	print_name(p, "    - pipe: ", a7xx_pipe_names[block->pipeid]);
1791 
1792 	for (i = 0; i < block->num_sps; i++) {
1793 		drm_printf(p, "      - sp: %d\n", i);
1794 
1795 		for (j = 0; j < block->num_usptps; j++) {
1796 			drm_printf(p, "        - usptp: %d\n", j);
1797 			drm_printf(p, "          size: %d\n", block->size);
1798 
1799 			if (!obj->data)
1800 				continue;
1801 
1802 			print_ascii85(p, block->size << 2, data);
1803 
1804 			data += block->size;
1805 		}
1806 	}
1807 }
1808 
1809 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1810 		struct drm_printer *p)
1811 {
1812 	int ctx, index = 0;
1813 
1814 	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1815 		int j;
1816 
1817 		drm_printf(p, "    - context: %d\n", ctx);
1818 
1819 		for (j = 0; j < size; j += 2) {
1820 			u32 count = RANGE(registers, j);
1821 			u32 offset = registers[j];
1822 			int k;
1823 
1824 			for (k = 0; k < count; index++, offset++, k++) {
1825 				if (data[index] == 0xdeafbead)
1826 					continue;
1827 
1828 				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
1829 					offset << 2, data[index]);
1830 			}
1831 		}
1832 	}
1833 }
1834 
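/* Print one DBGAHB (SP/TP) cluster: its name and per-context register data */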
1835 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1836 		struct drm_printer *p)
1837 {
1838 	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1839 
1840 	if (dbgahb) {
1841 		print_name(p, "  - cluster-name: ", dbgahb->name);
1842 		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1843 			obj->data, p);
1844 	}
1845 }
1846 
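/* Print one register cluster: its name and per-context register data */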
1847 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1848 		struct drm_printer *p)
1849 {
1850 	const struct a6xx_cluster *cluster = obj->handle;
1851 
1852 	if (cluster) {
1853 		print_name(p, "  - cluster-name: ", cluster->name);
1854 		a6xx_show_cluster_data(cluster->registers, cluster->count,
1855 			obj->data, p);
1856 	}
1857 }
1858 
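/*
 * a7xx SP/TP cluster printer: the pipe and cluster names come from lookup
 * tables, the captured context id is printed as-is, and the registers are
 * emitted with a deeper indent so they nest under the context entry.
 */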
1859 static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1860 		struct drm_printer *p)
1861 {
1862 	const struct gen7_sptp_cluster_registers *dbgahb = obj->handle;
1863 
1864 	if (dbgahb) {
1865 		print_name(p, "  - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]);
1866 		print_name(p, "    - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]);
1867 		drm_printf(p, "      - context: %d\n", dbgahb->context_id);
1868 		a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4);
1869 	}
1870 }
1871 
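/*
 * a7xx cluster printer. The context index is derived from the capture
 * descriptor: STATE_FORCE_CTXT_1 selects context 1, anything else context 0.
 */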
1872 static void a7xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1873 		struct drm_printer *p)
1874 {
1875 	const struct gen7_cluster_registers *cluster = obj->handle;
1876 
1877 	if (cluster) {
1878 		int context = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;
1879 
1880 		print_name(p, "  - pipe: ", a7xx_pipe_names[cluster->pipe_id]);
1881 		print_name(p, "    - cluster-name: ", a7xx_cluster_names[cluster->cluster_id]);
1882 		drm_printf(p, "      - context: %d\n", context);
1883 		a7xx_show_registers_indented(cluster->regs, obj->data, p, 4);
1884 	}
1885 }
1886 
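/*
 * Print one indexed-register block: its name, the number of dwords that were
 * captured for it and the contents as ascii85.
 */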
1887 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1888 		struct drm_printer *p)
1889 {
1890 	const struct a6xx_indexed_registers *indexed = obj->handle;
1891 
1892 	if (!indexed)
1893 		return;
1894 
1895 	print_name(p, "  - regs-name: ", indexed->name);
1896 	drm_printf(p, "    dwords: %d\n", obj->count);
1897 
1898 	print_ascii85(p, obj->count << 2, obj->data);
1899 }
1900 
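/* Print one debugbus block: its name, sample count in dwords and the data */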
1901 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1902 		u32 *data, struct drm_printer *p)
1903 {
1904 	if (block) {
1905 		print_name(p, "  - debugbus-block: ", block->name);
1906 
1907 		/*
1908 		 * count for regular debugbus data is in quadwords,
1909 		 * but print the size in dwords for consistency
1910 		 */
1911 		drm_printf(p, "    count: %d\n", block->count << 1);
1912 
1913 		print_ascii85(p, block->count << 3, data);
1914 	}
1915 }
1916 
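/*
 * Print every captured debugbus block: the regular debugbus blocks, the VBIF
 * snapshot (if one was taken) and the CX debugbus blocks.
 */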
1917 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1918 		struct drm_printer *p)
1919 {
1920 	int i;
1921 
1922 	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1923 		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1924 
1925 		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1926 	}
1927 
1928 	if (a6xx_state->vbif_debugbus) {
1929 		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1930 
1931 		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
1932 		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1933 
1934 		/* unlike the regular debugbus blocks, vbif debugbus data is already in dwords */
1935 		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1936 	}
1937 
1938 	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1939 		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1940 
1941 		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1942 	}
1943 }
1944 
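/*
 * Top-level devcoredump printer for a6xx/a7xx GPU state. The output is
 * YAML-ish text; a rough sketch of the layout (keys abbreviated, values
 * illustrative only):
 *
 *   gpu-initialized: 1
 *   gmu-log:
 *       iova: 0x0000000060000000
 *       size: 4096
 *   registers:
 *     - { offset: 0x000000, value: 0x00000000 }
 *   shader-blocks:
 *     - type: ...
 *   debugbus:
 *     - debugbus-block: ...
 */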
1945 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1946 		struct drm_printer *p)
1947 {
1948 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1949 	struct a6xx_gpu_state *a6xx_state = container_of(state,
1950 			struct a6xx_gpu_state, base);
1951 	int i;
1952 
1953 	if (IS_ERR_OR_NULL(state))
1954 		return;
1955 
1956 	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);
1957 
1958 	adreno_show(gpu, state, p);
1959 
1960 	drm_puts(p, "gmu-log:\n");
1961 	if (a6xx_state->gmu_log) {
1962 		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
1963 
1964 		drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
1965 		drm_printf(p, "    size: %zu\n", gmu_log->size);
1966 		adreno_show_object(p, &gmu_log->data, gmu_log->size,
1967 				&gmu_log->encoded);
1968 	}
1969 
1970 	drm_puts(p, "gmu-hfi:\n");
1971 	if (a6xx_state->gmu_hfi) {
1972 		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
1973 		unsigned i, j;
1974 
1975 		drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
1976 		drm_printf(p, "    size: %zu\n", gmu_hfi->size);
1977 		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
1978 			drm_printf(p, "    queue-history[%u]:", i);
1979 			for (j = 0; j < HFI_HISTORY_SZ; j++) {
1980 				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
1981 			}
1982 			drm_printf(p, "\n");
1983 		}
1984 		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
1985 				&gmu_hfi->encoded);
1986 	}
1987 
1988 	drm_puts(p, "gmu-debug:\n");
1989 	if (a6xx_state->gmu_debug) {
1990 		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;
1991 
1992 		drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
1993 		drm_printf(p, "    size: %zu\n", gmu_debug->size);
1994 		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
1995 				&gmu_debug->encoded);
1996 	}
1997 
1998 	drm_puts(p, "registers:\n");
1999 	for (i = 0; i < a6xx_state->nr_registers; i++) {
2000 		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
2001 
2002 		if (!obj->handle)
2003 			continue;
2004 
2005 		if (adreno_is_a7xx(adreno_gpu)) {
2006 			a7xx_show_registers(obj->handle, obj->data, p);
2007 		} else {
2008 			const struct a6xx_registers *regs = obj->handle;
2009 
2010 			a6xx_show_registers(regs->registers, obj->data, regs->count, p);
2011 		}
2012 	}
2013 
2014 	drm_puts(p, "registers-gmu:\n");
2015 	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
2016 		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
2017 		const struct a6xx_registers *regs = obj->handle;
2018 
2019 		if (!obj->handle)
2020 			continue;
2021 
2022 		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
2023 	}
2024 
2025 	drm_puts(p, "indexed-registers:\n");
2026 	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
2027 		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
2028 
2029 	drm_puts(p, "shader-blocks:\n");
2030 	for (i = 0; i < a6xx_state->nr_shaders; i++) {
2031 		if (adreno_is_a7xx(adreno_gpu))
2032 			a7xx_show_shader(&a6xx_state->shaders[i], p);
2033 		else
2034 			a6xx_show_shader(&a6xx_state->shaders[i], p);
2035 	}
2036 
2037 	drm_puts(p, "clusters:\n");
2038 	for (i = 0; i < a6xx_state->nr_clusters; i++) {
2039 		if (adreno_is_a7xx(adreno_gpu))
2040 			a7xx_show_cluster(&a6xx_state->clusters[i], p);
2041 		else
2042 			a6xx_show_cluster(&a6xx_state->clusters[i], p);
2043 	}
2044 
2045 	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) {
2046 		if (adreno_is_a7xx(adreno_gpu))
2047 			a7xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2048 		else
2049 			a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2050 	}
2051 
2052 	drm_puts(p, "debugbus:\n");
2053 	a6xx_show_debugbus(a6xx_state, p);
2054 }
2055