xref: /linux/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c (revision 8a5f956a9fb7d74fff681145082acfad5afa6bb8)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
3 
4 #include <linux/ascii85.h>
5 #include "msm_gem.h"
6 #include "a6xx_gpu.h"
7 #include "a6xx_gmu.h"
8 #include "a6xx_gpu_state.h"
9 #include "a6xx_gmu.xml.h"
10 
/*
 * Forward declarations tagged __always_unused for tables that are expected
 * to be defined by the snapshot headers included right after this.
 * NOTE(review): presumably this keeps the compiler quiet about tables this
 * file never references - confirm against the snapshot headers.
 */
static const unsigned int *gen7_0_0_external_core_regs[] __always_unused;
static const unsigned int *gen7_2_0_external_core_regs[] __always_unused;
static const unsigned int *gen7_9_0_external_core_regs[] __always_unused;
static const struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] __always_unused;
static const u32 gen7_9_0_cx_debugbus_blocks[] __always_unused;
16 
17 #include "adreno_gen7_0_0_snapshot.h"
18 #include "adreno_gen7_2_0_snapshot.h"
19 #include "adreno_gen7_9_0_snapshot.h"
20 
/*
 * One captured piece of GPU state: the descriptor it was captured for and
 * the buffer holding the dwords that were read.
 */
struct a6xx_gpu_state_obj {
	/* Descriptor the capture belongs to (block/cluster/register table entry) */
	const void *handle;
	/* Captured dwords, allocated through state_kcalloc()/state_kmemdup() */
	u32 *data;
	u32 count;	/* optional, used when count potentially read from hw */
};
26 
/*
 * Full a6xx/a7xx crash state. Most members are arrays of captured objects
 * paired with an nr_* element count.
 */
struct a6xx_gpu_state {
	struct msm_gpu_state base;

	struct a6xx_gpu_state_obj *gmu_registers;
	int nr_gmu_registers;

	struct a6xx_gpu_state_obj *registers;
	int nr_registers;

	/* Shader / debug blocks read through the crashdumper */
	struct a6xx_gpu_state_obj *shaders;
	int nr_shaders;

	/* Register clusters read through the CP aperture */
	struct a6xx_gpu_state_obj *clusters;
	int nr_clusters;

	/* Register clusters read from behind the AHB aperture */
	struct a6xx_gpu_state_obj *dbgahb_clusters;
	int nr_dbgahb_clusters;

	struct a6xx_gpu_state_obj *indexed_regs;
	int nr_indexed_regs;

	/* GX debug bus blocks */
	struct a6xx_gpu_state_obj *debugbus;
	int nr_debugbus;

	/* Single object, only allocated on targets without GBIF */
	struct a6xx_gpu_state_obj *vbif_debugbus;

	/* CX debug bus blocks, only captured when "cx_dbgc" could be mapped */
	struct a6xx_gpu_state_obj *cx_debugbus;
	int nr_cx_debugbus;

	struct msm_gpu_state_bo *gmu_log;
	struct msm_gpu_state_bo *gmu_hfi;
	struct msm_gpu_state_bo *gmu_debug;

	s32 hfi_queue_history[2][HFI_HISTORY_SZ];

	/* Every buffer handed out by state_kcalloc() is linked in here */
	struct list_head objs;

	bool gpu_initialized;
};
66 
67 static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
68 {
69 	in[0] = val;
70 	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
71 
72 	return 2;
73 }
74 
75 static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
76 {
77 	in[0] = target;
78 	in[1] = (((u64) reg) << 44 | dwords);
79 
80 	return 2;
81 }
82 
83 static inline int CRASHDUMP_FINI(u64 *in)
84 {
85 	in[0] = 0;
86 	in[1] = 0;
87 
88 	return 2;
89 }
90 
/* Scratch buffer shared by the crashdumper script and the data it reads */
struct a6xx_crashdumper {
	/* Kernel mapping returned by msm_gem_kernel_new(); may be an ERR_PTR */
	void *ptr;
	/* GEM object backing the scratch buffer */
	struct drm_gem_object *bo;
	/* GPU address of the buffer, programmed as the script base */
	u64 iova;
};
96 
/* Header placed in front of every buffer allocated by state_kcalloc() */
struct a6xx_state_memobj {
	/* Link in a6xx_gpu_state::objs */
	struct list_head node;
	/* Payload handed back to the caller */
	unsigned long long data[];
};
101 
102 static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
103 {
104 	struct a6xx_state_memobj *obj =
105 		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
106 
107 	if (!obj)
108 		return NULL;
109 
110 	list_add_tail(&obj->node, &a6xx_state->objs);
111 	return &obj->data;
112 }
113 
114 static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
115 		size_t size)
116 {
117 	void *dst = state_kcalloc(a6xx_state, 1, size);
118 
119 	if (dst)
120 		memcpy(dst, src, size);
121 	return dst;
122 }
123 
/*
 * Allocate 1MB for the crashdumper scratch region - the first 8k holds the
 * script and the remainder, starting at A6XX_CD_DATA_OFFSET, receives the
 * data the script reads out.
 */
#define A6XX_CD_DATA_OFFSET 8192
#define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
130 
131 static int a6xx_crashdumper_init(struct msm_gpu *gpu,
132 		struct a6xx_crashdumper *dumper)
133 {
134 	dumper->ptr = msm_gem_kernel_new(gpu->dev,
135 		SZ_1M, MSM_BO_WC, gpu->vm,
136 		&dumper->bo, &dumper->iova);
137 
138 	if (!IS_ERR(dumper->ptr))
139 		msm_gem_object_set_name(dumper->bo, "crashdump");
140 
141 	return PTR_ERR_OR_ZERO(dumper->ptr);
142 }
143 
144 static int a6xx_crashdumper_run(struct msm_gpu *gpu,
145 		struct a6xx_crashdumper *dumper)
146 {
147 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
148 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
149 	u32 val;
150 	int ret;
151 
152 	if (IS_ERR_OR_NULL(dumper->ptr))
153 		return -EINVAL;
154 
155 	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
156 		return -EINVAL;
157 
158 	/* Make sure all pending memory writes are posted */
159 	wmb();
160 
161 	gpu_write64(gpu, REG_A6XX_CP_CRASH_DUMP_SCRIPT_BASE, dumper->iova);
162 
163 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
164 
165 	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
166 		val & 0x02, 100, 10000);
167 
168 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
169 
170 	return ret;
171 }
172 
173 /* read a value from the GX debug bus */
174 static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
175 		u32 *data)
176 {
177 	u32 reg;
178 
179 	if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) {
180 		reg = A7XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
181 			A7XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
182 	} else {
183 		reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
184 			A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
185 	}
186 
187 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
188 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
189 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
190 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
191 
192 	/* Wait 1 us to make sure the data is flowing */
193 	udelay(1);
194 
195 	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
196 	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
197 
198 	return 2;
199 }
200 
/*
 * Accessors for the temporarily mapped CX debug controller. @offset is a
 * dword register offset, hence the shift by 2 to get a byte offset.
 */
#define cxdbg_write(ptr, offset, val) \
	writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
	readl((ptr) + ((offset) << 2))
206 
207 /* read a value from the CX debug bus */
208 static int cx_debugbus_read(struct msm_gpu *gpu, void __iomem *cxdbg, u32 block, u32 offset,
209 		u32 *data)
210 {
211 	u32 reg;
212 
213 	if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) {
214 		reg = A7XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
215 			A7XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
216 	} else {
217 		reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
218 			A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
219 	}
220 
221 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
222 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
223 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
224 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
225 
226 	/* Wait 1 us to make sure the data is flowing */
227 	udelay(1);
228 
229 	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
230 	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
231 
232 	return 2;
233 }
234 
235 /* Read a chunk of data from the VBIF debug bus */
236 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
237 		u32 reg, int count, u32 *data)
238 {
239 	int i;
240 
241 	gpu_write(gpu, ctrl0, reg);
242 
243 	for (i = 0; i < count; i++) {
244 		gpu_write(gpu, ctrl1, i);
245 		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
246 	}
247 
248 	return count;
249 }
250 
/* Number of blocks of each kind read from the VBIF test bus */
#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

/*
 * Total dwords captured from the VBIF debug bus: 16 per AXI_ARB block,
 * 18 per XIN_AXI block and 12 per XIN_CORE block.
 */
#define VBIF_DEBUGBUS_BLOCK_SIZE \
	((16 * AXI_ARB_BLOCKS) + \
	 (18 * XIN_AXI_BLOCKS) + \
	 (12 * XIN_CORE_BLOCKS))
259 
260 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
261 		struct a6xx_gpu_state *a6xx_state,
262 		struct a6xx_gpu_state_obj *obj)
263 {
264 	u32 clk, *ptr;
265 	int i;
266 
267 	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
268 		sizeof(u32));
269 	if (!obj->data)
270 		return;
271 
272 	obj->handle = NULL;
273 
274 	/* Get the current clock setting */
275 	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
276 
277 	/* Force on the bus so we can read it */
278 	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
279 		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
280 
281 	/* We will read from BUS2 first, so disable BUS1 */
282 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
283 
284 	/* Enable the VBIF bus for reading */
285 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
286 
287 	ptr = obj->data;
288 
289 	for (i = 0; i < AXI_ARB_BLOCKS; i++)
290 		ptr += vbif_debugbus_read(gpu,
291 			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
292 			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
293 			1 << (i + 16), 16, ptr);
294 
295 	for (i = 0; i < XIN_AXI_BLOCKS; i++)
296 		ptr += vbif_debugbus_read(gpu,
297 			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
298 			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
299 			1 << i, 18, ptr);
300 
301 	/* Stop BUS2 so we can turn on BUS1 */
302 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
303 
304 	for (i = 0; i < XIN_CORE_BLOCKS; i++)
305 		ptr += vbif_debugbus_read(gpu,
306 			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
307 			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
308 			1 << i, 12, ptr);
309 
310 	/* Restore the VBIF clock setting */
311 	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
312 }
313 
314 static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
315 		struct a6xx_gpu_state *a6xx_state,
316 		const struct a6xx_debugbus_block *block,
317 		struct a6xx_gpu_state_obj *obj)
318 {
319 	int i;
320 	u32 *ptr;
321 
322 	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
323 	if (!obj->data)
324 		return;
325 
326 	obj->handle = block;
327 
328 	for (ptr = obj->data, i = 0; i < block->count; i++)
329 		ptr += debugbus_read(gpu, block->id, i, ptr);
330 }
331 
332 static void a6xx_get_cx_debugbus_block(struct msm_gpu *gpu,
333 		void __iomem *cxdbg,
334 		struct a6xx_gpu_state *a6xx_state,
335 		const struct a6xx_debugbus_block *block,
336 		struct a6xx_gpu_state_obj *obj)
337 {
338 	int i;
339 	u32 *ptr;
340 
341 	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
342 	if (!obj->data)
343 		return;
344 
345 	obj->handle = block;
346 
347 	for (ptr = obj->data, i = 0; i < block->count; i++)
348 		ptr += cx_debugbus_read(gpu, cxdbg, block->id, i, ptr);
349 }
350 
351 static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu,
352 		struct a6xx_gpu_state *a6xx_state)
353 {
354 	int nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
355 		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
356 
357 	if (adreno_is_a650_family(to_adreno_gpu(gpu)))
358 		nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks);
359 
360 	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
361 			sizeof(*a6xx_state->debugbus));
362 
363 	if (a6xx_state->debugbus) {
364 		int i;
365 
366 		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
367 			a6xx_get_debugbus_block(gpu,
368 				a6xx_state,
369 				&a6xx_debugbus_blocks[i],
370 				&a6xx_state->debugbus[i]);
371 
372 		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
373 
374 		/*
375 		 * GBIF has same debugbus as of other GPU blocks, fall back to
376 		 * default path if GPU uses GBIF, also GBIF uses exactly same
377 		 * ID as of VBIF.
378 		 */
379 		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
380 			a6xx_get_debugbus_block(gpu, a6xx_state,
381 				&a6xx_gbif_debugbus_block,
382 				&a6xx_state->debugbus[i]);
383 
384 			a6xx_state->nr_debugbus += 1;
385 		}
386 
387 
388 		if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
389 			for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++)
390 				a6xx_get_debugbus_block(gpu,
391 					a6xx_state,
392 					&a650_debugbus_blocks[i],
393 					&a6xx_state->debugbus[i]);
394 		}
395 	}
396 }
397 
398 static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu,
399 		struct a6xx_gpu_state *a6xx_state)
400 {
401 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
402 	int debugbus_blocks_count, gbif_debugbus_blocks_count, total_debugbus_blocks;
403 	const u32 *debugbus_blocks, *gbif_debugbus_blocks;
404 	int i;
405 
406 	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
407 		debugbus_blocks = gen7_0_0_debugbus_blocks;
408 		debugbus_blocks_count = ARRAY_SIZE(gen7_0_0_debugbus_blocks);
409 		gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks;
410 		gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
411 	} else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
412 		debugbus_blocks = gen7_2_0_debugbus_blocks;
413 		debugbus_blocks_count = ARRAY_SIZE(gen7_2_0_debugbus_blocks);
414 		gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks;
415 		gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
416 	} else {
417 		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
418 		debugbus_blocks = gen7_9_0_debugbus_blocks;
419 		debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_debugbus_blocks);
420 		gbif_debugbus_blocks = gen7_9_0_gbif_debugbus_blocks;
421 		gbif_debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_gbif_debugbus_blocks);
422 	}
423 
424 	total_debugbus_blocks = debugbus_blocks_count + gbif_debugbus_blocks_count;
425 
426 	a6xx_state->debugbus = state_kcalloc(a6xx_state, total_debugbus_blocks,
427 			sizeof(*a6xx_state->debugbus));
428 
429 	if (a6xx_state->debugbus) {
430 		for (i = 0; i < debugbus_blocks_count; i++) {
431 			a6xx_get_debugbus_block(gpu,
432 				a6xx_state, &a7xx_debugbus_blocks[debugbus_blocks[i]],
433 				&a6xx_state->debugbus[i]);
434 		}
435 
436 		for (i = 0; i < gbif_debugbus_blocks_count; i++) {
437 			a6xx_get_debugbus_block(gpu,
438 				a6xx_state, &a7xx_debugbus_blocks[gbif_debugbus_blocks[i]],
439 				&a6xx_state->debugbus[i + debugbus_blocks_count]);
440 		}
441 
442 		a6xx_state->nr_debugbus = total_debugbus_blocks;
443 	}
444 }
445 
446 static void a6xx_get_debugbus(struct msm_gpu *gpu,
447 		struct a6xx_gpu_state *a6xx_state)
448 {
449 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
450 	struct resource *res;
451 	void __iomem *cxdbg = NULL;
452 
453 	/* Set up the GX debug bus */
454 
455 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
456 		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
457 
458 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
459 		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
460 
461 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
462 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
463 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
464 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
465 
466 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
467 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
468 
469 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
470 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
471 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
472 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
473 
474 	/* Set up the CX debug bus - it lives elsewhere in the system so do a
475 	 * temporary ioremap for the registers
476 	 */
477 	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
478 			"cx_dbgc");
479 
480 	if (res)
481 		cxdbg = ioremap(res->start, resource_size(res));
482 
483 	if (cxdbg) {
484 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
485 			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
486 
487 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
488 			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
489 
490 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
491 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
492 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
493 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
494 
495 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
496 			0x76543210);
497 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
498 			0xFEDCBA98);
499 
500 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
501 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
502 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
503 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
504 	}
505 
506 	if (adreno_is_a7xx(adreno_gpu)) {
507 		a7xx_get_debugbus_blocks(gpu, a6xx_state);
508 	} else {
509 		a6xx_get_debugbus_blocks(gpu, a6xx_state);
510 	}
511 
512 	/*  Dump the VBIF debugbus on applicable targets */
513 	if (!a6xx_has_gbif(adreno_gpu)) {
514 		a6xx_state->vbif_debugbus =
515 			state_kcalloc(a6xx_state, 1,
516 					sizeof(*a6xx_state->vbif_debugbus));
517 
518 		if (a6xx_state->vbif_debugbus)
519 			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
520 					a6xx_state->vbif_debugbus);
521 	}
522 
523 	if (cxdbg) {
524 		unsigned nr_cx_debugbus_blocks;
525 		const struct a6xx_debugbus_block *cx_debugbus_blocks;
526 
527 		if (adreno_is_a7xx(adreno_gpu)) {
528 			BUG_ON(adreno_gpu->info->family > ADRENO_7XX_GEN3);
529 			cx_debugbus_blocks = a7xx_cx_debugbus_blocks;
530 			nr_cx_debugbus_blocks = ARRAY_SIZE(a7xx_cx_debugbus_blocks);
531 		} else {
532 			cx_debugbus_blocks = a6xx_cx_debugbus_blocks;
533 			nr_cx_debugbus_blocks = ARRAY_SIZE(a6xx_cx_debugbus_blocks);
534 		}
535 
536 		a6xx_state->cx_debugbus =
537 			state_kcalloc(a6xx_state,
538 			nr_cx_debugbus_blocks,
539 			sizeof(*a6xx_state->cx_debugbus));
540 
541 		if (a6xx_state->cx_debugbus) {
542 			int i;
543 
544 			for (i = 0; i < nr_cx_debugbus_blocks; i++)
545 				a6xx_get_cx_debugbus_block(gpu,
546 					cxdbg,
547 					a6xx_state,
548 					&cx_debugbus_blocks[i],
549 					&a6xx_state->cx_debugbus[i]);
550 
551 			a6xx_state->nr_cx_debugbus =
552 				nr_cx_debugbus_blocks;
553 		}
554 
555 		iounmap(cxdbg);
556 	}
557 }
558 
/*
 * Number of registers in the inclusive range whose first offset is stored
 * at reg[a] and whose last offset is stored at reg[a + 1].
 */
#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
560 
561 /* Read a data cluster from behind the AHB aperture */
562 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
563 		struct a6xx_gpu_state *a6xx_state,
564 		const struct a6xx_dbgahb_cluster *dbgahb,
565 		struct a6xx_gpu_state_obj *obj,
566 		struct a6xx_crashdumper *dumper)
567 {
568 	u64 *in = dumper->ptr;
569 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
570 	size_t datasize;
571 	int i, regcount = 0;
572 
573 	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
574 		int j;
575 
576 		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
577 			(dbgahb->statetype + i * 2) << 8);
578 
579 		for (j = 0; j < dbgahb->count; j += 2) {
580 			int count = RANGE(dbgahb->registers, j);
581 			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
582 				dbgahb->registers[j] - (dbgahb->base >> 2);
583 
584 			in += CRASHDUMP_READ(in, offset, count, out);
585 
586 			out += count * sizeof(u32);
587 
588 			if (i == 0)
589 				regcount += count;
590 		}
591 	}
592 
593 	CRASHDUMP_FINI(in);
594 
595 	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
596 
597 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
598 		return;
599 
600 	if (a6xx_crashdumper_run(gpu, dumper))
601 		return;
602 
603 	obj->handle = dbgahb;
604 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
605 		datasize);
606 }
607 
608 static void a7xx_get_dbgahb_cluster(struct msm_gpu *gpu,
609 		struct a6xx_gpu_state *a6xx_state,
610 		const struct gen7_sptp_cluster_registers *dbgahb,
611 		struct a6xx_gpu_state_obj *obj,
612 		struct a6xx_crashdumper *dumper)
613 {
614 	u64 *in = dumper->ptr;
615 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
616 	size_t datasize;
617 	int i, regcount = 0;
618 
619 	in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
620 		A7XX_SP_READ_SEL_LOCATION(dbgahb->location_id) |
621 		A7XX_SP_READ_SEL_PIPE(dbgahb->pipe_id) |
622 		A7XX_SP_READ_SEL_STATETYPE(dbgahb->statetype));
623 
624 	for (i = 0; dbgahb->regs[i] != UINT_MAX; i += 2) {
625 		int count = RANGE(dbgahb->regs, i);
626 		u32 offset = REG_A7XX_SP_AHB_READ_APERTURE +
627 			dbgahb->regs[i] - dbgahb->regbase;
628 
629 		in += CRASHDUMP_READ(in, offset, count, out);
630 
631 		out += count * sizeof(u32);
632 		regcount += count;
633 	}
634 
635 	CRASHDUMP_FINI(in);
636 
637 	datasize = regcount * sizeof(u32);
638 
639 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
640 		return;
641 
642 	if (a6xx_crashdumper_run(gpu, dumper))
643 		return;
644 
645 	obj->handle = dbgahb;
646 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
647 		datasize);
648 }
649 
650 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
651 		struct a6xx_gpu_state *a6xx_state,
652 		struct a6xx_crashdumper *dumper)
653 {
654 	int i;
655 
656 	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
657 		ARRAY_SIZE(a6xx_dbgahb_clusters),
658 		sizeof(*a6xx_state->dbgahb_clusters));
659 
660 	if (!a6xx_state->dbgahb_clusters)
661 		return;
662 
663 	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
664 
665 	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
666 		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
667 			&a6xx_dbgahb_clusters[i],
668 			&a6xx_state->dbgahb_clusters[i], dumper);
669 }
670 
671 static void a7xx_get_dbgahb_clusters(struct msm_gpu *gpu,
672 		struct a6xx_gpu_state *a6xx_state,
673 		struct a6xx_crashdumper *dumper)
674 {
675 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
676 	int i;
677 	const struct gen7_sptp_cluster_registers *dbgahb_clusters;
678 	unsigned dbgahb_clusters_size;
679 
680 	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
681 		dbgahb_clusters = gen7_0_0_sptp_clusters;
682 		dbgahb_clusters_size = ARRAY_SIZE(gen7_0_0_sptp_clusters);
683 	} else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
684 		dbgahb_clusters = gen7_2_0_sptp_clusters;
685 		dbgahb_clusters_size = ARRAY_SIZE(gen7_2_0_sptp_clusters);
686 	} else {
687 		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
688 		dbgahb_clusters = gen7_9_0_sptp_clusters;
689 		dbgahb_clusters_size = ARRAY_SIZE(gen7_9_0_sptp_clusters);
690 	}
691 
692 	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
693 		dbgahb_clusters_size,
694 		sizeof(*a6xx_state->dbgahb_clusters));
695 
696 	if (!a6xx_state->dbgahb_clusters)
697 		return;
698 
699 	a6xx_state->nr_dbgahb_clusters = dbgahb_clusters_size;
700 
701 	for (i = 0; i < dbgahb_clusters_size; i++)
702 		a7xx_get_dbgahb_cluster(gpu, a6xx_state,
703 			&dbgahb_clusters[i],
704 			&a6xx_state->dbgahb_clusters[i], dumper);
705 }
706 
707 /* Read a data cluster from the CP aperture with the crashdumper */
708 static void a6xx_get_cluster(struct msm_gpu *gpu,
709 		struct a6xx_gpu_state *a6xx_state,
710 		const struct a6xx_cluster *cluster,
711 		struct a6xx_gpu_state_obj *obj,
712 		struct a6xx_crashdumper *dumper)
713 {
714 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
715 	u64 *in = dumper->ptr;
716 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
717 	size_t datasize;
718 	int i, regcount = 0;
719 	u32 id = cluster->id;
720 
721 	/* Skip registers that are not present on older generation */
722 	if (!adreno_is_a660_family(adreno_gpu) &&
723 			cluster->registers == a660_fe_cluster)
724 		return;
725 
726 	if (adreno_is_a650_family(adreno_gpu) &&
727 			cluster->registers == a6xx_ps_cluster)
728 		id = CLUSTER_VPC_PS;
729 
730 	/* Some clusters need a selector register to be programmed too */
731 	if (cluster->sel_reg)
732 		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
733 
734 	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
735 		int j;
736 
737 		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
738 			(id << 8) | (i << 4) | i);
739 
740 		for (j = 0; j < cluster->count; j += 2) {
741 			int count = RANGE(cluster->registers, j);
742 
743 			in += CRASHDUMP_READ(in, cluster->registers[j],
744 				count, out);
745 
746 			out += count * sizeof(u32);
747 
748 			if (i == 0)
749 				regcount += count;
750 		}
751 	}
752 
753 	CRASHDUMP_FINI(in);
754 
755 	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
756 
757 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
758 		return;
759 
760 	if (a6xx_crashdumper_run(gpu, dumper))
761 		return;
762 
763 	obj->handle = cluster;
764 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
765 		datasize);
766 }
767 
768 static void a7xx_get_cluster(struct msm_gpu *gpu,
769 		struct a6xx_gpu_state *a6xx_state,
770 		const struct gen7_cluster_registers *cluster,
771 		struct a6xx_gpu_state_obj *obj,
772 		struct a6xx_crashdumper *dumper)
773 {
774 	u64 *in = dumper->ptr;
775 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
776 	size_t datasize;
777 	int i, regcount = 0;
778 
779 	in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD,
780 		A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) |
781 		A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) |
782 		A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id));
783 
784 	/* Some clusters need a selector register to be programmed too */
785 	if (cluster->sel)
786 		in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val);
787 
788 	for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) {
789 		int count = RANGE(cluster->regs, i);
790 
791 		in += CRASHDUMP_READ(in, cluster->regs[i],
792 			count, out);
793 
794 		out += count * sizeof(u32);
795 		regcount += count;
796 	}
797 
798 	CRASHDUMP_FINI(in);
799 
800 	datasize = regcount * sizeof(u32);
801 
802 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
803 		return;
804 
805 	if (a6xx_crashdumper_run(gpu, dumper))
806 		return;
807 
808 	obj->handle = cluster;
809 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
810 		datasize);
811 }
812 
813 static void a6xx_get_clusters(struct msm_gpu *gpu,
814 		struct a6xx_gpu_state *a6xx_state,
815 		struct a6xx_crashdumper *dumper)
816 {
817 	int i;
818 
819 	a6xx_state->clusters = state_kcalloc(a6xx_state,
820 		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
821 
822 	if (!a6xx_state->clusters)
823 		return;
824 
825 	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
826 
827 	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
828 		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
829 			&a6xx_state->clusters[i], dumper);
830 }
831 
832 static void a7xx_get_clusters(struct msm_gpu *gpu,
833 		struct a6xx_gpu_state *a6xx_state,
834 		struct a6xx_crashdumper *dumper)
835 {
836 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
837 	int i;
838 	const struct gen7_cluster_registers *clusters;
839 	unsigned clusters_size;
840 
841 	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
842 		clusters = gen7_0_0_clusters;
843 		clusters_size = ARRAY_SIZE(gen7_0_0_clusters);
844 	} else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
845 		clusters = gen7_2_0_clusters;
846 		clusters_size = ARRAY_SIZE(gen7_2_0_clusters);
847 	} else {
848 		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
849 		clusters = gen7_9_0_clusters;
850 		clusters_size = ARRAY_SIZE(gen7_9_0_clusters);
851 	}
852 
853 	a6xx_state->clusters = state_kcalloc(a6xx_state,
854 		clusters_size, sizeof(*a6xx_state->clusters));
855 
856 	if (!a6xx_state->clusters)
857 		return;
858 
859 	a6xx_state->nr_clusters = clusters_size;
860 
861 	for (i = 0; i < clusters_size; i++)
862 		a7xx_get_cluster(gpu, a6xx_state, &clusters[i],
863 			&a6xx_state->clusters[i], dumper);
864 }
865 
866 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */
867 static void a6xx_get_shader_block(struct msm_gpu *gpu,
868 		struct a6xx_gpu_state *a6xx_state,
869 		const struct a6xx_shader_block *block,
870 		struct a6xx_gpu_state_obj *obj,
871 		struct a6xx_crashdumper *dumper)
872 {
873 	u64 *in = dumper->ptr;
874 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
875 	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
876 	int i;
877 
878 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
879 		return;
880 
881 	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
882 		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
883 			(block->type << 8) | i);
884 
885 		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
886 			block->size, out);
887 
888 		out += block->size * sizeof(u32);
889 	}
890 
891 	CRASHDUMP_FINI(in);
892 
893 	if (a6xx_crashdumper_run(gpu, dumper))
894 		return;
895 
896 	obj->handle = block;
897 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
898 		datasize);
899 }
900 
901 static void a7xx_get_shader_block(struct msm_gpu *gpu,
902 		struct a6xx_gpu_state *a6xx_state,
903 		const struct gen7_shader_block *block,
904 		struct a6xx_gpu_state_obj *obj,
905 		struct a6xx_crashdumper *dumper)
906 {
907 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
908 	u64 *in = dumper->ptr;
909 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
910 	size_t datasize = block->size * block->num_sps * block->num_usptps * sizeof(u32);
911 	int i, j;
912 
913 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
914 		return;
915 
916 	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
917 		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 3);
918 	}
919 
920 	for (i = 0; i < block->num_sps; i++) {
921 		for (j = 0; j < block->num_usptps; j++) {
922 			in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
923 				A7XX_SP_READ_SEL_LOCATION(block->location) |
924 				A7XX_SP_READ_SEL_PIPE(block->pipeid) |
925 				A7XX_SP_READ_SEL_STATETYPE(block->statetype) |
926 				A7XX_SP_READ_SEL_USPTP(j) |
927 				A7XX_SP_READ_SEL_SPTP(i));
928 
929 			in += CRASHDUMP_READ(in, REG_A7XX_SP_AHB_READ_APERTURE,
930 				block->size, out);
931 
932 			out += block->size * sizeof(u32);
933 		}
934 	}
935 
936 	CRASHDUMP_FINI(in);
937 
938 	if (a6xx_crashdumper_run(gpu, dumper))
939 		goto out;
940 
941 	obj->handle = block;
942 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
943 		datasize);
944 
945 out:
946 	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
947 		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 0);
948 	}
949 }
950 
951 static void a6xx_get_shaders(struct msm_gpu *gpu,
952 		struct a6xx_gpu_state *a6xx_state,
953 		struct a6xx_crashdumper *dumper)
954 {
955 	int i;
956 
957 	a6xx_state->shaders = state_kcalloc(a6xx_state,
958 		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
959 
960 	if (!a6xx_state->shaders)
961 		return;
962 
963 	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
964 
965 	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
966 		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
967 			&a6xx_state->shaders[i], dumper);
968 }
969 
970 static void a7xx_get_shaders(struct msm_gpu *gpu,
971 		struct a6xx_gpu_state *a6xx_state,
972 		struct a6xx_crashdumper *dumper)
973 {
974 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
975 	const struct gen7_shader_block *shader_blocks;
976 	unsigned num_shader_blocks;
977 	int i;
978 
979 	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
980 		shader_blocks = gen7_0_0_shader_blocks;
981 		num_shader_blocks = ARRAY_SIZE(gen7_0_0_shader_blocks);
982 	} else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
983 		shader_blocks = gen7_2_0_shader_blocks;
984 		num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks);
985 	} else {
986 		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
987 		shader_blocks = gen7_9_0_shader_blocks;
988 		num_shader_blocks = ARRAY_SIZE(gen7_9_0_shader_blocks);
989 	}
990 
991 	a6xx_state->shaders = state_kcalloc(a6xx_state,
992 		num_shader_blocks, sizeof(*a6xx_state->shaders));
993 
994 	if (!a6xx_state->shaders)
995 		return;
996 
997 	a6xx_state->nr_shaders = num_shader_blocks;
998 
999 	for (i = 0; i < num_shader_blocks; i++)
1000 		a7xx_get_shader_block(gpu, a6xx_state, &shader_blocks[i],
1001 			&a6xx_state->shaders[i], dumper);
1002 }
1003 
1004 /* Read registers from behind the HLSQ aperture with the crashdumper */
1005 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
1006 		struct a6xx_gpu_state *a6xx_state,
1007 		const struct a6xx_registers *regs,
1008 		struct a6xx_gpu_state_obj *obj,
1009 		struct a6xx_crashdumper *dumper)
1010 
1011 {
1012 	u64 *in = dumper->ptr;
1013 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1014 	int i, regcount = 0;
1015 
1016 	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
1017 
1018 	for (i = 0; i < regs->count; i += 2) {
1019 		u32 count = RANGE(regs->registers, i);
1020 		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
1021 			regs->registers[i] - (regs->val0 >> 2);
1022 
1023 		in += CRASHDUMP_READ(in, offset, count, out);
1024 
1025 		out += count * sizeof(u32);
1026 		regcount += count;
1027 	}
1028 
1029 	CRASHDUMP_FINI(in);
1030 
1031 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1032 		return;
1033 
1034 	if (a6xx_crashdumper_run(gpu, dumper))
1035 		return;
1036 
1037 	obj->handle = regs;
1038 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1039 		regcount * sizeof(u32));
1040 }
1041 
1042 /* Read a block of registers using the crashdumper */
1043 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
1044 		struct a6xx_gpu_state *a6xx_state,
1045 		const struct a6xx_registers *regs,
1046 		struct a6xx_gpu_state_obj *obj,
1047 		struct a6xx_crashdumper *dumper)
1048 
1049 {
1050 	u64 *in = dumper->ptr;
1051 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1052 	int i, regcount = 0;
1053 
1054 	/* Skip unsupported registers on older generations */
1055 	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1056 			(regs->registers == a660_registers))
1057 		return;
1058 
1059 	/* Some blocks might need to program a selector register first */
1060 	if (regs->val0)
1061 		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
1062 
1063 	for (i = 0; i < regs->count; i += 2) {
1064 		u32 count = RANGE(regs->registers, i);
1065 
1066 		in += CRASHDUMP_READ(in, regs->registers[i], count, out);
1067 
1068 		out += count * sizeof(u32);
1069 		regcount += count;
1070 	}
1071 
1072 	CRASHDUMP_FINI(in);
1073 
1074 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1075 		return;
1076 
1077 	if (a6xx_crashdumper_run(gpu, dumper))
1078 		return;
1079 
1080 	obj->handle = regs;
1081 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1082 		regcount * sizeof(u32));
1083 }
1084 
1085 static void a7xx_get_crashdumper_registers(struct msm_gpu *gpu,
1086 		struct a6xx_gpu_state *a6xx_state,
1087 		const struct gen7_reg_list *regs,
1088 		struct a6xx_gpu_state_obj *obj,
1089 		struct a6xx_crashdumper *dumper)
1090 
1091 {
1092 	u64 *in = dumper->ptr;
1093 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1094 	int i, regcount = 0;
1095 
1096 	/* Some blocks might need to program a selector register first */
1097 	if (regs->sel)
1098 		in += CRASHDUMP_WRITE(in, regs->sel->cd_reg, regs->sel->val);
1099 
1100 	for (i = 0; regs->regs[i] != UINT_MAX; i += 2) {
1101 		u32 count = RANGE(regs->regs, i);
1102 
1103 		in += CRASHDUMP_READ(in, regs->regs[i], count, out);
1104 
1105 		out += count * sizeof(u32);
1106 		regcount += count;
1107 	}
1108 
1109 	CRASHDUMP_FINI(in);
1110 
1111 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1112 		return;
1113 
1114 	if (a6xx_crashdumper_run(gpu, dumper))
1115 		return;
1116 
1117 	obj->handle = regs->regs;
1118 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1119 		regcount * sizeof(u32));
1120 }
1121 
1122 
1123 /* Read a block of registers via AHB */
1124 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1125 		struct a6xx_gpu_state *a6xx_state,
1126 		const struct a6xx_registers *regs,
1127 		struct a6xx_gpu_state_obj *obj)
1128 {
1129 	int i, regcount = 0, index = 0;
1130 
1131 	/* Skip unsupported registers on older generations */
1132 	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1133 			(regs->registers == a660_registers))
1134 		return;
1135 
1136 	for (i = 0; i < regs->count; i += 2)
1137 		regcount += RANGE(regs->registers, i);
1138 
1139 	obj->handle = (const void *) regs;
1140 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1141 	if (!obj->data)
1142 		return;
1143 
1144 	for (i = 0; i < regs->count; i += 2) {
1145 		u32 count = RANGE(regs->registers, i);
1146 		int j;
1147 
1148 		for (j = 0; j < count; j++)
1149 			obj->data[index++] = gpu_read(gpu,
1150 				regs->registers[i] + j);
1151 	}
1152 }
1153 
1154 static void a7xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1155 		struct a6xx_gpu_state *a6xx_state,
1156 		const u32 *regs,
1157 		struct a6xx_gpu_state_obj *obj)
1158 {
1159 	int i, regcount = 0, index = 0;
1160 
1161 	for (i = 0; regs[i] != UINT_MAX; i += 2)
1162 		regcount += RANGE(regs, i);
1163 
1164 	obj->handle = (const void *) regs;
1165 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1166 	if (!obj->data)
1167 		return;
1168 
1169 	for (i = 0; regs[i] != UINT_MAX; i += 2) {
1170 		u32 count = RANGE(regs, i);
1171 		int j;
1172 
1173 		for (j = 0; j < count; j++)
1174 			obj->data[index++] = gpu_read(gpu, regs[i] + j);
1175 	}
1176 }
1177 
1178 static void a7xx_get_ahb_gpu_reglist(struct msm_gpu *gpu,
1179 		struct a6xx_gpu_state *a6xx_state,
1180 		const struct gen7_reg_list *regs,
1181 		struct a6xx_gpu_state_obj *obj)
1182 {
1183 	if (regs->sel)
1184 		gpu_write(gpu, regs->sel->host_reg, regs->sel->val);
1185 
1186 	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, regs->regs, obj);
1187 }
1188 
1189 /* Read a block of GMU registers */
1190 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
1191 		struct a6xx_gpu_state *a6xx_state,
1192 		const struct a6xx_registers *regs,
1193 		struct a6xx_gpu_state_obj *obj,
1194 		bool rscc)
1195 {
1196 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1197 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1198 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1199 	int i, regcount = 0, index = 0;
1200 
1201 	for (i = 0; i < regs->count; i += 2)
1202 		regcount += RANGE(regs->registers, i);
1203 
1204 	obj->handle = (const void *) regs;
1205 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1206 	if (!obj->data)
1207 		return;
1208 
1209 	for (i = 0; i < regs->count; i += 2) {
1210 		u32 count = RANGE(regs->registers, i);
1211 		int j;
1212 
1213 		for (j = 0; j < count; j++) {
1214 			u32 offset = regs->registers[i] + j;
1215 			u32 val;
1216 
1217 			if (rscc)
1218 				val = gmu_read_rscc(gmu, offset);
1219 			else
1220 				val = gmu_read(gmu, offset);
1221 
1222 			obj->data[index++] = val;
1223 		}
1224 	}
1225 }
1226 
1227 static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
1228 		struct a6xx_gpu_state *a6xx_state)
1229 {
1230 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1231 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1232 
1233 	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
1234 		4, sizeof(*a6xx_state->gmu_registers));
1235 
1236 	if (!a6xx_state->gmu_registers)
1237 		return;
1238 
1239 	a6xx_state->nr_gmu_registers = 4;
1240 
1241 	/* Get the CX GMU registers from AHB */
1242 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
1243 		&a6xx_state->gmu_registers[0], false);
1244 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
1245 		&a6xx_state->gmu_registers[1], true);
1246 
1247 	if (adreno_is_a621(adreno_gpu) || adreno_is_a623(adreno_gpu))
1248 		_a6xx_get_gmu_registers(gpu, a6xx_state, &a621_gpucc_reg,
1249 			&a6xx_state->gmu_registers[2], false);
1250 	else
1251 		_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gpucc_reg,
1252 			&a6xx_state->gmu_registers[2], false);
1253 
1254 	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1255 		return;
1256 
1257 	/* Set the fence to ALLOW mode so we can access the registers */
1258 	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
1259 
1260 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
1261 		&a6xx_state->gmu_registers[3], false);
1262 }
1263 
1264 static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
1265 		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
1266 {
1267 	struct msm_gpu_state_bo *snapshot;
1268 
1269 	if (!bo->size)
1270 		return NULL;
1271 
1272 	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
1273 	if (!snapshot)
1274 		return NULL;
1275 
1276 	snapshot->iova = bo->iova;
1277 	snapshot->size = bo->size;
1278 	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
1279 	if (!snapshot->data)
1280 		return NULL;
1281 
1282 	memcpy(snapshot->data, bo->virt, bo->size);
1283 
1284 	return snapshot;
1285 }
1286 
1287 static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
1288 					  struct a6xx_gpu_state *a6xx_state)
1289 {
1290 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1291 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1292 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1293 	unsigned i, j;
1294 
1295 	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
1296 
1297 	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
1298 		struct a6xx_hfi_queue *queue = &gmu->queues[i];
1299 		for (j = 0; j < HFI_HISTORY_SZ; j++) {
1300 			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
1301 			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
1302 		}
1303 	}
1304 }
1305 
1306 #define A6XX_REGLIST_SIZE        1
1307 #define A6XX_GBIF_REGLIST_SIZE   1
1308 static void a6xx_get_registers(struct msm_gpu *gpu,
1309 		struct a6xx_gpu_state *a6xx_state,
1310 		struct a6xx_crashdumper *dumper)
1311 {
1312 	int i, count = A6XX_REGLIST_SIZE +
1313 		ARRAY_SIZE(a6xx_reglist) +
1314 		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
1315 	int index = 0;
1316 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1317 
1318 	a6xx_state->registers = state_kcalloc(a6xx_state,
1319 		count, sizeof(*a6xx_state->registers));
1320 
1321 	if (!a6xx_state->registers)
1322 		return;
1323 
1324 	a6xx_state->nr_registers = count;
1325 
1326 	a6xx_get_ahb_gpu_registers(gpu,
1327 		a6xx_state, &a6xx_ahb_reglist,
1328 		&a6xx_state->registers[index++]);
1329 
1330 	if (a6xx_has_gbif(adreno_gpu))
1331 		a6xx_get_ahb_gpu_registers(gpu,
1332 				a6xx_state, &a6xx_gbif_reglist,
1333 				&a6xx_state->registers[index++]);
1334 	else
1335 		a6xx_get_ahb_gpu_registers(gpu,
1336 				a6xx_state, &a6xx_vbif_reglist,
1337 				&a6xx_state->registers[index++]);
1338 	if (!dumper) {
1339 		/*
1340 		 * We can't use the crashdumper when the SMMU is stalled,
1341 		 * because the GPU has no memory access until we resume
1342 		 * translation (but we don't want to do that until after
1343 		 * we have captured as much useful GPU state as possible).
1344 		 * So instead collect registers via the CPU:
1345 		 */
1346 		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1347 			a6xx_get_ahb_gpu_registers(gpu,
1348 				a6xx_state, &a6xx_reglist[i],
1349 				&a6xx_state->registers[index++]);
1350 		return;
1351 	}
1352 
1353 	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1354 		a6xx_get_crashdumper_registers(gpu,
1355 			a6xx_state, &a6xx_reglist[i],
1356 			&a6xx_state->registers[index++],
1357 			dumper);
1358 
1359 	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
1360 		a6xx_get_crashdumper_hlsq_registers(gpu,
1361 			a6xx_state, &a6xx_hlsq_reglist[i],
1362 			&a6xx_state->registers[index++],
1363 			dumper);
1364 }
1365 
1366 #define A7XX_PRE_CRASHDUMPER_SIZE    1
1367 #define A7XX_POST_CRASHDUMPER_SIZE   1
1368 static void a7xx_get_registers(struct msm_gpu *gpu,
1369 		struct a6xx_gpu_state *a6xx_state,
1370 		struct a6xx_crashdumper *dumper)
1371 {
1372 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1373 	int i, count;
1374 	int index = 0;
1375 	const u32 *pre_crashdumper_regs;
1376 	const struct gen7_reg_list *reglist;
1377 
1378 	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
1379 		reglist = gen7_0_0_reg_list;
1380 		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1381 	} else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
1382 		reglist = gen7_2_0_reg_list;
1383 		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1384 	} else {
1385 		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
1386 		reglist = gen7_9_0_reg_list;
1387 		pre_crashdumper_regs = gen7_9_0_pre_crashdumper_gpu_registers;
1388 	}
1389 
1390 	count = A7XX_PRE_CRASHDUMPER_SIZE + A7XX_POST_CRASHDUMPER_SIZE;
1391 
1392 	/* The downstream reglist contains registers in other memory regions
1393 	 * (cx_misc/cx_mem and cx_dbgc) and we need to plumb through their
1394 	 * offsets and map them to read them on the CPU. For now only read the
1395 	 * first region which is the main one.
1396 	 */
1397 	if (dumper) {
1398 		for (i = 0; reglist[i].regs; i++)
1399 			count++;
1400 	} else {
1401 		count++;
1402 	}
1403 
1404 	a6xx_state->registers = state_kcalloc(a6xx_state,
1405 		count, sizeof(*a6xx_state->registers));
1406 
1407 	if (!a6xx_state->registers)
1408 		return;
1409 
1410 	a6xx_state->nr_registers = count;
1411 
1412 	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, pre_crashdumper_regs,
1413 		&a6xx_state->registers[index++]);
1414 
1415 	if (!dumper) {
1416 		a7xx_get_ahb_gpu_reglist(gpu,
1417 			a6xx_state, &reglist[0],
1418 			&a6xx_state->registers[index++]);
1419 		return;
1420 	}
1421 
1422 	for (i = 0; reglist[i].regs; i++)
1423 		a7xx_get_crashdumper_registers(gpu,
1424 			a6xx_state, &reglist[i],
1425 			&a6xx_state->registers[index++],
1426 			dumper);
1427 }
1428 
1429 static void a7xx_get_post_crashdumper_registers(struct msm_gpu *gpu,
1430 		struct a6xx_gpu_state *a6xx_state)
1431 {
1432 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1433 	const u32 *regs;
1434 
1435 	BUG_ON(adreno_gpu->info->family > ADRENO_7XX_GEN3);
1436 	regs = gen7_0_0_post_crashdumper_registers;
1437 
1438 	a7xx_get_ahb_gpu_registers(gpu,
1439 		a6xx_state, regs,
1440 		&a6xx_state->registers[a6xx_state->nr_registers - 1]);
1441 }
1442 
1443 static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu)
1444 {
1445 	/* The value at [16:31] is in 4dword units. Convert it to dwords */
1446 	return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14;
1447 }
1448 
1449 static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu)
1450 {
1451 	/*
1452 	 * The value at CP_ROQ_THRESHOLDS_2[20:31] is in 4dword units.
1453 	 * That register however is not directly accessible from APSS on A7xx.
1454 	 * Program the SQE_UCODE_DBG_ADDR with offset=0x70d3 and read the value.
1455 	 */
1456 	gpu_write(gpu, REG_A6XX_CP_SQE_UCODE_DBG_ADDR, 0x70d3);
1457 
1458 	return 4 * (gpu_read(gpu, REG_A6XX_CP_SQE_UCODE_DBG_DATA) >> 20);
1459 }
1460 
1461 /* Read a block of data from an indexed register pair */
1462 static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
1463 		struct a6xx_gpu_state *a6xx_state,
1464 		const struct a6xx_indexed_registers *indexed,
1465 		struct a6xx_gpu_state_obj *obj)
1466 {
1467 	u32 count = indexed->count;
1468 	int i;
1469 
1470 	obj->handle = (const void *) indexed;
1471 	if (indexed->count_fn)
1472 		count = indexed->count_fn(gpu);
1473 
1474 	obj->data = state_kcalloc(a6xx_state, count, sizeof(u32));
1475 	obj->count = count;
1476 	if (!obj->data)
1477 		return;
1478 
1479 	/* All the indexed banks start at address 0 */
1480 	gpu_write(gpu, indexed->addr, 0);
1481 
1482 	/* Read the data - each read increments the internal address by 1 */
1483 	for (i = 0; i < count; i++)
1484 		obj->data[i] = gpu_read(gpu, indexed->data);
1485 }
1486 
1487 static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
1488 		struct a6xx_gpu_state *a6xx_state)
1489 {
1490 	u32 mempool_size;
1491 	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
1492 	int i;
1493 
1494 	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
1495 		sizeof(*a6xx_state->indexed_regs));
1496 	if (!a6xx_state->indexed_regs)
1497 		return;
1498 
1499 	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
1500 		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
1501 			&a6xx_state->indexed_regs[i]);
1502 
1503 	if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
1504 		u32 val;
1505 
1506 		val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG);
1507 		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4);
1508 
1509 		/* Get the contents of the CP mempool */
1510 		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1511 			&a6xx_state->indexed_regs[i]);
1512 
1513 		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val);
1514 		a6xx_state->nr_indexed_regs = count;
1515 		return;
1516 	}
1517 
1518 	/* Set the CP mempool size to 0 to stabilize it while dumping */
1519 	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
1520 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
1521 
1522 	/* Get the contents of the CP mempool */
1523 	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1524 		&a6xx_state->indexed_regs[i]);
1525 
1526 	/*
1527 	 * Offset 0x2000 in the mempool is the size - copy the saved size over
1528 	 * so the data is consistent
1529 	 */
1530 	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
1531 
1532 	/* Restore the size in the hardware */
1533 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
1534 
1535 	a6xx_state->nr_indexed_regs = count;
1536 }
1537 
1538 static void a7xx_get_indexed_registers(struct msm_gpu *gpu,
1539 		struct a6xx_gpu_state *a6xx_state)
1540 {
1541 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1542 	const struct a6xx_indexed_registers *indexed_regs;
1543 	int i, indexed_count, mempool_count;
1544 
1545 	if (adreno_gpu->info->family <= ADRENO_7XX_GEN2) {
1546 		indexed_regs = a7xx_indexed_reglist;
1547 		indexed_count = ARRAY_SIZE(a7xx_indexed_reglist);
1548 	} else {
1549 		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
1550 		indexed_regs = gen7_9_0_cp_indexed_reg_list;
1551 		indexed_count = ARRAY_SIZE(gen7_9_0_cp_indexed_reg_list);
1552 	}
1553 
1554 	mempool_count = ARRAY_SIZE(a7xx_cp_bv_mempool_indexed);
1555 
1556 	a6xx_state->indexed_regs = state_kcalloc(a6xx_state,
1557 					indexed_count + mempool_count,
1558 					sizeof(*a6xx_state->indexed_regs));
1559 	if (!a6xx_state->indexed_regs)
1560 		return;
1561 
1562 	a6xx_state->nr_indexed_regs = indexed_count + mempool_count;
1563 
1564 	/* First read the common regs */
1565 	for (i = 0; i < indexed_count; i++)
1566 		a6xx_get_indexed_regs(gpu, a6xx_state, &indexed_regs[i],
1567 			&a6xx_state->indexed_regs[i]);
1568 
1569 	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, 0, BIT(2));
1570 	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, 0, BIT(2));
1571 
1572 	/* Get the contents of the CP_BV mempool */
1573 	for (i = 0; i < mempool_count; i++)
1574 		a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_cp_bv_mempool_indexed[i],
1575 			&a6xx_state->indexed_regs[indexed_count + i]);
1576 
1577 	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(2), 0);
1578 	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, BIT(2), 0);
1579 	return;
1580 }
1581 
1582 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
1583 {
1584 	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
1585 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1586 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1587 	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
1588 		GFP_KERNEL);
1589 	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
1590 			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
1591 
1592 	if (!a6xx_state)
1593 		return ERR_PTR(-ENOMEM);
1594 
1595 	INIT_LIST_HEAD(&a6xx_state->objs);
1596 
1597 	/* Get the generic state from the adreno core */
1598 	adreno_gpu_state_get(gpu, &a6xx_state->base);
1599 
1600 	if (!adreno_has_gmu_wrapper(adreno_gpu)) {
1601 		a6xx_get_gmu_registers(gpu, a6xx_state);
1602 
1603 		a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
1604 		a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
1605 		a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);
1606 
1607 		a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
1608 	}
1609 
1610 	/* If GX isn't on the rest of the data isn't going to be accessible */
1611 	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1612 		return &a6xx_state->base;
1613 
1614 	/* Get the banks of indexed registers */
1615 	if (adreno_is_a7xx(adreno_gpu))
1616 		a7xx_get_indexed_registers(gpu, a6xx_state);
1617 	else
1618 		a6xx_get_indexed_registers(gpu, a6xx_state);
1619 
1620 	/*
1621 	 * Try to initialize the crashdumper, if we are not dumping state
1622 	 * with the SMMU stalled.  The crashdumper needs memory access to
1623 	 * write out GPU state, so we need to skip this when the SMMU is
1624 	 * stalled in response to an iova fault
1625 	 */
1626 	if (!stalled && !gpu->needs_hw_init &&
1627 	    !a6xx_crashdumper_init(gpu, &_dumper)) {
1628 		dumper = &_dumper;
1629 	}
1630 
1631 	if (adreno_is_a7xx(adreno_gpu)) {
1632 		a7xx_get_registers(gpu, a6xx_state, dumper);
1633 
1634 		if (dumper) {
1635 			a7xx_get_shaders(gpu, a6xx_state, dumper);
1636 			a7xx_get_clusters(gpu, a6xx_state, dumper);
1637 			a7xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1638 
1639 			msm_gem_kernel_put(dumper->bo, gpu->vm);
1640 		}
1641 
1642 		a7xx_get_post_crashdumper_registers(gpu, a6xx_state);
1643 	} else {
1644 		a6xx_get_registers(gpu, a6xx_state, dumper);
1645 
1646 		if (dumper) {
1647 			a6xx_get_shaders(gpu, a6xx_state, dumper);
1648 			a6xx_get_clusters(gpu, a6xx_state, dumper);
1649 			a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1650 
1651 			msm_gem_kernel_put(dumper->bo, gpu->vm);
1652 		}
1653 	}
1654 
1655 	if (snapshot_debugbus)
1656 		a6xx_get_debugbus(gpu, a6xx_state);
1657 
1658 	a6xx_state->gpu_initialized = !gpu->needs_hw_init;
1659 
1660 	return  &a6xx_state->base;
1661 }
1662 
1663 static void a6xx_gpu_state_destroy(struct kref *kref)
1664 {
1665 	struct a6xx_state_memobj *obj, *tmp;
1666 	struct msm_gpu_state *state = container_of(kref,
1667 			struct msm_gpu_state, ref);
1668 	struct a6xx_gpu_state *a6xx_state = container_of(state,
1669 			struct a6xx_gpu_state, base);
1670 
1671 	if (a6xx_state->gmu_log)
1672 		kvfree(a6xx_state->gmu_log->data);
1673 
1674 	if (a6xx_state->gmu_hfi)
1675 		kvfree(a6xx_state->gmu_hfi->data);
1676 
1677 	if (a6xx_state->gmu_debug)
1678 		kvfree(a6xx_state->gmu_debug->data);
1679 
1680 	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
1681 		list_del(&obj->node);
1682 		kvfree(obj);
1683 	}
1684 
1685 	adreno_gpu_state_destroy(state);
1686 	kfree(a6xx_state);
1687 }
1688 
1689 int a6xx_gpu_state_put(struct msm_gpu_state *state)
1690 {
1691 	if (IS_ERR_OR_NULL(state))
1692 		return 1;
1693 
1694 	return kref_put(&state->ref, a6xx_gpu_state_destroy);
1695 }
1696 
1697 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
1698 		struct drm_printer *p)
1699 {
1700 	int i, index = 0;
1701 
1702 	if (!data)
1703 		return;
1704 
1705 	for (i = 0; i < count; i += 2) {
1706 		u32 count = RANGE(registers, i);
1707 		u32 offset = registers[i];
1708 		int j;
1709 
1710 		for (j = 0; j < count; index++, offset++, j++) {
1711 			if (data[index] == 0xdeafbead)
1712 				continue;
1713 
1714 			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
1715 				offset << 2, data[index]);
1716 		}
1717 	}
1718 }
1719 
1720 static void a7xx_show_registers_indented(const u32 *registers, u32 *data,
1721 		struct drm_printer *p, unsigned indent)
1722 {
1723 	int i, index = 0;
1724 
1725 	for (i = 0; registers[i] != UINT_MAX; i += 2) {
1726 		u32 count = RANGE(registers, i);
1727 		u32 offset = registers[i];
1728 		int j;
1729 
1730 		for (j = 0; j < count; index++, offset++, j++) {
1731 			int k;
1732 
1733 			if (data[index] == 0xdeafbead)
1734 				continue;
1735 
1736 			for (k = 0; k < indent; k++)
1737 				drm_printf(p, "  ");
1738 			drm_printf(p, "- { offset: 0x%06x, value: 0x%08x }\n",
1739 				offset << 2, data[index]);
1740 		}
1741 	}
1742 }
1743 
1744 static void a7xx_show_registers(const u32 *registers, u32 *data, struct drm_printer *p)
1745 {
1746 	a7xx_show_registers_indented(registers, data, p, 1);
1747 }
1748 
1749 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1750 {
1751 	char out[ASCII85_BUFSZ];
1752 	long i, l, datalen = 0;
1753 
1754 	for (i = 0; i < len >> 2; i++) {
1755 		if (data[i])
1756 			datalen = (i + 1) << 2;
1757 	}
1758 
1759 	if (datalen == 0)
1760 		return;
1761 
1762 	drm_puts(p, "    data: !!ascii85 |\n");
1763 	drm_puts(p, "      ");
1764 
1765 
1766 	l = ascii85_encode_len(datalen);
1767 
1768 	for (i = 0; i < l; i++)
1769 		drm_puts(p, ascii85_encode(data[i], out));
1770 
1771 	drm_puts(p, "\n");
1772 }
1773 
/*
 * Emit @fmt, then @name, then a newline. Both strings go through drm_puts(),
 * so neither is interpreted as a format string.
 */
static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	const char *parts[] = { fmt, name, "\n" };
	int n;

	for (n = 0; n < ARRAY_SIZE(parts); n++)
		drm_puts(p, parts[n]);
}
1780 
1781 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1782 		struct drm_printer *p)
1783 {
1784 	const struct a6xx_shader_block *block = obj->handle;
1785 	int i;
1786 
1787 	if (!obj->handle)
1788 		return;
1789 
1790 	print_name(p, "  - type: ", block->name);
1791 
1792 	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1793 		drm_printf(p, "    - bank: %d\n", i);
1794 		drm_printf(p, "      size: %d\n", block->size);
1795 
1796 		if (!obj->data)
1797 			continue;
1798 
1799 		print_ascii85(p, block->size << 2,
1800 			obj->data + (block->size * i));
1801 	}
1802 }
1803 
1804 static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj,
1805 		struct drm_printer *p)
1806 {
1807 	const struct gen7_shader_block *block = obj->handle;
1808 	int i, j;
1809 	u32 *data = obj->data;
1810 
1811 	if (!obj->handle)
1812 		return;
1813 
1814 	print_name(p, "  - type: ", a7xx_statetype_names[block->statetype]);
1815 	print_name(p, "    - pipe: ", a7xx_pipe_names[block->pipeid]);
1816 	drm_printf(p, "    - location: %d\n", block->location);
1817 
1818 	for (i = 0; i < block->num_sps; i++) {
1819 		drm_printf(p, "      - sp: %d\n", i);
1820 
1821 		for (j = 0; j < block->num_usptps; j++) {
1822 			drm_printf(p, "        - usptp: %d\n", j);
1823 			drm_printf(p, "          size: %d\n", block->size);
1824 
1825 			if (!obj->data)
1826 				continue;
1827 
1828 			print_ascii85(p, block->size << 2, data);
1829 
1830 			data += block->size;
1831 		}
1832 	}
1833 }
1834 
1835 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1836 		struct drm_printer *p)
1837 {
1838 	int ctx, index = 0;
1839 
1840 	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1841 		int j;
1842 
1843 		drm_printf(p, "    - context: %d\n", ctx);
1844 
1845 		for (j = 0; j < size; j += 2) {
1846 			u32 count = RANGE(registers, j);
1847 			u32 offset = registers[j];
1848 			int k;
1849 
1850 			for (k = 0; k < count; index++, offset++, k++) {
1851 				if (data[index] == 0xdeafbead)
1852 					continue;
1853 
1854 				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
1855 					offset << 2, data[index]);
1856 			}
1857 		}
1858 	}
1859 }
1860 
1861 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1862 		struct drm_printer *p)
1863 {
1864 	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1865 
1866 	if (dbgahb) {
1867 		print_name(p, "  - cluster-name: ", dbgahb->name);
1868 		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1869 			obj->data, p);
1870 	}
1871 }
1872 
1873 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1874 		struct drm_printer *p)
1875 {
1876 	const struct a6xx_cluster *cluster = obj->handle;
1877 
1878 	if (cluster) {
1879 		print_name(p, "  - cluster-name: ", cluster->name);
1880 		a6xx_show_cluster_data(cluster->registers, cluster->count,
1881 			obj->data, p);
1882 	}
1883 }
1884 
1885 static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1886 		struct drm_printer *p)
1887 {
1888 	const struct gen7_sptp_cluster_registers *dbgahb = obj->handle;
1889 
1890 	if (dbgahb) {
1891 		print_name(p, "  - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]);
1892 		print_name(p, "    - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]);
1893 		drm_printf(p, "      - context: %d\n", dbgahb->context_id);
1894 		drm_printf(p, "      - location: %d\n", dbgahb->location_id);
1895 		a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4);
1896 	}
1897 }
1898 
1899 static void a7xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1900 		struct drm_printer *p)
1901 {
1902 	const struct gen7_cluster_registers *cluster = obj->handle;
1903 
1904 	if (cluster) {
1905 		int context = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;
1906 
1907 		print_name(p, "  - pipe: ", a7xx_pipe_names[cluster->pipe_id]);
1908 		print_name(p, "    - cluster-name: ", a7xx_cluster_names[cluster->cluster_id]);
1909 		drm_printf(p, "      - context: %d\n", context);
1910 		a7xx_show_registers_indented(cluster->regs, obj->data, p, 4);
1911 	}
1912 }
1913 
1914 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1915 		struct drm_printer *p)
1916 {
1917 	const struct a6xx_indexed_registers *indexed = obj->handle;
1918 
1919 	if (!indexed)
1920 		return;
1921 
1922 	print_name(p, "  - regs-name: ", indexed->name);
1923 	drm_printf(p, "    dwords: %d\n", obj->count);
1924 
1925 	print_ascii85(p, obj->count << 2, obj->data);
1926 }
1927 
1928 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1929 		u32 *data, struct drm_printer *p)
1930 {
1931 	if (block) {
1932 		print_name(p, "  - debugbus-block: ", block->name);
1933 
1934 		/*
1935 		 * count for regular debugbus data is in quadwords,
1936 		 * but print the size in dwords for consistency
1937 		 */
1938 		drm_printf(p, "    count: %d\n", block->count << 1);
1939 
1940 		print_ascii85(p, block->count << 3, data);
1941 	}
1942 }
1943 
1944 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1945 		struct drm_printer *p)
1946 {
1947 	int i;
1948 
1949 	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1950 		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1951 
1952 		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1953 	}
1954 
1955 	if (a6xx_state->vbif_debugbus) {
1956 		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1957 
1958 		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
1959 		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1960 
1961 		/* vbif debugbus data is in dwords.  Confusing, huh? */
1962 		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1963 	}
1964 
1965 	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1966 		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1967 
1968 		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1969 	}
1970 }
1971 
1972 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1973 		struct drm_printer *p)
1974 {
1975 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1976 	struct a6xx_gpu_state *a6xx_state = container_of(state,
1977 			struct a6xx_gpu_state, base);
1978 	int i;
1979 
1980 	if (IS_ERR_OR_NULL(state))
1981 		return;
1982 
1983 	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);
1984 
1985 	adreno_show(gpu, state, p);
1986 
1987 	drm_puts(p, "gmu-log:\n");
1988 	if (a6xx_state->gmu_log) {
1989 		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
1990 
1991 		drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
1992 		drm_printf(p, "    size: %zu\n", gmu_log->size);
1993 		adreno_show_object(p, &gmu_log->data, gmu_log->size,
1994 				&gmu_log->encoded);
1995 	}
1996 
1997 	drm_puts(p, "gmu-hfi:\n");
1998 	if (a6xx_state->gmu_hfi) {
1999 		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
2000 		unsigned i, j;
2001 
2002 		drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
2003 		drm_printf(p, "    size: %zu\n", gmu_hfi->size);
2004 		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
2005 			drm_printf(p, "    queue-history[%u]:", i);
2006 			for (j = 0; j < HFI_HISTORY_SZ; j++) {
2007 				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
2008 			}
2009 			drm_printf(p, "\n");
2010 		}
2011 		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
2012 				&gmu_hfi->encoded);
2013 	}
2014 
2015 	drm_puts(p, "gmu-debug:\n");
2016 	if (a6xx_state->gmu_debug) {
2017 		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;
2018 
2019 		drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
2020 		drm_printf(p, "    size: %zu\n", gmu_debug->size);
2021 		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
2022 				&gmu_debug->encoded);
2023 	}
2024 
2025 	drm_puts(p, "registers:\n");
2026 	for (i = 0; i < a6xx_state->nr_registers; i++) {
2027 		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
2028 
2029 		if (!obj->handle)
2030 			continue;
2031 
2032 		if (adreno_is_a7xx(adreno_gpu)) {
2033 			a7xx_show_registers(obj->handle, obj->data, p);
2034 		} else {
2035 			const struct a6xx_registers *regs = obj->handle;
2036 
2037 			a6xx_show_registers(regs->registers, obj->data, regs->count, p);
2038 		}
2039 	}
2040 
2041 	drm_puts(p, "registers-gmu:\n");
2042 	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
2043 		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
2044 		const struct a6xx_registers *regs = obj->handle;
2045 
2046 		if (!obj->handle)
2047 			continue;
2048 
2049 		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
2050 	}
2051 
2052 	drm_puts(p, "indexed-registers:\n");
2053 	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
2054 		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
2055 
2056 	drm_puts(p, "shader-blocks:\n");
2057 	for (i = 0; i < a6xx_state->nr_shaders; i++) {
2058 		if (adreno_is_a7xx(adreno_gpu))
2059 			a7xx_show_shader(&a6xx_state->shaders[i], p);
2060 		else
2061 			a6xx_show_shader(&a6xx_state->shaders[i], p);
2062 	}
2063 
2064 	drm_puts(p, "clusters:\n");
2065 	for (i = 0; i < a6xx_state->nr_clusters; i++) {
2066 		if (adreno_is_a7xx(adreno_gpu))
2067 			a7xx_show_cluster(&a6xx_state->clusters[i], p);
2068 		else
2069 			a6xx_show_cluster(&a6xx_state->clusters[i], p);
2070 	}
2071 
2072 	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) {
2073 		if (adreno_is_a7xx(adreno_gpu))
2074 			a7xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2075 		else
2076 			a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2077 	}
2078 
2079 	drm_puts(p, "debugbus:\n");
2080 	a6xx_show_debugbus(a6xx_state, p);
2081 }
2082