// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */

#include <linux/ascii85.h>
#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"

static const unsigned int *gen7_0_0_external_core_regs[] __always_unused;
static const unsigned int *gen7_2_0_external_core_regs[] __always_unused;
static const unsigned int *gen7_9_0_external_core_regs[] __always_unused;
static const struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] __always_unused;
static const u32 gen7_9_0_cx_debugbus_blocks[] __always_unused;

#include "adreno_gen7_0_0_snapshot.h"
#include "adreno_gen7_2_0_snapshot.h"
#include "adreno_gen7_9_0_snapshot.h"

struct a6xx_gpu_state_obj {
	const void *handle;
	u32 *data;
	u32 count;	/* optional, used when count potentially read from hw */
};

struct a6xx_gpu_state {
	struct msm_gpu_state base;

	struct a6xx_gpu_state_obj *gmu_registers;
	int nr_gmu_registers;

	struct a6xx_gpu_state_obj *registers;
	int nr_registers;

	struct a6xx_gpu_state_obj *shaders;
	int nr_shaders;

	struct a6xx_gpu_state_obj *clusters;
	int nr_clusters;

	struct a6xx_gpu_state_obj *dbgahb_clusters;
	int nr_dbgahb_clusters;

	struct a6xx_gpu_state_obj *indexed_regs;
	int nr_indexed_regs;

	struct a6xx_gpu_state_obj *debugbus;
	int nr_debugbus;

	struct a6xx_gpu_state_obj *vbif_debugbus;

	struct a6xx_gpu_state_obj *cx_debugbus;
	int nr_cx_debugbus;

	struct msm_gpu_state_bo *gmu_log;
	struct msm_gpu_state_bo *gmu_hfi;
	struct msm_gpu_state_bo *gmu_debug;

	s32 hfi_queue_history[2][HFI_HISTORY_SZ];

	struct list_head objs;

	bool gpu_initialized;
};

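/*
 * Crashdumper script format, as encoded by the helpers below: the script is a
 * sequence of two-qword instructions. A register write carries the value in
 * qword 0 and the target register plus write flags in qword 1; a block read
 * carries the destination iova in qword 0 and the source register and dword
 * count in qword 1; a pair of zero qwords terminates the script. The exact
 * bitfield layout is inferred from these encodings.
 */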
static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
	in[0] = val;
	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);

	return 2;
}

static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
	in[0] = target;
	in[1] = (((u64) reg) << 44 | dwords);

	return 2;
}

static inline int CRASHDUMP_FINI(u64 *in)
{
	in[0] = 0;
	in[1] = 0;

	return 2;
}

struct a6xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};

struct a6xx_state_memobj {
	struct list_head node;
	unsigned long long data[];
};

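/*
 * Every snapshot allocation is tracked on a6xx_state->objs so that
 * a6xx_gpu_state_destroy() can free them all in a single pass.
 */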
static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
{
	struct a6xx_state_memobj *obj =
		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);

	if (!obj)
		return NULL;

	list_add_tail(&obj->node, &a6xx_state->objs);
	return &obj->data;
}

static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
		size_t size)
{
	void *dst = state_kcalloc(a6xx_state, 1, size);

	if (dst)
		memcpy(dst, src, size);
	return dst;
}

/*
 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 * the rest for the data
 */
#define A6XX_CD_DATA_OFFSET	8192
#define A6XX_CD_DATA_SIZE	(SZ_1M - 8192)
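
/* At 16 bytes per instruction, the 8k script region holds up to 512 entries */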

static int a6xx_crashdumper_init(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new(gpu->dev,
		SZ_1M, MSM_BO_WC, gpu->vm,
		&dumper->bo, &dumper->iova);

	if (!IS_ERR(dumper->ptr))
		msm_gem_object_set_name(dumper->bo, "crashdump");

	return PTR_ERR_OR_ZERO(dumper->ptr);
}

static int a6xx_crashdumper_run(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 val;
	int ret;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
		return -EINVAL;

	/* Make sure all pending memory writes are posted */
	wmb();

	gpu_write64(gpu, REG_A6XX_CP_CRASH_DUMP_SCRIPT_BASE, dumper->iova);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
		val & 0x02, 100, 10000);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

	return ret;
}
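
/*
 * Typical flow for the helpers above: allocate the scratch BO with
 * a6xx_crashdumper_init(), assemble a script at dumper->ptr with the
 * CRASHDUMP_* helpers (pointing reads at dumper->iova + A6XX_CD_DATA_OFFSET),
 * kick it with a6xx_crashdumper_run(), then copy the results out of
 * dumper->ptr + A6XX_CD_DATA_OFFSET with state_kmemdup().
 */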

/* read a value from the GX debug bus */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
		u32 *data)
{
	u32 reg;

	if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) {
		reg = A7XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
			A7XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
	} else {
		reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
			A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
	}

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

#define cxdbg_write(ptr, offset, val) \
	writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
	readl((ptr) + ((offset) << 2))
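
/* The offset is a dword register index; << 2 turns it into the byte offset
 * that readl()/writel() expect.
 */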

/* read a value from the CX debug bus */
static int cx_debugbus_read(struct msm_gpu *gpu, void __iomem *cxdbg, u32 block, u32 offset,
		u32 *data)
{
	u32 reg;

	if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) {
		reg = A7XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
			A7XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
	} else {
		reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
			A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
	}

	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

/* Read a chunk of data from the VBIF debug bus */
static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
		u32 reg, int count, u32 *data)
{
	int i;

	gpu_write(gpu, ctrl0, reg);

	for (i = 0; i < count; i++) {
		gpu_write(gpu, ctrl1, i);
		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
	}

	return count;
}

#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

#define VBIF_DEBUGBUS_BLOCK_SIZE \
	((16 * AXI_ARB_BLOCKS) + \
	 (18 * XIN_AXI_BLOCKS) + \
	 (12 * XIN_CORE_BLOCKS))
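
/* 16*2 + 18*5 + 12*4 = 170 dwords in total */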

static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_gpu_state_obj *obj)
{
	u32 clk, *ptr;
	int i;

	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
		sizeof(u32));
	if (!obj->data)
		return;

	obj->handle = NULL;

	/* Get the current clock setting */
	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

	/* Force on the bus so we can read it */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

	/* We will read from BUS2 first, so disable BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

	/* Enable the VBIF bus for reading */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

	ptr = obj->data;

	for (i = 0; i < AXI_ARB_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << (i + 16), 16, ptr);

	for (i = 0; i < XIN_AXI_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << i, 18, ptr);

	/* Stop BUS2 so we can turn on BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

	for (i = 0; i < XIN_CORE_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
			1 << i, 12, ptr);

	/* Restore the VBIF clock setting */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}

static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_debugbus_block *block,
		struct a6xx_gpu_state_obj *obj)
{
	int i;
	u32 *ptr;

	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
	if (!obj->data)
		return;

	obj->handle = block;

	for (ptr = obj->data, i = 0; i < block->count; i++)
		ptr += debugbus_read(gpu, block->id, i, ptr);
}

static void a6xx_get_cx_debugbus_block(struct msm_gpu *gpu,
		void __iomem *cxdbg,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_debugbus_block *block,
		struct a6xx_gpu_state_obj *obj)
{
	int i;
	u32 *ptr;

	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
	if (!obj->data)
		return;

	obj->handle = block;

	for (ptr = obj->data, i = 0; i < block->count; i++)
		ptr += cx_debugbus_read(gpu, cxdbg, block->id, i, ptr);
}

static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	int nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);

	if (adreno_is_a650_family(to_adreno_gpu(gpu)))
		nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks);

	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
			sizeof(*a6xx_state->debugbus));

	if (a6xx_state->debugbus) {
		int i;

		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
			a6xx_get_debugbus_block(gpu,
				a6xx_state,
				&a6xx_debugbus_blocks[i],
				&a6xx_state->debugbus[i]);

		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);

		/*
		 * GBIF has the same debugbus as the other GPU blocks, so fall
		 * back to the default path if the GPU uses GBIF; GBIF also
		 * uses exactly the same ID as VBIF.
		 */
		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
			a6xx_get_debugbus_block(gpu, a6xx_state,
				&a6xx_gbif_debugbus_block,
				&a6xx_state->debugbus[i]);

			a6xx_state->nr_debugbus += 1;
		}

		if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
			for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++)
				a6xx_get_debugbus_block(gpu,
					a6xx_state,
					&a650_debugbus_blocks[i],
					&a6xx_state->debugbus[i]);
		}
	}
}

static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int debugbus_blocks_count, gbif_debugbus_blocks_count, total_debugbus_blocks;
	const u32 *debugbus_blocks, *gbif_debugbus_blocks;
	int i;

	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
		debugbus_blocks = gen7_0_0_debugbus_blocks;
		debugbus_blocks_count = ARRAY_SIZE(gen7_0_0_debugbus_blocks);
		gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks;
		gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
	} else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
		debugbus_blocks = gen7_2_0_debugbus_blocks;
		debugbus_blocks_count = ARRAY_SIZE(gen7_2_0_debugbus_blocks);
		gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks;
		gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
	} else {
		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
		debugbus_blocks = gen7_9_0_debugbus_blocks;
		debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_debugbus_blocks);
		gbif_debugbus_blocks = gen7_9_0_gbif_debugbus_blocks;
		gbif_debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_gbif_debugbus_blocks);
	}

	total_debugbus_blocks = debugbus_blocks_count + gbif_debugbus_blocks_count;

	a6xx_state->debugbus = state_kcalloc(a6xx_state, total_debugbus_blocks,
			sizeof(*a6xx_state->debugbus));

	if (a6xx_state->debugbus) {
		for (i = 0; i < debugbus_blocks_count; i++) {
			a6xx_get_debugbus_block(gpu,
				a6xx_state, &a7xx_debugbus_blocks[debugbus_blocks[i]],
				&a6xx_state->debugbus[i]);
		}

		for (i = 0; i < gbif_debugbus_blocks_count; i++) {
			a6xx_get_debugbus_block(gpu,
				a6xx_state, &a7xx_debugbus_blocks[gbif_debugbus_blocks[i]],
				&a6xx_state->debugbus[i + debugbus_blocks_count]);
		}

		a6xx_state->nr_debugbus = total_debugbus_blocks;
	}
}

static void a6xx_get_debugbus(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct resource *res;
	void __iomem *cxdbg = NULL;

	/* Set up the GX debug bus */

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);

	/* Set up the CX debug bus - it lives elsewhere in the system so do a
	 * temporary ioremap for the registers
	 */
	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
			"cx_dbgc");

	if (res)
		cxdbg = ioremap(res->start, resource_size(res));

	if (cxdbg) {
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
			0x76543210);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
			0xFEDCBA98);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
	}

	if (adreno_is_a7xx(adreno_gpu)) {
		a7xx_get_debugbus_blocks(gpu, a6xx_state);
	} else {
		a6xx_get_debugbus_blocks(gpu, a6xx_state);
	}

	/* Dump the VBIF debugbus on applicable targets */
	if (!a6xx_has_gbif(adreno_gpu)) {
		a6xx_state->vbif_debugbus =
			state_kcalloc(a6xx_state, 1,
					sizeof(*a6xx_state->vbif_debugbus));

		if (a6xx_state->vbif_debugbus)
			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
					a6xx_state->vbif_debugbus);
	}

	if (cxdbg) {
		unsigned nr_cx_debugbus_blocks;
		const struct a6xx_debugbus_block *cx_debugbus_blocks;

		if (adreno_is_a7xx(adreno_gpu)) {
			BUG_ON(adreno_gpu->info->family > ADRENO_7XX_GEN3);
			cx_debugbus_blocks = a7xx_cx_debugbus_blocks;
			nr_cx_debugbus_blocks = ARRAY_SIZE(a7xx_cx_debugbus_blocks);
		} else {
			cx_debugbus_blocks = a6xx_cx_debugbus_blocks;
			nr_cx_debugbus_blocks = ARRAY_SIZE(a6xx_cx_debugbus_blocks);
		}

		a6xx_state->cx_debugbus =
			state_kcalloc(a6xx_state,
					nr_cx_debugbus_blocks,
					sizeof(*a6xx_state->cx_debugbus));

		if (a6xx_state->cx_debugbus) {
			int i;

			for (i = 0; i < nr_cx_debugbus_blocks; i++)
				a6xx_get_cx_debugbus_block(gpu,
						cxdbg,
						a6xx_state,
						&cx_debugbus_blocks[i],
						&a6xx_state->cx_debugbus[i]);

			a6xx_state->nr_cx_debugbus =
				nr_cx_debugbus_blocks;
		}

		iounmap(cxdbg);
	}
}

#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
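
/*
 * Register tables are lists of (first, last) pairs of dword addresses, both
 * ends inclusive; RANGE() yields the number of registers in a pair. The a7xx
 * tables are terminated by a UINT_MAX sentinel instead of a stored count.
 */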

/* Read a data cluster from behind the AHB aperture */
static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_dbgahb_cluster *dbgahb,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
			(dbgahb->statetype + i * 2) << 8);

		for (j = 0; j < dbgahb->count; j += 2) {
			int count = RANGE(dbgahb->registers, j);
			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
				dbgahb->registers[j] - (dbgahb->base >> 2);

			in += CRASHDUMP_READ(in, offset, count, out);

			out += count * sizeof(u32);

			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = dbgahb;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a7xx_get_dbgahb_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct gen7_sptp_cluster_registers *dbgahb,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
		A7XX_SP_READ_SEL_LOCATION(dbgahb->location_id) |
		A7XX_SP_READ_SEL_PIPE(dbgahb->pipe_id) |
		A7XX_SP_READ_SEL_STATETYPE(dbgahb->statetype));

	for (i = 0; dbgahb->regs[i] != UINT_MAX; i += 2) {
		int count = RANGE(dbgahb->regs, i);
		u32 offset = REG_A7XX_SP_AHB_READ_APERTURE +
			dbgahb->regs[i] - dbgahb->regbase;

		in += CRASHDUMP_READ(in, offset, count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = dbgahb;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_dbgahb_clusters),
		sizeof(*a6xx_state->dbgahb_clusters));

	if (!a6xx_state->dbgahb_clusters)
		return;

	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);

	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
			&a6xx_dbgahb_clusters[i],
			&a6xx_state->dbgahb_clusters[i], dumper);
}

static void a7xx_get_dbgahb_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;
	const struct gen7_sptp_cluster_registers *dbgahb_clusters;
	unsigned dbgahb_clusters_size;

	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
		dbgahb_clusters = gen7_0_0_sptp_clusters;
		dbgahb_clusters_size = ARRAY_SIZE(gen7_0_0_sptp_clusters);
	} else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
		dbgahb_clusters = gen7_2_0_sptp_clusters;
		dbgahb_clusters_size = ARRAY_SIZE(gen7_2_0_sptp_clusters);
	} else {
		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
		dbgahb_clusters = gen7_9_0_sptp_clusters;
		dbgahb_clusters_size = ARRAY_SIZE(gen7_9_0_sptp_clusters);
	}

	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
		dbgahb_clusters_size,
		sizeof(*a6xx_state->dbgahb_clusters));

	if (!a6xx_state->dbgahb_clusters)
		return;

	a6xx_state->nr_dbgahb_clusters = dbgahb_clusters_size;

	for (i = 0; i < dbgahb_clusters_size; i++)
		a7xx_get_dbgahb_cluster(gpu, a6xx_state,
			&dbgahb_clusters[i],
			&a6xx_state->dbgahb_clusters[i], dumper);
}

/* Read a data cluster from the CP aperture with the crashdumper */
static void a6xx_get_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_cluster *cluster,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;
	u32 id = cluster->id;

	/* Skip registers that are not present on older generation */
	if (!adreno_is_a660_family(adreno_gpu) &&
			cluster->registers == a660_fe_cluster)
		return;

	if (adreno_is_a650_family(adreno_gpu) &&
			cluster->registers == a6xx_ps_cluster)
		id = CLUSTER_VPC_PS;

	/* Some clusters need a selector register to be programmed too */
	if (cluster->sel_reg)
		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
			(id << 8) | (i << 4) | i);

		for (j = 0; j < cluster->count; j += 2) {
			int count = RANGE(cluster->registers, j);

			in += CRASHDUMP_READ(in, cluster->registers[j],
				count, out);

			out += count * sizeof(u32);

			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = cluster;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a7xx_get_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct gen7_cluster_registers *cluster,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD,
		A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) |
		A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) |
		A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id));

	/* Some clusters need a selector register to be programmed too */
	if (cluster->sel)
		in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val);

	for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) {
		int count = RANGE(cluster->regs, i);

		in += CRASHDUMP_READ(in, cluster->regs[i],
			count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = cluster;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->clusters = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));

	if (!a6xx_state->clusters)
		return;

	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);

	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
			&a6xx_state->clusters[i], dumper);
}

static void a7xx_get_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;
	const struct gen7_cluster_registers *clusters;
	unsigned clusters_size;

	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
		clusters = gen7_0_0_clusters;
		clusters_size = ARRAY_SIZE(gen7_0_0_clusters);
	} else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
		clusters = gen7_2_0_clusters;
		clusters_size = ARRAY_SIZE(gen7_2_0_clusters);
	} else {
		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
		clusters = gen7_9_0_clusters;
		clusters_size = ARRAY_SIZE(gen7_9_0_clusters);
	}

	a6xx_state->clusters = state_kcalloc(a6xx_state,
		clusters_size, sizeof(*a6xx_state->clusters));

	if (!a6xx_state->clusters)
		return;

	a6xx_state->nr_clusters = clusters_size;

	for (i = 0; i < clusters_size; i++)
		a7xx_get_cluster(gpu, a6xx_state, &clusters[i],
			&a6xx_state->clusters[i], dumper);
}

/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
static void a6xx_get_shader_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_shader_block *block,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
	int i;

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
			(block->type << 8) | i);

		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
			block->size, out);

		out += block->size * sizeof(u32);
	}

	CRASHDUMP_FINI(in);

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = block;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a7xx_get_shader_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct gen7_shader_block *block,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize = block->size * block->num_sps * block->num_usptps * sizeof(u32);
	int i, j;

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 3);
	}

	for (i = 0; i < block->num_sps; i++) {
		for (j = 0; j < block->num_usptps; j++) {
			in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
				A7XX_SP_READ_SEL_LOCATION(block->location) |
				A7XX_SP_READ_SEL_PIPE(block->pipeid) |
				A7XX_SP_READ_SEL_STATETYPE(block->statetype) |
				A7XX_SP_READ_SEL_USPTP(j) |
				A7XX_SP_READ_SEL_SPTP(i));

			in += CRASHDUMP_READ(in, REG_A7XX_SP_AHB_READ_APERTURE,
				block->size, out);

			out += block->size * sizeof(u32);
		}
	}

	CRASHDUMP_FINI(in);

	if (a6xx_crashdumper_run(gpu, dumper))
		goto out;

	obj->handle = block;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);

out:
	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 0);
	}
}

static void a6xx_get_shaders(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->shaders = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));

	if (!a6xx_state->shaders)
		return;

	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);

	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
			&a6xx_state->shaders[i], dumper);
}

static void a7xx_get_shaders(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct gen7_shader_block *shader_blocks;
	unsigned num_shader_blocks;
	int i;

	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
		shader_blocks = gen7_0_0_shader_blocks;
		num_shader_blocks = ARRAY_SIZE(gen7_0_0_shader_blocks);
	} else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
		shader_blocks = gen7_2_0_shader_blocks;
		num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks);
	} else {
		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
		shader_blocks = gen7_9_0_shader_blocks;
		num_shader_blocks = ARRAY_SIZE(gen7_9_0_shader_blocks);
	}

	a6xx_state->shaders = state_kcalloc(a6xx_state,
		num_shader_blocks, sizeof(*a6xx_state->shaders));

	if (!a6xx_state->shaders)
		return;

	a6xx_state->nr_shaders = num_shader_blocks;

	for (i = 0; i < num_shader_blocks; i++)
		a7xx_get_shader_block(gpu, a6xx_state, &shader_blocks[i],
			&a6xx_state->shaders[i], dumper);
}

/* Read registers from behind the HLSQ aperture with the crashdumper */
static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
			regs->registers[i] - (regs->val0 >> 2);

		in += CRASHDUMP_READ(in, offset, count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}

/* Read a block of registers using the crashdumper */
static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	/* Skip unsupported registers on older generations */
	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
			(regs->registers == a660_registers))
		return;

	/* Some blocks might need to program a selector register first */
	if (regs->val0)
		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);

		in += CRASHDUMP_READ(in, regs->registers[i], count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}

static void a7xx_get_crashdumper_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct gen7_reg_list *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	/* Some blocks might need to program a selector register first */
	if (regs->sel)
		in += CRASHDUMP_WRITE(in, regs->sel->cd_reg, regs->sel->val);

	for (i = 0; regs->regs[i] != UINT_MAX; i += 2) {
		u32 count = RANGE(regs->regs, i);

		in += CRASHDUMP_READ(in, regs->regs[i], count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs->regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}

/* Read a block of registers via AHB */
static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj)
{
	int i, regcount = 0, index = 0;

	/* Skip unsupported registers on older generations */
	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
			(regs->registers == a660_registers))
		return;

	for (i = 0; i < regs->count; i += 2)
		regcount += RANGE(regs->registers, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		int j;

		for (j = 0; j < count; j++)
			obj->data[index++] = gpu_read(gpu,
				regs->registers[i] + j);
	}
}

static void a7xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const u32 *regs,
		struct a6xx_gpu_state_obj *obj)
{
	int i, regcount = 0, index = 0;

	for (i = 0; regs[i] != UINT_MAX; i += 2)
		regcount += RANGE(regs, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; regs[i] != UINT_MAX; i += 2) {
		u32 count = RANGE(regs, i);
		int j;

		for (j = 0; j < count; j++)
			obj->data[index++] = gpu_read(gpu, regs[i] + j);
	}
}

static void a7xx_get_ahb_gpu_reglist(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct gen7_reg_list *regs,
		struct a6xx_gpu_state_obj *obj)
{
	if (regs->sel)
		gpu_write(gpu, regs->sel->host_reg, regs->sel->val);

	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, regs->regs, obj);
}

/* Read a block of GMU registers */
static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		bool rscc)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int i, regcount = 0, index = 0;

	for (i = 0; i < regs->count; i += 2)
		regcount += RANGE(regs->registers, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		int j;

		for (j = 0; j < count; j++) {
			u32 offset = regs->registers[i] + j;
			u32 val;

			if (rscc)
				val = gmu_read_rscc(gmu, offset);
			else
				val = gmu_read(gmu, offset);

			obj->data[index++] = val;
		}
	}
}

static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
		4, sizeof(*a6xx_state->gmu_registers));

	if (!a6xx_state->gmu_registers)
		return;

	a6xx_state->nr_gmu_registers = 4;

	/* Get the CX GMU registers from AHB */
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
		&a6xx_state->gmu_registers[0], false);
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
		&a6xx_state->gmu_registers[1], true);

	if (adreno_is_a621(adreno_gpu) || adreno_is_a623(adreno_gpu))
		_a6xx_get_gmu_registers(gpu, a6xx_state, &a621_gpucc_reg,
			&a6xx_state->gmu_registers[2], false);
	else
		_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gpucc_reg,
			&a6xx_state->gmu_registers[2], false);

	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
		return;

	/* Set the fence to ALLOW mode so we can access the registers */
	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);

	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
		&a6xx_state->gmu_registers[3], false);
}

static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
{
	struct msm_gpu_state_bo *snapshot;

	if (!bo->size)
		return NULL;

	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
	if (!snapshot)
		return NULL;

	snapshot->iova = bo->iova;
	snapshot->size = bo->size;
	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
	if (!snapshot->data)
		return NULL;

	memcpy(snapshot->data, bo->virt, bo->size);

	return snapshot;
}

static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	unsigned i, j;

	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));

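	/*
	 * Each queue's history is a ring buffer; copy it out oldest-first,
	 * starting at history_idx, so the snapshot reads in order.
	 */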
	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
		struct a6xx_hfi_queue *queue = &gmu->queues[i];
		for (j = 0; j < HFI_HISTORY_SZ; j++) {
			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
		}
	}
}

#define A6XX_REGLIST_SIZE	1
#define A6XX_GBIF_REGLIST_SIZE	1

static void a6xx_get_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i, count = A6XX_REGLIST_SIZE +
		ARRAY_SIZE(a6xx_reglist) +
		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
	int index = 0;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	a6xx_state->registers = state_kcalloc(a6xx_state,
		count, sizeof(*a6xx_state->registers));

	if (!a6xx_state->registers)
		return;

	a6xx_state->nr_registers = count;

	a6xx_get_ahb_gpu_registers(gpu,
		a6xx_state, &a6xx_ahb_reglist,
		&a6xx_state->registers[index++]);

	if (a6xx_has_gbif(adreno_gpu))
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_gbif_reglist,
			&a6xx_state->registers[index++]);
	else
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_vbif_reglist,
			&a6xx_state->registers[index++]);

	if (!dumper) {
		/*
		 * We can't use the crashdumper when the SMMU is stalled,
		 * because the GPU has no memory access until we resume
		 * translation (but we don't want to do that until after
		 * we have captured as much useful GPU state as possible).
		 * So instead collect registers via the CPU:
		 */
		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
			a6xx_get_ahb_gpu_registers(gpu,
				a6xx_state, &a6xx_reglist[i],
				&a6xx_state->registers[index++]);
		return;
	}

	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
		a6xx_get_crashdumper_registers(gpu,
			a6xx_state, &a6xx_reglist[i],
			&a6xx_state->registers[index++],
			dumper);

	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
		a6xx_get_crashdumper_hlsq_registers(gpu,
			a6xx_state, &a6xx_hlsq_reglist[i],
			&a6xx_state->registers[index++],
			dumper);
}

#define A7XX_PRE_CRASHDUMPER_SIZE 1
#define A7XX_POST_CRASHDUMPER_SIZE 1

static void a7xx_get_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i, count;
	int index = 0;
	const u32 *pre_crashdumper_regs;
	const struct gen7_reg_list *reglist;

	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
		reglist = gen7_0_0_reg_list;
		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
	} else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
		reglist = gen7_2_0_reg_list;
		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
	} else {
		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
		reglist = gen7_9_0_reg_list;
		pre_crashdumper_regs = gen7_9_0_pre_crashdumper_gpu_registers;
	}

	count = A7XX_PRE_CRASHDUMPER_SIZE + A7XX_POST_CRASHDUMPER_SIZE;

	/* The downstream reglist contains registers in other memory regions
	 * (cx_misc/cx_mem and cx_dbgc) and we need to plumb through their
	 * offsets and map them to read them on the CPU. For now only read the
	 * first region which is the main one.
	 */
	if (dumper) {
		for (i = 0; reglist[i].regs; i++)
			count++;
	} else {
		count++;
	}

	a6xx_state->registers = state_kcalloc(a6xx_state,
		count, sizeof(*a6xx_state->registers));

	if (!a6xx_state->registers)
		return;

	a6xx_state->nr_registers = count;

	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, pre_crashdumper_regs,
		&a6xx_state->registers[index++]);

	if (!dumper) {
		a7xx_get_ahb_gpu_reglist(gpu,
			a6xx_state, &reglist[0],
			&a6xx_state->registers[index++]);
		return;
	}

	for (i = 0; reglist[i].regs; i++)
		a7xx_get_crashdumper_registers(gpu,
			a6xx_state, &reglist[i],
			&a6xx_state->registers[index++],
			dumper);
}

static void a7xx_get_post_crashdumper_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const u32 *regs;

	BUG_ON(adreno_gpu->info->family > ADRENO_7XX_GEN3);
	regs = gen7_0_0_post_crashdumper_registers;

	a7xx_get_ahb_gpu_registers(gpu,
		a6xx_state, regs,
		&a6xx_state->registers[a6xx_state->nr_registers - 1]);
}

static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu)
{
	/* The value at [16:31] is in 4dword units. Convert it to dwords */
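	/* (>> 14) folds the field extraction (>> 16) and the x4 scaling (<< 2)
	 * into a single shift
	 */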
1446 return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14;
1447 }
1448
a7xx_get_cp_roq_size(struct msm_gpu * gpu)1449 static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu)
1450 {
1451 /*
1452 * The value at CP_ROQ_THRESHOLDS_2[20:31] is in 4dword units.
1453 * That register however is not directly accessible from APSS on A7xx.
1454 * Program the SQE_UCODE_DBG_ADDR with offset=0x70d3 and read the value.
1455 */
1456 gpu_write(gpu, REG_A6XX_CP_SQE_UCODE_DBG_ADDR, 0x70d3);
1457
1458 return 4 * (gpu_read(gpu, REG_A6XX_CP_SQE_UCODE_DBG_DATA) >> 20);
1459 }
1460
1461 /* Read a block of data from an indexed register pair */
a6xx_get_indexed_regs(struct msm_gpu * gpu,struct a6xx_gpu_state * a6xx_state,const struct a6xx_indexed_registers * indexed,struct a6xx_gpu_state_obj * obj)1462 static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
1463 struct a6xx_gpu_state *a6xx_state,
1464 const struct a6xx_indexed_registers *indexed,
1465 struct a6xx_gpu_state_obj *obj)
1466 {
1467 u32 count = indexed->count;
1468 int i;
1469
1470 obj->handle = (const void *) indexed;
1471 if (indexed->count_fn)
1472 count = indexed->count_fn(gpu);
1473
1474 obj->data = state_kcalloc(a6xx_state, count, sizeof(u32));
1475 obj->count = count;
1476 if (!obj->data)
1477 return;
1478
1479 /* All the indexed banks start at address 0 */
1480 gpu_write(gpu, indexed->addr, 0);
1481
1482 /* Read the data - each read increments the internal address by 1 */
1483 for (i = 0; i < count; i++)
1484 obj->data[i] = gpu_read(gpu, indexed->data);
1485 }
1486
a6xx_get_indexed_registers(struct msm_gpu * gpu,struct a6xx_gpu_state * a6xx_state)1487 static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
1488 struct a6xx_gpu_state *a6xx_state)
1489 {
1490 u32 mempool_size;
1491 int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
1492 int i;
1493
1494 a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
1495 sizeof(*a6xx_state->indexed_regs));
1496 if (!a6xx_state->indexed_regs)
1497 return;
1498
1499 for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
1500 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
1501 &a6xx_state->indexed_regs[i]);
1502
1503 if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
1504 u32 val;
1505
1506 val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG);
1507 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4);
1508
1509 /* Get the contents of the CP mempool */
1510 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1511 &a6xx_state->indexed_regs[i]);
1512
1513 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val);
1514 a6xx_state->nr_indexed_regs = count;
1515 return;
1516 }
1517
1518 /* Set the CP mempool size to 0 to stabilize it while dumping */
1519 mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
1520 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
1521
1522 /* Get the contents of the CP mempool */
1523 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1524 &a6xx_state->indexed_regs[i]);
1525
1526 /*
1527 * Offset 0x2000 in the mempool is the size - copy the saved size over
1528 * so the data is consistent
1529 */
1530 a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
1531
1532 /* Restore the size in the hardware */
1533 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
1534
1535 a6xx_state->nr_indexed_regs = count;
1536 }
1537
a7xx_get_indexed_registers(struct msm_gpu * gpu,struct a6xx_gpu_state * a6xx_state)1538 static void a7xx_get_indexed_registers(struct msm_gpu *gpu,
1539 struct a6xx_gpu_state *a6xx_state)
1540 {
1541 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1542 const struct a6xx_indexed_registers *indexed_regs;
1543 int i, indexed_count, mempool_count;
1544
1545 if (adreno_gpu->info->family <= ADRENO_7XX_GEN2) {
1546 indexed_regs = a7xx_indexed_reglist;
1547 indexed_count = ARRAY_SIZE(a7xx_indexed_reglist);
1548 } else {
1549 BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
1550 indexed_regs = gen7_9_0_cp_indexed_reg_list;
1551 indexed_count = ARRAY_SIZE(gen7_9_0_cp_indexed_reg_list);
1552 }
1553
1554 mempool_count = ARRAY_SIZE(a7xx_cp_bv_mempool_indexed);
1555
1556 a6xx_state->indexed_regs = state_kcalloc(a6xx_state,
1557 indexed_count + mempool_count,
1558 sizeof(*a6xx_state->indexed_regs));
1559 if (!a6xx_state->indexed_regs)
1560 return;
1561
1562 a6xx_state->nr_indexed_regs = indexed_count + mempool_count;
1563
1564 /* First read the common regs */
1565 for (i = 0; i < indexed_count; i++)
1566 a6xx_get_indexed_regs(gpu, a6xx_state, &indexed_regs[i],
1567 &a6xx_state->indexed_regs[i]);
1568
1569 gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, 0, BIT(2));
1570 gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, 0, BIT(2));
1571
1572 /* Get the contents of the CP_BV mempool */
1573 for (i = 0; i < mempool_count; i++)
1574 a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_cp_bv_mempool_indexed[i],
1575 &a6xx_state->indexed_regs[indexed_count + i]);
1576
1577 gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(2), 0);
1578 gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, BIT(2), 0);
1579 return;
1580 }
1581
a6xx_gpu_state_get(struct msm_gpu * gpu)1582 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
1583 {
1584 struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
1585 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1586 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1587 struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
1588 GFP_KERNEL);
1589 bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
1590 A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
1591
1592 if (!a6xx_state)
1593 return ERR_PTR(-ENOMEM);
1594
1595 INIT_LIST_HEAD(&a6xx_state->objs);
1596
1597 /* Get the generic state from the adreno core */
1598 adreno_gpu_state_get(gpu, &a6xx_state->base);
1599
1600 if (!adreno_has_gmu_wrapper(adreno_gpu)) {
1601 a6xx_get_gmu_registers(gpu, a6xx_state);
1602
1603 a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
1604 a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
1605 a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);
1606
1607 a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
1608 }
1609
1610 /* If GX isn't on the rest of the data isn't going to be accessible */
1611 if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1612 return &a6xx_state->base;
1613
1614 /* Get the banks of indexed registers */
1615 if (adreno_is_a7xx(adreno_gpu))
1616 a7xx_get_indexed_registers(gpu, a6xx_state);
1617 else
1618 a6xx_get_indexed_registers(gpu, a6xx_state);
1619
1620 /*
1621 * Try to initialize the crashdumper, if we are not dumping state
1622 * with the SMMU stalled. The crashdumper needs memory access to
1623 * write out GPU state, so we need to skip this when the SMMU is
1624 * stalled in response to an iova fault
1625 */
1626 if (!stalled && !gpu->needs_hw_init &&
1627 !a6xx_crashdumper_init(gpu, &_dumper)) {
1628 dumper = &_dumper;
1629 }
1630
1631 if (adreno_is_a7xx(adreno_gpu)) {
1632 a7xx_get_registers(gpu, a6xx_state, dumper);
1633
1634 if (dumper) {
1635 a7xx_get_shaders(gpu, a6xx_state, dumper);
1636 a7xx_get_clusters(gpu, a6xx_state, dumper);
1637 a7xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1638
1639 msm_gem_kernel_put(dumper->bo, gpu->vm);
1640 }
1641
1642 a7xx_get_post_crashdumper_registers(gpu, a6xx_state);
1643 } else {
1644 a6xx_get_registers(gpu, a6xx_state, dumper);
1645
1646 if (dumper) {
1647 a6xx_get_shaders(gpu, a6xx_state, dumper);
1648 a6xx_get_clusters(gpu, a6xx_state, dumper);
1649 a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1650
1651 msm_gem_kernel_put(dumper->bo, gpu->vm);
1652 }
1653 }
1654
1655 if (snapshot_debugbus)
1656 a6xx_get_debugbus(gpu, a6xx_state);
1657
1658 a6xx_state->gpu_initialized = !gpu->needs_hw_init;
1659
1660 return &a6xx_state->base;
1661 }
1662
a6xx_gpu_state_destroy(struct kref * kref)1663 static void a6xx_gpu_state_destroy(struct kref *kref)
1664 {
1665 struct a6xx_state_memobj *obj, *tmp;
1666 struct msm_gpu_state *state = container_of(kref,
1667 struct msm_gpu_state, ref);
1668 struct a6xx_gpu_state *a6xx_state = container_of(state,
1669 struct a6xx_gpu_state, base);
1670
1671 if (a6xx_state->gmu_log)
1672 kvfree(a6xx_state->gmu_log->data);
1673
1674 if (a6xx_state->gmu_hfi)
1675 kvfree(a6xx_state->gmu_hfi->data);
1676
1677 if (a6xx_state->gmu_debug)
1678 kvfree(a6xx_state->gmu_debug->data);
1679
1680 list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
1681 list_del(&obj->node);
1682 kvfree(obj);
1683 }
1684
1685 adreno_gpu_state_destroy(state);
1686 kfree(a6xx_state);
1687 }
1688
int a6xx_gpu_state_put(struct msm_gpu_state *state)
{
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, a6xx_gpu_state_destroy);
}

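/*
 * The register tables are walked two entries at a time: registers[i] is
 * the first dword offset of a run and RANGE() gives how many consecutive
 * registers that entry covers. Slots still holding the 0xdeafbead fill
 * pattern were never successfully read back and are skipped. Everything
 * else is printed as a YAML-ish mapping with the dword offset converted
 * to a byte offset, e.g. (illustrative values):
 *
 *   - { offset: 0x000040, value: 0x00000001 }
 */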
static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
		struct drm_printer *p)
{
	int i, index = 0;

	if (!data)
		return;

	for (i = 0; i < count; i += 2) {
		u32 count = RANGE(registers, i);
		u32 offset = registers[i];
		int j;

		for (j = 0; j < count; index++, offset++, j++) {
			if (data[index] == 0xdeafbead)
				continue;

			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
				offset << 2, data[index]);
		}
	}
}

static void a7xx_show_registers_indented(const u32 *registers, u32 *data,
		struct drm_printer *p, unsigned indent)
{
	int i, index = 0;

	for (i = 0; registers[i] != UINT_MAX; i += 2) {
		u32 count = RANGE(registers, i);
		u32 offset = registers[i];
		int j;

		for (j = 0; j < count; index++, offset++, j++) {
			int k;

			if (data[index] == 0xdeafbead)
				continue;

			for (k = 0; k < indent; k++)
				drm_printf(p, "  ");
			drm_printf(p, "- { offset: 0x%06x, value: 0x%08x }\n",
				offset << 2, data[index]);
		}
	}
}

static void a7xx_show_registers(const u32 *registers, u32 *data, struct drm_printer *p)
{
	a7xx_show_registers_indented(registers, data, p, 1);
}

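/*
 * Emit a buffer as an ascii85 YAML block scalar. Trailing zero dwords are
 * trimmed first, so an all-zero buffer produces no "data:" node at all.
 */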
static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
{
	char out[ASCII85_BUFSZ];
	long i, l, datalen = 0;

	for (i = 0; i < len >> 2; i++) {
		if (data[i])
			datalen = (i + 1) << 2;
	}

	if (datalen == 0)
		return;

	drm_puts(p, "    data: !!ascii85 |\n");
	drm_puts(p, "     ");

	l = ascii85_encode_len(datalen);

	for (i = 0; i < l; i++)
		drm_puts(p, ascii85_encode(data[i], out));

	drm_puts(p, "\n");
}

static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	drm_puts(p, fmt);
	drm_puts(p, name);
	drm_puts(p, "\n");
}

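/*
 * Shader blocks are captured once per bank and stored back to back, so
 * bank i begins at obj->data + (block->size * i).
 */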
static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_shader_block *block = obj->handle;
	int i;

	if (!obj->handle)
		return;

	print_name(p, "  - type: ", block->name);

	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
		drm_printf(p, "    - bank: %d\n", i);
		drm_printf(p, "      size: %d\n", block->size);

		if (!obj->data)
			continue;

		print_ascii85(p, block->size << 2,
			obj->data + (block->size * i));
	}
}

static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct gen7_shader_block *block = obj->handle;
	int i, j;
	u32 *data = obj->data;

	if (!obj->handle)
		return;

	print_name(p, "  - type: ", a7xx_statetype_names[block->statetype]);
	print_name(p, "    - pipe: ", a7xx_pipe_names[block->pipeid]);
	drm_printf(p, "      - location: %d\n", block->location);

	for (i = 0; i < block->num_sps; i++) {
		drm_printf(p, "      - sp: %d\n", i);

		for (j = 0; j < block->num_usptps; j++) {
			drm_printf(p, "        - usptp: %d\n", j);
			drm_printf(p, "          size: %d\n", block->size);

			if (!obj->data)
				continue;

			print_ascii85(p, block->size << 2, data);

			data += block->size;
		}
	}
}

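/*
 * Cluster registers are captured once per hardware context: the snapshot
 * buffer holds A6XX_NUM_CONTEXTS consecutive copies of the register list,
 * walked here with a single running index.
 */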
static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
		struct drm_printer *p)
{
	int ctx, index = 0;

	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
		int j;

		drm_printf(p, "    - context: %d\n", ctx);

		for (j = 0; j < size; j += 2) {
			u32 count = RANGE(registers, j);
			u32 offset = registers[j];
			int k;

			for (k = 0; k < count; index++, offset++, k++) {
				if (data[index] == 0xdeafbead)
					continue;

				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
					offset << 2, data[index]);
			}
		}
	}
}

static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;

	if (dbgahb) {
		print_name(p, "  - cluster-name: ", dbgahb->name);
		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
			obj->data, p);
	}
}

static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_cluster *cluster = obj->handle;

	if (cluster) {
		print_name(p, "  - cluster-name: ", cluster->name);
		a6xx_show_cluster_data(cluster->registers, cluster->count,
			obj->data, p);
	}
}

static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct gen7_sptp_cluster_registers *dbgahb = obj->handle;

	if (dbgahb) {
		print_name(p, "  - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]);
		print_name(p, "    - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]);
		drm_printf(p, "      - context: %d\n", dbgahb->context_id);
		drm_printf(p, "      - location: %d\n", dbgahb->location_id);
		a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4);
	}
}

static void a7xx_show_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct gen7_cluster_registers *cluster = obj->handle;

	if (cluster) {
		int context = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;

		print_name(p, "  - pipe: ", a7xx_pipe_names[cluster->pipe_id]);
		print_name(p, "    - cluster-name: ", a7xx_cluster_names[cluster->cluster_id]);
		drm_printf(p, "      - context: %d\n", context);
		a7xx_show_registers_indented(cluster->regs, obj->data, p, 4);
	}
}

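/*
 * Indexed register files are dumped wholesale as ascii85. obj->count is
 * the dword count and, per the a6xx_gpu_state_obj definition, may have
 * been read back from the hardware rather than taken from a static table.
 */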
static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_indexed_registers *indexed = obj->handle;

	if (!indexed)
		return;

	print_name(p, "  - regs-name: ", indexed->name);
	drm_printf(p, "    dwords: %d\n", obj->count);

	print_ascii85(p, obj->count << 2, obj->data);
}

static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
		u32 *data, struct drm_printer *p)
{
	if (block) {
		print_name(p, "  - debugbus-block: ", block->name);

		/*
		 * count for regular debugbus data is in quadwords,
		 * but print the size in dwords for consistency
		 */
		drm_printf(p, "    count: %d\n", block->count << 1);

		print_ascii85(p, block->count << 3, data);
	}
}

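/*
 * Print all captured debugbus blocks. Mind the units below: regular and
 * CX debugbus counts are in quadwords (hence << 1 for dwords and << 3 for
 * bytes), while the VBIF block is already counted in dwords.
 */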
static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
		struct drm_printer *p)
{
	int i;

	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];

		a6xx_show_debugbus_block(obj->handle, obj->data, p);
	}

	if (a6xx_state->vbif_debugbus) {
		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;

		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);

		/* vbif debugbus data is in dwords. Confusing, huh? */
		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
	}

	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];

		a6xx_show_debugbus_block(obj->handle, obj->data, p);
	}
}

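/*
 * Top-level crash state printer, used for the devcoredump/debugfs output.
 * The result is YAML-ish, one top-level section per capture type, roughly
 * (sections may be empty depending on what was captured):
 *
 *   gpu-initialized: 1
 *   gmu-log:
 *   gmu-hfi:
 *   gmu-debug:
 *   registers:
 *   registers-gmu:
 *   indexed-registers:
 *   shader-blocks:
 *   clusters:
 *   debugbus:
 */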
void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu_state *a6xx_state = container_of(state,
			struct a6xx_gpu_state, base);
	int i;

	if (IS_ERR_OR_NULL(state))
		return;

	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);

	adreno_show(gpu, state, p);

	drm_puts(p, "gmu-log:\n");
	if (a6xx_state->gmu_log) {
		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;

		drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
		drm_printf(p, "    size: %zu\n", gmu_log->size);
		adreno_show_object(p, &gmu_log->data, gmu_log->size,
				&gmu_log->encoded);
	}

	drm_puts(p, "gmu-hfi:\n");
	if (a6xx_state->gmu_hfi) {
		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
		unsigned i, j;

		drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
		drm_printf(p, "    size: %zu\n", gmu_hfi->size);
		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
			drm_printf(p, "    queue-history[%u]:", i);
			for (j = 0; j < HFI_HISTORY_SZ; j++) {
				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
			}
			drm_printf(p, "\n");
		}
		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
				&gmu_hfi->encoded);
	}

	drm_puts(p, "gmu-debug:\n");
	if (a6xx_state->gmu_debug) {
		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;

		drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
		drm_printf(p, "    size: %zu\n", gmu_debug->size);
		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
				&gmu_debug->encoded);
	}

	drm_puts(p, "registers:\n");
	for (i = 0; i < a6xx_state->nr_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];

		if (!obj->handle)
			continue;

		if (adreno_is_a7xx(adreno_gpu)) {
			a7xx_show_registers(obj->handle, obj->data, p);
		} else {
			const struct a6xx_registers *regs = obj->handle;

			a6xx_show_registers(regs->registers, obj->data, regs->count, p);
		}
	}

	drm_puts(p, "registers-gmu:\n");
	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
		const struct a6xx_registers *regs = obj->handle;

		if (!obj->handle)
			continue;

		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
	}

	drm_puts(p, "indexed-registers:\n");
	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);

	drm_puts(p, "shader-blocks:\n");
	for (i = 0; i < a6xx_state->nr_shaders; i++) {
		if (adreno_is_a7xx(adreno_gpu))
			a7xx_show_shader(&a6xx_state->shaders[i], p);
		else
			a6xx_show_shader(&a6xx_state->shaders[i], p);
	}

	drm_puts(p, "clusters:\n");
	for (i = 0; i < a6xx_state->nr_clusters; i++) {
		if (adreno_is_a7xx(adreno_gpu))
			a7xx_show_cluster(&a6xx_state->clusters[i], p);
		else
			a6xx_show_cluster(&a6xx_state->clusters[i], p);
	}

	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) {
		if (adreno_is_a7xx(adreno_gpu))
			a7xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
		else
			a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
	}

	drm_puts(p, "debugbus:\n");
	a6xx_show_debugbus(a6xx_state, p);
}
