// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */

#include <linux/ascii85.h>
#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"

static const unsigned int *gen7_0_0_external_core_regs[] __always_unused;
static const unsigned int *gen7_2_0_external_core_regs[] __always_unused;
static const unsigned int *gen7_9_0_external_core_regs[] __always_unused;
static struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] __always_unused;
static const u32 gen7_9_0_cx_debugbus_blocks[] __always_unused;

#include "adreno_gen7_0_0_snapshot.h"
#include "adreno_gen7_2_0_snapshot.h"
#include "adreno_gen7_9_0_snapshot.h"

struct a6xx_gpu_state_obj {
	const void *handle;
	u32 *data;
	u32 count;	/* optional, used when count potentially read from hw */
};

struct a6xx_gpu_state {
	struct msm_gpu_state base;

	struct a6xx_gpu_state_obj *gmu_registers;
	int nr_gmu_registers;

	struct a6xx_gpu_state_obj *registers;
	int nr_registers;

	struct a6xx_gpu_state_obj *shaders;
	int nr_shaders;

	struct a6xx_gpu_state_obj *clusters;
	int nr_clusters;

	struct a6xx_gpu_state_obj *dbgahb_clusters;
	int nr_dbgahb_clusters;

	struct a6xx_gpu_state_obj *indexed_regs;
	int nr_indexed_regs;

	struct a6xx_gpu_state_obj *debugbus;
	int nr_debugbus;

	struct a6xx_gpu_state_obj *vbif_debugbus;

	struct a6xx_gpu_state_obj *cx_debugbus;
	int nr_cx_debugbus;

	struct msm_gpu_state_bo *gmu_log;
	struct msm_gpu_state_bo *gmu_hfi;
	struct msm_gpu_state_bo *gmu_debug;

	s32 hfi_queue_history[2][HFI_HISTORY_SZ];

	struct list_head objs;

	bool gpu_initialized;
};

static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
	in[0] = val;
	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);

	return 2;
}

static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
	in[0] = target;
	in[1] = (((u64) reg) << 44 | dwords);

	return 2;
}

static inline int CRASHDUMP_FINI(u64 *in)
{
	in[0] = 0;
	in[1] = 0;

	return 2;
}

struct a6xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};

struct a6xx_state_memobj {
	struct list_head node;
	unsigned long long data[];
};

static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
{
	struct a6xx_state_memobj *obj =
		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);

	if (!obj)
		return NULL;

	list_add_tail(&obj->node, &a6xx_state->objs);
	return &obj->data;
}

static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
		size_t size)
{
	void *dst = state_kcalloc(a6xx_state, 1, size);

	if (dst)
		memcpy(dst, src, size);
	return dst;
}

/*
 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 * the rest for the data
 */
#define A6XX_CD_DATA_OFFSET 8192
#define A6XX_CD_DATA_SIZE (SZ_1M - 8192)

static int a6xx_crashdumper_init(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new(gpu->dev,
		SZ_1M, MSM_BO_WC, gpu->aspace,
		&dumper->bo, &dumper->iova);

	if (!IS_ERR(dumper->ptr))
		msm_gem_object_set_name(dumper->bo, "crashdump");

	return PTR_ERR_OR_ZERO(dumper->ptr);
}
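/*
 * Worked example of the script encoding built by the CRASHDUMP_* helpers
 * above: each entry is a pair of u64s, with the register offset packed into
 * bits [44:63] of the second qword. A read of 4 dwords from register offset
 * 0x1000 into iova 0xfc000000 becomes:
 *
 *	in[0] = 0xfc000000;               destination address
 *	in[1] = ((u64)0x1000 << 44) | 4;  source register and dword count
 *
 * A write sets bit 21 with a count of 1 and carries the value to write in
 * in[0] instead of a target address. Two zero qwords (CRASHDUMP_FINI)
 * terminate the script.
 */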
static int a6xx_crashdumper_run(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 val;
	int ret;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
		return -EINVAL;

	/* Make sure all pending memory writes are posted */
	wmb();

	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE, dumper->iova);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
		val & 0x02, 100, 10000);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

	return ret;
}

/* read a value from the GX debug bus */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

#define cxdbg_write(ptr, offset, val) \
	writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
	readl((ptr) + ((offset) << 2))

/* read a value from the CX debug bus */
static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);

	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

/* Read a chunk of data from the VBIF debug bus */
static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
		u32 reg, int count, u32 *data)
{
	int i;

	gpu_write(gpu, ctrl0, reg);

	for (i = 0; i < count; i++) {
		gpu_write(gpu, ctrl1, i);
		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
	}

	return count;
}

#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

#define VBIF_DEBUGBUS_BLOCK_SIZE \
	((16 * AXI_ARB_BLOCKS) + \
	 (18 * XIN_AXI_BLOCKS) + \
	 (12 * XIN_CORE_BLOCKS))
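/*
 * Sanity math for the constant above: 16 dwords for each of the 2 AXI
 * arbiter blocks, 18 for each of the 5 XIN AXI blocks and 12 for each of
 * the 4 XIN core blocks gives 32 + 90 + 48 = 170 dwords per VBIF dump,
 * matching the per-block counts passed to vbif_debugbus_read() below.
 */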
static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_gpu_state_obj *obj)
{
	u32 clk, *ptr;
	int i;

	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
		sizeof(u32));
	if (!obj->data)
		return;

	obj->handle = NULL;

	/* Get the current clock setting */
	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

	/* Force on the bus so we can read it */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

	/* We will read from BUS2 first, so disable BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

	/* Enable the VBIF bus for reading */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

	ptr = obj->data;

	for (i = 0; i < AXI_ARB_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << (i + 16), 16, ptr);

	for (i = 0; i < XIN_AXI_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << i, 18, ptr);

	/* Stop BUS2 so we can turn on BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

	for (i = 0; i < XIN_CORE_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
			1 << i, 12, ptr);

	/* Restore the VBIF clock setting */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}

static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_debugbus_block *block,
		struct a6xx_gpu_state_obj *obj)
{
	int i;
	u32 *ptr;

	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
	if (!obj->data)
		return;

	obj->handle = block;

	for (ptr = obj->data, i = 0; i < block->count; i++)
		ptr += debugbus_read(gpu, block->id, i, ptr);
}

static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_debugbus_block *block,
		struct a6xx_gpu_state_obj *obj)
{
	int i;
	u32 *ptr;

	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
	if (!obj->data)
		return;

	obj->handle = block;

	for (ptr = obj->data, i = 0; i < block->count; i++)
		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
}
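/*
 * Sizing note for a6xx_get_debugbus_block() and a6xx_get_cx_debugbus_block()
 * above: every debugbus sample is two dwords (the *_read() helpers return 2),
 * so block->count entries are allocated with sizeof(u64) even though the
 * buffer is then filled as a u32 array.
 */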
static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	int nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);

	if (adreno_is_a650_family(to_adreno_gpu(gpu)))
		nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks);

	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
			sizeof(*a6xx_state->debugbus));

	if (a6xx_state->debugbus) {
		int i;

		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
			a6xx_get_debugbus_block(gpu,
				a6xx_state,
				&a6xx_debugbus_blocks[i],
				&a6xx_state->debugbus[i]);

		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);

		/*
		 * GBIF has the same debugbus as the other GPU blocks, so fall
		 * back to the default path if the GPU uses GBIF. GBIF also
		 * uses exactly the same block ID as VBIF.
		 */
		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
			a6xx_get_debugbus_block(gpu, a6xx_state,
				&a6xx_gbif_debugbus_block,
				&a6xx_state->debugbus[i]);

			a6xx_state->nr_debugbus += 1;
		}

		if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
			/*
			 * Append the a650-only blocks after the entries
			 * captured above instead of overwriting them.
			 */
			for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++)
				a6xx_get_debugbus_block(gpu,
					a6xx_state,
					&a650_debugbus_blocks[i],
					&a6xx_state->debugbus[a6xx_state->nr_debugbus + i]);

			a6xx_state->nr_debugbus += ARRAY_SIZE(a650_debugbus_blocks);
		}
	}
}

static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int debugbus_blocks_count, gbif_debugbus_blocks_count, total_debugbus_blocks;
	const u32 *debugbus_blocks, *gbif_debugbus_blocks;
	int i;

	if (adreno_is_a730(adreno_gpu)) {
		debugbus_blocks = gen7_0_0_debugbus_blocks;
		debugbus_blocks_count = ARRAY_SIZE(gen7_0_0_debugbus_blocks);
		gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks;
		gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
	} else if (adreno_is_a740_family(adreno_gpu)) {
		debugbus_blocks = gen7_2_0_debugbus_blocks;
		debugbus_blocks_count = ARRAY_SIZE(gen7_2_0_debugbus_blocks);
		gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks;
		gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
	} else {
		BUG_ON(!adreno_is_a750(adreno_gpu));
		debugbus_blocks = gen7_9_0_debugbus_blocks;
		debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_debugbus_blocks);
		gbif_debugbus_blocks = gen7_9_0_gbif_debugbus_blocks;
		gbif_debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_gbif_debugbus_blocks);
	}

	total_debugbus_blocks = debugbus_blocks_count + gbif_debugbus_blocks_count;

	a6xx_state->debugbus = state_kcalloc(a6xx_state, total_debugbus_blocks,
		sizeof(*a6xx_state->debugbus));

	if (a6xx_state->debugbus) {
		for (i = 0; i < debugbus_blocks_count; i++) {
			a6xx_get_debugbus_block(gpu,
				a6xx_state, &a7xx_debugbus_blocks[debugbus_blocks[i]],
				&a6xx_state->debugbus[i]);
		}

		for (i = 0; i < gbif_debugbus_blocks_count; i++) {
			a6xx_get_debugbus_block(gpu,
				a6xx_state, &a7xx_debugbus_blocks[gbif_debugbus_blocks[i]],
				&a6xx_state->debugbus[i + debugbus_blocks_count]);
		}
	}
}
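/*
 * Unlike the a6xx path, the per-SoC lists selected above are not block
 * descriptors themselves but indices into the shared a7xx_debugbus_blocks
 * table, which is why both capture loops index a7xx_debugbus_blocks[] with
 * the list entries.
 */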
static void a6xx_get_debugbus(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct resource *res;
	void __iomem *cxdbg = NULL;

	/* Set up the GX debug bus */

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);

	/* Set up the CX debug bus - it lives elsewhere in the system so do a
	 * temporary ioremap for the registers
	 */
	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
			"cx_dbgc");

	if (res)
		cxdbg = ioremap(res->start, resource_size(res));

	if (cxdbg) {
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
			0x76543210);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
			0xFEDCBA98);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
	}

	if (adreno_is_a7xx(adreno_gpu))
		a7xx_get_debugbus_blocks(gpu, a6xx_state);
	else
		a6xx_get_debugbus_blocks(gpu, a6xx_state);

	/* Dump the VBIF debugbus on applicable targets */
	if (!a6xx_has_gbif(adreno_gpu)) {
		a6xx_state->vbif_debugbus =
			state_kcalloc(a6xx_state, 1,
					sizeof(*a6xx_state->vbif_debugbus));

		if (a6xx_state->vbif_debugbus)
			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
					a6xx_state->vbif_debugbus);
	}

	if (cxdbg) {
		unsigned nr_cx_debugbus_blocks;
		const struct a6xx_debugbus_block *cx_debugbus_blocks;

		if (adreno_is_a7xx(adreno_gpu)) {
			BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
			cx_debugbus_blocks = a7xx_cx_debugbus_blocks;
			nr_cx_debugbus_blocks = ARRAY_SIZE(a7xx_cx_debugbus_blocks);
		} else {
			cx_debugbus_blocks = a6xx_cx_debugbus_blocks;
			nr_cx_debugbus_blocks = ARRAY_SIZE(a6xx_cx_debugbus_blocks);
		}

		a6xx_state->cx_debugbus =
			state_kcalloc(a6xx_state,
				nr_cx_debugbus_blocks,
				sizeof(*a6xx_state->cx_debugbus));

		if (a6xx_state->cx_debugbus) {
			int i;

			for (i = 0; i < nr_cx_debugbus_blocks; i++)
				a6xx_get_cx_debugbus_block(cxdbg,
					a6xx_state,
					&cx_debugbus_blocks[i],
					&a6xx_state->cx_debugbus[i]);

			a6xx_state->nr_cx_debugbus =
				nr_cx_debugbus_blocks;
		}

		iounmap(cxdbg);
	}
}

#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
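/*
 * Register lists are walked as {first, last} pairs, so RANGE() yields the
 * inclusive length of a span: for the pair { 0x0800, 0x0803 } it evaluates
 * to 4 registers. This is why every walker below advances its index two
 * entries at a time.
 */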
/* Read a data cluster from behind the AHB aperture */
static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_dbgahb_cluster *dbgahb,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
			(dbgahb->statetype + i * 2) << 8);

		for (j = 0; j < dbgahb->count; j += 2) {
			int count = RANGE(dbgahb->registers, j);
			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
				dbgahb->registers[j] - (dbgahb->base >> 2);

			in += CRASHDUMP_READ(in, offset, count, out);

			out += count * sizeof(u32);

			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = dbgahb;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a7xx_get_dbgahb_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct gen7_sptp_cluster_registers *dbgahb,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
		A7XX_SP_READ_SEL_LOCATION(dbgahb->location_id) |
		A7XX_SP_READ_SEL_PIPE(dbgahb->pipe_id) |
		A7XX_SP_READ_SEL_STATETYPE(dbgahb->statetype));

	for (i = 0; dbgahb->regs[i] != UINT_MAX; i += 2) {
		int count = RANGE(dbgahb->regs, i);
		u32 offset = REG_A7XX_SP_AHB_READ_APERTURE +
			dbgahb->regs[i] - dbgahb->regbase;

		in += CRASHDUMP_READ(in, offset, count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = dbgahb;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_dbgahb_clusters),
		sizeof(*a6xx_state->dbgahb_clusters));

	if (!a6xx_state->dbgahb_clusters)
		return;

	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);

	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
			&a6xx_dbgahb_clusters[i],
			&a6xx_state->dbgahb_clusters[i], dumper);
}

static void a7xx_get_dbgahb_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;
	const struct gen7_sptp_cluster_registers *dbgahb_clusters;
	unsigned dbgahb_clusters_size;

	if (adreno_is_a730(adreno_gpu)) {
		dbgahb_clusters = gen7_0_0_sptp_clusters;
		dbgahb_clusters_size = ARRAY_SIZE(gen7_0_0_sptp_clusters);
	} else {
		BUG_ON(!adreno_is_a740_family(adreno_gpu));
		dbgahb_clusters = gen7_2_0_sptp_clusters;
		dbgahb_clusters_size = ARRAY_SIZE(gen7_2_0_sptp_clusters);
	}

	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
		dbgahb_clusters_size,
		sizeof(*a6xx_state->dbgahb_clusters));

	if (!a6xx_state->dbgahb_clusters)
		return;

	a6xx_state->nr_dbgahb_clusters = dbgahb_clusters_size;

	for (i = 0; i < dbgahb_clusters_size; i++)
		a7xx_get_dbgahb_cluster(gpu, a6xx_state,
			&dbgahb_clusters[i],
			&a6xx_state->dbgahb_clusters[i], dumper);
}
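/*
 * In the a6xx cluster reader below, the aperture select written to
 * CP_APERTURE_CNTL_CD packs the cluster ID into bits [8:15] and repeats the
 * context number in the two low nibbles: cluster 3 / context 1 is encoded
 * as (3 << 8) | (1 << 4) | 1. The a7xx variant uses the explicit
 * PIPE/CLUSTER/CONTEXT fields instead.
 */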
/* Read a data cluster from the CP aperture with the crashdumper */
static void a6xx_get_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_cluster *cluster,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;
	u32 id = cluster->id;

	/* Skip registers that are not present on older generations */
	if (!adreno_is_a660_family(adreno_gpu) &&
			cluster->registers == a660_fe_cluster)
		return;

	if (adreno_is_a650_family(adreno_gpu) &&
			cluster->registers == a6xx_ps_cluster)
		id = CLUSTER_VPC_PS;

	/* Some clusters need a selector register to be programmed too */
	if (cluster->sel_reg)
		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
			(id << 8) | (i << 4) | i);

		for (j = 0; j < cluster->count; j += 2) {
			int count = RANGE(cluster->registers, j);

			in += CRASHDUMP_READ(in, cluster->registers[j],
				count, out);

			out += count * sizeof(u32);

			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = cluster;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a7xx_get_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct gen7_cluster_registers *cluster,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	/* Some clusters need a selector register to be programmed too */
	if (cluster->sel)
		in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val);

	in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD,
		A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) |
		A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) |
		A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id));

	for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) {
		int count = RANGE(cluster->regs, i);

		in += CRASHDUMP_READ(in, cluster->regs[i],
			count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = cluster;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->clusters = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));

	if (!a6xx_state->clusters)
		return;

	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);

	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
			&a6xx_state->clusters[i], dumper);
}
static void a7xx_get_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;
	const struct gen7_cluster_registers *clusters;
	unsigned clusters_size;

	if (adreno_is_a730(adreno_gpu)) {
		clusters = gen7_0_0_clusters;
		clusters_size = ARRAY_SIZE(gen7_0_0_clusters);
	} else if (adreno_is_a740_family(adreno_gpu)) {
		clusters = gen7_2_0_clusters;
		clusters_size = ARRAY_SIZE(gen7_2_0_clusters);
	} else {
		BUG_ON(!adreno_is_a750(adreno_gpu));
		clusters = gen7_9_0_clusters;
		clusters_size = ARRAY_SIZE(gen7_9_0_clusters);
	}

	a6xx_state->clusters = state_kcalloc(a6xx_state,
		clusters_size, sizeof(*a6xx_state->clusters));

	if (!a6xx_state->clusters)
		return;

	a6xx_state->nr_clusters = clusters_size;

	for (i = 0; i < clusters_size; i++)
		a7xx_get_cluster(gpu, a6xx_state, &clusters[i],
			&a6xx_state->clusters[i], dumper);
}

/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
static void a6xx_get_shader_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_shader_block *block,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
	int i;

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
			(block->type << 8) | i);

		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
			block->size, out);

		out += block->size * sizeof(u32);
	}

	CRASHDUMP_FINI(in);

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = block;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a7xx_get_shader_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct gen7_shader_block *block,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize = block->size * block->num_sps * block->num_usptps * sizeof(u32);
	int i, j;

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (adreno_is_a730(adreno_gpu))
		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 3);

	for (i = 0; i < block->num_sps; i++) {
		for (j = 0; j < block->num_usptps; j++) {
			in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
				A7XX_SP_READ_SEL_LOCATION(block->location) |
				A7XX_SP_READ_SEL_PIPE(block->pipeid) |
				A7XX_SP_READ_SEL_STATETYPE(block->statetype) |
				A7XX_SP_READ_SEL_USPTP(j) |
				A7XX_SP_READ_SEL_SPTP(i));

			in += CRASHDUMP_READ(in, REG_A7XX_SP_AHB_READ_APERTURE,
				block->size, out);

			out += block->size * sizeof(u32);
		}
	}

	CRASHDUMP_FINI(in);

	if (a6xx_crashdumper_run(gpu, dumper))
		goto out;

	obj->handle = block;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);

out:
	if (adreno_is_a730(adreno_gpu))
		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 0);
}
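/*
 * Layout note for the shader dumps above: the data lands in the scratch
 * buffer bank-major, block->size dwords per bank on a6xx and per
 * (SP, USPTP) pair on a7xx. The corresponding show functions further down
 * walk the data with the same stride when they ascii85-encode it.
 */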
static void a6xx_get_shaders(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->shaders = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));

	if (!a6xx_state->shaders)
		return;

	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);

	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
			&a6xx_state->shaders[i], dumper);
}

static void a7xx_get_shaders(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct gen7_shader_block *shader_blocks;
	unsigned num_shader_blocks;
	int i;

	if (adreno_is_a730(adreno_gpu)) {
		shader_blocks = gen7_0_0_shader_blocks;
		num_shader_blocks = ARRAY_SIZE(gen7_0_0_shader_blocks);
	} else if (adreno_is_a740_family(adreno_gpu)) {
		shader_blocks = gen7_2_0_shader_blocks;
		num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks);
	} else {
		BUG_ON(!adreno_is_a750(adreno_gpu));
		shader_blocks = gen7_9_0_shader_blocks;
		num_shader_blocks = ARRAY_SIZE(gen7_9_0_shader_blocks);
	}

	a6xx_state->shaders = state_kcalloc(a6xx_state,
		num_shader_blocks, sizeof(*a6xx_state->shaders));

	if (!a6xx_state->shaders)
		return;

	a6xx_state->nr_shaders = num_shader_blocks;

	for (i = 0; i < num_shader_blocks; i++)
		a7xx_get_shader_block(gpu, a6xx_state, &shader_blocks[i],
			&a6xx_state->shaders[i], dumper);
}

/* Read registers from behind the HLSQ aperture with the crashdumper */
static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
			regs->registers[i] - (regs->val0 >> 2);

		in += CRASHDUMP_READ(in, offset, count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}

/* Read a block of registers using the crashdumper */
static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	/* Skip unsupported registers on older generations */
	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
			(regs->registers == a660_registers))
		return;

	/* Some blocks might need to program a selector register first */
	if (regs->val0)
		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);

		in += CRASHDUMP_READ(in, regs->registers[i], count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}
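/*
 * Note the field overloading in struct a6xx_registers for the two readers
 * above: the HLSQ variant treats val0 as the byte base of the HLSQ aperture
 * (hence the val0 >> 2 conversion to a dword offset) and val1 as the
 * HLSQ_DBG_READ_SEL value, while the generic variant treats val0/val1 as an
 * optional selector register and value. The a7xx variant below carries an
 * explicit sel descriptor instead.
 */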
static void a7xx_get_crashdumper_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct gen7_reg_list *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	/* Some blocks might need to program a selector register first */
	if (regs->sel)
		in += CRASHDUMP_WRITE(in, regs->sel->cd_reg, regs->sel->val);

	for (i = 0; regs->regs[i] != UINT_MAX; i += 2) {
		u32 count = RANGE(regs->regs, i);

		in += CRASHDUMP_READ(in, regs->regs[i], count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs->regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}

/* Read a block of registers via AHB */
static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj)
{
	int i, regcount = 0, index = 0;

	/* Skip unsupported registers on older generations */
	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
			(regs->registers == a660_registers))
		return;

	for (i = 0; i < regs->count; i += 2)
		regcount += RANGE(regs->registers, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		int j;

		for (j = 0; j < count; j++)
			obj->data[index++] = gpu_read(gpu,
				regs->registers[i] + j);
	}
}

static void a7xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const u32 *regs,
		struct a6xx_gpu_state_obj *obj)
{
	int i, regcount = 0, index = 0;

	for (i = 0; regs[i] != UINT_MAX; i += 2)
		regcount += RANGE(regs, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; regs[i] != UINT_MAX; i += 2) {
		u32 count = RANGE(regs, i);
		int j;

		for (j = 0; j < count; j++)
			obj->data[index++] = gpu_read(gpu, regs[i] + j);
	}
}

static void a7xx_get_ahb_gpu_reglist(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct gen7_reg_list *regs,
		struct a6xx_gpu_state_obj *obj)
{
	if (regs->sel)
		gpu_write(gpu, regs->sel->host_reg, regs->sel->val);

	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, regs->regs, obj);
}
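/*
 * The GMU register reader below deals with two address spaces: most GMU
 * registers are read through the regular gmu_read() mapping, but entries
 * captured with rscc == true live in the separate RSCC region and are read
 * through gmu_read_rscc() instead.
 */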
/* Read a block of GMU registers */
static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		bool rscc)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int i, regcount = 0, index = 0;

	for (i = 0; i < regs->count; i += 2)
		regcount += RANGE(regs->registers, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		int j;

		for (j = 0; j < count; j++) {
			u32 offset = regs->registers[i] + j;
			u32 val;

			if (rscc)
				val = gmu_read_rscc(gmu, offset);
			else
				val = gmu_read(gmu, offset);

			obj->data[index++] = val;
		}
	}
}

static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
		3, sizeof(*a6xx_state->gmu_registers));

	if (!a6xx_state->gmu_registers)
		return;

	a6xx_state->nr_gmu_registers = 3;

	/* Get the CX GMU registers from AHB */
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
		&a6xx_state->gmu_registers[0], false);
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
		&a6xx_state->gmu_registers[1], true);

	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
		return;

	/* Set the fence to ALLOW mode so we can access the registers */
	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);

	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
		&a6xx_state->gmu_registers[2], false);
}

static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
{
	struct msm_gpu_state_bo *snapshot;

	if (!bo->size)
		return NULL;

	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
	if (!snapshot)
		return NULL;

	snapshot->iova = bo->iova;
	snapshot->size = bo->size;
	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
	if (!snapshot->data)
		return NULL;

	memcpy(snapshot->data, bo->virt, bo->size);

	return snapshot;
}

static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	unsigned i, j;

	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));

	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
		struct a6xx_hfi_queue *queue = &gmu->queues[i];

		for (j = 0; j < HFI_HISTORY_SZ; j++) {
			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;

			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
		}
	}
}
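/*
 * queue->history is a ring buffer indexed by history_idx; the snapshot
 * above starts its copy at history_idx so hfi_queue_history ends up in
 * oldest-to-newest order regardless of where the ring had wrapped.
 */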
#define A6XX_REGLIST_SIZE 1
#define A6XX_GBIF_REGLIST_SIZE 1

static void a6xx_get_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i, count = A6XX_REGLIST_SIZE +
		ARRAY_SIZE(a6xx_reglist) +
		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
	int index = 0;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	a6xx_state->registers = state_kcalloc(a6xx_state,
		count, sizeof(*a6xx_state->registers));

	if (!a6xx_state->registers)
		return;

	a6xx_state->nr_registers = count;

	a6xx_get_ahb_gpu_registers(gpu,
		a6xx_state, &a6xx_ahb_reglist,
		&a6xx_state->registers[index++]);

	if (a6xx_has_gbif(adreno_gpu))
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_gbif_reglist,
			&a6xx_state->registers[index++]);
	else
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_vbif_reglist,
			&a6xx_state->registers[index++]);

	if (!dumper) {
		/*
		 * We can't use the crashdumper when the SMMU is stalled,
		 * because the GPU has no memory access until we resume
		 * translation (but we don't want to do that until after
		 * we have captured as much useful GPU state as possible).
		 * So instead collect registers via the CPU:
		 */
		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
			a6xx_get_ahb_gpu_registers(gpu,
				a6xx_state, &a6xx_reglist[i],
				&a6xx_state->registers[index++]);
		return;
	}

	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
		a6xx_get_crashdumper_registers(gpu,
			a6xx_state, &a6xx_reglist[i],
			&a6xx_state->registers[index++],
			dumper);

	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
		a6xx_get_crashdumper_hlsq_registers(gpu,
			a6xx_state, &a6xx_hlsq_reglist[i],
			&a6xx_state->registers[index++],
			dumper);
}

#define A7XX_PRE_CRASHDUMPER_SIZE 1
#define A7XX_POST_CRASHDUMPER_SIZE 1

static void a7xx_get_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i, count;
	int index = 0;
	const u32 *pre_crashdumper_regs;
	const struct gen7_reg_list *reglist;

	if (adreno_is_a730(adreno_gpu)) {
		reglist = gen7_0_0_reg_list;
		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
	} else if (adreno_is_a740_family(adreno_gpu)) {
		reglist = gen7_2_0_reg_list;
		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
	} else {
		BUG_ON(!adreno_is_a750(adreno_gpu));
		reglist = gen7_9_0_reg_list;
		pre_crashdumper_regs = gen7_9_0_pre_crashdumper_gpu_registers;
	}

	count = A7XX_PRE_CRASHDUMPER_SIZE + A7XX_POST_CRASHDUMPER_SIZE;

	/* The downstream reglist contains registers in other memory regions
	 * (cx_misc/cx_mem and cx_dbgc) and we need to plumb through their
	 * offsets and map them to read them on the CPU. For now only read the
	 * first region which is the main one.
	 */
	if (dumper) {
		for (i = 0; reglist[i].regs; i++)
			count++;
	} else {
		count++;
	}

	a6xx_state->registers = state_kcalloc(a6xx_state,
		count, sizeof(*a6xx_state->registers));

	if (!a6xx_state->registers)
		return;

	a6xx_state->nr_registers = count;

	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, pre_crashdumper_regs,
		&a6xx_state->registers[index++]);

	if (!dumper) {
		a7xx_get_ahb_gpu_reglist(gpu,
			a6xx_state, &reglist[0],
			&a6xx_state->registers[index++]);
		return;
	}

	for (i = 0; reglist[i].regs; i++)
		a7xx_get_crashdumper_registers(gpu,
			a6xx_state, &reglist[i],
			&a6xx_state->registers[index++],
			dumper);
}
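/*
 * The "pre" list above is read over AHB before the crashdumper pass; the
 * matching "post" list is read last via a7xx_get_post_crashdumper_registers()
 * below, after the crashdumper-based dumps in a6xx_gpu_state_get() have
 * completed.
 */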
static void a7xx_get_post_crashdumper_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const u32 *regs;

	BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu) ||
		 adreno_is_a750(adreno_gpu)));
	regs = gen7_0_0_post_crashdumper_registers;

	a7xx_get_ahb_gpu_registers(gpu,
		a6xx_state, regs,
		&a6xx_state->registers[a6xx_state->nr_registers - 1]);
}

static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu)
{
	/* The value at [16:31] is in 4dword units. Convert it to dwords */
	return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14;
}
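/*
 * The single ">> 14" above folds two steps into one: val >> 16 extracts the
 * field and the remaining "<< 2" converts 4-dword units to dwords, which
 * matches (val >> 16) * 4 as long as bits [14:15] read back as zero. The
 * a7xx variant below keeps the extraction and the multiply explicit.
 */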
static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu)
{
	/*
	 * The value at CP_ROQ_THRESHOLDS_2[20:31] is in 4dword units.
	 * That register however is not directly accessible from APSS on A7xx.
	 * Program the SQE_UCODE_DBG_ADDR with offset=0x70d3 and read the value.
	 */
	gpu_write(gpu, REG_A6XX_CP_SQE_UCODE_DBG_ADDR, 0x70d3);

	return 4 * (gpu_read(gpu, REG_A6XX_CP_SQE_UCODE_DBG_DATA) >> 20);
}

/* Read a block of data from an indexed register pair */
static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_indexed_registers *indexed,
		struct a6xx_gpu_state_obj *obj)
{
	u32 count = indexed->count;
	int i;

	obj->handle = (const void *) indexed;
	if (indexed->count_fn)
		count = indexed->count_fn(gpu);

	obj->data = state_kcalloc(a6xx_state, count, sizeof(u32));
	obj->count = count;
	if (!obj->data)
		return;

	/* All the indexed banks start at address 0 */
	gpu_write(gpu, indexed->addr, 0);

	/* Read the data - each read increments the internal address by 1 */
	for (i = 0; i < count; i++)
		obj->data[i] = gpu_read(gpu, indexed->data);
}

static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	u32 mempool_size;
	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
	int i;

	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
		sizeof(*a6xx_state->indexed_regs));
	if (!a6xx_state->indexed_regs)
		return;

	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
			&a6xx_state->indexed_regs[i]);

	if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
		u32 val;

		val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG);
		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4);

		/* Get the contents of the CP mempool */
		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
			&a6xx_state->indexed_regs[i]);

		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val);
		a6xx_state->nr_indexed_regs = count;
		return;
	}

	/* Set the CP mempool size to 0 to stabilize it while dumping */
	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);

	/* Get the contents of the CP mempool */
	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
		&a6xx_state->indexed_regs[i]);

	a6xx_state->nr_indexed_regs = count;

	/*
	 * Offset 0x2000 in the mempool is the size - copy the saved size over
	 * so the data is consistent
	 */
	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;

	/* Restore the size in the hardware */
	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
}

static void a7xx_get_indexed_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct a6xx_indexed_registers *indexed_regs;
	int i, indexed_count, mempool_count;

	if (adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)) {
		indexed_regs = a7xx_indexed_reglist;
		indexed_count = ARRAY_SIZE(a7xx_indexed_reglist);
	} else {
		BUG_ON(!adreno_is_a750(adreno_gpu));
		indexed_regs = gen7_9_0_cp_indexed_reg_list;
		indexed_count = ARRAY_SIZE(gen7_9_0_cp_indexed_reg_list);
	}

	mempool_count = ARRAY_SIZE(a7xx_cp_bv_mempool_indexed);

	a6xx_state->indexed_regs = state_kcalloc(a6xx_state,
			indexed_count + mempool_count,
			sizeof(*a6xx_state->indexed_regs));
	if (!a6xx_state->indexed_regs)
		return;

	a6xx_state->nr_indexed_regs = indexed_count + mempool_count;

	/* First read the common regs */
	for (i = 0; i < indexed_count; i++)
		a6xx_get_indexed_regs(gpu, a6xx_state, &indexed_regs[i],
			&a6xx_state->indexed_regs[i]);

	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, 0, BIT(2));
	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, 0, BIT(2));

	/* Get the contents of the CP_BV mempool */
	for (i = 0; i < mempool_count; i++)
		a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_cp_bv_mempool_indexed[i],
			&a6xx_state->indexed_regs[indexed_count + i]);

	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(2), 0);
	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, BIT(2), 0);
}
struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
		GFP_KERNEL);
	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);

	if (!a6xx_state)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&a6xx_state->objs);

	/* Get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &a6xx_state->base);

	if (!adreno_has_gmu_wrapper(adreno_gpu)) {
		a6xx_get_gmu_registers(gpu, a6xx_state);

		a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
		a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
		a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);

		a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
	}

	/* If GX isn't on the rest of the data isn't going to be accessible */
	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
		return &a6xx_state->base;

	/* Get the banks of indexed registers */
	if (adreno_is_a7xx(adreno_gpu))
		a7xx_get_indexed_registers(gpu, a6xx_state);
	else
		a6xx_get_indexed_registers(gpu, a6xx_state);

	/*
	 * Try to initialize the crashdumper, if we are not dumping state
	 * with the SMMU stalled. The crashdumper needs memory access to
	 * write out GPU state, so we need to skip this when the SMMU is
	 * stalled in response to an iova fault
	 */
	if (!stalled && !gpu->needs_hw_init &&
	    !a6xx_crashdumper_init(gpu, &_dumper)) {
		dumper = &_dumper;
	}

	if (adreno_is_a7xx(adreno_gpu)) {
		a7xx_get_registers(gpu, a6xx_state, dumper);

		if (dumper) {
			a7xx_get_shaders(gpu, a6xx_state, dumper);
			a7xx_get_clusters(gpu, a6xx_state, dumper);
			a7xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);

			msm_gem_kernel_put(dumper->bo, gpu->aspace);
		}

		a7xx_get_post_crashdumper_registers(gpu, a6xx_state);
	} else {
		a6xx_get_registers(gpu, a6xx_state, dumper);

		if (dumper) {
			a6xx_get_shaders(gpu, a6xx_state, dumper);
			a6xx_get_clusters(gpu, a6xx_state, dumper);
			a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);

			msm_gem_kernel_put(dumper->bo, gpu->aspace);
		}
	}

	if (snapshot_debugbus)
		a6xx_get_debugbus(gpu, a6xx_state);

	a6xx_state->gpu_initialized = !gpu->needs_hw_init;

	return &a6xx_state->base;
}

static void a6xx_gpu_state_destroy(struct kref *kref)
{
	struct a6xx_state_memobj *obj, *tmp;
	struct msm_gpu_state *state = container_of(kref,
			struct msm_gpu_state, ref);
	struct a6xx_gpu_state *a6xx_state = container_of(state,
			struct a6xx_gpu_state, base);

	if (a6xx_state->gmu_log)
		kvfree(a6xx_state->gmu_log->data);

	if (a6xx_state->gmu_hfi)
		kvfree(a6xx_state->gmu_hfi->data);

	if (a6xx_state->gmu_debug)
		kvfree(a6xx_state->gmu_debug->data);

	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
		list_del(&obj->node);
		kvfree(obj);
	}

	adreno_gpu_state_destroy(state);
	kfree(a6xx_state);
}

int a6xx_gpu_state_put(struct msm_gpu_state *state)
{
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, a6xx_gpu_state_destroy);
}
static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
		struct drm_printer *p)
{
	int i, index = 0;

	if (!data)
		return;

	for (i = 0; i < count; i += 2) {
		u32 count = RANGE(registers, i);
		u32 offset = registers[i];
		int j;

		for (j = 0; j < count; index++, offset++, j++) {
			/* Skip entries still holding the 0xdeafbead sentinel */
			if (data[index] == 0xdeafbead)
				continue;

			drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n",
				offset << 2, data[index]);
		}
	}
}

static void a7xx_show_registers_indented(const u32 *registers, u32 *data,
		struct drm_printer *p, unsigned indent)
{
	int i, index = 0;

	for (i = 0; registers[i] != UINT_MAX; i += 2) {
		u32 count = RANGE(registers, i);
		u32 offset = registers[i];
		int j;

		for (j = 0; j < count; index++, offset++, j++) {
			int k;

			if (data[index] == 0xdeafbead)
				continue;

			for (k = 0; k < indent; k++)
				drm_printf(p, " ");
			drm_printf(p, "- { offset: 0x%06x, value: 0x%08x }\n",
				offset << 2, data[index]);
		}
	}
}

static void a7xx_show_registers(const u32 *registers, u32 *data, struct drm_printer *p)
{
	a7xx_show_registers_indented(registers, data, p, 1);
}

static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
{
	char out[ASCII85_BUFSZ];
	long i, l, datalen = 0;

	/* Find the last non-zero dword so trailing zeroes aren't encoded */
	for (i = 0; i < len >> 2; i++) {
		if (data[i])
			datalen = (i + 1) << 2;
	}

	if (datalen == 0)
		return;

	drm_puts(p, " data: !!ascii85 |\n");
	drm_puts(p, " ");

	l = ascii85_encode_len(datalen);

	for (i = 0; i < l; i++)
		drm_puts(p, ascii85_encode(data[i], out));

	drm_puts(p, "\n");
}
static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	drm_puts(p, fmt);
	drm_puts(p, name);
	drm_puts(p, "\n");
}

static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_shader_block *block = obj->handle;
	int i;

	if (!obj->handle)
		return;

	print_name(p, " - type: ", block->name);

	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
		drm_printf(p, " - bank: %d\n", i);
		drm_printf(p, " size: %d\n", block->size);

		if (!obj->data)
			continue;

		print_ascii85(p, block->size << 2,
			obj->data + (block->size * i));
	}
}

static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct gen7_shader_block *block = obj->handle;
	int i, j;
	u32 *data = obj->data;

	if (!obj->handle)
		return;

	print_name(p, " - type: ", a7xx_statetype_names[block->statetype]);
	print_name(p, " - pipe: ", a7xx_pipe_names[block->pipeid]);

	for (i = 0; i < block->num_sps; i++) {
		drm_printf(p, " - sp: %d\n", i);

		for (j = 0; j < block->num_usptps; j++) {
			drm_printf(p, " - usptp: %d\n", j);
			drm_printf(p, " size: %d\n", block->size);

			if (!obj->data)
				continue;

			print_ascii85(p, block->size << 2, data);

			data += block->size;
		}
	}
}

static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
		struct drm_printer *p)
{
	int ctx, index = 0;

	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
		int j;

		drm_printf(p, " - context: %d\n", ctx);

		for (j = 0; j < size; j += 2) {
			u32 count = RANGE(registers, j);
			u32 offset = registers[j];
			int k;

			for (k = 0; k < count; index++, offset++, k++) {
				if (data[index] == 0xdeafbead)
					continue;

				drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n",
					offset << 2, data[index]);
			}
		}
	}
}

static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;

	if (dbgahb) {
		print_name(p, " - cluster-name: ", dbgahb->name);
		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
			obj->data, p);
	}
}

static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_cluster *cluster = obj->handle;

	if (cluster) {
		print_name(p, " - cluster-name: ", cluster->name);
		a6xx_show_cluster_data(cluster->registers, cluster->count,
			obj->data, p);
	}
}

static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct gen7_sptp_cluster_registers *dbgahb = obj->handle;

	if (dbgahb) {
		print_name(p, " - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]);
		print_name(p, " - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]);
		drm_printf(p, " - context: %d\n", dbgahb->context_id);
		a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4);
	}
}

static void a7xx_show_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct gen7_cluster_registers *cluster = obj->handle;

	if (cluster) {
		int context = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;

		print_name(p, " - pipe: ", a7xx_pipe_names[cluster->pipe_id]);
		print_name(p, " - cluster-name: ", a7xx_cluster_names[cluster->cluster_id]);
		drm_printf(p, " - context: %d\n", context);
		a7xx_show_registers_indented(cluster->regs, obj->data, p, 4);
	}
}

static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_indexed_registers *indexed = obj->handle;

	if (!indexed)
		return;

	print_name(p, " - regs-name: ", indexed->name);
	drm_printf(p, " dwords: %d\n", obj->count);

	print_ascii85(p, obj->count << 2, obj->data);
}

static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
		u32 *data, struct drm_printer *p)
{
	if (block) {
		print_name(p, " - debugbus-block: ", block->name);

		/*
		 * count for regular debugbus data is in quadwords,
		 * but print the size in dwords for consistency
		 */
		drm_printf(p, " count: %d\n", block->count << 1);

		print_ascii85(p, block->count << 3, data);
	}
}
static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
		struct drm_printer *p)
{
	int i;

	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];

		a6xx_show_debugbus_block(obj->handle, obj->data, p);
	}

	if (a6xx_state->vbif_debugbus) {
		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;

		drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n");
		drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);

		/* vbif debugbus data is in dwords. Confusing, huh? */
		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
	}

	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];

		a6xx_show_debugbus_block(obj->handle, obj->data, p);
	}
}

void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu_state *a6xx_state = container_of(state,
			struct a6xx_gpu_state, base);
	int i;

	if (IS_ERR_OR_NULL(state))
		return;

	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);

	adreno_show(gpu, state, p);

	drm_puts(p, "gmu-log:\n");
	if (a6xx_state->gmu_log) {
		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;

		drm_printf(p, " iova: 0x%016llx\n", gmu_log->iova);
		drm_printf(p, " size: %zu\n", gmu_log->size);
		adreno_show_object(p, &gmu_log->data, gmu_log->size,
			&gmu_log->encoded);
	}

	drm_puts(p, "gmu-hfi:\n");
	if (a6xx_state->gmu_hfi) {
		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
		unsigned i, j;

		drm_printf(p, " iova: 0x%016llx\n", gmu_hfi->iova);
		drm_printf(p, " size: %zu\n", gmu_hfi->size);
		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
			drm_printf(p, " queue-history[%u]:", i);
			for (j = 0; j < HFI_HISTORY_SZ; j++) {
				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
			}
			drm_printf(p, "\n");
		}
		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
			&gmu_hfi->encoded);
	}

	drm_puts(p, "gmu-debug:\n");
	if (a6xx_state->gmu_debug) {
		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;

		drm_printf(p, " iova: 0x%016llx\n", gmu_debug->iova);
		drm_printf(p, " size: %zu\n", gmu_debug->size);
		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
			&gmu_debug->encoded);
	}

	drm_puts(p, "registers:\n");
	for (i = 0; i < a6xx_state->nr_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];

		if (!obj->handle)
			continue;

		if (adreno_is_a7xx(adreno_gpu)) {
			a7xx_show_registers(obj->handle, obj->data, p);
		} else {
			const struct a6xx_registers *regs = obj->handle;

			a6xx_show_registers(regs->registers, obj->data, regs->count, p);
		}
	}

	drm_puts(p, "registers-gmu:\n");
	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
		const struct a6xx_registers *regs = obj->handle;

		if (!obj->handle)
			continue;

		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
	}

	drm_puts(p, "indexed-registers:\n");
	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);

	drm_puts(p, "shader-blocks:\n");
	for (i = 0; i < a6xx_state->nr_shaders; i++) {
		if (adreno_is_a7xx(adreno_gpu))
			a7xx_show_shader(&a6xx_state->shaders[i], p);
		else
			a6xx_show_shader(&a6xx_state->shaders[i], p);
	}

	drm_puts(p, "clusters:\n");
	for (i = 0; i < a6xx_state->nr_clusters; i++) {
		if (adreno_is_a7xx(adreno_gpu))
			a7xx_show_cluster(&a6xx_state->clusters[i], p);
		else
			a6xx_show_cluster(&a6xx_state->clusters[i], p);
	}

	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) {
		if (adreno_is_a7xx(adreno_gpu))
			a7xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
		else
			a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
	}

	drm_puts(p, "debugbus:\n");
	a6xx_show_debugbus(a6xx_state, p);
}