// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */

#include <linux/ascii85.h>
#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"

/* Ignore diagnostics about register tables that we aren't using yet. We don't
 * want to modify these headers too much from their original source.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"

#include "adreno_gen7_0_0_snapshot.h"
#include "adreno_gen7_2_0_snapshot.h"

#pragma GCC diagnostic pop

struct a6xx_gpu_state_obj {
	const void *handle;
	u32 *data;
};

struct a6xx_gpu_state {
	struct msm_gpu_state base;

	struct a6xx_gpu_state_obj *gmu_registers;
	int nr_gmu_registers;

	struct a6xx_gpu_state_obj *registers;
	int nr_registers;

	struct a6xx_gpu_state_obj *shaders;
	int nr_shaders;

	struct a6xx_gpu_state_obj *clusters;
	int nr_clusters;

	struct a6xx_gpu_state_obj *dbgahb_clusters;
	int nr_dbgahb_clusters;

	struct a6xx_gpu_state_obj *indexed_regs;
	int nr_indexed_regs;

	struct a6xx_gpu_state_obj *debugbus;
	int nr_debugbus;

	struct a6xx_gpu_state_obj *vbif_debugbus;

	struct a6xx_gpu_state_obj *cx_debugbus;
	int nr_cx_debugbus;

	struct msm_gpu_state_bo *gmu_log;
	struct msm_gpu_state_bo *gmu_hfi;
	struct msm_gpu_state_bo *gmu_debug;

	s32 hfi_queue_history[2][HFI_HISTORY_SZ];

	struct list_head objs;

	bool gpu_initialized;
};

static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
	in[0] = val;
	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);

	return 2;
}

static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
	in[0] = target;
	in[1] = (((u64) reg) << 44 | dwords);

	return 2;
}

static inline int CRASHDUMP_FINI(u64 *in)
{
	in[0] = 0;
	in[1] = 0;

	return 2;
}

struct a6xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};

struct a6xx_state_memobj {
	struct list_head node;
	unsigned long long data[];
};

static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
{
	struct a6xx_state_memobj *obj =
		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);

	if (!obj)
		return NULL;

	list_add_tail(&obj->node, &a6xx_state->objs);
	return &obj->data;
}

static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
		size_t size)
{
	void *dst = state_kcalloc(a6xx_state, 1, size);

	if (dst)
		memcpy(dst, src, size);
	return dst;
}

/*
 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 * the rest for the data
 */
#define A6XX_CD_DATA_OFFSET 8192
#define A6XX_CD_DATA_SIZE (SZ_1M - 8192)

static int a6xx_crashdumper_init(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new(gpu->dev,
		SZ_1M, MSM_BO_WC, gpu->aspace,
		&dumper->bo, &dumper->iova);

	if (!IS_ERR(dumper->ptr))
		msm_gem_object_set_name(dumper->bo, "crashdump");

	return PTR_ERR_OR_ZERO(dumper->ptr);
}

static int a6xx_crashdumper_run(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 val;
	int ret;

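	/*
	 * The caller has already assembled a crashdump script in dumper->ptr
	 * with CRASHDUMP_READ/CRASHDUMP_WRITE and terminated it with
	 * CRASHDUMP_FINI. All we do here is point the CP at that script,
	 * kick it off and poll CP_CRASH_DUMP_STATUS until bit 1 is set
	 * before disabling the dumper again.
	 */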
	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
		return -EINVAL;

	/* Make sure all pending memory writes are posted */
	wmb();

	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE, dumper->iova);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
		val & 0x02, 100, 10000);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

	return ret;
}

/* read a value from the GX debug bus */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

#define cxdbg_write(ptr, offset, val) \
	msm_writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
	msm_readl((ptr) + ((offset) << 2))

/* read a value from the CX debug bus */
static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);

	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

/* Read a chunk of data from the VBIF debug bus */
static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
		u32 reg, int count, u32 *data)
{
	int i;

	gpu_write(gpu, ctrl0, reg);

	for (i = 0; i < count; i++) {
		gpu_write(gpu, ctrl1, i);
		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
	}

	return count;
}

#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

#define VBIF_DEBUGBUS_BLOCK_SIZE \
	((16 * AXI_ARB_BLOCKS) + \
	 (18 * XIN_AXI_BLOCKS) + \
	 (12 * XIN_CORE_BLOCKS))

static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_gpu_state_obj *obj)
{
	u32 clk, *ptr;
	int i;

	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
		sizeof(u32));
	if (!obj->data)
		return;

	obj->handle = NULL;

	/* Get the current clock setting */
	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

	/* Force on the bus so we can read it */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

	/* We will read from BUS2 first, so disable BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

	/* Enable the VBIF bus for reading */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

	ptr = obj->data;

	for (i = 0; i < AXI_ARB_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << (i + 16), 16, ptr);

	for (i = 0; i < XIN_AXI_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << i, 18, ptr);

	/* Stop BUS2 so we can turn on BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

	for (i = 0; i < XIN_CORE_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
			1 << i, 12, ptr);

	/* Restore the VBIF clock setting */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}

static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_debugbus_block *block,
		struct a6xx_gpu_state_obj *obj)
{
	int i;
	u32 *ptr;

	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
	if (!obj->data)
		return;

	obj->handle = block;

	for (ptr = obj->data, i = 0; i < block->count; i++)
		ptr += debugbus_read(gpu, block->id, i, ptr);
}

static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_debugbus_block *block,
		struct a6xx_gpu_state_obj *obj)
{
	int i;
	u32 *ptr;

	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
	if (!obj->data)
		return;

	obj->handle = block;

	for (ptr = obj->data, i = 0; i < block->count; i++)
		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
}

static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	int nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);

	if (adreno_is_a650_family(to_adreno_gpu(gpu)))
		nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks);

	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
			sizeof(*a6xx_state->debugbus));

	if (a6xx_state->debugbus) {
		int i;

		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
			a6xx_get_debugbus_block(gpu,
				a6xx_state,
				&a6xx_debugbus_blocks[i],
				&a6xx_state->debugbus[i]);

		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);

		/*
		 * GBIF has the same debugbus as the other GPU blocks, so fall
		 * back to the default path if the GPU uses GBIF. GBIF also
		 * uses exactly the same block ID as VBIF.
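		 * (A7xx targets never take this path; their GBIF blocks are
		 * captured from the a7xx_gbif_debugbus_blocks table in
		 * a7xx_get_debugbus_blocks() below.)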
363 */ 364 if (a6xx_has_gbif(to_adreno_gpu(gpu))) { 365 a6xx_get_debugbus_block(gpu, a6xx_state, 366 &a6xx_gbif_debugbus_block, 367 &a6xx_state->debugbus[i]); 368 369 a6xx_state->nr_debugbus += 1; 370 } 371 372 373 if (adreno_is_a650_family(to_adreno_gpu(gpu))) { 374 for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++) 375 a6xx_get_debugbus_block(gpu, 376 a6xx_state, 377 &a650_debugbus_blocks[i], 378 &a6xx_state->debugbus[i]); 379 } 380 } 381 } 382 383 static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu, 384 struct a6xx_gpu_state *a6xx_state) 385 { 386 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 387 int debugbus_blocks_count, total_debugbus_blocks; 388 const u32 *debugbus_blocks; 389 int i; 390 391 if (adreno_is_a730(adreno_gpu)) { 392 debugbus_blocks = gen7_0_0_debugbus_blocks; 393 debugbus_blocks_count = ARRAY_SIZE(gen7_0_0_debugbus_blocks); 394 } else { 395 BUG_ON(!adreno_is_a740_family(adreno_gpu)); 396 debugbus_blocks = gen7_2_0_debugbus_blocks; 397 debugbus_blocks_count = ARRAY_SIZE(gen7_2_0_debugbus_blocks); 398 } 399 400 total_debugbus_blocks = debugbus_blocks_count + 401 ARRAY_SIZE(a7xx_gbif_debugbus_blocks); 402 403 a6xx_state->debugbus = state_kcalloc(a6xx_state, total_debugbus_blocks, 404 sizeof(*a6xx_state->debugbus)); 405 406 if (a6xx_state->debugbus) { 407 for (i = 0; i < debugbus_blocks_count; i++) { 408 a6xx_get_debugbus_block(gpu, 409 a6xx_state, &a7xx_debugbus_blocks[debugbus_blocks[i]], 410 &a6xx_state->debugbus[i]); 411 } 412 413 for (i = 0; i < ARRAY_SIZE(a7xx_gbif_debugbus_blocks); i++) { 414 a6xx_get_debugbus_block(gpu, 415 a6xx_state, &a7xx_gbif_debugbus_blocks[i], 416 &a6xx_state->debugbus[i + debugbus_blocks_count]); 417 } 418 } 419 420 } 421 422 static void a6xx_get_debugbus(struct msm_gpu *gpu, 423 struct a6xx_gpu_state *a6xx_state) 424 { 425 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 426 struct resource *res; 427 void __iomem *cxdbg = NULL; 428 429 /* Set up the GX debug bus */ 430 431 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT, 432 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); 433 434 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM, 435 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); 436 437 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0); 438 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0); 439 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0); 440 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0); 441 442 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210); 443 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98); 444 445 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0); 446 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0); 447 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0); 448 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0); 449 450 /* Set up the CX debug bus - it lives elsewhere in the system so do a 451 * temporary ioremap for the registers 452 */ 453 res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM, 454 "cx_dbgc"); 455 456 if (res) 457 cxdbg = ioremap(res->start, resource_size(res)); 458 459 if (cxdbg) { 460 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT, 461 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); 462 463 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM, 464 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); 465 466 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); 467 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); 468 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); 469 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); 470 471 cxdbg_write(cxdbg, 
REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0, 472 0x76543210); 473 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1, 474 0xFEDCBA98); 475 476 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); 477 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); 478 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); 479 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); 480 } 481 482 if (adreno_is_a7xx(adreno_gpu)) { 483 a7xx_get_debugbus_blocks(gpu, a6xx_state); 484 } else { 485 a6xx_get_debugbus_blocks(gpu, a6xx_state); 486 } 487 488 /* Dump the VBIF debugbus on applicable targets */ 489 if (!a6xx_has_gbif(adreno_gpu)) { 490 a6xx_state->vbif_debugbus = 491 state_kcalloc(a6xx_state, 1, 492 sizeof(*a6xx_state->vbif_debugbus)); 493 494 if (a6xx_state->vbif_debugbus) 495 a6xx_get_vbif_debugbus_block(gpu, a6xx_state, 496 a6xx_state->vbif_debugbus); 497 } 498 499 if (cxdbg) { 500 unsigned nr_cx_debugbus_blocks; 501 const struct a6xx_debugbus_block *cx_debugbus_blocks; 502 503 if (adreno_is_a7xx(adreno_gpu)) { 504 BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu))); 505 cx_debugbus_blocks = a7xx_cx_debugbus_blocks; 506 nr_cx_debugbus_blocks = ARRAY_SIZE(a7xx_cx_debugbus_blocks); 507 } else { 508 cx_debugbus_blocks = a6xx_cx_debugbus_blocks; 509 nr_cx_debugbus_blocks = ARRAY_SIZE(a6xx_cx_debugbus_blocks); 510 } 511 512 a6xx_state->cx_debugbus = 513 state_kcalloc(a6xx_state, 514 nr_cx_debugbus_blocks, 515 sizeof(*a6xx_state->cx_debugbus)); 516 517 if (a6xx_state->cx_debugbus) { 518 int i; 519 520 for (i = 0; i < nr_cx_debugbus_blocks; i++) 521 a6xx_get_cx_debugbus_block(cxdbg, 522 a6xx_state, 523 &cx_debugbus_blocks[i], 524 &a6xx_state->cx_debugbus[i]); 525 526 a6xx_state->nr_cx_debugbus = 527 nr_cx_debugbus_blocks; 528 } 529 530 iounmap(cxdbg); 531 } 532 } 533 534 #define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1) 535 536 /* Read a data cluster from behind the AHB aperture */ 537 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu, 538 struct a6xx_gpu_state *a6xx_state, 539 const struct a6xx_dbgahb_cluster *dbgahb, 540 struct a6xx_gpu_state_obj *obj, 541 struct a6xx_crashdumper *dumper) 542 { 543 u64 *in = dumper->ptr; 544 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 545 size_t datasize; 546 int i, regcount = 0; 547 548 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { 549 int j; 550 551 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, 552 (dbgahb->statetype + i * 2) << 8); 553 554 for (j = 0; j < dbgahb->count; j += 2) { 555 int count = RANGE(dbgahb->registers, j); 556 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + 557 dbgahb->registers[j] - (dbgahb->base >> 2); 558 559 in += CRASHDUMP_READ(in, offset, count, out); 560 561 out += count * sizeof(u32); 562 563 if (i == 0) 564 regcount += count; 565 } 566 } 567 568 CRASHDUMP_FINI(in); 569 570 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); 571 572 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 573 return; 574 575 if (a6xx_crashdumper_run(gpu, dumper)) 576 return; 577 578 obj->handle = dbgahb; 579 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 580 datasize); 581 } 582 583 static void a7xx_get_dbgahb_cluster(struct msm_gpu *gpu, 584 struct a6xx_gpu_state *a6xx_state, 585 const struct gen7_sptp_cluster_registers *dbgahb, 586 struct a6xx_gpu_state_obj *obj, 587 struct a6xx_crashdumper *dumper) 588 { 589 u64 *in = dumper->ptr; 590 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 591 size_t datasize; 592 int i, regcount = 0; 593 594 in += CRASHDUMP_WRITE(in, 
REG_A7XX_SP_READ_SEL, 595 A7XX_SP_READ_SEL_LOCATION(dbgahb->location_id) | 596 A7XX_SP_READ_SEL_PIPE(dbgahb->pipe_id) | 597 A7XX_SP_READ_SEL_STATETYPE(dbgahb->statetype)); 598 599 for (i = 0; dbgahb->regs[i] != UINT_MAX; i += 2) { 600 int count = RANGE(dbgahb->regs, i); 601 u32 offset = REG_A7XX_SP_AHB_READ_APERTURE + 602 dbgahb->regs[i] - dbgahb->regbase; 603 604 in += CRASHDUMP_READ(in, offset, count, out); 605 606 out += count * sizeof(u32); 607 regcount += count; 608 } 609 610 CRASHDUMP_FINI(in); 611 612 datasize = regcount * sizeof(u32); 613 614 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 615 return; 616 617 if (a6xx_crashdumper_run(gpu, dumper)) 618 return; 619 620 obj->handle = dbgahb; 621 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 622 datasize); 623 } 624 625 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu, 626 struct a6xx_gpu_state *a6xx_state, 627 struct a6xx_crashdumper *dumper) 628 { 629 int i; 630 631 a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state, 632 ARRAY_SIZE(a6xx_dbgahb_clusters), 633 sizeof(*a6xx_state->dbgahb_clusters)); 634 635 if (!a6xx_state->dbgahb_clusters) 636 return; 637 638 a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters); 639 640 for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++) 641 a6xx_get_dbgahb_cluster(gpu, a6xx_state, 642 &a6xx_dbgahb_clusters[i], 643 &a6xx_state->dbgahb_clusters[i], dumper); 644 } 645 646 static void a7xx_get_dbgahb_clusters(struct msm_gpu *gpu, 647 struct a6xx_gpu_state *a6xx_state, 648 struct a6xx_crashdumper *dumper) 649 { 650 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 651 int i; 652 const struct gen7_sptp_cluster_registers *dbgahb_clusters; 653 unsigned dbgahb_clusters_size; 654 655 if (adreno_is_a730(adreno_gpu)) { 656 dbgahb_clusters = gen7_0_0_sptp_clusters; 657 dbgahb_clusters_size = ARRAY_SIZE(gen7_0_0_sptp_clusters); 658 } else { 659 BUG_ON(!adreno_is_a740_family(adreno_gpu)); 660 dbgahb_clusters = gen7_2_0_sptp_clusters; 661 dbgahb_clusters_size = ARRAY_SIZE(gen7_2_0_sptp_clusters); 662 } 663 664 a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state, 665 dbgahb_clusters_size, 666 sizeof(*a6xx_state->dbgahb_clusters)); 667 668 if (!a6xx_state->dbgahb_clusters) 669 return; 670 671 a6xx_state->nr_dbgahb_clusters = dbgahb_clusters_size; 672 673 for (i = 0; i < dbgahb_clusters_size; i++) 674 a7xx_get_dbgahb_cluster(gpu, a6xx_state, 675 &dbgahb_clusters[i], 676 &a6xx_state->dbgahb_clusters[i], dumper); 677 } 678 679 /* Read a data cluster from the CP aperture with the crashdumper */ 680 static void a6xx_get_cluster(struct msm_gpu *gpu, 681 struct a6xx_gpu_state *a6xx_state, 682 const struct a6xx_cluster *cluster, 683 struct a6xx_gpu_state_obj *obj, 684 struct a6xx_crashdumper *dumper) 685 { 686 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 687 u64 *in = dumper->ptr; 688 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 689 size_t datasize; 690 int i, regcount = 0; 691 u32 id = cluster->id; 692 693 /* Skip registers that are not present on older generation */ 694 if (!adreno_is_a660_family(adreno_gpu) && 695 cluster->registers == a660_fe_cluster) 696 return; 697 698 if (adreno_is_a650_family(adreno_gpu) && 699 cluster->registers == a6xx_ps_cluster) 700 id = CLUSTER_VPC_PS; 701 702 /* Some clusters need a selector register to be programmed too */ 703 if (cluster->sel_reg) 704 in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val); 705 706 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { 707 int j; 708 709 in += CRASHDUMP_WRITE(in, 
REG_A6XX_CP_APERTURE_CNTL_CD, 710 (id << 8) | (i << 4) | i); 711 712 for (j = 0; j < cluster->count; j += 2) { 713 int count = RANGE(cluster->registers, j); 714 715 in += CRASHDUMP_READ(in, cluster->registers[j], 716 count, out); 717 718 out += count * sizeof(u32); 719 720 if (i == 0) 721 regcount += count; 722 } 723 } 724 725 CRASHDUMP_FINI(in); 726 727 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); 728 729 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 730 return; 731 732 if (a6xx_crashdumper_run(gpu, dumper)) 733 return; 734 735 obj->handle = cluster; 736 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 737 datasize); 738 } 739 740 static void a7xx_get_cluster(struct msm_gpu *gpu, 741 struct a6xx_gpu_state *a6xx_state, 742 const struct gen7_cluster_registers *cluster, 743 struct a6xx_gpu_state_obj *obj, 744 struct a6xx_crashdumper *dumper) 745 { 746 u64 *in = dumper->ptr; 747 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 748 size_t datasize; 749 int i, regcount = 0; 750 751 /* Some clusters need a selector register to be programmed too */ 752 if (cluster->sel) 753 in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val); 754 755 in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD, 756 A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) | 757 A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) | 758 A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id)); 759 760 for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) { 761 int count = RANGE(cluster->regs, i); 762 763 in += CRASHDUMP_READ(in, cluster->regs[i], 764 count, out); 765 766 out += count * sizeof(u32); 767 regcount += count; 768 } 769 770 CRASHDUMP_FINI(in); 771 772 datasize = regcount * sizeof(u32); 773 774 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 775 return; 776 777 if (a6xx_crashdumper_run(gpu, dumper)) 778 return; 779 780 obj->handle = cluster; 781 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 782 datasize); 783 } 784 785 static void a6xx_get_clusters(struct msm_gpu *gpu, 786 struct a6xx_gpu_state *a6xx_state, 787 struct a6xx_crashdumper *dumper) 788 { 789 int i; 790 791 a6xx_state->clusters = state_kcalloc(a6xx_state, 792 ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters)); 793 794 if (!a6xx_state->clusters) 795 return; 796 797 a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters); 798 799 for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++) 800 a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i], 801 &a6xx_state->clusters[i], dumper); 802 } 803 804 static void a7xx_get_clusters(struct msm_gpu *gpu, 805 struct a6xx_gpu_state *a6xx_state, 806 struct a6xx_crashdumper *dumper) 807 { 808 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 809 int i; 810 const struct gen7_cluster_registers *clusters; 811 unsigned clusters_size; 812 813 if (adreno_is_a730(adreno_gpu)) { 814 clusters = gen7_0_0_clusters; 815 clusters_size = ARRAY_SIZE(gen7_0_0_clusters); 816 } else { 817 BUG_ON(!adreno_is_a740_family(adreno_gpu)); 818 clusters = gen7_2_0_clusters; 819 clusters_size = ARRAY_SIZE(gen7_2_0_clusters); 820 } 821 822 a6xx_state->clusters = state_kcalloc(a6xx_state, 823 clusters_size, sizeof(*a6xx_state->clusters)); 824 825 if (!a6xx_state->clusters) 826 return; 827 828 a6xx_state->nr_clusters = clusters_size; 829 830 for (i = 0; i < clusters_size; i++) 831 a7xx_get_cluster(gpu, a6xx_state, &clusters[i], 832 &a6xx_state->clusters[i], dumper); 833 } 834 835 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */ 836 static void a6xx_get_shader_block(struct 
msm_gpu *gpu, 837 struct a6xx_gpu_state *a6xx_state, 838 const struct a6xx_shader_block *block, 839 struct a6xx_gpu_state_obj *obj, 840 struct a6xx_crashdumper *dumper) 841 { 842 u64 *in = dumper->ptr; 843 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 844 size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32); 845 int i; 846 847 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 848 return; 849 850 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { 851 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, 852 (block->type << 8) | i); 853 854 in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE, 855 block->size, out); 856 857 out += block->size * sizeof(u32); 858 } 859 860 CRASHDUMP_FINI(in); 861 862 if (a6xx_crashdumper_run(gpu, dumper)) 863 return; 864 865 obj->handle = block; 866 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 867 datasize); 868 } 869 870 static void a7xx_get_shader_block(struct msm_gpu *gpu, 871 struct a6xx_gpu_state *a6xx_state, 872 const struct gen7_shader_block *block, 873 struct a6xx_gpu_state_obj *obj, 874 struct a6xx_crashdumper *dumper) 875 { 876 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 877 u64 *in = dumper->ptr; 878 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 879 size_t datasize = block->size * block->num_sps * block->num_usptps * sizeof(u32); 880 int i, j; 881 882 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 883 return; 884 885 if (adreno_is_a730(adreno_gpu)) { 886 gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 3); 887 } 888 889 for (i = 0; i < block->num_sps; i++) { 890 for (j = 0; j < block->num_usptps; j++) { 891 in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL, 892 A7XX_SP_READ_SEL_LOCATION(block->location) | 893 A7XX_SP_READ_SEL_PIPE(block->pipeid) | 894 A7XX_SP_READ_SEL_STATETYPE(block->statetype) | 895 A7XX_SP_READ_SEL_USPTP(j) | 896 A7XX_SP_READ_SEL_SPTP(i)); 897 898 in += CRASHDUMP_READ(in, REG_A7XX_SP_AHB_READ_APERTURE, 899 block->size, out); 900 901 out += block->size * sizeof(u32); 902 } 903 } 904 905 CRASHDUMP_FINI(in); 906 907 if (a6xx_crashdumper_run(gpu, dumper)) 908 goto out; 909 910 obj->handle = block; 911 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 912 datasize); 913 914 out: 915 if (adreno_is_a730(adreno_gpu)) { 916 gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 0); 917 } 918 } 919 920 static void a6xx_get_shaders(struct msm_gpu *gpu, 921 struct a6xx_gpu_state *a6xx_state, 922 struct a6xx_crashdumper *dumper) 923 { 924 int i; 925 926 a6xx_state->shaders = state_kcalloc(a6xx_state, 927 ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders)); 928 929 if (!a6xx_state->shaders) 930 return; 931 932 a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks); 933 934 for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++) 935 a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i], 936 &a6xx_state->shaders[i], dumper); 937 } 938 939 static void a7xx_get_shaders(struct msm_gpu *gpu, 940 struct a6xx_gpu_state *a6xx_state, 941 struct a6xx_crashdumper *dumper) 942 { 943 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 944 const struct gen7_shader_block *shader_blocks; 945 unsigned num_shader_blocks; 946 int i; 947 948 if (adreno_is_a730(adreno_gpu)) { 949 shader_blocks = gen7_0_0_shader_blocks; 950 num_shader_blocks = ARRAY_SIZE(gen7_0_0_shader_blocks); 951 } else { 952 BUG_ON(!adreno_is_a740_family(adreno_gpu)); 953 shader_blocks = gen7_2_0_shader_blocks; 954 num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks); 955 } 956 957 a6xx_state->shaders = 
state_kcalloc(a6xx_state, 958 num_shader_blocks, sizeof(*a6xx_state->shaders)); 959 960 if (!a6xx_state->shaders) 961 return; 962 963 a6xx_state->nr_shaders = num_shader_blocks; 964 965 for (i = 0; i < num_shader_blocks; i++) 966 a7xx_get_shader_block(gpu, a6xx_state, &shader_blocks[i], 967 &a6xx_state->shaders[i], dumper); 968 } 969 970 /* Read registers from behind the HLSQ aperture with the crashdumper */ 971 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu, 972 struct a6xx_gpu_state *a6xx_state, 973 const struct a6xx_registers *regs, 974 struct a6xx_gpu_state_obj *obj, 975 struct a6xx_crashdumper *dumper) 976 977 { 978 u64 *in = dumper->ptr; 979 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 980 int i, regcount = 0; 981 982 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1); 983 984 for (i = 0; i < regs->count; i += 2) { 985 u32 count = RANGE(regs->registers, i); 986 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + 987 regs->registers[i] - (regs->val0 >> 2); 988 989 in += CRASHDUMP_READ(in, offset, count, out); 990 991 out += count * sizeof(u32); 992 regcount += count; 993 } 994 995 CRASHDUMP_FINI(in); 996 997 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) 998 return; 999 1000 if (a6xx_crashdumper_run(gpu, dumper)) 1001 return; 1002 1003 obj->handle = regs; 1004 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 1005 regcount * sizeof(u32)); 1006 } 1007 1008 /* Read a block of registers using the crashdumper */ 1009 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu, 1010 struct a6xx_gpu_state *a6xx_state, 1011 const struct a6xx_registers *regs, 1012 struct a6xx_gpu_state_obj *obj, 1013 struct a6xx_crashdumper *dumper) 1014 1015 { 1016 u64 *in = dumper->ptr; 1017 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 1018 int i, regcount = 0; 1019 1020 /* Skip unsupported registers on older generations */ 1021 if (!adreno_is_a660_family(to_adreno_gpu(gpu)) && 1022 (regs->registers == a660_registers)) 1023 return; 1024 1025 /* Some blocks might need to program a selector register first */ 1026 if (regs->val0) 1027 in += CRASHDUMP_WRITE(in, regs->val0, regs->val1); 1028 1029 for (i = 0; i < regs->count; i += 2) { 1030 u32 count = RANGE(regs->registers, i); 1031 1032 in += CRASHDUMP_READ(in, regs->registers[i], count, out); 1033 1034 out += count * sizeof(u32); 1035 regcount += count; 1036 } 1037 1038 CRASHDUMP_FINI(in); 1039 1040 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) 1041 return; 1042 1043 if (a6xx_crashdumper_run(gpu, dumper)) 1044 return; 1045 1046 obj->handle = regs; 1047 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 1048 regcount * sizeof(u32)); 1049 } 1050 1051 static void a7xx_get_crashdumper_registers(struct msm_gpu *gpu, 1052 struct a6xx_gpu_state *a6xx_state, 1053 const struct gen7_reg_list *regs, 1054 struct a6xx_gpu_state_obj *obj, 1055 struct a6xx_crashdumper *dumper) 1056 1057 { 1058 u64 *in = dumper->ptr; 1059 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 1060 int i, regcount = 0; 1061 1062 /* Some blocks might need to program a selector register first */ 1063 if (regs->sel) 1064 in += CRASHDUMP_WRITE(in, regs->sel->cd_reg, regs->sel->val); 1065 1066 for (i = 0; regs->regs[i] != UINT_MAX; i += 2) { 1067 u32 count = RANGE(regs->regs, i); 1068 1069 in += CRASHDUMP_READ(in, regs->regs[i], count, out); 1070 1071 out += count * sizeof(u32); 1072 regcount += count; 1073 } 1074 1075 CRASHDUMP_FINI(in); 1076 1077 if (WARN_ON((regcount * sizeof(u32)) > 
A6XX_CD_DATA_SIZE)) 1078 return; 1079 1080 if (a6xx_crashdumper_run(gpu, dumper)) 1081 return; 1082 1083 obj->handle = regs->regs; 1084 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 1085 regcount * sizeof(u32)); 1086 } 1087 1088 1089 /* Read a block of registers via AHB */ 1090 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu, 1091 struct a6xx_gpu_state *a6xx_state, 1092 const struct a6xx_registers *regs, 1093 struct a6xx_gpu_state_obj *obj) 1094 { 1095 int i, regcount = 0, index = 0; 1096 1097 /* Skip unsupported registers on older generations */ 1098 if (!adreno_is_a660_family(to_adreno_gpu(gpu)) && 1099 (regs->registers == a660_registers)) 1100 return; 1101 1102 for (i = 0; i < regs->count; i += 2) 1103 regcount += RANGE(regs->registers, i); 1104 1105 obj->handle = (const void *) regs; 1106 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32)); 1107 if (!obj->data) 1108 return; 1109 1110 for (i = 0; i < regs->count; i += 2) { 1111 u32 count = RANGE(regs->registers, i); 1112 int j; 1113 1114 for (j = 0; j < count; j++) 1115 obj->data[index++] = gpu_read(gpu, 1116 regs->registers[i] + j); 1117 } 1118 } 1119 1120 static void a7xx_get_ahb_gpu_registers(struct msm_gpu *gpu, 1121 struct a6xx_gpu_state *a6xx_state, 1122 const u32 *regs, 1123 struct a6xx_gpu_state_obj *obj) 1124 { 1125 int i, regcount = 0, index = 0; 1126 1127 for (i = 0; regs[i] != UINT_MAX; i += 2) 1128 regcount += RANGE(regs, i); 1129 1130 obj->handle = (const void *) regs; 1131 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32)); 1132 if (!obj->data) 1133 return; 1134 1135 for (i = 0; regs[i] != UINT_MAX; i += 2) { 1136 u32 count = RANGE(regs, i); 1137 int j; 1138 1139 for (j = 0; j < count; j++) 1140 obj->data[index++] = gpu_read(gpu, regs[i] + j); 1141 } 1142 } 1143 1144 static void a7xx_get_ahb_gpu_reglist(struct msm_gpu *gpu, 1145 struct a6xx_gpu_state *a6xx_state, 1146 const struct gen7_reg_list *regs, 1147 struct a6xx_gpu_state_obj *obj) 1148 { 1149 if (regs->sel) 1150 gpu_write(gpu, regs->sel->host_reg, regs->sel->val); 1151 1152 a7xx_get_ahb_gpu_registers(gpu, a6xx_state, regs->regs, obj); 1153 } 1154 1155 /* Read a block of GMU registers */ 1156 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu, 1157 struct a6xx_gpu_state *a6xx_state, 1158 const struct a6xx_registers *regs, 1159 struct a6xx_gpu_state_obj *obj, 1160 bool rscc) 1161 { 1162 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1163 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1164 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 1165 int i, regcount = 0, index = 0; 1166 1167 for (i = 0; i < regs->count; i += 2) 1168 regcount += RANGE(regs->registers, i); 1169 1170 obj->handle = (const void *) regs; 1171 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32)); 1172 if (!obj->data) 1173 return; 1174 1175 for (i = 0; i < regs->count; i += 2) { 1176 u32 count = RANGE(regs->registers, i); 1177 int j; 1178 1179 for (j = 0; j < count; j++) { 1180 u32 offset = regs->registers[i] + j; 1181 u32 val; 1182 1183 if (rscc) 1184 val = gmu_read_rscc(gmu, offset); 1185 else 1186 val = gmu_read(gmu, offset); 1187 1188 obj->data[index++] = val; 1189 } 1190 } 1191 } 1192 1193 static void a6xx_get_gmu_registers(struct msm_gpu *gpu, 1194 struct a6xx_gpu_state *a6xx_state) 1195 { 1196 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1197 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1198 1199 a6xx_state->gmu_registers = state_kcalloc(a6xx_state, 1200 3, sizeof(*a6xx_state->gmu_registers)); 1201 1202 if 
(!a6xx_state->gmu_registers) 1203 return; 1204 1205 a6xx_state->nr_gmu_registers = 3; 1206 1207 /* Get the CX GMU registers from AHB */ 1208 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0], 1209 &a6xx_state->gmu_registers[0], false); 1210 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1], 1211 &a6xx_state->gmu_registers[1], true); 1212 1213 if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) 1214 return; 1215 1216 /* Set the fence to ALLOW mode so we can access the registers */ 1217 gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0); 1218 1219 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2], 1220 &a6xx_state->gmu_registers[2], false); 1221 } 1222 1223 static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo( 1224 struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo) 1225 { 1226 struct msm_gpu_state_bo *snapshot; 1227 1228 if (!bo->size) 1229 return NULL; 1230 1231 snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot)); 1232 if (!snapshot) 1233 return NULL; 1234 1235 snapshot->iova = bo->iova; 1236 snapshot->size = bo->size; 1237 snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL); 1238 if (!snapshot->data) 1239 return NULL; 1240 1241 memcpy(snapshot->data, bo->virt, bo->size); 1242 1243 return snapshot; 1244 } 1245 1246 static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu, 1247 struct a6xx_gpu_state *a6xx_state) 1248 { 1249 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1250 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1251 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 1252 unsigned i, j; 1253 1254 BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history)); 1255 1256 for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) { 1257 struct a6xx_hfi_queue *queue = &gmu->queues[i]; 1258 for (j = 0; j < HFI_HISTORY_SZ; j++) { 1259 unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ; 1260 a6xx_state->hfi_queue_history[i][j] = queue->history[idx]; 1261 } 1262 } 1263 } 1264 1265 #define A6XX_REGLIST_SIZE 1 1266 #define A6XX_GBIF_REGLIST_SIZE 1 1267 static void a6xx_get_registers(struct msm_gpu *gpu, 1268 struct a6xx_gpu_state *a6xx_state, 1269 struct a6xx_crashdumper *dumper) 1270 { 1271 int i, count = A6XX_REGLIST_SIZE + 1272 ARRAY_SIZE(a6xx_reglist) + 1273 ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE; 1274 int index = 0; 1275 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1276 1277 a6xx_state->registers = state_kcalloc(a6xx_state, 1278 count, sizeof(*a6xx_state->registers)); 1279 1280 if (!a6xx_state->registers) 1281 return; 1282 1283 a6xx_state->nr_registers = count; 1284 1285 a6xx_get_ahb_gpu_registers(gpu, 1286 a6xx_state, &a6xx_ahb_reglist, 1287 &a6xx_state->registers[index++]); 1288 1289 if (a6xx_has_gbif(adreno_gpu)) 1290 a6xx_get_ahb_gpu_registers(gpu, 1291 a6xx_state, &a6xx_gbif_reglist, 1292 &a6xx_state->registers[index++]); 1293 else 1294 a6xx_get_ahb_gpu_registers(gpu, 1295 a6xx_state, &a6xx_vbif_reglist, 1296 &a6xx_state->registers[index++]); 1297 if (!dumper) { 1298 /* 1299 * We can't use the crashdumper when the SMMU is stalled, 1300 * because the GPU has no memory access until we resume 1301 * translation (but we don't want to do that until after 1302 * we have captured as much useful GPU state as possible). 
		 * So instead collect registers via the CPU:
		 */
		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
			a6xx_get_ahb_gpu_registers(gpu,
				a6xx_state, &a6xx_reglist[i],
				&a6xx_state->registers[index++]);
		return;
	}

	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
		a6xx_get_crashdumper_registers(gpu,
			a6xx_state, &a6xx_reglist[i],
			&a6xx_state->registers[index++],
			dumper);

	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
		a6xx_get_crashdumper_hlsq_registers(gpu,
			a6xx_state, &a6xx_hlsq_reglist[i],
			&a6xx_state->registers[index++],
			dumper);
}

#define A7XX_PRE_CRASHDUMPER_SIZE 1
#define A7XX_POST_CRASHDUMPER_SIZE 1
static void a7xx_get_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i, count;
	int index = 0;
	const u32 *pre_crashdumper_regs;
	const struct gen7_reg_list *reglist;

	if (adreno_is_a730(adreno_gpu)) {
		reglist = gen7_0_0_reg_list;
		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
	} else {
		BUG_ON(!adreno_is_a740_family(adreno_gpu));
		reglist = gen7_2_0_reg_list;
		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
	}

	count = A7XX_PRE_CRASHDUMPER_SIZE + A7XX_POST_CRASHDUMPER_SIZE;

	/* The downstream reglist contains registers in other memory regions
	 * (cx_misc/cx_mem and cx_dbgc) and we need to plumb through their
	 * offsets and map them to read them on the CPU. For now only read the
	 * first region which is the main one.
	 */
	if (dumper) {
		for (i = 0; reglist[i].regs; i++)
			count++;
	} else {
		count++;
	}

	a6xx_state->registers = state_kcalloc(a6xx_state,
		count, sizeof(*a6xx_state->registers));

	if (!a6xx_state->registers)
		return;

	a6xx_state->nr_registers = count;

	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, pre_crashdumper_regs,
		&a6xx_state->registers[index++]);

	if (!dumper) {
		a7xx_get_ahb_gpu_reglist(gpu,
			a6xx_state, &reglist[0],
			&a6xx_state->registers[index++]);
		return;
	}

	for (i = 0; reglist[i].regs; i++)
		a7xx_get_crashdumper_registers(gpu,
			a6xx_state, &reglist[i],
			&a6xx_state->registers[index++],
			dumper);
}

static void a7xx_get_post_crashdumper_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const u32 *regs;

	BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
	regs = gen7_0_0_post_crashdumper_registers;

	a7xx_get_ahb_gpu_registers(gpu,
		a6xx_state, regs,
		&a6xx_state->registers[a6xx_state->nr_registers - 1]);
}

static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu)
{
	/* The value at [16:31] is in 4dword units. Convert it to dwords */
	return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14;
}

static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu)
{
	/*
	 * The value at CP_ROQ_THRESHOLDS_2[20:31] is in 4dword units.
	 * That register however is not directly accessible from APSS on A7xx.
	 * Program the SQE_UCODE_DBG_ADDR with offset=0x70d3 and read the value.
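	 * The readback below keeps only bits [31:20] and multiplies by 4 to
	 * convert back to dwords (e.g. a field value of 0x80 corresponds to
	 * 0x200 dwords).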
	 */
	gpu_write(gpu, REG_A6XX_CP_SQE_UCODE_DBG_ADDR, 0x70d3);

	return 4 * (gpu_read(gpu, REG_A6XX_CP_SQE_UCODE_DBG_DATA) >> 20);
}

/* Read a block of data from an indexed register pair */
static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_indexed_registers *indexed,
		struct a6xx_gpu_state_obj *obj)
{
	int i;

	obj->handle = (const void *) indexed;
	if (indexed->count_fn)
		indexed->count = indexed->count_fn(gpu);

	obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
	if (!obj->data)
		return;

	/* All the indexed banks start at address 0 */
	gpu_write(gpu, indexed->addr, 0);

	/* Read the data - each read increments the internal address by 1 */
	for (i = 0; i < indexed->count; i++)
		obj->data[i] = gpu_read(gpu, indexed->data);
}

static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	u32 mempool_size;
	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
	int i;

	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
		sizeof(*a6xx_state->indexed_regs));
	if (!a6xx_state->indexed_regs)
		return;

	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
			&a6xx_state->indexed_regs[i]);

	if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
		u32 val;

		val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG);
		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4);

		/* Get the contents of the CP mempool */
		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
			&a6xx_state->indexed_regs[i]);

		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val);
		a6xx_state->nr_indexed_regs = count;
		return;
	}

	/* Set the CP mempool size to 0 to stabilize it while dumping */
	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);

	/* Get the contents of the CP mempool */
	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
		&a6xx_state->indexed_regs[i]);

	/*
	 * Offset 0x2000 in the mempool is the size - copy the saved size over
	 * so the data is consistent
	 */
	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;

	/* Restore the size in the hardware */
	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
}

static void a7xx_get_indexed_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i, indexed_count, mempool_count;

	BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
	indexed_count = ARRAY_SIZE(a7xx_indexed_reglist);
	mempool_count = ARRAY_SIZE(a7xx_cp_bv_mempool_indexed);

	a6xx_state->indexed_regs = state_kcalloc(a6xx_state,
			indexed_count + mempool_count,
			sizeof(*a6xx_state->indexed_regs));
	if (!a6xx_state->indexed_regs)
		return;

	a6xx_state->nr_indexed_regs = indexed_count + mempool_count;

	/* First read the common regs */
	for (i = 0; i < indexed_count; i++)
		a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_indexed_reglist[i],
			&a6xx_state->indexed_regs[i]);

	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, 0, BIT(2));
	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG,
0, BIT(2)); 1515 1516 /* Get the contents of the CP_BV mempool */ 1517 for (i = 0; i < mempool_count; i++) 1518 a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_cp_bv_mempool_indexed[i], 1519 &a6xx_state->indexed_regs[indexed_count + i]); 1520 1521 gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(2), 0); 1522 gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, BIT(2), 0); 1523 return; 1524 } 1525 1526 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) 1527 { 1528 struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL; 1529 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1530 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1531 struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state), 1532 GFP_KERNEL); 1533 bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & 1534 A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT); 1535 1536 if (!a6xx_state) 1537 return ERR_PTR(-ENOMEM); 1538 1539 INIT_LIST_HEAD(&a6xx_state->objs); 1540 1541 /* Get the generic state from the adreno core */ 1542 adreno_gpu_state_get(gpu, &a6xx_state->base); 1543 1544 if (!adreno_has_gmu_wrapper(adreno_gpu)) { 1545 a6xx_get_gmu_registers(gpu, a6xx_state); 1546 1547 a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log); 1548 a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi); 1549 a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug); 1550 1551 a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state); 1552 } 1553 1554 /* If GX isn't on the rest of the data isn't going to be accessible */ 1555 if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) 1556 return &a6xx_state->base; 1557 1558 /* Get the banks of indexed registers */ 1559 if (adreno_is_a7xx(adreno_gpu)) 1560 a7xx_get_indexed_registers(gpu, a6xx_state); 1561 else 1562 a6xx_get_indexed_registers(gpu, a6xx_state); 1563 1564 /* 1565 * Try to initialize the crashdumper, if we are not dumping state 1566 * with the SMMU stalled. 
The crashdumper needs memory access to 1567 * write out GPU state, so we need to skip this when the SMMU is 1568 * stalled in response to an iova fault 1569 */ 1570 if (!stalled && !gpu->needs_hw_init && 1571 !a6xx_crashdumper_init(gpu, &_dumper)) { 1572 dumper = &_dumper; 1573 } 1574 1575 if (adreno_is_a7xx(adreno_gpu)) { 1576 a7xx_get_registers(gpu, a6xx_state, dumper); 1577 1578 if (dumper) { 1579 a7xx_get_shaders(gpu, a6xx_state, dumper); 1580 a7xx_get_clusters(gpu, a6xx_state, dumper); 1581 a7xx_get_dbgahb_clusters(gpu, a6xx_state, dumper); 1582 1583 msm_gem_kernel_put(dumper->bo, gpu->aspace); 1584 } 1585 1586 a7xx_get_post_crashdumper_registers(gpu, a6xx_state); 1587 } else { 1588 a6xx_get_registers(gpu, a6xx_state, dumper); 1589 1590 if (dumper) { 1591 a6xx_get_shaders(gpu, a6xx_state, dumper); 1592 a6xx_get_clusters(gpu, a6xx_state, dumper); 1593 a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper); 1594 1595 msm_gem_kernel_put(dumper->bo, gpu->aspace); 1596 } 1597 } 1598 1599 if (snapshot_debugbus) 1600 a6xx_get_debugbus(gpu, a6xx_state); 1601 1602 a6xx_state->gpu_initialized = !gpu->needs_hw_init; 1603 1604 return &a6xx_state->base; 1605 } 1606 1607 static void a6xx_gpu_state_destroy(struct kref *kref) 1608 { 1609 struct a6xx_state_memobj *obj, *tmp; 1610 struct msm_gpu_state *state = container_of(kref, 1611 struct msm_gpu_state, ref); 1612 struct a6xx_gpu_state *a6xx_state = container_of(state, 1613 struct a6xx_gpu_state, base); 1614 1615 if (a6xx_state->gmu_log) 1616 kvfree(a6xx_state->gmu_log->data); 1617 1618 if (a6xx_state->gmu_hfi) 1619 kvfree(a6xx_state->gmu_hfi->data); 1620 1621 if (a6xx_state->gmu_debug) 1622 kvfree(a6xx_state->gmu_debug->data); 1623 1624 list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) { 1625 list_del(&obj->node); 1626 kvfree(obj); 1627 } 1628 1629 adreno_gpu_state_destroy(state); 1630 kfree(a6xx_state); 1631 } 1632 1633 int a6xx_gpu_state_put(struct msm_gpu_state *state) 1634 { 1635 if (IS_ERR_OR_NULL(state)) 1636 return 1; 1637 1638 return kref_put(&state->ref, a6xx_gpu_state_destroy); 1639 } 1640 1641 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count, 1642 struct drm_printer *p) 1643 { 1644 int i, index = 0; 1645 1646 if (!data) 1647 return; 1648 1649 for (i = 0; i < count; i += 2) { 1650 u32 count = RANGE(registers, i); 1651 u32 offset = registers[i]; 1652 int j; 1653 1654 for (j = 0; j < count; index++, offset++, j++) { 1655 if (data[index] == 0xdeafbead) 1656 continue; 1657 1658 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", 1659 offset << 2, data[index]); 1660 } 1661 } 1662 } 1663 1664 static void a7xx_show_registers_indented(const u32 *registers, u32 *data, 1665 struct drm_printer *p, unsigned indent) 1666 { 1667 int i, index = 0; 1668 1669 for (i = 0; registers[i] != UINT_MAX; i += 2) { 1670 u32 count = RANGE(registers, i); 1671 u32 offset = registers[i]; 1672 int j; 1673 1674 for (j = 0; j < count; index++, offset++, j++) { 1675 int k; 1676 1677 if (data[index] == 0xdeafbead) 1678 continue; 1679 1680 for (k = 0; k < indent; k++) 1681 drm_printf(p, " "); 1682 drm_printf(p, "- { offset: 0x%06x, value: 0x%08x }\n", 1683 offset << 2, data[index]); 1684 } 1685 } 1686 } 1687 1688 static void a7xx_show_registers(const u32 *registers, u32 *data, struct drm_printer *p) 1689 { 1690 a7xx_show_registers_indented(registers, data, p, 1); 1691 } 1692 1693 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data) 1694 { 1695 char out[ASCII85_BUFSZ]; 1696 long i, l, datalen = 0; 1697 1698 for (i = 0; 
i < len >> 2; i++) { 1699 if (data[i]) 1700 datalen = (i + 1) << 2; 1701 } 1702 1703 if (datalen == 0) 1704 return; 1705 1706 drm_puts(p, " data: !!ascii85 |\n"); 1707 drm_puts(p, " "); 1708 1709 1710 l = ascii85_encode_len(datalen); 1711 1712 for (i = 0; i < l; i++) 1713 drm_puts(p, ascii85_encode(data[i], out)); 1714 1715 drm_puts(p, "\n"); 1716 } 1717 1718 static void print_name(struct drm_printer *p, const char *fmt, const char *name) 1719 { 1720 drm_puts(p, fmt); 1721 drm_puts(p, name); 1722 drm_puts(p, "\n"); 1723 } 1724 1725 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj, 1726 struct drm_printer *p) 1727 { 1728 const struct a6xx_shader_block *block = obj->handle; 1729 int i; 1730 1731 if (!obj->handle) 1732 return; 1733 1734 print_name(p, " - type: ", block->name); 1735 1736 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { 1737 drm_printf(p, " - bank: %d\n", i); 1738 drm_printf(p, " size: %d\n", block->size); 1739 1740 if (!obj->data) 1741 continue; 1742 1743 print_ascii85(p, block->size << 2, 1744 obj->data + (block->size * i)); 1745 } 1746 } 1747 1748 static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj, 1749 struct drm_printer *p) 1750 { 1751 const struct gen7_shader_block *block = obj->handle; 1752 int i, j; 1753 u32 *data = obj->data; 1754 1755 if (!obj->handle) 1756 return; 1757 1758 print_name(p, " - type: ", a7xx_statetype_names[block->statetype]); 1759 print_name(p, " - pipe: ", a7xx_pipe_names[block->pipeid]); 1760 1761 for (i = 0; i < block->num_sps; i++) { 1762 drm_printf(p, " - sp: %d\n", i); 1763 1764 for (j = 0; j < block->num_usptps; j++) { 1765 drm_printf(p, " - usptp: %d\n", j); 1766 drm_printf(p, " size: %d\n", block->size); 1767 1768 if (!obj->data) 1769 continue; 1770 1771 print_ascii85(p, block->size << 2, data); 1772 1773 data += block->size; 1774 } 1775 } 1776 } 1777 1778 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data, 1779 struct drm_printer *p) 1780 { 1781 int ctx, index = 0; 1782 1783 for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) { 1784 int j; 1785 1786 drm_printf(p, " - context: %d\n", ctx); 1787 1788 for (j = 0; j < size; j += 2) { 1789 u32 count = RANGE(registers, j); 1790 u32 offset = registers[j]; 1791 int k; 1792 1793 for (k = 0; k < count; index++, offset++, k++) { 1794 if (data[index] == 0xdeafbead) 1795 continue; 1796 1797 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", 1798 offset << 2, data[index]); 1799 } 1800 } 1801 } 1802 } 1803 1804 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj, 1805 struct drm_printer *p) 1806 { 1807 const struct a6xx_dbgahb_cluster *dbgahb = obj->handle; 1808 1809 if (dbgahb) { 1810 print_name(p, " - cluster-name: ", dbgahb->name); 1811 a6xx_show_cluster_data(dbgahb->registers, dbgahb->count, 1812 obj->data, p); 1813 } 1814 } 1815 1816 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj, 1817 struct drm_printer *p) 1818 { 1819 const struct a6xx_cluster *cluster = obj->handle; 1820 1821 if (cluster) { 1822 print_name(p, " - cluster-name: ", cluster->name); 1823 a6xx_show_cluster_data(cluster->registers, cluster->count, 1824 obj->data, p); 1825 } 1826 } 1827 1828 static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj, 1829 struct drm_printer *p) 1830 { 1831 const struct gen7_sptp_cluster_registers *dbgahb = obj->handle; 1832 1833 if (dbgahb) { 1834 print_name(p, " - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]); 1835 print_name(p, " - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]); 1836 drm_printf(p, " - context: %d\n", 
			dbgahb->context_id);
		a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4);
	}
}

static void a7xx_show_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct gen7_cluster_registers *cluster = obj->handle;

	if (cluster) {
		int context = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;

		print_name(p, " - pipe: ", a7xx_pipe_names[cluster->pipe_id]);
		print_name(p, " - cluster-name: ", a7xx_cluster_names[cluster->cluster_id]);
		drm_printf(p, " - context: %d\n", context);
		a7xx_show_registers_indented(cluster->regs, obj->data, p, 4);
	}
}

static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_indexed_registers *indexed = obj->handle;

	if (!indexed)
		return;

	print_name(p, " - regs-name: ", indexed->name);
	drm_printf(p, " dwords: %d\n", indexed->count);

	print_ascii85(p, indexed->count << 2, obj->data);
}

static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
		u32 *data, struct drm_printer *p)
{
	if (block) {
		print_name(p, " - debugbus-block: ", block->name);

		/*
		 * count for regular debugbus data is in quadwords,
		 * but print the size in dwords for consistency
		 */
		drm_printf(p, " count: %d\n", block->count << 1);

		print_ascii85(p, block->count << 3, data);
	}
}

static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
		struct drm_printer *p)
{
	int i;

	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];

		a6xx_show_debugbus_block(obj->handle, obj->data, p);
	}

	if (a6xx_state->vbif_debugbus) {
		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;

		drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n");
		drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);

		/* vbif debugbus data is in dwords. Confusing, huh?
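		 * Each of the VBIF_DEBUGBUS_BLOCK_SIZE entries captured in
		 * a6xx_get_vbif_debugbus_block() is a single 32-bit word,
		 * unlike the regular debugbus blocks above which return two
		 * words per index (hence << 2 here versus << 3 above).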
		 */
		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
	}

	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];

		a6xx_show_debugbus_block(obj->handle, obj->data, p);
	}
}

void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu_state *a6xx_state = container_of(state,
			struct a6xx_gpu_state, base);
	int i;

	if (IS_ERR_OR_NULL(state))
		return;

	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);

	adreno_show(gpu, state, p);

	drm_puts(p, "gmu-log:\n");
	if (a6xx_state->gmu_log) {
		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;

		drm_printf(p, " iova: 0x%016llx\n", gmu_log->iova);
		drm_printf(p, " size: %zu\n", gmu_log->size);
		adreno_show_object(p, &gmu_log->data, gmu_log->size,
				&gmu_log->encoded);
	}

	drm_puts(p, "gmu-hfi:\n");
	if (a6xx_state->gmu_hfi) {
		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
		unsigned i, j;

		drm_printf(p, " iova: 0x%016llx\n", gmu_hfi->iova);
		drm_printf(p, " size: %zu\n", gmu_hfi->size);
		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
			drm_printf(p, " queue-history[%u]:", i);
			for (j = 0; j < HFI_HISTORY_SZ; j++) {
				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
			}
			drm_printf(p, "\n");
		}
		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
				&gmu_hfi->encoded);
	}

	drm_puts(p, "gmu-debug:\n");
	if (a6xx_state->gmu_debug) {
		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;

		drm_printf(p, " iova: 0x%016llx\n", gmu_debug->iova);
		drm_printf(p, " size: %zu\n", gmu_debug->size);
		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
				&gmu_debug->encoded);
	}

	drm_puts(p, "registers:\n");
	for (i = 0; i < a6xx_state->nr_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];

		if (!obj->handle)
			continue;

		if (adreno_is_a7xx(adreno_gpu)) {
			a7xx_show_registers(obj->handle, obj->data, p);
		} else {
			const struct a6xx_registers *regs = obj->handle;

			a6xx_show_registers(regs->registers, obj->data, regs->count, p);
		}
	}

	drm_puts(p, "registers-gmu:\n");
	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
		const struct a6xx_registers *regs = obj->handle;

		if (!obj->handle)
			continue;

		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
	}

	drm_puts(p, "indexed-registers:\n");
	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);

	drm_puts(p, "shader-blocks:\n");
	for (i = 0; i < a6xx_state->nr_shaders; i++) {
		if (adreno_is_a7xx(adreno_gpu))
			a7xx_show_shader(&a6xx_state->shaders[i], p);
		else
			a6xx_show_shader(&a6xx_state->shaders[i], p);
	}

	drm_puts(p, "clusters:\n");
	for (i = 0; i < a6xx_state->nr_clusters; i++) {
		if (adreno_is_a7xx(adreno_gpu))
			a7xx_show_cluster(&a6xx_state->clusters[i], p);
		else
			a6xx_show_cluster(&a6xx_state->clusters[i], p);
	}

	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) {
		if (adreno_is_a7xx(adreno_gpu))
			a7xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
		else
			a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
	}

	drm_puts(p, "debugbus:\n");
	a6xx_show_debugbus(a6xx_state, p);
}