// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */

#include <linux/ascii85.h>
#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"

/* Ignore diagnostics about register tables that we aren't using yet. We don't
 * want to modify these headers too much from their original source.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-const-variable"

#include "adreno_gen7_0_0_snapshot.h"
#include "adreno_gen7_2_0_snapshot.h"
#include "adreno_gen7_9_0_snapshot.h"

#pragma GCC diagnostic pop

struct a6xx_gpu_state_obj {
        const void *handle;
        u32 *data;
        u32 count;      /* optional, used when count potentially read from hw */
};

struct a6xx_gpu_state {
        struct msm_gpu_state base;

        struct a6xx_gpu_state_obj *gmu_registers;
        int nr_gmu_registers;

        struct a6xx_gpu_state_obj *registers;
        int nr_registers;

        struct a6xx_gpu_state_obj *shaders;
        int nr_shaders;

        struct a6xx_gpu_state_obj *clusters;
        int nr_clusters;

        struct a6xx_gpu_state_obj *dbgahb_clusters;
        int nr_dbgahb_clusters;

        struct a6xx_gpu_state_obj *indexed_regs;
        int nr_indexed_regs;

        struct a6xx_gpu_state_obj *debugbus;
        int nr_debugbus;

        struct a6xx_gpu_state_obj *vbif_debugbus;

        struct a6xx_gpu_state_obj *cx_debugbus;
        int nr_cx_debugbus;

        struct msm_gpu_state_bo *gmu_log;
        struct msm_gpu_state_bo *gmu_hfi;
        struct msm_gpu_state_bo *gmu_debug;

        s32 hfi_queue_history[2][HFI_HISTORY_SZ];

        struct list_head objs;

        bool gpu_initialized;
};

static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
        in[0] = val;
        in[1] = (((u64) reg) << 44 | (1 << 21) | 1);

        return 2;
}

static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
        in[0] = target;
        in[1] = (((u64) reg) << 44 | dwords);

        return 2;
}

static inline int CRASHDUMP_FINI(u64 *in)
{
        in[0] = 0;
        in[1] = 0;

        return 2;
}

struct a6xx_crashdumper {
        void *ptr;
        struct drm_gem_object *bo;
        u64 iova;
};

struct a6xx_state_memobj {
        struct list_head node;
        unsigned long long data[];
};

static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
{
        struct a6xx_state_memobj *obj =
                kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);

        if (!obj)
                return NULL;

        list_add_tail(&obj->node, &a6xx_state->objs);
        return &obj->data;
}

static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
                size_t size)
{
        void *dst = state_kcalloc(a6xx_state, 1, size);

        if (dst)
                memcpy(dst, src, size);
        return dst;
}

/*
 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 * the rest for the data
 */
#define A6XX_CD_DATA_OFFSET     8192
#define A6XX_CD_DATA_SIZE       (SZ_1M - 8192)

static int a6xx_crashdumper_init(struct msm_gpu *gpu,
                struct a6xx_crashdumper *dumper)
{
        dumper->ptr = msm_gem_kernel_new(gpu->dev,
                SZ_1M, MSM_BO_WC, gpu->aspace,
                &dumper->bo, &dumper->iova);

        if (!IS_ERR(dumper->ptr))
                msm_gem_object_set_name(dumper->bo, "crashdump");

        return PTR_ERR_OR_ZERO(dumper->ptr);
}
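/*
 * Kick off a crashdump script assembled with the CRASHDUMP_* helpers above.
 * Each script entry is a pair of qwords: a payload (a value to write, or a
 * target iova to read into) followed by a control word with the register
 * offset in the upper bits; bit 21 plus a count of 1 marks a write. As an
 * illustration only, a minimal script that copies one dword from a
 * hypothetical register offset 0x800 into the data region and terminates
 * would be built as:
 *
 *      u64 *in = dumper->ptr;
 *
 *      in += CRASHDUMP_READ(in, 0x800, 1, dumper->iova + A6XX_CD_DATA_OFFSET);
 *      in += CRASHDUMP_FINI(in);
 *
 * The CP runs the script once CP_CRASH_DUMP_CNTL is set; completion is
 * signalled in bit 1 of CP_CRASH_DUMP_STATUS, polled below with a 10ms
 * timeout.
 */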
static int a6xx_crashdumper_run(struct msm_gpu *gpu,
                struct a6xx_crashdumper *dumper)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        u32 val;
        int ret;

        if (IS_ERR_OR_NULL(dumper->ptr))
                return -EINVAL;

        if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
                return -EINVAL;

        /* Make sure all pending memory writes are posted */
        wmb();

        gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE, dumper->iova);

        gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

        ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
                val & 0x02, 100, 10000);

        gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

        return ret;
}

/* read a value from the GX debug bus */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
                u32 *data)
{
        u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
                A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

        /* Wait 1 us to make sure the data is flowing */
        udelay(1);

        data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
        data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

        return 2;
}

#define cxdbg_write(ptr, offset, val) \
        writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
        readl((ptr) + ((offset) << 2))

/* read a value from the CX debug bus */
static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
                u32 *data)
{
        u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
                A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);

        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

        /* Wait 1 us to make sure the data is flowing */
        udelay(1);

        data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
        data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

        return 2;
}

/* Read a chunk of data from the VBIF debug bus */
static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
                u32 reg, int count, u32 *data)
{
        int i;

        gpu_write(gpu, ctrl0, reg);

        for (i = 0; i < count; i++) {
                gpu_write(gpu, ctrl1, i);
                data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
        }

        return count;
}

#define AXI_ARB_BLOCKS          2
#define XIN_AXI_BLOCKS          5
#define XIN_CORE_BLOCKS         4

#define VBIF_DEBUGBUS_BLOCK_SIZE \
        ((16 * AXI_ARB_BLOCKS) + \
         (18 * XIN_AXI_BLOCKS) + \
         (12 * XIN_CORE_BLOCKS))
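/*
 * Snapshot the VBIF test bus. The dump below walks three groups of blocks:
 * the AXI arbiter blocks (16 dwords each) and the XIN AXI blocks (18 dwords
 * each) via test bus 2, then the XIN core blocks (12 dwords each) via test
 * bus 1, which is why VBIF_DEBUGBUS_BLOCK_SIZE above is the sum of those
 * three products. The test bus clock is forced on for the duration of the
 * read and restored afterwards.
 */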
static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_gpu_state_obj *obj)
{
        u32 clk, *ptr;
        int i;

        obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
                sizeof(u32));
        if (!obj->data)
                return;

        obj->handle = NULL;

        /* Get the current clock setting */
        clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

        /* Force on the bus so we can read it */
        gpu_write(gpu, REG_A6XX_VBIF_CLKON,
                clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

        /* We will read from BUS2 first, so disable BUS1 */
        gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

        /* Enable the VBIF bus for reading */
        gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

        ptr = obj->data;

        for (i = 0; i < AXI_ARB_BLOCKS; i++)
                ptr += vbif_debugbus_read(gpu,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL0,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL1,
                        1 << (i + 16), 16, ptr);

        for (i = 0; i < XIN_AXI_BLOCKS; i++)
                ptr += vbif_debugbus_read(gpu,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL0,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL1,
                        1 << i, 18, ptr);

        /* Stop BUS2 so we can turn on BUS1 */
        gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

        for (i = 0; i < XIN_CORE_BLOCKS; i++)
                ptr += vbif_debugbus_read(gpu,
                        REG_A6XX_VBIF_TEST_BUS1_CTRL0,
                        REG_A6XX_VBIF_TEST_BUS1_CTRL1,
                        1 << i, 12, ptr);

        /* Restore the VBIF clock setting */
        gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}

static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_debugbus_block *block,
                struct a6xx_gpu_state_obj *obj)
{
        int i;
        u32 *ptr;

        obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
        if (!obj->data)
                return;

        obj->handle = block;

        for (ptr = obj->data, i = 0; i < block->count; i++)
                ptr += debugbus_read(gpu, block->id, i, ptr);
}

static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_debugbus_block *block,
                struct a6xx_gpu_state_obj *obj)
{
        int i;
        u32 *ptr;

        obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
        if (!obj->data)
                return;

        obj->handle = block;

        for (ptr = obj->data, i = 0; i < block->count; i++)
                ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
}
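/*
 * Note that every debugbus index yields two dwords (TRACE_BUF2/TRACE_BUF1),
 * which is why the helpers above size their allocations as block->count
 * u64s. The block count computed below has to match the number of
 * a6xx_get_debugbus_block() calls that follow: the base a6xx blocks, plus
 * one GBIF block on GBIF-equipped parts, plus the a650-only extras.
 */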
static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        int nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
                (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);

        if (adreno_is_a650_family(to_adreno_gpu(gpu)))
                nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks);

        a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
                        sizeof(*a6xx_state->debugbus));

        if (a6xx_state->debugbus) {
                int i;

                for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
                        a6xx_get_debugbus_block(gpu,
                                a6xx_state,
                                &a6xx_debugbus_blocks[i],
                                &a6xx_state->debugbus[i]);

                a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);

                /*
                 * GBIF has the same debugbus as the other GPU blocks, so fall
                 * back to the default path if the GPU uses GBIF. GBIF also
                 * uses exactly the same ID that VBIF did.
                 */
                if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
                        a6xx_get_debugbus_block(gpu, a6xx_state,
                                &a6xx_gbif_debugbus_block,
                                &a6xx_state->debugbus[i]);

                        a6xx_state->nr_debugbus += 1;
                }

                /* Append the a650-only blocks after the common ones */
                if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
                        for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++)
                                a6xx_get_debugbus_block(gpu,
                                        a6xx_state,
                                        &a650_debugbus_blocks[i],
                                        &a6xx_state->debugbus[a6xx_state->nr_debugbus + i]);

                        a6xx_state->nr_debugbus += ARRAY_SIZE(a650_debugbus_blocks);
                }
        }
}

static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        int debugbus_blocks_count, gbif_debugbus_blocks_count, total_debugbus_blocks;
        const u32 *debugbus_blocks, *gbif_debugbus_blocks;
        int i;

        if (adreno_is_a730(adreno_gpu)) {
                debugbus_blocks = gen7_0_0_debugbus_blocks;
                debugbus_blocks_count = ARRAY_SIZE(gen7_0_0_debugbus_blocks);
                gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks;
                gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
        } else if (adreno_is_a740_family(adreno_gpu)) {
                debugbus_blocks = gen7_2_0_debugbus_blocks;
                debugbus_blocks_count = ARRAY_SIZE(gen7_2_0_debugbus_blocks);
                gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks;
                gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
        } else {
                BUG_ON(!adreno_is_a750(adreno_gpu));
                debugbus_blocks = gen7_9_0_debugbus_blocks;
                debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_debugbus_blocks);
                gbif_debugbus_blocks = gen7_9_0_gbif_debugbus_blocks;
                gbif_debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_gbif_debugbus_blocks);
        }

        total_debugbus_blocks = debugbus_blocks_count + gbif_debugbus_blocks_count;

        a6xx_state->debugbus = state_kcalloc(a6xx_state, total_debugbus_blocks,
                        sizeof(*a6xx_state->debugbus));

        if (a6xx_state->debugbus) {
                for (i = 0; i < debugbus_blocks_count; i++) {
                        a6xx_get_debugbus_block(gpu,
                                a6xx_state, &a7xx_debugbus_blocks[debugbus_blocks[i]],
                                &a6xx_state->debugbus[i]);
                }

                for (i = 0; i < gbif_debugbus_blocks_count; i++) {
                        a6xx_get_debugbus_block(gpu,
                                a6xx_state, &a7xx_debugbus_blocks[gbif_debugbus_blocks[i]],
                                &a6xx_state->debugbus[i + debugbus_blocks_count]);
                }
        }
}
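/*
 * Set up both the GX and CX debug bus trace controls before sampling any
 * blocks. The BYTEL_0/1 values (0x76543210/0xFEDCBA98) appear to map each
 * output byte lane straight through, and the IVTL/MASKL registers are
 * cleared so no lanes are masked. The CX DBGC block lives in its own
 * register region, so it is ioremapped temporarily below.
 */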
static void a6xx_get_debugbus(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct resource *res;
        void __iomem *cxdbg = NULL;

        /* Set up the GX debug bus */

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
                A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
                A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);

        /* Set up the CX debug bus - it lives elsewhere in the system so do a
         * temporary ioremap for the registers
         */
        res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
                        "cx_dbgc");

        if (res)
                cxdbg = ioremap(res->start, resource_size(res));

        if (cxdbg) {
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
                        A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
                        A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
                        0x76543210);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
                        0xFEDCBA98);

                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
        }

        if (adreno_is_a7xx(adreno_gpu)) {
                a7xx_get_debugbus_blocks(gpu, a6xx_state);
        } else {
                a6xx_get_debugbus_blocks(gpu, a6xx_state);
        }

        /* Dump the VBIF debugbus on applicable targets */
        if (!a6xx_has_gbif(adreno_gpu)) {
                a6xx_state->vbif_debugbus =
                        state_kcalloc(a6xx_state, 1,
                                sizeof(*a6xx_state->vbif_debugbus));

                if (a6xx_state->vbif_debugbus)
                        a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
                                a6xx_state->vbif_debugbus);
        }

        if (cxdbg) {
                unsigned nr_cx_debugbus_blocks;
                const struct a6xx_debugbus_block *cx_debugbus_blocks;

                if (adreno_is_a7xx(adreno_gpu)) {
                        BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
                        cx_debugbus_blocks = a7xx_cx_debugbus_blocks;
                        nr_cx_debugbus_blocks = ARRAY_SIZE(a7xx_cx_debugbus_blocks);
                } else {
                        cx_debugbus_blocks = a6xx_cx_debugbus_blocks;
                        nr_cx_debugbus_blocks = ARRAY_SIZE(a6xx_cx_debugbus_blocks);
                }

                a6xx_state->cx_debugbus =
                        state_kcalloc(a6xx_state,
                                nr_cx_debugbus_blocks,
                                sizeof(*a6xx_state->cx_debugbus));

                if (a6xx_state->cx_debugbus) {
                        int i;

                        for (i = 0; i < nr_cx_debugbus_blocks; i++)
                                a6xx_get_cx_debugbus_block(cxdbg,
                                        a6xx_state,
                                        &cx_debugbus_blocks[i],
                                        &a6xx_state->cx_debugbus[i]);

                        a6xx_state->nr_cx_debugbus =
                                nr_cx_debugbus_blocks;
                }

                iounmap(cxdbg);
        }
}

#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
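/*
 * Register lists in this file are arrays of (start, end) pairs, and RANGE()
 * gives the inclusive length of one pair. For example, with a hypothetical
 * list u32 regs[] = { 0x0800, 0x0803, 0x0900, 0x0900 }, RANGE(regs, 0) is 4
 * and RANGE(regs, 2) is 1.
 */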
/* Read a data cluster from behind the AHB aperture */
static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_dbgahb_cluster *dbgahb,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        size_t datasize;
        int i, regcount = 0;

        for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
                int j;

                in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
                        (dbgahb->statetype + i * 2) << 8);

                for (j = 0; j < dbgahb->count; j += 2) {
                        int count = RANGE(dbgahb->registers, j);
                        u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
                                dbgahb->registers[j] - (dbgahb->base >> 2);

                        in += CRASHDUMP_READ(in, offset, count, out);

                        out += count * sizeof(u32);

                        if (i == 0)
                                regcount += count;
                }
        }

        CRASHDUMP_FINI(in);

        datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

        if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = dbgahb;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
}

static void a7xx_get_dbgahb_cluster(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct gen7_sptp_cluster_registers *dbgahb,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        size_t datasize;
        int i, regcount = 0;

        in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
                A7XX_SP_READ_SEL_LOCATION(dbgahb->location_id) |
                A7XX_SP_READ_SEL_PIPE(dbgahb->pipe_id) |
                A7XX_SP_READ_SEL_STATETYPE(dbgahb->statetype));

        for (i = 0; dbgahb->regs[i] != UINT_MAX; i += 2) {
                int count = RANGE(dbgahb->regs, i);
                u32 offset = REG_A7XX_SP_AHB_READ_APERTURE +
                        dbgahb->regs[i] - dbgahb->regbase;

                in += CRASHDUMP_READ(in, offset, count, out);

                out += count * sizeof(u32);
                regcount += count;
        }

        CRASHDUMP_FINI(in);

        datasize = regcount * sizeof(u32);

        if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = dbgahb;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
}
static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        int i;

        a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
                ARRAY_SIZE(a6xx_dbgahb_clusters),
                sizeof(*a6xx_state->dbgahb_clusters));

        if (!a6xx_state->dbgahb_clusters)
                return;

        a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);

        for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
                a6xx_get_dbgahb_cluster(gpu, a6xx_state,
                        &a6xx_dbgahb_clusters[i],
                        &a6xx_state->dbgahb_clusters[i], dumper);
}

static void a7xx_get_dbgahb_clusters(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        int i;
        const struct gen7_sptp_cluster_registers *dbgahb_clusters;
        unsigned dbgahb_clusters_size;

        if (adreno_is_a730(adreno_gpu)) {
                dbgahb_clusters = gen7_0_0_sptp_clusters;
                dbgahb_clusters_size = ARRAY_SIZE(gen7_0_0_sptp_clusters);
        } else {
                BUG_ON(!adreno_is_a740_family(adreno_gpu));
                dbgahb_clusters = gen7_2_0_sptp_clusters;
                dbgahb_clusters_size = ARRAY_SIZE(gen7_2_0_sptp_clusters);
        }

        a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
                dbgahb_clusters_size,
                sizeof(*a6xx_state->dbgahb_clusters));

        if (!a6xx_state->dbgahb_clusters)
                return;

        a6xx_state->nr_dbgahb_clusters = dbgahb_clusters_size;

        for (i = 0; i < dbgahb_clusters_size; i++)
                a7xx_get_dbgahb_cluster(gpu, a6xx_state,
                        &dbgahb_clusters[i],
                        &a6xx_state->dbgahb_clusters[i], dumper);
}

/* Read a data cluster from the CP aperture with the crashdumper */
static void a6xx_get_cluster(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_cluster *cluster,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        size_t datasize;
        int i, regcount = 0;
        u32 id = cluster->id;

        /* Skip registers that are not present on older generations */
        if (!adreno_is_a660_family(adreno_gpu) &&
                        cluster->registers == a660_fe_cluster)
                return;

        if (adreno_is_a650_family(adreno_gpu) &&
                        cluster->registers == a6xx_ps_cluster)
                id = CLUSTER_VPC_PS;

        /* Some clusters need a selector register to be programmed too */
        if (cluster->sel_reg)
                in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);

        for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
                int j;

                in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
                        (id << 8) | (i << 4) | i);

                for (j = 0; j < cluster->count; j += 2) {
                        int count = RANGE(cluster->registers, j);

                        in += CRASHDUMP_READ(in, cluster->registers[j],
                                count, out);

                        out += count * sizeof(u32);

                        if (i == 0)
                                regcount += count;
                }
        }

        CRASHDUMP_FINI(in);

        datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

        if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = cluster;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
}
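/*
 * On a7xx the aperture selector is split into explicit PIPE/CLUSTER/CONTEXT
 * bitfields, replacing the packed (id << 8) | (ctxt << 4) | ctxt value that
 * a6xx_get_cluster() programs above.
 */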
static void a7xx_get_cluster(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct gen7_cluster_registers *cluster,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        size_t datasize;
        int i, regcount = 0;

        /* Some clusters need a selector register to be programmed too */
        if (cluster->sel)
                in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val);

        in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD,
                A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) |
                A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) |
                A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id));

        for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) {
                int count = RANGE(cluster->regs, i);

                in += CRASHDUMP_READ(in, cluster->regs[i],
                        count, out);

                out += count * sizeof(u32);
                regcount += count;
        }

        CRASHDUMP_FINI(in);

        datasize = regcount * sizeof(u32);

        if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = cluster;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
}

static void a6xx_get_clusters(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        int i;

        a6xx_state->clusters = state_kcalloc(a6xx_state,
                ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));

        if (!a6xx_state->clusters)
                return;

        a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);

        for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
                a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
                        &a6xx_state->clusters[i], dumper);
}

static void a7xx_get_clusters(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        int i;
        const struct gen7_cluster_registers *clusters;
        unsigned clusters_size;

        if (adreno_is_a730(adreno_gpu)) {
                clusters = gen7_0_0_clusters;
                clusters_size = ARRAY_SIZE(gen7_0_0_clusters);
        } else if (adreno_is_a740_family(adreno_gpu)) {
                clusters = gen7_2_0_clusters;
                clusters_size = ARRAY_SIZE(gen7_2_0_clusters);
        } else {
                BUG_ON(!adreno_is_a750(adreno_gpu));
                clusters = gen7_9_0_clusters;
                clusters_size = ARRAY_SIZE(gen7_9_0_clusters);
        }

        a6xx_state->clusters = state_kcalloc(a6xx_state,
                clusters_size, sizeof(*a6xx_state->clusters));

        if (!a6xx_state->clusters)
                return;

        a6xx_state->nr_clusters = clusters_size;

        for (i = 0; i < clusters_size; i++)
                a7xx_get_cluster(gpu, a6xx_state, &clusters[i],
                        &a6xx_state->clusters[i], dumper);
}
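/*
 * Each shader block is dumped once per bank: the bank is selected by
 * writing (type << 8) | bank to HLSQ_DBG_READ_SEL, after which the block
 * contents are visible through the HLSQ AHB read aperture. The resulting
 * snapshot holds block->size dwords for each of the A6XX_NUM_SHADER_BANKS
 * banks, back to back.
 */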
/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
static void a6xx_get_shader_block(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_shader_block *block,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
        int i;

        if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
                return;

        for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
                in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
                        (block->type << 8) | i);

                in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
                        block->size, out);

                out += block->size * sizeof(u32);
        }

        CRASHDUMP_FINI(in);

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = block;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
}

static void a7xx_get_shader_block(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct gen7_shader_block *block,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        size_t datasize = block->size * block->num_sps * block->num_usptps * sizeof(u32);
        int i, j;

        if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
                return;

        if (adreno_is_a730(adreno_gpu))
                gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 3);

        for (i = 0; i < block->num_sps; i++) {
                for (j = 0; j < block->num_usptps; j++) {
                        in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
                                A7XX_SP_READ_SEL_LOCATION(block->location) |
                                A7XX_SP_READ_SEL_PIPE(block->pipeid) |
                                A7XX_SP_READ_SEL_STATETYPE(block->statetype) |
                                A7XX_SP_READ_SEL_USPTP(j) |
                                A7XX_SP_READ_SEL_SPTP(i));

                        in += CRASHDUMP_READ(in, REG_A7XX_SP_AHB_READ_APERTURE,
                                block->size, out);

                        out += block->size * sizeof(u32);
                }
        }

        CRASHDUMP_FINI(in);

        if (a6xx_crashdumper_run(gpu, dumper))
                goto out;

        obj->handle = block;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);

out:
        if (adreno_is_a730(adreno_gpu))
                gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 0);
}

static void a6xx_get_shaders(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        int i;

        a6xx_state->shaders = state_kcalloc(a6xx_state,
                ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));

        if (!a6xx_state->shaders)
                return;

        a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);

        for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
                a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
                        &a6xx_state->shaders[i], dumper);
}

static void a7xx_get_shaders(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        const struct gen7_shader_block *shader_blocks;
        unsigned num_shader_blocks;
        int i;

        if (adreno_is_a730(adreno_gpu)) {
                shader_blocks = gen7_0_0_shader_blocks;
                num_shader_blocks = ARRAY_SIZE(gen7_0_0_shader_blocks);
        } else if (adreno_is_a740_family(adreno_gpu)) {
                shader_blocks = gen7_2_0_shader_blocks;
                num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks);
        } else {
                BUG_ON(!adreno_is_a750(adreno_gpu));
                shader_blocks = gen7_9_0_shader_blocks;
                num_shader_blocks = ARRAY_SIZE(gen7_9_0_shader_blocks);
        }

        a6xx_state->shaders = state_kcalloc(a6xx_state,
                num_shader_blocks, sizeof(*a6xx_state->shaders));

        if (!a6xx_state->shaders)
                return;

        a6xx_state->nr_shaders = num_shader_blocks;

        for (i = 0; i < num_shader_blocks; i++)
                a7xx_get_shader_block(gpu, a6xx_state, &shader_blocks[i],
                        &a6xx_state->shaders[i], dumper);
}
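/*
 * HLSQ registers are also fetched through the AHB read aperture: regs->val1
 * is written to HLSQ_DBG_READ_SEL to pick the bank, and regs->val0 holds
 * the aperture base in bytes (hence the >> 2 below), so each register is
 * read at APERTURE + reg - (val0 >> 2).
 */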
/* Read registers from behind the HLSQ aperture with the crashdumper */
static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_registers *regs,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        int i, regcount = 0;

        in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);

        for (i = 0; i < regs->count; i += 2) {
                u32 count = RANGE(regs->registers, i);
                u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
                        regs->registers[i] - (regs->val0 >> 2);

                in += CRASHDUMP_READ(in, offset, count, out);

                out += count * sizeof(u32);
                regcount += count;
        }

        CRASHDUMP_FINI(in);

        if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = regs;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                regcount * sizeof(u32));
}

/* Read a block of registers using the crashdumper */
static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_registers *regs,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        int i, regcount = 0;

        /* Skip unsupported registers on older generations */
        if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
                        (regs->registers == a660_registers))
                return;

        /* Some blocks might need to program a selector register first */
        if (regs->val0)
                in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);

        for (i = 0; i < regs->count; i += 2) {
                u32 count = RANGE(regs->registers, i);

                in += CRASHDUMP_READ(in, regs->registers[i], count, out);

                out += count * sizeof(u32);
                regcount += count;
        }

        CRASHDUMP_FINI(in);

        if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = regs;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                regcount * sizeof(u32));
}

static void a7xx_get_crashdumper_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct gen7_reg_list *regs,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        int i, regcount = 0;

        /* Some blocks might need to program a selector register first */
        if (regs->sel)
                in += CRASHDUMP_WRITE(in, regs->sel->cd_reg, regs->sel->val);

        for (i = 0; regs->regs[i] != UINT_MAX; i += 2) {
                u32 count = RANGE(regs->regs, i);

                in += CRASHDUMP_READ(in, regs->regs[i], count, out);

                out += count * sizeof(u32);
                regcount += count;
        }

        CRASHDUMP_FINI(in);

        if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = regs->regs;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                regcount * sizeof(u32));
}

/* Read a block of registers via AHB */
static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_registers *regs,
                struct a6xx_gpu_state_obj *obj)
{
        int i, regcount = 0, index = 0;

        /* Skip unsupported registers on older generations */
        if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
                        (regs->registers == a660_registers))
                return;

        for (i = 0; i < regs->count; i += 2)
                regcount += RANGE(regs->registers, i);

        obj->handle = (const void *) regs;
        obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
        if (!obj->data)
                return;

        for (i = 0; i < regs->count; i += 2) {
                u32 count = RANGE(regs->registers, i);
                int j;

                for (j = 0; j < count; j++)
                        obj->data[index++] = gpu_read(gpu,
                                regs->registers[i] + j);
        }
}

static void a7xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const u32 *regs,
                struct a6xx_gpu_state_obj *obj)
{
        int i, regcount = 0, index = 0;

        for (i = 0; regs[i] != UINT_MAX; i += 2)
                regcount += RANGE(regs, i);

        obj->handle = (const void *) regs;
        obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
        if (!obj->data)
                return;

        for (i = 0; regs[i] != UINT_MAX; i += 2) {
                u32 count = RANGE(regs, i);
                int j;

                for (j = 0; j < count; j++)
                        obj->data[index++] = gpu_read(gpu, regs[i] + j);
        }
}

static void a7xx_get_ahb_gpu_reglist(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct gen7_reg_list *regs,
                struct a6xx_gpu_state_obj *obj)
{
        if (regs->sel)
                gpu_write(gpu, regs->sel->host_reg, regs->sel->val);

        a7xx_get_ahb_gpu_registers(gpu, a6xx_state, regs->regs, obj);
}
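/*
 * GMU registers live in two address spaces: most are read through the
 * regular GMU mapping, while the RSCC block has its own region, selected
 * via the rscc flag below.
 */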
/* Read a block of GMU registers */
static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_registers *regs,
                struct a6xx_gpu_state_obj *obj,
                bool rscc)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
        int i, regcount = 0, index = 0;

        for (i = 0; i < regs->count; i += 2)
                regcount += RANGE(regs->registers, i);

        obj->handle = (const void *) regs;
        obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
        if (!obj->data)
                return;

        for (i = 0; i < regs->count; i += 2) {
                u32 count = RANGE(regs->registers, i);
                int j;

                for (j = 0; j < count; j++) {
                        u32 offset = regs->registers[i] + j;
                        u32 val;

                        if (rscc)
                                val = gmu_read_rscc(gmu, offset);
                        else
                                val = gmu_read(gmu, offset);

                        obj->data[index++] = val;
                }
        }
}

static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

        a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
                3, sizeof(*a6xx_state->gmu_registers));

        if (!a6xx_state->gmu_registers)
                return;

        a6xx_state->nr_gmu_registers = 3;

        /* Get the CX GMU registers from AHB */
        _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
                &a6xx_state->gmu_registers[0], false);
        _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
                &a6xx_state->gmu_registers[1], true);

        if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
                return;

        /* Set the fence to ALLOW mode so we can access the registers */
        gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);

        _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
                &a6xx_state->gmu_registers[2], false);
}

static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
                struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
{
        struct msm_gpu_state_bo *snapshot;

        if (!bo->size)
                return NULL;

        snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
        if (!snapshot)
                return NULL;

        snapshot->iova = bo->iova;
        snapshot->size = bo->size;
        snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
        if (!snapshot->data)
                return NULL;

        memcpy(snapshot->data, bo->virt, bo->size);

        return snapshot;
}

static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
        unsigned i, j;

        BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));

        for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
                struct a6xx_hfi_queue *queue = &gmu->queues[i];

                for (j = 0; j < HFI_HISTORY_SZ; j++) {
                        unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;

                        a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
                }
        }
}
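/*
 * Total number of register blocks captured below: one AHB block and one
 * GBIF or VBIF block depending on the target, plus the static a6xx reglist
 * and the HLSQ reglist. When the crashdumper is unavailable the HLSQ
 * entries are left empty (NULL handle) and are skipped at print time.
 */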
#define A6XX_REGLIST_SIZE       1
#define A6XX_GBIF_REGLIST_SIZE  1

static void a6xx_get_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        int i, count = A6XX_REGLIST_SIZE +
                ARRAY_SIZE(a6xx_reglist) +
                ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
        int index = 0;
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

        a6xx_state->registers = state_kcalloc(a6xx_state,
                count, sizeof(*a6xx_state->registers));

        if (!a6xx_state->registers)
                return;

        a6xx_state->nr_registers = count;

        a6xx_get_ahb_gpu_registers(gpu,
                a6xx_state, &a6xx_ahb_reglist,
                &a6xx_state->registers[index++]);

        if (a6xx_has_gbif(adreno_gpu))
                a6xx_get_ahb_gpu_registers(gpu,
                        a6xx_state, &a6xx_gbif_reglist,
                        &a6xx_state->registers[index++]);
        else
                a6xx_get_ahb_gpu_registers(gpu,
                        a6xx_state, &a6xx_vbif_reglist,
                        &a6xx_state->registers[index++]);

        if (!dumper) {
                /*
                 * We can't use the crashdumper when the SMMU is stalled,
                 * because the GPU has no memory access until we resume
                 * translation (but we don't want to do that until after
                 * we have captured as much useful GPU state as possible).
                 * So instead collect registers via the CPU:
                 */
                for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
                        a6xx_get_ahb_gpu_registers(gpu,
                                a6xx_state, &a6xx_reglist[i],
                                &a6xx_state->registers[index++]);
                return;
        }

        for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
                a6xx_get_crashdumper_registers(gpu,
                        a6xx_state, &a6xx_reglist[i],
                        &a6xx_state->registers[index++],
                        dumper);

        for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
                a6xx_get_crashdumper_hlsq_registers(gpu,
                        a6xx_state, &a6xx_hlsq_reglist[i],
                        &a6xx_state->registers[index++],
                        dumper);
}
#define A7XX_PRE_CRASHDUMPER_SIZE       1
#define A7XX_POST_CRASHDUMPER_SIZE      1

static void a7xx_get_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        int i, count;
        int index = 0;
        const u32 *pre_crashdumper_regs;
        const struct gen7_reg_list *reglist;

        if (adreno_is_a730(adreno_gpu)) {
                reglist = gen7_0_0_reg_list;
                pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
        } else if (adreno_is_a740_family(adreno_gpu)) {
                reglist = gen7_2_0_reg_list;
                pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
        } else {
                BUG_ON(!adreno_is_a750(adreno_gpu));
                reglist = gen7_9_0_reg_list;
                pre_crashdumper_regs = gen7_9_0_pre_crashdumper_gpu_registers;
        }

        count = A7XX_PRE_CRASHDUMPER_SIZE + A7XX_POST_CRASHDUMPER_SIZE;

        /* The downstream reglist contains registers in other memory regions
         * (cx_misc/cx_mem and cx_dbgc) and we need to plumb through their
         * offsets and map them to read them on the CPU. For now only read the
         * first region which is the main one.
         */
        if (dumper) {
                for (i = 0; reglist[i].regs; i++)
                        count++;
        } else {
                count++;
        }

        a6xx_state->registers = state_kcalloc(a6xx_state,
                count, sizeof(*a6xx_state->registers));

        if (!a6xx_state->registers)
                return;

        a6xx_state->nr_registers = count;

        a7xx_get_ahb_gpu_registers(gpu, a6xx_state, pre_crashdumper_regs,
                &a6xx_state->registers[index++]);

        if (!dumper) {
                a7xx_get_ahb_gpu_reglist(gpu,
                        a6xx_state, &reglist[0],
                        &a6xx_state->registers[index++]);
                return;
        }

        for (i = 0; reglist[i].regs; i++)
                a7xx_get_crashdumper_registers(gpu,
                        a6xx_state, &reglist[i],
                        &a6xx_state->registers[index++],
                        dumper);
}

static void a7xx_get_post_crashdumper_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        const u32 *regs;

        BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu) ||
                 adreno_is_a750(adreno_gpu)));
        regs = gen7_0_0_post_crashdumper_registers;

        a7xx_get_ahb_gpu_registers(gpu,
                a6xx_state, regs,
                &a6xx_state->registers[a6xx_state->nr_registers - 1]);
}

static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu)
{
        /*
         * The value at [16:31] is in 4dword units. Convert it to dwords:
         * (val >> 16) << 2 is the same as val >> 14.
         */
        return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14;
}

static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu)
{
        /*
         * The value at CP_ROQ_THRESHOLDS_2[20:31] is in 4dword units.
         * That register however is not directly accessible from APSS on A7xx.
         * Program the SQE_UCODE_DBG_ADDR with offset=0x70d3 and read the value.
         */
        gpu_write(gpu, REG_A6XX_CP_SQE_UCODE_DBG_ADDR, 0x70d3);

        return 4 * (gpu_read(gpu, REG_A6XX_CP_SQE_UCODE_DBG_DATA) >> 20);
}

/* Read a block of data from an indexed register pair */
static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_indexed_registers *indexed,
                struct a6xx_gpu_state_obj *obj)
{
        u32 count = indexed->count;
        int i;

        obj->handle = (const void *) indexed;
        if (indexed->count_fn)
                count = indexed->count_fn(gpu);

        obj->data = state_kcalloc(a6xx_state, count, sizeof(u32));
        obj->count = count;
        if (!obj->data)
                return;

        /* All the indexed banks start at address 0 */
        gpu_write(gpu, indexed->addr, 0);

        /* Read the data - each read increments the internal address by 1 */
        for (i = 0; i < count; i++)
                obj->data[i] = gpu_read(gpu, indexed->data);
}
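/*
 * The CP mempool is dumped through an indexed register pair like the other
 * banks, but the pool is live while the CP runs. On most targets the dump
 * below briefly programs the pool size to 0 to stabilize it, then patches
 * the saved size back into the dumped image (offset 0x2000) so the snapshot
 * stays self-consistent. a650 family parts instead toggle a CP_CHICKEN_DBG
 * bit around the dump.
 */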
static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        u32 mempool_size;
        int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
        int i;

        a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
                sizeof(*a6xx_state->indexed_regs));
        if (!a6xx_state->indexed_regs)
                return;

        for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
                a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
                        &a6xx_state->indexed_regs[i]);

        if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
                u32 val;

                val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG);
                gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4);

                /* Get the contents of the CP mempool */
                a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
                        &a6xx_state->indexed_regs[i]);

                gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val);
                a6xx_state->nr_indexed_regs = count;
                return;
        }

        /* Set the CP mempool size to 0 to stabilize it while dumping */
        mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
        gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);

        /* Get the contents of the CP mempool */
        a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
                &a6xx_state->indexed_regs[i]);

        /*
         * Offset 0x2000 in the mempool is the size - copy the saved size over
         * so the data is consistent
         */
        a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;

        /* Restore the size in the hardware */
        gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);

        a6xx_state->nr_indexed_regs = count;
}

static void a7xx_get_indexed_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        const struct a6xx_indexed_registers *indexed_regs;
        int i, indexed_count, mempool_count;

        if (adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)) {
                indexed_regs = a7xx_indexed_reglist;
                indexed_count = ARRAY_SIZE(a7xx_indexed_reglist);
        } else {
                BUG_ON(!adreno_is_a750(adreno_gpu));
                indexed_regs = gen7_9_0_cp_indexed_reg_list;
                indexed_count = ARRAY_SIZE(gen7_9_0_cp_indexed_reg_list);
        }

        mempool_count = ARRAY_SIZE(a7xx_cp_bv_mempool_indexed);

        a6xx_state->indexed_regs = state_kcalloc(a6xx_state,
                indexed_count + mempool_count,
                sizeof(*a6xx_state->indexed_regs));
        if (!a6xx_state->indexed_regs)
                return;

        a6xx_state->nr_indexed_regs = indexed_count + mempool_count;

        /* First read the common regs */
        for (i = 0; i < indexed_count; i++)
                a6xx_get_indexed_regs(gpu, a6xx_state, &indexed_regs[i],
                        &a6xx_state->indexed_regs[i]);

        gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, 0, BIT(2));
        gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, 0, BIT(2));

        /* Get the contents of the CP_BV mempool */
        for (i = 0; i < mempool_count; i++)
                a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_cp_bv_mempool_indexed[i],
                        &a6xx_state->indexed_regs[indexed_count + i]);

        gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(2), 0);
        gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, BIT(2), 0);
}
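/*
 * Top-level snapshot entry point. Capture order matters here: GMU state is
 * grabbed first (it does not depend on GX power), then the indexed
 * registers, then everything that needs the CP crashdumper. The crashdumper
 * writes its results to GPU memory, so it has to be skipped when the SMMU
 * is stalled on a fault - resuming translation before the dump would lose
 * the state we are trying to capture.
 */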
struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
{
        struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
                GFP_KERNEL);
        bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
                        A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);

        if (!a6xx_state)
                return ERR_PTR(-ENOMEM);

        INIT_LIST_HEAD(&a6xx_state->objs);

        /* Get the generic state from the adreno core */
        adreno_gpu_state_get(gpu, &a6xx_state->base);

        if (!adreno_has_gmu_wrapper(adreno_gpu)) {
                a6xx_get_gmu_registers(gpu, a6xx_state);

                a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
                a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
                a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);

                a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
        }

        /* If GX isn't on, the rest of the data isn't going to be accessible */
        if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
                return &a6xx_state->base;

        /* Get the banks of indexed registers */
        if (adreno_is_a7xx(adreno_gpu))
                a7xx_get_indexed_registers(gpu, a6xx_state);
        else
                a6xx_get_indexed_registers(gpu, a6xx_state);

        /*
         * Try to initialize the crashdumper, if we are not dumping state
         * with the SMMU stalled. The crashdumper needs memory access to
         * write out GPU state, so we need to skip this when the SMMU is
         * stalled in response to an iova fault
         */
        if (!stalled && !gpu->needs_hw_init &&
            !a6xx_crashdumper_init(gpu, &_dumper)) {
                dumper = &_dumper;
        }

        if (adreno_is_a7xx(adreno_gpu)) {
                a7xx_get_registers(gpu, a6xx_state, dumper);

                if (dumper) {
                        a7xx_get_shaders(gpu, a6xx_state, dumper);
                        a7xx_get_clusters(gpu, a6xx_state, dumper);
                        a7xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);

                        msm_gem_kernel_put(dumper->bo, gpu->aspace);
                }

                a7xx_get_post_crashdumper_registers(gpu, a6xx_state);
        } else {
                a6xx_get_registers(gpu, a6xx_state, dumper);

                if (dumper) {
                        a6xx_get_shaders(gpu, a6xx_state, dumper);
                        a6xx_get_clusters(gpu, a6xx_state, dumper);
                        a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);

                        msm_gem_kernel_put(dumper->bo, gpu->aspace);
                }
        }

        if (snapshot_debugbus)
                a6xx_get_debugbus(gpu, a6xx_state);

        a6xx_state->gpu_initialized = !gpu->needs_hw_init;

        return &a6xx_state->base;
}

static void a6xx_gpu_state_destroy(struct kref *kref)
{
        struct a6xx_state_memobj *obj, *tmp;
        struct msm_gpu_state *state = container_of(kref,
                        struct msm_gpu_state, ref);
        struct a6xx_gpu_state *a6xx_state = container_of(state,
                        struct a6xx_gpu_state, base);

        if (a6xx_state->gmu_log)
                kvfree(a6xx_state->gmu_log->data);

        if (a6xx_state->gmu_hfi)
                kvfree(a6xx_state->gmu_hfi->data);

        if (a6xx_state->gmu_debug)
                kvfree(a6xx_state->gmu_debug->data);

        list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
                list_del(&obj->node);
                kvfree(obj);
        }

        adreno_gpu_state_destroy(state);
        kfree(a6xx_state);
}

int a6xx_gpu_state_put(struct msm_gpu_state *state)
{
        if (IS_ERR_OR_NULL(state))
                return 1;

        return kref_put(&state->ref, a6xx_gpu_state_destroy);
}
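/*
 * 0xdeafbead appears to serve as a "not captured" poison marker for
 * register slots that were never actually filled by the dump; entries
 * holding it are elided from the output below rather than printed as
 * garbage.
 */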
static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
                struct drm_printer *p)
{
        int i, index = 0;

        if (!data)
                return;

        for (i = 0; i < count; i += 2) {
                u32 count = RANGE(registers, i);
                u32 offset = registers[i];
                int j;

                for (j = 0; j < count; index++, offset++, j++) {
                        if (data[index] == 0xdeafbead)
                                continue;

                        drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
                                offset << 2, data[index]);
                }
        }
}

static void a7xx_show_registers_indented(const u32 *registers, u32 *data,
                struct drm_printer *p, unsigned indent)
{
        int i, index = 0;

        for (i = 0; registers[i] != UINT_MAX; i += 2) {
                u32 count = RANGE(registers, i);
                u32 offset = registers[i];
                int j;

                for (j = 0; j < count; index++, offset++, j++) {
                        int k;

                        if (data[index] == 0xdeafbead)
                                continue;

                        for (k = 0; k < indent; k++)
                                drm_printf(p, "  ");

                        drm_printf(p, "- { offset: 0x%06x, value: 0x%08x }\n",
                                offset << 2, data[index]);
                }
        }
}

static void a7xx_show_registers(const u32 *registers, u32 *data, struct drm_printer *p)
{
        a7xx_show_registers_indented(registers, data, p, 1);
}
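/*
 * Encode a buffer as ascii85 for the crash dump. Trailing zero dwords are
 * trimmed first (datalen tracks the last non-zero word), so an all-zero
 * buffer produces no "data:" node at all.
 */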
static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
{
        char out[ASCII85_BUFSZ];
        long i, l, datalen = 0;

        for (i = 0; i < len >> 2; i++) {
                if (data[i])
                        datalen = (i + 1) << 2;
        }

        if (datalen == 0)
                return;

        drm_puts(p, "    data: !!ascii85 |\n");
        drm_puts(p, "     ");

        l = ascii85_encode_len(datalen);

        for (i = 0; i < l; i++)
                drm_puts(p, ascii85_encode(data[i], out));

        drm_puts(p, "\n");
}

static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
        drm_puts(p, fmt);
        drm_puts(p, name);
        drm_puts(p, "\n");
}

static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct a6xx_shader_block *block = obj->handle;
        int i;

        if (!obj->handle)
                return;

        print_name(p, "  - type: ", block->name);

        for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
                drm_printf(p, "    - bank: %d\n", i);
                drm_printf(p, "      size: %d\n", block->size);

                if (!obj->data)
                        continue;

                print_ascii85(p, block->size << 2,
                        obj->data + (block->size * i));
        }
}

static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct gen7_shader_block *block = obj->handle;
        int i, j;
        u32 *data = obj->data;

        if (!obj->handle)
                return;

        print_name(p, "  - type: ", a7xx_statetype_names[block->statetype]);
        print_name(p, "  - pipe: ", a7xx_pipe_names[block->pipeid]);

        for (i = 0; i < block->num_sps; i++) {
                drm_printf(p, "    - sp: %d\n", i);

                for (j = 0; j < block->num_usptps; j++) {
                        drm_printf(p, "      - usptp: %d\n", j);
                        drm_printf(p, "        size: %d\n", block->size);

                        if (!obj->data)
                                continue;

                        print_ascii85(p, block->size << 2, data);

                        data += block->size;
                }
        }
}

static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
                struct drm_printer *p)
{
        int ctx, index = 0;

        for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
                int j;

                drm_printf(p, "    - context: %d\n", ctx);

                for (j = 0; j < size; j += 2) {
                        u32 count = RANGE(registers, j);
                        u32 offset = registers[j];
                        int k;

                        for (k = 0; k < count; index++, offset++, k++) {
                                if (data[index] == 0xdeafbead)
                                        continue;

                                drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
                                        offset << 2, data[index]);
                        }
                }
        }
}

static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;

        if (dbgahb) {
                print_name(p, "  - cluster-name: ", dbgahb->name);
                a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
                        obj->data, p);
        }
}

static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct a6xx_cluster *cluster = obj->handle;

        if (cluster) {
                print_name(p, "  - cluster-name: ", cluster->name);
                a6xx_show_cluster_data(cluster->registers, cluster->count,
                        obj->data, p);
        }
}

static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct gen7_sptp_cluster_registers *dbgahb = obj->handle;

        if (dbgahb) {
                print_name(p, "  - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]);
                print_name(p, "    - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]);
                drm_printf(p, "      - context: %d\n", dbgahb->context_id);
                a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4);
        }
}

static void a7xx_show_cluster(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct gen7_cluster_registers *cluster = obj->handle;

        if (cluster) {
                int context = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;

                print_name(p, "  - pipe: ", a7xx_pipe_names[cluster->pipe_id]);
                print_name(p, "    - cluster-name: ", a7xx_cluster_names[cluster->cluster_id]);
                drm_printf(p, "      - context: %d\n", context);
                a7xx_show_registers_indented(cluster->regs, obj->data, p, 4);
        }
}

static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct a6xx_indexed_registers *indexed = obj->handle;

        if (!indexed)
                return;

        print_name(p, "  - regs-name: ", indexed->name);
        drm_printf(p, "    dwords: %d\n", obj->count);

        print_ascii85(p, obj->count << 2, obj->data);
}

static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
                u32 *data, struct drm_printer *p)
{
        if (block) {
                print_name(p, "  - debugbus-block: ", block->name);

                /*
                 * count for regular debugbus data is in quadwords,
                 * but print the size in dwords for consistency
                 */
                drm_printf(p, "    count: %d\n", block->count << 1);

                print_ascii85(p, block->count << 3, data);
        }
}
static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
                struct drm_printer *p)
{
        int i;

        for (i = 0; i < a6xx_state->nr_debugbus; i++) {
                struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];

                a6xx_show_debugbus_block(obj->handle, obj->data, p);
        }

        if (a6xx_state->vbif_debugbus) {
                struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;

                drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
                drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);

                /* vbif debugbus data is in dwords. Confusing, huh? */
                print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
        }

        for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
                struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];

                a6xx_show_debugbus_block(obj->handle, obj->data, p);
        }
}
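/*
 * Everything below prints into the devcoredump as YAML-ish text. A register
 * block, for instance, comes out as:
 *
 *      registers:
 *        - { offset: 0x000400, value: 0x00000001 }
 *
 * (offsets are byte addresses, hence the << 2 applied to the dword offsets;
 * the values shown here are illustrative only.)
 */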
void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
                struct drm_printer *p)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu_state *a6xx_state = container_of(state,
                        struct a6xx_gpu_state, base);
        int i;

        if (IS_ERR_OR_NULL(state))
                return;

        drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);

        adreno_show(gpu, state, p);

        drm_puts(p, "gmu-log:\n");
        if (a6xx_state->gmu_log) {
                struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;

                drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
                drm_printf(p, "    size: %zu\n", gmu_log->size);
                adreno_show_object(p, &gmu_log->data, gmu_log->size,
                        &gmu_log->encoded);
        }

        drm_puts(p, "gmu-hfi:\n");
        if (a6xx_state->gmu_hfi) {
                struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
                unsigned i, j;

                drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
                drm_printf(p, "    size: %zu\n", gmu_hfi->size);
                for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
                        drm_printf(p, "    queue-history[%u]:", i);
                        for (j = 0; j < HFI_HISTORY_SZ; j++) {
                                drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
                        }
                        drm_printf(p, "\n");
                }
                adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
                        &gmu_hfi->encoded);
        }

        drm_puts(p, "gmu-debug:\n");
        if (a6xx_state->gmu_debug) {
                struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;

                drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
                drm_printf(p, "    size: %zu\n", gmu_debug->size);
                adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
                        &gmu_debug->encoded);
        }

        drm_puts(p, "registers:\n");
        for (i = 0; i < a6xx_state->nr_registers; i++) {
                struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];

                if (!obj->handle)
                        continue;

                if (adreno_is_a7xx(adreno_gpu)) {
                        a7xx_show_registers(obj->handle, obj->data, p);
                } else {
                        const struct a6xx_registers *regs = obj->handle;

                        a6xx_show_registers(regs->registers, obj->data, regs->count, p);
                }
        }

        drm_puts(p, "registers-gmu:\n");
        for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
                struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
                const struct a6xx_registers *regs = obj->handle;

                if (!obj->handle)
                        continue;

                a6xx_show_registers(regs->registers, obj->data, regs->count, p);
        }

        drm_puts(p, "indexed-registers:\n");
        for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
                a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);

        drm_puts(p, "shader-blocks:\n");
        for (i = 0; i < a6xx_state->nr_shaders; i++) {
                if (adreno_is_a7xx(adreno_gpu))
                        a7xx_show_shader(&a6xx_state->shaders[i], p);
                else
                        a6xx_show_shader(&a6xx_state->shaders[i], p);
        }

        drm_puts(p, "clusters:\n");
        for (i = 0; i < a6xx_state->nr_clusters; i++) {
                if (adreno_is_a7xx(adreno_gpu))
                        a7xx_show_cluster(&a6xx_state->clusters[i], p);
                else
                        a6xx_show_cluster(&a6xx_state->clusters[i], p);
        }

        for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) {
                if (adreno_is_a7xx(adreno_gpu))
                        a7xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
                else
                        a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
        }

        drm_puts(p, "debugbus:\n");
        a6xx_show_debugbus(a6xx_state, p);
}