1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */ 3 4 #include <linux/ascii85.h> 5 #include "msm_gem.h" 6 #include "a6xx_gpu.h" 7 #include "a6xx_gmu.h" 8 #include "a6xx_gpu_state.h" 9 #include "a6xx_gmu.xml.h" 10 11 struct a6xx_gpu_state_obj { 12 const void *handle; 13 u32 *data; 14 }; 15 16 struct a6xx_gpu_state { 17 struct msm_gpu_state base; 18 19 struct a6xx_gpu_state_obj *gmu_registers; 20 int nr_gmu_registers; 21 22 struct a6xx_gpu_state_obj *registers; 23 int nr_registers; 24 25 struct a6xx_gpu_state_obj *shaders; 26 int nr_shaders; 27 28 struct a6xx_gpu_state_obj *clusters; 29 int nr_clusters; 30 31 struct a6xx_gpu_state_obj *dbgahb_clusters; 32 int nr_dbgahb_clusters; 33 34 struct a6xx_gpu_state_obj *indexed_regs; 35 int nr_indexed_regs; 36 37 struct a6xx_gpu_state_obj *debugbus; 38 int nr_debugbus; 39 40 struct a6xx_gpu_state_obj *vbif_debugbus; 41 42 struct a6xx_gpu_state_obj *cx_debugbus; 43 int nr_cx_debugbus; 44 45 struct msm_gpu_state_bo *gmu_log; 46 struct msm_gpu_state_bo *gmu_hfi; 47 struct msm_gpu_state_bo *gmu_debug; 48 49 s32 hfi_queue_history[2][HFI_HISTORY_SZ]; 50 51 struct list_head objs; 52 53 bool gpu_initialized; 54 }; 55 56 static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val) 57 { 58 in[0] = val; 59 in[1] = (((u64) reg) << 44 | (1 << 21) | 1); 60 61 return 2; 62 } 63 64 static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target) 65 { 66 in[0] = target; 67 in[1] = (((u64) reg) << 44 | dwords); 68 69 return 2; 70 } 71 72 static inline int CRASHDUMP_FINI(u64 *in) 73 { 74 in[0] = 0; 75 in[1] = 0; 76 77 return 2; 78 } 79 80 struct a6xx_crashdumper { 81 void *ptr; 82 struct drm_gem_object *bo; 83 u64 iova; 84 }; 85 86 struct a6xx_state_memobj { 87 struct list_head node; 88 unsigned long long data[]; 89 }; 90 91 static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize) 92 { 93 struct a6xx_state_memobj *obj = 94 kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL); 95 96 if (!obj) 97 return NULL; 98 99 list_add_tail(&obj->node, &a6xx_state->objs); 100 return &obj->data; 101 } 102 103 static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src, 104 size_t size) 105 { 106 void *dst = state_kcalloc(a6xx_state, 1, size); 107 108 if (dst) 109 memcpy(dst, src, size); 110 return dst; 111 } 112 113 /* 114 * Allocate 1MB for the crashdumper scratch region - 8k for the script and 115 * the rest for the data 116 */ 117 #define A6XX_CD_DATA_OFFSET 8192 118 #define A6XX_CD_DATA_SIZE (SZ_1M - 8192) 119 120 static int a6xx_crashdumper_init(struct msm_gpu *gpu, 121 struct a6xx_crashdumper *dumper) 122 { 123 dumper->ptr = msm_gem_kernel_new(gpu->dev, 124 SZ_1M, MSM_BO_WC, gpu->aspace, 125 &dumper->bo, &dumper->iova); 126 127 if (!IS_ERR(dumper->ptr)) 128 msm_gem_object_set_name(dumper->bo, "crashdump"); 129 130 return PTR_ERR_OR_ZERO(dumper->ptr); 131 } 132 133 static int a6xx_crashdumper_run(struct msm_gpu *gpu, 134 struct a6xx_crashdumper *dumper) 135 { 136 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 137 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 138 u32 val; 139 int ret; 140 141 if (IS_ERR_OR_NULL(dumper->ptr)) 142 return -EINVAL; 143 144 if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu)) 145 return -EINVAL; 146 147 /* Make sure all pending memory writes are posted */ 148 wmb(); 149 150 gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE, dumper->iova); 151 152 gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1); 153 154 ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val, 155 val & 0x02, 100, 10000); 156 157 gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0); 158 159 return ret; 160 } 161 162 /* read a value from the GX debug bus */ 163 static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset, 164 u32 *data) 165 { 166 u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | 167 A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); 168 169 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg); 170 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg); 171 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg); 172 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg); 173 174 /* Wait 1 us to make sure the data is flowing */ 175 udelay(1); 176 177 data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2); 178 data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1); 179 180 return 2; 181 } 182 183 #define cxdbg_write(ptr, offset, val) \ 184 msm_writel((val), (ptr) + ((offset) << 2)) 185 186 #define cxdbg_read(ptr, offset) \ 187 msm_readl((ptr) + ((offset) << 2)) 188 189 /* read a value from the CX debug bus */ 190 static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset, 191 u32 *data) 192 { 193 u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | 194 A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); 195 196 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg); 197 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg); 198 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg); 199 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg); 200 201 /* Wait 1 us to make sure the data is flowing */ 202 udelay(1); 203 204 data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2); 205 data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1); 206 207 return 2; 208 } 209 210 /* Read a chunk of data from the VBIF debug bus */ 211 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1, 212 u32 reg, int count, u32 *data) 213 { 214 int i; 215 216 gpu_write(gpu, ctrl0, reg); 217 218 for (i = 0; i < count; i++) { 219 gpu_write(gpu, ctrl1, i); 220 data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT); 221 } 222 223 return count; 224 } 225 226 #define AXI_ARB_BLOCKS 2 227 #define XIN_AXI_BLOCKS 5 228 #define XIN_CORE_BLOCKS 4 229 230 #define VBIF_DEBUGBUS_BLOCK_SIZE \ 231 ((16 * AXI_ARB_BLOCKS) + \ 232 (18 * XIN_AXI_BLOCKS) + \ 233 (12 * XIN_CORE_BLOCKS)) 234 235 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu, 236 struct a6xx_gpu_state *a6xx_state, 237 struct a6xx_gpu_state_obj *obj) 238 { 239 u32 clk, *ptr; 240 int i; 241 242 obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE, 243 sizeof(u32)); 244 if (!obj->data) 245 return; 246 247 obj->handle = NULL; 248 249 /* Get the current clock setting */ 250 clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON); 251 252 /* Force on the bus so we can read it */ 253 gpu_write(gpu, REG_A6XX_VBIF_CLKON, 254 clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS); 255 256 /* We will read from BUS2 first, so disable BUS1 */ 257 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0); 258 259 /* Enable the VBIF bus for reading */ 260 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1); 261 262 ptr = obj->data; 263 264 for (i = 0; i < AXI_ARB_BLOCKS; i++) 265 ptr += vbif_debugbus_read(gpu, 266 REG_A6XX_VBIF_TEST_BUS2_CTRL0, 267 REG_A6XX_VBIF_TEST_BUS2_CTRL1, 268 1 << (i + 16), 16, ptr); 269 270 for (i = 0; i < XIN_AXI_BLOCKS; i++) 271 ptr += vbif_debugbus_read(gpu, 272 REG_A6XX_VBIF_TEST_BUS2_CTRL0, 273 REG_A6XX_VBIF_TEST_BUS2_CTRL1, 274 1 << i, 18, ptr); 275 276 /* Stop BUS2 so we can turn on BUS1 */ 277 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0); 278 279 for (i = 0; i < XIN_CORE_BLOCKS; i++) 280 ptr += vbif_debugbus_read(gpu, 281 REG_A6XX_VBIF_TEST_BUS1_CTRL0, 282 REG_A6XX_VBIF_TEST_BUS1_CTRL1, 283 1 << i, 12, ptr); 284 285 /* Restore the VBIF clock setting */ 286 gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk); 287 } 288 289 static void a6xx_get_debugbus_block(struct msm_gpu *gpu, 290 struct a6xx_gpu_state *a6xx_state, 291 const struct a6xx_debugbus_block *block, 292 struct a6xx_gpu_state_obj *obj) 293 { 294 int i; 295 u32 *ptr; 296 297 obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64)); 298 if (!obj->data) 299 return; 300 301 obj->handle = block; 302 303 for (ptr = obj->data, i = 0; i < block->count; i++) 304 ptr += debugbus_read(gpu, block->id, i, ptr); 305 } 306 307 static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg, 308 struct a6xx_gpu_state *a6xx_state, 309 const struct a6xx_debugbus_block *block, 310 struct a6xx_gpu_state_obj *obj) 311 { 312 int i; 313 u32 *ptr; 314 315 obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64)); 316 if (!obj->data) 317 return; 318 319 obj->handle = block; 320 321 for (ptr = obj->data, i = 0; i < block->count; i++) 322 ptr += cx_debugbus_read(cxdbg, block->id, i, ptr); 323 } 324 325 static void a6xx_get_debugbus(struct msm_gpu *gpu, 326 struct a6xx_gpu_state *a6xx_state) 327 { 328 struct resource *res; 329 void __iomem *cxdbg = NULL; 330 int nr_debugbus_blocks; 331 332 /* Set up the GX debug bus */ 333 334 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT, 335 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); 336 337 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM, 338 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); 339 340 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0); 341 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0); 342 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0); 343 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0); 344 345 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210); 346 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98); 347 348 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0); 349 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0); 350 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0); 351 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0); 352 353 /* Set up the CX debug bus - it lives elsewhere in the system so do a 354 * temporary ioremap for the registers 355 */ 356 res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM, 357 "cx_dbgc"); 358 359 if (res) 360 cxdbg = ioremap(res->start, resource_size(res)); 361 362 if (cxdbg) { 363 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT, 364 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); 365 366 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM, 367 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); 368 369 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); 370 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); 371 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); 372 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); 373 374 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0, 375 0x76543210); 376 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1, 377 0xFEDCBA98); 378 379 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); 380 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); 381 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); 382 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); 383 } 384 385 nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) + 386 (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0); 387 388 if (adreno_is_a650_family(to_adreno_gpu(gpu))) 389 nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks); 390 391 a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks, 392 sizeof(*a6xx_state->debugbus)); 393 394 if (a6xx_state->debugbus) { 395 int i; 396 397 for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++) 398 a6xx_get_debugbus_block(gpu, 399 a6xx_state, 400 &a6xx_debugbus_blocks[i], 401 &a6xx_state->debugbus[i]); 402 403 a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks); 404 405 /* 406 * GBIF has same debugbus as of other GPU blocks, fall back to 407 * default path if GPU uses GBIF, also GBIF uses exactly same 408 * ID as of VBIF. 409 */ 410 if (a6xx_has_gbif(to_adreno_gpu(gpu))) { 411 a6xx_get_debugbus_block(gpu, a6xx_state, 412 &a6xx_gbif_debugbus_block, 413 &a6xx_state->debugbus[i]); 414 415 a6xx_state->nr_debugbus += 1; 416 } 417 418 419 if (adreno_is_a650_family(to_adreno_gpu(gpu))) { 420 for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++) 421 a6xx_get_debugbus_block(gpu, 422 a6xx_state, 423 &a650_debugbus_blocks[i], 424 &a6xx_state->debugbus[i]); 425 } 426 } 427 428 /* Dump the VBIF debugbus on applicable targets */ 429 if (!a6xx_has_gbif(to_adreno_gpu(gpu))) { 430 a6xx_state->vbif_debugbus = 431 state_kcalloc(a6xx_state, 1, 432 sizeof(*a6xx_state->vbif_debugbus)); 433 434 if (a6xx_state->vbif_debugbus) 435 a6xx_get_vbif_debugbus_block(gpu, a6xx_state, 436 a6xx_state->vbif_debugbus); 437 } 438 439 if (cxdbg) { 440 a6xx_state->cx_debugbus = 441 state_kcalloc(a6xx_state, 442 ARRAY_SIZE(a6xx_cx_debugbus_blocks), 443 sizeof(*a6xx_state->cx_debugbus)); 444 445 if (a6xx_state->cx_debugbus) { 446 int i; 447 448 for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++) 449 a6xx_get_cx_debugbus_block(cxdbg, 450 a6xx_state, 451 &a6xx_cx_debugbus_blocks[i], 452 &a6xx_state->cx_debugbus[i]); 453 454 a6xx_state->nr_cx_debugbus = 455 ARRAY_SIZE(a6xx_cx_debugbus_blocks); 456 } 457 458 iounmap(cxdbg); 459 } 460 } 461 462 #define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1) 463 464 /* Read a data cluster from behind the AHB aperture */ 465 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu, 466 struct a6xx_gpu_state *a6xx_state, 467 const struct a6xx_dbgahb_cluster *dbgahb, 468 struct a6xx_gpu_state_obj *obj, 469 struct a6xx_crashdumper *dumper) 470 { 471 u64 *in = dumper->ptr; 472 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 473 size_t datasize; 474 int i, regcount = 0; 475 476 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { 477 int j; 478 479 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, 480 (dbgahb->statetype + i * 2) << 8); 481 482 for (j = 0; j < dbgahb->count; j += 2) { 483 int count = RANGE(dbgahb->registers, j); 484 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + 485 dbgahb->registers[j] - (dbgahb->base >> 2); 486 487 in += CRASHDUMP_READ(in, offset, count, out); 488 489 out += count * sizeof(u32); 490 491 if (i == 0) 492 regcount += count; 493 } 494 } 495 496 CRASHDUMP_FINI(in); 497 498 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); 499 500 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 501 return; 502 503 if (a6xx_crashdumper_run(gpu, dumper)) 504 return; 505 506 obj->handle = dbgahb; 507 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 508 datasize); 509 } 510 511 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu, 512 struct a6xx_gpu_state *a6xx_state, 513 struct a6xx_crashdumper *dumper) 514 { 515 int i; 516 517 a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state, 518 ARRAY_SIZE(a6xx_dbgahb_clusters), 519 sizeof(*a6xx_state->dbgahb_clusters)); 520 521 if (!a6xx_state->dbgahb_clusters) 522 return; 523 524 a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters); 525 526 for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++) 527 a6xx_get_dbgahb_cluster(gpu, a6xx_state, 528 &a6xx_dbgahb_clusters[i], 529 &a6xx_state->dbgahb_clusters[i], dumper); 530 } 531 532 /* Read a data cluster from the CP aperture with the crashdumper */ 533 static void a6xx_get_cluster(struct msm_gpu *gpu, 534 struct a6xx_gpu_state *a6xx_state, 535 const struct a6xx_cluster *cluster, 536 struct a6xx_gpu_state_obj *obj, 537 struct a6xx_crashdumper *dumper) 538 { 539 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 540 u64 *in = dumper->ptr; 541 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 542 size_t datasize; 543 int i, regcount = 0; 544 u32 id = cluster->id; 545 546 /* Skip registers that are not present on older generation */ 547 if (!adreno_is_a660_family(adreno_gpu) && 548 cluster->registers == a660_fe_cluster) 549 return; 550 551 if (adreno_is_a650_family(adreno_gpu) && 552 cluster->registers == a6xx_ps_cluster) 553 id = CLUSTER_VPC_PS; 554 555 /* Some clusters need a selector register to be programmed too */ 556 if (cluster->sel_reg) 557 in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val); 558 559 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { 560 int j; 561 562 in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD, 563 (id << 8) | (i << 4) | i); 564 565 for (j = 0; j < cluster->count; j += 2) { 566 int count = RANGE(cluster->registers, j); 567 568 in += CRASHDUMP_READ(in, cluster->registers[j], 569 count, out); 570 571 out += count * sizeof(u32); 572 573 if (i == 0) 574 regcount += count; 575 } 576 } 577 578 CRASHDUMP_FINI(in); 579 580 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); 581 582 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 583 return; 584 585 if (a6xx_crashdumper_run(gpu, dumper)) 586 return; 587 588 obj->handle = cluster; 589 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 590 datasize); 591 } 592 593 static void a6xx_get_clusters(struct msm_gpu *gpu, 594 struct a6xx_gpu_state *a6xx_state, 595 struct a6xx_crashdumper *dumper) 596 { 597 int i; 598 599 a6xx_state->clusters = state_kcalloc(a6xx_state, 600 ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters)); 601 602 if (!a6xx_state->clusters) 603 return; 604 605 a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters); 606 607 for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++) 608 a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i], 609 &a6xx_state->clusters[i], dumper); 610 } 611 612 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */ 613 static void a6xx_get_shader_block(struct msm_gpu *gpu, 614 struct a6xx_gpu_state *a6xx_state, 615 const struct a6xx_shader_block *block, 616 struct a6xx_gpu_state_obj *obj, 617 struct a6xx_crashdumper *dumper) 618 { 619 u64 *in = dumper->ptr; 620 size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32); 621 int i; 622 623 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 624 return; 625 626 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { 627 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, 628 (block->type << 8) | i); 629 630 in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE, 631 block->size, dumper->iova + A6XX_CD_DATA_OFFSET); 632 } 633 634 CRASHDUMP_FINI(in); 635 636 if (a6xx_crashdumper_run(gpu, dumper)) 637 return; 638 639 obj->handle = block; 640 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 641 datasize); 642 } 643 644 static void a6xx_get_shaders(struct msm_gpu *gpu, 645 struct a6xx_gpu_state *a6xx_state, 646 struct a6xx_crashdumper *dumper) 647 { 648 int i; 649 650 a6xx_state->shaders = state_kcalloc(a6xx_state, 651 ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders)); 652 653 if (!a6xx_state->shaders) 654 return; 655 656 a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks); 657 658 for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++) 659 a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i], 660 &a6xx_state->shaders[i], dumper); 661 } 662 663 /* Read registers from behind the HLSQ aperture with the crashdumper */ 664 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu, 665 struct a6xx_gpu_state *a6xx_state, 666 const struct a6xx_registers *regs, 667 struct a6xx_gpu_state_obj *obj, 668 struct a6xx_crashdumper *dumper) 669 670 { 671 u64 *in = dumper->ptr; 672 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 673 int i, regcount = 0; 674 675 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1); 676 677 for (i = 0; i < regs->count; i += 2) { 678 u32 count = RANGE(regs->registers, i); 679 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + 680 regs->registers[i] - (regs->val0 >> 2); 681 682 in += CRASHDUMP_READ(in, offset, count, out); 683 684 out += count * sizeof(u32); 685 regcount += count; 686 } 687 688 CRASHDUMP_FINI(in); 689 690 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) 691 return; 692 693 if (a6xx_crashdumper_run(gpu, dumper)) 694 return; 695 696 obj->handle = regs; 697 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 698 regcount * sizeof(u32)); 699 } 700 701 /* Read a block of registers using the crashdumper */ 702 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu, 703 struct a6xx_gpu_state *a6xx_state, 704 const struct a6xx_registers *regs, 705 struct a6xx_gpu_state_obj *obj, 706 struct a6xx_crashdumper *dumper) 707 708 { 709 u64 *in = dumper->ptr; 710 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 711 int i, regcount = 0; 712 713 /* Skip unsupported registers on older generations */ 714 if (!adreno_is_a660_family(to_adreno_gpu(gpu)) && 715 (regs->registers == a660_registers)) 716 return; 717 718 /* Some blocks might need to program a selector register first */ 719 if (regs->val0) 720 in += CRASHDUMP_WRITE(in, regs->val0, regs->val1); 721 722 for (i = 0; i < regs->count; i += 2) { 723 u32 count = RANGE(regs->registers, i); 724 725 in += CRASHDUMP_READ(in, regs->registers[i], count, out); 726 727 out += count * sizeof(u32); 728 regcount += count; 729 } 730 731 CRASHDUMP_FINI(in); 732 733 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) 734 return; 735 736 if (a6xx_crashdumper_run(gpu, dumper)) 737 return; 738 739 obj->handle = regs; 740 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 741 regcount * sizeof(u32)); 742 } 743 744 /* Read a block of registers via AHB */ 745 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu, 746 struct a6xx_gpu_state *a6xx_state, 747 const struct a6xx_registers *regs, 748 struct a6xx_gpu_state_obj *obj) 749 { 750 int i, regcount = 0, index = 0; 751 752 /* Skip unsupported registers on older generations */ 753 if (!adreno_is_a660_family(to_adreno_gpu(gpu)) && 754 (regs->registers == a660_registers)) 755 return; 756 757 for (i = 0; i < regs->count; i += 2) 758 regcount += RANGE(regs->registers, i); 759 760 obj->handle = (const void *) regs; 761 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32)); 762 if (!obj->data) 763 return; 764 765 for (i = 0; i < regs->count; i += 2) { 766 u32 count = RANGE(regs->registers, i); 767 int j; 768 769 for (j = 0; j < count; j++) 770 obj->data[index++] = gpu_read(gpu, 771 regs->registers[i] + j); 772 } 773 } 774 775 /* Read a block of GMU registers */ 776 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu, 777 struct a6xx_gpu_state *a6xx_state, 778 const struct a6xx_registers *regs, 779 struct a6xx_gpu_state_obj *obj, 780 bool rscc) 781 { 782 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 783 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 784 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 785 int i, regcount = 0, index = 0; 786 787 for (i = 0; i < regs->count; i += 2) 788 regcount += RANGE(regs->registers, i); 789 790 obj->handle = (const void *) regs; 791 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32)); 792 if (!obj->data) 793 return; 794 795 for (i = 0; i < regs->count; i += 2) { 796 u32 count = RANGE(regs->registers, i); 797 int j; 798 799 for (j = 0; j < count; j++) { 800 u32 offset = regs->registers[i] + j; 801 u32 val; 802 803 if (rscc) 804 val = gmu_read_rscc(gmu, offset); 805 else 806 val = gmu_read(gmu, offset); 807 808 obj->data[index++] = val; 809 } 810 } 811 } 812 813 static void a6xx_get_gmu_registers(struct msm_gpu *gpu, 814 struct a6xx_gpu_state *a6xx_state) 815 { 816 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 817 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 818 819 a6xx_state->gmu_registers = state_kcalloc(a6xx_state, 820 3, sizeof(*a6xx_state->gmu_registers)); 821 822 if (!a6xx_state->gmu_registers) 823 return; 824 825 a6xx_state->nr_gmu_registers = 3; 826 827 /* Get the CX GMU registers from AHB */ 828 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0], 829 &a6xx_state->gmu_registers[0], false); 830 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1], 831 &a6xx_state->gmu_registers[1], true); 832 833 if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) 834 return; 835 836 /* Set the fence to ALLOW mode so we can access the registers */ 837 gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0); 838 839 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2], 840 &a6xx_state->gmu_registers[2], false); 841 } 842 843 static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo( 844 struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo) 845 { 846 struct msm_gpu_state_bo *snapshot; 847 848 if (!bo->size) 849 return NULL; 850 851 snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot)); 852 if (!snapshot) 853 return NULL; 854 855 snapshot->iova = bo->iova; 856 snapshot->size = bo->size; 857 snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL); 858 if (!snapshot->data) 859 return NULL; 860 861 memcpy(snapshot->data, bo->virt, bo->size); 862 863 return snapshot; 864 } 865 866 static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu, 867 struct a6xx_gpu_state *a6xx_state) 868 { 869 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 870 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 871 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 872 unsigned i, j; 873 874 BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history)); 875 876 for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) { 877 struct a6xx_hfi_queue *queue = &gmu->queues[i]; 878 for (j = 0; j < HFI_HISTORY_SZ; j++) { 879 unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ; 880 a6xx_state->hfi_queue_history[i][j] = queue->history[idx]; 881 } 882 } 883 } 884 885 #define A6XX_REGLIST_SIZE 1 886 #define A6XX_GBIF_REGLIST_SIZE 1 887 static void a6xx_get_registers(struct msm_gpu *gpu, 888 struct a6xx_gpu_state *a6xx_state, 889 struct a6xx_crashdumper *dumper) 890 { 891 int i, count = A6XX_REGLIST_SIZE + 892 ARRAY_SIZE(a6xx_reglist) + 893 ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE; 894 int index = 0; 895 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 896 897 a6xx_state->registers = state_kcalloc(a6xx_state, 898 count, sizeof(*a6xx_state->registers)); 899 900 if (!a6xx_state->registers) 901 return; 902 903 a6xx_state->nr_registers = count; 904 905 if (adreno_is_a7xx(adreno_gpu)) 906 a6xx_get_ahb_gpu_registers(gpu, 907 a6xx_state, &a7xx_ahb_reglist, 908 &a6xx_state->registers[index++]); 909 else 910 a6xx_get_ahb_gpu_registers(gpu, 911 a6xx_state, &a6xx_ahb_reglist, 912 &a6xx_state->registers[index++]); 913 914 if (adreno_is_a7xx(adreno_gpu)) 915 a6xx_get_ahb_gpu_registers(gpu, 916 a6xx_state, &a7xx_gbif_reglist, 917 &a6xx_state->registers[index++]); 918 else if (a6xx_has_gbif(adreno_gpu)) 919 a6xx_get_ahb_gpu_registers(gpu, 920 a6xx_state, &a6xx_gbif_reglist, 921 &a6xx_state->registers[index++]); 922 else 923 a6xx_get_ahb_gpu_registers(gpu, 924 a6xx_state, &a6xx_vbif_reglist, 925 &a6xx_state->registers[index++]); 926 if (!dumper) { 927 /* 928 * We can't use the crashdumper when the SMMU is stalled, 929 * because the GPU has no memory access until we resume 930 * translation (but we don't want to do that until after 931 * we have captured as much useful GPU state as possible). 932 * So instead collect registers via the CPU: 933 */ 934 for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++) 935 a6xx_get_ahb_gpu_registers(gpu, 936 a6xx_state, &a6xx_reglist[i], 937 &a6xx_state->registers[index++]); 938 return; 939 } 940 941 for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++) 942 a6xx_get_crashdumper_registers(gpu, 943 a6xx_state, &a6xx_reglist[i], 944 &a6xx_state->registers[index++], 945 dumper); 946 947 for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++) 948 a6xx_get_crashdumper_hlsq_registers(gpu, 949 a6xx_state, &a6xx_hlsq_reglist[i], 950 &a6xx_state->registers[index++], 951 dumper); 952 } 953 954 static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu) 955 { 956 /* The value at [16:31] is in 4dword units. Convert it to dwords */ 957 return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14; 958 } 959 960 static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu) 961 { 962 /* 963 * The value at CP_ROQ_THRESHOLDS_2[20:31] is in 4dword units. 964 * That register however is not directly accessible from APSS on A7xx. 965 * Program the SQE_UCODE_DBG_ADDR with offset=0x70d3 and read the value. 966 */ 967 gpu_write(gpu, REG_A6XX_CP_SQE_UCODE_DBG_ADDR, 0x70d3); 968 969 return 4 * (gpu_read(gpu, REG_A6XX_CP_SQE_UCODE_DBG_DATA) >> 20); 970 } 971 972 /* Read a block of data from an indexed register pair */ 973 static void a6xx_get_indexed_regs(struct msm_gpu *gpu, 974 struct a6xx_gpu_state *a6xx_state, 975 struct a6xx_indexed_registers *indexed, 976 struct a6xx_gpu_state_obj *obj) 977 { 978 int i; 979 980 obj->handle = (const void *) indexed; 981 if (indexed->count_fn) 982 indexed->count = indexed->count_fn(gpu); 983 984 obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32)); 985 if (!obj->data) 986 return; 987 988 /* All the indexed banks start at address 0 */ 989 gpu_write(gpu, indexed->addr, 0); 990 991 /* Read the data - each read increments the internal address by 1 */ 992 for (i = 0; i < indexed->count; i++) 993 obj->data[i] = gpu_read(gpu, indexed->data); 994 } 995 996 static void a6xx_get_indexed_registers(struct msm_gpu *gpu, 997 struct a6xx_gpu_state *a6xx_state) 998 { 999 u32 mempool_size; 1000 int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1; 1001 int i; 1002 1003 a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count, 1004 sizeof(*a6xx_state->indexed_regs)); 1005 if (!a6xx_state->indexed_regs) 1006 return; 1007 1008 for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++) 1009 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i], 1010 &a6xx_state->indexed_regs[i]); 1011 1012 if (adreno_is_a650_family(to_adreno_gpu(gpu))) { 1013 u32 val; 1014 1015 val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG); 1016 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4); 1017 1018 /* Get the contents of the CP mempool */ 1019 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed, 1020 &a6xx_state->indexed_regs[i]); 1021 1022 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val); 1023 a6xx_state->nr_indexed_regs = count; 1024 return; 1025 } 1026 1027 /* Set the CP mempool size to 0 to stabilize it while dumping */ 1028 mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE); 1029 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0); 1030 1031 /* Get the contents of the CP mempool */ 1032 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed, 1033 &a6xx_state->indexed_regs[i]); 1034 1035 /* 1036 * Offset 0x2000 in the mempool is the size - copy the saved size over 1037 * so the data is consistent 1038 */ 1039 a6xx_state->indexed_regs[i].data[0x2000] = mempool_size; 1040 1041 /* Restore the size in the hardware */ 1042 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size); 1043 } 1044 1045 static void a7xx_get_indexed_registers(struct msm_gpu *gpu, 1046 struct a6xx_gpu_state *a6xx_state) 1047 { 1048 int i, indexed_count, mempool_count; 1049 1050 indexed_count = ARRAY_SIZE(a7xx_indexed_reglist); 1051 mempool_count = ARRAY_SIZE(a7xx_cp_bv_mempool_indexed); 1052 1053 a6xx_state->indexed_regs = state_kcalloc(a6xx_state, 1054 indexed_count + mempool_count, 1055 sizeof(*a6xx_state->indexed_regs)); 1056 if (!a6xx_state->indexed_regs) 1057 return; 1058 1059 a6xx_state->nr_indexed_regs = indexed_count + mempool_count; 1060 1061 /* First read the common regs */ 1062 for (i = 0; i < indexed_count; i++) 1063 a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_indexed_reglist[i], 1064 &a6xx_state->indexed_regs[i]); 1065 1066 gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, 0, BIT(2)); 1067 gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, 0, BIT(2)); 1068 1069 /* Get the contents of the CP_BV mempool */ 1070 for (i = 0; i < mempool_count; i++) 1071 a6xx_get_indexed_regs(gpu, a6xx_state, a7xx_cp_bv_mempool_indexed, 1072 &a6xx_state->indexed_regs[indexed_count - 1 + i]); 1073 1074 gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(2), 0); 1075 gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, BIT(2), 0); 1076 return; 1077 } 1078 1079 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) 1080 { 1081 struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL; 1082 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1083 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1084 struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state), 1085 GFP_KERNEL); 1086 bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & 1087 A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT); 1088 1089 if (!a6xx_state) 1090 return ERR_PTR(-ENOMEM); 1091 1092 INIT_LIST_HEAD(&a6xx_state->objs); 1093 1094 /* Get the generic state from the adreno core */ 1095 adreno_gpu_state_get(gpu, &a6xx_state->base); 1096 1097 if (!adreno_has_gmu_wrapper(adreno_gpu)) { 1098 a6xx_get_gmu_registers(gpu, a6xx_state); 1099 1100 a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log); 1101 a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi); 1102 a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug); 1103 1104 a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state); 1105 } 1106 1107 /* If GX isn't on the rest of the data isn't going to be accessible */ 1108 if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) 1109 return &a6xx_state->base; 1110 1111 /* Get the banks of indexed registers */ 1112 if (adreno_is_a7xx(adreno_gpu)) { 1113 a7xx_get_indexed_registers(gpu, a6xx_state); 1114 /* Further codeflow is untested on A7xx. */ 1115 return &a6xx_state->base; 1116 } 1117 1118 a6xx_get_indexed_registers(gpu, a6xx_state); 1119 1120 /* 1121 * Try to initialize the crashdumper, if we are not dumping state 1122 * with the SMMU stalled. The crashdumper needs memory access to 1123 * write out GPU state, so we need to skip this when the SMMU is 1124 * stalled in response to an iova fault 1125 */ 1126 if (!stalled && !gpu->needs_hw_init && 1127 !a6xx_crashdumper_init(gpu, &_dumper)) { 1128 dumper = &_dumper; 1129 } 1130 1131 a6xx_get_registers(gpu, a6xx_state, dumper); 1132 1133 if (dumper) { 1134 a6xx_get_shaders(gpu, a6xx_state, dumper); 1135 a6xx_get_clusters(gpu, a6xx_state, dumper); 1136 a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper); 1137 1138 msm_gem_kernel_put(dumper->bo, gpu->aspace); 1139 } 1140 1141 if (snapshot_debugbus) 1142 a6xx_get_debugbus(gpu, a6xx_state); 1143 1144 a6xx_state->gpu_initialized = !gpu->needs_hw_init; 1145 1146 return &a6xx_state->base; 1147 } 1148 1149 static void a6xx_gpu_state_destroy(struct kref *kref) 1150 { 1151 struct a6xx_state_memobj *obj, *tmp; 1152 struct msm_gpu_state *state = container_of(kref, 1153 struct msm_gpu_state, ref); 1154 struct a6xx_gpu_state *a6xx_state = container_of(state, 1155 struct a6xx_gpu_state, base); 1156 1157 if (a6xx_state->gmu_log) 1158 kvfree(a6xx_state->gmu_log->data); 1159 1160 if (a6xx_state->gmu_hfi) 1161 kvfree(a6xx_state->gmu_hfi->data); 1162 1163 if (a6xx_state->gmu_debug) 1164 kvfree(a6xx_state->gmu_debug->data); 1165 1166 list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) { 1167 list_del(&obj->node); 1168 kvfree(obj); 1169 } 1170 1171 adreno_gpu_state_destroy(state); 1172 kfree(a6xx_state); 1173 } 1174 1175 int a6xx_gpu_state_put(struct msm_gpu_state *state) 1176 { 1177 if (IS_ERR_OR_NULL(state)) 1178 return 1; 1179 1180 return kref_put(&state->ref, a6xx_gpu_state_destroy); 1181 } 1182 1183 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count, 1184 struct drm_printer *p) 1185 { 1186 int i, index = 0; 1187 1188 if (!data) 1189 return; 1190 1191 for (i = 0; i < count; i += 2) { 1192 u32 count = RANGE(registers, i); 1193 u32 offset = registers[i]; 1194 int j; 1195 1196 for (j = 0; j < count; index++, offset++, j++) { 1197 if (data[index] == 0xdeafbead) 1198 continue; 1199 1200 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", 1201 offset << 2, data[index]); 1202 } 1203 } 1204 } 1205 1206 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data) 1207 { 1208 char out[ASCII85_BUFSZ]; 1209 long i, l, datalen = 0; 1210 1211 for (i = 0; i < len >> 2; i++) { 1212 if (data[i]) 1213 datalen = (i + 1) << 2; 1214 } 1215 1216 if (datalen == 0) 1217 return; 1218 1219 drm_puts(p, " data: !!ascii85 |\n"); 1220 drm_puts(p, " "); 1221 1222 1223 l = ascii85_encode_len(datalen); 1224 1225 for (i = 0; i < l; i++) 1226 drm_puts(p, ascii85_encode(data[i], out)); 1227 1228 drm_puts(p, "\n"); 1229 } 1230 1231 static void print_name(struct drm_printer *p, const char *fmt, const char *name) 1232 { 1233 drm_puts(p, fmt); 1234 drm_puts(p, name); 1235 drm_puts(p, "\n"); 1236 } 1237 1238 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj, 1239 struct drm_printer *p) 1240 { 1241 const struct a6xx_shader_block *block = obj->handle; 1242 int i; 1243 1244 if (!obj->handle) 1245 return; 1246 1247 print_name(p, " - type: ", block->name); 1248 1249 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { 1250 drm_printf(p, " - bank: %d\n", i); 1251 drm_printf(p, " size: %d\n", block->size); 1252 1253 if (!obj->data) 1254 continue; 1255 1256 print_ascii85(p, block->size << 2, 1257 obj->data + (block->size * i)); 1258 } 1259 } 1260 1261 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data, 1262 struct drm_printer *p) 1263 { 1264 int ctx, index = 0; 1265 1266 for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) { 1267 int j; 1268 1269 drm_printf(p, " - context: %d\n", ctx); 1270 1271 for (j = 0; j < size; j += 2) { 1272 u32 count = RANGE(registers, j); 1273 u32 offset = registers[j]; 1274 int k; 1275 1276 for (k = 0; k < count; index++, offset++, k++) { 1277 if (data[index] == 0xdeafbead) 1278 continue; 1279 1280 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", 1281 offset << 2, data[index]); 1282 } 1283 } 1284 } 1285 } 1286 1287 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj, 1288 struct drm_printer *p) 1289 { 1290 const struct a6xx_dbgahb_cluster *dbgahb = obj->handle; 1291 1292 if (dbgahb) { 1293 print_name(p, " - cluster-name: ", dbgahb->name); 1294 a6xx_show_cluster_data(dbgahb->registers, dbgahb->count, 1295 obj->data, p); 1296 } 1297 } 1298 1299 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj, 1300 struct drm_printer *p) 1301 { 1302 const struct a6xx_cluster *cluster = obj->handle; 1303 1304 if (cluster) { 1305 print_name(p, " - cluster-name: ", cluster->name); 1306 a6xx_show_cluster_data(cluster->registers, cluster->count, 1307 obj->data, p); 1308 } 1309 } 1310 1311 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj, 1312 struct drm_printer *p) 1313 { 1314 const struct a6xx_indexed_registers *indexed = obj->handle; 1315 1316 if (!indexed) 1317 return; 1318 1319 print_name(p, " - regs-name: ", indexed->name); 1320 drm_printf(p, " dwords: %d\n", indexed->count); 1321 1322 print_ascii85(p, indexed->count << 2, obj->data); 1323 } 1324 1325 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block, 1326 u32 *data, struct drm_printer *p) 1327 { 1328 if (block) { 1329 print_name(p, " - debugbus-block: ", block->name); 1330 1331 /* 1332 * count for regular debugbus data is in quadwords, 1333 * but print the size in dwords for consistency 1334 */ 1335 drm_printf(p, " count: %d\n", block->count << 1); 1336 1337 print_ascii85(p, block->count << 3, data); 1338 } 1339 } 1340 1341 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state, 1342 struct drm_printer *p) 1343 { 1344 int i; 1345 1346 for (i = 0; i < a6xx_state->nr_debugbus; i++) { 1347 struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i]; 1348 1349 a6xx_show_debugbus_block(obj->handle, obj->data, p); 1350 } 1351 1352 if (a6xx_state->vbif_debugbus) { 1353 struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus; 1354 1355 drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n"); 1356 drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE); 1357 1358 /* vbif debugbus data is in dwords. Confusing, huh? */ 1359 print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data); 1360 } 1361 1362 for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) { 1363 struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i]; 1364 1365 a6xx_show_debugbus_block(obj->handle, obj->data, p); 1366 } 1367 } 1368 1369 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, 1370 struct drm_printer *p) 1371 { 1372 struct a6xx_gpu_state *a6xx_state = container_of(state, 1373 struct a6xx_gpu_state, base); 1374 int i; 1375 1376 if (IS_ERR_OR_NULL(state)) 1377 return; 1378 1379 drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized); 1380 1381 adreno_show(gpu, state, p); 1382 1383 drm_puts(p, "gmu-log:\n"); 1384 if (a6xx_state->gmu_log) { 1385 struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log; 1386 1387 drm_printf(p, " iova: 0x%016llx\n", gmu_log->iova); 1388 drm_printf(p, " size: %zu\n", gmu_log->size); 1389 adreno_show_object(p, &gmu_log->data, gmu_log->size, 1390 &gmu_log->encoded); 1391 } 1392 1393 drm_puts(p, "gmu-hfi:\n"); 1394 if (a6xx_state->gmu_hfi) { 1395 struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi; 1396 unsigned i, j; 1397 1398 drm_printf(p, " iova: 0x%016llx\n", gmu_hfi->iova); 1399 drm_printf(p, " size: %zu\n", gmu_hfi->size); 1400 for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) { 1401 drm_printf(p, " queue-history[%u]:", i); 1402 for (j = 0; j < HFI_HISTORY_SZ; j++) { 1403 drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]); 1404 } 1405 drm_printf(p, "\n"); 1406 } 1407 adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size, 1408 &gmu_hfi->encoded); 1409 } 1410 1411 drm_puts(p, "gmu-debug:\n"); 1412 if (a6xx_state->gmu_debug) { 1413 struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug; 1414 1415 drm_printf(p, " iova: 0x%016llx\n", gmu_debug->iova); 1416 drm_printf(p, " size: %zu\n", gmu_debug->size); 1417 adreno_show_object(p, &gmu_debug->data, gmu_debug->size, 1418 &gmu_debug->encoded); 1419 } 1420 1421 drm_puts(p, "registers:\n"); 1422 for (i = 0; i < a6xx_state->nr_registers; i++) { 1423 struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i]; 1424 const struct a6xx_registers *regs = obj->handle; 1425 1426 if (!obj->handle) 1427 continue; 1428 1429 a6xx_show_registers(regs->registers, obj->data, regs->count, p); 1430 } 1431 1432 drm_puts(p, "registers-gmu:\n"); 1433 for (i = 0; i < a6xx_state->nr_gmu_registers; i++) { 1434 struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i]; 1435 const struct a6xx_registers *regs = obj->handle; 1436 1437 if (!obj->handle) 1438 continue; 1439 1440 a6xx_show_registers(regs->registers, obj->data, regs->count, p); 1441 } 1442 1443 drm_puts(p, "indexed-registers:\n"); 1444 for (i = 0; i < a6xx_state->nr_indexed_regs; i++) 1445 a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p); 1446 1447 drm_puts(p, "shader-blocks:\n"); 1448 for (i = 0; i < a6xx_state->nr_shaders; i++) 1449 a6xx_show_shader(&a6xx_state->shaders[i], p); 1450 1451 drm_puts(p, "clusters:\n"); 1452 for (i = 0; i < a6xx_state->nr_clusters; i++) 1453 a6xx_show_cluster(&a6xx_state->clusters[i], p); 1454 1455 for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) 1456 a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p); 1457 1458 drm_puts(p, "debugbus:\n"); 1459 a6xx_show_debugbus(a6xx_state, p); 1460 } 1461