1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2018 The Linux Foundation. All rights reserved. */ 3 4 #include <linux/ascii85.h> 5 #include "msm_gem.h" 6 #include "a6xx_gpu.h" 7 #include "a6xx_gmu.h" 8 #include "a6xx_gpu_state.h" 9 #include "a6xx_gmu.xml.h" 10 11 struct a6xx_gpu_state_obj { 12 const void *handle; 13 u32 *data; 14 }; 15 16 struct a6xx_gpu_state { 17 struct msm_gpu_state base; 18 19 struct a6xx_gpu_state_obj *gmu_registers; 20 int nr_gmu_registers; 21 22 struct a6xx_gpu_state_obj *registers; 23 int nr_registers; 24 25 struct a6xx_gpu_state_obj *shaders; 26 int nr_shaders; 27 28 struct a6xx_gpu_state_obj *clusters; 29 int nr_clusters; 30 31 struct a6xx_gpu_state_obj *dbgahb_clusters; 32 int nr_dbgahb_clusters; 33 34 struct a6xx_gpu_state_obj *indexed_regs; 35 int nr_indexed_regs; 36 37 struct a6xx_gpu_state_obj *debugbus; 38 int nr_debugbus; 39 40 struct a6xx_gpu_state_obj *vbif_debugbus; 41 42 struct a6xx_gpu_state_obj *cx_debugbus; 43 int nr_cx_debugbus; 44 45 struct list_head objs; 46 }; 47 48 static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val) 49 { 50 in[0] = val; 51 in[1] = (((u64) reg) << 44 | (1 << 21) | 1); 52 53 return 2; 54 } 55 56 static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target) 57 { 58 in[0] = target; 59 in[1] = (((u64) reg) << 44 | dwords); 60 61 return 2; 62 } 63 64 static inline int CRASHDUMP_FINI(u64 *in) 65 { 66 in[0] = 0; 67 in[1] = 0; 68 69 return 2; 70 } 71 72 struct a6xx_crashdumper { 73 void *ptr; 74 struct drm_gem_object *bo; 75 u64 iova; 76 }; 77 78 struct a6xx_state_memobj { 79 struct list_head node; 80 unsigned long long data[]; 81 }; 82 83 void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize) 84 { 85 struct a6xx_state_memobj *obj = 86 kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL); 87 88 if (!obj) 89 return NULL; 90 91 list_add_tail(&obj->node, &a6xx_state->objs); 92 return &obj->data; 93 } 94 95 void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src, 96 size_t size) 97 { 98 void *dst = state_kcalloc(a6xx_state, 1, size); 99 100 if (dst) 101 memcpy(dst, src, size); 102 return dst; 103 } 104 105 /* 106 * Allocate 1MB for the crashdumper scratch region - 8k for the script and 107 * the rest for the data 108 */ 109 #define A6XX_CD_DATA_OFFSET 8192 110 #define A6XX_CD_DATA_SIZE (SZ_1M - 8192) 111 112 static int a6xx_crashdumper_init(struct msm_gpu *gpu, 113 struct a6xx_crashdumper *dumper) 114 { 115 dumper->ptr = msm_gem_kernel_new_locked(gpu->dev, 116 SZ_1M, MSM_BO_UNCACHED, gpu->aspace, 117 &dumper->bo, &dumper->iova); 118 119 if (!IS_ERR(dumper->ptr)) 120 msm_gem_object_set_name(dumper->bo, "crashdump"); 121 122 return PTR_ERR_OR_ZERO(dumper->ptr); 123 } 124 125 static int a6xx_crashdumper_run(struct msm_gpu *gpu, 126 struct a6xx_crashdumper *dumper) 127 { 128 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 129 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 130 u32 val; 131 int ret; 132 133 if (IS_ERR_OR_NULL(dumper->ptr)) 134 return -EINVAL; 135 136 if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu)) 137 return -EINVAL; 138 139 /* Make sure all pending memory writes are posted */ 140 wmb(); 141 142 gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, 143 REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova); 144 145 gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1); 146 147 ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val, 148 val & 0x02, 100, 10000); 149 150 gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0); 151 152 return ret; 153 } 154 155 /* read a value from the GX debug bus */ 156 static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset, 157 u32 *data) 158 { 159 u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | 160 A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); 161 162 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg); 163 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg); 164 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg); 165 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg); 166 167 /* Wait 1 us to make sure the data is flowing */ 168 udelay(1); 169 170 data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2); 171 data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1); 172 173 return 2; 174 } 175 176 #define cxdbg_write(ptr, offset, val) \ 177 msm_writel((val), (ptr) + ((offset) << 2)) 178 179 #define cxdbg_read(ptr, offset) \ 180 msm_readl((ptr) + ((offset) << 2)) 181 182 /* read a value from the CX debug bus */ 183 static int cx_debugbus_read(void *__iomem cxdbg, u32 block, u32 offset, 184 u32 *data) 185 { 186 u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | 187 A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); 188 189 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg); 190 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg); 191 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg); 192 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg); 193 194 /* Wait 1 us to make sure the data is flowing */ 195 udelay(1); 196 197 data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2); 198 data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1); 199 200 return 2; 201 } 202 203 /* Read a chunk of data from the VBIF debug bus */ 204 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1, 205 u32 reg, int count, u32 *data) 206 { 207 int i; 208 209 gpu_write(gpu, ctrl0, reg); 210 211 for (i = 0; i < count; i++) { 212 gpu_write(gpu, ctrl1, i); 213 data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT); 214 } 215 216 return count; 217 } 218 219 #define AXI_ARB_BLOCKS 2 220 #define XIN_AXI_BLOCKS 5 221 #define XIN_CORE_BLOCKS 4 222 223 #define VBIF_DEBUGBUS_BLOCK_SIZE \ 224 ((16 * AXI_ARB_BLOCKS) + \ 225 (18 * XIN_AXI_BLOCKS) + \ 226 (12 * XIN_CORE_BLOCKS)) 227 228 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu, 229 struct a6xx_gpu_state *a6xx_state, 230 struct a6xx_gpu_state_obj *obj) 231 { 232 u32 clk, *ptr; 233 int i; 234 235 obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE, 236 sizeof(u32)); 237 if (!obj->data) 238 return; 239 240 obj->handle = NULL; 241 242 /* Get the current clock setting */ 243 clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON); 244 245 /* Force on the bus so we can read it */ 246 gpu_write(gpu, REG_A6XX_VBIF_CLKON, 247 clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS); 248 249 /* We will read from BUS2 first, so disable BUS1 */ 250 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0); 251 252 /* Enable the VBIF bus for reading */ 253 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1); 254 255 ptr = obj->data; 256 257 for (i = 0; i < AXI_ARB_BLOCKS; i++) 258 ptr += vbif_debugbus_read(gpu, 259 REG_A6XX_VBIF_TEST_BUS2_CTRL0, 260 REG_A6XX_VBIF_TEST_BUS2_CTRL1, 261 1 << (i + 16), 16, ptr); 262 263 for (i = 0; i < XIN_AXI_BLOCKS; i++) 264 ptr += vbif_debugbus_read(gpu, 265 REG_A6XX_VBIF_TEST_BUS2_CTRL0, 266 REG_A6XX_VBIF_TEST_BUS2_CTRL1, 267 1 << i, 18, ptr); 268 269 /* Stop BUS2 so we can turn on BUS1 */ 270 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0); 271 272 for (i = 0; i < XIN_CORE_BLOCKS; i++) 273 ptr += vbif_debugbus_read(gpu, 274 REG_A6XX_VBIF_TEST_BUS1_CTRL0, 275 REG_A6XX_VBIF_TEST_BUS1_CTRL1, 276 1 << i, 12, ptr); 277 278 /* Restore the VBIF clock setting */ 279 gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk); 280 } 281 282 static void a6xx_get_debugbus_block(struct msm_gpu *gpu, 283 struct a6xx_gpu_state *a6xx_state, 284 const struct a6xx_debugbus_block *block, 285 struct a6xx_gpu_state_obj *obj) 286 { 287 int i; 288 u32 *ptr; 289 290 obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64)); 291 if (!obj->data) 292 return; 293 294 obj->handle = block; 295 296 for (ptr = obj->data, i = 0; i < block->count; i++) 297 ptr += debugbus_read(gpu, block->id, i, ptr); 298 } 299 300 static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg, 301 struct a6xx_gpu_state *a6xx_state, 302 const struct a6xx_debugbus_block *block, 303 struct a6xx_gpu_state_obj *obj) 304 { 305 int i; 306 u32 *ptr; 307 308 obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64)); 309 if (!obj->data) 310 return; 311 312 obj->handle = block; 313 314 for (ptr = obj->data, i = 0; i < block->count; i++) 315 ptr += cx_debugbus_read(cxdbg, block->id, i, ptr); 316 } 317 318 static void a6xx_get_debugbus(struct msm_gpu *gpu, 319 struct a6xx_gpu_state *a6xx_state) 320 { 321 struct resource *res; 322 void __iomem *cxdbg = NULL; 323 324 /* Set up the GX debug bus */ 325 326 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT, 327 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); 328 329 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM, 330 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); 331 332 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0); 333 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0); 334 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0); 335 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0); 336 337 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210); 338 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98); 339 340 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0); 341 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0); 342 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0); 343 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0); 344 345 /* Set up the CX debug bus - it lives elsewhere in the system so do a 346 * temporary ioremap for the registers 347 */ 348 res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM, 349 "cx_dbgc"); 350 351 if (res) 352 cxdbg = ioremap(res->start, resource_size(res)); 353 354 if (cxdbg) { 355 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT, 356 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); 357 358 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM, 359 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); 360 361 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); 362 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); 363 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); 364 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); 365 366 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0, 367 0x76543210); 368 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1, 369 0xFEDCBA98); 370 371 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); 372 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); 373 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); 374 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); 375 } 376 377 a6xx_state->debugbus = state_kcalloc(a6xx_state, 378 ARRAY_SIZE(a6xx_debugbus_blocks), 379 sizeof(*a6xx_state->debugbus)); 380 381 if (a6xx_state->debugbus) { 382 int i; 383 384 for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++) 385 a6xx_get_debugbus_block(gpu, 386 a6xx_state, 387 &a6xx_debugbus_blocks[i], 388 &a6xx_state->debugbus[i]); 389 390 a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks); 391 } 392 393 a6xx_state->vbif_debugbus = 394 state_kcalloc(a6xx_state, 1, 395 sizeof(*a6xx_state->vbif_debugbus)); 396 397 if (a6xx_state->vbif_debugbus) 398 a6xx_get_vbif_debugbus_block(gpu, a6xx_state, 399 a6xx_state->vbif_debugbus); 400 401 if (cxdbg) { 402 a6xx_state->cx_debugbus = 403 state_kcalloc(a6xx_state, 404 ARRAY_SIZE(a6xx_cx_debugbus_blocks), 405 sizeof(*a6xx_state->cx_debugbus)); 406 407 if (a6xx_state->cx_debugbus) { 408 int i; 409 410 for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++) 411 a6xx_get_cx_debugbus_block(cxdbg, 412 a6xx_state, 413 &a6xx_cx_debugbus_blocks[i], 414 &a6xx_state->cx_debugbus[i]); 415 416 a6xx_state->nr_cx_debugbus = 417 ARRAY_SIZE(a6xx_cx_debugbus_blocks); 418 } 419 420 iounmap(cxdbg); 421 } 422 } 423 424 #define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1) 425 426 /* Read a data cluster from behind the AHB aperture */ 427 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu, 428 struct a6xx_gpu_state *a6xx_state, 429 const struct a6xx_dbgahb_cluster *dbgahb, 430 struct a6xx_gpu_state_obj *obj, 431 struct a6xx_crashdumper *dumper) 432 { 433 u64 *in = dumper->ptr; 434 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 435 size_t datasize; 436 int i, regcount = 0; 437 438 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { 439 int j; 440 441 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, 442 (dbgahb->statetype + i * 2) << 8); 443 444 for (j = 0; j < dbgahb->count; j += 2) { 445 int count = RANGE(dbgahb->registers, j); 446 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + 447 dbgahb->registers[j] - (dbgahb->base >> 2); 448 449 in += CRASHDUMP_READ(in, offset, count, out); 450 451 out += count * sizeof(u32); 452 453 if (i == 0) 454 regcount += count; 455 } 456 } 457 458 CRASHDUMP_FINI(in); 459 460 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); 461 462 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 463 return; 464 465 if (a6xx_crashdumper_run(gpu, dumper)) 466 return; 467 468 obj->handle = dbgahb; 469 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 470 datasize); 471 } 472 473 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu, 474 struct a6xx_gpu_state *a6xx_state, 475 struct a6xx_crashdumper *dumper) 476 { 477 int i; 478 479 a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state, 480 ARRAY_SIZE(a6xx_dbgahb_clusters), 481 sizeof(*a6xx_state->dbgahb_clusters)); 482 483 if (!a6xx_state->dbgahb_clusters) 484 return; 485 486 a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters); 487 488 for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++) 489 a6xx_get_dbgahb_cluster(gpu, a6xx_state, 490 &a6xx_dbgahb_clusters[i], 491 &a6xx_state->dbgahb_clusters[i], dumper); 492 } 493 494 /* Read a data cluster from the CP aperture with the crashdumper */ 495 static void a6xx_get_cluster(struct msm_gpu *gpu, 496 struct a6xx_gpu_state *a6xx_state, 497 const struct a6xx_cluster *cluster, 498 struct a6xx_gpu_state_obj *obj, 499 struct a6xx_crashdumper *dumper) 500 { 501 u64 *in = dumper->ptr; 502 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 503 size_t datasize; 504 int i, regcount = 0; 505 506 /* Some clusters need a selector register to be programmed too */ 507 if (cluster->sel_reg) 508 in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val); 509 510 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { 511 int j; 512 513 in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD, 514 (cluster->id << 8) | (i << 4) | i); 515 516 for (j = 0; j < cluster->count; j += 2) { 517 int count = RANGE(cluster->registers, j); 518 519 in += CRASHDUMP_READ(in, cluster->registers[j], 520 count, out); 521 522 out += count * sizeof(u32); 523 524 if (i == 0) 525 regcount += count; 526 } 527 } 528 529 CRASHDUMP_FINI(in); 530 531 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); 532 533 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 534 return; 535 536 if (a6xx_crashdumper_run(gpu, dumper)) 537 return; 538 539 obj->handle = cluster; 540 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 541 datasize); 542 } 543 544 static void a6xx_get_clusters(struct msm_gpu *gpu, 545 struct a6xx_gpu_state *a6xx_state, 546 struct a6xx_crashdumper *dumper) 547 { 548 int i; 549 550 a6xx_state->clusters = state_kcalloc(a6xx_state, 551 ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters)); 552 553 if (!a6xx_state->clusters) 554 return; 555 556 a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters); 557 558 for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++) 559 a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i], 560 &a6xx_state->clusters[i], dumper); 561 } 562 563 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */ 564 static void a6xx_get_shader_block(struct msm_gpu *gpu, 565 struct a6xx_gpu_state *a6xx_state, 566 const struct a6xx_shader_block *block, 567 struct a6xx_gpu_state_obj *obj, 568 struct a6xx_crashdumper *dumper) 569 { 570 u64 *in = dumper->ptr; 571 size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32); 572 int i; 573 574 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 575 return; 576 577 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { 578 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, 579 (block->type << 8) | i); 580 581 in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE, 582 block->size, dumper->iova + A6XX_CD_DATA_OFFSET); 583 } 584 585 CRASHDUMP_FINI(in); 586 587 if (a6xx_crashdumper_run(gpu, dumper)) 588 return; 589 590 obj->handle = block; 591 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 592 datasize); 593 } 594 595 static void a6xx_get_shaders(struct msm_gpu *gpu, 596 struct a6xx_gpu_state *a6xx_state, 597 struct a6xx_crashdumper *dumper) 598 { 599 int i; 600 601 a6xx_state->shaders = state_kcalloc(a6xx_state, 602 ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders)); 603 604 if (!a6xx_state->shaders) 605 return; 606 607 a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks); 608 609 for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++) 610 a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i], 611 &a6xx_state->shaders[i], dumper); 612 } 613 614 /* Read registers from behind the HLSQ aperture with the crashdumper */ 615 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu, 616 struct a6xx_gpu_state *a6xx_state, 617 const struct a6xx_registers *regs, 618 struct a6xx_gpu_state_obj *obj, 619 struct a6xx_crashdumper *dumper) 620 621 { 622 u64 *in = dumper->ptr; 623 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 624 int i, regcount = 0; 625 626 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1); 627 628 for (i = 0; i < regs->count; i += 2) { 629 u32 count = RANGE(regs->registers, i); 630 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + 631 regs->registers[i] - (regs->val0 >> 2); 632 633 in += CRASHDUMP_READ(in, offset, count, out); 634 635 out += count * sizeof(u32); 636 regcount += count; 637 } 638 639 CRASHDUMP_FINI(in); 640 641 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) 642 return; 643 644 if (a6xx_crashdumper_run(gpu, dumper)) 645 return; 646 647 obj->handle = regs; 648 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 649 regcount * sizeof(u32)); 650 } 651 652 /* Read a block of registers using the crashdumper */ 653 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu, 654 struct a6xx_gpu_state *a6xx_state, 655 const struct a6xx_registers *regs, 656 struct a6xx_gpu_state_obj *obj, 657 struct a6xx_crashdumper *dumper) 658 659 { 660 u64 *in = dumper->ptr; 661 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 662 int i, regcount = 0; 663 664 /* Some blocks might need to program a selector register first */ 665 if (regs->val0) 666 in += CRASHDUMP_WRITE(in, regs->val0, regs->val1); 667 668 for (i = 0; i < regs->count; i += 2) { 669 u32 count = RANGE(regs->registers, i); 670 671 in += CRASHDUMP_READ(in, regs->registers[i], count, out); 672 673 out += count * sizeof(u32); 674 regcount += count; 675 } 676 677 CRASHDUMP_FINI(in); 678 679 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) 680 return; 681 682 if (a6xx_crashdumper_run(gpu, dumper)) 683 return; 684 685 obj->handle = regs; 686 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 687 regcount * sizeof(u32)); 688 } 689 690 /* Read a block of registers via AHB */ 691 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu, 692 struct a6xx_gpu_state *a6xx_state, 693 const struct a6xx_registers *regs, 694 struct a6xx_gpu_state_obj *obj) 695 { 696 int i, regcount = 0, index = 0; 697 698 for (i = 0; i < regs->count; i += 2) 699 regcount += RANGE(regs->registers, i); 700 701 obj->handle = (const void *) regs; 702 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32)); 703 if (!obj->data) 704 return; 705 706 for (i = 0; i < regs->count; i += 2) { 707 u32 count = RANGE(regs->registers, i); 708 int j; 709 710 for (j = 0; j < count; j++) 711 obj->data[index++] = gpu_read(gpu, 712 regs->registers[i] + j); 713 } 714 } 715 716 /* Read a block of GMU registers */ 717 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu, 718 struct a6xx_gpu_state *a6xx_state, 719 const struct a6xx_registers *regs, 720 struct a6xx_gpu_state_obj *obj) 721 { 722 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 723 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 724 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 725 int i, regcount = 0, index = 0; 726 727 for (i = 0; i < regs->count; i += 2) 728 regcount += RANGE(regs->registers, i); 729 730 obj->handle = (const void *) regs; 731 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32)); 732 if (!obj->data) 733 return; 734 735 for (i = 0; i < regs->count; i += 2) { 736 u32 count = RANGE(regs->registers, i); 737 int j; 738 739 for (j = 0; j < count; j++) 740 obj->data[index++] = gmu_read(gmu, 741 regs->registers[i] + j); 742 } 743 } 744 745 static void a6xx_get_gmu_registers(struct msm_gpu *gpu, 746 struct a6xx_gpu_state *a6xx_state) 747 { 748 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 749 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 750 751 a6xx_state->gmu_registers = state_kcalloc(a6xx_state, 752 2, sizeof(*a6xx_state->gmu_registers)); 753 754 if (!a6xx_state->gmu_registers) 755 return; 756 757 a6xx_state->nr_gmu_registers = 2; 758 759 /* Get the CX GMU registers from AHB */ 760 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0], 761 &a6xx_state->gmu_registers[0]); 762 763 if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) 764 return; 765 766 /* Set the fence to ALLOW mode so we can access the registers */ 767 gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0); 768 769 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1], 770 &a6xx_state->gmu_registers[1]); 771 } 772 773 static void a6xx_get_registers(struct msm_gpu *gpu, 774 struct a6xx_gpu_state *a6xx_state, 775 struct a6xx_crashdumper *dumper) 776 { 777 int i, count = ARRAY_SIZE(a6xx_ahb_reglist) + 778 ARRAY_SIZE(a6xx_reglist) + 779 ARRAY_SIZE(a6xx_hlsq_reglist); 780 int index = 0; 781 782 a6xx_state->registers = state_kcalloc(a6xx_state, 783 count, sizeof(*a6xx_state->registers)); 784 785 if (!a6xx_state->registers) 786 return; 787 788 a6xx_state->nr_registers = count; 789 790 for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++) 791 a6xx_get_ahb_gpu_registers(gpu, 792 a6xx_state, &a6xx_ahb_reglist[i], 793 &a6xx_state->registers[index++]); 794 795 for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++) 796 a6xx_get_crashdumper_registers(gpu, 797 a6xx_state, &a6xx_reglist[i], 798 &a6xx_state->registers[index++], 799 dumper); 800 801 for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++) 802 a6xx_get_crashdumper_hlsq_registers(gpu, 803 a6xx_state, &a6xx_hlsq_reglist[i], 804 &a6xx_state->registers[index++], 805 dumper); 806 } 807 808 /* Read a block of data from an indexed register pair */ 809 static void a6xx_get_indexed_regs(struct msm_gpu *gpu, 810 struct a6xx_gpu_state *a6xx_state, 811 const struct a6xx_indexed_registers *indexed, 812 struct a6xx_gpu_state_obj *obj) 813 { 814 int i; 815 816 obj->handle = (const void *) indexed; 817 obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32)); 818 if (!obj->data) 819 return; 820 821 /* All the indexed banks start at address 0 */ 822 gpu_write(gpu, indexed->addr, 0); 823 824 /* Read the data - each read increments the internal address by 1 */ 825 for (i = 0; i < indexed->count; i++) 826 obj->data[i] = gpu_read(gpu, indexed->data); 827 } 828 829 static void a6xx_get_indexed_registers(struct msm_gpu *gpu, 830 struct a6xx_gpu_state *a6xx_state) 831 { 832 u32 mempool_size; 833 int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1; 834 int i; 835 836 a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count, 837 sizeof(a6xx_state->indexed_regs)); 838 if (!a6xx_state->indexed_regs) 839 return; 840 841 for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++) 842 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i], 843 &a6xx_state->indexed_regs[i]); 844 845 /* Set the CP mempool size to 0 to stabilize it while dumping */ 846 mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE); 847 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0); 848 849 /* Get the contents of the CP mempool */ 850 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed, 851 &a6xx_state->indexed_regs[i]); 852 853 /* 854 * Offset 0x2000 in the mempool is the size - copy the saved size over 855 * so the data is consistent 856 */ 857 a6xx_state->indexed_regs[i].data[0x2000] = mempool_size; 858 859 /* Restore the size in the hardware */ 860 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size); 861 862 a6xx_state->nr_indexed_regs = count; 863 } 864 865 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) 866 { 867 struct a6xx_crashdumper dumper = { 0 }; 868 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 869 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 870 struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state), 871 GFP_KERNEL); 872 873 if (!a6xx_state) 874 return ERR_PTR(-ENOMEM); 875 876 INIT_LIST_HEAD(&a6xx_state->objs); 877 878 /* Get the generic state from the adreno core */ 879 adreno_gpu_state_get(gpu, &a6xx_state->base); 880 881 a6xx_get_gmu_registers(gpu, a6xx_state); 882 883 /* If GX isn't on the rest of the data isn't going to be accessible */ 884 if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) 885 return &a6xx_state->base; 886 887 /* Get the banks of indexed registers */ 888 a6xx_get_indexed_registers(gpu, a6xx_state); 889 890 /* Try to initialize the crashdumper */ 891 if (!a6xx_crashdumper_init(gpu, &dumper)) { 892 a6xx_get_registers(gpu, a6xx_state, &dumper); 893 a6xx_get_shaders(gpu, a6xx_state, &dumper); 894 a6xx_get_clusters(gpu, a6xx_state, &dumper); 895 a6xx_get_dbgahb_clusters(gpu, a6xx_state, &dumper); 896 897 msm_gem_kernel_put(dumper.bo, gpu->aspace, true); 898 } 899 900 a6xx_get_debugbus(gpu, a6xx_state); 901 902 return &a6xx_state->base; 903 } 904 905 void a6xx_gpu_state_destroy(struct kref *kref) 906 { 907 struct a6xx_state_memobj *obj, *tmp; 908 struct msm_gpu_state *state = container_of(kref, 909 struct msm_gpu_state, ref); 910 struct a6xx_gpu_state *a6xx_state = container_of(state, 911 struct a6xx_gpu_state, base); 912 913 list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) 914 kfree(obj); 915 916 adreno_gpu_state_destroy(state); 917 kfree(a6xx_state); 918 } 919 920 int a6xx_gpu_state_put(struct msm_gpu_state *state) 921 { 922 if (IS_ERR_OR_NULL(state)) 923 return 1; 924 925 return kref_put(&state->ref, a6xx_gpu_state_destroy); 926 } 927 928 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count, 929 struct drm_printer *p) 930 { 931 int i, index = 0; 932 933 if (!data) 934 return; 935 936 for (i = 0; i < count; i += 2) { 937 u32 count = RANGE(registers, i); 938 u32 offset = registers[i]; 939 int j; 940 941 for (j = 0; j < count; index++, offset++, j++) { 942 if (data[index] == 0xdeafbead) 943 continue; 944 945 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", 946 offset << 2, data[index]); 947 } 948 } 949 } 950 951 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data) 952 { 953 char out[ASCII85_BUFSZ]; 954 long i, l, datalen = 0; 955 956 for (i = 0; i < len >> 2; i++) { 957 if (data[i]) 958 datalen = (i + 1) << 2; 959 } 960 961 if (datalen == 0) 962 return; 963 964 drm_puts(p, " data: !!ascii85 |\n"); 965 drm_puts(p, " "); 966 967 968 l = ascii85_encode_len(datalen); 969 970 for (i = 0; i < l; i++) 971 drm_puts(p, ascii85_encode(data[i], out)); 972 973 drm_puts(p, "\n"); 974 } 975 976 static void print_name(struct drm_printer *p, const char *fmt, const char *name) 977 { 978 drm_puts(p, fmt); 979 drm_puts(p, name); 980 drm_puts(p, "\n"); 981 } 982 983 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj, 984 struct drm_printer *p) 985 { 986 const struct a6xx_shader_block *block = obj->handle; 987 int i; 988 989 if (!obj->handle) 990 return; 991 992 print_name(p, " - type: ", block->name); 993 994 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { 995 drm_printf(p, " - bank: %d\n", i); 996 drm_printf(p, " size: %d\n", block->size); 997 998 if (!obj->data) 999 continue; 1000 1001 print_ascii85(p, block->size << 2, 1002 obj->data + (block->size * i)); 1003 } 1004 } 1005 1006 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data, 1007 struct drm_printer *p) 1008 { 1009 int ctx, index = 0; 1010 1011 for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) { 1012 int j; 1013 1014 drm_printf(p, " - context: %d\n", ctx); 1015 1016 for (j = 0; j < size; j += 2) { 1017 u32 count = RANGE(registers, j); 1018 u32 offset = registers[j]; 1019 int k; 1020 1021 for (k = 0; k < count; index++, offset++, k++) { 1022 if (data[index] == 0xdeafbead) 1023 continue; 1024 1025 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", 1026 offset << 2, data[index]); 1027 } 1028 } 1029 } 1030 } 1031 1032 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj, 1033 struct drm_printer *p) 1034 { 1035 const struct a6xx_dbgahb_cluster *dbgahb = obj->handle; 1036 1037 if (dbgahb) { 1038 print_name(p, " - cluster-name: ", dbgahb->name); 1039 a6xx_show_cluster_data(dbgahb->registers, dbgahb->count, 1040 obj->data, p); 1041 } 1042 } 1043 1044 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj, 1045 struct drm_printer *p) 1046 { 1047 const struct a6xx_cluster *cluster = obj->handle; 1048 1049 if (cluster) { 1050 print_name(p, " - cluster-name: ", cluster->name); 1051 a6xx_show_cluster_data(cluster->registers, cluster->count, 1052 obj->data, p); 1053 } 1054 } 1055 1056 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj, 1057 struct drm_printer *p) 1058 { 1059 const struct a6xx_indexed_registers *indexed = obj->handle; 1060 1061 if (!indexed) 1062 return; 1063 1064 print_name(p, " - regs-name: ", indexed->name); 1065 drm_printf(p, " dwords: %d\n", indexed->count); 1066 1067 print_ascii85(p, indexed->count << 2, obj->data); 1068 } 1069 1070 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block, 1071 u32 *data, struct drm_printer *p) 1072 { 1073 if (block) { 1074 print_name(p, " - debugbus-block: ", block->name); 1075 1076 /* 1077 * count for regular debugbus data is in quadwords, 1078 * but print the size in dwords for consistency 1079 */ 1080 drm_printf(p, " count: %d\n", block->count << 1); 1081 1082 print_ascii85(p, block->count << 3, data); 1083 } 1084 } 1085 1086 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state, 1087 struct drm_printer *p) 1088 { 1089 int i; 1090 1091 for (i = 0; i < a6xx_state->nr_debugbus; i++) { 1092 struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i]; 1093 1094 a6xx_show_debugbus_block(obj->handle, obj->data, p); 1095 } 1096 1097 if (a6xx_state->vbif_debugbus) { 1098 struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus; 1099 1100 drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n"); 1101 drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE); 1102 1103 /* vbif debugbus data is in dwords. Confusing, huh? */ 1104 print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data); 1105 } 1106 1107 for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) { 1108 struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i]; 1109 1110 a6xx_show_debugbus_block(obj->handle, obj->data, p); 1111 } 1112 } 1113 1114 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, 1115 struct drm_printer *p) 1116 { 1117 struct a6xx_gpu_state *a6xx_state = container_of(state, 1118 struct a6xx_gpu_state, base); 1119 int i; 1120 1121 if (IS_ERR_OR_NULL(state)) 1122 return; 1123 1124 adreno_show(gpu, state, p); 1125 1126 drm_puts(p, "registers:\n"); 1127 for (i = 0; i < a6xx_state->nr_registers; i++) { 1128 struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i]; 1129 const struct a6xx_registers *regs = obj->handle; 1130 1131 if (!obj->handle) 1132 continue; 1133 1134 a6xx_show_registers(regs->registers, obj->data, regs->count, p); 1135 } 1136 1137 drm_puts(p, "registers-gmu:\n"); 1138 for (i = 0; i < a6xx_state->nr_gmu_registers; i++) { 1139 struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i]; 1140 const struct a6xx_registers *regs = obj->handle; 1141 1142 if (!obj->handle) 1143 continue; 1144 1145 a6xx_show_registers(regs->registers, obj->data, regs->count, p); 1146 } 1147 1148 drm_puts(p, "indexed-registers:\n"); 1149 for (i = 0; i < a6xx_state->nr_indexed_regs; i++) 1150 a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p); 1151 1152 drm_puts(p, "shader-blocks:\n"); 1153 for (i = 0; i < a6xx_state->nr_shaders; i++) 1154 a6xx_show_shader(&a6xx_state->shaders[i], p); 1155 1156 drm_puts(p, "clusters:\n"); 1157 for (i = 0; i < a6xx_state->nr_clusters; i++) 1158 a6xx_show_cluster(&a6xx_state->clusters[i], p); 1159 1160 for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) 1161 a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p); 1162 1163 drm_puts(p, "debugbus:\n"); 1164 a6xx_show_debugbus(a6xx_state, p); 1165 } 1166