1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */ 3 4 #include <linux/ascii85.h> 5 #include "msm_gem.h" 6 #include "a6xx_gpu.h" 7 #include "a6xx_gmu.h" 8 #include "a6xx_gpu_state.h" 9 #include "a6xx_gmu.xml.h" 10 11 static const unsigned int *gen7_0_0_external_core_regs[] __always_unused; 12 static const unsigned int *gen7_2_0_external_core_regs[] __always_unused; 13 static const unsigned int *gen7_9_0_external_core_regs[] __always_unused; 14 static const struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] __always_unused; 15 static const u32 gen7_9_0_cx_debugbus_blocks[] __always_unused; 16 17 #include "adreno_gen7_0_0_snapshot.h" 18 #include "adreno_gen7_2_0_snapshot.h" 19 #include "adreno_gen7_9_0_snapshot.h" 20 21 struct a6xx_gpu_state_obj { 22 const void *handle; 23 u32 *data; 24 u32 count; /* optional, used when count potentially read from hw */ 25 }; 26 27 struct a6xx_gpu_state { 28 struct msm_gpu_state base; 29 30 struct a6xx_gpu_state_obj *gmu_registers; 31 int nr_gmu_registers; 32 33 struct a6xx_gpu_state_obj *registers; 34 int nr_registers; 35 36 struct a6xx_gpu_state_obj *shaders; 37 int nr_shaders; 38 39 struct a6xx_gpu_state_obj *clusters; 40 int nr_clusters; 41 42 struct a6xx_gpu_state_obj *dbgahb_clusters; 43 int nr_dbgahb_clusters; 44 45 struct a6xx_gpu_state_obj *indexed_regs; 46 int nr_indexed_regs; 47 48 struct a6xx_gpu_state_obj *debugbus; 49 int nr_debugbus; 50 51 struct a6xx_gpu_state_obj *vbif_debugbus; 52 53 struct a6xx_gpu_state_obj *cx_debugbus; 54 int nr_cx_debugbus; 55 56 struct msm_gpu_state_bo *gmu_log; 57 struct msm_gpu_state_bo *gmu_hfi; 58 struct msm_gpu_state_bo *gmu_debug; 59 60 s32 hfi_queue_history[2][HFI_HISTORY_SZ]; 61 62 struct list_head objs; 63 64 bool gpu_initialized; 65 }; 66 67 static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val) 68 { 69 in[0] = val; 70 in[1] = (((u64) reg) << 44 | (1 << 21) | 1); 71 72 return 2; 73 } 74 75 static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target) 76 { 77 in[0] = target; 78 in[1] = (((u64) reg) << 44 | dwords); 79 80 return 2; 81 } 82 83 static inline int CRASHDUMP_FINI(u64 *in) 84 { 85 in[0] = 0; 86 in[1] = 0; 87 88 return 2; 89 } 90 91 struct a6xx_crashdumper { 92 void *ptr; 93 struct drm_gem_object *bo; 94 u64 iova; 95 }; 96 97 struct a6xx_state_memobj { 98 struct list_head node; 99 unsigned long long data[]; 100 }; 101 102 static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize) 103 { 104 struct a6xx_state_memobj *obj = 105 kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL); 106 107 if (!obj) 108 return NULL; 109 110 list_add_tail(&obj->node, &a6xx_state->objs); 111 return &obj->data; 112 } 113 114 static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src, 115 size_t size) 116 { 117 void *dst = state_kcalloc(a6xx_state, 1, size); 118 119 if (dst) 120 memcpy(dst, src, size); 121 return dst; 122 } 123 124 /* 125 * Allocate 1MB for the crashdumper scratch region - 8k for the script and 126 * the rest for the data 127 */ 128 #define A6XX_CD_DATA_OFFSET 8192 129 #define A6XX_CD_DATA_SIZE (SZ_1M - 8192) 130 131 static int a6xx_crashdumper_init(struct msm_gpu *gpu, 132 struct a6xx_crashdumper *dumper) 133 { 134 dumper->ptr = msm_gem_kernel_new(gpu->dev, 135 SZ_1M, MSM_BO_WC, gpu->vm, 136 &dumper->bo, &dumper->iova); 137 138 if (!IS_ERR(dumper->ptr)) 139 msm_gem_object_set_name(dumper->bo, "crashdump"); 140 141 return PTR_ERR_OR_ZERO(dumper->ptr); 142 } 143 144 static int a6xx_crashdumper_run(struct msm_gpu *gpu, 145 struct a6xx_crashdumper *dumper) 146 { 147 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 148 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 149 u32 val; 150 int ret; 151 152 if (IS_ERR_OR_NULL(dumper->ptr)) 153 return -EINVAL; 154 155 if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu)) 156 return -EINVAL; 157 158 /* Make sure all pending memory writes are posted */ 159 wmb(); 160 161 gpu_write64(gpu, REG_A6XX_CP_CRASH_DUMP_SCRIPT_BASE, dumper->iova); 162 163 gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1); 164 165 ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val, 166 val & 0x02, 100, 10000); 167 168 gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0); 169 170 return ret; 171 } 172 173 /* read a value from the GX debug bus */ 174 static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset, 175 u32 *data) 176 { 177 u32 reg; 178 179 if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) { 180 reg = A7XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | 181 A7XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); 182 } else { 183 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | 184 A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); 185 } 186 187 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg); 188 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg); 189 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg); 190 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg); 191 192 /* Wait 1 us to make sure the data is flowing */ 193 udelay(1); 194 195 data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2); 196 data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1); 197 198 return 2; 199 } 200 201 #define cxdbg_write(ptr, offset, val) \ 202 writel((val), (ptr) + ((offset) << 2)) 203 204 #define cxdbg_read(ptr, offset) \ 205 readl((ptr) + ((offset) << 2)) 206 207 /* read a value from the CX debug bus */ 208 static int cx_debugbus_read(struct msm_gpu *gpu, void __iomem *cxdbg, u32 block, u32 offset, 209 u32 *data) 210 { 211 u32 reg; 212 213 if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) { 214 reg = A7XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | 215 A7XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); 216 } else { 217 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | 218 A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); 219 } 220 221 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg); 222 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg); 223 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg); 224 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg); 225 226 /* Wait 1 us to make sure the data is flowing */ 227 udelay(1); 228 229 data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2); 230 data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1); 231 232 return 2; 233 } 234 235 /* Read a chunk of data from the VBIF debug bus */ 236 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1, 237 u32 reg, int count, u32 *data) 238 { 239 int i; 240 241 gpu_write(gpu, ctrl0, reg); 242 243 for (i = 0; i < count; i++) { 244 gpu_write(gpu, ctrl1, i); 245 data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT); 246 } 247 248 return count; 249 } 250 251 #define AXI_ARB_BLOCKS 2 252 #define XIN_AXI_BLOCKS 5 253 #define XIN_CORE_BLOCKS 4 254 255 #define VBIF_DEBUGBUS_BLOCK_SIZE \ 256 ((16 * AXI_ARB_BLOCKS) + \ 257 (18 * XIN_AXI_BLOCKS) + \ 258 (12 * XIN_CORE_BLOCKS)) 259 260 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu, 261 struct a6xx_gpu_state *a6xx_state, 262 struct a6xx_gpu_state_obj *obj) 263 { 264 u32 clk, *ptr; 265 int i; 266 267 obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE, 268 sizeof(u32)); 269 if (!obj->data) 270 return; 271 272 obj->handle = NULL; 273 274 /* Get the current clock setting */ 275 clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON); 276 277 /* Force on the bus so we can read it */ 278 gpu_write(gpu, REG_A6XX_VBIF_CLKON, 279 clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS); 280 281 /* We will read from BUS2 first, so disable BUS1 */ 282 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0); 283 284 /* Enable the VBIF bus for reading */ 285 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1); 286 287 ptr = obj->data; 288 289 for (i = 0; i < AXI_ARB_BLOCKS; i++) 290 ptr += vbif_debugbus_read(gpu, 291 REG_A6XX_VBIF_TEST_BUS2_CTRL0, 292 REG_A6XX_VBIF_TEST_BUS2_CTRL1, 293 1 << (i + 16), 16, ptr); 294 295 for (i = 0; i < XIN_AXI_BLOCKS; i++) 296 ptr += vbif_debugbus_read(gpu, 297 REG_A6XX_VBIF_TEST_BUS2_CTRL0, 298 REG_A6XX_VBIF_TEST_BUS2_CTRL1, 299 1 << i, 18, ptr); 300 301 /* Stop BUS2 so we can turn on BUS1 */ 302 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0); 303 304 for (i = 0; i < XIN_CORE_BLOCKS; i++) 305 ptr += vbif_debugbus_read(gpu, 306 REG_A6XX_VBIF_TEST_BUS1_CTRL0, 307 REG_A6XX_VBIF_TEST_BUS1_CTRL1, 308 1 << i, 12, ptr); 309 310 /* Restore the VBIF clock setting */ 311 gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk); 312 } 313 314 static void a6xx_get_debugbus_block(struct msm_gpu *gpu, 315 struct a6xx_gpu_state *a6xx_state, 316 const struct a6xx_debugbus_block *block, 317 struct a6xx_gpu_state_obj *obj) 318 { 319 int i; 320 u32 *ptr; 321 322 obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64)); 323 if (!obj->data) 324 return; 325 326 obj->handle = block; 327 328 for (ptr = obj->data, i = 0; i < block->count; i++) 329 ptr += debugbus_read(gpu, block->id, i, ptr); 330 } 331 332 static void a6xx_get_cx_debugbus_block(struct msm_gpu *gpu, 333 void __iomem *cxdbg, 334 struct a6xx_gpu_state *a6xx_state, 335 const struct a6xx_debugbus_block *block, 336 struct a6xx_gpu_state_obj *obj) 337 { 338 int i; 339 u32 *ptr; 340 341 obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64)); 342 if (!obj->data) 343 return; 344 345 obj->handle = block; 346 347 for (ptr = obj->data, i = 0; i < block->count; i++) 348 ptr += cx_debugbus_read(gpu, cxdbg, block->id, i, ptr); 349 } 350 351 static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu, 352 struct a6xx_gpu_state *a6xx_state) 353 { 354 int nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) + 355 (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0); 356 357 if (adreno_is_a650_family(to_adreno_gpu(gpu))) 358 nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks); 359 360 a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks, 361 sizeof(*a6xx_state->debugbus)); 362 363 if (a6xx_state->debugbus) { 364 int i; 365 366 for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++) 367 a6xx_get_debugbus_block(gpu, 368 a6xx_state, 369 &a6xx_debugbus_blocks[i], 370 &a6xx_state->debugbus[i]); 371 372 a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks); 373 374 /* 375 * GBIF has same debugbus as of other GPU blocks, fall back to 376 * default path if GPU uses GBIF, also GBIF uses exactly same 377 * ID as of VBIF. 378 */ 379 if (a6xx_has_gbif(to_adreno_gpu(gpu))) { 380 a6xx_get_debugbus_block(gpu, a6xx_state, 381 &a6xx_gbif_debugbus_block, 382 &a6xx_state->debugbus[i]); 383 384 a6xx_state->nr_debugbus += 1; 385 } 386 387 388 if (adreno_is_a650_family(to_adreno_gpu(gpu))) { 389 for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++) 390 a6xx_get_debugbus_block(gpu, 391 a6xx_state, 392 &a650_debugbus_blocks[i], 393 &a6xx_state->debugbus[i]); 394 } 395 } 396 } 397 398 static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu, 399 struct a6xx_gpu_state *a6xx_state) 400 { 401 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 402 int debugbus_blocks_count, gbif_debugbus_blocks_count, total_debugbus_blocks; 403 const u32 *debugbus_blocks, *gbif_debugbus_blocks; 404 int i; 405 406 if (adreno_gpu->info->family == ADRENO_7XX_GEN1) { 407 debugbus_blocks = gen7_0_0_debugbus_blocks; 408 debugbus_blocks_count = ARRAY_SIZE(gen7_0_0_debugbus_blocks); 409 gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks; 410 gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks); 411 } else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) { 412 debugbus_blocks = gen7_2_0_debugbus_blocks; 413 debugbus_blocks_count = ARRAY_SIZE(gen7_2_0_debugbus_blocks); 414 gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks; 415 gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks); 416 } else { 417 BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3); 418 debugbus_blocks = gen7_9_0_debugbus_blocks; 419 debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_debugbus_blocks); 420 gbif_debugbus_blocks = gen7_9_0_gbif_debugbus_blocks; 421 gbif_debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_gbif_debugbus_blocks); 422 } 423 424 total_debugbus_blocks = debugbus_blocks_count + gbif_debugbus_blocks_count; 425 426 a6xx_state->debugbus = state_kcalloc(a6xx_state, total_debugbus_blocks, 427 sizeof(*a6xx_state->debugbus)); 428 429 if (a6xx_state->debugbus) { 430 for (i = 0; i < debugbus_blocks_count; i++) { 431 a6xx_get_debugbus_block(gpu, 432 a6xx_state, &a7xx_debugbus_blocks[debugbus_blocks[i]], 433 &a6xx_state->debugbus[i]); 434 } 435 436 for (i = 0; i < gbif_debugbus_blocks_count; i++) { 437 a6xx_get_debugbus_block(gpu, 438 a6xx_state, &a7xx_debugbus_blocks[gbif_debugbus_blocks[i]], 439 &a6xx_state->debugbus[i + debugbus_blocks_count]); 440 } 441 442 a6xx_state->nr_debugbus = total_debugbus_blocks; 443 } 444 } 445 446 static void a6xx_get_debugbus(struct msm_gpu *gpu, 447 struct a6xx_gpu_state *a6xx_state) 448 { 449 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 450 struct resource *res; 451 void __iomem *cxdbg = NULL; 452 453 /* Set up the GX debug bus */ 454 455 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT, 456 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); 457 458 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM, 459 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); 460 461 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0); 462 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0); 463 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0); 464 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0); 465 466 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210); 467 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98); 468 469 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0); 470 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0); 471 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0); 472 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0); 473 474 /* Set up the CX debug bus - it lives elsewhere in the system so do a 475 * temporary ioremap for the registers 476 */ 477 res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM, 478 "cx_dbgc"); 479 480 if (res) 481 cxdbg = ioremap(res->start, resource_size(res)); 482 483 if (cxdbg) { 484 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT, 485 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); 486 487 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM, 488 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); 489 490 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); 491 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); 492 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); 493 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); 494 495 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0, 496 0x76543210); 497 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1, 498 0xFEDCBA98); 499 500 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); 501 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); 502 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); 503 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); 504 } 505 506 if (adreno_is_a7xx(adreno_gpu)) { 507 a7xx_get_debugbus_blocks(gpu, a6xx_state); 508 } else { 509 a6xx_get_debugbus_blocks(gpu, a6xx_state); 510 } 511 512 /* Dump the VBIF debugbus on applicable targets */ 513 if (!a6xx_has_gbif(adreno_gpu)) { 514 a6xx_state->vbif_debugbus = 515 state_kcalloc(a6xx_state, 1, 516 sizeof(*a6xx_state->vbif_debugbus)); 517 518 if (a6xx_state->vbif_debugbus) 519 a6xx_get_vbif_debugbus_block(gpu, a6xx_state, 520 a6xx_state->vbif_debugbus); 521 } 522 523 if (cxdbg) { 524 unsigned nr_cx_debugbus_blocks; 525 const struct a6xx_debugbus_block *cx_debugbus_blocks; 526 527 if (adreno_is_a7xx(adreno_gpu)) { 528 BUG_ON(adreno_gpu->info->family > ADRENO_7XX_GEN3); 529 cx_debugbus_blocks = a7xx_cx_debugbus_blocks; 530 nr_cx_debugbus_blocks = ARRAY_SIZE(a7xx_cx_debugbus_blocks); 531 } else { 532 cx_debugbus_blocks = a6xx_cx_debugbus_blocks; 533 nr_cx_debugbus_blocks = ARRAY_SIZE(a6xx_cx_debugbus_blocks); 534 } 535 536 a6xx_state->cx_debugbus = 537 state_kcalloc(a6xx_state, 538 nr_cx_debugbus_blocks, 539 sizeof(*a6xx_state->cx_debugbus)); 540 541 if (a6xx_state->cx_debugbus) { 542 int i; 543 544 for (i = 0; i < nr_cx_debugbus_blocks; i++) 545 a6xx_get_cx_debugbus_block(gpu, 546 cxdbg, 547 a6xx_state, 548 &cx_debugbus_blocks[i], 549 &a6xx_state->cx_debugbus[i]); 550 551 a6xx_state->nr_cx_debugbus = 552 nr_cx_debugbus_blocks; 553 } 554 555 iounmap(cxdbg); 556 } 557 } 558 559 #define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1) 560 561 /* Read a data cluster from behind the AHB aperture */ 562 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu, 563 struct a6xx_gpu_state *a6xx_state, 564 const struct a6xx_dbgahb_cluster *dbgahb, 565 struct a6xx_gpu_state_obj *obj, 566 struct a6xx_crashdumper *dumper) 567 { 568 u64 *in = dumper->ptr; 569 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 570 size_t datasize; 571 int i, regcount = 0; 572 573 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { 574 int j; 575 576 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, 577 (dbgahb->statetype + i * 2) << 8); 578 579 for (j = 0; j < dbgahb->count; j += 2) { 580 int count = RANGE(dbgahb->registers, j); 581 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + 582 dbgahb->registers[j] - (dbgahb->base >> 2); 583 584 in += CRASHDUMP_READ(in, offset, count, out); 585 586 out += count * sizeof(u32); 587 588 if (i == 0) 589 regcount += count; 590 } 591 } 592 593 CRASHDUMP_FINI(in); 594 595 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); 596 597 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 598 return; 599 600 if (a6xx_crashdumper_run(gpu, dumper)) 601 return; 602 603 obj->handle = dbgahb; 604 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 605 datasize); 606 } 607 608 static void a7xx_get_dbgahb_cluster(struct msm_gpu *gpu, 609 struct a6xx_gpu_state *a6xx_state, 610 const struct gen7_sptp_cluster_registers *dbgahb, 611 struct a6xx_gpu_state_obj *obj, 612 struct a6xx_crashdumper *dumper) 613 { 614 u64 *in = dumper->ptr; 615 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 616 size_t datasize; 617 int i, regcount = 0; 618 619 in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL, 620 A7XX_SP_READ_SEL_LOCATION(dbgahb->location_id) | 621 A7XX_SP_READ_SEL_PIPE(dbgahb->pipe_id) | 622 A7XX_SP_READ_SEL_STATETYPE(dbgahb->statetype)); 623 624 for (i = 0; dbgahb->regs[i] != UINT_MAX; i += 2) { 625 int count = RANGE(dbgahb->regs, i); 626 u32 offset = REG_A7XX_SP_AHB_READ_APERTURE + 627 dbgahb->regs[i] - dbgahb->regbase; 628 629 in += CRASHDUMP_READ(in, offset, count, out); 630 631 out += count * sizeof(u32); 632 regcount += count; 633 } 634 635 CRASHDUMP_FINI(in); 636 637 datasize = regcount * sizeof(u32); 638 639 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 640 return; 641 642 if (a6xx_crashdumper_run(gpu, dumper)) 643 return; 644 645 obj->handle = dbgahb; 646 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 647 datasize); 648 } 649 650 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu, 651 struct a6xx_gpu_state *a6xx_state, 652 struct a6xx_crashdumper *dumper) 653 { 654 int i; 655 656 a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state, 657 ARRAY_SIZE(a6xx_dbgahb_clusters), 658 sizeof(*a6xx_state->dbgahb_clusters)); 659 660 if (!a6xx_state->dbgahb_clusters) 661 return; 662 663 a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters); 664 665 for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++) 666 a6xx_get_dbgahb_cluster(gpu, a6xx_state, 667 &a6xx_dbgahb_clusters[i], 668 &a6xx_state->dbgahb_clusters[i], dumper); 669 } 670 671 static void a7xx_get_dbgahb_clusters(struct msm_gpu *gpu, 672 struct a6xx_gpu_state *a6xx_state, 673 struct a6xx_crashdumper *dumper) 674 { 675 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 676 int i; 677 const struct gen7_sptp_cluster_registers *dbgahb_clusters; 678 unsigned dbgahb_clusters_size; 679 680 if (adreno_gpu->info->family == ADRENO_7XX_GEN1) { 681 dbgahb_clusters = gen7_0_0_sptp_clusters; 682 dbgahb_clusters_size = ARRAY_SIZE(gen7_0_0_sptp_clusters); 683 } else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) { 684 dbgahb_clusters = gen7_2_0_sptp_clusters; 685 dbgahb_clusters_size = ARRAY_SIZE(gen7_2_0_sptp_clusters); 686 } else { 687 BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3); 688 dbgahb_clusters = gen7_9_0_sptp_clusters; 689 dbgahb_clusters_size = ARRAY_SIZE(gen7_9_0_sptp_clusters); 690 } 691 692 a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state, 693 dbgahb_clusters_size, 694 sizeof(*a6xx_state->dbgahb_clusters)); 695 696 if (!a6xx_state->dbgahb_clusters) 697 return; 698 699 a6xx_state->nr_dbgahb_clusters = dbgahb_clusters_size; 700 701 for (i = 0; i < dbgahb_clusters_size; i++) 702 a7xx_get_dbgahb_cluster(gpu, a6xx_state, 703 &dbgahb_clusters[i], 704 &a6xx_state->dbgahb_clusters[i], dumper); 705 } 706 707 /* Read a data cluster from the CP aperture with the crashdumper */ 708 static void a6xx_get_cluster(struct msm_gpu *gpu, 709 struct a6xx_gpu_state *a6xx_state, 710 const struct a6xx_cluster *cluster, 711 struct a6xx_gpu_state_obj *obj, 712 struct a6xx_crashdumper *dumper) 713 { 714 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 715 u64 *in = dumper->ptr; 716 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 717 size_t datasize; 718 int i, regcount = 0; 719 u32 id = cluster->id; 720 721 /* Skip registers that are not present on older generation */ 722 if (!adreno_is_a660_family(adreno_gpu) && 723 cluster->registers == a660_fe_cluster) 724 return; 725 726 if (adreno_is_a650_family(adreno_gpu) && 727 cluster->registers == a6xx_ps_cluster) 728 id = CLUSTER_VPC_PS; 729 730 /* Some clusters need a selector register to be programmed too */ 731 if (cluster->sel_reg) 732 in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val); 733 734 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { 735 int j; 736 737 in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD, 738 (id << 8) | (i << 4) | i); 739 740 for (j = 0; j < cluster->count; j += 2) { 741 int count = RANGE(cluster->registers, j); 742 743 in += CRASHDUMP_READ(in, cluster->registers[j], 744 count, out); 745 746 out += count * sizeof(u32); 747 748 if (i == 0) 749 regcount += count; 750 } 751 } 752 753 CRASHDUMP_FINI(in); 754 755 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); 756 757 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 758 return; 759 760 if (a6xx_crashdumper_run(gpu, dumper)) 761 return; 762 763 obj->handle = cluster; 764 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 765 datasize); 766 } 767 768 static void a7xx_get_cluster(struct msm_gpu *gpu, 769 struct a6xx_gpu_state *a6xx_state, 770 const struct gen7_cluster_registers *cluster, 771 struct a6xx_gpu_state_obj *obj, 772 struct a6xx_crashdumper *dumper) 773 { 774 u64 *in = dumper->ptr; 775 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 776 size_t datasize; 777 int i, regcount = 0; 778 779 in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD, 780 A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) | 781 A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) | 782 A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id)); 783 784 /* Some clusters need a selector register to be programmed too */ 785 if (cluster->sel) 786 in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val); 787 788 for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) { 789 int count = RANGE(cluster->regs, i); 790 791 in += CRASHDUMP_READ(in, cluster->regs[i], 792 count, out); 793 794 out += count * sizeof(u32); 795 regcount += count; 796 } 797 798 CRASHDUMP_FINI(in); 799 800 datasize = regcount * sizeof(u32); 801 802 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 803 return; 804 805 if (a6xx_crashdumper_run(gpu, dumper)) 806 return; 807 808 obj->handle = cluster; 809 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 810 datasize); 811 } 812 813 static void a6xx_get_clusters(struct msm_gpu *gpu, 814 struct a6xx_gpu_state *a6xx_state, 815 struct a6xx_crashdumper *dumper) 816 { 817 int i; 818 819 a6xx_state->clusters = state_kcalloc(a6xx_state, 820 ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters)); 821 822 if (!a6xx_state->clusters) 823 return; 824 825 a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters); 826 827 for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++) 828 a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i], 829 &a6xx_state->clusters[i], dumper); 830 } 831 832 static void a7xx_get_clusters(struct msm_gpu *gpu, 833 struct a6xx_gpu_state *a6xx_state, 834 struct a6xx_crashdumper *dumper) 835 { 836 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 837 int i; 838 const struct gen7_cluster_registers *clusters; 839 unsigned clusters_size; 840 841 if (adreno_gpu->info->family == ADRENO_7XX_GEN1) { 842 clusters = gen7_0_0_clusters; 843 clusters_size = ARRAY_SIZE(gen7_0_0_clusters); 844 } else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) { 845 clusters = gen7_2_0_clusters; 846 clusters_size = ARRAY_SIZE(gen7_2_0_clusters); 847 } else { 848 BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3); 849 clusters = gen7_9_0_clusters; 850 clusters_size = ARRAY_SIZE(gen7_9_0_clusters); 851 } 852 853 a6xx_state->clusters = state_kcalloc(a6xx_state, 854 clusters_size, sizeof(*a6xx_state->clusters)); 855 856 if (!a6xx_state->clusters) 857 return; 858 859 a6xx_state->nr_clusters = clusters_size; 860 861 for (i = 0; i < clusters_size; i++) 862 a7xx_get_cluster(gpu, a6xx_state, &clusters[i], 863 &a6xx_state->clusters[i], dumper); 864 } 865 866 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */ 867 static void a6xx_get_shader_block(struct msm_gpu *gpu, 868 struct a6xx_gpu_state *a6xx_state, 869 const struct a6xx_shader_block *block, 870 struct a6xx_gpu_state_obj *obj, 871 struct a6xx_crashdumper *dumper) 872 { 873 u64 *in = dumper->ptr; 874 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 875 size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32); 876 int i; 877 878 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 879 return; 880 881 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { 882 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, 883 (block->type << 8) | i); 884 885 in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE, 886 block->size, out); 887 888 out += block->size * sizeof(u32); 889 } 890 891 CRASHDUMP_FINI(in); 892 893 if (a6xx_crashdumper_run(gpu, dumper)) 894 return; 895 896 obj->handle = block; 897 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 898 datasize); 899 } 900 901 static void a7xx_get_shader_block(struct msm_gpu *gpu, 902 struct a6xx_gpu_state *a6xx_state, 903 const struct gen7_shader_block *block, 904 struct a6xx_gpu_state_obj *obj, 905 struct a6xx_crashdumper *dumper) 906 { 907 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 908 u64 *in = dumper->ptr; 909 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 910 size_t datasize = block->size * block->num_sps * block->num_usptps * sizeof(u32); 911 int i, j; 912 913 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 914 return; 915 916 if (adreno_gpu->info->family == ADRENO_7XX_GEN1) { 917 gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 3); 918 } 919 920 for (i = 0; i < block->num_sps; i++) { 921 for (j = 0; j < block->num_usptps; j++) { 922 in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL, 923 A7XX_SP_READ_SEL_LOCATION(block->location) | 924 A7XX_SP_READ_SEL_PIPE(block->pipeid) | 925 A7XX_SP_READ_SEL_STATETYPE(block->statetype) | 926 A7XX_SP_READ_SEL_USPTP(j) | 927 A7XX_SP_READ_SEL_SPTP(i)); 928 929 in += CRASHDUMP_READ(in, REG_A7XX_SP_AHB_READ_APERTURE, 930 block->size, out); 931 932 out += block->size * sizeof(u32); 933 } 934 } 935 936 CRASHDUMP_FINI(in); 937 938 if (a6xx_crashdumper_run(gpu, dumper)) 939 goto out; 940 941 obj->handle = block; 942 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 943 datasize); 944 945 out: 946 if (adreno_gpu->info->family == ADRENO_7XX_GEN1) { 947 gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 0); 948 } 949 } 950 951 static void a6xx_get_shaders(struct msm_gpu *gpu, 952 struct a6xx_gpu_state *a6xx_state, 953 struct a6xx_crashdumper *dumper) 954 { 955 int i; 956 957 a6xx_state->shaders = state_kcalloc(a6xx_state, 958 ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders)); 959 960 if (!a6xx_state->shaders) 961 return; 962 963 a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks); 964 965 for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++) 966 a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i], 967 &a6xx_state->shaders[i], dumper); 968 } 969 970 static void a7xx_get_shaders(struct msm_gpu *gpu, 971 struct a6xx_gpu_state *a6xx_state, 972 struct a6xx_crashdumper *dumper) 973 { 974 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 975 const struct gen7_shader_block *shader_blocks; 976 unsigned num_shader_blocks; 977 int i; 978 979 if (adreno_gpu->info->family == ADRENO_7XX_GEN1) { 980 shader_blocks = gen7_0_0_shader_blocks; 981 num_shader_blocks = ARRAY_SIZE(gen7_0_0_shader_blocks); 982 } else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) { 983 shader_blocks = gen7_2_0_shader_blocks; 984 num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks); 985 } else { 986 BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3); 987 shader_blocks = gen7_9_0_shader_blocks; 988 num_shader_blocks = ARRAY_SIZE(gen7_9_0_shader_blocks); 989 } 990 991 a6xx_state->shaders = state_kcalloc(a6xx_state, 992 num_shader_blocks, sizeof(*a6xx_state->shaders)); 993 994 if (!a6xx_state->shaders) 995 return; 996 997 a6xx_state->nr_shaders = num_shader_blocks; 998 999 for (i = 0; i < num_shader_blocks; i++) 1000 a7xx_get_shader_block(gpu, a6xx_state, &shader_blocks[i], 1001 &a6xx_state->shaders[i], dumper); 1002 } 1003 1004 /* Read registers from behind the HLSQ aperture with the crashdumper */ 1005 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu, 1006 struct a6xx_gpu_state *a6xx_state, 1007 const struct a6xx_registers *regs, 1008 struct a6xx_gpu_state_obj *obj, 1009 struct a6xx_crashdumper *dumper) 1010 1011 { 1012 u64 *in = dumper->ptr; 1013 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 1014 int i, regcount = 0; 1015 1016 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1); 1017 1018 for (i = 0; i < regs->count; i += 2) { 1019 u32 count = RANGE(regs->registers, i); 1020 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + 1021 regs->registers[i] - (regs->val0 >> 2); 1022 1023 in += CRASHDUMP_READ(in, offset, count, out); 1024 1025 out += count * sizeof(u32); 1026 regcount += count; 1027 } 1028 1029 CRASHDUMP_FINI(in); 1030 1031 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) 1032 return; 1033 1034 if (a6xx_crashdumper_run(gpu, dumper)) 1035 return; 1036 1037 obj->handle = regs; 1038 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 1039 regcount * sizeof(u32)); 1040 } 1041 1042 /* Read a block of registers using the crashdumper */ 1043 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu, 1044 struct a6xx_gpu_state *a6xx_state, 1045 const struct a6xx_registers *regs, 1046 struct a6xx_gpu_state_obj *obj, 1047 struct a6xx_crashdumper *dumper) 1048 1049 { 1050 u64 *in = dumper->ptr; 1051 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 1052 int i, regcount = 0; 1053 1054 /* Skip unsupported registers on older generations */ 1055 if (!adreno_is_a660_family(to_adreno_gpu(gpu)) && 1056 (regs->registers == a660_registers)) 1057 return; 1058 1059 /* Some blocks might need to program a selector register first */ 1060 if (regs->val0) 1061 in += CRASHDUMP_WRITE(in, regs->val0, regs->val1); 1062 1063 for (i = 0; i < regs->count; i += 2) { 1064 u32 count = RANGE(regs->registers, i); 1065 1066 in += CRASHDUMP_READ(in, regs->registers[i], count, out); 1067 1068 out += count * sizeof(u32); 1069 regcount += count; 1070 } 1071 1072 CRASHDUMP_FINI(in); 1073 1074 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) 1075 return; 1076 1077 if (a6xx_crashdumper_run(gpu, dumper)) 1078 return; 1079 1080 obj->handle = regs; 1081 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 1082 regcount * sizeof(u32)); 1083 } 1084 1085 static void a7xx_get_crashdumper_registers(struct msm_gpu *gpu, 1086 struct a6xx_gpu_state *a6xx_state, 1087 const struct gen7_reg_list *regs, 1088 struct a6xx_gpu_state_obj *obj, 1089 struct a6xx_crashdumper *dumper) 1090 1091 { 1092 u64 *in = dumper->ptr; 1093 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 1094 int i, regcount = 0; 1095 1096 /* Some blocks might need to program a selector register first */ 1097 if (regs->sel) 1098 in += CRASHDUMP_WRITE(in, regs->sel->cd_reg, regs->sel->val); 1099 1100 for (i = 0; regs->regs[i] != UINT_MAX; i += 2) { 1101 u32 count = RANGE(regs->regs, i); 1102 1103 in += CRASHDUMP_READ(in, regs->regs[i], count, out); 1104 1105 out += count * sizeof(u32); 1106 regcount += count; 1107 } 1108 1109 CRASHDUMP_FINI(in); 1110 1111 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) 1112 return; 1113 1114 if (a6xx_crashdumper_run(gpu, dumper)) 1115 return; 1116 1117 obj->handle = regs->regs; 1118 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 1119 regcount * sizeof(u32)); 1120 } 1121 1122 1123 /* Read a block of registers via AHB */ 1124 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu, 1125 struct a6xx_gpu_state *a6xx_state, 1126 const struct a6xx_registers *regs, 1127 struct a6xx_gpu_state_obj *obj) 1128 { 1129 int i, regcount = 0, index = 0; 1130 1131 /* Skip unsupported registers on older generations */ 1132 if (!adreno_is_a660_family(to_adreno_gpu(gpu)) && 1133 (regs->registers == a660_registers)) 1134 return; 1135 1136 for (i = 0; i < regs->count; i += 2) 1137 regcount += RANGE(regs->registers, i); 1138 1139 obj->handle = (const void *) regs; 1140 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32)); 1141 if (!obj->data) 1142 return; 1143 1144 for (i = 0; i < regs->count; i += 2) { 1145 u32 count = RANGE(regs->registers, i); 1146 int j; 1147 1148 for (j = 0; j < count; j++) 1149 obj->data[index++] = gpu_read(gpu, 1150 regs->registers[i] + j); 1151 } 1152 } 1153 1154 static void a7xx_get_ahb_gpu_registers(struct msm_gpu *gpu, 1155 struct a6xx_gpu_state *a6xx_state, 1156 const u32 *regs, 1157 struct a6xx_gpu_state_obj *obj) 1158 { 1159 int i, regcount = 0, index = 0; 1160 1161 for (i = 0; regs[i] != UINT_MAX; i += 2) 1162 regcount += RANGE(regs, i); 1163 1164 obj->handle = (const void *) regs; 1165 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32)); 1166 if (!obj->data) 1167 return; 1168 1169 for (i = 0; regs[i] != UINT_MAX; i += 2) { 1170 u32 count = RANGE(regs, i); 1171 int j; 1172 1173 for (j = 0; j < count; j++) 1174 obj->data[index++] = gpu_read(gpu, regs[i] + j); 1175 } 1176 } 1177 1178 static void a7xx_get_ahb_gpu_reglist(struct msm_gpu *gpu, 1179 struct a6xx_gpu_state *a6xx_state, 1180 const struct gen7_reg_list *regs, 1181 struct a6xx_gpu_state_obj *obj) 1182 { 1183 if (regs->sel) 1184 gpu_write(gpu, regs->sel->host_reg, regs->sel->val); 1185 1186 a7xx_get_ahb_gpu_registers(gpu, a6xx_state, regs->regs, obj); 1187 } 1188 1189 /* Read a block of GMU registers */ 1190 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu, 1191 struct a6xx_gpu_state *a6xx_state, 1192 const struct a6xx_registers *regs, 1193 struct a6xx_gpu_state_obj *obj, 1194 bool rscc) 1195 { 1196 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1197 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1198 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 1199 int i, regcount = 0, index = 0; 1200 1201 for (i = 0; i < regs->count; i += 2) 1202 regcount += RANGE(regs->registers, i); 1203 1204 obj->handle = (const void *) regs; 1205 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32)); 1206 if (!obj->data) 1207 return; 1208 1209 for (i = 0; i < regs->count; i += 2) { 1210 u32 count = RANGE(regs->registers, i); 1211 int j; 1212 1213 for (j = 0; j < count; j++) { 1214 u32 offset = regs->registers[i] + j; 1215 u32 val; 1216 1217 if (rscc) 1218 val = gmu_read_rscc(gmu, offset); 1219 else 1220 val = gmu_read(gmu, offset); 1221 1222 obj->data[index++] = val; 1223 } 1224 } 1225 } 1226 1227 static void a6xx_get_gmu_registers(struct msm_gpu *gpu, 1228 struct a6xx_gpu_state *a6xx_state) 1229 { 1230 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1231 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1232 1233 a6xx_state->gmu_registers = state_kcalloc(a6xx_state, 1234 4, sizeof(*a6xx_state->gmu_registers)); 1235 1236 if (!a6xx_state->gmu_registers) 1237 return; 1238 1239 a6xx_state->nr_gmu_registers = 4; 1240 1241 /* Get the CX GMU registers from AHB */ 1242 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0], 1243 &a6xx_state->gmu_registers[0], false); 1244 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1], 1245 &a6xx_state->gmu_registers[1], true); 1246 1247 if (adreno_is_a621(adreno_gpu) || adreno_is_a623(adreno_gpu)) 1248 _a6xx_get_gmu_registers(gpu, a6xx_state, &a621_gpucc_reg, 1249 &a6xx_state->gmu_registers[2], false); 1250 else 1251 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gpucc_reg, 1252 &a6xx_state->gmu_registers[2], false); 1253 1254 if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) 1255 return; 1256 1257 /* Set the fence to ALLOW mode so we can access the registers */ 1258 gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0); 1259 1260 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2], 1261 &a6xx_state->gmu_registers[3], false); 1262 } 1263 1264 static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo( 1265 struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo) 1266 { 1267 struct msm_gpu_state_bo *snapshot; 1268 1269 if (!bo->size) 1270 return NULL; 1271 1272 snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot)); 1273 if (!snapshot) 1274 return NULL; 1275 1276 snapshot->iova = bo->iova; 1277 snapshot->size = bo->size; 1278 snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL); 1279 if (!snapshot->data) 1280 return NULL; 1281 1282 memcpy(snapshot->data, bo->virt, bo->size); 1283 1284 return snapshot; 1285 } 1286 1287 static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu, 1288 struct a6xx_gpu_state *a6xx_state) 1289 { 1290 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1291 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1292 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 1293 unsigned i, j; 1294 1295 BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history)); 1296 1297 for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) { 1298 struct a6xx_hfi_queue *queue = &gmu->queues[i]; 1299 for (j = 0; j < HFI_HISTORY_SZ; j++) { 1300 unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ; 1301 a6xx_state->hfi_queue_history[i][j] = queue->history[idx]; 1302 } 1303 } 1304 } 1305 1306 #define A6XX_REGLIST_SIZE 1 1307 #define A6XX_GBIF_REGLIST_SIZE 1 1308 static void a6xx_get_registers(struct msm_gpu *gpu, 1309 struct a6xx_gpu_state *a6xx_state, 1310 struct a6xx_crashdumper *dumper) 1311 { 1312 int i, count = A6XX_REGLIST_SIZE + 1313 ARRAY_SIZE(a6xx_reglist) + 1314 ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE; 1315 int index = 0; 1316 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1317 1318 a6xx_state->registers = state_kcalloc(a6xx_state, 1319 count, sizeof(*a6xx_state->registers)); 1320 1321 if (!a6xx_state->registers) 1322 return; 1323 1324 a6xx_state->nr_registers = count; 1325 1326 a6xx_get_ahb_gpu_registers(gpu, 1327 a6xx_state, &a6xx_ahb_reglist, 1328 &a6xx_state->registers[index++]); 1329 1330 if (a6xx_has_gbif(adreno_gpu)) 1331 a6xx_get_ahb_gpu_registers(gpu, 1332 a6xx_state, &a6xx_gbif_reglist, 1333 &a6xx_state->registers[index++]); 1334 else 1335 a6xx_get_ahb_gpu_registers(gpu, 1336 a6xx_state, &a6xx_vbif_reglist, 1337 &a6xx_state->registers[index++]); 1338 if (!dumper) { 1339 /* 1340 * We can't use the crashdumper when the SMMU is stalled, 1341 * because the GPU has no memory access until we resume 1342 * translation (but we don't want to do that until after 1343 * we have captured as much useful GPU state as possible). 1344 * So instead collect registers via the CPU: 1345 */ 1346 for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++) 1347 a6xx_get_ahb_gpu_registers(gpu, 1348 a6xx_state, &a6xx_reglist[i], 1349 &a6xx_state->registers[index++]); 1350 return; 1351 } 1352 1353 for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++) 1354 a6xx_get_crashdumper_registers(gpu, 1355 a6xx_state, &a6xx_reglist[i], 1356 &a6xx_state->registers[index++], 1357 dumper); 1358 1359 for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++) 1360 a6xx_get_crashdumper_hlsq_registers(gpu, 1361 a6xx_state, &a6xx_hlsq_reglist[i], 1362 &a6xx_state->registers[index++], 1363 dumper); 1364 } 1365 1366 #define A7XX_PRE_CRASHDUMPER_SIZE 1 1367 #define A7XX_POST_CRASHDUMPER_SIZE 1 1368 static void a7xx_get_registers(struct msm_gpu *gpu, 1369 struct a6xx_gpu_state *a6xx_state, 1370 struct a6xx_crashdumper *dumper) 1371 { 1372 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1373 int i, count; 1374 int index = 0; 1375 const u32 *pre_crashdumper_regs; 1376 const struct gen7_reg_list *reglist; 1377 1378 if (adreno_gpu->info->family == ADRENO_7XX_GEN1) { 1379 reglist = gen7_0_0_reg_list; 1380 pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers; 1381 } else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) { 1382 reglist = gen7_2_0_reg_list; 1383 pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers; 1384 } else { 1385 BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3); 1386 reglist = gen7_9_0_reg_list; 1387 pre_crashdumper_regs = gen7_9_0_pre_crashdumper_gpu_registers; 1388 } 1389 1390 count = A7XX_PRE_CRASHDUMPER_SIZE + A7XX_POST_CRASHDUMPER_SIZE; 1391 1392 /* The downstream reglist contains registers in other memory regions 1393 * (cx_misc/cx_mem and cx_dbgc) and we need to plumb through their 1394 * offsets and map them to read them on the CPU. For now only read the 1395 * first region which is the main one. 1396 */ 1397 if (dumper) { 1398 for (i = 0; reglist[i].regs; i++) 1399 count++; 1400 } else { 1401 count++; 1402 } 1403 1404 a6xx_state->registers = state_kcalloc(a6xx_state, 1405 count, sizeof(*a6xx_state->registers)); 1406 1407 if (!a6xx_state->registers) 1408 return; 1409 1410 a6xx_state->nr_registers = count; 1411 1412 a7xx_get_ahb_gpu_registers(gpu, a6xx_state, pre_crashdumper_regs, 1413 &a6xx_state->registers[index++]); 1414 1415 if (!dumper) { 1416 a7xx_get_ahb_gpu_reglist(gpu, 1417 a6xx_state, ®list[0], 1418 &a6xx_state->registers[index++]); 1419 return; 1420 } 1421 1422 for (i = 0; reglist[i].regs; i++) 1423 a7xx_get_crashdumper_registers(gpu, 1424 a6xx_state, ®list[i], 1425 &a6xx_state->registers[index++], 1426 dumper); 1427 } 1428 1429 static void a7xx_get_post_crashdumper_registers(struct msm_gpu *gpu, 1430 struct a6xx_gpu_state *a6xx_state) 1431 { 1432 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1433 const u32 *regs; 1434 1435 BUG_ON(adreno_gpu->info->family > ADRENO_7XX_GEN3); 1436 regs = gen7_0_0_post_crashdumper_registers; 1437 1438 a7xx_get_ahb_gpu_registers(gpu, 1439 a6xx_state, regs, 1440 &a6xx_state->registers[a6xx_state->nr_registers - 1]); 1441 } 1442 1443 static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu) 1444 { 1445 /* The value at [16:31] is in 4dword units. Convert it to dwords */ 1446 return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14; 1447 } 1448 1449 static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu) 1450 { 1451 /* 1452 * The value at CP_ROQ_THRESHOLDS_2[20:31] is in 4dword units. 1453 * That register however is not directly accessible from APSS on A7xx. 1454 * Program the SQE_UCODE_DBG_ADDR with offset=0x70d3 and read the value. 1455 */ 1456 gpu_write(gpu, REG_A6XX_CP_SQE_UCODE_DBG_ADDR, 0x70d3); 1457 1458 return 4 * (gpu_read(gpu, REG_A6XX_CP_SQE_UCODE_DBG_DATA) >> 20); 1459 } 1460 1461 /* Read a block of data from an indexed register pair */ 1462 static void a6xx_get_indexed_regs(struct msm_gpu *gpu, 1463 struct a6xx_gpu_state *a6xx_state, 1464 const struct a6xx_indexed_registers *indexed, 1465 struct a6xx_gpu_state_obj *obj) 1466 { 1467 u32 count = indexed->count; 1468 int i; 1469 1470 obj->handle = (const void *) indexed; 1471 if (indexed->count_fn) 1472 count = indexed->count_fn(gpu); 1473 1474 obj->data = state_kcalloc(a6xx_state, count, sizeof(u32)); 1475 obj->count = count; 1476 if (!obj->data) 1477 return; 1478 1479 /* All the indexed banks start at address 0 */ 1480 gpu_write(gpu, indexed->addr, 0); 1481 1482 /* Read the data - each read increments the internal address by 1 */ 1483 for (i = 0; i < count; i++) 1484 obj->data[i] = gpu_read(gpu, indexed->data); 1485 } 1486 1487 static void a6xx_get_indexed_registers(struct msm_gpu *gpu, 1488 struct a6xx_gpu_state *a6xx_state) 1489 { 1490 u32 mempool_size; 1491 int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1; 1492 int i; 1493 1494 a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count, 1495 sizeof(*a6xx_state->indexed_regs)); 1496 if (!a6xx_state->indexed_regs) 1497 return; 1498 1499 for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++) 1500 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i], 1501 &a6xx_state->indexed_regs[i]); 1502 1503 if (adreno_is_a650_family(to_adreno_gpu(gpu))) { 1504 u32 val; 1505 1506 val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG); 1507 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4); 1508 1509 /* Get the contents of the CP mempool */ 1510 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed, 1511 &a6xx_state->indexed_regs[i]); 1512 1513 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val); 1514 a6xx_state->nr_indexed_regs = count; 1515 return; 1516 } 1517 1518 /* Set the CP mempool size to 0 to stabilize it while dumping */ 1519 mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE); 1520 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0); 1521 1522 /* Get the contents of the CP mempool */ 1523 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed, 1524 &a6xx_state->indexed_regs[i]); 1525 1526 /* 1527 * Offset 0x2000 in the mempool is the size - copy the saved size over 1528 * so the data is consistent 1529 */ 1530 a6xx_state->indexed_regs[i].data[0x2000] = mempool_size; 1531 1532 /* Restore the size in the hardware */ 1533 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size); 1534 1535 a6xx_state->nr_indexed_regs = count; 1536 } 1537 1538 static void a7xx_get_indexed_registers(struct msm_gpu *gpu, 1539 struct a6xx_gpu_state *a6xx_state) 1540 { 1541 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1542 const struct a6xx_indexed_registers *indexed_regs; 1543 int i, indexed_count, mempool_count; 1544 1545 if (adreno_gpu->info->family <= ADRENO_7XX_GEN2) { 1546 indexed_regs = a7xx_indexed_reglist; 1547 indexed_count = ARRAY_SIZE(a7xx_indexed_reglist); 1548 } else { 1549 BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3); 1550 indexed_regs = gen7_9_0_cp_indexed_reg_list; 1551 indexed_count = ARRAY_SIZE(gen7_9_0_cp_indexed_reg_list); 1552 } 1553 1554 mempool_count = ARRAY_SIZE(a7xx_cp_bv_mempool_indexed); 1555 1556 a6xx_state->indexed_regs = state_kcalloc(a6xx_state, 1557 indexed_count + mempool_count, 1558 sizeof(*a6xx_state->indexed_regs)); 1559 if (!a6xx_state->indexed_regs) 1560 return; 1561 1562 a6xx_state->nr_indexed_regs = indexed_count + mempool_count; 1563 1564 /* First read the common regs */ 1565 for (i = 0; i < indexed_count; i++) 1566 a6xx_get_indexed_regs(gpu, a6xx_state, &indexed_regs[i], 1567 &a6xx_state->indexed_regs[i]); 1568 1569 gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, 0, BIT(2)); 1570 gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, 0, BIT(2)); 1571 1572 /* Get the contents of the CP_BV mempool */ 1573 for (i = 0; i < mempool_count; i++) 1574 a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_cp_bv_mempool_indexed[i], 1575 &a6xx_state->indexed_regs[indexed_count + i]); 1576 1577 gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(2), 0); 1578 gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, BIT(2), 0); 1579 return; 1580 } 1581 1582 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) 1583 { 1584 struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL; 1585 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1586 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1587 struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state), 1588 GFP_KERNEL); 1589 bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & 1590 A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT); 1591 1592 if (!a6xx_state) 1593 return ERR_PTR(-ENOMEM); 1594 1595 INIT_LIST_HEAD(&a6xx_state->objs); 1596 1597 /* Get the generic state from the adreno core */ 1598 adreno_gpu_state_get(gpu, &a6xx_state->base); 1599 1600 if (!adreno_has_gmu_wrapper(adreno_gpu)) { 1601 a6xx_get_gmu_registers(gpu, a6xx_state); 1602 1603 a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log); 1604 a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi); 1605 a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug); 1606 1607 a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state); 1608 } 1609 1610 /* If GX isn't on the rest of the data isn't going to be accessible */ 1611 if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) 1612 return &a6xx_state->base; 1613 1614 /* Get the banks of indexed registers */ 1615 if (adreno_is_a7xx(adreno_gpu)) 1616 a7xx_get_indexed_registers(gpu, a6xx_state); 1617 else 1618 a6xx_get_indexed_registers(gpu, a6xx_state); 1619 1620 /* 1621 * Try to initialize the crashdumper, if we are not dumping state 1622 * with the SMMU stalled. The crashdumper needs memory access to 1623 * write out GPU state, so we need to skip this when the SMMU is 1624 * stalled in response to an iova fault 1625 */ 1626 if (!stalled && !gpu->needs_hw_init && 1627 !a6xx_crashdumper_init(gpu, &_dumper)) { 1628 dumper = &_dumper; 1629 } 1630 1631 if (adreno_is_a7xx(adreno_gpu)) { 1632 a7xx_get_registers(gpu, a6xx_state, dumper); 1633 1634 if (dumper) { 1635 a7xx_get_shaders(gpu, a6xx_state, dumper); 1636 a7xx_get_clusters(gpu, a6xx_state, dumper); 1637 a7xx_get_dbgahb_clusters(gpu, a6xx_state, dumper); 1638 1639 msm_gem_kernel_put(dumper->bo, gpu->vm); 1640 } 1641 1642 a7xx_get_post_crashdumper_registers(gpu, a6xx_state); 1643 } else { 1644 a6xx_get_registers(gpu, a6xx_state, dumper); 1645 1646 if (dumper) { 1647 a6xx_get_shaders(gpu, a6xx_state, dumper); 1648 a6xx_get_clusters(gpu, a6xx_state, dumper); 1649 a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper); 1650 1651 msm_gem_kernel_put(dumper->bo, gpu->vm); 1652 } 1653 } 1654 1655 if (snapshot_debugbus) 1656 a6xx_get_debugbus(gpu, a6xx_state); 1657 1658 a6xx_state->gpu_initialized = !gpu->needs_hw_init; 1659 1660 return &a6xx_state->base; 1661 } 1662 1663 static void a6xx_gpu_state_destroy(struct kref *kref) 1664 { 1665 struct a6xx_state_memobj *obj, *tmp; 1666 struct msm_gpu_state *state = container_of(kref, 1667 struct msm_gpu_state, ref); 1668 struct a6xx_gpu_state *a6xx_state = container_of(state, 1669 struct a6xx_gpu_state, base); 1670 1671 if (a6xx_state->gmu_log) 1672 kvfree(a6xx_state->gmu_log->data); 1673 1674 if (a6xx_state->gmu_hfi) 1675 kvfree(a6xx_state->gmu_hfi->data); 1676 1677 if (a6xx_state->gmu_debug) 1678 kvfree(a6xx_state->gmu_debug->data); 1679 1680 list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) { 1681 list_del(&obj->node); 1682 kvfree(obj); 1683 } 1684 1685 adreno_gpu_state_destroy(state); 1686 kfree(a6xx_state); 1687 } 1688 1689 int a6xx_gpu_state_put(struct msm_gpu_state *state) 1690 { 1691 if (IS_ERR_OR_NULL(state)) 1692 return 1; 1693 1694 return kref_put(&state->ref, a6xx_gpu_state_destroy); 1695 } 1696 1697 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count, 1698 struct drm_printer *p) 1699 { 1700 int i, index = 0; 1701 1702 if (!data) 1703 return; 1704 1705 for (i = 0; i < count; i += 2) { 1706 u32 count = RANGE(registers, i); 1707 u32 offset = registers[i]; 1708 int j; 1709 1710 for (j = 0; j < count; index++, offset++, j++) { 1711 if (data[index] == 0xdeafbead) 1712 continue; 1713 1714 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", 1715 offset << 2, data[index]); 1716 } 1717 } 1718 } 1719 1720 static void a7xx_show_registers_indented(const u32 *registers, u32 *data, 1721 struct drm_printer *p, unsigned indent) 1722 { 1723 int i, index = 0; 1724 1725 for (i = 0; registers[i] != UINT_MAX; i += 2) { 1726 u32 count = RANGE(registers, i); 1727 u32 offset = registers[i]; 1728 int j; 1729 1730 for (j = 0; j < count; index++, offset++, j++) { 1731 int k; 1732 1733 if (data[index] == 0xdeafbead) 1734 continue; 1735 1736 for (k = 0; k < indent; k++) 1737 drm_printf(p, " "); 1738 drm_printf(p, "- { offset: 0x%06x, value: 0x%08x }\n", 1739 offset << 2, data[index]); 1740 } 1741 } 1742 } 1743 1744 static void a7xx_show_registers(const u32 *registers, u32 *data, struct drm_printer *p) 1745 { 1746 a7xx_show_registers_indented(registers, data, p, 1); 1747 } 1748 1749 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data) 1750 { 1751 char out[ASCII85_BUFSZ]; 1752 long i, l, datalen = 0; 1753 1754 for (i = 0; i < len >> 2; i++) { 1755 if (data[i]) 1756 datalen = (i + 1) << 2; 1757 } 1758 1759 if (datalen == 0) 1760 return; 1761 1762 drm_puts(p, " data: !!ascii85 |\n"); 1763 drm_puts(p, " "); 1764 1765 1766 l = ascii85_encode_len(datalen); 1767 1768 for (i = 0; i < l; i++) 1769 drm_puts(p, ascii85_encode(data[i], out)); 1770 1771 drm_puts(p, "\n"); 1772 } 1773 1774 static void print_name(struct drm_printer *p, const char *fmt, const char *name) 1775 { 1776 drm_puts(p, fmt); 1777 drm_puts(p, name); 1778 drm_puts(p, "\n"); 1779 } 1780 1781 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj, 1782 struct drm_printer *p) 1783 { 1784 const struct a6xx_shader_block *block = obj->handle; 1785 int i; 1786 1787 if (!obj->handle) 1788 return; 1789 1790 print_name(p, " - type: ", block->name); 1791 1792 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { 1793 drm_printf(p, " - bank: %d\n", i); 1794 drm_printf(p, " size: %d\n", block->size); 1795 1796 if (!obj->data) 1797 continue; 1798 1799 print_ascii85(p, block->size << 2, 1800 obj->data + (block->size * i)); 1801 } 1802 } 1803 1804 static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj, 1805 struct drm_printer *p) 1806 { 1807 const struct gen7_shader_block *block = obj->handle; 1808 int i, j; 1809 u32 *data = obj->data; 1810 1811 if (!obj->handle) 1812 return; 1813 1814 print_name(p, " - type: ", a7xx_statetype_names[block->statetype]); 1815 print_name(p, " - pipe: ", a7xx_pipe_names[block->pipeid]); 1816 drm_printf(p, " - location: %d\n", block->location); 1817 1818 for (i = 0; i < block->num_sps; i++) { 1819 drm_printf(p, " - sp: %d\n", i); 1820 1821 for (j = 0; j < block->num_usptps; j++) { 1822 drm_printf(p, " - usptp: %d\n", j); 1823 drm_printf(p, " size: %d\n", block->size); 1824 1825 if (!obj->data) 1826 continue; 1827 1828 print_ascii85(p, block->size << 2, data); 1829 1830 data += block->size; 1831 } 1832 } 1833 } 1834 1835 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data, 1836 struct drm_printer *p) 1837 { 1838 int ctx, index = 0; 1839 1840 for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) { 1841 int j; 1842 1843 drm_printf(p, " - context: %d\n", ctx); 1844 1845 for (j = 0; j < size; j += 2) { 1846 u32 count = RANGE(registers, j); 1847 u32 offset = registers[j]; 1848 int k; 1849 1850 for (k = 0; k < count; index++, offset++, k++) { 1851 if (data[index] == 0xdeafbead) 1852 continue; 1853 1854 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", 1855 offset << 2, data[index]); 1856 } 1857 } 1858 } 1859 } 1860 1861 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj, 1862 struct drm_printer *p) 1863 { 1864 const struct a6xx_dbgahb_cluster *dbgahb = obj->handle; 1865 1866 if (dbgahb) { 1867 print_name(p, " - cluster-name: ", dbgahb->name); 1868 a6xx_show_cluster_data(dbgahb->registers, dbgahb->count, 1869 obj->data, p); 1870 } 1871 } 1872 1873 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj, 1874 struct drm_printer *p) 1875 { 1876 const struct a6xx_cluster *cluster = obj->handle; 1877 1878 if (cluster) { 1879 print_name(p, " - cluster-name: ", cluster->name); 1880 a6xx_show_cluster_data(cluster->registers, cluster->count, 1881 obj->data, p); 1882 } 1883 } 1884 1885 static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj, 1886 struct drm_printer *p) 1887 { 1888 const struct gen7_sptp_cluster_registers *dbgahb = obj->handle; 1889 1890 if (dbgahb) { 1891 print_name(p, " - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]); 1892 print_name(p, " - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]); 1893 drm_printf(p, " - context: %d\n", dbgahb->context_id); 1894 drm_printf(p, " - location: %d\n", dbgahb->location_id); 1895 a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4); 1896 } 1897 } 1898 1899 static void a7xx_show_cluster(struct a6xx_gpu_state_obj *obj, 1900 struct drm_printer *p) 1901 { 1902 const struct gen7_cluster_registers *cluster = obj->handle; 1903 1904 if (cluster) { 1905 int context = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0; 1906 1907 print_name(p, " - pipe: ", a7xx_pipe_names[cluster->pipe_id]); 1908 print_name(p, " - cluster-name: ", a7xx_cluster_names[cluster->cluster_id]); 1909 drm_printf(p, " - context: %d\n", context); 1910 a7xx_show_registers_indented(cluster->regs, obj->data, p, 4); 1911 } 1912 } 1913 1914 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj, 1915 struct drm_printer *p) 1916 { 1917 const struct a6xx_indexed_registers *indexed = obj->handle; 1918 1919 if (!indexed) 1920 return; 1921 1922 print_name(p, " - regs-name: ", indexed->name); 1923 drm_printf(p, " dwords: %d\n", obj->count); 1924 1925 print_ascii85(p, obj->count << 2, obj->data); 1926 } 1927 1928 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block, 1929 u32 *data, struct drm_printer *p) 1930 { 1931 if (block) { 1932 print_name(p, " - debugbus-block: ", block->name); 1933 1934 /* 1935 * count for regular debugbus data is in quadwords, 1936 * but print the size in dwords for consistency 1937 */ 1938 drm_printf(p, " count: %d\n", block->count << 1); 1939 1940 print_ascii85(p, block->count << 3, data); 1941 } 1942 } 1943 1944 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state, 1945 struct drm_printer *p) 1946 { 1947 int i; 1948 1949 for (i = 0; i < a6xx_state->nr_debugbus; i++) { 1950 struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i]; 1951 1952 a6xx_show_debugbus_block(obj->handle, obj->data, p); 1953 } 1954 1955 if (a6xx_state->vbif_debugbus) { 1956 struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus; 1957 1958 drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n"); 1959 drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE); 1960 1961 /* vbif debugbus data is in dwords. Confusing, huh? */ 1962 print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data); 1963 } 1964 1965 for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) { 1966 struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i]; 1967 1968 a6xx_show_debugbus_block(obj->handle, obj->data, p); 1969 } 1970 } 1971 1972 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, 1973 struct drm_printer *p) 1974 { 1975 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1976 struct a6xx_gpu_state *a6xx_state = container_of(state, 1977 struct a6xx_gpu_state, base); 1978 int i; 1979 1980 if (IS_ERR_OR_NULL(state)) 1981 return; 1982 1983 drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized); 1984 1985 adreno_show(gpu, state, p); 1986 1987 drm_puts(p, "gmu-log:\n"); 1988 if (a6xx_state->gmu_log) { 1989 struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log; 1990 1991 drm_printf(p, " iova: 0x%016llx\n", gmu_log->iova); 1992 drm_printf(p, " size: %zu\n", gmu_log->size); 1993 adreno_show_object(p, &gmu_log->data, gmu_log->size, 1994 &gmu_log->encoded); 1995 } 1996 1997 drm_puts(p, "gmu-hfi:\n"); 1998 if (a6xx_state->gmu_hfi) { 1999 struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi; 2000 unsigned i, j; 2001 2002 drm_printf(p, " iova: 0x%016llx\n", gmu_hfi->iova); 2003 drm_printf(p, " size: %zu\n", gmu_hfi->size); 2004 for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) { 2005 drm_printf(p, " queue-history[%u]:", i); 2006 for (j = 0; j < HFI_HISTORY_SZ; j++) { 2007 drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]); 2008 } 2009 drm_printf(p, "\n"); 2010 } 2011 adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size, 2012 &gmu_hfi->encoded); 2013 } 2014 2015 drm_puts(p, "gmu-debug:\n"); 2016 if (a6xx_state->gmu_debug) { 2017 struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug; 2018 2019 drm_printf(p, " iova: 0x%016llx\n", gmu_debug->iova); 2020 drm_printf(p, " size: %zu\n", gmu_debug->size); 2021 adreno_show_object(p, &gmu_debug->data, gmu_debug->size, 2022 &gmu_debug->encoded); 2023 } 2024 2025 drm_puts(p, "registers:\n"); 2026 for (i = 0; i < a6xx_state->nr_registers; i++) { 2027 struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i]; 2028 2029 if (!obj->handle) 2030 continue; 2031 2032 if (adreno_is_a7xx(adreno_gpu)) { 2033 a7xx_show_registers(obj->handle, obj->data, p); 2034 } else { 2035 const struct a6xx_registers *regs = obj->handle; 2036 2037 a6xx_show_registers(regs->registers, obj->data, regs->count, p); 2038 } 2039 } 2040 2041 drm_puts(p, "registers-gmu:\n"); 2042 for (i = 0; i < a6xx_state->nr_gmu_registers; i++) { 2043 struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i]; 2044 const struct a6xx_registers *regs = obj->handle; 2045 2046 if (!obj->handle) 2047 continue; 2048 2049 a6xx_show_registers(regs->registers, obj->data, regs->count, p); 2050 } 2051 2052 drm_puts(p, "indexed-registers:\n"); 2053 for (i = 0; i < a6xx_state->nr_indexed_regs; i++) 2054 a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p); 2055 2056 drm_puts(p, "shader-blocks:\n"); 2057 for (i = 0; i < a6xx_state->nr_shaders; i++) { 2058 if (adreno_is_a7xx(adreno_gpu)) 2059 a7xx_show_shader(&a6xx_state->shaders[i], p); 2060 else 2061 a6xx_show_shader(&a6xx_state->shaders[i], p); 2062 } 2063 2064 drm_puts(p, "clusters:\n"); 2065 for (i = 0; i < a6xx_state->nr_clusters; i++) { 2066 if (adreno_is_a7xx(adreno_gpu)) 2067 a7xx_show_cluster(&a6xx_state->clusters[i], p); 2068 else 2069 a6xx_show_cluster(&a6xx_state->clusters[i], p); 2070 } 2071 2072 for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) { 2073 if (adreno_is_a7xx(adreno_gpu)) 2074 a7xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p); 2075 else 2076 a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p); 2077 } 2078 2079 drm_puts(p, "debugbus:\n"); 2080 a6xx_show_debugbus(a6xx_state, p); 2081 } 2082