// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */

#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_gpu_trace.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"

#include <linux/bitfield.h>
#include <linux/devfreq.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/pm_domain.h>
#include <linux/soc/qcom/llcc-qcom.h>

#define GPU_PAS_ID 13

static void a8xx_aperture_slice_set(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 val;

	val = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe) | A8XX_CP_APERTURE_CNTL_HOST_SLICEID(slice);

	if (a6xx_gpu->cached_aperture == val)
		return;

	gpu_write(gpu, REG_A8XX_CP_APERTURE_CNTL_HOST, val);

	a6xx_gpu->cached_aperture = val;
}

static void a8xx_aperture_acquire(struct msm_gpu *gpu, enum adreno_pipe pipe, unsigned long *flags)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	spin_lock_irqsave(&a6xx_gpu->aperture_lock, *flags);

	a8xx_aperture_slice_set(gpu, pipe, 0);
}

static void a8xx_aperture_release(struct msm_gpu *gpu, unsigned long flags)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);
}

static void a8xx_aperture_clear(struct msm_gpu *gpu)
{
	unsigned long flags;

	a8xx_aperture_acquire(gpu, PIPE_NONE, &flags);
	a8xx_aperture_release(gpu, flags);
}

static void a8xx_write_pipe(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 offset, u32 data)
{
	unsigned long flags;

	a8xx_aperture_acquire(gpu, pipe, &flags);
	gpu_write(gpu, offset, data);
	a8xx_aperture_release(gpu, flags);
}

static u32 a8xx_read_pipe_slice(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice, u32 offset)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	unsigned long flags;
	u32 val;

	spin_lock_irqsave(&a6xx_gpu->aperture_lock, flags);
	a8xx_aperture_slice_set(gpu, pipe, slice);
	val = gpu_read(gpu, offset);
	spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);

	return val;
}

void a8xx_gpu_get_slice_info(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	const struct a6xx_info *info = adreno_gpu->info->a6xx;
	struct device *dev = &gpu->pdev->dev;
	u32 slice_mask;

	if (adreno_gpu->info->family < ADRENO_8XX_GEN1)
		return;

	if (a6xx_gpu->slice_mask)
		return;

	slice_mask = GENMASK(info->max_slices - 1, 0);

	/* GEN1 doesn't support partial slice configurations */
	if (adreno_gpu->info->family == ADRENO_8XX_GEN1) {
		a6xx_gpu->slice_mask = slice_mask;
		return;
	}

	slice_mask &= a6xx_llc_read(a6xx_gpu,
				    REG_A8XX_CX_MISC_SLICE_ENABLE_FINAL);

	a6xx_gpu->slice_mask = slice_mask;

	/* Chip ID depends on the number of slices available. So update it */
	adreno_gpu->chip_id |= FIELD_PREP(GENMASK(7, 4), hweight32(slice_mask));

	/* Update the gpu-name to reflect the slice config: */
	const char *name = devm_kasprintf(dev, GFP_KERNEL,
					  "%"ADRENO_CHIPID_FMT,
					  ADRENO_CHIPID_ARGS(adreno_gpu->chip_id));
	if (name) {
		devm_kfree(dev, adreno_gpu->base.name);
		adreno_gpu->base.name = name;
	}
}

static u32 a8xx_get_first_slice(struct a6xx_gpu *a6xx_gpu)
{
	return ffs(a6xx_gpu->slice_mask) - 1;
}

static inline bool _a8xx_check_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Check that the GMU is idle */
	if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
		return false;

	/* Check that the CX master is idle */
	if (gpu_read(gpu, REG_A8XX_RBBM_STATUS) &
	    ~A8XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
		return false;

	return !(gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS) &
		 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
}

static bool a8xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a8xx_check_idle(gpu))) {
		DRM_ERROR(
			"%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A8XX_RBBM_STATUS),
			gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

void a8xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	spin_lock_irqsave(&ring->preempt_lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	/* Update HW if this is the current ring and we are not in preempt */
	if (!a6xx_in_preempt(a6xx_gpu)) {
		if (a6xx_gpu->cur_ring == ring)
			a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
		else
			ring->restore_wptr = true;
	} else {
		ring->restore_wptr = true;
	}

	spin_unlock_irqrestore(&ring->preempt_lock, flags);
}

static void a8xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	u32 val;

	if (adreno_is_x285(adreno_gpu) && state)
		gpu_write(gpu, REG_A8XX_RBBM_CGC_0_PC, 0x00000702);

	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
		  state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
		  state ? 0x110111 : 0);
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
		  state ? 0x55555 : 0);

	gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
	gpu_write(gpu, REG_A8XX_RBBM_CGC_GLOBAL_LOAD_CMD, !!state);

	if (state) {
		gpu_write(gpu, REG_A8XX_RBBM_CGC_P2S_TRIG_CMD, 1);

		if (gpu_poll_timeout(gpu, REG_A8XX_RBBM_CGC_P2S_STATUS, val,
				     val & A8XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
			dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
			return;
		}

		gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
	} else {
		/*
		 * GMU enables clk gating in GBIF during boot up. So,
		 * override that here when the hwcg feature is disabled
		 */
		gpu_rmw(gpu, REG_A8XX_GBIF_CX_CONFIG, BIT(0), 0);
	}
}

static void a8xx_set_cp_protect(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
	u32 cntl, final_cfg;
	unsigned int i;

	cntl = A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_PROT_EN |
		A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_FAULT_ON_VIOL_EN |
		A8XX_CP_PROTECT_CNTL_PIPE_LAST_SPAN_INF_RANGE |
		A8XX_CP_PROTECT_CNTL_PIPE_HALT_SQE_RANGE__MASK;

	/*
	 * Enable access protection to privileged registers, fault on an access
	 * protect violation and select the last span to protect from the start
	 * address all the way to the end of the register address space
	 */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl);
	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl);

	a8xx_aperture_clear(gpu);

	for (i = 0; i < protect->count; i++) {
		/* Intentionally skip writing to some registers */
		if (protect->regs[i]) {
			gpu_write(gpu, REG_A8XX_CP_PROTECT_GLOBAL(i), protect->regs[i]);
			final_cfg = protect->regs[i];
		}
	}

	/*
	 * The last span feature is only supported on PIPE-specific registers,
	 * so update those here
	 */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg);
	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg);

	a8xx_aperture_clear(gpu);
}

static void a8xx_set_ubwc_config(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config;
	u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2);
	u32 level3_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL3);
	bool rgba8888_lossless = false, fp16compoptdis = false;
	bool yuvnotcomptofc = false, min_acc_len_64b = false;
	bool rgb565_predicator = false, amsbc = false;
	bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg);
	u32 ubwc_version = cfg->ubwc_enc_version;
	u32 hbb, hbb_hi, hbb_lo, mode = 1;
	u8 uavflagprd_inv = 2;

	switch (ubwc_version) {
	case UBWC_6_0:
		yuvnotcomptofc = true;
		mode = 5;
		break;
	case UBWC_5_0:
		amsbc = true;
		rgb565_predicator = true;
		mode = 4;
		break;
	case UBWC_4_0:
		amsbc = true;
		rgb565_predicator = true;
		fp16compoptdis = true;
		rgba8888_lossless = true;
		mode = 2;
		break;
	case UBWC_3_0:
		amsbc = true;
		mode = 1;
		break;
	default:
		dev_err(&gpu->pdev->dev, "Unknown UBWC version: 0x%x\n", ubwc_version);
		break;
	}

	/*
	 * We subtract 13 from the highest bank bit (13 is the minimum value
	 * allowed by hw) and write the lowest two bits of the remaining value
	 * as hbb_lo and the one above it as hbb_hi to the hardware.
	 */
	WARN_ON(cfg->highest_bank_bit < 13);
	hbb = cfg->highest_bank_bit - 13;
	hbb_hi = hbb >> 2;
	hbb_lo = hbb & 3;

	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL,
			hbb << 5 |
			level3_swizzling_dis << 4 |
			level2_swizzling_dis << 3);

	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL,
			hbb << 5 |
			level3_swizzling_dis << 4 |
			level2_swizzling_dis << 3);

	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CCU_NC_MODE_CNTL,
			yuvnotcomptofc << 6 |
			level3_swizzling_dis << 5 |
			level2_swizzling_dis << 4 |
			hbb_hi << 3 |
			hbb_lo << 1);

	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CMP_NC_MODE_CNTL,
			mode << 15 |
			yuvnotcomptofc << 6 |
			rgba8888_lossless << 4 |
			fp16compoptdis << 3 |
			rgb565_predicator << 2 |
			amsbc << 1 |
			min_acc_len_64b);

	a8xx_aperture_clear(gpu);

	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
		  level3_swizzling_dis << 13 |
		  level2_swizzling_dis << 12 |
		  hbb_hi << 10 |
		  uavflagprd_inv << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);

	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
		  level3_swizzling_dis << 7 |
		  level2_swizzling_dis << 6 |
		  hbb_hi << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);
}

static void a8xx_nonctxt_config(struct msm_gpu *gpu, u32 *gmem_protect)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct a6xx_info *info = adreno_gpu->info->a6xx;
	const struct adreno_reglist_pipe *regs = info->nonctxt_reglist;
	unsigned int pipe_id, i;
	unsigned long flags;

	for (pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		/* We don't have support for LPAC yet */
		if (pipe_id == PIPE_LPAC)
			continue;

		a8xx_aperture_acquire(gpu, pipe_id, &flags);

		for (i = 0; regs[i].offset; i++) {
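			/* Skip entries that are not meant for this pipe */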
			if (!(BIT(pipe_id) & regs[i].pipe))
				continue;

			if (regs[i].offset == REG_A8XX_RB_GC_GMEM_PROTECT)
				*gmem_protect = regs[i].value;

			gpu_write(gpu, regs[i].offset, regs[i].value);
		}

		a8xx_aperture_release(gpu, flags);
	}

	a8xx_aperture_clear(gpu);
}

static void a8xx_patch_pwrup_reglist(struct msm_gpu *gpu)
{
	const struct adreno_reglist_pipe_list *dyn_pwrup_reglist;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	const struct adreno_reglist_list *reglist;
	void *ptr = a6xx_gpu->pwrup_reglist_ptr;
	struct cpu_gpu_lock *lock = ptr;
	u32 *dest = (u32 *)&lock->regs[0];
	u32 dyn_pwrup_reglist_count = 0;
	int i;

	lock->gpu_req = lock->cpu_req = lock->turn = 0;

	reglist = adreno_gpu->info->a6xx->ifpc_reglist;
	if (reglist) {
		lock->ifpc_list_len = reglist->count;

		/*
		 * For each entry in each of the lists, write the offset and the current
		 * register value into the GPU buffer
		 */
		for (i = 0; i < reglist->count; i++) {
			*dest++ = reglist->regs[i];
			*dest++ = gpu_read(gpu, reglist->regs[i]);
		}
	}

	reglist = adreno_gpu->info->a6xx->pwrup_reglist;
	if (reglist) {
		lock->preemption_list_len = reglist->count;

		for (i = 0; i < reglist->count; i++) {
			*dest++ = reglist->regs[i];
			*dest++ = gpu_read(gpu, reglist->regs[i]);
		}
	}

	/*
	 * The overall register list is composed of
	 * 1. Static IFPC-only registers
	 * 2. Static IFPC + preemption registers
	 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
	 *
	 * The first two lists are static. The sizes of these lists are stored
	 * as the number of pairs in ifpc_list_len and preemption_list_len
	 * respectively. With concurrent binning, some of the perfcounter
	 * registers are virtualized, so the CP needs to know the pipe id to
	 * program the aperture in order to restore them. Thus, the third list
	 * is a dynamic list with triplets as
	 * (<aperture, shifted 12 bits> <address> <data>), and the length is
	 * stored as the number of triplets in dynamic_list_len.
	 */
	dyn_pwrup_reglist = adreno_gpu->info->a6xx->dyn_pwrup_reglist;
	if (!dyn_pwrup_reglist)
		goto done;

	for (u32 pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		for (i = 0; i < dyn_pwrup_reglist->count; i++) {
			if (!(dyn_pwrup_reglist->regs[i].pipe & BIT(pipe_id)))
				continue;
			*dest++ = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe_id);
			*dest++ = dyn_pwrup_reglist->regs[i].offset;
			*dest++ = a8xx_read_pipe_slice(gpu,
						       pipe_id,
						       a8xx_get_first_slice(a6xx_gpu),
						       dyn_pwrup_reglist->regs[i].offset);
			dyn_pwrup_reglist_count++;
		}
	}

	lock->dynamic_list_len = dyn_pwrup_reglist_count;

done:
	a8xx_aperture_clear(gpu);
}

static int a8xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings <= 1)
		return 0;

	/* Turn CP protection off */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Generate interrupt on preemption completion */
	OUT_RING(ring, 0x00);

	a6xx_flush(gpu, ring);

	return a8xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a8xx_cp_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];
	u32 mask;

	/* Disable concurrent binning before sending CP init */
	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, BIT(27));

	OUT_PKT7(ring, CP_ME_INIT, 7);

	/* Use multiple HW contexts */
	mask = BIT(0);

	/* Enable error detection */
	mask |= BIT(1);

	/* Set default reset state */
	mask |= BIT(3);

	/* Disable save/restore of performance counters across preemption */
	mask |= BIT(6);

	/* Enable the register init list with the spinlock */
	mask |= BIT(8);

	OUT_RING(ring, mask);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Operation mode mask */
	OUT_RING(ring, 0x00000002);

	/* Lo address */
	OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova));
	/* Hi address */
	OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova));

	/* Enable dyn pwrup list with triplets (offset, value, pipe) */
	OUT_RING(ring, BIT(31));

	a6xx_flush(gpu, ring);
	return a8xx_idle(gpu, ring) ? 0 : -EINVAL;
}

#define A8XX_INT_MASK \
	(A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
	 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
	 A6XX_RBBM_INT_0_MASK_CP_SW | \
	 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
	 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
	 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
	 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
	 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
	 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
	 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)

#define A8XX_APRIV_MASK \
	(A8XX_CP_APRIV_CNTL_PIPE_ICACHE | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBFETCH | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBPRIVLEVEL | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBRPWB)

#define A8XX_BR_APRIV_MASK \
	(A8XX_APRIV_MASK | \
	 A8XX_CP_APRIV_CNTL_PIPE_CDREAD | \
	 A8XX_CP_APRIV_CNTL_PIPE_CDWRITE)

#define A8XX_CP_GLOBAL_INT_MASK \
	(A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTLPAC | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE0 | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE1 | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBR | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBV | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTLPAC | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE0 | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE1 | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBR | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBV)

#define A8XX_CP_INTERRUPT_STATUS_MASK_PIPE \
	(A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFRBWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB1WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB2WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB3WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFSDSWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFMRBWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFVSDWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_OPCODEERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VSDPARITYERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_REGISTERPROTECTIONERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_ILLEGALINSTRUCTION | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_SMMUFAULT | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPCLIENT | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPTYPE | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPREAD | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_RTWROVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTWROVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTREFCNTOVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTCLRRESMISS)

#define A8XX_CP_HW_FAULT_STATUS_MASK_PIPE \
	(A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFRBFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB1FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB2FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB3FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFSDSFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFMRBFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFVSDFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_SQEREADBURSTOVF | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_EVENTENGINEOVF | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_UCODEERROR)

static int hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	unsigned int pipe_id, i;
	u32 gmem_protect = 0;
	u64 gmem_range_min;
	int ret;

	ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
	if (ret)
		return ret;

	/* Clear the cached value to force aperture configuration next time */
	a6xx_gpu->cached_aperture = UINT_MAX;
	a8xx_aperture_clear(gpu);

	/* Clear GBIF halt in case GX domain was not collapsed */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
	gpu_read(gpu, REG_A6XX_GBIF_HALT);

	gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 0);
	gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT);

	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);

	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);

	/* Setup GMEM Range in UCHE */
	gmem_range_min = SZ_64M;
	/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_GC_GMEM_RANGE_MIN, gmem_range_min);
	gpu_write64(gpu, REG_A8XX_SP_HLSQ_GC_GMEM_RANGE_MIN, gmem_range_min);

	/* Setup UCHE Trap region */
	gpu_write64(gpu, REG_A8XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_CNTL, 0x1);
	gpu_write(gpu, REG_A8XX_RBBM_SLICE_PERFCTR_CNTL, 0x1);

	/* Turn on the IFPC counter (countable 4 on XOCLK1) */
	gmu_write(&a6xx_gpu->gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_1,
		  FIELD_PREP(GENMASK(7, 0), 0x4));

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A8XX_CP_PERFCTR_CP_SEL(0), 1);

	a8xx_set_ubwc_config(gpu);

	/* Set weights for bicubic filtering */
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(5), 0x00000000);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(6), 0x3fd093e8);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(7), 0x3f4133dc);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(8), 0x3ea1dfdb);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(9), 0x3e0283e0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(10), 0x0000ac2b);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(11), 0x0000f01d);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(12), 0x00114412);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(13), 0x0021980a);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(14), 0x0051ec05);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(15), 0x0000380e);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(16), 0x3ff09001);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(17), 0x3fc10bfa);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(18), 0x3f9193f7);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(19), 0x3f7227f7);

	gpu_write(gpu, REG_A8XX_UCHE_CLIENT_PF, BIT(7) | 0x1);

	a8xx_nonctxt_config(gpu, &gmem_protect);

	/* Enable fault detection */
	gpu_write(gpu, REG_A8XX_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) | 0xcfffff);
	gpu_write(gpu, REG_A8XX_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30));

	/* Set up the CX GMU counter 0 to count busy ticks */
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);

	/* Enable the power counter */
	gmu_rmw(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_0, 0xff, BIT(5));
	gmu_write(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);

	/* Protect registers from the CP */
	a8xx_set_cp_protect(gpu);

	/* Enable the GMEM save/restore feature for preemption */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 1);

	for (pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		u32 apriv_mask = A8XX_APRIV_MASK;
		unsigned long flags;

		if (pipe_id == PIPE_LPAC)
			continue;

		if (pipe_id == PIPE_BR)
			apriv_mask = A8XX_BR_APRIV_MASK;

		a8xx_aperture_acquire(gpu, pipe_id, &flags);
		gpu_write(gpu, REG_A8XX_CP_APRIV_CNTL_PIPE, apriv_mask);
		gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_PIPE,
			  A8XX_CP_INTERRUPT_STATUS_MASK_PIPE);
		gpu_write(gpu, REG_A8XX_CP_HW_FAULT_STATUS_MASK_PIPE,
			  A8XX_CP_HW_FAULT_STATUS_MASK_PIPE);
		a8xx_aperture_release(gpu, flags);
	}

	a8xx_aperture_clear(gpu);

	/* Enable interrupts */
	gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, A8XX_CP_GLOBAL_INT_MASK);
	gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, A8XX_INT_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		goto out;

	gpu_write64(gpu, REG_A8XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
	if (a6xx_gpu->aqe_iova)
		gpu_write64(gpu, REG_A8XX_CP_AQE_INSTR_BASE_0, a6xx_gpu->aqe_iova);

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
	gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);

	/* Configure the RPTR shadow if needed: */
	gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, shadowptr(a6xx_gpu, gpu->rb[0]));
	gpu_write64(gpu, REG_A8XX_CP_RB_RPTR_ADDR_BV, rbmemptr(gpu->rb[0], bv_rptr));

	a8xx_preempt_hw_init(gpu);

	for (i = 0; i < gpu->nr_rings; i++)
		a6xx_gpu->shadow[i] = 0;

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];

	for (i = 0; i < gpu->nr_rings; i++)
		gpu->rb[i]->cur_ctx_seqno = 0;

	/* Enable the SQE to start the CP engine */
	gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 1);

	ret = a8xx_cp_init(gpu);
	if (ret)
		goto out;

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a6xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a6xx_flush(gpu, gpu->rb[0]);
		if (!a8xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
		 * if you mess this up you are about to crash horribly)
		 */
		dev_warn_once(gpu->dev->dev,
			      "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
		ret = 0;
	} else {
		return ret;
	}

	/*
	 * The GMEM_PROTECT register should be programmed after the GPU is
	 * transitioned to non-secure mode
	 */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_GC_GMEM_PROTECT, gmem_protect);
	WARN_ON(!gmem_protect);
	a8xx_aperture_clear(gpu);

	if (!a6xx_gpu->pwrup_reglist_emitted) {
		a8xx_patch_pwrup_reglist(gpu);
		a6xx_gpu->pwrup_reglist_emitted = true;
	}

	/* Enable hardware clockgating */
	a8xx_set_hwcg(gpu, true);
out:
	/* Last step - yield the ringbuffer */
	a8xx_preempt_start(gpu);

	/*
	 * Tell the GMU that we are done touching the GPU and it can start power
	 * management
	 */
	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);

	if (!ret && (refcount_read(&gpu->sysprof_active) > 1)) {
		ret = a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET);
		if (!ret)
			set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status);
	}

	return ret;
}

int a8xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int ret;

	mutex_lock(&a6xx_gpu->gmu.lock);
	ret = hw_init(gpu);
	mutex_unlock(&a6xx_gpu->gmu.lock);

	return ret;
}

static void a8xx_dump(struct msm_gpu *gpu)
{
	DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n", gpu_read(gpu, REG_A8XX_RBBM_STATUS));
	adreno_dump(gpu);
}

void a8xx_recover(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int active_submits;

	adreno_dump_info(gpu);

	if (hang_debug)
		a8xx_dump(gpu);

	/*
	 * To handle recovery specific sequences during the rpm suspend we are
	 * about to trigger
	 */
	a6xx_gpu->hung = true;

	/* Halt SQE first */
	gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 3);

	pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);

	/* active_submits won't change until we make a submission */
	mutex_lock(&gpu->active_lock);
	active_submits = gpu->active_submits;

	/*
	 * Temporarily clear active_submits count to silence a WARN() in the
	 * runtime suspend cb
	 */
	gpu->active_submits = 0;

	reinit_completion(&gmu->pd_gate);
	dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
	dev_pm_genpd_synced_poweroff(gmu->cxpd);

	/* Drop the rpm refcount from active submits */
	if (active_submits)
		pm_runtime_put(&gpu->pdev->dev);

	/* And the final one from recover worker */
	pm_runtime_put_sync(&gpu->pdev->dev);

	if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
		DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");

	dev_pm_genpd_remove_notifier(gmu->cxpd);

	pm_runtime_use_autosuspend(&gpu->pdev->dev);

	if (active_submits)
		pm_runtime_get(&gpu->pdev->dev);

	pm_runtime_get_sync(&gpu->pdev->dev);

	gpu->active_submits = active_submits;
	mutex_unlock(&gpu->active_lock);

	msm_gpu_hw_init(gpu);
	a6xx_gpu->hung = false;
}

static const char *a8xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
{
	static const char * const uche_clients[] = {
		"BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
		"BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
		"STCHE",
	};
	static const char * const uche_clients_lpac[] = {
		"-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC",
	};
	u32 val;

	/*
	 * The source of the data depends on the mid ID read from FSYNR1
	 * and the client ID read from the UCHE block
	 */
	val = gpu_read(gpu, REG_A8XX_UCHE_CLIENT_PF);

	val &= GENMASK(6, 0);

	/* mid=3 refers to BR or BV */
	if (mid == 3) {
		if (val < ARRAY_SIZE(uche_clients))
			return uche_clients[val];
		else
			return "UCHE";
	}

	/* mid=8 refers to LPAC */
	if (mid == 8) {
		if (val < ARRAY_SIZE(uche_clients_lpac))
			return uche_clients_lpac[val];
		else
			return "UCHE_LPAC";
	}

	return "Unknown";
}

static const char *a8xx_fault_block(struct msm_gpu *gpu, u32 id)
{
	switch (id) {
	case 0x0:
		return "CP";
	case 0x1:
		return "UCHE: Unknown";
	case 0x2:
		return "UCHE_LPAC: Unknown";
	case 0x3:
	case 0x8:
		return a8xx_uche_fault_block(gpu, id);
	case 0x4:
		return "CCU";
	case 0x5:
		return "Flag cache";
	case 0x6:
		return "PREFETCH";
	case 0x7:
		return "GMU";
	case 0x9:
		return "UCHE_HPAC";
	}

	return "Unknown";
}

int a8xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
{
	struct msm_gpu *gpu = arg;
	struct adreno_smmu_fault_info *info = data;
	const char *block = "unknown";

	u32 scratch[] = {
		gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(0)),
		gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(1)),
		gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(2)),
		gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(3)),
	};

	if (info)
		block = a8xx_fault_block(gpu, info->fsynr1 & 0xff);

	return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
}

static void a8xx_cp_hw_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A8XX_CP_INTERRUPT_STATUS_GLOBAL);
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 slice = a8xx_get_first_slice(a6xx_gpu);
	u32 hw_fault_mask = GENMASK(6, 0);
	u32 sw_fault_mask = GENMASK(22, 16);
	u32 pipe = 0;

	dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Global INT status: 0x%x\n", status);

	if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR |
		      A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR))
		pipe |= BIT(PIPE_BR);

	if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV |
		      A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV))
		pipe |= BIT(PIPE_BV);

	if (!pipe) {
		dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Unknown pipe\n");
		goto out;
	}

	for (unsigned int pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		if (!(BIT(pipe_id) & pipe))
			continue;

		if (hw_fault_mask & status) {
			status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
						      REG_A8XX_CP_HW_FAULT_STATUS_PIPE);
			dev_err_ratelimited(&gpu->pdev->dev,
					    "CP HW FAULT pipe: %u status: 0x%x\n", pipe_id, status);
		}

		if (sw_fault_mask & status) {
			status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
						      REG_A8XX_CP_INTERRUPT_STATUS_PIPE);
			dev_err_ratelimited(&gpu->pdev->dev,
					    "CP SW FAULT pipe: %u status: 0x%x\n", pipe_id, status);

			if (status & BIT(8)) {
				a8xx_write_pipe(gpu, pipe_id, REG_A8XX_CP_SQE_STAT_ADDR_PIPE, 1);
				status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
							      REG_A8XX_CP_SQE_STAT_DATA_PIPE);
				dev_err_ratelimited(&gpu->pdev->dev,
						    "CP Opcode error, opcode=0x%x\n", status);
			}

			if (status & BIT(10)) {
				status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
							      REG_A8XX_CP_PROTECT_STATUS_PIPE);
				dev_err_ratelimited(&gpu->pdev->dev,
						    "CP REG PROTECT error, status=0x%x\n", status);
			}
		}
	}

out:
	/* Turn off interrupts to avoid triggering recovery again */
	a8xx_aperture_clear(gpu);
	gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, 0);
	gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, 0);

	kthread_queue_work(gpu->worker, &gpu->recover_work);
}

static u32 gpu_periph_read(struct msm_gpu *gpu, u32 dbg_offset)
{
	gpu_write(gpu, REG_A8XX_CP_SQE_UCODE_DBG_ADDR_PIPE, dbg_offset);

	return gpu_read(gpu, REG_A8XX_CP_SQE_UCODE_DBG_DATA_PIPE);
}

static u64 gpu_periph_read64(struct msm_gpu *gpu, u32 dbg_offset)
{
	u64 lo, hi;

	lo = gpu_periph_read(gpu, dbg_offset);
	hi = gpu_periph_read(gpu, dbg_offset + 1);

	return (hi << 32) | lo;
}

#define CP_PERIPH_IB1_BASE_LO	0x7005
#define CP_PERIPH_IB1_BASE_HI	0x7006
#define CP_PERIPH_IB1_SIZE	0x7007
#define CP_PERIPH_IB1_OFFSET	0x7008
#define CP_PERIPH_IB2_BASE_LO	0x7009
#define CP_PERIPH_IB2_BASE_HI	0x700a
#define CP_PERIPH_IB2_SIZE	0x700b
#define CP_PERIPH_IB2_OFFSET	0x700c
#define CP_PERIPH_IB3_BASE_LO	0x700d
#define CP_PERIPH_IB3_BASE_HI	0x700e
#define CP_PERIPH_IB3_SIZE	0x700f
#define CP_PERIPH_IB3_OFFSET	0x7010

static void a8xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
	unsigned long flags;

	/*
	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
	 * but the fault handler will trigger the devcore dump, and we want
	 * to otherwise resume normally rather than killing the submit, so
	 * just bail.
	 */
	if (gpu_read(gpu, REG_A8XX_RBBM_MISC_STATUS) & A8XX_RBBM_MISC_STATUS_SMMU_STALLED_ON_FAULT)
		return;

	/*
	 * Force the GPU to stay on until after we finish
	 * collecting information
	 */
	if (!adreno_has_gmu_wrapper(adreno_gpu))
		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "gpu fault ring %d fence %x status %8.8X gfx_status %8.8X\n",
		      ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
		      gpu_read(gpu, REG_A8XX_RBBM_STATUS), gpu_read(gpu, REG_A8XX_RBBM_GFX_STATUS));

	a8xx_aperture_acquire(gpu, PIPE_BR, &flags);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "BR: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
		      gpu_read(gpu, REG_A8XX_RBBM_GFX_BR_STATUS),
		      gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
		      gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
		      gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
		      gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
		      gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));

	a8xx_aperture_release(gpu, flags);
	a8xx_aperture_acquire(gpu, PIPE_BV, &flags);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "BV: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
		      gpu_read(gpu, REG_A8XX_RBBM_GFX_BV_STATUS),
		      gpu_read(gpu, REG_A8XX_CP_RB_RPTR_BV),
		      gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
		      gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
		      gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
		      gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));

	a8xx_aperture_release(gpu, flags);
	a8xx_aperture_clear(gpu);

	/* Turn off the hangcheck timer to keep it from bothering us */
	timer_delete(&gpu->hangcheck_timer);

	kthread_queue_work(gpu->worker, &gpu->recover_work);
}

static void a8xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
{
	u32 status;

	status = gpu_read(gpu, REG_A8XX_RBBM_SW_FUSE_INT_STATUS);
	gpu_write(gpu, REG_A8XX_RBBM_SW_FUSE_INT_MASK, 0);

	dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);

	/*
	 * Ignore FASTBLEND violations, because the HW will silently fall back
	 * to legacy blending.
	 */
	if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
		      A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
		timer_delete(&gpu->hangcheck_timer);

		kthread_queue_work(gpu->worker, &gpu->recover_work);
	}
}

irqreturn_t a8xx_irq(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	u32 status = gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS);

	gpu_write(gpu, REG_A8XX_RBBM_INT_CLEAR_CMD, status);

	if (priv->disable_err_irq)
		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
		a8xx_fault_detect_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) {
		u32 rl0, rl1;

		rl0 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_0);
		rl1 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_1);
		dev_err_ratelimited(&gpu->pdev->dev,
				    "CP | AHB bus error RL_ERROR_0: %x, RL_ERROR_1: %x\n", rl0, rl1);
	}

	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a8xx_cp_hw_err_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");

	if (status & A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Trap interrupt\n");

	if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
		a8xx_sw_fuse_violation_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		msm_gpu_retire(gpu);
		a8xx_preempt_trigger(gpu);
	}

	if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
		a8xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}

void a8xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;

	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);

		gpu_scid &= GENMASK(5, 0);

		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1,
			  FIELD_PREP(GENMASK(29, 24), gpu_scid) |
			  FIELD_PREP(GENMASK(23, 18), gpu_scid) |
			  FIELD_PREP(GENMASK(17, 12), gpu_scid) |
			  FIELD_PREP(GENMASK(11, 6), gpu_scid) |
			  FIELD_PREP(GENMASK(5, 0), gpu_scid));

		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0,
			  FIELD_PREP(GENMASK(27, 22), gpu_scid) |
			  FIELD_PREP(GENMASK(21, 16), gpu_scid) |
			  FIELD_PREP(GENMASK(15, 10), gpu_scid) |
			  BIT(8));
	}

	llcc_slice_activate(a6xx_gpu->htw_llc_slice);
}

#define GBIF_CLIENT_HALT_MASK		BIT(0)
#define GBIF_ARB_HALT_MASK		BIT(1)
#define VBIF_XIN_HALT_CTRL0_MASK	GENMASK(3, 0)
#define VBIF_RESET_ACK_MASK		0xF0
#define GPR0_GBIF_HALT_REQUEST		0x1E0

void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
{
	struct msm_gpu *gpu = &adreno_gpu->base;

	if (gx_off) {
		/* Halt the gx side of GBIF */
		gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 1);
		spin_until(gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT_ACK) & 1);
	}

	/* Halt new client requests on GBIF */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
		   (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);

	/* Halt all AXI requests on GBIF */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
		   (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);

	/* The GBIF halt needs to be explicitly cleared */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
}

u64 a8xx_gmu_get_timestamp(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u64 count_hi, count_lo, temp;

	do {
		count_hi = gmu_read(&a6xx_gpu->gmu, REG_A8XX_GMU_ALWAYS_ON_COUNTER_H);
		count_lo = gmu_read(&a6xx_gpu->gmu, REG_A8XX_GMU_ALWAYS_ON_COUNTER_L);
		temp = gmu_read(&a6xx_gpu->gmu, REG_A8XX_GMU_ALWAYS_ON_COUNTER_H);
	} while (unlikely(count_hi != temp));

	return (count_hi << 32) | count_lo;
}

u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u64 busy_cycles;

	/* 19.2MHz */
	*out_sample_rate = 19200000;

	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
				 REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
				 REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);

	return busy_cycles;
}

bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	return true;
}