1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ 3 4 5 #include "msm_gem.h" 6 #include "msm_mmu.h" 7 #include "msm_gpu_trace.h" 8 #include "a6xx_gpu.h" 9 #include "a6xx_gmu.xml.h" 10 11 #include <linux/bitfield.h> 12 #include <linux/devfreq.h> 13 #include <linux/firmware/qcom/qcom_scm.h> 14 #include <linux/pm_domain.h> 15 #include <linux/soc/qcom/llcc-qcom.h> 16 17 #define GPU_PAS_ID 13 18 19 static void a8xx_aperture_slice_set(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice) 20 { 21 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 22 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 23 u32 val; 24 25 val = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe) | A8XX_CP_APERTURE_CNTL_HOST_SLICEID(slice); 26 27 if (a6xx_gpu->cached_aperture == val) 28 return; 29 30 gpu_write(gpu, REG_A8XX_CP_APERTURE_CNTL_HOST, val); 31 32 a6xx_gpu->cached_aperture = val; 33 } 34 35 static void a8xx_aperture_acquire(struct msm_gpu *gpu, enum adreno_pipe pipe, unsigned long *flags) 36 { 37 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 38 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 39 40 spin_lock_irqsave(&a6xx_gpu->aperture_lock, *flags); 41 42 a8xx_aperture_slice_set(gpu, pipe, 0); 43 } 44 45 static void a8xx_aperture_release(struct msm_gpu *gpu, unsigned long flags) 46 { 47 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 48 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 49 50 spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags); 51 } 52 53 static void a8xx_aperture_clear(struct msm_gpu *gpu) 54 { 55 unsigned long flags; 56 57 a8xx_aperture_acquire(gpu, PIPE_NONE, &flags); 58 a8xx_aperture_release(gpu, flags); 59 } 60 61 static void a8xx_write_pipe(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 offset, u32 data) 62 { 63 unsigned long flags; 64 65 a8xx_aperture_acquire(gpu, pipe, &flags); 66 gpu_write(gpu, offset, data); 67 a8xx_aperture_release(gpu, flags); 68 } 69 70 static u32 a8xx_read_pipe_slice(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice, u32 offset) 71 { 72 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 73 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 74 unsigned long flags; 75 u32 val; 76 77 spin_lock_irqsave(&a6xx_gpu->aperture_lock, flags); 78 a8xx_aperture_slice_set(gpu, pipe, slice); 79 val = gpu_read(gpu, offset); 80 spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags); 81 82 return val; 83 } 84 85 void a8xx_gpu_get_slice_info(struct msm_gpu *gpu) 86 { 87 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 88 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 89 const struct a6xx_info *info = adreno_gpu->info->a6xx; 90 struct device *dev = &gpu->pdev->dev; 91 u32 slice_mask; 92 93 if (adreno_gpu->info->family < ADRENO_8XX_GEN1) 94 return; 95 96 if (a6xx_gpu->slice_mask) 97 return; 98 99 slice_mask = GENMASK(info->max_slices - 1, 0); 100 101 /* GEN1 doesn't support partial slice configurations */ 102 if (adreno_gpu->info->family == ADRENO_8XX_GEN1) { 103 a6xx_gpu->slice_mask = slice_mask; 104 return; 105 } 106 107 slice_mask &= a6xx_cx_misc_read(a6xx_gpu, 108 REG_A8XX_CX_MISC_SLICE_ENABLE_FINAL); 109 110 a6xx_gpu->slice_mask = slice_mask; 111 112 /* Chip ID depends on the number of slices available. So update it */ 113 adreno_gpu->chip_id |= FIELD_PREP(GENMASK(7, 4), hweight32(slice_mask)); 114 115 /* Update the gpu-name to reflect the slice config: */ 116 const char *name = devm_kasprintf(dev, GFP_KERNEL, 117 "%"ADRENO_CHIPID_FMT, 118 ADRENO_CHIPID_ARGS(adreno_gpu->chip_id)); 119 if (name) { 120 devm_kfree(dev, adreno_gpu->base.name); 121 adreno_gpu->base.name = name; 122 } 123 } 124 125 static u32 a8xx_get_first_slice(struct a6xx_gpu *a6xx_gpu) 126 { 127 return ffs(a6xx_gpu->slice_mask) - 1; 128 } 129 130 static inline bool _a8xx_check_idle(struct msm_gpu *gpu) 131 { 132 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 133 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 134 135 /* Check that the GMU is idle */ 136 if (!a6xx_gmu_isidle(&a6xx_gpu->gmu)) 137 return false; 138 139 /* Check that the CX master is idle */ 140 if (gpu_read(gpu, REG_A8XX_RBBM_STATUS) & 141 ~A8XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER) 142 return false; 143 144 return !(gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS) & 145 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT); 146 } 147 148 static bool a8xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 149 { 150 /* wait for CP to drain ringbuffer: */ 151 if (!adreno_idle(gpu, ring)) 152 return false; 153 154 if (spin_until(_a8xx_check_idle(gpu))) { 155 DRM_ERROR( 156 "%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n", 157 gpu->name, __builtin_return_address(0), 158 gpu_read(gpu, REG_A8XX_RBBM_STATUS), 159 gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS), 160 gpu_read(gpu, REG_A6XX_CP_RB_RPTR), 161 gpu_read(gpu, REG_A6XX_CP_RB_WPTR)); 162 return false; 163 } 164 165 return true; 166 } 167 168 void a8xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 169 { 170 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 171 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 172 uint32_t wptr; 173 unsigned long flags; 174 175 spin_lock_irqsave(&ring->preempt_lock, flags); 176 177 /* Copy the shadow to the actual register */ 178 ring->cur = ring->next; 179 180 /* Make sure to wrap wptr if we need to */ 181 wptr = get_wptr(ring); 182 183 /* Update HW if this is the current ring and we are not in preempt*/ 184 if (!a6xx_in_preempt(a6xx_gpu)) { 185 if (a6xx_gpu->cur_ring == ring) 186 a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false); 187 else 188 ring->restore_wptr = true; 189 } else { 190 ring->restore_wptr = true; 191 } 192 193 spin_unlock_irqrestore(&ring->preempt_lock, flags); 194 } 195 196 static void a8xx_set_hwcg(struct msm_gpu *gpu, bool state) 197 { 198 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 199 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 200 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 201 u32 val; 202 203 if (adreno_is_x285(adreno_gpu) && state) 204 gpu_write(gpu, REG_A8XX_RBBM_CGC_0_PC, 0x00000702); 205 206 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL, 207 state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0); 208 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, 209 state ? 0x110111 : 0); 210 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL, 211 state ? 0x55555 : 0); 212 213 gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 1); 214 gpu_write(gpu, REG_A8XX_RBBM_CGC_GLOBAL_LOAD_CMD, !!state); 215 216 if (state) { 217 gpu_write(gpu, REG_A8XX_RBBM_CGC_P2S_TRIG_CMD, 1); 218 219 if (gpu_poll_timeout(gpu, REG_A8XX_RBBM_CGC_P2S_STATUS, val, 220 val & A8XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) { 221 dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n"); 222 return; 223 } 224 225 gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 0); 226 } else { 227 /* 228 * GMU enables clk gating in GBIF during boot up. So, 229 * override that here when hwcg feature is disabled 230 */ 231 gpu_rmw(gpu, REG_A8XX_GBIF_CX_CONFIG, BIT(0), 0); 232 } 233 } 234 235 static void a8xx_set_cp_protect(struct msm_gpu *gpu) 236 { 237 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 238 const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect; 239 u32 cntl, final_cfg; 240 unsigned int i; 241 242 cntl = A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_PROT_EN | 243 A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_FAULT_ON_VIOL_EN | 244 A8XX_CP_PROTECT_CNTL_PIPE_LAST_SPAN_INF_RANGE | 245 A8XX_CP_PROTECT_CNTL_PIPE_HALT_SQE_RANGE__MASK; 246 /* 247 * Enable access protection to privileged registers, fault on an access 248 * protect violation and select the last span to protect from the start 249 * address all the way to the end of the register address space 250 */ 251 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl); 252 a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl); 253 254 a8xx_aperture_clear(gpu); 255 256 for (i = 0; i < protect->count; i++) { 257 /* Intentionally skip writing to some registers */ 258 if (protect->regs[i]) { 259 gpu_write(gpu, REG_A8XX_CP_PROTECT_GLOBAL(i), protect->regs[i]); 260 final_cfg = protect->regs[i]; 261 } 262 } 263 264 /* 265 * Last span feature is only supported on PIPE specific register. 266 * So update those here 267 */ 268 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_PIPE(15), final_cfg); 269 a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_PIPE(15), final_cfg); 270 271 a8xx_aperture_clear(gpu); 272 } 273 274 static void a8xx_set_ubwc_config(struct msm_gpu *gpu) 275 { 276 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 277 const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config; 278 u32 level2_swizzling_dis = !(qcom_ubwc_swizzle(cfg) & UBWC_SWIZZLE_ENABLE_LVL2); 279 u32 level3_swizzling_dis = !(qcom_ubwc_swizzle(cfg) & UBWC_SWIZZLE_ENABLE_LVL3); 280 bool rgba8888_lossless = false, fp16compoptdis = false; 281 bool yuvnotcomptofc = false, min_acc_len_64b = false; 282 bool rgb565_predicator = false; 283 bool amsbc = qcom_ubwc_enable_amsbc(cfg); 284 bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg); 285 u32 ubwc_version = cfg->ubwc_enc_version; 286 u32 hbb, hbb_hi, hbb_lo, mode; 287 u8 uavflagprd_inv = 2; 288 289 if (ubwc_version > UBWC_6_0) 290 dev_err(&gpu->pdev->dev, "Unknown UBWC version: 0x%x\n", ubwc_version); 291 292 if (ubwc_version == UBWC_6_0) 293 yuvnotcomptofc = true; 294 295 if (ubwc_version < UBWC_5_0 && 296 ubwc_version >= UBWC_4_0) 297 rgba8888_lossless = true; 298 299 if (ubwc_version < UBWC_4_3) 300 fp16compoptdis = true; 301 302 if (cfg->ubwc_enc_version >= UBWC_4_0) 303 rgb565_predicator = true; 304 305 if (ubwc_version < UBWC_3_0) 306 dev_err(&gpu->pdev->dev, "Unsupported UBWC version: 0x%x\n", ubwc_version); 307 308 mode = qcom_ubwc_version_tag(cfg); 309 310 /* 311 * We subtract 13 from the highest bank bit (13 is the minimum value 312 * allowed by hw) and write the lowest two bits of the remaining value 313 * as hbb_lo and the one above it as hbb_hi to the hardware. 314 */ 315 WARN_ON(cfg->highest_bank_bit < 13); 316 hbb = cfg->highest_bank_bit - 13; 317 hbb_hi = hbb >> 2; 318 hbb_lo = hbb & 3; 319 320 a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL, 321 hbb << 5 | 322 level3_swizzling_dis << 4 | 323 level2_swizzling_dis << 3); 324 325 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL, 326 hbb << 5 | 327 level3_swizzling_dis << 4 | 328 level2_swizzling_dis << 3); 329 330 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CCU_NC_MODE_CNTL, 331 yuvnotcomptofc << 6 | 332 level3_swizzling_dis << 5 | 333 level2_swizzling_dis << 4 | 334 hbb_hi << 3 | 335 hbb_lo << 1); 336 337 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CMP_NC_MODE_CNTL, 338 mode << 15 | 339 yuvnotcomptofc << 6 | 340 rgba8888_lossless << 4 | 341 fp16compoptdis << 3 | 342 rgb565_predicator << 2 | 343 amsbc << 1 | 344 min_acc_len_64b); 345 346 a8xx_aperture_clear(gpu); 347 348 gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, 349 level3_swizzling_dis << 13 | 350 level2_swizzling_dis << 12 | 351 hbb_hi << 10 | 352 uavflagprd_inv << 4 | 353 min_acc_len_64b << 3 | 354 hbb_lo << 1 | ubwc_mode); 355 356 gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, 357 level3_swizzling_dis << 7 | 358 level2_swizzling_dis << 6 | 359 hbb_hi << 4 | 360 min_acc_len_64b << 3 | 361 hbb_lo << 1 | ubwc_mode); 362 } 363 364 static void a8xx_nonctxt_config(struct msm_gpu *gpu, u32 *gmem_protect) 365 { 366 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 367 const struct a6xx_info *info = adreno_gpu->info->a6xx; 368 const struct adreno_reglist_pipe *regs = info->nonctxt_reglist; 369 unsigned int pipe_id, i; 370 unsigned long flags; 371 372 for (pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) { 373 /* We don't have support for LPAC yet */ 374 if (pipe_id == PIPE_LPAC) 375 continue; 376 377 a8xx_aperture_acquire(gpu, pipe_id, &flags); 378 379 for (i = 0; regs[i].offset; i++) { 380 if (!(BIT(pipe_id) & regs[i].pipe)) 381 continue; 382 383 if (regs[i].offset == REG_A8XX_RB_GC_GMEM_PROTECT) 384 *gmem_protect = regs[i].value; 385 386 gpu_write(gpu, regs[i].offset, regs[i].value); 387 } 388 389 a8xx_aperture_release(gpu, flags); 390 } 391 392 a8xx_aperture_clear(gpu); 393 } 394 395 static void a8xx_patch_pwrup_reglist(struct msm_gpu *gpu) 396 { 397 const struct adreno_reglist_pipe_list *dyn_pwrup_reglist; 398 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 399 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 400 const struct adreno_reglist_list *reglist; 401 void *ptr = a6xx_gpu->pwrup_reglist_ptr; 402 struct cpu_gpu_lock *lock = ptr; 403 u32 *dest = (u32 *)&lock->regs[0]; 404 u32 dyn_pwrup_reglist_count = 0; 405 int i; 406 407 lock->gpu_req = lock->cpu_req = lock->turn = 0; 408 409 reglist = adreno_gpu->info->a6xx->ifpc_reglist; 410 if (reglist) { 411 lock->ifpc_list_len = reglist->count; 412 413 /* 414 * For each entry in each of the lists, write the offset and the current 415 * register value into the GPU buffer 416 */ 417 for (i = 0; i < reglist->count; i++) { 418 *dest++ = reglist->regs[i]; 419 *dest++ = gpu_read(gpu, reglist->regs[i]); 420 } 421 } 422 423 reglist = adreno_gpu->info->a6xx->pwrup_reglist; 424 if (reglist) { 425 lock->preemption_list_len = reglist->count; 426 427 for (i = 0; i < reglist->count; i++) { 428 *dest++ = reglist->regs[i]; 429 *dest++ = gpu_read(gpu, reglist->regs[i]); 430 } 431 } 432 433 /* 434 * The overall register list is composed of 435 * 1. Static IFPC-only registers 436 * 2. Static IFPC + preemption registers 437 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects) 438 * 439 * The first two lists are static. Size of these lists are stored as 440 * number of pairs in ifpc_list_len and preemption_list_len 441 * respectively. With concurrent binning, Some of the perfcounter 442 * registers being virtualized, CP needs to know the pipe id to program 443 * the aperture inorder to restore the same. Thus, third list is a 444 * dynamic list with triplets as 445 * (<aperture, shifted 12 bits> <address> <data>), and the length is 446 * stored as number for triplets in dynamic_list_len. 447 */ 448 dyn_pwrup_reglist = adreno_gpu->info->a6xx->dyn_pwrup_reglist; 449 if (!dyn_pwrup_reglist) 450 goto done; 451 452 for (u32 pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) { 453 for (i = 0; i < dyn_pwrup_reglist->count; i++) { 454 if (!(dyn_pwrup_reglist->regs[i].pipe & BIT(pipe_id))) 455 continue; 456 *dest++ = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe_id); 457 *dest++ = dyn_pwrup_reglist->regs[i].offset; 458 *dest++ = a8xx_read_pipe_slice(gpu, 459 pipe_id, 460 a8xx_get_first_slice(a6xx_gpu), 461 dyn_pwrup_reglist->regs[i].offset); 462 dyn_pwrup_reglist_count++; 463 } 464 } 465 466 lock->dynamic_list_len = dyn_pwrup_reglist_count; 467 a6xx_gpu->dynamic_sel_reglist_offset = dyn_pwrup_reglist_count; 468 469 done: 470 a8xx_aperture_clear(gpu); 471 } 472 473 static int a8xx_preempt_start(struct msm_gpu *gpu) 474 { 475 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 476 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 477 struct msm_ringbuffer *ring = gpu->rb[0]; 478 479 if (gpu->nr_rings <= 1) 480 return 0; 481 482 /* Turn CP protection off */ 483 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 484 OUT_RING(ring, 0); 485 486 a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL); 487 488 a6xx_flush_yield(gpu, ring); 489 490 return a8xx_idle(gpu, ring) ? 0 : -EINVAL; 491 } 492 493 static int a8xx_cp_init(struct msm_gpu *gpu) 494 { 495 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 496 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 497 struct msm_ringbuffer *ring = gpu->rb[0]; 498 u32 mask; 499 500 /* Disable concurrent binning before sending CP init */ 501 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 502 OUT_RING(ring, BIT(27)); 503 504 OUT_PKT7(ring, CP_ME_INIT, 7); 505 506 /* Use multiple HW contexts */ 507 mask = BIT(0); 508 509 /* Enable error detection */ 510 mask |= BIT(1); 511 512 /* Set default reset state */ 513 mask |= BIT(3); 514 515 /* Disable save/restore of performance counters across preemption */ 516 mask |= BIT(6); 517 518 /* Enable the register init list with the spinlock */ 519 mask |= BIT(8); 520 521 OUT_RING(ring, mask); 522 523 /* Enable multiple hardware contexts */ 524 OUT_RING(ring, 0x00000003); 525 526 /* Enable error detection */ 527 OUT_RING(ring, 0x20000000); 528 529 /* Operation mode mask */ 530 OUT_RING(ring, 0x00000002); 531 532 /* Lo address */ 533 OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova)); 534 /* Hi address */ 535 OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova)); 536 537 /* Enable dyn pwrup list with triplets (offset, value, pipe) */ 538 OUT_RING(ring, BIT(31)); 539 540 a6xx_flush(gpu, ring); 541 return a8xx_idle(gpu, ring) ? 0 : -EINVAL; 542 } 543 544 #define A8XX_INT_MASK \ 545 (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \ 546 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \ 547 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \ 548 A6XX_RBBM_INT_0_MASK_CP_SW | \ 549 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ 550 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \ 551 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \ 552 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ 553 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \ 554 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ 555 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ 556 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ 557 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ 558 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) 559 560 #define A8XX_APRIV_MASK \ 561 (A8XX_CP_APRIV_CNTL_PIPE_ICACHE | \ 562 A8XX_CP_APRIV_CNTL_PIPE_RBFETCH | \ 563 A8XX_CP_APRIV_CNTL_PIPE_RBPRIVLEVEL | \ 564 A8XX_CP_APRIV_CNTL_PIPE_RBRPWB) 565 566 #define A8XX_BR_APRIV_MASK \ 567 (A8XX_APRIV_MASK | \ 568 A8XX_CP_APRIV_CNTL_PIPE_CDREAD | \ 569 A8XX_CP_APRIV_CNTL_PIPE_CDWRITE) 570 571 #define A8XX_CP_GLOBAL_INT_MASK \ 572 (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | \ 573 A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | \ 574 A8XX_CP_GLOBAL_INT_MASK_HWFAULTLPAC | \ 575 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE0 | \ 576 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE1 | \ 577 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBR | \ 578 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBV | \ 579 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR | \ 580 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV | \ 581 A8XX_CP_GLOBAL_INT_MASK_SWFAULTLPAC | \ 582 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE0 | \ 583 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE1 | \ 584 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBR | \ 585 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBV) 586 587 #define A8XX_CP_INTERRUPT_STATUS_MASK_PIPE \ 588 (A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFRBWRAP | \ 589 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB1WRAP | \ 590 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB2WRAP | \ 591 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB3WRAP | \ 592 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFSDSWRAP | \ 593 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFMRBWRAP | \ 594 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFVSDWRAP | \ 595 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_OPCODEERROR | \ 596 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VSDPARITYERROR | \ 597 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_REGISTERPROTECTIONERROR | \ 598 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_ILLEGALINSTRUCTION | \ 599 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_SMMUFAULT | \ 600 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPCLIENT| \ 601 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPTYPE | \ 602 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPREAD | \ 603 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESP | \ 604 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_RTWROVF | \ 605 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTWROVF | \ 606 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTREFCNTOVF | \ 607 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTCLRRESMISS) 608 609 #define A8XX_CP_HW_FAULT_STATUS_MASK_PIPE \ 610 (A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFRBFAULT | \ 611 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB1FAULT | \ 612 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB2FAULT | \ 613 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB3FAULT | \ 614 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFSDSFAULT | \ 615 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFMRBFAULT | \ 616 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFVSDFAULT | \ 617 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_SQEREADBURSTOVF | \ 618 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_EVENTENGINEOVF | \ 619 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_UCODEERROR) 620 621 static int hw_init(struct msm_gpu *gpu) 622 { 623 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 624 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 625 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 626 unsigned int pipe_id, i; 627 u32 gmem_protect = 0; 628 u64 gmem_range_min; 629 int ret; 630 631 ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); 632 if (ret) 633 return ret; 634 635 /* Clear the cached value to force aperture configuration next time */ 636 a6xx_gpu->cached_aperture = UINT_MAX; 637 a8xx_aperture_clear(gpu); 638 639 /* Clear GBIF halt in case GX domain was not collapsed */ 640 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0); 641 gpu_read(gpu, REG_A6XX_GBIF_HALT); 642 643 gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 0); 644 gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT); 645 646 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0); 647 648 /* 649 * Disable the trusted memory range - we don't actually supported secure 650 * memory rendering at this point in time and we don't want to block off 651 * part of the virtual memory space. 652 */ 653 gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000); 654 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); 655 656 /* Make all blocks contribute to the GPU BUSY perf counter */ 657 gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff); 658 659 /* Setup GMEM Range in UCHE */ 660 gmem_range_min = SZ_64M; 661 /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */ 662 gpu_write64(gpu, REG_A8XX_UCHE_CCHE_GC_GMEM_RANGE_MIN, gmem_range_min); 663 gpu_write64(gpu, REG_A8XX_SP_HLSQ_GC_GMEM_RANGE_MIN, gmem_range_min); 664 665 /* Setup UCHE Trap region */ 666 gpu_write64(gpu, REG_A8XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); 667 gpu_write64(gpu, REG_A8XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); 668 gpu_write64(gpu, REG_A8XX_UCHE_CCHE_TRAP_BASE, adreno_gpu->uche_trap_base); 669 gpu_write64(gpu, REG_A8XX_UCHE_CCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); 670 671 /* Turn on performance counters */ 672 gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_CNTL, 0x1); 673 gpu_write(gpu, REG_A8XX_RBBM_SLICE_PERFCTR_CNTL, 0x1); 674 675 /* Turn on the IFPC counter (countable 4 on XOCLK1) */ 676 gmu_write(&a6xx_gpu->gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_1, 677 FIELD_PREP(GENMASK(7, 0), 0x4)); 678 679 /* Select CP0 to always count cycles */ 680 gpu_write(gpu, REG_A8XX_CP_PERFCTR_CP_SEL(0), 1); 681 682 a8xx_set_ubwc_config(gpu); 683 684 /* Set weights for bicubic filtering */ 685 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0); 686 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4); 687 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee); 688 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed); 689 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0); 690 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(5), 0x00000000); 691 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(6), 0x3fd093e8); 692 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(7), 0x3f4133dc); 693 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(8), 0x3ea1dfdb); 694 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(9), 0x3e0283e0); 695 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(10), 0x0000ac2b); 696 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(11), 0x0000f01d); 697 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(12), 0x00114412); 698 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(13), 0x0021980a); 699 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(14), 0x0051ec05); 700 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(15), 0x0000380e); 701 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(16), 0x3ff09001); 702 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(17), 0x3fc10bfa); 703 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(18), 0x3f9193f7); 704 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(19), 0x3f7227f7); 705 706 gpu_write(gpu, REG_A8XX_UCHE_CLIENT_PF, BIT(7) | 0x1); 707 708 a8xx_nonctxt_config(gpu, &gmem_protect); 709 710 /* Enable fault detection */ 711 gpu_write(gpu, REG_A8XX_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) | 0xcfffff); 712 gpu_write(gpu, REG_A8XX_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30)); 713 714 /* Set up the CX GMU counter 0 to count busy ticks */ 715 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000); 716 717 /* Enable the power counter */ 718 gmu_rmw(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_0, 0xff, BIT(5)); 719 gmu_write(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1); 720 721 /* Protect registers from the CP */ 722 a8xx_set_cp_protect(gpu); 723 724 /* Enable the GMEM save/restore feature for preemption */ 725 a8xx_write_pipe(gpu, PIPE_BR, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 1); 726 727 for (pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) { 728 u32 apriv_mask = A8XX_APRIV_MASK; 729 unsigned long flags; 730 731 if (pipe_id == PIPE_LPAC) 732 continue; 733 734 if (pipe_id == PIPE_BR) 735 apriv_mask = A8XX_BR_APRIV_MASK; 736 737 a8xx_aperture_acquire(gpu, pipe_id, &flags); 738 gpu_write(gpu, REG_A8XX_CP_APRIV_CNTL_PIPE, apriv_mask); 739 gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_PIPE, 740 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE); 741 gpu_write(gpu, REG_A8XX_CP_HW_FAULT_STATUS_MASK_PIPE, 742 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE); 743 a8xx_aperture_release(gpu, flags); 744 } 745 746 a8xx_aperture_clear(gpu); 747 748 /* Enable interrupts */ 749 gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, A8XX_CP_GLOBAL_INT_MASK); 750 gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, A8XX_INT_MASK); 751 752 ret = adreno_hw_init(gpu); 753 if (ret) 754 goto out; 755 756 gpu_write64(gpu, REG_A8XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova); 757 if (a6xx_gpu->aqe_iova) 758 gpu_write64(gpu, REG_A8XX_CP_AQE_INSTR_BASE_0, a6xx_gpu->aqe_iova); 759 760 /* Set the ringbuffer address */ 761 gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova); 762 gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT); 763 764 /* Configure the RPTR shadow if needed: */ 765 gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, shadowptr(a6xx_gpu, gpu->rb[0])); 766 gpu_write64(gpu, REG_A8XX_CP_RB_RPTR_ADDR_BV, rbmemptr(gpu->rb[0], bv_rptr)); 767 768 a8xx_preempt_hw_init(gpu); 769 770 for (i = 0; i < gpu->nr_rings; i++) 771 a6xx_gpu->shadow[i] = 0; 772 773 /* Always come up on rb 0 */ 774 a6xx_gpu->cur_ring = gpu->rb[0]; 775 776 for (i = 0; i < gpu->nr_rings; i++) 777 gpu->rb[i]->cur_ctx_seqno = 0; 778 779 /* Enable the SQE_to start the CP engine */ 780 gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 1); 781 782 ret = a8xx_cp_init(gpu); 783 if (ret) 784 goto out; 785 786 /* 787 * Try to load a zap shader into the secure world. If successful 788 * we can use the CP to switch out of secure mode. If not then we 789 * have no resource but to try to switch ourselves out manually. If we 790 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will 791 * be blocked and a permissions violation will soon follow. 792 */ 793 ret = a6xx_zap_shader_init(gpu); 794 if (!ret) { 795 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1); 796 OUT_RING(gpu->rb[0], 0x00000000); 797 798 a6xx_flush(gpu, gpu->rb[0]); 799 if (!a8xx_idle(gpu, gpu->rb[0])) 800 return -EINVAL; 801 } else if (ret == -ENODEV) { 802 /* 803 * This device does not use zap shader (but print a warning 804 * just in case someone got their dt wrong.. hopefully they 805 * have a debug UART to realize the error of their ways... 806 * if you mess this up you are about to crash horribly) 807 */ 808 dev_warn_once(gpu->dev->dev, 809 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n"); 810 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0); 811 ret = 0; 812 } else { 813 return ret; 814 } 815 816 /* 817 * GMEM_PROTECT register should be programmed after GPU is transitioned to 818 * non-secure mode 819 */ 820 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_GC_GMEM_PROTECT, gmem_protect); 821 WARN_ON(!gmem_protect); 822 a8xx_aperture_clear(gpu); 823 824 if (!a6xx_gpu->pwrup_reglist_emitted) { 825 a8xx_patch_pwrup_reglist(gpu); 826 a6xx_gpu->pwrup_reglist_emitted = true; 827 } 828 829 /* Enable hardware clockgating */ 830 a8xx_set_hwcg(gpu, true); 831 out: 832 /* Last step - yield the ringbuffer */ 833 a8xx_preempt_start(gpu); 834 835 /* 836 * Tell the GMU that we are done touching the GPU and it can start power 837 * management 838 */ 839 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); 840 841 if (!ret && msm_gpu_sysprof_no_perfcntr_zap(gpu)) { 842 ret = a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET); 843 if (!ret) 844 set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status); 845 } 846 847 return ret; 848 } 849 850 int a8xx_hw_init(struct msm_gpu *gpu) 851 { 852 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 853 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 854 int ret; 855 856 mutex_lock(&a6xx_gpu->gmu.lock); 857 ret = hw_init(gpu); 858 mutex_unlock(&a6xx_gpu->gmu.lock); 859 860 return ret; 861 } 862 863 static void a8xx_dump(struct msm_gpu *gpu) 864 { 865 DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n", gpu_read(gpu, REG_A8XX_RBBM_STATUS)); 866 adreno_dump(gpu); 867 } 868 869 void a8xx_recover(struct msm_gpu *gpu) 870 { 871 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 872 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 873 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 874 int active_submits; 875 876 adreno_dump_info(gpu); 877 878 /* 879 * To handle recovery specific sequences during the rpm suspend we are 880 * about to trigger 881 */ 882 a6xx_gpu->hung = true; 883 884 if (adreno_gpu->funcs->gx_is_on(adreno_gpu)) { 885 /* 886 * Sometimes crashstate capture is skipped, so SQE should be 887 * halted here again 888 */ 889 gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 3); 890 891 if (hang_debug) 892 a8xx_dump(gpu); 893 } 894 895 pm_runtime_dont_use_autosuspend(&gpu->pdev->dev); 896 897 /* active_submit won't change until we make a submission */ 898 mutex_lock(&gpu->active_lock); 899 active_submits = gpu->active_submits; 900 901 /* 902 * Temporarily clear active_submits count to silence a WARN() in the 903 * runtime suspend cb 904 */ 905 gpu->active_submits = 0; 906 907 reinit_completion(&gmu->pd_gate); 908 dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb); 909 dev_pm_genpd_synced_poweroff(gmu->cxpd); 910 911 /* Drop the rpm refcount from active submits */ 912 if (active_submits) 913 pm_runtime_put(&gpu->pdev->dev); 914 915 /* And the final one from recover worker */ 916 pm_runtime_put_sync(&gpu->pdev->dev); 917 918 if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000))) 919 DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n"); 920 921 dev_pm_genpd_remove_notifier(gmu->cxpd); 922 923 pm_runtime_use_autosuspend(&gpu->pdev->dev); 924 925 if (active_submits) 926 pm_runtime_get(&gpu->pdev->dev); 927 928 pm_runtime_get_sync(&gpu->pdev->dev); 929 930 gpu->active_submits = active_submits; 931 mutex_unlock(&gpu->active_lock); 932 933 msm_gpu_hw_init(gpu); 934 a6xx_gpu->hung = false; 935 } 936 937 static const char *a8xx_uche_fault_block(struct msm_gpu *gpu, u32 mid) 938 { 939 static const char * const uche_clients[] = { 940 "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP", 941 "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP", 942 "STCHE", 943 }; 944 static const char * const uche_clients_lpac[] = { 945 "-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC", 946 }; 947 u32 val; 948 949 /* 950 * The source of the data depends on the mid ID read from FSYNR1. 951 * and the client ID read from the UCHE block 952 */ 953 val = gpu_read(gpu, REG_A8XX_UCHE_CLIENT_PF); 954 955 val &= GENMASK(6, 0); 956 957 /* mid=3 refers to BR or BV */ 958 if (mid == 3) { 959 if (val < ARRAY_SIZE(uche_clients)) 960 return uche_clients[val]; 961 else 962 return "UCHE"; 963 } 964 965 /* mid=8 refers to LPAC */ 966 if (mid == 8) { 967 if (val < ARRAY_SIZE(uche_clients_lpac)) 968 return uche_clients_lpac[val]; 969 else 970 return "UCHE_LPAC"; 971 } 972 973 return "Unknown"; 974 } 975 976 static const char *a8xx_fault_block(struct msm_gpu *gpu, u32 id) 977 { 978 switch (id) { 979 case 0x0: 980 return "CP"; 981 case 0x1: 982 return "UCHE: Unknown"; 983 case 0x2: 984 return "UCHE_LPAC: Unknown"; 985 case 0x3: 986 case 0x8: 987 return a8xx_uche_fault_block(gpu, id); 988 case 0x4: 989 return "CCU"; 990 case 0x5: 991 return "Flag cache"; 992 case 0x6: 993 return "PREFETCH"; 994 case 0x7: 995 return "GMU"; 996 case 0x9: 997 return "UCHE_HPAC"; 998 } 999 1000 return "Unknown"; 1001 } 1002 1003 int a8xx_fault_handler(void *arg, unsigned long iova, int flags, void *data) 1004 { 1005 struct msm_gpu *gpu = arg; 1006 struct adreno_smmu_fault_info *info = data; 1007 const char *block = "unknown"; 1008 1009 u32 scratch[] = { 1010 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(0)), 1011 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(1)), 1012 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(2)), 1013 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(3)), 1014 }; 1015 1016 if (info) 1017 block = a8xx_fault_block(gpu, info->fsynr1 & 0xff); 1018 1019 return adreno_fault_handler(gpu, iova, flags, info, block, scratch); 1020 } 1021 1022 static void a8xx_cp_hw_err_irq(struct msm_gpu *gpu) 1023 { 1024 u32 status = gpu_read(gpu, REG_A8XX_CP_INTERRUPT_STATUS_GLOBAL); 1025 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1026 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1027 u32 slice = a8xx_get_first_slice(a6xx_gpu); 1028 u32 hw_fault_mask = GENMASK(6, 0); 1029 u32 sw_fault_mask = GENMASK(22, 16); 1030 u32 pipe = 0; 1031 1032 dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Global INT status: 0x%x\n", status); 1033 1034 if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | 1035 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR)) 1036 pipe |= BIT(PIPE_BR); 1037 1038 if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | 1039 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV)) 1040 pipe |= BIT(PIPE_BV); 1041 1042 if (!pipe) { 1043 dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Unknown pipe\n"); 1044 goto out; 1045 } 1046 1047 for (unsigned int pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) { 1048 if (!(BIT(pipe_id) & pipe)) 1049 continue; 1050 1051 if (hw_fault_mask & status) { 1052 status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 1053 REG_A8XX_CP_HW_FAULT_STATUS_PIPE); 1054 dev_err_ratelimited(&gpu->pdev->dev, 1055 "CP HW FAULT pipe: %u status: 0x%x\n", pipe_id, status); 1056 } 1057 1058 if (sw_fault_mask & status) { 1059 status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 1060 REG_A8XX_CP_INTERRUPT_STATUS_PIPE); 1061 dev_err_ratelimited(&gpu->pdev->dev, 1062 "CP SW FAULT pipe: %u status: 0x%x\n", pipe_id, status); 1063 1064 if (status & BIT(8)) { 1065 a8xx_write_pipe(gpu, pipe_id, REG_A8XX_CP_SQE_STAT_ADDR_PIPE, 1); 1066 status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 1067 REG_A8XX_CP_SQE_STAT_DATA_PIPE); 1068 dev_err_ratelimited(&gpu->pdev->dev, 1069 "CP Opcode error, opcode=0x%x\n", status); 1070 } 1071 1072 if (status & BIT(10)) { 1073 status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 1074 REG_A8XX_CP_PROTECT_STATUS_PIPE); 1075 dev_err_ratelimited(&gpu->pdev->dev, 1076 "CP REG PROTECT error, status=0x%x\n", status); 1077 } 1078 } 1079 } 1080 1081 out: 1082 /* Turn off interrupts to avoid triggering recovery again */ 1083 a8xx_aperture_clear(gpu); 1084 gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, 0); 1085 gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, 0); 1086 1087 kthread_queue_work(gpu->worker, &gpu->recover_work); 1088 } 1089 1090 static u32 gpu_periph_read(struct msm_gpu *gpu, u32 dbg_offset) 1091 { 1092 gpu_write(gpu, REG_A8XX_CP_SQE_UCODE_DBG_ADDR_PIPE, dbg_offset); 1093 1094 return gpu_read(gpu, REG_A8XX_CP_SQE_UCODE_DBG_DATA_PIPE); 1095 } 1096 1097 static u64 gpu_periph_read64(struct msm_gpu *gpu, u32 dbg_offset) 1098 { 1099 u64 lo, hi; 1100 1101 lo = gpu_periph_read(gpu, dbg_offset); 1102 hi = gpu_periph_read(gpu, dbg_offset + 1); 1103 1104 return (hi << 32) | lo; 1105 } 1106 1107 #define CP_PERIPH_IB1_BASE_LO 0x7005 1108 #define CP_PERIPH_IB1_BASE_HI 0x7006 1109 #define CP_PERIPH_IB1_SIZE 0x7007 1110 #define CP_PERIPH_IB1_OFFSET 0x7008 1111 #define CP_PERIPH_IB2_BASE_LO 0x7009 1112 #define CP_PERIPH_IB2_BASE_HI 0x700a 1113 #define CP_PERIPH_IB2_SIZE 0x700b 1114 #define CP_PERIPH_IB2_OFFSET 0x700c 1115 #define CP_PERIPH_IB3_BASE_LO 0x700d 1116 #define CP_PERIPH_IB3_BASE_HI 0x700e 1117 #define CP_PERIPH_IB3_SIZE 0x700f 1118 #define CP_PERIPH_IB3_OFFSET 0x7010 1119 1120 static void a8xx_fault_detect_irq(struct msm_gpu *gpu) 1121 { 1122 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1123 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1124 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); 1125 unsigned long flags; 1126 1127 /* 1128 * If stalled on SMMU fault, we could trip the GPU's hang detection, 1129 * but the fault handler will trigger the devcore dump, and we want 1130 * to otherwise resume normally rather than killing the submit, so 1131 * just bail. 1132 */ 1133 if (gpu_read(gpu, REG_A8XX_RBBM_MISC_STATUS) & A8XX_RBBM_MISC_STATUS_SMMU_STALLED_ON_FAULT) 1134 return; 1135 1136 /* 1137 * Force the GPU to stay on until after we finish 1138 * collecting information 1139 */ 1140 if (!adreno_has_gmu_wrapper(adreno_gpu)) 1141 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1); 1142 1143 DRM_DEV_ERROR(&gpu->pdev->dev, 1144 "gpu fault ring %d fence %x status %8.8X gfx_status %8.8X\n", 1145 ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0, 1146 gpu_read(gpu, REG_A8XX_RBBM_STATUS), gpu_read(gpu, REG_A8XX_RBBM_GFX_STATUS)); 1147 1148 a8xx_aperture_acquire(gpu, PIPE_BR, &flags); 1149 1150 DRM_DEV_ERROR(&gpu->pdev->dev, 1151 "BR: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", 1152 gpu_read(gpu, REG_A8XX_RBBM_GFX_BR_STATUS), 1153 gpu_read(gpu, REG_A6XX_CP_RB_RPTR), 1154 gpu_read(gpu, REG_A6XX_CP_RB_WPTR), 1155 gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO), 1156 gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET), 1157 gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO), 1158 gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET), 1159 gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO), 1160 gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET)); 1161 1162 a8xx_aperture_release(gpu, flags); 1163 a8xx_aperture_acquire(gpu, PIPE_BV, &flags); 1164 1165 DRM_DEV_ERROR(&gpu->pdev->dev, 1166 "BV: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", 1167 gpu_read(gpu, REG_A8XX_RBBM_GFX_BV_STATUS), 1168 gpu_read(gpu, REG_A8XX_CP_RB_RPTR_BV), 1169 gpu_read(gpu, REG_A6XX_CP_RB_WPTR), 1170 gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO), 1171 gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET), 1172 gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO), 1173 gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET), 1174 gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO), 1175 gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET)); 1176 1177 a8xx_aperture_release(gpu, flags); 1178 a8xx_aperture_clear(gpu); 1179 1180 /* Turn off the hangcheck timer to keep it from bothering us */ 1181 timer_delete(&gpu->hangcheck_timer); 1182 1183 kthread_queue_work(gpu->worker, &gpu->recover_work); 1184 } 1185 1186 static void a8xx_sw_fuse_violation_irq(struct msm_gpu *gpu) 1187 { 1188 u32 status; 1189 1190 status = gpu_read(gpu, REG_A8XX_RBBM_SW_FUSE_INT_STATUS); 1191 gpu_write(gpu, REG_A8XX_RBBM_SW_FUSE_INT_MASK, 0); 1192 1193 dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); 1194 1195 /* 1196 * Ignore FASTBLEND violations, because the HW will silently fall back 1197 * to legacy blending. 1198 */ 1199 if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | 1200 A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { 1201 timer_delete(&gpu->hangcheck_timer); 1202 1203 kthread_queue_work(gpu->worker, &gpu->recover_work); 1204 } 1205 } 1206 1207 irqreturn_t a8xx_irq(struct msm_gpu *gpu) 1208 { 1209 struct msm_drm_private *priv = gpu->dev->dev_private; 1210 u32 status = gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS); 1211 1212 gpu_write(gpu, REG_A8XX_RBBM_INT_CLEAR_CMD, status); 1213 1214 if (priv->disable_err_irq) 1215 status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS; 1216 1217 if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT) 1218 a8xx_fault_detect_irq(gpu); 1219 1220 if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) { 1221 u32 rl0, rl1; 1222 1223 rl0 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_0); 1224 rl1 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_1); 1225 dev_err_ratelimited(&gpu->pdev->dev, 1226 "CP | AHB bus error RL_ERROR_0: %x, RL_ERROR_1: %x\n", rl0, rl1); 1227 } 1228 1229 if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR) 1230 a8xx_cp_hw_err_irq(gpu); 1231 1232 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW) 1233 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n"); 1234 1235 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW) 1236 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n"); 1237 1238 if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) 1239 dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); 1240 1241 if (status & A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR) 1242 dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Trap interrupt\n"); 1243 1244 if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) 1245 a8xx_sw_fuse_violation_irq(gpu); 1246 1247 if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { 1248 msm_gpu_retire(gpu); 1249 a8xx_preempt_trigger(gpu); 1250 } 1251 1252 if (status & A6XX_RBBM_INT_0_MASK_CP_SW) 1253 a8xx_preempt_irq(gpu); 1254 1255 return IRQ_HANDLED; 1256 } 1257 1258 void a8xx_llc_activate(struct a6xx_gpu *a6xx_gpu) 1259 { 1260 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 1261 struct msm_gpu *gpu = &adreno_gpu->base; 1262 1263 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { 1264 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); 1265 1266 gpu_scid &= GENMASK(5, 0); 1267 1268 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, 1269 FIELD_PREP(GENMASK(29, 24), gpu_scid) | 1270 FIELD_PREP(GENMASK(23, 18), gpu_scid) | 1271 FIELD_PREP(GENMASK(17, 12), gpu_scid) | 1272 FIELD_PREP(GENMASK(11, 6), gpu_scid) | 1273 FIELD_PREP(GENMASK(5, 0), gpu_scid)); 1274 1275 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, 1276 FIELD_PREP(GENMASK(27, 22), gpu_scid) | 1277 FIELD_PREP(GENMASK(21, 16), gpu_scid) | 1278 FIELD_PREP(GENMASK(15, 10), gpu_scid) | 1279 BIT(8)); 1280 } 1281 1282 llcc_slice_activate(a6xx_gpu->htw_llc_slice); 1283 } 1284 1285 #define GBIF_CLIENT_HALT_MASK BIT(0) 1286 #define GBIF_ARB_HALT_MASK BIT(1) 1287 #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0) 1288 #define VBIF_RESET_ACK_MASK 0xF0 1289 #define GPR0_GBIF_HALT_REQUEST 0x1E0 1290 1291 void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off) 1292 { 1293 struct msm_gpu *gpu = &adreno_gpu->base; 1294 1295 if (gx_off) { 1296 /* Halt the gx side of GBIF */ 1297 gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 1); 1298 spin_until(gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT_ACK) & 1); 1299 } 1300 1301 /* Halt new client requests on GBIF */ 1302 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); 1303 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 1304 (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK); 1305 1306 /* Halt all AXI requests on GBIF */ 1307 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK); 1308 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 1309 (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK); 1310 1311 /* The GBIF halt needs to be explicitly cleared */ 1312 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0); 1313 } 1314 1315 u64 a8xx_gmu_get_timestamp(struct msm_gpu *gpu) 1316 { 1317 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1318 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1319 u64 count_hi, count_lo, temp; 1320 1321 do { 1322 count_hi = gmu_read(&a6xx_gpu->gmu, REG_A8XX_GMU_ALWAYS_ON_COUNTER_H); 1323 count_lo = gmu_read(&a6xx_gpu->gmu, REG_A8XX_GMU_ALWAYS_ON_COUNTER_L); 1324 temp = gmu_read(&a6xx_gpu->gmu, REG_A8XX_GMU_ALWAYS_ON_COUNTER_H); 1325 } while (unlikely(count_hi != temp)); 1326 1327 return (count_hi << 32) | count_lo; 1328 } 1329 1330 u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate) 1331 { 1332 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1333 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1334 u64 busy_cycles; 1335 1336 /* 19.2MHz */ 1337 *out_sample_rate = 19200000; 1338 1339 busy_cycles = gmu_read64(&a6xx_gpu->gmu, 1340 REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L, 1341 REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H); 1342 1343 return busy_cycles; 1344 } 1345 1346 bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 1347 { 1348 return true; 1349 } 1350 1351 void a8xx_perfcntr_flush(struct msm_gpu *gpu) 1352 { 1353 u32 val; 1354 1355 /* 1356 * Flush delta counters (both perf counters and pipe stats) present in 1357 * RBBM_S and RBBM_US to perf RAM logic to get the latest data. 1358 */ 1359 gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_FLUSH_HOST_CMD, BIT(0)); 1360 gpu_write(gpu, REG_A8XX_RBBM_SLICE_PERFCTR_FLUSH_HOST_CMD, BIT(0)); 1361 1362 /* Ensure all writes are posted before polling status register */ 1363 wmb(); 1364 1365 if (gpu_poll_timeout(gpu, REG_A8XX_RBBM_PERFCTR_FLUSH_HOST_STATUS, val, 1366 val & BIT(0), 100, 100 * 1000)) { 1367 dev_err(&gpu->pdev->dev, "Perfcounter flush timed out: status=0x%08x\n", val); 1368 } 1369 } 1370