1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ 3 4 5 #include "msm_gem.h" 6 #include "msm_mmu.h" 7 #include "msm_gpu_trace.h" 8 #include "a6xx_gpu.h" 9 #include "a6xx_gmu.xml.h" 10 11 #include <linux/bitfield.h> 12 #include <linux/devfreq.h> 13 #include <linux/firmware/qcom/qcom_scm.h> 14 #include <linux/pm_domain.h> 15 #include <linux/soc/qcom/llcc-qcom.h> 16 17 #define GPU_PAS_ID 13 18 19 static void a8xx_aperture_slice_set(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice) 20 { 21 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 22 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 23 u32 val; 24 25 val = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe) | A8XX_CP_APERTURE_CNTL_HOST_SLICEID(slice); 26 27 if (a6xx_gpu->cached_aperture == val) 28 return; 29 30 gpu_write(gpu, REG_A8XX_CP_APERTURE_CNTL_HOST, val); 31 32 a6xx_gpu->cached_aperture = val; 33 } 34 35 static void a8xx_aperture_acquire(struct msm_gpu *gpu, enum adreno_pipe pipe, unsigned long *flags) 36 { 37 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 38 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 39 40 spin_lock_irqsave(&a6xx_gpu->aperture_lock, *flags); 41 42 a8xx_aperture_slice_set(gpu, pipe, 0); 43 } 44 45 static void a8xx_aperture_release(struct msm_gpu *gpu, unsigned long flags) 46 { 47 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 48 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 49 50 spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags); 51 } 52 53 static void a8xx_aperture_clear(struct msm_gpu *gpu) 54 { 55 unsigned long flags; 56 57 a8xx_aperture_acquire(gpu, PIPE_NONE, &flags); 58 a8xx_aperture_release(gpu, flags); 59 } 60 61 static void a8xx_write_pipe(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 offset, u32 data) 62 { 63 unsigned long flags; 64 65 a8xx_aperture_acquire(gpu, pipe, &flags); 66 gpu_write(gpu, offset, data); 67 a8xx_aperture_release(gpu, flags); 68 } 69 70 static u32 a8xx_read_pipe_slice(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice, u32 offset) 71 { 72 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 73 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 74 unsigned long flags; 75 u32 val; 76 77 spin_lock_irqsave(&a6xx_gpu->aperture_lock, flags); 78 a8xx_aperture_slice_set(gpu, pipe, slice); 79 val = gpu_read(gpu, offset); 80 spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags); 81 82 return val; 83 } 84 85 void a8xx_gpu_get_slice_info(struct msm_gpu *gpu) 86 { 87 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 88 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 89 const struct a6xx_info *info = adreno_gpu->info->a6xx; 90 u32 slice_mask; 91 92 if (adreno_gpu->info->family < ADRENO_8XX_GEN1) 93 return; 94 95 if (a6xx_gpu->slice_mask) 96 return; 97 98 slice_mask = GENMASK(info->max_slices - 1, 0); 99 100 /* GEN1 doesn't support partial slice configurations */ 101 if (adreno_gpu->info->family == ADRENO_8XX_GEN1) { 102 a6xx_gpu->slice_mask = slice_mask; 103 return; 104 } 105 106 slice_mask &= a6xx_llc_read(a6xx_gpu, 107 REG_A8XX_CX_MISC_SLICE_ENABLE_FINAL); 108 109 a6xx_gpu->slice_mask = slice_mask; 110 111 /* Chip ID depends on the number of slices available. So update it */ 112 adreno_gpu->chip_id |= FIELD_PREP(GENMASK(7, 4), hweight32(slice_mask)); 113 } 114 115 static u32 a8xx_get_first_slice(struct a6xx_gpu *a6xx_gpu) 116 { 117 return ffs(a6xx_gpu->slice_mask) - 1; 118 } 119 120 static inline bool _a8xx_check_idle(struct msm_gpu *gpu) 121 { 122 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 123 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 124 125 /* Check that the GMU is idle */ 126 if (!a6xx_gmu_isidle(&a6xx_gpu->gmu)) 127 return false; 128 129 /* Check that the CX master is idle */ 130 if (gpu_read(gpu, REG_A8XX_RBBM_STATUS) & 131 ~A8XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER) 132 return false; 133 134 return !(gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS) & 135 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT); 136 } 137 138 static bool a8xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 139 { 140 /* wait for CP to drain ringbuffer: */ 141 if (!adreno_idle(gpu, ring)) 142 return false; 143 144 if (spin_until(_a8xx_check_idle(gpu))) { 145 DRM_ERROR( 146 "%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n", 147 gpu->name, __builtin_return_address(0), 148 gpu_read(gpu, REG_A8XX_RBBM_STATUS), 149 gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS), 150 gpu_read(gpu, REG_A6XX_CP_RB_RPTR), 151 gpu_read(gpu, REG_A6XX_CP_RB_WPTR)); 152 return false; 153 } 154 155 return true; 156 } 157 158 void a8xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 159 { 160 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 161 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 162 uint32_t wptr; 163 unsigned long flags; 164 165 spin_lock_irqsave(&ring->preempt_lock, flags); 166 167 /* Copy the shadow to the actual register */ 168 ring->cur = ring->next; 169 170 /* Make sure to wrap wptr if we need to */ 171 wptr = get_wptr(ring); 172 173 /* Update HW if this is the current ring and we are not in preempt*/ 174 if (!a6xx_in_preempt(a6xx_gpu)) { 175 if (a6xx_gpu->cur_ring == ring) 176 gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); 177 else 178 ring->restore_wptr = true; 179 } else { 180 ring->restore_wptr = true; 181 } 182 183 spin_unlock_irqrestore(&ring->preempt_lock, flags); 184 } 185 186 static void a8xx_set_hwcg(struct msm_gpu *gpu, bool state) 187 { 188 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 189 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 190 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 191 u32 val; 192 193 if (adreno_is_x285(adreno_gpu) && state) 194 gpu_write(gpu, REG_A8XX_RBBM_CGC_0_PC, 0x00000702); 195 196 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL, 197 state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0); 198 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, 199 state ? 0x110111 : 0); 200 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL, 201 state ? 0x55555 : 0); 202 203 gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 1); 204 gpu_write(gpu, REG_A8XX_RBBM_CGC_GLOBAL_LOAD_CMD, !!state); 205 206 if (state) { 207 gpu_write(gpu, REG_A8XX_RBBM_CGC_P2S_TRIG_CMD, 1); 208 209 if (gpu_poll_timeout(gpu, REG_A8XX_RBBM_CGC_P2S_STATUS, val, 210 val & A8XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) { 211 dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n"); 212 return; 213 } 214 215 gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 0); 216 } else { 217 /* 218 * GMU enables clk gating in GBIF during boot up. So, 219 * override that here when hwcg feature is disabled 220 */ 221 gpu_rmw(gpu, REG_A8XX_GBIF_CX_CONFIG, BIT(0), 0); 222 } 223 } 224 225 static void a8xx_set_cp_protect(struct msm_gpu *gpu) 226 { 227 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 228 const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect; 229 u32 cntl, final_cfg; 230 unsigned int i; 231 232 cntl = A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_PROT_EN | 233 A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_FAULT_ON_VIOL_EN | 234 A8XX_CP_PROTECT_CNTL_PIPE_LAST_SPAN_INF_RANGE | 235 A8XX_CP_PROTECT_CNTL_PIPE_HALT_SQE_RANGE__MASK; 236 /* 237 * Enable access protection to privileged registers, fault on an access 238 * protect violation and select the last span to protect from the start 239 * address all the way to the end of the register address space 240 */ 241 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl); 242 a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl); 243 244 a8xx_aperture_clear(gpu); 245 246 for (i = 0; i < protect->count; i++) { 247 /* Intentionally skip writing to some registers */ 248 if (protect->regs[i]) { 249 gpu_write(gpu, REG_A8XX_CP_PROTECT_GLOBAL(i), protect->regs[i]); 250 final_cfg = protect->regs[i]; 251 } 252 } 253 254 /* 255 * Last span feature is only supported on PIPE specific register. 256 * So update those here 257 */ 258 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg); 259 a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg); 260 261 a8xx_aperture_clear(gpu); 262 } 263 264 static void a8xx_set_ubwc_config(struct msm_gpu *gpu) 265 { 266 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 267 const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config; 268 u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2); 269 u32 level3_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL3); 270 bool rgba8888_lossless = false, fp16compoptdis = false; 271 bool yuvnotcomptofc = false, min_acc_len_64b = false; 272 bool rgb565_predicator = false, amsbc = false; 273 bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg); 274 u32 ubwc_version = cfg->ubwc_enc_version; 275 u32 hbb, hbb_hi, hbb_lo, mode = 1; 276 u8 uavflagprd_inv = 2; 277 278 switch (ubwc_version) { 279 case UBWC_5_0: 280 amsbc = true; 281 rgb565_predicator = true; 282 mode = 4; 283 break; 284 case UBWC_4_0: 285 amsbc = true; 286 rgb565_predicator = true; 287 fp16compoptdis = true; 288 rgba8888_lossless = true; 289 mode = 2; 290 break; 291 case UBWC_3_0: 292 amsbc = true; 293 mode = 1; 294 break; 295 default: 296 dev_err(&gpu->pdev->dev, "Unknown UBWC version: 0x%x\n", ubwc_version); 297 break; 298 } 299 300 /* 301 * We subtract 13 from the highest bank bit (13 is the minimum value 302 * allowed by hw) and write the lowest two bits of the remaining value 303 * as hbb_lo and the one above it as hbb_hi to the hardware. 304 */ 305 WARN_ON(cfg->highest_bank_bit < 13); 306 hbb = cfg->highest_bank_bit - 13; 307 hbb_hi = hbb >> 2; 308 hbb_lo = hbb & 3; 309 a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL, hbb << 5); 310 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL, hbb << 5); 311 312 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CCU_NC_MODE_CNTL, 313 yuvnotcomptofc << 6 | 314 hbb_hi << 3 | 315 hbb_lo << 1); 316 317 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CMP_NC_MODE_CNTL, 318 mode << 15 | 319 yuvnotcomptofc << 6 | 320 rgba8888_lossless << 4 | 321 fp16compoptdis << 3 | 322 rgb565_predicator << 2 | 323 amsbc << 1 | 324 min_acc_len_64b); 325 326 a8xx_aperture_clear(gpu); 327 328 gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, 329 level3_swizzling_dis << 13 | 330 level2_swizzling_dis << 12 | 331 hbb_hi << 10 | 332 uavflagprd_inv << 4 | 333 min_acc_len_64b << 3 | 334 hbb_lo << 1 | ubwc_mode); 335 336 gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, 337 level3_swizzling_dis << 7 | 338 level2_swizzling_dis << 6 | 339 hbb_hi << 4 | 340 min_acc_len_64b << 3 | 341 hbb_lo << 1 | ubwc_mode); 342 } 343 344 static void a8xx_nonctxt_config(struct msm_gpu *gpu, u32 *gmem_protect) 345 { 346 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 347 const struct a6xx_info *info = adreno_gpu->info->a6xx; 348 const struct adreno_reglist_pipe *regs = info->nonctxt_reglist; 349 unsigned int pipe_id, i; 350 unsigned long flags; 351 352 for (pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) { 353 /* We don't have support for LPAC yet */ 354 if (pipe_id == PIPE_LPAC) 355 continue; 356 357 a8xx_aperture_acquire(gpu, pipe_id, &flags); 358 359 for (i = 0; regs[i].offset; i++) { 360 if (!(BIT(pipe_id) & regs[i].pipe)) 361 continue; 362 363 if (regs[i].offset == REG_A8XX_RB_GC_GMEM_PROTECT) 364 *gmem_protect = regs[i].value; 365 366 gpu_write(gpu, regs[i].offset, regs[i].value); 367 } 368 369 a8xx_aperture_release(gpu, flags); 370 } 371 372 a8xx_aperture_clear(gpu); 373 } 374 375 static int a8xx_cp_init(struct msm_gpu *gpu) 376 { 377 struct msm_ringbuffer *ring = gpu->rb[0]; 378 u32 mask; 379 380 /* Disable concurrent binning before sending CP init */ 381 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 382 OUT_RING(ring, BIT(27)); 383 384 OUT_PKT7(ring, CP_ME_INIT, 4); 385 386 /* Use multiple HW contexts */ 387 mask = BIT(0); 388 389 /* Enable error detection */ 390 mask |= BIT(1); 391 392 /* Set default reset state */ 393 mask |= BIT(3); 394 395 /* Disable save/restore of performance counters across preemption */ 396 mask |= BIT(6); 397 398 OUT_RING(ring, mask); 399 400 /* Enable multiple hardware contexts */ 401 OUT_RING(ring, 0x00000003); 402 403 /* Enable error detection */ 404 OUT_RING(ring, 0x20000000); 405 406 /* Operation mode mask */ 407 OUT_RING(ring, 0x00000002); 408 409 a6xx_flush(gpu, ring); 410 return a8xx_idle(gpu, ring) ? 0 : -EINVAL; 411 } 412 413 #define A8XX_INT_MASK \ 414 (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \ 415 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \ 416 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \ 417 A6XX_RBBM_INT_0_MASK_CP_SW | \ 418 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ 419 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \ 420 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \ 421 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ 422 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \ 423 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ 424 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ 425 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ 426 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ 427 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) 428 429 #define A8XX_APRIV_MASK \ 430 (A8XX_CP_APRIV_CNTL_PIPE_ICACHE | \ 431 A8XX_CP_APRIV_CNTL_PIPE_RBFETCH | \ 432 A8XX_CP_APRIV_CNTL_PIPE_RBPRIVLEVEL | \ 433 A8XX_CP_APRIV_CNTL_PIPE_RBRPWB) 434 435 #define A8XX_BR_APRIV_MASK \ 436 (A8XX_APRIV_MASK | \ 437 A8XX_CP_APRIV_CNTL_PIPE_CDREAD | \ 438 A8XX_CP_APRIV_CNTL_PIPE_CDWRITE) 439 440 #define A8XX_CP_GLOBAL_INT_MASK \ 441 (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | \ 442 A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | \ 443 A8XX_CP_GLOBAL_INT_MASK_HWFAULTLPAC | \ 444 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE0 | \ 445 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE1 | \ 446 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBR | \ 447 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBV | \ 448 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR | \ 449 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV | \ 450 A8XX_CP_GLOBAL_INT_MASK_SWFAULTLPAC | \ 451 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE0 | \ 452 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE1 | \ 453 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBR | \ 454 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBV) 455 456 #define A8XX_CP_INTERRUPT_STATUS_MASK_PIPE \ 457 (A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFRBWRAP | \ 458 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB1WRAP | \ 459 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB2WRAP | \ 460 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB3WRAP | \ 461 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFSDSWRAP | \ 462 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFMRBWRAP | \ 463 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFVSDWRAP | \ 464 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_OPCODEERROR | \ 465 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VSDPARITYERROR | \ 466 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_REGISTERPROTECTIONERROR | \ 467 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_ILLEGALINSTRUCTION | \ 468 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_SMMUFAULT | \ 469 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPCLIENT| \ 470 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPTYPE | \ 471 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPREAD | \ 472 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESP | \ 473 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_RTWROVF | \ 474 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTWROVF | \ 475 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTREFCNTOVF | \ 476 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTCLRRESMISS) 477 478 #define A8XX_CP_HW_FAULT_STATUS_MASK_PIPE \ 479 (A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFRBFAULT | \ 480 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB1FAULT | \ 481 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB2FAULT | \ 482 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB3FAULT | \ 483 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFSDSFAULT | \ 484 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFMRBFAULT | \ 485 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFVSDFAULT | \ 486 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_SQEREADBURSTOVF | \ 487 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_EVENTENGINEOVF | \ 488 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_UCODEERROR) 489 490 static int hw_init(struct msm_gpu *gpu) 491 { 492 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 493 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 494 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 495 unsigned int pipe_id, i; 496 u32 gmem_protect = 0; 497 u64 gmem_range_min; 498 int ret; 499 500 ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); 501 if (ret) 502 return ret; 503 504 /* Clear the cached value to force aperture configuration next time */ 505 a6xx_gpu->cached_aperture = UINT_MAX; 506 a8xx_aperture_clear(gpu); 507 508 /* Clear GBIF halt in case GX domain was not collapsed */ 509 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0); 510 gpu_read(gpu, REG_A6XX_GBIF_HALT); 511 512 gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 0); 513 gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT); 514 515 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0); 516 517 /* 518 * Disable the trusted memory range - we don't actually supported secure 519 * memory rendering at this point in time and we don't want to block off 520 * part of the virtual memory space. 521 */ 522 gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000); 523 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); 524 525 /* Make all blocks contribute to the GPU BUSY perf counter */ 526 gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff); 527 528 /* Setup GMEM Range in UCHE */ 529 gmem_range_min = SZ_64M; 530 /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */ 531 gpu_write64(gpu, REG_A8XX_UCHE_CCHE_GC_GMEM_RANGE_MIN, gmem_range_min); 532 gpu_write64(gpu, REG_A8XX_SP_HLSQ_GC_GMEM_RANGE_MIN, gmem_range_min); 533 534 /* Setup UCHE Trap region */ 535 gpu_write64(gpu, REG_A8XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); 536 gpu_write64(gpu, REG_A8XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); 537 gpu_write64(gpu, REG_A8XX_UCHE_CCHE_TRAP_BASE, adreno_gpu->uche_trap_base); 538 gpu_write64(gpu, REG_A8XX_UCHE_CCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); 539 540 /* Turn on performance counters */ 541 gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_CNTL, 0x1); 542 gpu_write(gpu, REG_A8XX_RBBM_SLICE_PERFCTR_CNTL, 0x1); 543 544 /* Turn on the IFPC counter (countable 4 on XOCLK1) */ 545 gmu_write(&a6xx_gpu->gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_1, 546 FIELD_PREP(GENMASK(7, 0), 0x4)); 547 548 /* Select CP0 to always count cycles */ 549 gpu_write(gpu, REG_A8XX_CP_PERFCTR_CP_SEL(0), 1); 550 551 a8xx_set_ubwc_config(gpu); 552 553 /* Set weights for bicubic filtering */ 554 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0); 555 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4); 556 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee); 557 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed); 558 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0); 559 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(5), 0x00000000); 560 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(6), 0x3fd093e8); 561 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(7), 0x3f4133dc); 562 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(8), 0x3ea1dfdb); 563 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(9), 0x3e0283e0); 564 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(10), 0x0000ac2b); 565 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(11), 0x0000f01d); 566 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(12), 0x00114412); 567 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(13), 0x0021980a); 568 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(14), 0x0051ec05); 569 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(15), 0x0000380e); 570 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(16), 0x3ff09001); 571 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(17), 0x3fc10bfa); 572 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(18), 0x3f9193f7); 573 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(19), 0x3f7227f7); 574 575 gpu_write(gpu, REG_A8XX_UCHE_CLIENT_PF, BIT(7) | 0x1); 576 577 a8xx_nonctxt_config(gpu, &gmem_protect); 578 579 /* Enable fault detection */ 580 gpu_write(gpu, REG_A8XX_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) | 0xcfffff); 581 gpu_write(gpu, REG_A8XX_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30)); 582 583 /* Set up the CX GMU counter 0 to count busy ticks */ 584 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000); 585 586 /* Enable the power counter */ 587 gmu_rmw(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_0, 0xff, BIT(5)); 588 gmu_write(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1); 589 590 /* Protect registers from the CP */ 591 a8xx_set_cp_protect(gpu); 592 593 /* Enable the GMEM save/restore feature for preemption */ 594 a8xx_write_pipe(gpu, PIPE_BR, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 1); 595 596 for (pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) { 597 u32 apriv_mask = A8XX_APRIV_MASK; 598 unsigned long flags; 599 600 if (pipe_id == PIPE_LPAC) 601 continue; 602 603 if (pipe_id == PIPE_BR) 604 apriv_mask = A8XX_BR_APRIV_MASK; 605 606 a8xx_aperture_acquire(gpu, pipe_id, &flags); 607 gpu_write(gpu, REG_A8XX_CP_APRIV_CNTL_PIPE, apriv_mask); 608 gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_PIPE, 609 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE); 610 gpu_write(gpu, REG_A8XX_CP_HW_FAULT_STATUS_MASK_PIPE, 611 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE); 612 a8xx_aperture_release(gpu, flags); 613 } 614 615 a8xx_aperture_clear(gpu); 616 617 /* Enable interrupts */ 618 gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, A8XX_CP_GLOBAL_INT_MASK); 619 gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, A8XX_INT_MASK); 620 621 ret = adreno_hw_init(gpu); 622 if (ret) 623 goto out; 624 625 gpu_write64(gpu, REG_A8XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova); 626 if (a6xx_gpu->aqe_iova) 627 gpu_write64(gpu, REG_A8XX_CP_AQE_INSTR_BASE_0, a6xx_gpu->aqe_iova); 628 629 /* Set the ringbuffer address */ 630 gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova); 631 gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT); 632 633 /* Configure the RPTR shadow if needed: */ 634 gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, shadowptr(a6xx_gpu, gpu->rb[0])); 635 gpu_write64(gpu, REG_A8XX_CP_RB_RPTR_ADDR_BV, rbmemptr(gpu->rb[0], bv_rptr)); 636 637 for (i = 0; i < gpu->nr_rings; i++) 638 a6xx_gpu->shadow[i] = 0; 639 640 /* Always come up on rb 0 */ 641 a6xx_gpu->cur_ring = gpu->rb[0]; 642 643 for (i = 0; i < gpu->nr_rings; i++) 644 gpu->rb[i]->cur_ctx_seqno = 0; 645 646 /* Enable the SQE_to start the CP engine */ 647 gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 1); 648 649 ret = a8xx_cp_init(gpu); 650 if (ret) 651 goto out; 652 653 /* 654 * Try to load a zap shader into the secure world. If successful 655 * we can use the CP to switch out of secure mode. If not then we 656 * have no resource but to try to switch ourselves out manually. If we 657 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will 658 * be blocked and a permissions violation will soon follow. 659 */ 660 ret = a6xx_zap_shader_init(gpu); 661 if (!ret) { 662 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1); 663 OUT_RING(gpu->rb[0], 0x00000000); 664 665 a6xx_flush(gpu, gpu->rb[0]); 666 if (!a8xx_idle(gpu, gpu->rb[0])) 667 return -EINVAL; 668 } else if (ret == -ENODEV) { 669 /* 670 * This device does not use zap shader (but print a warning 671 * just in case someone got their dt wrong.. hopefully they 672 * have a debug UART to realize the error of their ways... 673 * if you mess this up you are about to crash horribly) 674 */ 675 dev_warn_once(gpu->dev->dev, 676 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n"); 677 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0); 678 ret = 0; 679 } else { 680 return ret; 681 } 682 683 /* 684 * GMEM_PROTECT register should be programmed after GPU is transitioned to 685 * non-secure mode 686 */ 687 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_GC_GMEM_PROTECT, gmem_protect); 688 WARN_ON(!gmem_protect); 689 a8xx_aperture_clear(gpu); 690 691 /* Enable hardware clockgating */ 692 a8xx_set_hwcg(gpu, true); 693 out: 694 /* 695 * Tell the GMU that we are done touching the GPU and it can start power 696 * management 697 */ 698 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); 699 700 return ret; 701 } 702 703 int a8xx_hw_init(struct msm_gpu *gpu) 704 { 705 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 706 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 707 int ret; 708 709 mutex_lock(&a6xx_gpu->gmu.lock); 710 ret = hw_init(gpu); 711 mutex_unlock(&a6xx_gpu->gmu.lock); 712 713 return ret; 714 } 715 716 static void a8xx_dump(struct msm_gpu *gpu) 717 { 718 DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n", gpu_read(gpu, REG_A8XX_RBBM_STATUS)); 719 adreno_dump(gpu); 720 } 721 722 void a8xx_recover(struct msm_gpu *gpu) 723 { 724 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 725 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 726 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 727 int active_submits; 728 729 adreno_dump_info(gpu); 730 731 if (hang_debug) 732 a8xx_dump(gpu); 733 734 /* 735 * To handle recovery specific sequences during the rpm suspend we are 736 * about to trigger 737 */ 738 a6xx_gpu->hung = true; 739 740 /* Halt SQE first */ 741 gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 3); 742 743 pm_runtime_dont_use_autosuspend(&gpu->pdev->dev); 744 745 /* active_submit won't change until we make a submission */ 746 mutex_lock(&gpu->active_lock); 747 active_submits = gpu->active_submits; 748 749 /* 750 * Temporarily clear active_submits count to silence a WARN() in the 751 * runtime suspend cb 752 */ 753 gpu->active_submits = 0; 754 755 reinit_completion(&gmu->pd_gate); 756 dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb); 757 dev_pm_genpd_synced_poweroff(gmu->cxpd); 758 759 /* Drop the rpm refcount from active submits */ 760 if (active_submits) 761 pm_runtime_put(&gpu->pdev->dev); 762 763 /* And the final one from recover worker */ 764 pm_runtime_put_sync(&gpu->pdev->dev); 765 766 if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000))) 767 DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n"); 768 769 dev_pm_genpd_remove_notifier(gmu->cxpd); 770 771 pm_runtime_use_autosuspend(&gpu->pdev->dev); 772 773 if (active_submits) 774 pm_runtime_get(&gpu->pdev->dev); 775 776 pm_runtime_get_sync(&gpu->pdev->dev); 777 778 gpu->active_submits = active_submits; 779 mutex_unlock(&gpu->active_lock); 780 781 msm_gpu_hw_init(gpu); 782 a6xx_gpu->hung = false; 783 } 784 785 static const char *a8xx_uche_fault_block(struct msm_gpu *gpu, u32 mid) 786 { 787 static const char * const uche_clients[] = { 788 "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP", 789 "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP", 790 "STCHE", 791 }; 792 static const char * const uche_clients_lpac[] = { 793 "-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC", 794 }; 795 u32 val; 796 797 /* 798 * The source of the data depends on the mid ID read from FSYNR1. 799 * and the client ID read from the UCHE block 800 */ 801 val = gpu_read(gpu, REG_A8XX_UCHE_CLIENT_PF); 802 803 val &= GENMASK(6, 0); 804 805 /* mid=3 refers to BR or BV */ 806 if (mid == 3) { 807 if (val < ARRAY_SIZE(uche_clients)) 808 return uche_clients[val]; 809 else 810 return "UCHE"; 811 } 812 813 /* mid=8 refers to LPAC */ 814 if (mid == 8) { 815 if (val < ARRAY_SIZE(uche_clients_lpac)) 816 return uche_clients_lpac[val]; 817 else 818 return "UCHE_LPAC"; 819 } 820 821 return "Unknown"; 822 } 823 824 static const char *a8xx_fault_block(struct msm_gpu *gpu, u32 id) 825 { 826 switch (id) { 827 case 0x0: 828 return "CP"; 829 case 0x1: 830 return "UCHE: Unknown"; 831 case 0x2: 832 return "UCHE_LPAC: Unknown"; 833 case 0x3: 834 case 0x8: 835 return a8xx_uche_fault_block(gpu, id); 836 case 0x4: 837 return "CCU"; 838 case 0x5: 839 return "Flag cache"; 840 case 0x6: 841 return "PREFETCH"; 842 case 0x7: 843 return "GMU"; 844 case 0x9: 845 return "UCHE_HPAC"; 846 } 847 848 return "Unknown"; 849 } 850 851 int a8xx_fault_handler(void *arg, unsigned long iova, int flags, void *data) 852 { 853 struct msm_gpu *gpu = arg; 854 struct adreno_smmu_fault_info *info = data; 855 const char *block = "unknown"; 856 857 u32 scratch[] = { 858 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(0)), 859 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(1)), 860 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(2)), 861 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(3)), 862 }; 863 864 if (info) 865 block = a8xx_fault_block(gpu, info->fsynr1 & 0xff); 866 867 return adreno_fault_handler(gpu, iova, flags, info, block, scratch); 868 } 869 870 static void a8xx_cp_hw_err_irq(struct msm_gpu *gpu) 871 { 872 u32 status = gpu_read(gpu, REG_A8XX_CP_INTERRUPT_STATUS_GLOBAL); 873 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 874 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 875 u32 slice = a8xx_get_first_slice(a6xx_gpu); 876 u32 hw_fault_mask = GENMASK(6, 0); 877 u32 sw_fault_mask = GENMASK(22, 16); 878 u32 pipe = 0; 879 880 dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Global INT status: 0x%x\n", status); 881 882 if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | 883 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR)) 884 pipe |= BIT(PIPE_BR); 885 886 if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | 887 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV)) 888 pipe |= BIT(PIPE_BV); 889 890 if (!pipe) { 891 dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Unknown pipe\n"); 892 goto out; 893 } 894 895 for (unsigned int pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) { 896 if (!(BIT(pipe_id) & pipe)) 897 continue; 898 899 if (hw_fault_mask & status) { 900 status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 901 REG_A8XX_CP_HW_FAULT_STATUS_PIPE); 902 dev_err_ratelimited(&gpu->pdev->dev, 903 "CP HW FAULT pipe: %u status: 0x%x\n", pipe_id, status); 904 } 905 906 if (sw_fault_mask & status) { 907 status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 908 REG_A8XX_CP_INTERRUPT_STATUS_PIPE); 909 dev_err_ratelimited(&gpu->pdev->dev, 910 "CP SW FAULT pipe: %u status: 0x%x\n", pipe_id, status); 911 912 if (status & BIT(8)) { 913 a8xx_write_pipe(gpu, pipe_id, REG_A8XX_CP_SQE_STAT_ADDR_PIPE, 1); 914 status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 915 REG_A8XX_CP_SQE_STAT_DATA_PIPE); 916 dev_err_ratelimited(&gpu->pdev->dev, 917 "CP Opcode error, opcode=0x%x\n", status); 918 } 919 920 if (status & BIT(10)) { 921 status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 922 REG_A8XX_CP_PROTECT_STATUS_PIPE); 923 dev_err_ratelimited(&gpu->pdev->dev, 924 "CP REG PROTECT error, status=0x%x\n", status); 925 } 926 } 927 } 928 929 out: 930 /* Turn off interrupts to avoid triggering recovery again */ 931 a8xx_aperture_clear(gpu); 932 gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, 0); 933 gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, 0); 934 935 kthread_queue_work(gpu->worker, &gpu->recover_work); 936 } 937 938 static u32 gpu_periph_read(struct msm_gpu *gpu, u32 dbg_offset) 939 { 940 gpu_write(gpu, REG_A8XX_CP_SQE_UCODE_DBG_ADDR_PIPE, dbg_offset); 941 942 return gpu_read(gpu, REG_A8XX_CP_SQE_UCODE_DBG_DATA_PIPE); 943 } 944 945 static u64 gpu_periph_read64(struct msm_gpu *gpu, u32 dbg_offset) 946 { 947 u64 lo, hi; 948 949 lo = gpu_periph_read(gpu, dbg_offset); 950 hi = gpu_periph_read(gpu, dbg_offset + 1); 951 952 return (hi << 32) | lo; 953 } 954 955 #define CP_PERIPH_IB1_BASE_LO 0x7005 956 #define CP_PERIPH_IB1_BASE_HI 0x7006 957 #define CP_PERIPH_IB1_SIZE 0x7007 958 #define CP_PERIPH_IB1_OFFSET 0x7008 959 #define CP_PERIPH_IB2_BASE_LO 0x7009 960 #define CP_PERIPH_IB2_BASE_HI 0x700a 961 #define CP_PERIPH_IB2_SIZE 0x700b 962 #define CP_PERIPH_IB2_OFFSET 0x700c 963 #define CP_PERIPH_IB3_BASE_LO 0x700d 964 #define CP_PERIPH_IB3_BASE_HI 0x700e 965 #define CP_PERIPH_IB3_SIZE 0x700f 966 #define CP_PERIPH_IB3_OFFSET 0x7010 967 968 static void a8xx_fault_detect_irq(struct msm_gpu *gpu) 969 { 970 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 971 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 972 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); 973 unsigned long flags; 974 975 /* 976 * If stalled on SMMU fault, we could trip the GPU's hang detection, 977 * but the fault handler will trigger the devcore dump, and we want 978 * to otherwise resume normally rather than killing the submit, so 979 * just bail. 980 */ 981 if (gpu_read(gpu, REG_A8XX_RBBM_MISC_STATUS) & A8XX_RBBM_MISC_STATUS_SMMU_STALLED_ON_FAULT) 982 return; 983 984 /* 985 * Force the GPU to stay on until after we finish 986 * collecting information 987 */ 988 if (!adreno_has_gmu_wrapper(adreno_gpu)) 989 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1); 990 991 DRM_DEV_ERROR(&gpu->pdev->dev, 992 "gpu fault ring %d fence %x status %8.8X gfx_status %8.8X\n", 993 ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0, 994 gpu_read(gpu, REG_A8XX_RBBM_STATUS), gpu_read(gpu, REG_A8XX_RBBM_GFX_STATUS)); 995 996 a8xx_aperture_acquire(gpu, PIPE_BR, &flags); 997 998 DRM_DEV_ERROR(&gpu->pdev->dev, 999 "BR: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", 1000 gpu_read(gpu, REG_A8XX_RBBM_GFX_BR_STATUS), 1001 gpu_read(gpu, REG_A6XX_CP_RB_RPTR), 1002 gpu_read(gpu, REG_A6XX_CP_RB_WPTR), 1003 gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO), 1004 gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET), 1005 gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO), 1006 gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET), 1007 gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO), 1008 gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET)); 1009 1010 a8xx_aperture_release(gpu, flags); 1011 a8xx_aperture_acquire(gpu, PIPE_BV, &flags); 1012 1013 DRM_DEV_ERROR(&gpu->pdev->dev, 1014 "BV: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", 1015 gpu_read(gpu, REG_A8XX_RBBM_GFX_BV_STATUS), 1016 gpu_read(gpu, REG_A8XX_CP_RB_RPTR_BV), 1017 gpu_read(gpu, REG_A6XX_CP_RB_WPTR), 1018 gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO), 1019 gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET), 1020 gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO), 1021 gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET), 1022 gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO), 1023 gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET)); 1024 1025 a8xx_aperture_release(gpu, flags); 1026 a8xx_aperture_clear(gpu); 1027 1028 /* Turn off the hangcheck timer to keep it from bothering us */ 1029 timer_delete(&gpu->hangcheck_timer); 1030 1031 kthread_queue_work(gpu->worker, &gpu->recover_work); 1032 } 1033 1034 static void a8xx_sw_fuse_violation_irq(struct msm_gpu *gpu) 1035 { 1036 u32 status; 1037 1038 status = gpu_read(gpu, REG_A8XX_RBBM_SW_FUSE_INT_STATUS); 1039 gpu_write(gpu, REG_A8XX_RBBM_SW_FUSE_INT_MASK, 0); 1040 1041 dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); 1042 1043 /* 1044 * Ignore FASTBLEND violations, because the HW will silently fall back 1045 * to legacy blending. 1046 */ 1047 if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | 1048 A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { 1049 timer_delete(&gpu->hangcheck_timer); 1050 1051 kthread_queue_work(gpu->worker, &gpu->recover_work); 1052 } 1053 } 1054 1055 irqreturn_t a8xx_irq(struct msm_gpu *gpu) 1056 { 1057 struct msm_drm_private *priv = gpu->dev->dev_private; 1058 u32 status = gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS); 1059 1060 gpu_write(gpu, REG_A8XX_RBBM_INT_CLEAR_CMD, status); 1061 1062 if (priv->disable_err_irq) 1063 status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS; 1064 1065 if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT) 1066 a8xx_fault_detect_irq(gpu); 1067 1068 if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) { 1069 u32 rl0, rl1; 1070 1071 rl0 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_0); 1072 rl1 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_1); 1073 dev_err_ratelimited(&gpu->pdev->dev, 1074 "CP | AHB bus error RL_ERROR_0: %x, RL_ERROR_1: %x\n", rl0, rl1); 1075 } 1076 1077 if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR) 1078 a8xx_cp_hw_err_irq(gpu); 1079 1080 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW) 1081 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n"); 1082 1083 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW) 1084 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n"); 1085 1086 if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) 1087 dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); 1088 1089 if (status & A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR) 1090 dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Trap interrupt\n"); 1091 1092 if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) 1093 a8xx_sw_fuse_violation_irq(gpu); 1094 1095 if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { 1096 msm_gpu_retire(gpu); 1097 a6xx_preempt_trigger(gpu); 1098 } 1099 1100 if (status & A6XX_RBBM_INT_0_MASK_CP_SW) 1101 a6xx_preempt_irq(gpu); 1102 1103 return IRQ_HANDLED; 1104 } 1105 1106 void a8xx_llc_activate(struct a6xx_gpu *a6xx_gpu) 1107 { 1108 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 1109 struct msm_gpu *gpu = &adreno_gpu->base; 1110 1111 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { 1112 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); 1113 1114 gpu_scid &= GENMASK(5, 0); 1115 1116 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, 1117 FIELD_PREP(GENMASK(29, 24), gpu_scid) | 1118 FIELD_PREP(GENMASK(23, 18), gpu_scid) | 1119 FIELD_PREP(GENMASK(17, 12), gpu_scid) | 1120 FIELD_PREP(GENMASK(11, 6), gpu_scid) | 1121 FIELD_PREP(GENMASK(5, 0), gpu_scid)); 1122 1123 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, 1124 FIELD_PREP(GENMASK(27, 22), gpu_scid) | 1125 FIELD_PREP(GENMASK(21, 16), gpu_scid) | 1126 FIELD_PREP(GENMASK(15, 10), gpu_scid) | 1127 BIT(8)); 1128 } 1129 1130 llcc_slice_activate(a6xx_gpu->htw_llc_slice); 1131 } 1132 1133 #define GBIF_CLIENT_HALT_MASK BIT(0) 1134 #define GBIF_ARB_HALT_MASK BIT(1) 1135 #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0) 1136 #define VBIF_RESET_ACK_MASK 0xF0 1137 #define GPR0_GBIF_HALT_REQUEST 0x1E0 1138 1139 void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off) 1140 { 1141 struct msm_gpu *gpu = &adreno_gpu->base; 1142 1143 if (gx_off) { 1144 /* Halt the gx side of GBIF */ 1145 gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 1); 1146 spin_until(gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT_ACK) & 1); 1147 } 1148 1149 /* Halt new client requests on GBIF */ 1150 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); 1151 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 1152 (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK); 1153 1154 /* Halt all AXI requests on GBIF */ 1155 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK); 1156 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 1157 (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK); 1158 1159 /* The GBIF halt needs to be explicitly cleared */ 1160 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0); 1161 } 1162 1163 int a8xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value) 1164 { 1165 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1166 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1167 1168 mutex_lock(&a6xx_gpu->gmu.lock); 1169 1170 /* Force the GPU power on so we can read this register */ 1171 a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); 1172 1173 *value = gpu_read64(gpu, REG_A8XX_CP_ALWAYS_ON_COUNTER); 1174 1175 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); 1176 1177 mutex_unlock(&a6xx_gpu->gmu.lock); 1178 1179 return 0; 1180 } 1181 1182 u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate) 1183 { 1184 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1185 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1186 u64 busy_cycles; 1187 1188 /* 19.2MHz */ 1189 *out_sample_rate = 19200000; 1190 1191 busy_cycles = gmu_read64(&a6xx_gpu->gmu, 1192 REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L, 1193 REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H); 1194 1195 return busy_cycles; 1196 } 1197 1198 bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 1199 { 1200 return true; 1201 } 1202