// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */


#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_gpu_trace.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"

#include <linux/bitfield.h>
#include <linux/devfreq.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/pm_domain.h>
#include <linux/soc/qcom/llcc-qcom.h>

#define GPU_PAS_ID 13

/*
 * Point the CP host register aperture at the given pipe/slice pair.
 * The last programmed value is cached in a6xx_gpu to skip redundant
 * register writes. Caller must hold aperture_lock.
 */
static void a8xx_aperture_slice_set(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 val;

	val = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe) | A8XX_CP_APERTURE_CNTL_HOST_SLICEID(slice);

	if (a6xx_gpu->cached_aperture == val)
		return;

	gpu_write(gpu, REG_A8XX_CP_APERTURE_CNTL_HOST, val);

	a6xx_gpu->cached_aperture = val;
}

/*
 * Take the aperture lock and select @pipe (slice 0) for subsequent
 * pipe-banked register accesses. Paired with a8xx_aperture_release().
 */
static void a8xx_aperture_acquire(struct msm_gpu *gpu, enum adreno_pipe pipe, unsigned long *flags)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	spin_lock_irqsave(&a6xx_gpu->aperture_lock, *flags);

	a8xx_aperture_slice_set(gpu, pipe, 0);
}

static void a8xx_aperture_release(struct msm_gpu *gpu, unsigned long flags)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);
}

/* Reset the aperture back to PIPE_NONE / slice 0 */
static void a8xx_aperture_clear(struct msm_gpu *gpu)
{
	unsigned long flags;

	a8xx_aperture_acquire(gpu, PIPE_NONE, &flags);
	a8xx_aperture_release(gpu, flags);
}

/* Write a single pipe-banked register under the aperture lock */
static void a8xx_write_pipe(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 offset, u32 data)
{
	unsigned long flags;

	a8xx_aperture_acquire(gpu, pipe, &flags);
	gpu_write(gpu, offset, data);
	a8xx_aperture_release(gpu, flags);
}

/* Read a single pipe/slice-banked register under the aperture lock */
static u32
a8xx_read_pipe_slice(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice, u32 offset)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	unsigned long flags;
	u32 val;

	spin_lock_irqsave(&a6xx_gpu->aperture_lock, flags);
	a8xx_aperture_slice_set(gpu, pipe, slice);
	val = gpu_read(gpu, offset);
	spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);

	return val;
}

/*
 * Discover the mask of enabled GPU slices (cached in a6xx_gpu->slice_mask)
 * and fold the slice count into the chip id. No-op for pre-A8xx families
 * or when the mask was already read.
 */
void a8xx_gpu_get_slice_info(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	const struct a6xx_info *info = adreno_gpu->info->a6xx;
	u32 slice_mask;

	if (adreno_gpu->info->family < ADRENO_8XX_GEN1)
		return;

	if (a6xx_gpu->slice_mask)
		return;

	slice_mask = GENMASK(info->max_slices - 1, 0);

	/* GEN1 doesn't support partial slice configurations */
	if (adreno_gpu->info->family == ADRENO_8XX_GEN1) {
		a6xx_gpu->slice_mask = slice_mask;
		return;
	}

	slice_mask &= a6xx_llc_read(a6xx_gpu,
			REG_A8XX_CX_MISC_SLICE_ENABLE_FINAL);

	a6xx_gpu->slice_mask = slice_mask;

	/* Chip ID depends on the number of slices available. So update it */
	adreno_gpu->chip_id |= FIELD_PREP(GENMASK(7, 4), hweight32(slice_mask));
}

/* Index of the lowest enabled slice */
static u32 a8xx_get_first_slice(struct a6xx_gpu *a6xx_gpu)
{
	return ffs(a6xx_gpu->slice_mask) - 1;
}

static inline bool _a8xx_check_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Check that the GMU is idle */
	if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
		return false;

	/* Check that the CX master is idle */
	if (gpu_read(gpu, REG_A8XX_RBBM_STATUS) &
	    ~A8XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
		return false;

	return !(gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS) &
		A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
}

static bool a8xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a8xx_check_idle(gpu))) {
		DRM_ERROR(
			"%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A8XX_RBBM_STATUS),
			gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

void a8xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	spin_lock_irqsave(&ring->preempt_lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	/* Update HW if this is the current ring and we are not in preempt*/
	if (!a6xx_in_preempt(a6xx_gpu)) {
		if (a6xx_gpu->cur_ring == ring)
			gpu_write(gpu,
REG_A6XX_CP_RB_WPTR, wptr); 177 else 178 ring->restore_wptr = true; 179 } else { 180 ring->restore_wptr = true; 181 } 182 183 spin_unlock_irqrestore(&ring->preempt_lock, flags); 184 } 185 186 static void a8xx_set_hwcg(struct msm_gpu *gpu, bool state) 187 { 188 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 189 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 190 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 191 u32 val; 192 193 if (adreno_is_x285(adreno_gpu) && state) 194 gpu_write(gpu, REG_A8XX_RBBM_CGC_0_PC, 0x00000702); 195 196 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL, 197 state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0); 198 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, 199 state ? 0x110111 : 0); 200 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL, 201 state ? 0x55555 : 0); 202 203 gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 1); 204 gpu_write(gpu, REG_A8XX_RBBM_CGC_GLOBAL_LOAD_CMD, !!state); 205 206 if (state) { 207 gpu_write(gpu, REG_A8XX_RBBM_CGC_P2S_TRIG_CMD, 1); 208 209 if (gpu_poll_timeout(gpu, REG_A8XX_RBBM_CGC_P2S_STATUS, val, 210 val & A8XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) { 211 dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n"); 212 return; 213 } 214 215 gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 0); 216 } else { 217 /* 218 * GMU enables clk gating in GBIF during boot up. 
So, 219 * override that here when hwcg feature is disabled 220 */ 221 gpu_rmw(gpu, REG_A8XX_GBIF_CX_CONFIG, BIT(0), 0); 222 } 223 } 224 225 static void a8xx_set_cp_protect(struct msm_gpu *gpu) 226 { 227 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 228 const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect; 229 u32 cntl, final_cfg; 230 unsigned int i; 231 232 cntl = A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_PROT_EN | 233 A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_FAULT_ON_VIOL_EN | 234 A8XX_CP_PROTECT_CNTL_PIPE_LAST_SPAN_INF_RANGE | 235 A8XX_CP_PROTECT_CNTL_PIPE_HALT_SQE_RANGE__MASK; 236 /* 237 * Enable access protection to privileged registers, fault on an access 238 * protect violation and select the last span to protect from the start 239 * address all the way to the end of the register address space 240 */ 241 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl); 242 a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl); 243 244 a8xx_aperture_clear(gpu); 245 246 for (i = 0; i < protect->count; i++) { 247 /* Intentionally skip writing to some registers */ 248 if (protect->regs[i]) { 249 gpu_write(gpu, REG_A8XX_CP_PROTECT_GLOBAL(i), protect->regs[i]); 250 final_cfg = protect->regs[i]; 251 } 252 } 253 254 /* 255 * Last span feature is only supported on PIPE specific register. 
256 * So update those here 257 */ 258 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg); 259 a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg); 260 261 a8xx_aperture_clear(gpu); 262 } 263 264 static void a8xx_set_ubwc_config(struct msm_gpu *gpu) 265 { 266 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 267 const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config; 268 u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2); 269 u32 level3_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL3); 270 bool rgba8888_lossless = false, fp16compoptdis = false; 271 bool yuvnotcomptofc = false, min_acc_len_64b = false; 272 bool rgb565_predicator = false, amsbc = false; 273 bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg); 274 u32 ubwc_version = cfg->ubwc_enc_version; 275 u32 hbb, hbb_hi, hbb_lo, mode = 1; 276 u8 uavflagprd_inv = 2; 277 278 switch (ubwc_version) { 279 case UBWC_6_0: 280 yuvnotcomptofc = true; 281 mode = 5; 282 break; 283 case UBWC_5_0: 284 amsbc = true; 285 rgb565_predicator = true; 286 mode = 4; 287 break; 288 case UBWC_4_0: 289 amsbc = true; 290 rgb565_predicator = true; 291 fp16compoptdis = true; 292 rgba8888_lossless = true; 293 mode = 2; 294 break; 295 case UBWC_3_0: 296 amsbc = true; 297 mode = 1; 298 break; 299 default: 300 dev_err(&gpu->pdev->dev, "Unknown UBWC version: 0x%x\n", ubwc_version); 301 break; 302 } 303 304 /* 305 * We subtract 13 from the highest bank bit (13 is the minimum value 306 * allowed by hw) and write the lowest two bits of the remaining value 307 * as hbb_lo and the one above it as hbb_hi to the hardware. 
308 */ 309 WARN_ON(cfg->highest_bank_bit < 13); 310 hbb = cfg->highest_bank_bit - 13; 311 hbb_hi = hbb >> 2; 312 hbb_lo = hbb & 3; 313 a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL, hbb << 5); 314 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL, hbb << 5); 315 316 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CCU_NC_MODE_CNTL, 317 yuvnotcomptofc << 6 | 318 hbb_hi << 3 | 319 hbb_lo << 1); 320 321 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CMP_NC_MODE_CNTL, 322 mode << 15 | 323 yuvnotcomptofc << 6 | 324 rgba8888_lossless << 4 | 325 fp16compoptdis << 3 | 326 rgb565_predicator << 2 | 327 amsbc << 1 | 328 min_acc_len_64b); 329 330 a8xx_aperture_clear(gpu); 331 332 gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, 333 level3_swizzling_dis << 13 | 334 level2_swizzling_dis << 12 | 335 hbb_hi << 10 | 336 uavflagprd_inv << 4 | 337 min_acc_len_64b << 3 | 338 hbb_lo << 1 | ubwc_mode); 339 340 gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, 341 level3_swizzling_dis << 7 | 342 level2_swizzling_dis << 6 | 343 hbb_hi << 4 | 344 min_acc_len_64b << 3 | 345 hbb_lo << 1 | ubwc_mode); 346 } 347 348 static void a8xx_nonctxt_config(struct msm_gpu *gpu, u32 *gmem_protect) 349 { 350 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 351 const struct a6xx_info *info = adreno_gpu->info->a6xx; 352 const struct adreno_reglist_pipe *regs = info->nonctxt_reglist; 353 unsigned int pipe_id, i; 354 unsigned long flags; 355 356 for (pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) { 357 /* We don't have support for LPAC yet */ 358 if (pipe_id == PIPE_LPAC) 359 continue; 360 361 a8xx_aperture_acquire(gpu, pipe_id, &flags); 362 363 for (i = 0; regs[i].offset; i++) { 364 if (!(BIT(pipe_id) & regs[i].pipe)) 365 continue; 366 367 if (regs[i].offset == REG_A8XX_RB_GC_GMEM_PROTECT) 368 *gmem_protect = regs[i].value; 369 370 gpu_write(gpu, regs[i].offset, regs[i].value); 371 } 372 373 a8xx_aperture_release(gpu, flags); 374 } 375 376 a8xx_aperture_clear(gpu); 377 } 378 379 static int 
a8xx_cp_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];
	u32 mask;

	/* Disable concurrent binning before sending CP init */
	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, BIT(27));

	OUT_PKT7(ring, CP_ME_INIT, 4);

	/* Use multiple HW contexts */
	mask = BIT(0);

	/* Enable error detection */
	mask |= BIT(1);

	/* Set default reset state */
	mask |= BIT(3);

	/* Disable save/restore of performance counters across preemption */
	mask |= BIT(6);

	OUT_RING(ring, mask);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Operation mode mask */
	OUT_RING(ring, 0x00000002);

	a6xx_flush(gpu, ring);
	return a8xx_idle(gpu, ring) ? 0 : -EINVAL;
}

/* RBBM interrupts to enable at init */
#define A8XX_INT_MASK \
	(A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
	 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
	 A6XX_RBBM_INT_0_MASK_CP_SW | \
	 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
	 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
	 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
	 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
	 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
	 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
	 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)

/* CP fetches that are elevated to APRIV (privileged) on every pipe */
#define A8XX_APRIV_MASK \
	(A8XX_CP_APRIV_CNTL_PIPE_ICACHE | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBFETCH | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBPRIVLEVEL | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBRPWB)

/* BR additionally gets privileged CP_CONTEXT_REG read/write */
#define A8XX_BR_APRIV_MASK \
	(A8XX_APRIV_MASK | \
	 A8XX_CP_APRIV_CNTL_PIPE_CDREAD | \
	 A8XX_CP_APRIV_CNTL_PIPE_CDWRITE)

/* Global CP fault interrupts (HW and SW faults per pipe/engine) */
#define A8XX_CP_GLOBAL_INT_MASK \
	(A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTLPAC | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE0 | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE1 | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBR | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBV | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTLPAC | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE0 | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE1 | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBR | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBV)

/* Per-pipe CP software-interrupt status bits to unmask */
#define A8XX_CP_INTERRUPT_STATUS_MASK_PIPE \
	(A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFRBWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB1WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB2WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB3WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFSDSWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFMRBWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFVSDWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_OPCODEERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VSDPARITYERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_REGISTERPROTECTIONERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_ILLEGALINSTRUCTION | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_SMMUFAULT | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPCLIENT | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPTYPE | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPREAD | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_RTWROVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTWROVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTREFCNTOVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTCLRRESMISS)

/* Per-pipe CP hardware-fault status bits to unmask */
#define A8XX_CP_HW_FAULT_STATUS_MASK_PIPE \
	(A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFRBFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB1FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB2FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB3FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFSDSFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFMRBFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFVSDFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_SQEREADBURSTOVF | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_EVENTENGINEOVF | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_UCODEERROR)

/* One-time GPU hardware init, called with the GMU lock held */
static int hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	unsigned int pipe_id, i;
	u32 gmem_protect = 0;
	u64 gmem_range_min;
	int ret;

	ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
	if (ret)
		return ret;

	/* Clear the cached value to force aperture configuration next time */
	a6xx_gpu->cached_aperture = UINT_MAX;
	a8xx_aperture_clear(gpu);

	/* Clear GBIF halt in case GX domain was not collapsed */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
	gpu_read(gpu, REG_A6XX_GBIF_HALT);

	gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 0);
	gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT);

	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);

	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);

	/* Setup GMEM Range in UCHE */
	gmem_range_min = SZ_64M;
	/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_GC_GMEM_RANGE_MIN, gmem_range_min);
	gpu_write64(gpu, REG_A8XX_SP_HLSQ_GC_GMEM_RANGE_MIN, gmem_range_min);

	/* Setup UCHE Trap region */
	gpu_write64(gpu, REG_A8XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_CNTL, 0x1);
	gpu_write(gpu, REG_A8XX_RBBM_SLICE_PERFCTR_CNTL, 0x1);

	/* Turn on the IFPC counter (countable 4 on XOCLK1) */
	gmu_write(&a6xx_gpu->gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_1,
		  FIELD_PREP(GENMASK(7, 0), 0x4));

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A8XX_CP_PERFCTR_CP_SEL(0), 1);

	a8xx_set_ubwc_config(gpu);

	/* Set weights for bicubic filtering */
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(5), 0x00000000);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(6), 0x3fd093e8);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(7), 0x3f4133dc);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(8), 0x3ea1dfdb);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(9), 0x3e0283e0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(10), 0x0000ac2b);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(11), 0x0000f01d);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(12), 0x00114412);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(13), 0x0021980a);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(14), 0x0051ec05);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(15), 0x0000380e);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(16), 0x3ff09001);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(17), 0x3fc10bfa);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(18), 0x3f9193f7);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(19), 0x3f7227f7);

	gpu_write(gpu, REG_A8XX_UCHE_CLIENT_PF, BIT(7) | 0x1);

	a8xx_nonctxt_config(gpu, &gmem_protect);

	/* Enable fault detection */
	gpu_write(gpu, REG_A8XX_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) | 0xcfffff);
	gpu_write(gpu, REG_A8XX_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30));

	/* Set up the CX GMU counter 0 to count busy ticks */
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);

	/* Enable the power counter */
	gmu_rmw(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_0, 0xff, BIT(5));
	gmu_write(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);

	/* Protect registers from the CP */
	a8xx_set_cp_protect(gpu);

	/* Enable the GMEM save/restore feature for preemption */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 1);

	/* Program APRIV and fault masks per pipe (LPAC unsupported, skipped) */
	for (pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		u32 apriv_mask = A8XX_APRIV_MASK;
		unsigned long flags;

		if (pipe_id == PIPE_LPAC)
			continue;

		if (pipe_id == PIPE_BR)
			apriv_mask = A8XX_BR_APRIV_MASK;

		a8xx_aperture_acquire(gpu, pipe_id, &flags);
		gpu_write(gpu, REG_A8XX_CP_APRIV_CNTL_PIPE, apriv_mask);
		gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_PIPE,
			  A8XX_CP_INTERRUPT_STATUS_MASK_PIPE);
		gpu_write(gpu, REG_A8XX_CP_HW_FAULT_STATUS_MASK_PIPE,
			  A8XX_CP_HW_FAULT_STATUS_MASK_PIPE);
		a8xx_aperture_release(gpu, flags);
	}

	a8xx_aperture_clear(gpu);

	/* Enable interrupts */
	gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, A8XX_CP_GLOBAL_INT_MASK);
	gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, A8XX_INT_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		goto out;

	gpu_write64(gpu, REG_A8XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
	if (a6xx_gpu->aqe_iova)
		gpu_write64(gpu, REG_A8XX_CP_AQE_INSTR_BASE_0, a6xx_gpu->aqe_iova);

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
	gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);

	/* Configure the RPTR shadow if needed: */
	gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, shadowptr(a6xx_gpu, gpu->rb[0]));
	gpu_write64(gpu, REG_A8XX_CP_RB_RPTR_ADDR_BV, rbmemptr(gpu->rb[0], bv_rptr));

	for (i = 0; i < gpu->nr_rings; i++)
		a6xx_gpu->shadow[i] = 0;

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];

	for (i = 0; i < gpu->nr_rings; i++)
		gpu->rb[i]->cur_ctx_seqno = 0;

	/* Enable the SQE to start the CP engine */
	gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 1);

	ret = a8xx_cp_init(gpu);
	if (ret)
		goto out;

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no resource but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a6xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a6xx_flush(gpu, gpu->rb[0]);
		if (!a8xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
		 * if you mess this up you are about to crash horribly)
		 */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
		ret = 0;
	} else {
		return ret;
	}

	/*
	 * GMEM_PROTECT register should be programmed after GPU is transitioned to
	 * non-secure mode
	 */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_GC_GMEM_PROTECT, gmem_protect);
	WARN_ON(!gmem_protect);
	a8xx_aperture_clear(gpu);

	/* Enable hardware clockgating */
	a8xx_set_hwcg(gpu, true);
out:
	/*
	 * Tell the GMU that we are done touching the GPU and it can start power
	 * management
	 */
	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);

	return ret;
}

/* Public hw_init entry point; serializes against the GMU */
int a8xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int ret;

	mutex_lock(&a6xx_gpu->gmu.lock);
	ret = hw_init(gpu);
	mutex_unlock(&a6xx_gpu->gmu.lock);

	return ret;
}

static void a8xx_dump(struct msm_gpu *gpu)
{
	DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n", gpu_read(gpu, REG_A8XX_RBBM_STATUS));
	adreno_dump(gpu);
}

/*
 * GPU hang recovery: halt the SQE, force a CX power collapse (to fully
 * reset the GPU) and re-init the hardware.
 */
void a8xx_recover(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int active_submits;

adreno_dump_info(gpu); 734 735 if (hang_debug) 736 a8xx_dump(gpu); 737 738 /* 739 * To handle recovery specific sequences during the rpm suspend we are 740 * about to trigger 741 */ 742 a6xx_gpu->hung = true; 743 744 /* Halt SQE first */ 745 gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 3); 746 747 pm_runtime_dont_use_autosuspend(&gpu->pdev->dev); 748 749 /* active_submit won't change until we make a submission */ 750 mutex_lock(&gpu->active_lock); 751 active_submits = gpu->active_submits; 752 753 /* 754 * Temporarily clear active_submits count to silence a WARN() in the 755 * runtime suspend cb 756 */ 757 gpu->active_submits = 0; 758 759 reinit_completion(&gmu->pd_gate); 760 dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb); 761 dev_pm_genpd_synced_poweroff(gmu->cxpd); 762 763 /* Drop the rpm refcount from active submits */ 764 if (active_submits) 765 pm_runtime_put(&gpu->pdev->dev); 766 767 /* And the final one from recover worker */ 768 pm_runtime_put_sync(&gpu->pdev->dev); 769 770 if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000))) 771 DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n"); 772 773 dev_pm_genpd_remove_notifier(gmu->cxpd); 774 775 pm_runtime_use_autosuspend(&gpu->pdev->dev); 776 777 if (active_submits) 778 pm_runtime_get(&gpu->pdev->dev); 779 780 pm_runtime_get_sync(&gpu->pdev->dev); 781 782 gpu->active_submits = active_submits; 783 mutex_unlock(&gpu->active_lock); 784 785 msm_gpu_hw_init(gpu); 786 a6xx_gpu->hung = false; 787 } 788 789 static const char *a8xx_uche_fault_block(struct msm_gpu *gpu, u32 mid) 790 { 791 static const char * const uche_clients[] = { 792 "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP", 793 "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP", 794 "STCHE", 795 }; 796 static const char * const uche_clients_lpac[] = { 797 "-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC", 798 }; 799 u32 val; 800 801 /* 802 * The source of the data depends on the 
mid ID read from FSYNR1. 803 * and the client ID read from the UCHE block 804 */ 805 val = gpu_read(gpu, REG_A8XX_UCHE_CLIENT_PF); 806 807 val &= GENMASK(6, 0); 808 809 /* mid=3 refers to BR or BV */ 810 if (mid == 3) { 811 if (val < ARRAY_SIZE(uche_clients)) 812 return uche_clients[val]; 813 else 814 return "UCHE"; 815 } 816 817 /* mid=8 refers to LPAC */ 818 if (mid == 8) { 819 if (val < ARRAY_SIZE(uche_clients_lpac)) 820 return uche_clients_lpac[val]; 821 else 822 return "UCHE_LPAC"; 823 } 824 825 return "Unknown"; 826 } 827 828 static const char *a8xx_fault_block(struct msm_gpu *gpu, u32 id) 829 { 830 switch (id) { 831 case 0x0: 832 return "CP"; 833 case 0x1: 834 return "UCHE: Unknown"; 835 case 0x2: 836 return "UCHE_LPAC: Unknown"; 837 case 0x3: 838 case 0x8: 839 return a8xx_uche_fault_block(gpu, id); 840 case 0x4: 841 return "CCU"; 842 case 0x5: 843 return "Flag cache"; 844 case 0x6: 845 return "PREFETCH"; 846 case 0x7: 847 return "GMU"; 848 case 0x9: 849 return "UCHE_HPAC"; 850 } 851 852 return "Unknown"; 853 } 854 855 int a8xx_fault_handler(void *arg, unsigned long iova, int flags, void *data) 856 { 857 struct msm_gpu *gpu = arg; 858 struct adreno_smmu_fault_info *info = data; 859 const char *block = "unknown"; 860 861 u32 scratch[] = { 862 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(0)), 863 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(1)), 864 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(2)), 865 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(3)), 866 }; 867 868 if (info) 869 block = a8xx_fault_block(gpu, info->fsynr1 & 0xff); 870 871 return adreno_fault_handler(gpu, iova, flags, info, block, scratch); 872 } 873 874 static void a8xx_cp_hw_err_irq(struct msm_gpu *gpu) 875 { 876 u32 status = gpu_read(gpu, REG_A8XX_CP_INTERRUPT_STATUS_GLOBAL); 877 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 878 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 879 u32 slice = a8xx_get_first_slice(a6xx_gpu); 880 u32 hw_fault_mask = GENMASK(6, 0); 881 u32 sw_fault_mask = GENMASK(22, 
16); 882 u32 pipe = 0; 883 884 dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Global INT status: 0x%x\n", status); 885 886 if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | 887 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR)) 888 pipe |= BIT(PIPE_BR); 889 890 if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | 891 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV)) 892 pipe |= BIT(PIPE_BV); 893 894 if (!pipe) { 895 dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Unknown pipe\n"); 896 goto out; 897 } 898 899 for (unsigned int pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) { 900 if (!(BIT(pipe_id) & pipe)) 901 continue; 902 903 if (hw_fault_mask & status) { 904 status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 905 REG_A8XX_CP_HW_FAULT_STATUS_PIPE); 906 dev_err_ratelimited(&gpu->pdev->dev, 907 "CP HW FAULT pipe: %u status: 0x%x\n", pipe_id, status); 908 } 909 910 if (sw_fault_mask & status) { 911 status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 912 REG_A8XX_CP_INTERRUPT_STATUS_PIPE); 913 dev_err_ratelimited(&gpu->pdev->dev, 914 "CP SW FAULT pipe: %u status: 0x%x\n", pipe_id, status); 915 916 if (status & BIT(8)) { 917 a8xx_write_pipe(gpu, pipe_id, REG_A8XX_CP_SQE_STAT_ADDR_PIPE, 1); 918 status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 919 REG_A8XX_CP_SQE_STAT_DATA_PIPE); 920 dev_err_ratelimited(&gpu->pdev->dev, 921 "CP Opcode error, opcode=0x%x\n", status); 922 } 923 924 if (status & BIT(10)) { 925 status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 926 REG_A8XX_CP_PROTECT_STATUS_PIPE); 927 dev_err_ratelimited(&gpu->pdev->dev, 928 "CP REG PROTECT error, status=0x%x\n", status); 929 } 930 } 931 } 932 933 out: 934 /* Turn off interrupts to avoid triggering recovery again */ 935 a8xx_aperture_clear(gpu); 936 gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, 0); 937 gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, 0); 938 939 kthread_queue_work(gpu->worker, &gpu->recover_work); 940 } 941 942 static u32 gpu_periph_read(struct msm_gpu *gpu, u32 dbg_offset) 943 { 944 gpu_write(gpu, 
REG_A8XX_CP_SQE_UCODE_DBG_ADDR_PIPE, dbg_offset);

	return gpu_read(gpu, REG_A8XX_CP_SQE_UCODE_DBG_DATA_PIPE);
}

/* Read a 64-bit value as two consecutive debug-port words (lo, hi) */
static u64 gpu_periph_read64(struct msm_gpu *gpu, u32 dbg_offset)
{
	u64 lo, hi;

	lo = gpu_periph_read(gpu, dbg_offset);
	hi = gpu_periph_read(gpu, dbg_offset + 1);

	return (hi << 32) | lo;
}

/* SQE debug-port offsets for the IB1/IB2/IB3 state */
#define CP_PERIPH_IB1_BASE_LO 0x7005
#define CP_PERIPH_IB1_BASE_HI 0x7006
#define CP_PERIPH_IB1_SIZE 0x7007
#define CP_PERIPH_IB1_OFFSET 0x7008
#define CP_PERIPH_IB2_BASE_LO 0x7009
#define CP_PERIPH_IB2_BASE_HI 0x700a
#define CP_PERIPH_IB2_SIZE 0x700b
#define CP_PERIPH_IB2_OFFSET 0x700c
#define CP_PERIPH_IB3_BASE_LO 0x700d
#define CP_PERIPH_IB3_BASE_HI 0x700e
#define CP_PERIPH_IB3_SIZE 0x700f
#define CP_PERIPH_IB3_OFFSET 0x7010

/* RBBM hang-detect handler: dump BR/BV CP state and schedule recovery */
static void a8xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
	unsigned long flags;

	/*
	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
	 * but the fault handler will trigger the devcore dump, and we want
	 * to otherwise resume normally rather than killing the submit, so
	 * just bail.
	 */
	if (gpu_read(gpu, REG_A8XX_RBBM_MISC_STATUS) & A8XX_RBBM_MISC_STATUS_SMMU_STALLED_ON_FAULT)
		return;

	/*
	 * Force the GPU to stay on until after we finish
	 * collecting information
	 */
	if (!adreno_has_gmu_wrapper(adreno_gpu))
		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		"gpu fault ring %d fence %x status %8.8X gfx_status %8.8X\n",
		ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
		gpu_read(gpu, REG_A8XX_RBBM_STATUS), gpu_read(gpu, REG_A8XX_RBBM_GFX_STATUS));

	a8xx_aperture_acquire(gpu, PIPE_BR, &flags);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		"BR: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
		gpu_read(gpu, REG_A8XX_RBBM_GFX_BR_STATUS),
		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
		gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
		gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
		gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
		gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
		gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
		gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));

	a8xx_aperture_release(gpu, flags);
	a8xx_aperture_acquire(gpu, PIPE_BV, &flags);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		"BV: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
		gpu_read(gpu, REG_A8XX_RBBM_GFX_BV_STATUS),
		gpu_read(gpu, REG_A8XX_CP_RB_RPTR_BV),
		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
		gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
		gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
		gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
		gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
		gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
		gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));

	a8xx_aperture_release(gpu, flags);
	a8xx_aperture_clear(gpu);

	/* Turn off the hangcheck timer to keep it from bothering us */
	timer_delete(&gpu->hangcheck_timer);

	kthread_queue_work(gpu->worker, &gpu->recover_work);
}

/* SW fuse violation: recover only for fuses the HW cannot fall back from */
static void a8xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
{
	u32 status;

	status = gpu_read(gpu, REG_A8XX_RBBM_SW_FUSE_INT_STATUS);
	gpu_write(gpu, REG_A8XX_RBBM_SW_FUSE_INT_MASK, 0);

	dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);

	/*
	 * Ignore FASTBLEND violations, because the HW will silently fall back
	 * to legacy blending.
	 */
	if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
		      A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
		timer_delete(&gpu->hangcheck_timer);

		kthread_queue_work(gpu->worker, &gpu->recover_work);
	}
}

/* Top-level RBBM interrupt handler: ack, then dispatch per status bit */
irqreturn_t a8xx_irq(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	u32 status = gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS);

	gpu_write(gpu, REG_A8XX_RBBM_INT_CLEAR_CMD, status);

	if (priv->disable_err_irq)
		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
		a8xx_fault_detect_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) {
		u32 rl0, rl1;

		rl0 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_0);
		rl1 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_1);
		dev_err_ratelimited(&gpu->pdev->dev,
				    "CP | AHB bus error RL_ERROR_0: %x, RL_ERROR_1: %x\n", rl0, rl1);
	}

	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a8xx_cp_hw_err_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");

	if (status & A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Trap interrupt\n");

	if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
		a8xx_sw_fuse_violation_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		msm_gpu_retire(gpu);
		a6xx_preempt_trigger(gpu);
	}

	if (status &
A6XX_RBBM_INT_0_MASK_CP_SW) 1105 a6xx_preempt_irq(gpu); 1106 1107 return IRQ_HANDLED; 1108 } 1109 1110 void a8xx_llc_activate(struct a6xx_gpu *a6xx_gpu) 1111 { 1112 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 1113 struct msm_gpu *gpu = &adreno_gpu->base; 1114 1115 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { 1116 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); 1117 1118 gpu_scid &= GENMASK(5, 0); 1119 1120 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, 1121 FIELD_PREP(GENMASK(29, 24), gpu_scid) | 1122 FIELD_PREP(GENMASK(23, 18), gpu_scid) | 1123 FIELD_PREP(GENMASK(17, 12), gpu_scid) | 1124 FIELD_PREP(GENMASK(11, 6), gpu_scid) | 1125 FIELD_PREP(GENMASK(5, 0), gpu_scid)); 1126 1127 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, 1128 FIELD_PREP(GENMASK(27, 22), gpu_scid) | 1129 FIELD_PREP(GENMASK(21, 16), gpu_scid) | 1130 FIELD_PREP(GENMASK(15, 10), gpu_scid) | 1131 BIT(8)); 1132 } 1133 1134 llcc_slice_activate(a6xx_gpu->htw_llc_slice); 1135 } 1136 1137 #define GBIF_CLIENT_HALT_MASK BIT(0) 1138 #define GBIF_ARB_HALT_MASK BIT(1) 1139 #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0) 1140 #define VBIF_RESET_ACK_MASK 0xF0 1141 #define GPR0_GBIF_HALT_REQUEST 0x1E0 1142 1143 void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off) 1144 { 1145 struct msm_gpu *gpu = &adreno_gpu->base; 1146 1147 if (gx_off) { 1148 /* Halt the gx side of GBIF */ 1149 gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 1); 1150 spin_until(gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT_ACK) & 1); 1151 } 1152 1153 /* Halt new client requests on GBIF */ 1154 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); 1155 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 1156 (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK); 1157 1158 /* Halt all AXI requests on GBIF */ 1159 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK); 1160 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 1161 (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK); 1162 1163 /* The GBIF halt needs to be 
explicitly cleared */ 1164 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0); 1165 } 1166 1167 int a8xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value) 1168 { 1169 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1170 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1171 1172 mutex_lock(&a6xx_gpu->gmu.lock); 1173 1174 /* Force the GPU power on so we can read this register */ 1175 a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); 1176 1177 *value = gpu_read64(gpu, REG_A8XX_CP_ALWAYS_ON_COUNTER); 1178 1179 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); 1180 1181 mutex_unlock(&a6xx_gpu->gmu.lock); 1182 1183 return 0; 1184 } 1185 1186 u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate) 1187 { 1188 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1189 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1190 u64 busy_cycles; 1191 1192 /* 19.2MHz */ 1193 *out_sample_rate = 19200000; 1194 1195 busy_cycles = gmu_read64(&a6xx_gpu->gmu, 1196 REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L, 1197 REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H); 1198 1199 return busy_cycles; 1200 } 1201 1202 bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 1203 { 1204 return true; 1205 } 1206