1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ 3 4 5 #include "msm_gem.h" 6 #include "msm_mmu.h" 7 #include "msm_gpu_trace.h" 8 #include "a6xx_gpu.h" 9 #include "a6xx_gmu.xml.h" 10 11 #include <linux/bitfield.h> 12 #include <linux/devfreq.h> 13 #include <linux/firmware/qcom/qcom_scm.h> 14 #include <linux/pm_domain.h> 15 #include <linux/soc/qcom/llcc-qcom.h> 16 17 #define GPU_PAS_ID 13 18 19 static void a8xx_aperture_slice_set(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice) 20 { 21 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 22 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 23 u32 val; 24 25 val = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe) | A8XX_CP_APERTURE_CNTL_HOST_SLICEID(slice); 26 27 if (a6xx_gpu->cached_aperture == val) 28 return; 29 30 gpu_write(gpu, REG_A8XX_CP_APERTURE_CNTL_HOST, val); 31 32 a6xx_gpu->cached_aperture = val; 33 } 34 35 static void a8xx_aperture_acquire(struct msm_gpu *gpu, enum adreno_pipe pipe, unsigned long *flags) 36 { 37 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 38 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 39 40 spin_lock_irqsave(&a6xx_gpu->aperture_lock, *flags); 41 42 a8xx_aperture_slice_set(gpu, pipe, 0); 43 } 44 45 static void a8xx_aperture_release(struct msm_gpu *gpu, unsigned long flags) 46 { 47 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 48 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 49 50 spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags); 51 } 52 53 static void a8xx_aperture_clear(struct msm_gpu *gpu) 54 { 55 unsigned long flags; 56 57 a8xx_aperture_acquire(gpu, PIPE_NONE, &flags); 58 a8xx_aperture_release(gpu, flags); 59 } 60 61 static void a8xx_write_pipe(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 offset, u32 data) 62 { 63 unsigned long flags; 64 65 a8xx_aperture_acquire(gpu, pipe, &flags); 66 gpu_write(gpu, offset, data); 67 a8xx_aperture_release(gpu, flags); 68 } 69 70 static u32 a8xx_read_pipe_slice(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice, u32 offset) 71 { 72 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 73 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 74 unsigned long flags; 75 u32 val; 76 77 spin_lock_irqsave(&a6xx_gpu->aperture_lock, flags); 78 a8xx_aperture_slice_set(gpu, pipe, slice); 79 val = gpu_read(gpu, offset); 80 spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags); 81 82 return val; 83 } 84 85 void a8xx_gpu_get_slice_info(struct msm_gpu *gpu) 86 { 87 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 88 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 89 const struct a6xx_info *info = adreno_gpu->info->a6xx; 90 u32 slice_mask; 91 92 if (adreno_gpu->info->family < ADRENO_8XX_GEN1) 93 return; 94 95 if (a6xx_gpu->slice_mask) 96 return; 97 98 slice_mask = GENMASK(info->max_slices - 1, 0); 99 100 /* GEN1 doesn't support partial slice configurations */ 101 if (adreno_gpu->info->family == ADRENO_8XX_GEN1) { 102 a6xx_gpu->slice_mask = slice_mask; 103 return; 104 } 105 106 slice_mask &= a6xx_llc_read(a6xx_gpu, 107 REG_A8XX_CX_MISC_SLICE_ENABLE_FINAL); 108 109 a6xx_gpu->slice_mask = slice_mask; 110 111 /* Chip ID depends on the number of slices available. So update it */ 112 adreno_gpu->chip_id |= FIELD_PREP(GENMASK(7, 4), hweight32(slice_mask)); 113 } 114 115 static u32 a8xx_get_first_slice(struct a6xx_gpu *a6xx_gpu) 116 { 117 return ffs(a6xx_gpu->slice_mask) - 1; 118 } 119 120 static inline bool _a8xx_check_idle(struct msm_gpu *gpu) 121 { 122 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 123 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 124 125 /* Check that the GMU is idle */ 126 if (!a6xx_gmu_isidle(&a6xx_gpu->gmu)) 127 return false; 128 129 /* Check that the CX master is idle */ 130 if (gpu_read(gpu, REG_A8XX_RBBM_STATUS) & 131 ~A8XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER) 132 return false; 133 134 return !(gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS) & 135 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT); 136 } 137 138 static bool a8xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 139 { 140 /* wait for CP to drain ringbuffer: */ 141 if (!adreno_idle(gpu, ring)) 142 return false; 143 144 if (spin_until(_a8xx_check_idle(gpu))) { 145 DRM_ERROR( 146 "%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n", 147 gpu->name, __builtin_return_address(0), 148 gpu_read(gpu, REG_A8XX_RBBM_STATUS), 149 gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS), 150 gpu_read(gpu, REG_A6XX_CP_RB_RPTR), 151 gpu_read(gpu, REG_A6XX_CP_RB_WPTR)); 152 return false; 153 } 154 155 return true; 156 } 157 158 void a8xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 159 { 160 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 161 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 162 uint32_t wptr; 163 unsigned long flags; 164 165 spin_lock_irqsave(&ring->preempt_lock, flags); 166 167 /* Copy the shadow to the actual register */ 168 ring->cur = ring->next; 169 170 /* Make sure to wrap wptr if we need to */ 171 wptr = get_wptr(ring); 172 173 /* Update HW if this is the current ring and we are not in preempt*/ 174 if (!a6xx_in_preempt(a6xx_gpu)) { 175 if (a6xx_gpu->cur_ring == ring) 176 gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); 177 else 178 ring->restore_wptr = true; 179 } else { 180 ring->restore_wptr = true; 181 } 182 183 spin_unlock_irqrestore(&ring->preempt_lock, flags); 184 } 185 186 static void a8xx_set_hwcg(struct msm_gpu *gpu, bool state) 187 { 188 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 189 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 190 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 191 u32 val; 192 193 if (adreno_is_x285(adreno_gpu) && state) 194 gpu_write(gpu, REG_A8XX_RBBM_CGC_0_PC, 0x00000702); 195 196 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL, 197 state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0); 198 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, 199 state ? 0x110111 : 0); 200 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL, 201 state ? 0x55555 : 0); 202 203 gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 1); 204 gpu_write(gpu, REG_A8XX_RBBM_CGC_GLOBAL_LOAD_CMD, !!state); 205 206 if (state) { 207 gpu_write(gpu, REG_A8XX_RBBM_CGC_P2S_TRIG_CMD, 1); 208 209 if (gpu_poll_timeout(gpu, REG_A8XX_RBBM_CGC_P2S_STATUS, val, 210 val & A8XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) { 211 dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n"); 212 return; 213 } 214 215 gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 0); 216 } else { 217 /* 218 * GMU enables clk gating in GBIF during boot up. So, 219 * override that here when hwcg feature is disabled 220 */ 221 gpu_rmw(gpu, REG_A8XX_GBIF_CX_CONFIG, BIT(0), 0); 222 } 223 } 224 225 static void a8xx_set_cp_protect(struct msm_gpu *gpu) 226 { 227 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 228 const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect; 229 u32 cntl, final_cfg; 230 unsigned int i; 231 232 cntl = A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_PROT_EN | 233 A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_FAULT_ON_VIOL_EN | 234 A8XX_CP_PROTECT_CNTL_PIPE_LAST_SPAN_INF_RANGE | 235 A8XX_CP_PROTECT_CNTL_PIPE_HALT_SQE_RANGE__MASK; 236 /* 237 * Enable access protection to privileged registers, fault on an access 238 * protect violation and select the last span to protect from the start 239 * address all the way to the end of the register address space 240 */ 241 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl); 242 a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl); 243 244 a8xx_aperture_clear(gpu); 245 246 for (i = 0; i < protect->count; i++) { 247 /* Intentionally skip writing to some registers */ 248 if (protect->regs[i]) { 249 gpu_write(gpu, REG_A8XX_CP_PROTECT_GLOBAL(i), protect->regs[i]); 250 final_cfg = protect->regs[i]; 251 } 252 } 253 254 /* 255 * Last span feature is only supported on PIPE specific register. 256 * So update those here 257 */ 258 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg); 259 a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg); 260 261 a8xx_aperture_clear(gpu); 262 } 263 264 static void a8xx_set_ubwc_config(struct msm_gpu *gpu) 265 { 266 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 267 const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config; 268 u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2); 269 u32 level3_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL3); 270 bool rgba8888_lossless = false, fp16compoptdis = false; 271 bool yuvnotcomptofc = false, min_acc_len_64b = false; 272 bool rgb565_predicator = false, amsbc = false; 273 bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg); 274 u32 ubwc_version = cfg->ubwc_enc_version; 275 u32 hbb, hbb_hi, hbb_lo, mode = 1; 276 u8 uavflagprd_inv = 2; 277 278 switch (ubwc_version) { 279 case UBWC_6_0: 280 yuvnotcomptofc = true; 281 mode = 5; 282 break; 283 case UBWC_5_0: 284 amsbc = true; 285 rgb565_predicator = true; 286 mode = 4; 287 break; 288 case UBWC_4_0: 289 amsbc = true; 290 rgb565_predicator = true; 291 fp16compoptdis = true; 292 rgba8888_lossless = true; 293 mode = 2; 294 break; 295 case UBWC_3_0: 296 amsbc = true; 297 mode = 1; 298 break; 299 default: 300 dev_err(&gpu->pdev->dev, "Unknown UBWC version: 0x%x\n", ubwc_version); 301 break; 302 } 303 304 /* 305 * We subtract 13 from the highest bank bit (13 is the minimum value 306 * allowed by hw) and write the lowest two bits of the remaining value 307 * as hbb_lo and the one above it as hbb_hi to the hardware. 308 */ 309 WARN_ON(cfg->highest_bank_bit < 13); 310 hbb = cfg->highest_bank_bit - 13; 311 hbb_hi = hbb >> 2; 312 hbb_lo = hbb & 3; 313 314 a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL, 315 hbb << 5 | 316 level3_swizzling_dis << 4 | 317 level2_swizzling_dis << 3); 318 319 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL, 320 hbb << 5 | 321 level3_swizzling_dis << 4 | 322 level2_swizzling_dis << 3); 323 324 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CCU_NC_MODE_CNTL, 325 yuvnotcomptofc << 6 | 326 level3_swizzling_dis << 5 | 327 level2_swizzling_dis << 4 | 328 hbb_hi << 3 | 329 hbb_lo << 1); 330 331 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CMP_NC_MODE_CNTL, 332 mode << 15 | 333 yuvnotcomptofc << 6 | 334 rgba8888_lossless << 4 | 335 fp16compoptdis << 3 | 336 rgb565_predicator << 2 | 337 amsbc << 1 | 338 min_acc_len_64b); 339 340 a8xx_aperture_clear(gpu); 341 342 gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, 343 level3_swizzling_dis << 13 | 344 level2_swizzling_dis << 12 | 345 hbb_hi << 10 | 346 uavflagprd_inv << 4 | 347 min_acc_len_64b << 3 | 348 hbb_lo << 1 | ubwc_mode); 349 350 gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, 351 level3_swizzling_dis << 7 | 352 level2_swizzling_dis << 6 | 353 hbb_hi << 4 | 354 min_acc_len_64b << 3 | 355 hbb_lo << 1 | ubwc_mode); 356 } 357 358 static void a8xx_nonctxt_config(struct msm_gpu *gpu, u32 *gmem_protect) 359 { 360 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 361 const struct a6xx_info *info = adreno_gpu->info->a6xx; 362 const struct adreno_reglist_pipe *regs = info->nonctxt_reglist; 363 unsigned int pipe_id, i; 364 unsigned long flags; 365 366 for (pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) { 367 /* We don't have support for LPAC yet */ 368 if (pipe_id == PIPE_LPAC) 369 continue; 370 371 a8xx_aperture_acquire(gpu, pipe_id, &flags); 372 373 for (i = 0; regs[i].offset; i++) { 374 if (!(BIT(pipe_id) & regs[i].pipe)) 375 continue; 376 377 if (regs[i].offset == REG_A8XX_RB_GC_GMEM_PROTECT) 378 *gmem_protect = regs[i].value; 379 380 gpu_write(gpu, regs[i].offset, regs[i].value); 381 } 382 383 a8xx_aperture_release(gpu, flags); 384 } 385 386 a8xx_aperture_clear(gpu); 387 } 388 389 static int a8xx_cp_init(struct msm_gpu *gpu) 390 { 391 struct msm_ringbuffer *ring = gpu->rb[0]; 392 u32 mask; 393 394 /* Disable concurrent binning before sending CP init */ 395 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 396 OUT_RING(ring, BIT(27)); 397 398 OUT_PKT7(ring, CP_ME_INIT, 4); 399 400 /* Use multiple HW contexts */ 401 mask = BIT(0); 402 403 /* Enable error detection */ 404 mask |= BIT(1); 405 406 /* Set default reset state */ 407 mask |= BIT(3); 408 409 /* Disable save/restore of performance counters across preemption */ 410 mask |= BIT(6); 411 412 OUT_RING(ring, mask); 413 414 /* Enable multiple hardware contexts */ 415 OUT_RING(ring, 0x00000003); 416 417 /* Enable error detection */ 418 OUT_RING(ring, 0x20000000); 419 420 /* Operation mode mask */ 421 OUT_RING(ring, 0x00000002); 422 423 a6xx_flush(gpu, ring); 424 return a8xx_idle(gpu, ring) ? 0 : -EINVAL; 425 } 426 427 #define A8XX_INT_MASK \ 428 (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \ 429 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \ 430 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \ 431 A6XX_RBBM_INT_0_MASK_CP_SW | \ 432 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ 433 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \ 434 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \ 435 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ 436 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \ 437 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ 438 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ 439 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ 440 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ 441 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) 442 443 #define A8XX_APRIV_MASK \ 444 (A8XX_CP_APRIV_CNTL_PIPE_ICACHE | \ 445 A8XX_CP_APRIV_CNTL_PIPE_RBFETCH | \ 446 A8XX_CP_APRIV_CNTL_PIPE_RBPRIVLEVEL | \ 447 A8XX_CP_APRIV_CNTL_PIPE_RBRPWB) 448 449 #define A8XX_BR_APRIV_MASK \ 450 (A8XX_APRIV_MASK | \ 451 A8XX_CP_APRIV_CNTL_PIPE_CDREAD | \ 452 A8XX_CP_APRIV_CNTL_PIPE_CDWRITE) 453 454 #define A8XX_CP_GLOBAL_INT_MASK \ 455 (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | \ 456 A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | \ 457 A8XX_CP_GLOBAL_INT_MASK_HWFAULTLPAC | \ 458 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE0 | \ 459 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE1 | \ 460 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBR | \ 461 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBV | \ 462 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR | \ 463 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV | \ 464 A8XX_CP_GLOBAL_INT_MASK_SWFAULTLPAC | \ 465 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE0 | \ 466 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE1 | \ 467 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBR | \ 468 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBV) 469 470 #define A8XX_CP_INTERRUPT_STATUS_MASK_PIPE \ 471 (A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFRBWRAP | \ 472 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB1WRAP | \ 473 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB2WRAP | \ 474 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB3WRAP | \ 475 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFSDSWRAP | \ 476 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFMRBWRAP | \ 477 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFVSDWRAP | \ 478 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_OPCODEERROR | \ 479 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VSDPARITYERROR | \ 480 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_REGISTERPROTECTIONERROR | \ 481 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_ILLEGALINSTRUCTION | \ 482 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_SMMUFAULT | \ 483 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPCLIENT| \ 484 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPTYPE | \ 485 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPREAD | \ 486 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESP | \ 487 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_RTWROVF | \ 488 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTWROVF | \ 489 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTREFCNTOVF | \ 490 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTCLRRESMISS) 491 492 #define A8XX_CP_HW_FAULT_STATUS_MASK_PIPE \ 493 (A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFRBFAULT | \ 494 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB1FAULT | \ 495 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB2FAULT | \ 496 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB3FAULT | \ 497 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFSDSFAULT | \ 498 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFMRBFAULT | \ 499 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFVSDFAULT | \ 500 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_SQEREADBURSTOVF | \ 501 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_EVENTENGINEOVF | \ 502 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_UCODEERROR) 503 504 static int hw_init(struct msm_gpu *gpu) 505 { 506 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 507 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 508 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 509 unsigned int pipe_id, i; 510 u32 gmem_protect = 0; 511 u64 gmem_range_min; 512 int ret; 513 514 ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); 515 if (ret) 516 return ret; 517 518 /* Clear the cached value to force aperture configuration next time */ 519 a6xx_gpu->cached_aperture = UINT_MAX; 520 a8xx_aperture_clear(gpu); 521 522 /* Clear GBIF halt in case GX domain was not collapsed */ 523 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0); 524 gpu_read(gpu, REG_A6XX_GBIF_HALT); 525 526 gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 0); 527 gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT); 528 529 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0); 530 531 /* 532 * Disable the trusted memory range - we don't actually supported secure 533 * memory rendering at this point in time and we don't want to block off 534 * part of the virtual memory space. 535 */ 536 gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000); 537 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); 538 539 /* Make all blocks contribute to the GPU BUSY perf counter */ 540 gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff); 541 542 /* Setup GMEM Range in UCHE */ 543 gmem_range_min = SZ_64M; 544 /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */ 545 gpu_write64(gpu, REG_A8XX_UCHE_CCHE_GC_GMEM_RANGE_MIN, gmem_range_min); 546 gpu_write64(gpu, REG_A8XX_SP_HLSQ_GC_GMEM_RANGE_MIN, gmem_range_min); 547 548 /* Setup UCHE Trap region */ 549 gpu_write64(gpu, REG_A8XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); 550 gpu_write64(gpu, REG_A8XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); 551 gpu_write64(gpu, REG_A8XX_UCHE_CCHE_TRAP_BASE, adreno_gpu->uche_trap_base); 552 gpu_write64(gpu, REG_A8XX_UCHE_CCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); 553 554 /* Turn on performance counters */ 555 gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_CNTL, 0x1); 556 gpu_write(gpu, REG_A8XX_RBBM_SLICE_PERFCTR_CNTL, 0x1); 557 558 /* Turn on the IFPC counter (countable 4 on XOCLK1) */ 559 gmu_write(&a6xx_gpu->gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_1, 560 FIELD_PREP(GENMASK(7, 0), 0x4)); 561 562 /* Select CP0 to always count cycles */ 563 gpu_write(gpu, REG_A8XX_CP_PERFCTR_CP_SEL(0), 1); 564 565 a8xx_set_ubwc_config(gpu); 566 567 /* Set weights for bicubic filtering */ 568 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0); 569 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4); 570 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee); 571 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed); 572 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0); 573 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(5), 0x00000000); 574 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(6), 0x3fd093e8); 575 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(7), 0x3f4133dc); 576 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(8), 0x3ea1dfdb); 577 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(9), 0x3e0283e0); 578 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(10), 0x0000ac2b); 579 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(11), 0x0000f01d); 580 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(12), 0x00114412); 581 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(13), 0x0021980a); 582 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(14), 0x0051ec05); 583 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(15), 0x0000380e); 584 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(16), 0x3ff09001); 585 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(17), 0x3fc10bfa); 586 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(18), 0x3f9193f7); 587 gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(19), 0x3f7227f7); 588 589 gpu_write(gpu, REG_A8XX_UCHE_CLIENT_PF, BIT(7) | 0x1); 590 591 a8xx_nonctxt_config(gpu, &gmem_protect); 592 593 /* Enable fault detection */ 594 gpu_write(gpu, REG_A8XX_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) | 0xcfffff); 595 gpu_write(gpu, REG_A8XX_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30)); 596 597 /* Set up the CX GMU counter 0 to count busy ticks */ 598 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000); 599 600 /* Enable the power counter */ 601 gmu_rmw(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_0, 0xff, BIT(5)); 602 gmu_write(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1); 603 604 /* Protect registers from the CP */ 605 a8xx_set_cp_protect(gpu); 606 607 /* Enable the GMEM save/restore feature for preemption */ 608 a8xx_write_pipe(gpu, PIPE_BR, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 1); 609 610 for (pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) { 611 u32 apriv_mask = A8XX_APRIV_MASK; 612 unsigned long flags; 613 614 if (pipe_id == PIPE_LPAC) 615 continue; 616 617 if (pipe_id == PIPE_BR) 618 apriv_mask = A8XX_BR_APRIV_MASK; 619 620 a8xx_aperture_acquire(gpu, pipe_id, &flags); 621 gpu_write(gpu, REG_A8XX_CP_APRIV_CNTL_PIPE, apriv_mask); 622 gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_PIPE, 623 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE); 624 gpu_write(gpu, REG_A8XX_CP_HW_FAULT_STATUS_MASK_PIPE, 625 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE); 626 a8xx_aperture_release(gpu, flags); 627 } 628 629 a8xx_aperture_clear(gpu); 630 631 /* Enable interrupts */ 632 gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, A8XX_CP_GLOBAL_INT_MASK); 633 gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, A8XX_INT_MASK); 634 635 ret = adreno_hw_init(gpu); 636 if (ret) 637 goto out; 638 639 gpu_write64(gpu, REG_A8XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova); 640 if (a6xx_gpu->aqe_iova) 641 gpu_write64(gpu, REG_A8XX_CP_AQE_INSTR_BASE_0, a6xx_gpu->aqe_iova); 642 643 /* Set the ringbuffer address */ 644 gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova); 645 gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT); 646 647 /* Configure the RPTR shadow if needed: */ 648 gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, shadowptr(a6xx_gpu, gpu->rb[0])); 649 gpu_write64(gpu, REG_A8XX_CP_RB_RPTR_ADDR_BV, rbmemptr(gpu->rb[0], bv_rptr)); 650 651 for (i = 0; i < gpu->nr_rings; i++) 652 a6xx_gpu->shadow[i] = 0; 653 654 /* Always come up on rb 0 */ 655 a6xx_gpu->cur_ring = gpu->rb[0]; 656 657 for (i = 0; i < gpu->nr_rings; i++) 658 gpu->rb[i]->cur_ctx_seqno = 0; 659 660 /* Enable the SQE_to start the CP engine */ 661 gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 1); 662 663 ret = a8xx_cp_init(gpu); 664 if (ret) 665 goto out; 666 667 /* 668 * Try to load a zap shader into the secure world. If successful 669 * we can use the CP to switch out of secure mode. If not then we 670 * have no resource but to try to switch ourselves out manually. If we 671 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will 672 * be blocked and a permissions violation will soon follow. 673 */ 674 ret = a6xx_zap_shader_init(gpu); 675 if (!ret) { 676 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1); 677 OUT_RING(gpu->rb[0], 0x00000000); 678 679 a6xx_flush(gpu, gpu->rb[0]); 680 if (!a8xx_idle(gpu, gpu->rb[0])) 681 return -EINVAL; 682 } else if (ret == -ENODEV) { 683 /* 684 * This device does not use zap shader (but print a warning 685 * just in case someone got their dt wrong.. hopefully they 686 * have a debug UART to realize the error of their ways... 687 * if you mess this up you are about to crash horribly) 688 */ 689 dev_warn_once(gpu->dev->dev, 690 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n"); 691 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0); 692 ret = 0; 693 } else { 694 return ret; 695 } 696 697 /* 698 * GMEM_PROTECT register should be programmed after GPU is transitioned to 699 * non-secure mode 700 */ 701 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_GC_GMEM_PROTECT, gmem_protect); 702 WARN_ON(!gmem_protect); 703 a8xx_aperture_clear(gpu); 704 705 /* Enable hardware clockgating */ 706 a8xx_set_hwcg(gpu, true); 707 out: 708 /* 709 * Tell the GMU that we are done touching the GPU and it can start power 710 * management 711 */ 712 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); 713 714 return ret; 715 } 716 717 int a8xx_hw_init(struct msm_gpu *gpu) 718 { 719 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 720 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 721 int ret; 722 723 mutex_lock(&a6xx_gpu->gmu.lock); 724 ret = hw_init(gpu); 725 mutex_unlock(&a6xx_gpu->gmu.lock); 726 727 return ret; 728 } 729 730 static void a8xx_dump(struct msm_gpu *gpu) 731 { 732 DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n", gpu_read(gpu, REG_A8XX_RBBM_STATUS)); 733 adreno_dump(gpu); 734 } 735 736 void a8xx_recover(struct msm_gpu *gpu) 737 { 738 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 739 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 740 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 741 int active_submits; 742 743 adreno_dump_info(gpu); 744 745 if (hang_debug) 746 a8xx_dump(gpu); 747 748 /* 749 * To handle recovery specific sequences during the rpm suspend we are 750 * about to trigger 751 */ 752 a6xx_gpu->hung = true; 753 754 /* Halt SQE first */ 755 gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 3); 756 757 pm_runtime_dont_use_autosuspend(&gpu->pdev->dev); 758 759 /* active_submit won't change until we make a submission */ 760 mutex_lock(&gpu->active_lock); 761 active_submits = gpu->active_submits; 762 763 /* 764 * Temporarily clear active_submits count to silence a WARN() in the 765 * runtime suspend cb 766 */ 767 gpu->active_submits = 0; 768 769 reinit_completion(&gmu->pd_gate); 770 dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb); 771 dev_pm_genpd_synced_poweroff(gmu->cxpd); 772 773 /* Drop the rpm refcount from active submits */ 774 if (active_submits) 775 pm_runtime_put(&gpu->pdev->dev); 776 777 /* And the final one from recover worker */ 778 pm_runtime_put_sync(&gpu->pdev->dev); 779 780 if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000))) 781 DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n"); 782 783 dev_pm_genpd_remove_notifier(gmu->cxpd); 784 785 pm_runtime_use_autosuspend(&gpu->pdev->dev); 786 787 if (active_submits) 788 pm_runtime_get(&gpu->pdev->dev); 789 790 pm_runtime_get_sync(&gpu->pdev->dev); 791 792 gpu->active_submits = active_submits; 793 mutex_unlock(&gpu->active_lock); 794 795 msm_gpu_hw_init(gpu); 796 a6xx_gpu->hung = false; 797 } 798 799 static const char *a8xx_uche_fault_block(struct msm_gpu *gpu, u32 mid) 800 { 801 static const char * const uche_clients[] = { 802 "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP", 803 "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP", 804 "STCHE", 805 }; 806 static const char * const uche_clients_lpac[] = { 807 "-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC", 808 }; 809 u32 val; 810 811 /* 812 * The source of the data depends on the mid ID read from FSYNR1. 813 * and the client ID read from the UCHE block 814 */ 815 val = gpu_read(gpu, REG_A8XX_UCHE_CLIENT_PF); 816 817 val &= GENMASK(6, 0); 818 819 /* mid=3 refers to BR or BV */ 820 if (mid == 3) { 821 if (val < ARRAY_SIZE(uche_clients)) 822 return uche_clients[val]; 823 else 824 return "UCHE"; 825 } 826 827 /* mid=8 refers to LPAC */ 828 if (mid == 8) { 829 if (val < ARRAY_SIZE(uche_clients_lpac)) 830 return uche_clients_lpac[val]; 831 else 832 return "UCHE_LPAC"; 833 } 834 835 return "Unknown"; 836 } 837 838 static const char *a8xx_fault_block(struct msm_gpu *gpu, u32 id) 839 { 840 switch (id) { 841 case 0x0: 842 return "CP"; 843 case 0x1: 844 return "UCHE: Unknown"; 845 case 0x2: 846 return "UCHE_LPAC: Unknown"; 847 case 0x3: 848 case 0x8: 849 return a8xx_uche_fault_block(gpu, id); 850 case 0x4: 851 return "CCU"; 852 case 0x5: 853 return "Flag cache"; 854 case 0x6: 855 return "PREFETCH"; 856 case 0x7: 857 return "GMU"; 858 case 0x9: 859 return "UCHE_HPAC"; 860 } 861 862 return "Unknown"; 863 } 864 865 int a8xx_fault_handler(void *arg, unsigned long iova, int flags, void *data) 866 { 867 struct msm_gpu *gpu = arg; 868 struct adreno_smmu_fault_info *info = data; 869 const char *block = "unknown"; 870 871 u32 scratch[] = { 872 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(0)), 873 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(1)), 874 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(2)), 875 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(3)), 876 }; 877 878 if (info) 879 block = a8xx_fault_block(gpu, info->fsynr1 & 0xff); 880 881 return adreno_fault_handler(gpu, iova, flags, info, block, scratch); 882 } 883 884 static void a8xx_cp_hw_err_irq(struct msm_gpu *gpu) 885 { 886 u32 status = gpu_read(gpu, REG_A8XX_CP_INTERRUPT_STATUS_GLOBAL); 887 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 888 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 889 u32 slice = a8xx_get_first_slice(a6xx_gpu); 890 u32 hw_fault_mask = GENMASK(6, 0); 891 u32 sw_fault_mask = GENMASK(22, 16); 892 u32 pipe = 0; 893 894 dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Global INT status: 0x%x\n", status); 895 896 if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | 897 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR)) 898 pipe |= BIT(PIPE_BR); 899 900 if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | 901 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV)) 902 pipe |= BIT(PIPE_BV); 903 904 if (!pipe) { 905 dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Unknown pipe\n"); 906 goto out; 907 } 908 909 for (unsigned int pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) { 910 if (!(BIT(pipe_id) & pipe)) 911 continue; 912 913 if (hw_fault_mask & status) { 914 status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 915 REG_A8XX_CP_HW_FAULT_STATUS_PIPE); 916 dev_err_ratelimited(&gpu->pdev->dev, 917 "CP HW FAULT pipe: %u status: 0x%x\n", pipe_id, status); 918 } 919 920 if (sw_fault_mask & status) { 921 status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 922 REG_A8XX_CP_INTERRUPT_STATUS_PIPE); 923 dev_err_ratelimited(&gpu->pdev->dev, 924 "CP SW FAULT pipe: %u status: 0x%x\n", pipe_id, status); 925 926 if (status & BIT(8)) { 927 a8xx_write_pipe(gpu, pipe_id, REG_A8XX_CP_SQE_STAT_ADDR_PIPE, 1); 928 status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 929 REG_A8XX_CP_SQE_STAT_DATA_PIPE); 930 dev_err_ratelimited(&gpu->pdev->dev, 931 "CP Opcode error, opcode=0x%x\n", status); 932 } 933 934 if (status & BIT(10)) { 935 status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 936 REG_A8XX_CP_PROTECT_STATUS_PIPE); 937 dev_err_ratelimited(&gpu->pdev->dev, 938 "CP REG PROTECT error, status=0x%x\n", status); 939 } 940 } 941 } 942 943 out: 944 /* Turn off interrupts to avoid triggering recovery again */ 945 a8xx_aperture_clear(gpu); 946 gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, 0); 947 gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, 0); 948 949 kthread_queue_work(gpu->worker, &gpu->recover_work); 950 } 951 952 static u32 gpu_periph_read(struct msm_gpu *gpu, u32 dbg_offset) 953 { 954 gpu_write(gpu, REG_A8XX_CP_SQE_UCODE_DBG_ADDR_PIPE, dbg_offset); 955 956 return gpu_read(gpu, REG_A8XX_CP_SQE_UCODE_DBG_DATA_PIPE); 957 } 958 959 static u64 gpu_periph_read64(struct msm_gpu *gpu, u32 dbg_offset) 960 { 961 u64 lo, hi; 962 963 lo = gpu_periph_read(gpu, dbg_offset); 964 hi = gpu_periph_read(gpu, dbg_offset + 1); 965 966 return (hi << 32) | lo; 967 } 968 969 #define CP_PERIPH_IB1_BASE_LO 0x7005 970 #define CP_PERIPH_IB1_BASE_HI 0x7006 971 #define CP_PERIPH_IB1_SIZE 0x7007 972 #define CP_PERIPH_IB1_OFFSET 0x7008 973 #define CP_PERIPH_IB2_BASE_LO 0x7009 974 #define CP_PERIPH_IB2_BASE_HI 0x700a 975 #define CP_PERIPH_IB2_SIZE 0x700b 976 #define CP_PERIPH_IB2_OFFSET 0x700c 977 #define CP_PERIPH_IB3_BASE_LO 0x700d 978 #define CP_PERIPH_IB3_BASE_HI 0x700e 979 #define CP_PERIPH_IB3_SIZE 0x700f 980 #define CP_PERIPH_IB3_OFFSET 0x7010 981 982 static void a8xx_fault_detect_irq(struct msm_gpu *gpu) 983 { 984 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 985 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 986 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); 987 unsigned long flags; 988 989 /* 990 * If stalled on SMMU fault, we could trip the GPU's hang detection, 991 * but the fault handler will trigger the devcore dump, and we want 992 * to otherwise resume normally rather than killing the submit, so 993 * just bail. 994 */ 995 if (gpu_read(gpu, REG_A8XX_RBBM_MISC_STATUS) & A8XX_RBBM_MISC_STATUS_SMMU_STALLED_ON_FAULT) 996 return; 997 998 /* 999 * Force the GPU to stay on until after we finish 1000 * collecting information 1001 */ 1002 if (!adreno_has_gmu_wrapper(adreno_gpu)) 1003 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1); 1004 1005 DRM_DEV_ERROR(&gpu->pdev->dev, 1006 "gpu fault ring %d fence %x status %8.8X gfx_status %8.8X\n", 1007 ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0, 1008 gpu_read(gpu, REG_A8XX_RBBM_STATUS), gpu_read(gpu, REG_A8XX_RBBM_GFX_STATUS)); 1009 1010 a8xx_aperture_acquire(gpu, PIPE_BR, &flags); 1011 1012 DRM_DEV_ERROR(&gpu->pdev->dev, 1013 "BR: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", 1014 gpu_read(gpu, REG_A8XX_RBBM_GFX_BR_STATUS), 1015 gpu_read(gpu, REG_A6XX_CP_RB_RPTR), 1016 gpu_read(gpu, REG_A6XX_CP_RB_WPTR), 1017 gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO), 1018 gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET), 1019 gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO), 1020 gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET), 1021 gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO), 1022 gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET)); 1023 1024 a8xx_aperture_release(gpu, flags); 1025 a8xx_aperture_acquire(gpu, PIPE_BV, &flags); 1026 1027 DRM_DEV_ERROR(&gpu->pdev->dev, 1028 "BV: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", 1029 gpu_read(gpu, REG_A8XX_RBBM_GFX_BV_STATUS), 1030 gpu_read(gpu, REG_A8XX_CP_RB_RPTR_BV), 1031 gpu_read(gpu, REG_A6XX_CP_RB_WPTR), 1032 gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO), 1033 gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET), 1034 gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO), 1035 gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET), 1036 gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO), 1037 gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET)); 1038 1039 a8xx_aperture_release(gpu, flags); 1040 a8xx_aperture_clear(gpu); 1041 1042 /* Turn off the hangcheck timer to keep it from bothering us */ 1043 timer_delete(&gpu->hangcheck_timer); 1044 1045 kthread_queue_work(gpu->worker, &gpu->recover_work); 1046 } 1047 1048 static void a8xx_sw_fuse_violation_irq(struct msm_gpu *gpu) 1049 { 1050 u32 status; 1051 1052 status = gpu_read(gpu, REG_A8XX_RBBM_SW_FUSE_INT_STATUS); 1053 gpu_write(gpu, REG_A8XX_RBBM_SW_FUSE_INT_MASK, 0); 1054 1055 dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); 1056 1057 /* 1058 * Ignore FASTBLEND violations, because the HW will silently fall back 1059 * to legacy blending. 1060 */ 1061 if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | 1062 A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { 1063 timer_delete(&gpu->hangcheck_timer); 1064 1065 kthread_queue_work(gpu->worker, &gpu->recover_work); 1066 } 1067 } 1068 1069 irqreturn_t a8xx_irq(struct msm_gpu *gpu) 1070 { 1071 struct msm_drm_private *priv = gpu->dev->dev_private; 1072 u32 status = gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS); 1073 1074 gpu_write(gpu, REG_A8XX_RBBM_INT_CLEAR_CMD, status); 1075 1076 if (priv->disable_err_irq) 1077 status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS; 1078 1079 if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT) 1080 a8xx_fault_detect_irq(gpu); 1081 1082 if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) { 1083 u32 rl0, rl1; 1084 1085 rl0 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_0); 1086 rl1 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_1); 1087 dev_err_ratelimited(&gpu->pdev->dev, 1088 "CP | AHB bus error RL_ERROR_0: %x, RL_ERROR_1: %x\n", rl0, rl1); 1089 } 1090 1091 if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR) 1092 a8xx_cp_hw_err_irq(gpu); 1093 1094 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW) 1095 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n"); 1096 1097 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW) 1098 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n"); 1099 1100 if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) 1101 dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); 1102 1103 if (status & A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR) 1104 dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Trap interrupt\n"); 1105 1106 if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) 1107 a8xx_sw_fuse_violation_irq(gpu); 1108 1109 if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { 1110 msm_gpu_retire(gpu); 1111 a6xx_preempt_trigger(gpu); 1112 } 1113 1114 if (status & A6XX_RBBM_INT_0_MASK_CP_SW) 1115 a6xx_preempt_irq(gpu); 1116 1117 return IRQ_HANDLED; 1118 } 1119 1120 void a8xx_llc_activate(struct a6xx_gpu *a6xx_gpu) 1121 { 1122 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 1123 struct msm_gpu *gpu = &adreno_gpu->base; 1124 1125 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { 1126 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); 1127 1128 gpu_scid &= GENMASK(5, 0); 1129 1130 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, 1131 FIELD_PREP(GENMASK(29, 24), gpu_scid) | 1132 FIELD_PREP(GENMASK(23, 18), gpu_scid) | 1133 FIELD_PREP(GENMASK(17, 12), gpu_scid) | 1134 FIELD_PREP(GENMASK(11, 6), gpu_scid) | 1135 FIELD_PREP(GENMASK(5, 0), gpu_scid)); 1136 1137 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, 1138 FIELD_PREP(GENMASK(27, 22), gpu_scid) | 1139 FIELD_PREP(GENMASK(21, 16), gpu_scid) | 1140 FIELD_PREP(GENMASK(15, 10), gpu_scid) | 1141 BIT(8)); 1142 } 1143 1144 llcc_slice_activate(a6xx_gpu->htw_llc_slice); 1145 } 1146 1147 #define GBIF_CLIENT_HALT_MASK BIT(0) 1148 #define GBIF_ARB_HALT_MASK BIT(1) 1149 #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0) 1150 #define VBIF_RESET_ACK_MASK 0xF0 1151 #define GPR0_GBIF_HALT_REQUEST 0x1E0 1152 1153 void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off) 1154 { 1155 struct msm_gpu *gpu = &adreno_gpu->base; 1156 1157 if (gx_off) { 1158 /* Halt the gx side of GBIF */ 1159 gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 1); 1160 spin_until(gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT_ACK) & 1); 1161 } 1162 1163 /* Halt new client requests on GBIF */ 1164 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); 1165 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 1166 (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK); 1167 1168 /* Halt all AXI requests on GBIF */ 1169 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK); 1170 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 1171 (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK); 1172 1173 /* The GBIF halt needs to be explicitly cleared */ 1174 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0); 1175 } 1176 1177 int a8xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value) 1178 { 1179 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1180 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1181 1182 mutex_lock(&a6xx_gpu->gmu.lock); 1183 1184 /* Force the GPU power on so we can read this register */ 1185 a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); 1186 1187 *value = gpu_read64(gpu, REG_A8XX_CP_ALWAYS_ON_COUNTER); 1188 1189 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); 1190 1191 mutex_unlock(&a6xx_gpu->gmu.lock); 1192 1193 return 0; 1194 } 1195 1196 u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate) 1197 { 1198 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1199 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1200 u64 busy_cycles; 1201 1202 /* 19.2MHz */ 1203 *out_sample_rate = 19200000; 1204 1205 busy_cycles = gmu_read64(&a6xx_gpu->gmu, 1206 REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L, 1207 REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H); 1208 1209 return busy_cycles; 1210 } 1211 1212 bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 1213 { 1214 return true; 1215 } 1216