// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */


#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_gpu_trace.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"

#include <linux/bitfield.h>
#include <linux/devfreq.h>
#include <linux/pm_domain.h>
#include <linux/soc/qcom/llcc-qcom.h>

#define GPU_PAS_ID 13

static u64 a6xx_gmu_get_timestamp(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u64 count_hi, count_lo, temp;

	/* Re-read the high word to make sure it didn't roll over mid-read */
	do {
		count_hi = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H);
		count_lo = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L);
		temp = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H);
	} while (unlikely(count_hi != temp));

	return (count_hi << 32) | count_lo;
}

static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask)
{
	/* Success if !writedropped0/1 */
	if (!(status & mask))
		return true;

	udelay(10);

	/* Try to update fenced register again */
	gpu_write(gpu, offset, value);

	/* We can't do a posted write here because the power domain could be
	 * in collapse state. So use the heaviest barrier instead
	 */
	mb();
	return false;
}

static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	u32 status;

	gpu_write(gpu, offset, value);

	/* Nothing else to be done in the case of no-GMU */
	if (adreno_has_gmu_wrapper(adreno_gpu))
		return 0;

	/* We can't do a posted write here because the power domain could be
	 * in collapse state. So use the heaviest barrier instead
	 */
	mb();

	if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
			      fence_status_check(gpu, offset, value, status, mask), 0, 1000))
		return 0;

	/* Try again for another 1ms before failing */
	gpu_write(gpu, offset, value);
	mb();

	if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
			      fence_status_check(gpu, offset, value, status, mask), 0, 1000)) {
		/*
		 * The 'delay' warning is here because the pause to print this
		 * warning will allow the GPU to move to power collapse, which
		 * defeats the purpose of continuous polling for 2 ms
		 */
		dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
				    offset);
		return 0;
	}

	dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n",
			    offset);

	return -ETIMEDOUT;
}

int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b)
{
	int ret;

	ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask);
	if (ret)
		return ret;

	if (!is_64b)
		return 0;

	ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);

	return ret;
}

static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Check that the GMU is idle */
	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_isidle(&a6xx_gpu->gmu))
		return false;

	/* Check that the CX master is idle */
	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
			~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
		return false;

	return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
		A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
}

static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a6xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A6XX_RBBM_STATUS),
			gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
	if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
		OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
		OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
	}
}
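
/*
 * Illustrative sketch (not part of the upstream driver): a6xx_fenced_write()
 * above wraps fenced_write() so callers can poke CP registers that sit behind
 * an AHB fence while the GX power domain may be collapsing. A typical call,
 * as used for the WPTR update in a6xx_flush() below, looks like:
 *
 *	a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
 *
 * where BIT(0) is assumed to be the "writedropped0" bit of
 * REG_A6XX_GMU_AHB_FENCE_STATUS that fence_status_check() polls against.
 */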

void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	update_shadow_rptr(gpu, ring);

	spin_lock_irqsave(&ring->preempt_lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	/* Update HW if this is the current ring and we are not in preempt */
	if (!a6xx_in_preempt(a6xx_gpu)) {
		if (a6xx_gpu->cur_ring == ring)
			a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
		else
			ring->restore_wptr = true;
	} else {
		ring->restore_wptr = true;
	}

	spin_unlock_irqrestore(&ring->preempt_lock, flags);
}

static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
		u64 iova)
{
	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
	OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
		CP_REG_TO_MEM_0_CNT(2) |
		CP_REG_TO_MEM_0_64B);
	OUT_RING(ring, lower_32_bits(iova));
	OUT_RING(ring, upper_32_bits(iova));
}

static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
		struct msm_ringbuffer *ring, struct msm_gem_submit *submit)
{
	bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
	struct msm_context *ctx = submit->queue->ctx;
	struct drm_gpuvm *vm = msm_context_vm(submit->dev, ctx);
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	phys_addr_t ttbr;
	u32 asid;
	u64 memptr = rbmemptr(ring, ttbr0);

	if (ctx->seqno == ring->cur_ctx_seqno)
		return;

	if (msm_iommu_pagetable_params(to_msm_vm(vm)->mmu, &ttbr, &asid))
		return;

	if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) {
		/* Wait for previous submit to complete before continuing: */
		OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
		OUT_RING(ring, 0);
		OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
		OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
		OUT_RING(ring, submit->seqno - 1);

		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH);

		/* Reset state used to synchronize BR and BV */
		OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1);
		OUT_RING(ring,
			 CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS |
			 CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE |
			 CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER |
			 CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS);

		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH);

		OUT_PKT7(ring, CP_EVENT_WRITE, 1);
		OUT_RING(ring, LRZ_FLUSH_INVALIDATE);

		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
	}

	if (!sysprof) {
		if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
			/* Turn off protected mode to write to special registers */
			OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
			OUT_RING(ring, 0);
		}

		if (adreno_is_a8xx(adreno_gpu)) {
			OUT_PKT4(ring, REG_A8XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
			OUT_RING(ring, 1);
			OUT_PKT4(ring, REG_A8XX_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1);
			OUT_RING(ring, 1);
		} else {
			OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
			OUT_RING(ring, 1);
		}
	}

	/* Execute the table update */
	OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));

	OUT_RING(ring,
		 CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
		 CP_SMMU_TABLE_UPDATE_1_ASID(asid));
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));

	/*
	 * Write the new TTBR0 to the memstore. This is useful for debugging
	 * and is also needed for preemption.
	 */
	OUT_PKT7(ring, CP_MEM_WRITE, 5);
	OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_LO(lower_32_bits(memptr)));
	OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_HI(upper_32_bits(memptr)));
	OUT_RING(ring, lower_32_bits(ttbr));
	OUT_RING(ring, upper_32_bits(ttbr));
	OUT_RING(ring, ctx->seqno);

	/*
	 * Sync both threads after switching pagetables and enable BR only
	 * to make sure BV doesn't race ahead while BR is still switching
	 * pagetables.
	 */
	if (adreno_is_a7xx(&a6xx_gpu->base) || adreno_is_a8xx(&a6xx_gpu->base)) {
		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
	}

	/*
	 * And finally, trigger a UCHE flush to be sure there isn't anything
	 * lingering in that part of the GPU
	 */

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CACHE_INVALIDATE);

	if (!sysprof) {
		u32 reg_status = adreno_is_a8xx(adreno_gpu) ?
			REG_A8XX_RBBM_PERFCTR_SRAM_INIT_STATUS :
			REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS;
		/*
		 * Wait for SRAM clear after the pgtable update, so the
		 * two can happen in parallel:
		 */
		OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
		OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
		OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(reg_status));
		OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0));
		OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
		OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
		OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));

		if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
			/* Re-enable protected mode: */
			OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
			OUT_RING(ring, 1);
		}
	}
}
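
/*
 * Illustrative note (not part of the upstream driver): the CP_MEM_WRITE above
 * leaves a small record in the per-ring memstore that the preemption code can
 * later consume. Assuming the rbmemptr() layout used elsewhere in this file,
 * the record is simply:
 *
 *	memstore[ttbr0 + 0] = lower_32_bits(ttbr);
 *	memstore[ttbr0 + 4] = upper_32_bits(ttbr);
 *	memstore[ttbr0 + 8] = ctx->seqno;
 *
 * i.e. enough to identify the last pagetable/context pair used on this ring.
 */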

static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	adreno_check_and_reenable_stall(adreno_gpu);

	a6xx_set_pagetable(a6xx_gpu, ring, submit);

	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
		rbmemptr_stats(ring, index, cpcycles_start));

	/*
	 * For PM4 the GMU register offsets are calculated from the base of the
	 * GPU registers, so we need to add 0x1a800 to the register value on A630
	 * to get the right value from PM4.
	 */
	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_CONTEXT,
		rbmemptr_stats(ring, index, alwayson_start));

	/* Invalidate CCU depth and color */
	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
			ibs++;
			break;
		}

		/*
		 * Periodically update the shadow wptr if needed, so that we
		 * can see partial progress of submits with a large number of
		 * cmds; otherwise we could needlessly stall waiting for
		 * ringbuffer state, simply due to looking at a shadow
		 * rptr value that has not been updated.
		 */
		if ((ibs % 32) == 0)
			update_shadow_rptr(gpu, ring);
	}

	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
		rbmemptr_stats(ring, index, cpcycles_end));
	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_CONTEXT,
		rbmemptr_stats(ring, index, alwayson_end));

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
	 * timestamp is written to the memory and then triggers the interrupt
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
		CP_EVENT_WRITE_0_IRQ);
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	trace_msm_gpu_submit_flush(submit, adreno_gpu->funcs->get_timestamp(gpu));

	a6xx_flush(gpu, ring);
}

void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
		struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue)
{
	u64 preempt_postamble;

	OUT_PKT7(ring, CP_SET_PSEUDO_REG, 12);

	OUT_RING(ring, SMMU_INFO);
	/* don't save SMMU, we write the record from the kernel instead */
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* privileged and non-secure buffer save */
	OUT_RING(ring, NON_SECURE_SAVE_ADDR);
	OUT_RING(ring, lower_32_bits(
		a6xx_gpu->preempt_iova[ring->id]));
	OUT_RING(ring, upper_32_bits(
		a6xx_gpu->preempt_iova[ring->id]));

	/* user context buffer save, seems to be unused by fw */
	OUT_RING(ring, NON_PRIV_SAVE_ADDR);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	OUT_RING(ring, COUNTER);
	/* seems OK to set to 0 to disable it */
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* Emit postamble to clear perfcounters */
	preempt_postamble = a6xx_gpu->preempt_postamble_iova;

	OUT_PKT7(ring, CP_SET_AMBLE, 3);
	OUT_RING(ring, lower_32_bits(preempt_postamble));
	OUT_RING(ring, upper_32_bits(preempt_postamble));
	OUT_RING(ring, CP_SET_AMBLE_2_DWORDS(
			a6xx_gpu->preempt_postamble_len) |
			CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE));
}

static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = submit->ring;
	u32 rbbm_perfctr_cp0, cp_always_on_context;
	unsigned int i, ibs = 0;

	adreno_check_and_reenable_stall(adreno_gpu);

	/*
	 * Toggle concurrent binning for pagetable switch and set the thread to
	 * BR since only it can execute the pagetable switch packets.
	 */
	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);

	a6xx_set_pagetable(a6xx_gpu, ring, submit);

	/*
	 * If preemption is enabled, then set the pseudo register for the save
	 * sequence
	 */
	if (gpu->nr_rings > 1)
		a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue);

	if (adreno_is_a8xx(adreno_gpu)) {
		rbbm_perfctr_cp0 = REG_A8XX_RBBM_PERFCTR_CP(0);
		cp_always_on_context = REG_A8XX_CP_ALWAYS_ON_CONTEXT;
	} else {
		rbbm_perfctr_cp0 = REG_A7XX_RBBM_PERFCTR_CP(0);
		cp_always_on_context = REG_A6XX_CP_ALWAYS_ON_CONTEXT;
	}

	get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_start));
	get_stats_counter(ring, cp_always_on_context, rbmemptr_stats(ring, index, alwayson_start));

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BOTH);

	OUT_PKT7(ring, CP_SET_MARKER, 1);
	OUT_RING(ring, 0x101); /* IFPC disable */

	if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
		OUT_PKT7(ring, CP_SET_MARKER, 1);
		OUT_RING(ring, 0x00d); /* IB1LIST start */
	}

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
			ibs++;
			break;
		}

		/*
		 * Periodically update the shadow wptr if needed, so that we
		 * can see partial progress of submits with a large number of
		 * cmds; otherwise we could needlessly stall waiting for
		 * ringbuffer state, simply due to looking at a shadow
		 * rptr value that has not been updated.
		 */
		if ((ibs % 32) == 0)
			update_shadow_rptr(gpu, ring);
	}

	if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
		OUT_PKT7(ring, CP_SET_MARKER, 1);
		OUT_RING(ring, 0x00e); /* IB1LIST end */
	}

	get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_end));
	get_stats_counter(ring, cp_always_on_context, rbmemptr_stats(ring, index, alwayson_end));

	/* Write the fence to the scratch register */
	if (adreno_is_a8xx(adreno_gpu)) {
		OUT_PKT4(ring, REG_A8XX_CP_SCRATCH_GLOBAL(2), 1);
		OUT_RING(ring, submit->seqno);
	} else {
		OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
		OUT_RING(ring, submit->seqno);
	}

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BR);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CCU_INVALIDATE_DEPTH);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CCU_INVALIDATE_COLOR);

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BV);

	/*
	 * Make sure the timestamp is committed once BV pipe is
	 * completely done with this submission.
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_CLEAN | BIT(27));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, submit->seqno);

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BR);

	/*
	 * This makes sure that BR doesn't race ahead and commit
	 * timestamp to memstore while BV is still processing
	 * this submission.
	 */
	OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
	OUT_RING(ring, 0);
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, submit->seqno);

	a6xx_gpu->last_seqno[ring->id] = submit->seqno;

	/* write the ringbuffer timestamp */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_CLEAN | CP_EVENT_WRITE_0_IRQ | BIT(27));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BOTH);

	OUT_PKT7(ring, CP_SET_MARKER, 1);
	OUT_RING(ring, 0x100); /* IFPC enable */

	/* If preemption is enabled */
	if (gpu->nr_rings > 1) {
		/* Yield the floor on command completion */
		OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);

		/*
		 * If dword[2:1] are non zero, they specify an address for
		 * the CP to write the value of dword[3] to on preemption
		 * complete. Write 0 to skip the write.
		 */
		OUT_RING(ring, 0x00);
		OUT_RING(ring, 0x00);
		/* Data value - not used if the address above is 0 */
		OUT_RING(ring, 0x01);
		/* generate interrupt on preemption completion */
		OUT_RING(ring, 0x00);
	}


	trace_msm_gpu_submit_flush(submit, adreno_gpu->funcs->get_timestamp(gpu));

	a6xx_flush(gpu, ring);

	/* Check to see if we need to start preemption */
	if (adreno_is_a8xx(adreno_gpu))
		a8xx_preempt_trigger(gpu);
	else
		a6xx_preempt_trigger(gpu);
}

static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	const struct adreno_reglist *reg;
	unsigned int i;
	u32 cgc_delay, cgc_hyst;
	u32 val, clock_cntl_on;

	if (!(adreno_gpu->info->a6xx->hwcg || adreno_is_a7xx(adreno_gpu)))
		return;

	if (adreno_is_a630(adreno_gpu))
		clock_cntl_on = 0x8aa8aa02;
	else if (adreno_is_a610(adreno_gpu) || adreno_is_a612(adreno_gpu))
		clock_cntl_on = 0xaaa8aa82;
	else if (adreno_is_a702(adreno_gpu))
		clock_cntl_on = 0xaaaaaa82;
	else
		clock_cntl_on = 0x8aa8aa82;

	if (adreno_is_a612(adreno_gpu))
		cgc_delay = 0x11;
	else if (adreno_is_a615_family(adreno_gpu))
		cgc_delay = 0x111;
	else
		cgc_delay = 0x10111;

	if (adreno_is_a612(adreno_gpu))
		cgc_hyst = 0x55;
	else if (adreno_is_a615_family(adreno_gpu))
		cgc_hyst = 0x555;
	else
		cgc_hyst = 0x5555;

	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
		  state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
		  state ? cgc_delay : 0);
	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
		  state ? cgc_hyst : 0);

	if (!adreno_gpu->info->a6xx->hwcg) {
		gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
		gpu_write(gpu, REG_A7XX_RBBM_CGC_GLOBAL_LOAD_CMD, state ? 1 : 0);

		if (state) {
			gpu_write(gpu, REG_A7XX_RBBM_CGC_P2S_TRIG_CMD, 1);

			if (gpu_poll_timeout(gpu, REG_A7XX_RBBM_CGC_P2S_STATUS, val,
					     val & A7XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
				dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
				return;
			}

			gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
		}

		return;
	}

	val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);

	/* Don't re-program the registers if they are already correct */
	if ((!state && !val) || (state && (val == clock_cntl_on)))
		return;

	/* Disable SP clock before programming HWCG registers */
	if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);

	for (i = 0; (reg = &adreno_gpu->info->a6xx->hwcg[i], reg->offset); i++)
		gpu_write(gpu, reg->offset, state ? reg->value : 0);

	/* Enable SP clock */
	if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);

	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
}

static void a6xx_set_cp_protect(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
	unsigned i;

	/*
	 * Enable access protection to privileged registers, fault on an access
	 * protect violation and select the last span to protect from the start
	 * address all the way to the end of the register address space
	 */
	gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL,
		  A6XX_CP_PROTECT_CNTL_ACCESS_PROT_EN |
		  A6XX_CP_PROTECT_CNTL_ACCESS_FAULT_ON_VIOL_EN |
		  A6XX_CP_PROTECT_CNTL_LAST_SPAN_INF_RANGE);

	for (i = 0; i < protect->count - 1; i++) {
		/* Intentionally skip writing to some registers */
		if (protect->regs[i])
			gpu_write(gpu, REG_A6XX_CP_PROTECT(i), protect->regs[i]);
	}
	/* last CP_PROTECT to have "infinite" length on the last entry */
	gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]);
}

static int a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
{
	const struct qcom_ubwc_cfg_data *common_cfg;
	struct qcom_ubwc_cfg_data *cfg = &gpu->_ubwc_config;

	/* Inherit the common config and make some necessary fixups */
	common_cfg = qcom_ubwc_config_get_data();
	if (IS_ERR(common_cfg))
		return PTR_ERR(common_cfg);

	/* Copy the data into the internal struct to drop the const qualifier (temporarily) */
	*cfg = *common_cfg;

	/* Use the common config as-is for A8xx */
	if (!adreno_is_a8xx(gpu)) {
		cfg->ubwc_swizzle = 0x6;
		cfg->highest_bank_bit = 15;
	}

	if (adreno_is_a610(gpu)) {
		cfg->highest_bank_bit = 13;
		cfg->ubwc_swizzle = 0x7;
	}

	if (adreno_is_a612(gpu))
		cfg->highest_bank_bit = 14;

	if (adreno_is_a618(gpu))
		cfg->highest_bank_bit = 14;

	if (adreno_is_a619(gpu))
		/* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
		cfg->highest_bank_bit = 13;

	if (adreno_is_a619_holi(gpu))
		cfg->highest_bank_bit = 13;

	if (adreno_is_a621(gpu))
		cfg->highest_bank_bit = 13;

	if (adreno_is_a623(gpu))
		cfg->highest_bank_bit = 16;

	if (adreno_is_a650(gpu) ||
	    adreno_is_a660(gpu) ||
	    adreno_is_a690(gpu) ||
	    adreno_is_a730(gpu) ||
	    adreno_is_a740_family(gpu)) {
		/* TODO: get ddr type from bootloader and use 15 for LPDDR4 */
		cfg->highest_bank_bit = 16;
	}

	if (adreno_is_a663(gpu)) {
		cfg->highest_bank_bit = 13;
		cfg->ubwc_swizzle = 0x4;
	}

	if (adreno_is_7c3(gpu))
		cfg->highest_bank_bit = 14;

	if (adreno_is_a702(gpu))
		cfg->highest_bank_bit = 14;

	if (cfg->highest_bank_bit != common_cfg->highest_bank_bit)
		DRM_WARN_ONCE("Inconclusive highest_bank_bit value: %u (GPU) vs %u (UBWC_CFG)\n",
			      cfg->highest_bank_bit, common_cfg->highest_bank_bit);

	if (cfg->ubwc_swizzle != common_cfg->ubwc_swizzle)
		DRM_WARN_ONCE("Inconclusive ubwc_swizzle value: %u (GPU) vs %u (UBWC_CFG)\n",
			      cfg->ubwc_swizzle, common_cfg->ubwc_swizzle);

	gpu->ubwc_config = &gpu->_ubwc_config;

	return 0;
}

static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config;
	/*
	 * We subtract 13 from the highest bank bit (13 is the minimum value
	 * allowed by hw) and write the lowest two bits of the remaining value
	 * as hbb_lo and the bits above them as hbb_hi to the hardware.
	 */
	BUG_ON(cfg->highest_bank_bit < 13);
	u32 hbb = cfg->highest_bank_bit - 13;
	bool rgb565_predicator = cfg->ubwc_enc_version >= UBWC_4_0;
	u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2);
	bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg);
	bool amsbc = cfg->ubwc_enc_version >= UBWC_3_0;
	bool min_acc_len_64b = false;
	u8 uavflagprd_inv = 0;
	u32 hbb_hi = hbb >> 2;
	u32 hbb_lo = hbb & 3;

	if (adreno_is_a650_family(adreno_gpu) || adreno_is_a7xx(adreno_gpu))
		uavflagprd_inv = 2;

	if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
		min_acc_len_64b = true;

	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
		  level2_swizzling_dis << 12 |
		  rgb565_predicator << 11 |
		  hbb_hi << 10 | amsbc << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);

	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
		  level2_swizzling_dis << 6 | hbb_hi << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);

	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
		  level2_swizzling_dis << 12 | hbb_hi << 10 |
		  uavflagprd_inv << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);

	if (adreno_is_a7xx(adreno_gpu)) {
		for (u32 pipe_id = PIPE_BR; pipe_id <= PIPE_BV; pipe_id++) {
			gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
				  A7XX_CP_APERTURE_CNTL_HOST_PIPE(pipe_id));
			gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL,
				  FIELD_PREP(GENMASK(8, 5), hbb_lo));
		}
		gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
			  A7XX_CP_APERTURE_CNTL_HOST_PIPE(PIPE_NONE));
	}

	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL,
		  min_acc_len_64b << 23 | hbb_lo << 21);

	gpu_write(gpu, REG_A6XX_RBBM_NC_MODE_CNTL,
		  cfg->macrotile_mode);
}
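
/*
 * Illustrative example (not part of the upstream driver): with
 * highest_bank_bit = 16, the encoding used by a6xx_set_ubwc_config() above
 * works out to
 *
 *	hbb    = 16 - 13  = 3
 *	hbb_lo = hbb & 3  = 3	(bits [2:1] of RB_NC_MODE_CNTL)
 *	hbb_hi = hbb >> 2 = 0	(bit [10] of RB_NC_MODE_CNTL)
 *
 * while the minimum supported value, highest_bank_bit = 13, encodes as
 * hbb_lo = hbb_hi = 0.
 */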

static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	const struct adreno_reglist_list *reglist;
	const struct adreno_reglist_pipe_list *dyn_pwrup_reglist;
	void *ptr = a6xx_gpu->pwrup_reglist_ptr;
	struct cpu_gpu_lock *lock = ptr;
	u32 *dest = (u32 *)&lock->regs[0];
	u32 dyn_pwrup_reglist_count = 0;
	int i;

	lock->gpu_req = lock->cpu_req = lock->turn = 0;

	reglist = adreno_gpu->info->a6xx->ifpc_reglist;
	if (reglist) {
		lock->ifpc_list_len = reglist->count;

		/*
		 * For each entry in each of the lists, write the offset and the current
		 * register value into the GPU buffer
		 */
		for (i = 0; i < reglist->count; i++) {
			*dest++ = reglist->regs[i];
			*dest++ = gpu_read(gpu, reglist->regs[i]);
		}
	}

	reglist = adreno_gpu->info->a6xx->pwrup_reglist;
	lock->preemption_list_len = reglist->count;

	for (i = 0; i < reglist->count; i++) {
		*dest++ = reglist->regs[i];
		*dest++ = gpu_read(gpu, reglist->regs[i]);
	}

	/*
	 * The overall register list is composed of
	 * 1. Static IFPC-only registers
	 * 2. Static IFPC + preemption registers
	 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
	 *
	 * The first two lists are static. Their sizes are stored as the
	 * number of pairs in ifpc_list_len and preemption_list_len
	 * respectively. With concurrent binning, some of the perfcounter
	 * registers are virtualized, so the CP needs to know the pipe id to
	 * program the aperture in order to restore them. Thus, the third list
	 * is a dynamic list of triplets
	 * (<aperture, shifted 12 bits> <address> <data>), and its length is
	 * stored as the number of triplets in dynamic_list_len.
	 */
	dyn_pwrup_reglist = adreno_gpu->info->a6xx->dyn_pwrup_reglist;
	if (dyn_pwrup_reglist) {
		for (u32 pipe_id = PIPE_BR; pipe_id <= PIPE_BV; pipe_id++) {
			gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
				  A7XX_CP_APERTURE_CNTL_HOST_PIPE(pipe_id));
			for (i = 0; i < dyn_pwrup_reglist->count; i++) {
				if ((dyn_pwrup_reglist->regs[i].pipe & BIT(pipe_id)) == 0)
					continue;
				*dest++ = A7XX_CP_APERTURE_CNTL_HOST_PIPE(pipe_id);
				*dest++ = dyn_pwrup_reglist->regs[i].offset;
				*dest++ = gpu_read(gpu, dyn_pwrup_reglist->regs[i].offset);
				dyn_pwrup_reglist_count++;
			}
		}
		gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
			  A7XX_CP_APERTURE_CNTL_HOST_PIPE(PIPE_NONE));
	}
	lock->dynamic_list_len = dyn_pwrup_reglist_count;
}
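
/*
 * Illustrative sketch (not part of the upstream driver) of the buffer that
 * a7xx_patch_pwrup_reglist() fills in, assuming the struct cpu_gpu_lock
 * header used above:
 *
 *	[cpu_gpu_lock header: gpu_req/cpu_req/turn, list lengths]
 *	[ifpc_list_len pairs]        <offset> <value> ...
 *	[preemption_list_len pairs]  <offset> <value> ...
 *	[dynamic_list_len triplets]  <aperture> <offset> <value> ...
 *
 * The CP presumably walks this buffer on IFPC exit / preemption restore, so
 * the values captured here are the ones the hardware should come back up with.
 */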

static int a7xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings <= 1)
		return 0;

	/* Turn CP protection off */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Generate interrupt on preemption completion */
	OUT_RING(ring, 0x00);

	a6xx_flush(gpu, ring);

	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a6xx_cp_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002f);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* No workarounds enabled */
	OUT_RING(ring, 0x00000000);

	/* Pad rest of the cmds with 0's */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	a6xx_flush(gpu, ring);
	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a7xx_cp_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];
	u32 mask;

	/* Disable concurrent binning before sending CP init */
	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, BIT(27));

	OUT_PKT7(ring, CP_ME_INIT, 7);

	/* Use multiple HW contexts */
	mask = BIT(0);

	/* Enable error detection */
	mask |= BIT(1);

	/* Set default reset state */
	mask |= BIT(3);

	/* Disable save/restore of performance counters across preemption */
	mask |= BIT(6);

	/* Enable the register init list with the spinlock */
	mask |= BIT(8);

	OUT_RING(ring, mask);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Operation mode mask */
	OUT_RING(ring, 0x00000002);

	/* *Don't* send a power up reg list for concurrent binning (TODO) */
	/* Lo address */
	OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova));
	/* Hi address */
	OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova));
	/* BIT(31) set => read the regs from the list */
	OUT_RING(ring, BIT(31));

	a6xx_flush(gpu, ring);
	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
}

/*
 * Check that the microcode version is new enough to include several key
 * security fixes. Return true if the ucode is safe.
 */
static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
		struct drm_gem_object *obj)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;
	const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE];
	u32 *buf = msm_gem_get_vaddr(obj);
	bool ret = false;

	if (IS_ERR(buf))
		return false;

	/* A7xx is safe! */
	if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu) || adreno_is_a8xx(adreno_gpu))
		return true;

	/*
	 * Targets up to a640 (a618, a630 and a640) need to check for a
	 * microcode version that is patched to support the whereami opcode or
	 * one that is new enough to include it by default.
	 *
	 * a650 tier targets don't need whereami but still need to be
	 * equal to or newer than 0.95 for other security fixes
	 *
	 * a660 targets have all the critical security fixes from the start
	 */
	if (!strcmp(sqe_name, "a630_sqe.fw")) {
		/*
		 * If the lowest nibble is 0xa that is an indication that this
		 * microcode has been patched. The actual version is in dword
		 * [3] but we only care about the patchlevel which is the lowest
		 * nibble of dword [3]
		 *
		 * Otherwise check that the firmware is greater than or equal
		 * to 1.90 which was the first version that had this fix built
		 * in
		 */
		if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
			(buf[0] & 0xfff) >= 0x190) {
			a6xx_gpu->has_whereami = true;
			ret = true;
			goto out;
		}

		DRM_DEV_ERROR(&gpu->pdev->dev,
			"a630 SQE ucode is too old. Have version %x need at least %x\n",
			buf[0] & 0xfff, 0x190);
	} else if (!strcmp(sqe_name, "a650_sqe.fw")) {
		if ((buf[0] & 0xfff) >= 0x095) {
			ret = true;
			goto out;
		}

		DRM_DEV_ERROR(&gpu->pdev->dev,
			"a650 SQE ucode is too old. Have version %x need at least %x\n",
			buf[0] & 0xfff, 0x095);
	} else if (!strcmp(sqe_name, "a660_sqe.fw")) {
		ret = true;
	} else {
		DRM_DEV_ERROR(&gpu->pdev->dev,
			"unknown GPU, add it to a6xx_ucode_check_version()!!\n");
	}
out:
	msm_gem_put_vaddr(obj);
	return ret;
}

static int a6xx_ucode_load(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	if (!a6xx_gpu->sqe_bo) {
		a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova);

		if (IS_ERR(a6xx_gpu->sqe_bo)) {
			int ret = PTR_ERR(a6xx_gpu->sqe_bo);

			a6xx_gpu->sqe_bo = NULL;
			DRM_DEV_ERROR(&gpu->pdev->dev,
				"Could not allocate SQE ucode: %d\n", ret);

			return ret;
		}

		msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
		if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
			msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm);
			drm_gem_object_put(a6xx_gpu->sqe_bo);

			a6xx_gpu->sqe_bo = NULL;
			return -EPERM;
		}
	}

	if (!a6xx_gpu->aqe_bo && adreno_gpu->fw[ADRENO_FW_AQE]) {
		a6xx_gpu->aqe_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_AQE], &a6xx_gpu->aqe_iova);

		if (IS_ERR(a6xx_gpu->aqe_bo)) {
			int ret = PTR_ERR(a6xx_gpu->aqe_bo);

			a6xx_gpu->aqe_bo = NULL;
			DRM_DEV_ERROR(&gpu->pdev->dev,
				"Could not allocate AQE ucode: %d\n", ret);

			return ret;
		}

		msm_gem_object_set_name(a6xx_gpu->aqe_bo, "aqefw");
	}

	/*
	 * Expanded APRIV and targets that support WHERE_AM_I both need a
	 * privileged buffer to store the RPTR shadow
	 */
	if ((adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) &&
	    !a6xx_gpu->shadow_bo) {
		a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
						      sizeof(u32) * gpu->nr_rings,
						      MSM_BO_WC | MSM_BO_MAP_PRIV,
						      gpu->vm, &a6xx_gpu->shadow_bo,
						      &a6xx_gpu->shadow_iova);

		if (IS_ERR(a6xx_gpu->shadow))
			return PTR_ERR(a6xx_gpu->shadow);

		msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow");
	}

	a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PAGE_SIZE,
							 MSM_BO_WC | MSM_BO_MAP_PRIV,
							 gpu->vm, &a6xx_gpu->pwrup_reglist_bo,
							 &a6xx_gpu->pwrup_reglist_iova);

	if (IS_ERR(a6xx_gpu->pwrup_reglist_ptr))
		return PTR_ERR(a6xx_gpu->pwrup_reglist_ptr);

	msm_gem_object_set_name(a6xx_gpu->pwrup_reglist_bo, "pwrup_reglist");

	return 0;
}
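
/*
 * Illustrative example (not part of the upstream driver) of the decoding in
 * a6xx_ucode_check_version() above, assuming an a630 SQE image whose first
 * dword reads 0x0000016a:
 *
 *	(buf[0] & 0xf) == 0xa	 -> the image is a patched build, so the
 *				    patchlevel (buf[2] & 0xf) >= 1 decides
 *				    whether WHERE_AM_I is supported
 *	(buf[0] & 0xfff) == 0x16a -> below 0x190, so an *unpatched* image at
 *				    this version would be rejected as too old
 */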

int a6xx_zap_shader_init(struct msm_gpu *gpu)
{
	static bool loaded;
	int ret;

	if (loaded)
		return 0;

	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);

	loaded = !ret;
	return ret;
}

#define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
	A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
	A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	A6XX_RBBM_INT_0_MASK_CP_IB2 | \
	A6XX_RBBM_INT_0_MASK_CP_IB1 | \
	A6XX_RBBM_INT_0_MASK_CP_RB | \
	A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
	A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
	A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)

#define A7XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
	A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
	A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
	A6XX_RBBM_INT_0_MASK_CP_SW | \
	A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
	A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
	A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
	A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
	A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
	A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
	A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)

#define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \
	A6XX_CP_APRIV_CNTL_RBFETCH | \
	A6XX_CP_APRIV_CNTL_RBPRIVLEVEL | \
	A6XX_CP_APRIV_CNTL_RBRPWB)

#define A7XX_BR_APRIVMASK (A7XX_APRIV_MASK | \
	A6XX_CP_APRIV_CNTL_CDREAD | \
	A6XX_CP_APRIV_CNTL_CDWRITE)

static int hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	u64 gmem_range_min;
	unsigned int i;
	int ret;

	if (!adreno_has_gmu_wrapper(adreno_gpu)) {
		/* Make sure the GMU keeps the GPU on while we set it up */
		ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
		if (ret)
			return ret;
	}

	/* Clear GBIF halt in case GX domain was not collapsed */
	if (adreno_is_a619_holi(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
		gpu_read(gpu, REG_A6XX_GBIF_HALT);

		gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, 0);
		gpu_read(gpu, REG_A6XX_RBBM_GPR0_CNTL);
	} else if (a6xx_has_gbif(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
		gpu_read(gpu, REG_A6XX_GBIF_HALT);

		gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
		gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT);
	}

	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);

	if (adreno_is_a619_holi(adreno_gpu))
		a6xx_sptprac_enable(gmu);

	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	if (!adreno_is_a7xx(adreno_gpu)) {
		/* Turn on 64 bit addressing for all blocks */
		gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
	}

	/* enable hardware clockgating */
	a6xx_set_hwcg(gpu, true);

	/* For gmu-wrapper implementations, do the VBIF/GBIF CX configuration here */
	if (adreno_is_a610_family(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
	}

	if (adreno_is_a610_family(adreno_gpu) ||
	    adreno_is_a640_family(adreno_gpu) ||
	    adreno_is_a650_family(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3);
	} else if (adreno_is_a7xx(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x2120212);
	} else {
		gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
	}

	if (adreno_is_a630(adreno_gpu))
		gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);

	if (adreno_is_a7xx(adreno_gpu))
		gpu_write(gpu, REG_A6XX_UCHE_GBIF_GX_CONFIG, 0x10240e0);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);

	/* Disable L2 bypass in the UCHE */
	if (adreno_is_a7xx(adreno_gpu)) {
		gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
	} else {
		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0);
		gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
	}

	if (!(adreno_is_a650_family(adreno_gpu) ||
	      adreno_is_a702(adreno_gpu) ||
	      adreno_is_a730(adreno_gpu))) {
		gmem_range_min = adreno_is_a740_family(adreno_gpu) ? SZ_16M : SZ_1M;

		/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN, gmem_range_min);

		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX,
			    gmem_range_min + adreno_gpu->info->gmem - 1);
	}

	if (adreno_is_a7xx(adreno_gpu))
		gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, BIT(23));
	else {
		gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
		gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);
	}

	if (adreno_is_a640_family(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
	} else if (adreno_is_a610_family(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060);
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16);
	} else if (!adreno_is_a7xx(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
	}

	if (adreno_is_a660_family(adreno_gpu))
		gpu_write(gpu, REG_A7XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);

	/* Setting the mem pool size */
	if (adreno_is_a610(adreno_gpu) || adreno_is_a612(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 48);
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 47);
	} else if (adreno_is_a702(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 64);
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 63);
	} else if (!adreno_is_a7xx(adreno_gpu))
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);


	/* Set the default primFifo threshold values */
	if (adreno_gpu->info->a6xx->prim_fifo_threshold)
		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL,
			  adreno_gpu->info->a6xx->prim_fifo_threshold);

	/* Set the AHB default slave response to "ERROR" */
	gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);

	if (adreno_is_a7xx(adreno_gpu)) {
		/* Turn on the IFPC counter (countable 4 on XOCLK4) */
		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1,
			  FIELD_PREP(GENMASK(7, 0), 0x4));
	}

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT);

	a6xx_set_ubwc_config(gpu);

	/* Enable fault detection */
	if (adreno_is_a612(adreno_gpu) ||
	    adreno_is_a730(adreno_gpu) ||
	    adreno_is_a740_family(adreno_gpu))
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff);
	else if (adreno_is_a690(adreno_gpu))
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x4fffff);
	else if (adreno_is_a619(adreno_gpu))
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff);
	else if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3ffff);
	else
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x1fffff);

	gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1);

	/* Set weights for bicubic filtering */
	if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0);
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1),
			  0x3fe05ff4);
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2),
			  0x3fa0ebee);
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3),
			  0x3f5193ed);
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4),
			  0x3f0243f0);
	}

	/* Set up the CX GMU counter 0 to count busy ticks */
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);

	/* Enable the power counter */
	gmu_rmw(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, BIT(5));
	gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);

	/* Protect registers from the CP */
	a6xx_set_cp_protect(gpu);

	if (adreno_is_a660_family(adreno_gpu)) {
		if (adreno_is_a690(adreno_gpu))
			gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x00028801);
		else
			gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
	} else if (adreno_is_a702(adreno_gpu)) {
		/* Something to do with the HLSQ cluster */
		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(24));
	}

	if (adreno_is_a690(adreno_gpu))
		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90);
	/* Set dualQ + disable afull for A660 GPU */
	else if (adreno_is_a660(adreno_gpu) || adreno_is_a663(adreno_gpu))
		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
	else if (adreno_is_a7xx(adreno_gpu))
		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG,
			  FIELD_PREP(GENMASK(19, 16), 6) |
			  FIELD_PREP(GENMASK(15, 12), 6) |
			  FIELD_PREP(GENMASK(11, 8), 9) |
			  BIT(3) | BIT(2) |
			  FIELD_PREP(GENMASK(1, 0), 2));

	/* Enable expanded apriv for targets that support it */
	if (gpu->hw_apriv) {
		if (adreno_is_a7xx(adreno_gpu)) {
			gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
				  A7XX_BR_APRIVMASK);
			gpu_write(gpu, REG_A7XX_CP_BV_APRIV_CNTL,
				  A7XX_APRIV_MASK);
			gpu_write(gpu, REG_A7XX_CP_LPAC_APRIV_CNTL,
				  A7XX_APRIV_MASK);
		} else
			gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
				  BIT(6) | BIT(5) | BIT(3) | BIT(2) | BIT(1));
	}

	if (adreno_is_a750(adreno_gpu)) {
		/* Disable ubwc merged UFC request feature */
		gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(19), BIT(19));

		/* Enable TP flaghint and other performance settings */
		gpu_write(gpu, REG_A6XX_TPL1_DBG_ECO_CNTL1, 0xc0700);
	} else if (adreno_is_a7xx(adreno_gpu)) {
		/* Disable non-ubwc read reqs from passing write reqs */
		gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(11), BIT(11));
	}

	/* Enable interrupts */
	gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK,
		  adreno_is_a7xx(adreno_gpu) ? A7XX_INT_MASK : A6XX_INT_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		goto out;

	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);

	/*
	 * Targets that support extended APRIV can use the RPTR shadow from
	 * hardware but all the other ones need to disable the feature. Targets
	 * that support the WHERE_AM_I opcode can use that instead.
	 */
	if (adreno_gpu->base.hw_apriv)
		gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
	else
		gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
			  MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Configure the RPTR shadow if needed: */
	if (a6xx_gpu->shadow_bo) {
		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR,
			    shadowptr(a6xx_gpu, gpu->rb[0]));
		for (unsigned int i = 0; i < gpu->nr_rings; i++)
			a6xx_gpu->shadow[i] = 0;
	}

	/* ..which means "always" on A7xx, also for BV shadow */
	if (adreno_is_a7xx(adreno_gpu)) {
		gpu_write64(gpu, REG_A7XX_CP_BV_RB_RPTR_ADDR,
			    rbmemptr(gpu->rb[0], bv_rptr));
	}

	a6xx_preempt_hw_init(gpu);

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];

	for (i = 0; i < gpu->nr_rings; i++)
		gpu->rb[i]->cur_ctx_seqno = 0;

	/* Enable the SQE to start the CP engine */
	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);

	if (adreno_is_a7xx(adreno_gpu) && !a6xx_gpu->pwrup_reglist_emitted) {
		a7xx_patch_pwrup_reglist(gpu);
		a6xx_gpu->pwrup_reglist_emitted = true;
	}

	ret = adreno_is_a7xx(adreno_gpu) ? a7xx_cp_init(gpu) : a6xx_cp_init(gpu);
	if (ret)
		goto out;

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a6xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a6xx_flush(gpu, gpu->rb[0]);
		if (!a6xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
		 * if you mess this up you are about to crash horribly)
		 */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
		ret = 0;
	} else {
		return ret;
	}

out:
	if (adreno_has_gmu_wrapper(adreno_gpu))
		return ret;

	/* Last step - yield the ringbuffer */
	a7xx_preempt_start(gpu);

	/*
	 * Tell the GMU that we are done touching the GPU and it can start power
	 * management
	 */
	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);

	if (a6xx_gpu->gmu.legacy) {
		/* Take the GMU out of its special boot mode */
		a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
	}

	if (!ret && (refcount_read(&gpu->sysprof_active) > 1)) {
		ret = a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET);
		if (!ret)
			set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status);
	}

	return ret;
}

static int a6xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int ret;

	mutex_lock(&a6xx_gpu->gmu.lock);
	ret = hw_init(gpu);
	mutex_unlock(&a6xx_gpu->gmu.lock);

	return ret;
}

static void a6xx_dump(struct msm_gpu *gpu)
{
	DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n",
		gpu_read(gpu, REG_A6XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static void a6xx_recover(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int active_submits;

	adreno_dump_info(gpu);

	if (adreno_gpu->funcs->gx_is_on(adreno_gpu)) {
		/* Sometimes crashstate capture is skipped, so SQE should be halted here again */
		gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);

		if (hang_debug)
			a6xx_dump(gpu);

	}

	/*
	 * To handle recovery specific sequences during the rpm suspend we are
	 * about to trigger
	 */

	a6xx_gpu->hung = true;

	pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);

	/* active_submits won't change until we make a submission */
	mutex_lock(&gpu->active_lock);
	active_submits = gpu->active_submits;

	/*
	 * Temporarily clear active_submits count to silence a WARN() in the
	 * runtime suspend cb
	 */
	gpu->active_submits = 0;

	if (adreno_has_gmu_wrapper(adreno_gpu) || adreno_has_rgmu(adreno_gpu)) {
		/* Drain the outstanding traffic on memory buses */
		adreno_gpu->funcs->bus_halt(adreno_gpu, true);

		/* Reset the GPU to a clean state */
		a6xx_gpu_sw_reset(gpu, true);
		a6xx_gpu_sw_reset(gpu, false);
	}

	reinit_completion(&gmu->pd_gate);
	dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
	dev_pm_genpd_synced_poweroff(gmu->cxpd);

	/* Drop the rpm refcount from active submits */
	if (active_submits)
		pm_runtime_put(&gpu->pdev->dev);

	/* And the final one from recover worker */
	pm_runtime_put_sync(&gpu->pdev->dev);

	if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
		DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");

	dev_pm_genpd_remove_notifier(gmu->cxpd);

	pm_runtime_use_autosuspend(&gpu->pdev->dev);

	if (active_submits)
pm_runtime_get(&gpu->pdev->dev); 1705 1706 pm_runtime_get_sync(&gpu->pdev->dev); 1707 1708 gpu->active_submits = active_submits; 1709 mutex_unlock(&gpu->active_lock); 1710 1711 msm_gpu_hw_init(gpu); 1712 a6xx_gpu->hung = false; 1713 } 1714 1715 static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid) 1716 { 1717 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1718 static const char *uche_clients[7] = { 1719 "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", 1720 }; 1721 u32 val; 1722 1723 if (adreno_is_a7xx(adreno_gpu)) { 1724 if (mid != 1 && mid != 2 && mid != 3 && mid != 8) 1725 return "UNKNOWN"; 1726 } else { 1727 if (mid < 1 || mid > 3) 1728 return "UNKNOWN"; 1729 } 1730 1731 /* 1732 * The source of the data depends on the mid ID read from FSYNR1. 1733 * and the client ID read from the UCHE block 1734 */ 1735 val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF); 1736 1737 if (adreno_is_a7xx(adreno_gpu)) { 1738 /* Bit 3 for mid=3 indicates BR or BV */ 1739 static const char *uche_clients_a7xx[16] = { 1740 "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", 1741 "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP", 1742 "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", 1743 "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP", 1744 }; 1745 1746 /* LPAC has the same clients as BR and BV, but because it is 1747 * compute-only some of them do not exist and there are holes 1748 * in the array. 1749 */ 1750 static const char *uche_clients_lpac_a7xx[8] = { 1751 "-", "LPAC_SP", "-", "-", 1752 "LPAC_HLSQ", "-", "-", "LPAC_TP", 1753 }; 1754 1755 val &= GENMASK(6, 0); 1756 1757 /* mid=3 refers to BR or BV */ 1758 if (mid == 3) { 1759 if (val < ARRAY_SIZE(uche_clients_a7xx)) 1760 return uche_clients_a7xx[val]; 1761 else 1762 return "UCHE"; 1763 } 1764 1765 /* mid=8 refers to LPAC */ 1766 if (mid == 8) { 1767 if (val < ARRAY_SIZE(uche_clients_lpac_a7xx)) 1768 return uche_clients_lpac_a7xx[val]; 1769 else 1770 return "UCHE_LPAC"; 1771 } 1772 1773 /* mid=2 is a catchall for everything else in LPAC */ 1774 if (mid == 2) 1775 return "UCHE_LPAC"; 1776 1777 /* mid=1 is a catchall for everything else in BR/BV */ 1778 return "UCHE"; 1779 } else if (adreno_is_a660_family(adreno_gpu)) { 1780 static const char *uche_clients_a660[8] = { 1781 "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", "TP", 1782 }; 1783 1784 static const char *uche_clients_a660_not[8] = { 1785 "not VFD", "not SP", "not VSC", "not VPC", 1786 "not HLSQ", "not PC", "not LRZ", "not TP", 1787 }; 1788 1789 val &= GENMASK(6, 0); 1790 1791 if (mid == 3 && val < ARRAY_SIZE(uche_clients_a660)) 1792 return uche_clients_a660[val]; 1793 1794 if (mid == 1 && val < ARRAY_SIZE(uche_clients_a660_not)) 1795 return uche_clients_a660_not[val]; 1796 1797 return "UCHE"; 1798 } else { 1799 /* mid = 3 is most precise and refers to only one block per client */ 1800 if (mid == 3) 1801 return uche_clients[val & 7]; 1802 1803 /* For mid=2 the source is TP or VFD except when the client id is 0 */ 1804 if (mid == 2) 1805 return ((val & 7) == 0) ? 
"TP" : "TP|VFD"; 1806 1807 /* For mid=1 just return "UCHE" as a catchall for everything else */ 1808 return "UCHE"; 1809 } 1810 } 1811 1812 static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id) 1813 { 1814 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1815 1816 if (id == 0) 1817 return "CP"; 1818 else if (id == 4) 1819 return "CCU"; 1820 else if (id == 6) 1821 return "CDP Prefetch"; 1822 else if (id == 7) 1823 return "GMU"; 1824 else if (id == 5 && adreno_is_a7xx(adreno_gpu)) 1825 return "Flag cache"; 1826 1827 return a6xx_uche_fault_block(gpu, id); 1828 } 1829 1830 static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data) 1831 { 1832 struct msm_gpu *gpu = arg; 1833 struct adreno_smmu_fault_info *info = data; 1834 const char *block = "unknown"; 1835 1836 u32 scratch[] = { 1837 gpu_read(gpu, REG_A6XX_CP_SCRATCH(4)), 1838 gpu_read(gpu, REG_A6XX_CP_SCRATCH(5)), 1839 gpu_read(gpu, REG_A6XX_CP_SCRATCH(6)), 1840 gpu_read(gpu, REG_A6XX_CP_SCRATCH(7)), 1841 }; 1842 1843 if (info) 1844 block = a6xx_fault_block(gpu, info->fsynr1 & 0xff); 1845 1846 return adreno_fault_handler(gpu, iova, flags, info, block, scratch); 1847 } 1848 1849 static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu) 1850 { 1851 u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS); 1852 1853 if (status & A6XX_CP_INT_CP_OPCODE_ERROR) { 1854 u32 val; 1855 1856 gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1); 1857 val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA); 1858 dev_err_ratelimited(&gpu->pdev->dev, 1859 "CP | opcode error | possible opcode=0x%8.8X\n", 1860 val); 1861 } 1862 1863 if (status & A6XX_CP_INT_CP_UCODE_ERROR) 1864 dev_err_ratelimited(&gpu->pdev->dev, 1865 "CP ucode error interrupt\n"); 1866 1867 if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR) 1868 dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n", 1869 gpu_read(gpu, REG_A6XX_CP_HW_FAULT)); 1870 1871 if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) { 1872 u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS); 1873 1874 dev_err_ratelimited(&gpu->pdev->dev, 1875 "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n", 1876 val & (1 << 20) ? "READ" : "WRITE", 1877 (val & 0x3ffff), val); 1878 } 1879 1880 if (status & A6XX_CP_INT_CP_AHB_ERROR && !adreno_is_a7xx(to_adreno_gpu(gpu))) 1881 dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n"); 1882 1883 if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR) 1884 dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n"); 1885 1886 if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR) 1887 dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n"); 1888 1889 } 1890 1891 static void a6xx_fault_detect_irq(struct msm_gpu *gpu) 1892 { 1893 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); 1894 1895 /* 1896 * If stalled on SMMU fault, we could trip the GPU's hang detection, 1897 * but the fault handler will trigger the devcore dump, and we want 1898 * to otherwise resume normally rather than killing the submit, so 1899 * just bail. 1900 */ 1901 if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT) 1902 return; 1903 1904 DRM_DEV_ERROR(&gpu->pdev->dev, 1905 "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", 1906 ring ? ring->id : -1, ring ? 
ring->fctx->last_fence : 0, 1907 gpu_read(gpu, REG_A6XX_RBBM_STATUS), 1908 gpu_read(gpu, REG_A6XX_CP_RB_RPTR), 1909 gpu_read(gpu, REG_A6XX_CP_RB_WPTR), 1910 gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), 1911 gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), 1912 gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), 1913 gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE)); 1914 1915 /* Turn off the hangcheck timer to keep it from bothering us */ 1916 timer_delete(&gpu->hangcheck_timer); 1917 1918 /* Turn off interrupts to avoid triggering recovery again */ 1919 gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 0); 1920 1921 kthread_queue_work(gpu->worker, &gpu->recover_work); 1922 } 1923 1924 static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) 1925 { 1926 u32 status; 1927 1928 status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); 1929 gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); 1930 1931 dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); 1932 1933 /* 1934 * Ignore FASTBLEND violations, because the HW will silently fall back 1935 * to legacy blending. 1936 */ 1937 if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | 1938 A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { 1939 timer_delete(&gpu->hangcheck_timer); 1940 1941 kthread_queue_work(gpu->worker, &gpu->recover_work); 1942 } 1943 } 1944 1945 static void a6xx_gpu_keepalive_vote(struct msm_gpu *gpu, bool on) 1946 { 1947 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1948 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1949 1950 if (adreno_has_gmu_wrapper(adreno_gpu)) 1951 return; 1952 1953 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, on); 1954 } 1955 1956 static int irq_poll_fence(struct msm_gpu *gpu) 1957 { 1958 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1959 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1960 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 1961 u32 status; 1962 1963 if (adreno_has_gmu_wrapper(adreno_gpu)) 1964 return 0; 1965 1966 if (gmu_poll_timeout_atomic(gmu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, status, !status, 1, 100)) { 1967 u32 rbbm_unmasked = gmu_read(gmu, REG_A6XX_GMU_RBBM_INT_UNMASKED_STATUS); 1968 1969 dev_err_ratelimited(&gpu->pdev->dev, 1970 "irq fence poll timeout, fence_ctrl=0x%x, unmasked_status=0x%x\n", 1971 status, rbbm_unmasked); 1972 return -ETIMEDOUT; 1973 } 1974 1975 return 0; 1976 } 1977 1978 static irqreturn_t a6xx_irq(struct msm_gpu *gpu) 1979 { 1980 struct msm_drm_private *priv = gpu->dev->dev_private; 1981 1982 /* Set keepalive vote to avoid power collapse after RBBM_INT_0_STATUS is read */ 1983 a6xx_gpu_keepalive_vote(gpu, true); 1984 1985 if (irq_poll_fence(gpu)) 1986 goto done; 1987 1988 u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS); 1989 1990 gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status); 1991 1992 if (priv->disable_err_irq) 1993 status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS; 1994 1995 if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT) 1996 a6xx_fault_detect_irq(gpu); 1997 1998 if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) 1999 dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n"); 2000 2001 if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR) 2002 a6xx_cp_hw_err_irq(gpu); 2003 2004 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW) 2005 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n"); 2006 2007 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW) 2008 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n"); 2009 2010 if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) 2011 dev_err_ratelimited(&gpu->pdev->dev, 
"UCHE | Out of bounds access\n"); 2012 2013 if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) 2014 a7xx_sw_fuse_violation_irq(gpu); 2015 2016 if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { 2017 msm_gpu_retire(gpu); 2018 a6xx_preempt_trigger(gpu); 2019 } 2020 2021 if (status & A6XX_RBBM_INT_0_MASK_CP_SW) 2022 a6xx_preempt_irq(gpu); 2023 2024 done: 2025 a6xx_gpu_keepalive_vote(gpu, false); 2026 2027 return IRQ_HANDLED; 2028 } 2029 2030 static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu) 2031 { 2032 llcc_slice_deactivate(a6xx_gpu->llc_slice); 2033 llcc_slice_deactivate(a6xx_gpu->htw_llc_slice); 2034 } 2035 2036 static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) 2037 { 2038 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 2039 struct msm_gpu *gpu = &adreno_gpu->base; 2040 u32 cntl1_regval = 0; 2041 2042 if (IS_ERR(a6xx_gpu->llc_mmio)) 2043 return; 2044 2045 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { 2046 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); 2047 2048 gpu_scid &= 0x1f; 2049 cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) | 2050 (gpu_scid << 15) | (gpu_scid << 20); 2051 2052 /* On A660, the SCID programming for UCHE traffic is done in 2053 * A6XX_GBIF_SCACHE_CNTL0[14:10] 2054 */ 2055 if (adreno_is_a660_family(adreno_gpu)) 2056 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) | 2057 (1 << 8), (gpu_scid << 10) | (1 << 8)); 2058 } 2059 2060 /* 2061 * For targets with a MMU500, activate the slice but don't program the 2062 * register. The XBL will take care of that. 2063 */ 2064 if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) { 2065 if (!a6xx_gpu->have_mmu500) { 2066 u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice); 2067 2068 gpuhtw_scid &= 0x1f; 2069 cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid); 2070 } 2071 } 2072 2073 if (!cntl1_regval) 2074 return; 2075 2076 /* 2077 * Program the slice IDs for the various GPU blocks and GPU MMU 2078 * pagetables 2079 */ 2080 if (!a6xx_gpu->have_mmu500) { 2081 a6xx_llc_write(a6xx_gpu, 2082 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval); 2083 2084 /* 2085 * Program cacheability overrides to not allocate cache 2086 * lines on a write miss 2087 */ 2088 a6xx_llc_rmw(a6xx_gpu, 2089 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); 2090 return; 2091 } 2092 2093 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval); 2094 } 2095 2096 static void a7xx_llc_activate(struct a6xx_gpu *a6xx_gpu) 2097 { 2098 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 2099 struct msm_gpu *gpu = &adreno_gpu->base; 2100 2101 if (IS_ERR(a6xx_gpu->llc_mmio)) 2102 return; 2103 2104 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { 2105 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); 2106 2107 gpu_scid &= GENMASK(4, 0); 2108 2109 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, 2110 FIELD_PREP(GENMASK(29, 25), gpu_scid) | 2111 FIELD_PREP(GENMASK(24, 20), gpu_scid) | 2112 FIELD_PREP(GENMASK(19, 15), gpu_scid) | 2113 FIELD_PREP(GENMASK(14, 10), gpu_scid) | 2114 FIELD_PREP(GENMASK(9, 5), gpu_scid) | 2115 FIELD_PREP(GENMASK(4, 0), gpu_scid)); 2116 2117 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, 2118 FIELD_PREP(GENMASK(14, 10), gpu_scid) | 2119 BIT(8)); 2120 } 2121 2122 llcc_slice_activate(a6xx_gpu->htw_llc_slice); 2123 } 2124 2125 static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu) 2126 { 2127 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */ 2128 if (adreno_has_gmu_wrapper(&a6xx_gpu->base)) 2129 return; 2130 2131 llcc_slice_putd(a6xx_gpu->llc_slice); 2132 
llcc_slice_putd(a6xx_gpu->htw_llc_slice); 2133 } 2134 2135 static void a6xx_llc_slices_init(struct platform_device *pdev, 2136 struct a6xx_gpu *a6xx_gpu, bool is_a7xx) 2137 { 2138 struct device_node *phandle; 2139 2140 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */ 2141 if (adreno_has_gmu_wrapper(&a6xx_gpu->base)) 2142 return; 2143 2144 /* 2145 * There is a different programming path for A6xx targets with an 2146 * mmu500 attached, so detect if that is the case 2147 */ 2148 phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0); 2149 a6xx_gpu->have_mmu500 = (phandle && 2150 of_device_is_compatible(phandle, "arm,mmu-500")); 2151 of_node_put(phandle); 2152 2153 if (is_a7xx || !a6xx_gpu->have_mmu500) 2154 a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem"); 2155 else 2156 a6xx_gpu->llc_mmio = NULL; 2157 2158 a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU); 2159 a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); 2160 2161 if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice)) 2162 a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); 2163 } 2164 2165 #define GBIF_CLIENT_HALT_MASK BIT(0) 2166 #define GBIF_ARB_HALT_MASK BIT(1) 2167 #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0) 2168 #define VBIF_RESET_ACK_MASK 0xF0 2169 #define GPR0_GBIF_HALT_REQUEST 0x1E0 2170 2171 void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off) 2172 { 2173 struct msm_gpu *gpu = &adreno_gpu->base; 2174 2175 if (adreno_is_a619_holi(adreno_gpu)) { 2176 gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, GPR0_GBIF_HALT_REQUEST); 2177 spin_until((gpu_read(gpu, REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) & 2178 (VBIF_RESET_ACK_MASK)) == VBIF_RESET_ACK_MASK); 2179 } else if (!a6xx_has_gbif(adreno_gpu)) { 2180 gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, VBIF_XIN_HALT_CTRL0_MASK); 2181 spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & 2182 (VBIF_XIN_HALT_CTRL0_MASK)) == VBIF_XIN_HALT_CTRL0_MASK); 2183 gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0); 2184 2185 return; 2186 } 2187 2188 if (gx_off) { 2189 /* Halt the gx side of GBIF */ 2190 gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1); 2191 spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1); 2192 } 2193 2194 /* Halt new client requests on GBIF */ 2195 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); 2196 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 2197 (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK); 2198 2199 /* Halt all AXI requests on GBIF */ 2200 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK); 2201 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 2202 (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK); 2203 2204 /* The GBIF halt needs to be explicitly cleared */ 2205 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0); 2206 } 2207 2208 void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert) 2209 { 2210 /* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */ 2211 if (adreno_is_a610(to_adreno_gpu(gpu)) || adreno_is_a8xx(to_adreno_gpu(gpu))) 2212 return; 2213 2214 gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert); 2215 /* Perform a bogus read and add a brief delay to ensure ordering. 
*/ 2216 gpu_read(gpu, REG_A6XX_RBBM_SW_RESET_CMD); 2217 udelay(1); 2218 2219 /* The reset line needs to be asserted for at least 100 us */ 2220 if (assert) 2221 udelay(100); 2222 } 2223 2224 static int a6xx_gmu_pm_resume(struct msm_gpu *gpu) 2225 { 2226 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2227 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2228 int ret; 2229 2230 gpu->needs_hw_init = true; 2231 2232 trace_msm_gpu_resume(0); 2233 2234 mutex_lock(&a6xx_gpu->gmu.lock); 2235 ret = a6xx_gmu_resume(a6xx_gpu); 2236 mutex_unlock(&a6xx_gpu->gmu.lock); 2237 if (ret) 2238 return ret; 2239 2240 msm_devfreq_resume(gpu); 2241 2242 if (adreno_is_a8xx(adreno_gpu)) 2243 a8xx_llc_activate(a6xx_gpu); 2244 else if (adreno_is_a7xx(adreno_gpu)) 2245 a7xx_llc_activate(a6xx_gpu); 2246 else 2247 a6xx_llc_activate(a6xx_gpu); 2248 2249 return ret; 2250 } 2251 2252 static int a6xx_pm_resume(struct msm_gpu *gpu) 2253 { 2254 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2255 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2256 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 2257 unsigned long freq = gpu->fast_rate; 2258 struct dev_pm_opp *opp; 2259 int ret; 2260 2261 gpu->needs_hw_init = true; 2262 2263 trace_msm_gpu_resume(0); 2264 2265 mutex_lock(&a6xx_gpu->gmu.lock); 2266 2267 opp = dev_pm_opp_find_freq_ceil(&gpu->pdev->dev, &freq); 2268 if (IS_ERR(opp)) { 2269 ret = PTR_ERR(opp); 2270 goto err_set_opp; 2271 } 2272 dev_pm_opp_put(opp); 2273 2274 /* Set the core clock and bus bw, having VDD scaling in mind */ 2275 dev_pm_opp_set_opp(&gpu->pdev->dev, opp); 2276 2277 pm_runtime_resume_and_get(gmu->dev); 2278 pm_runtime_resume_and_get(gmu->gxpd); 2279 2280 ret = clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks); 2281 if (ret) 2282 goto err_bulk_clk; 2283 2284 ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks); 2285 if (ret) { 2286 clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks); 2287 goto err_bulk_clk; 2288 } 2289 2290 if (adreno_is_a619_holi(adreno_gpu)) 2291 a6xx_sptprac_enable(gmu); 2292 2293 /* If anything goes south, tear the GPU down piece by piece.. 
*/ 2294 if (ret) { 2295 err_bulk_clk: 2296 pm_runtime_put(gmu->gxpd); 2297 pm_runtime_put(gmu->dev); 2298 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL); 2299 } 2300 err_set_opp: 2301 mutex_unlock(&a6xx_gpu->gmu.lock); 2302 2303 if (!ret) { 2304 msm_devfreq_resume(gpu); 2305 a6xx_llc_activate(a6xx_gpu); 2306 } 2307 2308 return ret; 2309 } 2310 2311 static int a6xx_gmu_pm_suspend(struct msm_gpu *gpu) 2312 { 2313 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2314 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2315 int i, ret; 2316 2317 trace_msm_gpu_suspend(0); 2318 2319 a6xx_llc_deactivate(a6xx_gpu); 2320 2321 msm_devfreq_suspend(gpu); 2322 2323 mutex_lock(&a6xx_gpu->gmu.lock); 2324 ret = a6xx_gmu_stop(a6xx_gpu); 2325 mutex_unlock(&a6xx_gpu->gmu.lock); 2326 if (ret) 2327 return ret; 2328 2329 if (a6xx_gpu->shadow_bo) 2330 for (i = 0; i < gpu->nr_rings; i++) 2331 a6xx_gpu->shadow[i] = 0; 2332 2333 gpu->suspend_count++; 2334 2335 return 0; 2336 } 2337 2338 static int a6xx_pm_suspend(struct msm_gpu *gpu) 2339 { 2340 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2341 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2342 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 2343 int i; 2344 2345 trace_msm_gpu_suspend(0); 2346 2347 a6xx_llc_deactivate(a6xx_gpu); 2348 2349 msm_devfreq_suspend(gpu); 2350 2351 mutex_lock(&a6xx_gpu->gmu.lock); 2352 2353 /* Drain the outstanding traffic on memory buses */ 2354 adreno_gpu->funcs->bus_halt(adreno_gpu, true); 2355 2356 if (adreno_is_a619_holi(adreno_gpu)) 2357 a6xx_sptprac_disable(gmu); 2358 2359 clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks); 2360 clk_bulk_disable_unprepare(gmu->nr_clocks, gmu->clocks); 2361 2362 pm_runtime_put_sync(gmu->gxpd); 2363 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL); 2364 pm_runtime_put_sync(gmu->dev); 2365 2366 mutex_unlock(&a6xx_gpu->gmu.lock); 2367 2368 if (a6xx_gpu->shadow_bo) 2369 for (i = 0; i < gpu->nr_rings; i++) 2370 a6xx_gpu->shadow[i] = 0; 2371 2372 gpu->suspend_count++; 2373 2374 return 0; 2375 } 2376 2377 static u64 a6xx_get_timestamp(struct msm_gpu *gpu) 2378 { 2379 return gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER); 2380 } 2381 2382 static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu) 2383 { 2384 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2385 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2386 2387 return a6xx_gpu->cur_ring; 2388 } 2389 2390 static void a6xx_destroy(struct msm_gpu *gpu) 2391 { 2392 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2393 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2394 2395 if (a6xx_gpu->sqe_bo) { 2396 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm); 2397 drm_gem_object_put(a6xx_gpu->sqe_bo); 2398 } 2399 2400 if (a6xx_gpu->aqe_bo) { 2401 msm_gem_unpin_iova(a6xx_gpu->aqe_bo, gpu->vm); 2402 drm_gem_object_put(a6xx_gpu->aqe_bo); 2403 } 2404 2405 if (a6xx_gpu->shadow_bo) { 2406 msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->vm); 2407 drm_gem_object_put(a6xx_gpu->shadow_bo); 2408 } 2409 2410 a6xx_llc_slices_destroy(a6xx_gpu); 2411 2412 a6xx_gmu_remove(a6xx_gpu); 2413 2414 adreno_gpu_cleanup(adreno_gpu); 2415 2416 kfree(a6xx_gpu); 2417 } 2418 2419 static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate) 2420 { 2421 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2422 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2423 u64 busy_cycles; 2424 2425 /* 19.2MHz */ 2426 *out_sample_rate = 19200000; 2427 2428 busy_cycles = gmu_read64(&a6xx_gpu->gmu, 2429 REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L, 2430 
REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H); 2431 2432 return busy_cycles; 2433 } 2434 2435 static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, 2436 bool suspended) 2437 { 2438 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2439 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2440 2441 mutex_lock(&a6xx_gpu->gmu.lock); 2442 a6xx_gmu_set_freq(gpu, opp, suspended); 2443 mutex_unlock(&a6xx_gpu->gmu.lock); 2444 } 2445 2446 static struct drm_gpuvm * 2447 a6xx_create_vm(struct msm_gpu *gpu, struct platform_device *pdev) 2448 { 2449 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2450 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2451 unsigned long quirks = 0; 2452 2453 /* 2454 * This allows the GPU to set the bus attributes required to use system 2455 * cache on behalf of the iommu page table walker. 2456 */ 2457 if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice) && 2458 !device_iommu_capable(&pdev->dev, IOMMU_CAP_CACHE_COHERENCY)) 2459 quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA; 2460 2461 return adreno_iommu_create_vm(gpu, pdev, quirks); 2462 } 2463 2464 static struct drm_gpuvm * 2465 a6xx_create_private_vm(struct msm_gpu *gpu, bool kernel_managed) 2466 { 2467 struct msm_mmu *mmu; 2468 2469 mmu = msm_iommu_pagetable_create(to_msm_vm(gpu->vm)->mmu, kernel_managed); 2470 2471 if (IS_ERR(mmu)) 2472 return ERR_CAST(mmu); 2473 2474 return msm_gem_vm_create(gpu->dev, mmu, "gpu", ADRENO_VM_START, 2475 adreno_private_vm_size(gpu), kernel_managed); 2476 } 2477 2478 static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 2479 { 2480 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2481 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2482 2483 if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) 2484 return a6xx_gpu->shadow[ring->id]; 2485 2486 /* 2487 * This is true only on an A6XX_GEN1 with a GMU, IFPC enabled and a super old SQE firmware 2488 * without 'whereami' support 2489 */ 2490 WARN_ONCE((to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC), 2491 "Can't read CP_RB_RPTR register reliably\n"); 2492 2493 return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR); 2494 } 2495 2496 static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 2497 { 2498 struct msm_cp_state cp_state; 2499 bool progress; 2500 2501 /* 2502 * With IFPC, KMD doesn't know whether GX power domain is collapsed 2503 * or not. So, we can't blindly read the below registers in GX domain. 2504 * Let's trust the hang detection in HW and lie to the caller that 2505 * there was progress. 2506 */ 2507 if (to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC) 2508 return true; 2509 2510 cp_state = (struct msm_cp_state) { 2511 .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), 2512 .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), 2513 .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), 2514 .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE), 2515 }; 2516 2517 /* 2518 * Adjust the remaining data to account for what has already been 2519 * fetched from memory, but not yet consumed by the SQE. 2520 * 2521 * This is not *technically* correct, the amount buffered could 2522 * exceed the IB size due to hw prefetching ahead, but: 2523 * 2524 * (1) We aren't trying to find the exact position, just whether 2525 * progress has been made 2526 * (2) The CP_REG_TO_MEM at the end of a submit should be enough 2527 * to prevent prefetching into an unrelated submit. (And 2528 * either way, at some point the ROQ will be full.)
2529 */ 2530 cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB1) >> 16; 2531 cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB2) >> 16; 2532 2533 progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state)); 2534 2535 ring->last_cp_state = cp_state; 2536 2537 return progress; 2538 } 2539 2540 static u32 fuse_to_supp_hw(const struct adreno_info *info, u32 fuse) 2541 { 2542 if (!info->speedbins) 2543 return UINT_MAX; 2544 2545 for (int i = 0; info->speedbins[i].fuse != SHRT_MAX; i++) 2546 if (info->speedbins[i].fuse == fuse) 2547 return BIT(info->speedbins[i].speedbin); 2548 2549 return UINT_MAX; 2550 } 2551 2552 static int a6xx_read_speedbin(struct device *dev, struct a6xx_gpu *a6xx_gpu, 2553 const struct adreno_info *info, u32 *speedbin) 2554 { 2555 int ret; 2556 2557 /* Use speedbin fuse if present. Otherwise, fallback to softfuse */ 2558 ret = adreno_read_speedbin(dev, speedbin); 2559 if (ret != -ENOENT) 2560 return ret; 2561 2562 if (info->quirks & ADRENO_QUIRK_SOFTFUSE) { 2563 *speedbin = a6xx_llc_read(a6xx_gpu, REG_A8XX_CX_MISC_SW_FUSE_FREQ_LIMIT_STATUS); 2564 *speedbin = A8XX_CX_MISC_SW_FUSE_FREQ_LIMIT_STATUS_FINALFREQLIMIT(*speedbin); 2565 return 0; 2566 } 2567 2568 return -ENOENT; 2569 } 2570 2571 static int a6xx_set_supported_hw(struct device *dev, struct a6xx_gpu *a6xx_gpu, 2572 const struct adreno_info *info) 2573 { 2574 u32 supp_hw; 2575 u32 speedbin; 2576 int ret; 2577 2578 ret = a6xx_read_speedbin(dev, a6xx_gpu, info, &speedbin); 2579 /* 2580 * -ENOENT means that the platform doesn't support speedbin which is 2581 * fine 2582 */ 2583 if (ret == -ENOENT) { 2584 return 0; 2585 } else if (ret) { 2586 dev_err_probe(dev, ret, 2587 "failed to read speed-bin. Some OPPs may not be supported by hardware\n"); 2588 return ret; 2589 } 2590 2591 supp_hw = fuse_to_supp_hw(info, speedbin); 2592 2593 if (supp_hw == UINT_MAX) { 2594 DRM_DEV_ERROR(dev, 2595 "missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n", 2596 speedbin); 2597 supp_hw = BIT(0); /* Default */ 2598 } 2599 2600 ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1); 2601 if (ret) 2602 return ret; 2603 2604 return 0; 2605 } 2606 2607 static bool a6xx_aqe_is_enabled(struct adreno_gpu *adreno_gpu) 2608 { 2609 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2610 2611 /* 2612 * AQE uses preemption context record as scratch pad, so check if 2613 * preemption is enabled 2614 */ 2615 return (adreno_gpu->base.nr_rings > 1) && !!a6xx_gpu->aqe_bo; 2616 } 2617 2618 static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) 2619 { 2620 struct msm_drm_private *priv = dev->dev_private; 2621 struct platform_device *pdev = priv->gpu_pdev; 2622 struct adreno_platform_config *config = pdev->dev.platform_data; 2623 const struct adreno_info *info = config->info; 2624 struct device_node *node; 2625 struct a6xx_gpu *a6xx_gpu; 2626 struct adreno_gpu *adreno_gpu; 2627 struct msm_gpu *gpu; 2628 extern int enable_preemption; 2629 u32 speedbin; 2630 bool is_a7xx; 2631 int ret, nr_rings = 1; 2632 2633 a6xx_gpu = kzalloc_obj(*a6xx_gpu); 2634 if (!a6xx_gpu) 2635 return ERR_PTR(-ENOMEM); 2636 2637 adreno_gpu = &a6xx_gpu->base; 2638 gpu = &adreno_gpu->base; 2639 2640 mutex_init(&a6xx_gpu->gmu.lock); 2641 spin_lock_init(&a6xx_gpu->aperture_lock); 2642 2643 adreno_gpu->registers = NULL; 2644 2645 /* Check if there is a GMU phandle and set it up */ 2646 node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0); 2647 /* FIXME: How do we gracefully handle this? 
*/ 2648 BUG_ON(!node); 2649 2650 adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper"); 2651 2652 adreno_gpu->base.hw_apriv = 2653 !!(info->quirks & ADRENO_QUIRK_HAS_HW_APRIV); 2654 2655 /* gpu->info only gets assigned in adreno_gpu_init(). A8x is included intentionally */ 2656 is_a7xx = info->family >= ADRENO_7XX_GEN1; 2657 2658 a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx); 2659 2660 ret = a6xx_set_supported_hw(&pdev->dev, a6xx_gpu, info); 2661 if (ret) { 2662 a6xx_llc_slices_destroy(a6xx_gpu); 2663 kfree(a6xx_gpu); 2664 return ERR_PTR(ret); 2665 } 2666 2667 if ((enable_preemption == 1) || (enable_preemption == -1 && 2668 (info->quirks & ADRENO_QUIRK_PREEMPTION))) 2669 nr_rings = 4; 2670 2671 ret = adreno_gpu_init(dev, pdev, adreno_gpu, info->funcs, nr_rings); 2672 if (ret) { 2673 a6xx_destroy(&(a6xx_gpu->base.base)); 2674 return ERR_PTR(ret); 2675 } 2676 2677 /* Set the speedbin value that is passed to userspace */ 2678 if (a6xx_read_speedbin(&pdev->dev, a6xx_gpu, info, &speedbin) || !speedbin) 2679 speedbin = 0xffff; 2680 adreno_gpu->speedbin = (uint16_t) (0xffff & speedbin); 2681 2682 /* 2683 * For now only clamp to idle freq for devices where this is known not 2684 * to cause power supply issues: 2685 */ 2686 if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu)) 2687 priv->gpu_clamp_to_idle = true; 2688 2689 if (adreno_has_gmu_wrapper(adreno_gpu) || adreno_has_rgmu(adreno_gpu)) 2690 ret = a6xx_gmu_wrapper_init(a6xx_gpu, node); 2691 else 2692 ret = a6xx_gmu_init(a6xx_gpu, node); 2693 of_node_put(node); 2694 if (ret) { 2695 a6xx_destroy(&(a6xx_gpu->base.base)); 2696 return ERR_PTR(ret); 2697 } 2698 2699 adreno_gpu->uche_trap_base = 0x1fffffffff000ull; 2700 2701 msm_mmu_set_fault_handler(to_msm_vm(gpu->vm)->mmu, gpu, 2702 adreno_gpu->funcs->mmu_fault_handler); 2703 2704 ret = a6xx_calc_ubwc_config(adreno_gpu); 2705 if (ret) { 2706 a6xx_destroy(&(a6xx_gpu->base.base)); 2707 return ERR_PTR(ret); 2708 } 2709 2710 /* Set up the preemption specific bits and pieces for each ringbuffer */ 2711 a6xx_preempt_init(gpu); 2712 2713 return gpu; 2714 } 2715 2716 const struct adreno_gpu_funcs a6xx_gpu_funcs = { 2717 .base = { 2718 .get_param = adreno_get_param, 2719 .set_param = adreno_set_param, 2720 .hw_init = a6xx_hw_init, 2721 .ucode_load = a6xx_ucode_load, 2722 .pm_suspend = a6xx_gmu_pm_suspend, 2723 .pm_resume = a6xx_gmu_pm_resume, 2724 .recover = a6xx_recover, 2725 .submit = a6xx_submit, 2726 .active_ring = a6xx_active_ring, 2727 .irq = a6xx_irq, 2728 .destroy = a6xx_destroy, 2729 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2730 .show = a6xx_show, 2731 #endif 2732 .gpu_busy = a6xx_gpu_busy, 2733 .gpu_get_freq = a6xx_gmu_get_freq, 2734 .gpu_set_freq = a6xx_gpu_set_freq, 2735 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2736 .gpu_state_get = a6xx_gpu_state_get, 2737 .gpu_state_put = a6xx_gpu_state_put, 2738 #endif 2739 .create_vm = a6xx_create_vm, 2740 .create_private_vm = a6xx_create_private_vm, 2741 .get_rptr = a6xx_get_rptr, 2742 .progress = a6xx_progress, 2743 }, 2744 .init = a6xx_gpu_init, 2745 .get_timestamp = a6xx_gmu_get_timestamp, 2746 .bus_halt = a6xx_bus_clear_pending_transactions, 2747 .mmu_fault_handler = a6xx_fault_handler, 2748 .gx_is_on = a6xx_gmu_gx_is_on, 2749 }; 2750 2751 const struct adreno_gpu_funcs a6xx_gmuwrapper_funcs = { 2752 .base = { 2753 .get_param = adreno_get_param, 2754 .set_param = adreno_set_param, 2755 .hw_init = a6xx_hw_init, 2756 .ucode_load = a6xx_ucode_load, 2757 .pm_suspend = a6xx_pm_suspend, 2758 .pm_resume = a6xx_pm_resume, 2759 
.recover = a6xx_recover, 2760 .submit = a6xx_submit, 2761 .active_ring = a6xx_active_ring, 2762 .irq = a6xx_irq, 2763 .destroy = a6xx_destroy, 2764 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2765 .show = a6xx_show, 2766 #endif 2767 .gpu_busy = a6xx_gpu_busy, 2768 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2769 .gpu_state_get = a6xx_gpu_state_get, 2770 .gpu_state_put = a6xx_gpu_state_put, 2771 #endif 2772 .create_vm = a6xx_create_vm, 2773 .create_private_vm = a6xx_create_private_vm, 2774 .get_rptr = a6xx_get_rptr, 2775 .progress = a6xx_progress, 2776 }, 2777 .init = a6xx_gpu_init, 2778 .get_timestamp = a6xx_get_timestamp, 2779 .bus_halt = a6xx_bus_clear_pending_transactions, 2780 .mmu_fault_handler = a6xx_fault_handler, 2781 .gx_is_on = a6xx_gmu_gx_is_on, 2782 }; 2783 2784 const struct adreno_gpu_funcs a7xx_gpu_funcs = { 2785 .base = { 2786 .get_param = adreno_get_param, 2787 .set_param = adreno_set_param, 2788 .hw_init = a6xx_hw_init, 2789 .ucode_load = a6xx_ucode_load, 2790 .pm_suspend = a6xx_gmu_pm_suspend, 2791 .pm_resume = a6xx_gmu_pm_resume, 2792 .recover = a6xx_recover, 2793 .submit = a7xx_submit, 2794 .active_ring = a6xx_active_ring, 2795 .irq = a6xx_irq, 2796 .destroy = a6xx_destroy, 2797 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2798 .show = a6xx_show, 2799 #endif 2800 .gpu_busy = a6xx_gpu_busy, 2801 .gpu_get_freq = a6xx_gmu_get_freq, 2802 .gpu_set_freq = a6xx_gpu_set_freq, 2803 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2804 .gpu_state_get = a6xx_gpu_state_get, 2805 .gpu_state_put = a6xx_gpu_state_put, 2806 #endif 2807 .create_vm = a6xx_create_vm, 2808 .create_private_vm = a6xx_create_private_vm, 2809 .get_rptr = a6xx_get_rptr, 2810 .progress = a6xx_progress, 2811 }, 2812 .init = a6xx_gpu_init, 2813 .get_timestamp = a6xx_gmu_get_timestamp, 2814 .bus_halt = a6xx_bus_clear_pending_transactions, 2815 .mmu_fault_handler = a6xx_fault_handler, 2816 .gx_is_on = a7xx_gmu_gx_is_on, 2817 .aqe_is_enabled = a6xx_aqe_is_enabled, 2818 }; 2819 2820 const struct adreno_gpu_funcs a8xx_gpu_funcs = { 2821 .base = { 2822 .get_param = adreno_get_param, 2823 .set_param = adreno_set_param, 2824 .hw_init = a8xx_hw_init, 2825 .ucode_load = a6xx_ucode_load, 2826 .pm_suspend = a6xx_gmu_pm_suspend, 2827 .pm_resume = a6xx_gmu_pm_resume, 2828 .recover = a8xx_recover, 2829 .submit = a7xx_submit, 2830 .active_ring = a6xx_active_ring, 2831 .irq = a8xx_irq, 2832 .destroy = a6xx_destroy, 2833 .gpu_busy = a8xx_gpu_busy, 2834 .gpu_get_freq = a6xx_gmu_get_freq, 2835 .gpu_set_freq = a6xx_gpu_set_freq, 2836 .create_vm = a6xx_create_vm, 2837 .create_private_vm = a6xx_create_private_vm, 2838 .get_rptr = a6xx_get_rptr, 2839 .progress = a8xx_progress, 2840 }, 2841 .init = a6xx_gpu_init, 2842 .get_timestamp = a8xx_gmu_get_timestamp, 2843 .bus_halt = a8xx_bus_clear_pending_transactions, 2844 .mmu_fault_handler = a8xx_fault_handler, 2845 .gx_is_on = a8xx_gmu_gx_is_on, 2846 .aqe_is_enabled = a6xx_aqe_is_enabled, 2847 }; 2848
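/*
 * Minimal sketch (hypothetical, not part of this file) of how a catalog entry
 * would reference one of the funcs tables above; the name example_a7xx_info
 * and the chosen quirks are assumptions for illustration only:
 *
 *	static const struct adreno_info example_a7xx_info = {
 *		.family = ADRENO_7XX_GEN1,
 *		.quirks = ADRENO_QUIRK_HAS_HW_APRIV | ADRENO_QUIRK_PREEMPTION,
 *		.funcs = &a7xx_gpu_funcs,
 *	};
 *
 * a6xx_gpu_init() passes info->funcs straight to adreno_gpu_init(), so such an
 * entry would use the GMU-based pm_suspend/pm_resume paths and a7xx_submit().
 */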