// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */


#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_gpu_trace.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"

#include <linux/bitfield.h>
#include <linux/devfreq.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/pm_domain.h>
#include <linux/soc/qcom/llcc-qcom.h>

#define GPU_PAS_ID 13

/*
 * Read the 64-bit GMU always-on counter from its two 32-bit halves.
 *
 * The high half is sampled before and after the low half, and the whole
 * read is repeated until both high samples match, so the returned value
 * is never assembled from halves taken across a change of the high word.
 */
static u64 read_gmu_ao_counter(struct a6xx_gpu *a6xx_gpu)
{
	u64 count_hi, count_lo, temp;

	do {
		count_hi = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H);
		count_lo = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L);
		temp = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H);
	} while (unlikely(count_hi != temp));

	return (count_hi << 32) | count_lo;
}

/*
 * Poll condition used by fenced_write().
 *
 * Returns true when none of the @mask bits are set in @status. Otherwise it
 * waits 10us, writes @value to @offset again, issues a full barrier and
 * returns false so that the caller keeps polling.
 */
static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask)
{
	/* Success if !writedropped0/1 */
	if (!(status & mask))
		return true;

	udelay(10);

	/* Try to update fenced register again */
	gpu_write(gpu, offset, value);

	/* We can't do a posted write here because the power domain could be
	 * in collapse state. So use the heaviest barrier instead
	 */
	mb();
	return false;
}

/*
 * Write @value to the register at @offset and confirm through
 * REG_A6XX_GMU_AHB_FENCE_STATUS that the write was not dropped.
 *
 * @mask selects the fence status bits that must be clear for the write to
 * count as done (see fence_status_check()). On targets with a GMU wrapper
 * the plain write is all that happens.
 *
 * Two polling rounds are made; the write is re-issued before the second.
 * Returns 0 if either round succeeds (a "delay" message is logged when only
 * the second one does) and -ETIMEDOUT if both fail.
 */
static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	u32 status;

	gpu_write(gpu, offset, value);

	/* Nothing else to be done in the case of no-GMU */
	if (adreno_has_gmu_wrapper(adreno_gpu))
		return 0;

	/* We can't do a posted write here because the power domain could be
	 * in collapse state. So use the heaviest barrier instead
	 */
	mb();

	/* A zero return from gmu_poll_timeout() is treated as success here */
	if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
			fence_status_check(gpu, offset, value, status, mask), 0, 1000))
		return 0;

	/* Try again for another 1ms before failing */
	gpu_write(gpu, offset, value);
	mb();

	if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
			fence_status_check(gpu, offset, value, status, mask), 0, 1000)) {
		/*
		 * The 'delay' warning is here because the pause to print this
		 * warning will allow gpu to move to power collapse which
		 * defeats the purpose of continuous polling for 2 ms
		 */
		dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
				offset);
		return 0;
	}

	dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n",
			offset);

	return -ETIMEDOUT;
}

/*
 * Fenced write of a 32-bit or 64-bit value.
 *
 * The low 32 bits of @value go to @offset. When @is_64b is set the high
 * 32 bits are then written to @offset + 1. Returns 0 on success or the
 * error from the first fenced_write() that fails; the high word is not
 * attempted if the low word failed.
 */
int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b)
{
	int ret;

	ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask);
	if (ret)
		return ret;

	if (!is_64b)
		return 0;

	ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);

	return ret;
}

/*
 * Single idle probe used as the spin_until() condition in a6xx_idle().
 *
 * Returns true only if the GMU reports idle (skipped on GMU-wrapper
 * targets), RBBM_STATUS has no bit set other than CP_AHB_BUSY_CX_MASTER,
 * and the hang-detect bit is clear in RBBM_INT_0_STATUS.
 */
static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Check that the GMU is idle */
	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_isidle(&a6xx_gpu->gmu))
		return false;

	/* Check that the CX master is idle */
	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
			~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
		return false;

	return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
		A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
}

/*
 * Wait for @ring to drain and for the GPU to become idle.
 *
 * Returns true on success. Returns false if adreno_idle() fails or if the
 * idle probe does not succeed within spin_until(); in the latter case the
 * status, interrupt and rptr/wptr registers are logged.
 */
static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	/* The timeout branch is taken when spin_until() evaluates non-zero */
	if (spin_until(_a6xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A6XX_RBBM_STATUS),
			gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

/*
 * Queue a CP_WHERE_AM_I packet pointing at this ring's shadow rptr slot.
 *
 * Nothing is emitted unless the firmware supports WHERE_AM_I
 * (a6xx_gpu->has_whereami) and the GPU does not have hw_apriv set.
 */
static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
	if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
		OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
		OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
	}
}

/*
 * Publish the commands queued on @ring.
 *
 * After an optional shadow rptr update, ring->cur is advanced to ring->next
 * under ring->preempt_lock. The new wptr is written to CP_RB_WPTR only when
 * no preemption is in progress and @ring is the current ring; in every
 * other case ring->restore_wptr is set instead. The return value of the
 * fenced write is not checked here.
 */
void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	update_shadow_rptr(gpu, ring);

	spin_lock_irqsave(&ring->preempt_lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	/* Update HW if this is the current ring and we are not in preempt */
	if (!a6xx_in_preempt(a6xx_gpu)) {
		if (a6xx_gpu->cur_ring == ring)
			a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
		else
			ring->restore_wptr = true;
	} else {
		ring->restore_wptr = true;
	}

	spin_unlock_irqrestore(&ring->preempt_lock, flags);
}

/*
 * Queue a CP_REG_TO_MEM packet that copies register @counter to the GPU
 * address @iova, with a count of 2 and the 64B flag set.
 */
static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
		u64 iova)
{
	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
	OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
		CP_REG_TO_MEM_0_CNT(2) |
		CP_REG_TO_MEM_0_64B);
	OUT_RING(ring, lower_32_bits(iova));
	OUT_RING(ring, upper_32_bits(iova));
}

/*
 * Queue the commands that switch the GPU to the pagetable of the context
 * owning @submit.
 *
 * Nothing is emitted when the context is already the ring's current one
 * (ctx->seqno == ring->cur_ctx_seqno) or when the pagetable parameters
 * cannot be obtained. Unless system profiling is active (sysprof), a
 * perfcounter SRAM init is started before the switch and waited for after
 * it; on targets that are neither a7xx nor a8xx, protected mode is turned
 * off around that sequence.
 */
static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
		struct msm_ringbuffer *ring, struct msm_gem_submit *submit)
{
	bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
	struct msm_context *ctx = submit->queue->ctx;
	struct drm_gpuvm *vm = msm_context_vm(submit->dev, ctx);
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	phys_addr_t ttbr;
	u32 asid;
	u64 memptr = rbmemptr(ring, ttbr0);

	if (ctx->seqno == ring->cur_ctx_seqno)
		return;

	if (msm_iommu_pagetable_params(to_msm_vm(vm)->mmu, &ttbr, &asid))
		return;

	if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) {
		/* Wait for previous submit to complete before continuing: */
		OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
		OUT_RING(ring, 0);
		OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
		OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
		OUT_RING(ring, submit->seqno - 1);

		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH);

		/* Reset state used to synchronize BR and BV */
		OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1);
		OUT_RING(ring,
			 CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS |
			 CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE |
			 CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER |
			 CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS);

		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH);

		OUT_PKT7(ring, CP_EVENT_WRITE, 1);
		OUT_RING(ring, LRZ_FLUSH_INVALIDATE);

		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
	}

	if (!sysprof) {
		if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
			/* Turn off protected mode to write to special registers */
			OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
			OUT_RING(ring, 0);
		}

		/* Kick off the perfcounter SRAM init; it is waited for below */
		if (adreno_is_a8xx(adreno_gpu)) {
			OUT_PKT4(ring, REG_A8XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
			OUT_RING(ring, 1);
			OUT_PKT4(ring, REG_A8XX_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1);
			OUT_RING(ring, 1);
		} else {
			OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
			OUT_RING(ring, 1);
		}
	}

	/* Execute the table update */
	OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));

	OUT_RING(ring,
		CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
		CP_SMMU_TABLE_UPDATE_1_ASID(asid));
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));

	/*
	 * Write the new TTBR0 to the memstore. This is good for debugging.
	 * Needed for preemption
	 */
	OUT_PKT7(ring, CP_MEM_WRITE, 5);
	OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_LO(lower_32_bits(memptr)));
	OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_HI(upper_32_bits(memptr)));
	OUT_RING(ring, lower_32_bits(ttbr));
	OUT_RING(ring, upper_32_bits(ttbr));
	OUT_RING(ring, ctx->seqno);

	/*
	 * Sync both threads after switching pagetables and enable BR only
	 * to make sure BV doesn't race ahead while BR is still switching
	 * pagetables.
	 */
	if (adreno_is_a7xx(&a6xx_gpu->base) || adreno_is_a8xx(&a6xx_gpu->base)) {
		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
	}

	/*
	 * And finally, trigger a uche flush to be sure there isn't anything
	 * lingering in that part of the GPU
	 */

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CACHE_INVALIDATE);

	if (!sysprof) {
		u32 reg_status = adreno_is_a8xx(adreno_gpu) ?
				REG_A8XX_RBBM_PERFCTR_SRAM_INIT_STATUS :
				REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS;
		/*
		 * Wait for SRAM clear after the pgtable update, so the
		 * two can happen in parallel:
		 */
		OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
		OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
		OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(reg_status));
		OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0));
		OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
		OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
		OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));

		if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
			/* Re-enable protected mode: */
			OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
			OUT_RING(ring, 1);
		}
	}
}

/*
 * Build and flush the ringbuffer commands for @submit.
 *
 * Order of emission: pagetable switch, start-of-submit counter samples,
 * CCU depth/color invalidates, one CP_INDIRECT_BUFFER per command buffer,
 * end-of-submit counter samples, the seqno written to CP_SCRATCH(2), and a
 * CACHE_FLUSH_TS event that writes the seqno to the ring's fence slot with
 * the IRQ flag set. The stats slot is submit->seqno modulo
 * MSM_GPU_SUBMIT_STATS_COUNT.
 */
static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	adreno_check_and_reenable_stall(adreno_gpu);

	a6xx_set_pagetable(a6xx_gpu, ring, submit);

	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
		rbmemptr_stats(ring, index, cpcycles_start));

	/*
	 * For PM4 the GMU register offsets are calculated from the base of the
	 * GPU registers so we need to add 0x1a800 to the register value on A630
	 * to get the right value from PM4.
	 *
	 * NOTE(review): the read below uses REG_A6XX_CP_ALWAYS_ON_COUNTER with
	 * no offset added, so the note above may be stale - confirm.
	 */
	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
		rbmemptr_stats(ring, index, alwayson_start));

	/* Invalidate CCU depth and color */
	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* Skipped when the context is already current */
			if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
			ibs++;
			break;
		}

		/*
		 * Periodically update shadow-rptr if needed, so that we
		 * can see partial progress of submits with large # of
		 * cmds.. otherwise we could needlessly stall waiting for
		 * ringbuffer state, simply due to looking at a shadow
		 * rptr value that has not been updated
		 *
		 * Note that this condition also holds while ibs is still 0.
		 */
		if ((ibs % 32) == 0)
			update_shadow_rptr(gpu, ring);
	}

	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
		rbmemptr_stats(ring, index, cpcycles_end));
	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
		rbmemptr_stats(ring, index, alwayson_end));

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
	 * timestamp is written to the memory and then triggers the interrupt
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
		CP_EVENT_WRITE_0_IRQ);
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu));

	a6xx_flush(gpu, ring);
}

/*
 * Queue the CP_SET_PSEUDO_REG and CP_SET_AMBLE packets used for preemption.
 *
 * The pseudo register packet carries four (id, lo, hi) triplets; only
 * NON_SECURE_SAVE_ADDR gets a non-zero address, taken from
 * a6xx_gpu->preempt_iova[ring->id]. The amble packet points at the
 * preemption postamble buffer. @queue is not used by this function.
 */
static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
		struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue)
{
	u64 preempt_postamble;

	OUT_PKT7(ring, CP_SET_PSEUDO_REG, 12);

	OUT_RING(ring, SMMU_INFO);
	/* don't save SMMU, we write the record from the kernel instead */
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* privileged and non secure buffer save */
	OUT_RING(ring, NON_SECURE_SAVE_ADDR);
	OUT_RING(ring, lower_32_bits(
		a6xx_gpu->preempt_iova[ring->id]));
	OUT_RING(ring, upper_32_bits(
		a6xx_gpu->preempt_iova[ring->id]));

	/* user context buffer save, seems to be unused by fw */
	OUT_RING(ring, NON_PRIV_SAVE_ADDR);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	OUT_RING(ring, COUNTER);
	/* seems OK to set to 0 to disable it */
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* Emit postamble to clear perfcounters */
	preempt_postamble = a6xx_gpu->preempt_postamble_iova;

	OUT_PKT7(ring, CP_SET_AMBLE, 3);
	OUT_RING(ring, lower_32_bits(preempt_postamble));
	OUT_RING(ring, upper_32_bits(preempt_postamble));
	OUT_RING(ring, CP_SET_AMBLE_2_DWORDS(
				 a6xx_gpu->preempt_postamble_len) |
			 CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE));
}

/*
 * Build and flush the ringbuffer commands for @submit on a7xx/a8xx.
 *
 * Compared with a6xx_submit() this additionally switches between the BR
 * and BV threads with CP_THREAD_CONTROL, brackets the work with
 * CP_SET_MARKER packets, writes a separate bv_fence timestamp that BR waits
 * on before the ring fence is written, records the seqno in
 * a6xx_gpu->last_seqno[], and, when more than one ring exists, emits the
 * preemption pseudo registers and a CP_CONTEXT_SWITCH_YIELD. After the
 * flush it calls a6xx_preempt_trigger().
 */
static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = submit->ring;
	u32 rbbm_perfctr_cp0, cp_always_on_counter;
	unsigned int i, ibs = 0;

	adreno_check_and_reenable_stall(adreno_gpu);

	/*
	 * Toggle concurrent binning for pagetable switch and set the thread to
	 * BR since only it can execute the pagetable switch packets.
	 */
	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);

	a6xx_set_pagetable(a6xx_gpu, ring, submit);

	/*
	 * If preemption is enabled, then set the pseudo register for the save
	 * sequence
	 */
	if (gpu->nr_rings > 1)
		a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue);

	/* The counter registers sampled for stats differ between a8xx and a7xx */
	if (adreno_is_a8xx(adreno_gpu)) {
		rbbm_perfctr_cp0 = REG_A8XX_RBBM_PERFCTR_CP(0);
		cp_always_on_counter = REG_A8XX_CP_ALWAYS_ON_COUNTER;
	} else {
		rbbm_perfctr_cp0 = REG_A7XX_RBBM_PERFCTR_CP(0);
		cp_always_on_counter = REG_A6XX_CP_ALWAYS_ON_COUNTER;
	}

	get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_start));
	get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, index, alwayson_start));

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BOTH);

	OUT_PKT7(ring, CP_SET_MARKER, 1);
	OUT_RING(ring, 0x101); /* IFPC disable */

	if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
		OUT_PKT7(ring, CP_SET_MARKER, 1);
		OUT_RING(ring, 0x00d); /* IB1LIST start */
	}

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* Skipped when the context is already current */
			if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
			ibs++;
			break;
		}

		/*
		 * Periodically update shadow-rptr if needed, so that we
		 * can see partial progress of submits with large # of
		 * cmds.. otherwise we could needlessly stall waiting for
		 * ringbuffer state, simply due to looking at a shadow
		 * rptr value that has not been updated
		 *
		 * Note that this condition also holds while ibs is still 0.
		 */
		if ((ibs % 32) == 0)
			update_shadow_rptr(gpu, ring);
	}

	if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
		OUT_PKT7(ring, CP_SET_MARKER, 1);
		OUT_RING(ring, 0x00e); /* IB1LIST end */
	}

	get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_end));
	get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, index, alwayson_end));

	/* Write the fence to the scratch register */
	if (adreno_is_a8xx(adreno_gpu)) {
		OUT_PKT4(ring, REG_A8XX_CP_SCRATCH_GLOBAL(2), 1);
		OUT_RING(ring, submit->seqno);
	} else {
		OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
		OUT_RING(ring, submit->seqno);
	}

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BR);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CCU_INVALIDATE_DEPTH);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CCU_INVALIDATE_COLOR);

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BV);

	/*
	 * Make sure the timestamp is committed once BV pipe is
	 * completely done with this submission.
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_CLEAN | BIT(27));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, submit->seqno);

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BR);

	/*
	 * This makes sure that BR doesn't race ahead and commit
	 * timestamp to memstore while BV is still processing
	 * this submission.
	 */
	OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
	OUT_RING(ring, 0);
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, submit->seqno);

	a6xx_gpu->last_seqno[ring->id] = submit->seqno;

	/* write the ringbuffer timestamp */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_CLEAN | CP_EVENT_WRITE_0_IRQ | BIT(27));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BOTH);

	OUT_PKT7(ring, CP_SET_MARKER, 1);
	OUT_RING(ring, 0x100); /* IFPC enable */

	/* If preemption is enabled */
	if (gpu->nr_rings > 1) {
		/* Yield the floor on command completion */
		OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);

		/*
		 * If dword[2:1] are non zero, they specify an address for
		 * the CP to write the value of dword[3] to on preemption
		 * complete. Write 0 to skip the write
		 */
		OUT_RING(ring, 0x00);
		OUT_RING(ring, 0x00);
		/* Data value - not used if the address above is 0 */
		OUT_RING(ring, 0x01);
		/* generate interrupt on preemption completion */
		OUT_RING(ring, 0x00);
	}


	trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu));

	a6xx_flush(gpu, ring);

	/* Check to see if we need to start preemption */
	a6xx_preempt_trigger(gpu);
}

/*
 * Enable (@state true) or disable (@state false) hardware clock gating.
 *
 * Returns without doing anything when the GPU has no hwcg register table
 * and is not an a7xx. The three GMU CGC registers are always programmed.
 * GPUs without a hwcg table then take the a7xx global load path and
 * return; all others program the per-register hwcg table, which is
 * terminated by an entry whose offset is 0, followed by RBBM_CLOCK_CNTL.
 */
static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	const struct adreno_reglist *reg;
	unsigned int i;
	u32 cgc_delay, cgc_hyst;
	u32 val, clock_cntl_on;

	if (!(adreno_gpu->info->a6xx->hwcg || adreno_is_a7xx(adreno_gpu)))
		return;

	/* Per-target value of RBBM_CLOCK_CNTL when clock gating is on */
	if (adreno_is_a630(adreno_gpu))
		clock_cntl_on = 0x8aa8aa02;
	else if (adreno_is_a610(adreno_gpu) || adreno_is_a612(adreno_gpu))
		clock_cntl_on = 0xaaa8aa82;
	else if (adreno_is_a702(adreno_gpu))
		clock_cntl_on = 0xaaaaaa82;
	else
		clock_cntl_on = 0x8aa8aa82;

	if (adreno_is_a612(adreno_gpu))
		cgc_delay = 0x11;
	else if (adreno_is_a615_family(adreno_gpu))
		cgc_delay = 0x111;
	else
		cgc_delay = 0x10111;

	if (adreno_is_a612(adreno_gpu))
		cgc_hyst = 0x55;
	else if (adreno_is_a615_family(adreno_gpu))
		cgc_hyst = 0x555;
	else
		cgc_hyst = 0x5555;

	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
			state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
			state ? cgc_delay : 0);
	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
			state ? cgc_hyst : 0);

	/* No hwcg table: use the a7xx global clock gating load instead */
	if (!adreno_gpu->info->a6xx->hwcg) {
		gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
		gpu_write(gpu, REG_A7XX_RBBM_CGC_GLOBAL_LOAD_CMD, state ? 1 : 0);

		if (state) {
			gpu_write(gpu, REG_A7XX_RBBM_CGC_P2S_TRIG_CMD, 1);

			if (gpu_poll_timeout(gpu, REG_A7XX_RBBM_CGC_P2S_STATUS, val,
					     val & A7XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
				dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
				return;
			}

			gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
		}

		return;
	}

	val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);

	/* Don't re-program the registers if they are already correct */
	if ((!state && !val) || (state && (val == clock_cntl_on)))
		return;

	/* Disable SP clock before programming HWCG registers */
	if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);

	for (i = 0; (reg = &adreno_gpu->info->a6xx->hwcg[i], reg->offset); i++)
		gpu_write(gpu, reg->offset, state ? reg->value : 0);

	/* Enable SP clock */
	if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);

	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
}

/*
 * Program the CP register protection table from the per-GPU protect list.
 *
 * The first protect->count - 1 entries go to CP_PROTECT(i), skipping
 * entries whose value is 0. The final list entry is written to the last
 * hardware slot, CP_PROTECT(count_max - 1).
 *
 * NOTE(review): the loop bound and the final regs[i] access presumably rely
 * on protect->count being at least 1 - confirm against the protect tables.
 */
static void a6xx_set_cp_protect(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
	unsigned i;

	/*
	 * Enable access protection to privileged registers, fault on an access
	 * protect violation and select the last span to protect from the start
	 * address all the way to the end of the register address space
	 */
	gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL,
		  A6XX_CP_PROTECT_CNTL_ACCESS_PROT_EN |
		  A6XX_CP_PROTECT_CNTL_ACCESS_FAULT_ON_VIOL_EN |
		  A6XX_CP_PROTECT_CNTL_LAST_SPAN_INF_RANGE);

	for (i = 0; i < protect->count - 1; i++) {
		/* Intentionally skip writing to some registers */
		if (protect->regs[i])
			gpu_write(gpu, REG_A6XX_CP_PROTECT(i), protect->regs[i]);
	}
	/* last CP_PROTECT to have "infinite" length on the last entry */
	gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]);
}

/*
 * Build the UBWC configuration used by this GPU.
 *
 * The common configuration from qcom_ubwc_config_get_data() is copied into
 * gpu->_ubwc_config and then overridden per target. A one-time warning is
 * printed when the resulting highest_bank_bit or ubwc_swizzle differs from
 * the common value. On success gpu->ubwc_config points at the private copy.
 *
 * Returns 0 on success or the error encoded in the common config pointer.
 */
static int a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
{
	const struct qcom_ubwc_cfg_data *common_cfg;
	struct qcom_ubwc_cfg_data *cfg = &gpu->_ubwc_config;

	/* Inherit the common config and make some necessary fixups */
	common_cfg = qcom_ubwc_config_get_data();
	if (IS_ERR(common_cfg))
		return PTR_ERR(common_cfg);

	/* Copy the data into the internal struct to drop the const qualifier (temporarily) */
	*cfg = *common_cfg;

	/* Use common config as is for A8x */
	if (!adreno_is_a8xx(gpu)) {
		cfg->ubwc_swizzle = 0x6;
		cfg->highest_bank_bit = 15;
	}

	if (adreno_is_a610(gpu)) {
		cfg->highest_bank_bit = 13;
		cfg->ubwc_swizzle = 0x7;
	}

	if (adreno_is_a612(gpu))
		cfg->highest_bank_bit = 14;

	if (adreno_is_a618(gpu))
		cfg->highest_bank_bit = 14;

	if (adreno_is_a619(gpu))
		/* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
		cfg->highest_bank_bit = 13;

	if (adreno_is_a619_holi(gpu))
		cfg->highest_bank_bit = 13;

	if (adreno_is_a621(gpu))
		cfg->highest_bank_bit = 13;

	if (adreno_is_a623(gpu))
		cfg->highest_bank_bit = 16;

	if (adreno_is_a650(gpu) ||
	    adreno_is_a660(gpu) ||
	    adreno_is_a690(gpu) ||
	    adreno_is_a730(gpu) ||
	    adreno_is_a740_family(gpu)) {
		/* TODO: get ddr type from bootloader and use 15 for LPDDR4 */
		cfg->highest_bank_bit = 16;
	}

	if (adreno_is_a663(gpu)) {
		cfg->highest_bank_bit = 13;
		cfg->ubwc_swizzle = 0x4;
	}

	if (adreno_is_7c3(gpu))
		cfg->highest_bank_bit = 14;

	if (adreno_is_a702(gpu))
		cfg->highest_bank_bit = 14;

	if (cfg->highest_bank_bit != common_cfg->highest_bank_bit)
		DRM_WARN_ONCE("Inconclusive highest_bank_bit value: %u (GPU) vs %u (UBWC_CFG)\n",
			      cfg->highest_bank_bit, common_cfg->highest_bank_bit);

	if (cfg->ubwc_swizzle != common_cfg->ubwc_swizzle)
		DRM_WARN_ONCE("Inconclusive ubwc_swizzle value: %u (GPU) vs %u (UBWC_CFG)\n",
			      cfg->ubwc_swizzle, common_cfg->ubwc_swizzle);

	gpu->ubwc_config = &gpu->_ubwc_config;

	return 0;
}

/*
 * Write the UBWC configuration in adreno_gpu->ubwc_config to the RB, TPL1,
 * SP, UCHE and RBBM mode control registers, plus GRAS on a7xx.
 *
 * A highest_bank_bit below 13 triggers BUG_ON().
 */
static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config;
	/*
	 * We subtract 13 from the highest bank bit (13 is the minimum value
	 * allowed by hw) and write the lowest two bits of the remaining value
	 * as hbb_lo and the one above it as hbb_hi to the hardware.
	 */
	BUG_ON(cfg->highest_bank_bit < 13);
	u32 hbb = cfg->highest_bank_bit - 13;
	bool rgb565_predicator = cfg->ubwc_enc_version >= UBWC_4_0;
	u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2);
	bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg);
	bool amsbc = cfg->ubwc_enc_version >= UBWC_3_0;
	bool min_acc_len_64b = false;
	u8 uavflagprd_inv = 0;
	u32 hbb_hi = hbb >> 2;
	u32 hbb_lo = hbb & 3;

	if (adreno_is_a650_family(adreno_gpu) || adreno_is_a7xx(adreno_gpu))
		uavflagprd_inv = 2;

	if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
		min_acc_len_64b = true;

	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
		  level2_swizzling_dis << 12 |
		  rgb565_predicator << 11 |
		  hbb_hi << 10 | amsbc << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);

	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
		  level2_swizzling_dis << 6 | hbb_hi << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);

	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
		  level2_swizzling_dis << 12 | hbb_hi << 10 |
		  uavflagprd_inv << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);

	if (adreno_is_a7xx(adreno_gpu))
		gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL,
			  FIELD_PREP(GENMASK(8, 5), hbb_lo));

	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL,
		  min_acc_len_64b << 23 | hbb_lo << 21);

	gpu_write(gpu, REG_A6XX_RBBM_NC_MODE_CNTL,
		  cfg->macrotile_mode);
}

/*
 * Fill the power-up register list buffer (a struct cpu_gpu_lock followed by
 * register data) at a6xx_gpu->pwrup_reglist_ptr.
 *
 * The lock fields are cleared, then for the ifpc_reglist and pwrup_reglist
 * in turn each register offset is stored together with the value currently
 * read from that register. The dynamic list length is set to 0.
 *
 * NOTE(review): nothing here checks the destination size; presumably the
 * buffer is allocated large enough for both lists - confirm at the
 * allocation site.
 */
static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	const struct adreno_reglist_list *reglist;
	void *ptr = a6xx_gpu->pwrup_reglist_ptr;
	struct cpu_gpu_lock *lock = ptr;
	u32 *dest = (u32 *)&lock->regs[0];
	int i;

	lock->gpu_req = lock->cpu_req = lock->turn = 0;

	reglist = adreno_gpu->info->a6xx->ifpc_reglist;
	lock->ifpc_list_len = reglist->count;

	/*
	 * For each entry in each of the lists, write the offset and the current
	 * register value into the GPU buffer
	 */
	for (i = 0; i < reglist->count; i++) {
		*dest++ = reglist->regs[i];
		*dest++ = gpu_read(gpu, reglist->regs[i]);
	}

	reglist = adreno_gpu->info->a6xx->pwrup_reglist;
	lock->preemption_list_len = reglist->count;

	for (i = 0; i < reglist->count; i++) {
		*dest++ = reglist->regs[i];
		*dest++ = gpu_read(gpu, reglist->regs[i]);
	}

	/*
	 * The overall register list is composed of
	 * 1. Static IFPC-only registers
	 * 2. Static IFPC + preemption registers
	 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
	 *
	 * The first two lists are static. Size of these lists are stored as
	 * number of pairs in ifpc_list_len and preemption_list_len
	 * respectively. With concurrent binning, some of the perfcounter
	 * registers being virtualized, CP needs to know the pipe id to program
	 * the aperture in order to restore the same. Thus, third list is a
	 * dynamic list with triplets as
	 * (<aperture, shifted 12 bits> <address> <data>), and the length is
	 * stored as number for triplets in dynamic_list_len.
	 */
	lock->dynamic_list_len = 0;
}

/*
 * Issue the initial preemption setup on ring 0.
 *
 * Returns 0 without emitting anything when there is only one ring.
 * Otherwise protected mode is turned off, the pseudo registers are set, a
 * CP_CONTEXT_SWITCH_YIELD is queued and the ring is flushed. Returns 0 if
 * the GPU then goes idle and -EINVAL if it does not.
 */
static int a7xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings <= 1)
		return 0;

	/* Turn CP protection off */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Generate interrupt on preemption completion */
	OUT_RING(ring, 0x00);

	a6xx_flush(gpu, ring);

	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
}

/*
 * Send the 8-dword CP_ME_INIT packet on ring 0 and wait for idle.
 *
 * Returns 0 if the GPU goes idle afterwards, -EINVAL otherwise.
 */
static int a6xx_cp_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002f);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* No workarounds enabled */
	OUT_RING(ring, 0x00000000);

	/* Pad rest of the cmds with 0's */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	a6xx_flush(gpu, ring);
	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
}

/*
 * a7xx variant of CP init: queue a CP_THREAD_CONTROL packet, then a 7-dword
 * CP_ME_INIT whose first dword is a feature mask and whose last three
 * dwords carry the address of the power-up register list, then wait for
 * idle.
 *
 * Returns 0 if the GPU goes idle afterwards, -EINVAL otherwise.
 */
static int a7xx_cp_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];
	u32 mask;

	/* Disable concurrent binning before sending CP init */
	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, BIT(27));

	OUT_PKT7(ring, CP_ME_INIT, 7);

	/* Use multiple HW contexts */
	mask = BIT(0);

	/* Enable error detection */
	mask |= BIT(1);

	/* Set default reset state */
	mask |= BIT(3);

	/* Disable save/restore of performance counters across preemption */
	mask |= BIT(6);

	/* Enable the register init list with the spinlock */
	mask |= BIT(8);

	OUT_RING(ring, mask);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Operation mode mask */
	OUT_RING(ring, 0x00000002);

	/*
	 * *Don't* send a power up reg list for concurrent binning (TODO)
	 *
	 * NOTE(review): the dwords below do pass pwrup_reglist_iova; the
	 * "don't send" wording presumably refers to a separate list for
	 * concurrent binning - confirm.
	 */
	/* Lo address */
	OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova));
	/* Hi address */
	OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova));
	/* BIT(31) set => read the regs from the list */
	OUT_RING(ring, BIT(31));

	a6xx_flush(gpu, ring);
	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
}

/*
 * Check that the microcode version is new enough to include several key
 * security fixes. Return true if the ucode is safe.
1024 */ 1025 static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, 1026 struct drm_gem_object *obj) 1027 { 1028 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 1029 struct msm_gpu *gpu = &adreno_gpu->base; 1030 const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE]; 1031 u32 *buf = msm_gem_get_vaddr(obj); 1032 bool ret = false; 1033 1034 if (IS_ERR(buf)) 1035 return false; 1036 1037 /* A7xx is safe! */ 1038 if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) 1039 return true; 1040 1041 /* 1042 * Targets up to a640 (a618, a630 and a640) need to check for a 1043 * microcode version that is patched to support the whereami opcode or 1044 * one that is new enough to include it by default. 1045 * 1046 * a650 tier targets don't need whereami but still need to be 1047 * equal to or newer than 0.95 for other security fixes 1048 * 1049 * a660 targets have all the critical security fixes from the start 1050 */ 1051 if (!strcmp(sqe_name, "a630_sqe.fw")) { 1052 /* 1053 * If the lowest nibble is 0xa that is an indication that this 1054 * microcode has been patched. The actual version is in dword 1055 * [3] but we only care about the patchlevel which is the lowest 1056 * nibble of dword [3] 1057 * 1058 * Otherwise check that the firmware is greater than or equal 1059 * to 1.90 which was the first version that had this fix built 1060 * in 1061 */ 1062 if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) || 1063 (buf[0] & 0xfff) >= 0x190) { 1064 a6xx_gpu->has_whereami = true; 1065 ret = true; 1066 goto out; 1067 } 1068 1069 DRM_DEV_ERROR(&gpu->pdev->dev, 1070 "a630 SQE ucode is too old. Have version %x need at least %x\n", 1071 buf[0] & 0xfff, 0x190); 1072 } else if (!strcmp(sqe_name, "a650_sqe.fw")) { 1073 if ((buf[0] & 0xfff) >= 0x095) { 1074 ret = true; 1075 goto out; 1076 } 1077 1078 DRM_DEV_ERROR(&gpu->pdev->dev, 1079 "a650 SQE ucode is too old. 
Have version %x need at least %x\n", 1080 buf[0] & 0xfff, 0x095); 1081 } else if (!strcmp(sqe_name, "a660_sqe.fw")) { 1082 ret = true; 1083 } else { 1084 DRM_DEV_ERROR(&gpu->pdev->dev, 1085 "unknown GPU, add it to a6xx_ucode_check_version()!!\n"); 1086 } 1087 out: 1088 msm_gem_put_vaddr(obj); 1089 return ret; 1090 } 1091 1092 static int a6xx_ucode_load(struct msm_gpu *gpu) 1093 { 1094 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1095 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1096 1097 if (!a6xx_gpu->sqe_bo) { 1098 a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu, 1099 adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova); 1100 1101 if (IS_ERR(a6xx_gpu->sqe_bo)) { 1102 int ret = PTR_ERR(a6xx_gpu->sqe_bo); 1103 1104 a6xx_gpu->sqe_bo = NULL; 1105 DRM_DEV_ERROR(&gpu->pdev->dev, 1106 "Could not allocate SQE ucode: %d\n", ret); 1107 1108 return ret; 1109 } 1110 1111 msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw"); 1112 if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) { 1113 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm); 1114 drm_gem_object_put(a6xx_gpu->sqe_bo); 1115 1116 a6xx_gpu->sqe_bo = NULL; 1117 return -EPERM; 1118 } 1119 } 1120 1121 if (!a6xx_gpu->aqe_bo && adreno_gpu->fw[ADRENO_FW_AQE]) { 1122 a6xx_gpu->aqe_bo = adreno_fw_create_bo(gpu, 1123 adreno_gpu->fw[ADRENO_FW_AQE], &a6xx_gpu->aqe_iova); 1124 1125 if (IS_ERR(a6xx_gpu->aqe_bo)) { 1126 int ret = PTR_ERR(a6xx_gpu->aqe_bo); 1127 1128 a6xx_gpu->aqe_bo = NULL; 1129 DRM_DEV_ERROR(&gpu->pdev->dev, 1130 "Could not allocate AQE ucode: %d\n", ret); 1131 1132 return ret; 1133 } 1134 1135 msm_gem_object_set_name(a6xx_gpu->aqe_bo, "aqefw"); 1136 } 1137 1138 /* 1139 * Expanded APRIV and targets that support WHERE_AM_I both need a 1140 * privileged buffer to store the RPTR shadow 1141 */ 1142 if ((adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) && 1143 !a6xx_gpu->shadow_bo) { 1144 a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev, 1145 sizeof(u32) * gpu->nr_rings, 1146 MSM_BO_WC | MSM_BO_MAP_PRIV, 
1147 gpu->vm, &a6xx_gpu->shadow_bo, 1148 &a6xx_gpu->shadow_iova); 1149 1150 if (IS_ERR(a6xx_gpu->shadow)) 1151 return PTR_ERR(a6xx_gpu->shadow); 1152 1153 msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow"); 1154 } 1155 1156 a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PAGE_SIZE, 1157 MSM_BO_WC | MSM_BO_MAP_PRIV, 1158 gpu->vm, &a6xx_gpu->pwrup_reglist_bo, 1159 &a6xx_gpu->pwrup_reglist_iova); 1160 1161 if (IS_ERR(a6xx_gpu->pwrup_reglist_ptr)) 1162 return PTR_ERR(a6xx_gpu->pwrup_reglist_ptr); 1163 1164 msm_gem_object_set_name(a6xx_gpu->pwrup_reglist_bo, "pwrup_reglist"); 1165 1166 return 0; 1167 } 1168 1169 int a6xx_zap_shader_init(struct msm_gpu *gpu) 1170 { 1171 static bool loaded; 1172 int ret; 1173 1174 if (loaded) 1175 return 0; 1176 1177 ret = adreno_zap_shader_load(gpu, GPU_PAS_ID); 1178 1179 loaded = !ret; 1180 return ret; 1181 } 1182 1183 #define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \ 1184 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \ 1185 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ 1186 A6XX_RBBM_INT_0_MASK_CP_IB2 | \ 1187 A6XX_RBBM_INT_0_MASK_CP_IB1 | \ 1188 A6XX_RBBM_INT_0_MASK_CP_RB | \ 1189 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ 1190 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \ 1191 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ 1192 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ 1193 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR) 1194 1195 #define A7XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \ 1196 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \ 1197 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \ 1198 A6XX_RBBM_INT_0_MASK_CP_SW | \ 1199 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ 1200 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \ 1201 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \ 1202 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ 1203 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \ 1204 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ 1205 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ 1206 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ 1207 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR 
| \ 1208 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) 1209 1210 #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ 1211 A6XX_CP_APRIV_CNTL_RBFETCH | \ 1212 A6XX_CP_APRIV_CNTL_RBPRIVLEVEL | \ 1213 A6XX_CP_APRIV_CNTL_RBRPWB) 1214 1215 #define A7XX_BR_APRIVMASK (A7XX_APRIV_MASK | \ 1216 A6XX_CP_APRIV_CNTL_CDREAD | \ 1217 A6XX_CP_APRIV_CNTL_CDWRITE) 1218 1219 static int hw_init(struct msm_gpu *gpu) 1220 { 1221 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1222 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1223 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 1224 u64 gmem_range_min; 1225 unsigned int i; 1226 int ret; 1227 1228 if (!adreno_has_gmu_wrapper(adreno_gpu)) { 1229 /* Make sure the GMU keeps the GPU on while we set it up */ 1230 ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); 1231 if (ret) 1232 return ret; 1233 } 1234 1235 /* Clear GBIF halt in case GX domain was not collapsed */ 1236 if (adreno_is_a619_holi(adreno_gpu)) { 1237 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0); 1238 gpu_read(gpu, REG_A6XX_GBIF_HALT); 1239 1240 gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, 0); 1241 gpu_read(gpu, REG_A6XX_RBBM_GPR0_CNTL); 1242 } else if (a6xx_has_gbif(adreno_gpu)) { 1243 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0); 1244 gpu_read(gpu, REG_A6XX_GBIF_HALT); 1245 1246 gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0); 1247 gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT); 1248 } 1249 1250 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0); 1251 1252 if (adreno_is_a619_holi(adreno_gpu)) 1253 a6xx_sptprac_enable(gmu); 1254 1255 /* 1256 * Disable the trusted memory range - we don't actually supported secure 1257 * memory rendering at this point in time and we don't want to block off 1258 * part of the virtual memory space. 
1259 */ 1260 gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000); 1261 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); 1262 1263 if (!adreno_is_a7xx(adreno_gpu)) { 1264 /* Turn on 64 bit addressing for all blocks */ 1265 gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1); 1266 gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1); 1267 gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1); 1268 gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1); 1269 gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1); 1270 gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1); 1271 gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1); 1272 gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1); 1273 gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1); 1274 gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1); 1275 gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1); 1276 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1); 1277 } 1278 1279 /* enable hardware clockgating */ 1280 a6xx_set_hwcg(gpu, true); 1281 1282 /* For gmuwrapper implementations, do the VBIF/GBIF CX configuration here */ 1283 if (adreno_is_a610_family(adreno_gpu)) { 1284 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620); 1285 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620); 1286 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620); 1287 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620); 1288 } 1289 1290 if (adreno_is_a610_family(adreno_gpu) || 1291 adreno_is_a640_family(adreno_gpu) || 1292 adreno_is_a650_family(adreno_gpu)) { 1293 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3); 1294 } else if (adreno_is_a7xx(adreno_gpu)) { 1295 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x2120212); 1296 } else { 1297 gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3); 1298 } 1299 1300 if (adreno_is_a630(adreno_gpu)) 1301 gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009); 1302 1303 if (adreno_is_a7xx(adreno_gpu)) 1304 gpu_write(gpu, REG_A6XX_UCHE_GBIF_GX_CONFIG, 0x10240e0); 1305 1306 /* 
Make all blocks contribute to the GPU BUSY perf counter */ 1307 gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff); 1308 1309 /* Disable L2 bypass in the UCHE */ 1310 if (adreno_is_a7xx(adreno_gpu)) { 1311 gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); 1312 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); 1313 } else { 1314 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0); 1315 gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); 1316 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); 1317 } 1318 1319 if (!(adreno_is_a650_family(adreno_gpu) || 1320 adreno_is_a702(adreno_gpu) || 1321 adreno_is_a730(adreno_gpu))) { 1322 gmem_range_min = adreno_is_a740_family(adreno_gpu) ? SZ_16M : SZ_1M; 1323 1324 /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */ 1325 gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN, gmem_range_min); 1326 1327 gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX, 1328 gmem_range_min + adreno_gpu->info->gmem - 1); 1329 } 1330 1331 if (adreno_is_a7xx(adreno_gpu)) 1332 gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, BIT(23)); 1333 else { 1334 gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804); 1335 gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4); 1336 } 1337 1338 if (adreno_is_a640_family(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) { 1339 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140); 1340 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c); 1341 } else if (adreno_is_a610_family(adreno_gpu)) { 1342 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060); 1343 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16); 1344 } else if (!adreno_is_a7xx(adreno_gpu)) { 1345 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0); 1346 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c); 1347 } 1348 1349 if (adreno_is_a660_family(adreno_gpu)) 1350 gpu_write(gpu, REG_A7XX_CP_LPAC_PROG_FIFO_SIZE, 
0x00000020); 1351 1352 /* Setting the mem pool size */ 1353 if (adreno_is_a610(adreno_gpu) || adreno_is_a612(adreno_gpu)) { 1354 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 48); 1355 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 47); 1356 } else if (adreno_is_a702(adreno_gpu)) { 1357 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 64); 1358 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 63); 1359 } else if (!adreno_is_a7xx(adreno_gpu)) 1360 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128); 1361 1362 1363 /* Set the default primFifo threshold values */ 1364 if (adreno_gpu->info->a6xx->prim_fifo_threshold) 1365 gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 1366 adreno_gpu->info->a6xx->prim_fifo_threshold); 1367 1368 /* Set the AHB default slave response to "ERROR" */ 1369 gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1); 1370 1371 /* Turn on performance counters */ 1372 gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1); 1373 1374 if (adreno_is_a7xx(adreno_gpu)) { 1375 /* Turn on the IFPC counter (countable 4 on XOCLK4) */ 1376 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1, 1377 FIELD_PREP(GENMASK(7, 0), 0x4)); 1378 } 1379 1380 /* Select CP0 to always count cycles */ 1381 gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT); 1382 1383 a6xx_set_ubwc_config(gpu); 1384 1385 /* Enable fault detection */ 1386 if (adreno_is_a612(adreno_gpu) || 1387 adreno_is_a730(adreno_gpu) || 1388 adreno_is_a740_family(adreno_gpu)) 1389 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff); 1390 else if (adreno_is_a690(adreno_gpu)) 1391 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x4fffff); 1392 else if (adreno_is_a619(adreno_gpu)) 1393 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff); 1394 else if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu)) 1395 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3ffff); 1396 else 1397 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 
<< 30) | 0x1fffff); 1398 1399 gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1); 1400 1401 /* Set weights for bicubic filtering */ 1402 if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) { 1403 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0); 1404 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 1405 0x3fe05ff4); 1406 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 1407 0x3fa0ebee); 1408 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 1409 0x3f5193ed); 1410 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 1411 0x3f0243f0); 1412 } 1413 1414 /* Set up the CX GMU counter 0 to count busy ticks */ 1415 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000); 1416 1417 /* Enable the power counter */ 1418 gmu_rmw(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, BIT(5)); 1419 gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1); 1420 1421 /* Protect registers from the CP */ 1422 a6xx_set_cp_protect(gpu); 1423 1424 if (adreno_is_a660_family(adreno_gpu)) { 1425 if (adreno_is_a690(adreno_gpu)) 1426 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x00028801); 1427 else 1428 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1); 1429 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0); 1430 } else if (adreno_is_a702(adreno_gpu)) { 1431 /* Something to do with the HLSQ cluster */ 1432 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(24)); 1433 } 1434 1435 if (adreno_is_a690(adreno_gpu)) 1436 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90); 1437 /* Set dualQ + disable afull for A660 GPU */ 1438 else if (adreno_is_a660(adreno_gpu) || adreno_is_a663(adreno_gpu)) 1439 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906); 1440 else if (adreno_is_a7xx(adreno_gpu)) 1441 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 1442 FIELD_PREP(GENMASK(19, 16), 6) | 1443 FIELD_PREP(GENMASK(15, 12), 6) | 1444 FIELD_PREP(GENMASK(11, 8), 9) | 1445 BIT(3) | BIT(2) | 1446 FIELD_PREP(GENMASK(1, 0), 2)); 1447 1448 /* Enable expanded apriv for targets 
that support it */ 1449 if (gpu->hw_apriv) { 1450 if (adreno_is_a7xx(adreno_gpu)) { 1451 gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL, 1452 A7XX_BR_APRIVMASK); 1453 gpu_write(gpu, REG_A7XX_CP_BV_APRIV_CNTL, 1454 A7XX_APRIV_MASK); 1455 gpu_write(gpu, REG_A7XX_CP_LPAC_APRIV_CNTL, 1456 A7XX_APRIV_MASK); 1457 } else 1458 gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL, 1459 BIT(6) | BIT(5) | BIT(3) | BIT(2) | BIT(1)); 1460 } 1461 1462 if (adreno_is_a750(adreno_gpu)) { 1463 /* Disable ubwc merged UFC request feature */ 1464 gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(19), BIT(19)); 1465 1466 /* Enable TP flaghint and other performance settings */ 1467 gpu_write(gpu, REG_A6XX_TPL1_DBG_ECO_CNTL1, 0xc0700); 1468 } else if (adreno_is_a7xx(adreno_gpu)) { 1469 /* Disable non-ubwc read reqs from passing write reqs */ 1470 gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(11), BIT(11)); 1471 } 1472 1473 /* Enable interrupts */ 1474 gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 1475 adreno_is_a7xx(adreno_gpu) ? A7XX_INT_MASK : A6XX_INT_MASK); 1476 1477 ret = adreno_hw_init(gpu); 1478 if (ret) 1479 goto out; 1480 1481 gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova); 1482 1483 /* Set the ringbuffer address */ 1484 gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova); 1485 1486 /* Targets that support extended APRIV can use the RPTR shadow from 1487 * hardware but all the other ones need to disable the feature. 
Targets 1488 * that support the WHERE_AM_I opcode can use that instead 1489 */ 1490 if (adreno_gpu->base.hw_apriv) 1491 gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT); 1492 else 1493 gpu_write(gpu, REG_A6XX_CP_RB_CNTL, 1494 MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); 1495 1496 /* Configure the RPTR shadow if needed: */ 1497 if (a6xx_gpu->shadow_bo) { 1498 gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, 1499 shadowptr(a6xx_gpu, gpu->rb[0])); 1500 for (unsigned int i = 0; i < gpu->nr_rings; i++) 1501 a6xx_gpu->shadow[i] = 0; 1502 } 1503 1504 /* ..which means "always" on A7xx, also for BV shadow */ 1505 if (adreno_is_a7xx(adreno_gpu)) { 1506 gpu_write64(gpu, REG_A7XX_CP_BV_RB_RPTR_ADDR, 1507 rbmemptr(gpu->rb[0], bv_rptr)); 1508 } 1509 1510 a6xx_preempt_hw_init(gpu); 1511 1512 /* Always come up on rb 0 */ 1513 a6xx_gpu->cur_ring = gpu->rb[0]; 1514 1515 for (i = 0; i < gpu->nr_rings; i++) 1516 gpu->rb[i]->cur_ctx_seqno = 0; 1517 1518 /* Enable the SQE_to start the CP engine */ 1519 gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1); 1520 1521 if (adreno_is_a7xx(adreno_gpu) && !a6xx_gpu->pwrup_reglist_emitted) { 1522 a7xx_patch_pwrup_reglist(gpu); 1523 a6xx_gpu->pwrup_reglist_emitted = true; 1524 } 1525 1526 ret = adreno_is_a7xx(adreno_gpu) ? a7xx_cp_init(gpu) : a6xx_cp_init(gpu); 1527 if (ret) 1528 goto out; 1529 1530 /* 1531 * Try to load a zap shader into the secure world. If successful 1532 * we can use the CP to switch out of secure mode. If not then we 1533 * have no resource but to try to switch ourselves out manually. If we 1534 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will 1535 * be blocked and a permissions violation will soon follow. 
1536 */ 1537 ret = a6xx_zap_shader_init(gpu); 1538 if (!ret) { 1539 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1); 1540 OUT_RING(gpu->rb[0], 0x00000000); 1541 1542 a6xx_flush(gpu, gpu->rb[0]); 1543 if (!a6xx_idle(gpu, gpu->rb[0])) 1544 return -EINVAL; 1545 } else if (ret == -ENODEV) { 1546 /* 1547 * This device does not use zap shader (but print a warning 1548 * just in case someone got their dt wrong.. hopefully they 1549 * have a debug UART to realize the error of their ways... 1550 * if you mess this up you are about to crash horribly) 1551 */ 1552 dev_warn_once(gpu->dev->dev, 1553 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n"); 1554 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0); 1555 ret = 0; 1556 } else { 1557 return ret; 1558 } 1559 1560 out: 1561 if (adreno_has_gmu_wrapper(adreno_gpu)) 1562 return ret; 1563 1564 /* Last step - yield the ringbuffer */ 1565 a7xx_preempt_start(gpu); 1566 1567 /* 1568 * Tell the GMU that we are done touching the GPU and it can start power 1569 * management 1570 */ 1571 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); 1572 1573 if (a6xx_gpu->gmu.legacy) { 1574 /* Take the GMU out of its special boot mode */ 1575 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER); 1576 } 1577 1578 return ret; 1579 } 1580 1581 static int a6xx_hw_init(struct msm_gpu *gpu) 1582 { 1583 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1584 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1585 int ret; 1586 1587 mutex_lock(&a6xx_gpu->gmu.lock); 1588 ret = hw_init(gpu); 1589 mutex_unlock(&a6xx_gpu->gmu.lock); 1590 1591 return ret; 1592 } 1593 1594 static void a6xx_dump(struct msm_gpu *gpu) 1595 { 1596 DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n", 1597 gpu_read(gpu, REG_A6XX_RBBM_STATUS)); 1598 adreno_dump(gpu); 1599 } 1600 1601 static void a6xx_recover(struct msm_gpu *gpu) 1602 { 1603 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1604 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1605 struct a6xx_gmu 
*gmu = &a6xx_gpu->gmu; 1606 int active_submits; 1607 1608 adreno_dump_info(gpu); 1609 1610 if (a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) { 1611 /* Sometimes crashstate capture is skipped, so SQE should be halted here again */ 1612 gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); 1613 1614 if (hang_debug) 1615 a6xx_dump(gpu); 1616 1617 } 1618 1619 /* 1620 * To handle recovery specific sequences during the rpm suspend we are 1621 * about to trigger 1622 */ 1623 1624 a6xx_gpu->hung = true; 1625 1626 pm_runtime_dont_use_autosuspend(&gpu->pdev->dev); 1627 1628 /* active_submit won't change until we make a submission */ 1629 mutex_lock(&gpu->active_lock); 1630 active_submits = gpu->active_submits; 1631 1632 /* 1633 * Temporarily clear active_submits count to silence a WARN() in the 1634 * runtime suspend cb 1635 */ 1636 gpu->active_submits = 0; 1637 1638 if (adreno_has_gmu_wrapper(adreno_gpu) || adreno_has_rgmu(adreno_gpu)) { 1639 /* Drain the outstanding traffic on memory buses */ 1640 adreno_gpu->funcs->bus_halt(adreno_gpu, true); 1641 1642 /* Reset the GPU to a clean state */ 1643 a6xx_gpu_sw_reset(gpu, true); 1644 a6xx_gpu_sw_reset(gpu, false); 1645 } 1646 1647 reinit_completion(&gmu->pd_gate); 1648 dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb); 1649 dev_pm_genpd_synced_poweroff(gmu->cxpd); 1650 1651 /* Drop the rpm refcount from active submits */ 1652 if (active_submits) 1653 pm_runtime_put(&gpu->pdev->dev); 1654 1655 /* And the final one from recover worker */ 1656 pm_runtime_put_sync(&gpu->pdev->dev); 1657 1658 if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000))) 1659 DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n"); 1660 1661 dev_pm_genpd_remove_notifier(gmu->cxpd); 1662 1663 pm_runtime_use_autosuspend(&gpu->pdev->dev); 1664 1665 if (active_submits) 1666 pm_runtime_get(&gpu->pdev->dev); 1667 1668 pm_runtime_get_sync(&gpu->pdev->dev); 1669 1670 gpu->active_submits = active_submits; 1671 mutex_unlock(&gpu->active_lock); 1672 1673 
msm_gpu_hw_init(gpu); 1674 a6xx_gpu->hung = false; 1675 } 1676 1677 static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid) 1678 { 1679 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1680 static const char *uche_clients[7] = { 1681 "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", 1682 }; 1683 u32 val; 1684 1685 if (adreno_is_a7xx(adreno_gpu)) { 1686 if (mid != 1 && mid != 2 && mid != 3 && mid != 8) 1687 return "UNKNOWN"; 1688 } else { 1689 if (mid < 1 || mid > 3) 1690 return "UNKNOWN"; 1691 } 1692 1693 /* 1694 * The source of the data depends on the mid ID read from FSYNR1. 1695 * and the client ID read from the UCHE block 1696 */ 1697 val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF); 1698 1699 if (adreno_is_a7xx(adreno_gpu)) { 1700 /* Bit 3 for mid=3 indicates BR or BV */ 1701 static const char *uche_clients_a7xx[16] = { 1702 "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", 1703 "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP", 1704 "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", 1705 "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP", 1706 }; 1707 1708 /* LPAC has the same clients as BR and BV, but because it is 1709 * compute-only some of them do not exist and there are holes 1710 * in the array. 
1711 */ 1712 static const char *uche_clients_lpac_a7xx[8] = { 1713 "-", "LPAC_SP", "-", "-", 1714 "LPAC_HLSQ", "-", "-", "LPAC_TP", 1715 }; 1716 1717 val &= GENMASK(6, 0); 1718 1719 /* mid=3 refers to BR or BV */ 1720 if (mid == 3) { 1721 if (val < ARRAY_SIZE(uche_clients_a7xx)) 1722 return uche_clients_a7xx[val]; 1723 else 1724 return "UCHE"; 1725 } 1726 1727 /* mid=8 refers to LPAC */ 1728 if (mid == 8) { 1729 if (val < ARRAY_SIZE(uche_clients_lpac_a7xx)) 1730 return uche_clients_lpac_a7xx[val]; 1731 else 1732 return "UCHE_LPAC"; 1733 } 1734 1735 /* mid=2 is a catchall for everything else in LPAC */ 1736 if (mid == 2) 1737 return "UCHE_LPAC"; 1738 1739 /* mid=1 is a catchall for everything else in BR/BV */ 1740 return "UCHE"; 1741 } else if (adreno_is_a660_family(adreno_gpu)) { 1742 static const char *uche_clients_a660[8] = { 1743 "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", "TP", 1744 }; 1745 1746 static const char *uche_clients_a660_not[8] = { 1747 "not VFD", "not SP", "not VSC", "not VPC", 1748 "not HLSQ", "not PC", "not LRZ", "not TP", 1749 }; 1750 1751 val &= GENMASK(6, 0); 1752 1753 if (mid == 3 && val < ARRAY_SIZE(uche_clients_a660)) 1754 return uche_clients_a660[val]; 1755 1756 if (mid == 1 && val < ARRAY_SIZE(uche_clients_a660_not)) 1757 return uche_clients_a660_not[val]; 1758 1759 return "UCHE"; 1760 } else { 1761 /* mid = 3 is most precise and refers to only one block per client */ 1762 if (mid == 3) 1763 return uche_clients[val & 7]; 1764 1765 /* For mid=2 the source is TP or VFD except when the client id is 0 */ 1766 if (mid == 2) 1767 return ((val & 7) == 0) ? 
"TP" : "TP|VFD"; 1768 1769 /* For mid=1 just return "UCHE" as a catchall for everything else */ 1770 return "UCHE"; 1771 } 1772 } 1773 1774 static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id) 1775 { 1776 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1777 1778 if (id == 0) 1779 return "CP"; 1780 else if (id == 4) 1781 return "CCU"; 1782 else if (id == 6) 1783 return "CDP Prefetch"; 1784 else if (id == 7) 1785 return "GMU"; 1786 else if (id == 5 && adreno_is_a7xx(adreno_gpu)) 1787 return "Flag cache"; 1788 1789 return a6xx_uche_fault_block(gpu, id); 1790 } 1791 1792 static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data) 1793 { 1794 struct msm_gpu *gpu = arg; 1795 struct adreno_smmu_fault_info *info = data; 1796 const char *block = "unknown"; 1797 1798 u32 scratch[] = { 1799 gpu_read(gpu, REG_A6XX_CP_SCRATCH(4)), 1800 gpu_read(gpu, REG_A6XX_CP_SCRATCH(5)), 1801 gpu_read(gpu, REG_A6XX_CP_SCRATCH(6)), 1802 gpu_read(gpu, REG_A6XX_CP_SCRATCH(7)), 1803 }; 1804 1805 if (info) 1806 block = a6xx_fault_block(gpu, info->fsynr1 & 0xff); 1807 1808 return adreno_fault_handler(gpu, iova, flags, info, block, scratch); 1809 } 1810 1811 static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu) 1812 { 1813 u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS); 1814 1815 if (status & A6XX_CP_INT_CP_OPCODE_ERROR) { 1816 u32 val; 1817 1818 gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1); 1819 val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA); 1820 dev_err_ratelimited(&gpu->pdev->dev, 1821 "CP | opcode error | possible opcode=0x%8.8X\n", 1822 val); 1823 } 1824 1825 if (status & A6XX_CP_INT_CP_UCODE_ERROR) 1826 dev_err_ratelimited(&gpu->pdev->dev, 1827 "CP ucode error interrupt\n"); 1828 1829 if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR) 1830 dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n", 1831 gpu_read(gpu, REG_A6XX_CP_HW_FAULT)); 1832 1833 if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) { 1834 u32 val = gpu_read(gpu, 
REG_A6XX_CP_PROTECT_STATUS); 1835 1836 dev_err_ratelimited(&gpu->pdev->dev, 1837 "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n", 1838 val & (1 << 20) ? "READ" : "WRITE", 1839 (val & 0x3ffff), val); 1840 } 1841 1842 if (status & A6XX_CP_INT_CP_AHB_ERROR && !adreno_is_a7xx(to_adreno_gpu(gpu))) 1843 dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n"); 1844 1845 if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR) 1846 dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n"); 1847 1848 if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR) 1849 dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n"); 1850 1851 } 1852 1853 static void a6xx_fault_detect_irq(struct msm_gpu *gpu) 1854 { 1855 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); 1856 1857 /* 1858 * If stalled on SMMU fault, we could trip the GPU's hang detection, 1859 * but the fault handler will trigger the devcore dump, and we want 1860 * to otherwise resume normally rather than killing the submit, so 1861 * just bail. 1862 */ 1863 if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT) 1864 return; 1865 1866 DRM_DEV_ERROR(&gpu->pdev->dev, 1867 "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", 1868 ring ? ring->id : -1, ring ? 
ring->fctx->last_fence : 0, 1869 gpu_read(gpu, REG_A6XX_RBBM_STATUS), 1870 gpu_read(gpu, REG_A6XX_CP_RB_RPTR), 1871 gpu_read(gpu, REG_A6XX_CP_RB_WPTR), 1872 gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), 1873 gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), 1874 gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), 1875 gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE)); 1876 1877 /* Turn off the hangcheck timer to keep it from bothering us */ 1878 timer_delete(&gpu->hangcheck_timer); 1879 1880 /* Turn off interrupts to avoid triggering recovery again */ 1881 gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 0); 1882 1883 kthread_queue_work(gpu->worker, &gpu->recover_work); 1884 } 1885 1886 static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) 1887 { 1888 u32 status; 1889 1890 status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); 1891 gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); 1892 1893 dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); 1894 1895 /* 1896 * Ignore FASTBLEND violations, because the HW will silently fall back 1897 * to legacy blending. 
1898 */ 1899 if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | 1900 A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { 1901 timer_delete(&gpu->hangcheck_timer); 1902 1903 kthread_queue_work(gpu->worker, &gpu->recover_work); 1904 } 1905 } 1906 1907 static void a6xx_gpu_keepalive_vote(struct msm_gpu *gpu, bool on) 1908 { 1909 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1910 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1911 1912 if (adreno_has_gmu_wrapper(adreno_gpu)) 1913 return; 1914 1915 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, on); 1916 } 1917 1918 static int irq_poll_fence(struct msm_gpu *gpu) 1919 { 1920 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1921 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1922 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 1923 u32 status; 1924 1925 if (adreno_has_gmu_wrapper(adreno_gpu)) 1926 return 0; 1927 1928 if (gmu_poll_timeout_atomic(gmu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, status, !status, 1, 100)) { 1929 u32 rbbm_unmasked = gmu_read(gmu, REG_A6XX_GMU_RBBM_INT_UNMASKED_STATUS); 1930 1931 dev_err_ratelimited(&gpu->pdev->dev, 1932 "irq fence poll timeout, fence_ctrl=0x%x, unmasked_status=0x%x\n", 1933 status, rbbm_unmasked); 1934 return -ETIMEDOUT; 1935 } 1936 1937 return 0; 1938 } 1939 1940 static irqreturn_t a6xx_irq(struct msm_gpu *gpu) 1941 { 1942 struct msm_drm_private *priv = gpu->dev->dev_private; 1943 1944 /* Set keepalive vote to avoid power collapse after RBBM_INT_0_STATUS is read */ 1945 a6xx_gpu_keepalive_vote(gpu, true); 1946 1947 if (irq_poll_fence(gpu)) 1948 goto done; 1949 1950 u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS); 1951 1952 gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status); 1953 1954 if (priv->disable_err_irq) 1955 status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS; 1956 1957 if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT) 1958 a6xx_fault_detect_irq(gpu); 1959 1960 if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) 1961 dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB 
bus error\n"); 1962 1963 if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR) 1964 a6xx_cp_hw_err_irq(gpu); 1965 1966 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW) 1967 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n"); 1968 1969 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW) 1970 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n"); 1971 1972 if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) 1973 dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); 1974 1975 if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) 1976 a7xx_sw_fuse_violation_irq(gpu); 1977 1978 if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { 1979 msm_gpu_retire(gpu); 1980 a6xx_preempt_trigger(gpu); 1981 } 1982 1983 if (status & A6XX_RBBM_INT_0_MASK_CP_SW) 1984 a6xx_preempt_irq(gpu); 1985 1986 done: 1987 a6xx_gpu_keepalive_vote(gpu, false); 1988 1989 return IRQ_HANDLED; 1990 } 1991 1992 static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu) 1993 { 1994 llcc_slice_deactivate(a6xx_gpu->llc_slice); 1995 llcc_slice_deactivate(a6xx_gpu->htw_llc_slice); 1996 } 1997 1998 static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) 1999 { 2000 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 2001 struct msm_gpu *gpu = &adreno_gpu->base; 2002 u32 cntl1_regval = 0; 2003 2004 if (IS_ERR(a6xx_gpu->llc_mmio)) 2005 return; 2006 2007 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { 2008 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); 2009 2010 gpu_scid &= 0x1f; 2011 cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) | 2012 (gpu_scid << 15) | (gpu_scid << 20); 2013 2014 /* On A660, the SCID programming for UCHE traffic is done in 2015 * A6XX_GBIF_SCACHE_CNTL0[14:10] 2016 */ 2017 if (adreno_is_a660_family(adreno_gpu)) 2018 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) | 2019 (1 << 8), (gpu_scid << 10) | (1 << 8)); 2020 } 2021 2022 /* 2023 * For targets with a MMU500, activate the slice but don't program the 2024 
* register. The XBL will take care of that. 2025 */ 2026 if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) { 2027 if (!a6xx_gpu->have_mmu500) { 2028 u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice); 2029 2030 gpuhtw_scid &= 0x1f; 2031 cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid); 2032 } 2033 } 2034 2035 if (!cntl1_regval) 2036 return; 2037 2038 /* 2039 * Program the slice IDs for the various GPU blocks and GPU MMU 2040 * pagetables 2041 */ 2042 if (!a6xx_gpu->have_mmu500) { 2043 a6xx_llc_write(a6xx_gpu, 2044 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval); 2045 2046 /* 2047 * Program cacheability overrides to not allocate cache 2048 * lines on a write miss 2049 */ 2050 a6xx_llc_rmw(a6xx_gpu, 2051 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); 2052 return; 2053 } 2054 2055 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval); 2056 } 2057 2058 static void a7xx_llc_activate(struct a6xx_gpu *a6xx_gpu) 2059 { 2060 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 2061 struct msm_gpu *gpu = &adreno_gpu->base; 2062 2063 if (IS_ERR(a6xx_gpu->llc_mmio)) 2064 return; 2065 2066 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { 2067 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); 2068 2069 gpu_scid &= GENMASK(4, 0); 2070 2071 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, 2072 FIELD_PREP(GENMASK(29, 25), gpu_scid) | 2073 FIELD_PREP(GENMASK(24, 20), gpu_scid) | 2074 FIELD_PREP(GENMASK(19, 15), gpu_scid) | 2075 FIELD_PREP(GENMASK(14, 10), gpu_scid) | 2076 FIELD_PREP(GENMASK(9, 5), gpu_scid) | 2077 FIELD_PREP(GENMASK(4, 0), gpu_scid)); 2078 2079 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, 2080 FIELD_PREP(GENMASK(14, 10), gpu_scid) | 2081 BIT(8)); 2082 } 2083 2084 llcc_slice_activate(a6xx_gpu->htw_llc_slice); 2085 } 2086 2087 static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu) 2088 { 2089 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */ 2090 if (adreno_has_gmu_wrapper(&a6xx_gpu->base)) 2091 return; 2092 2093 
llcc_slice_putd(a6xx_gpu->llc_slice); 2094 llcc_slice_putd(a6xx_gpu->htw_llc_slice); 2095 } 2096 2097 static void a6xx_llc_slices_init(struct platform_device *pdev, 2098 struct a6xx_gpu *a6xx_gpu, bool is_a7xx) 2099 { 2100 struct device_node *phandle; 2101 2102 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */ 2103 if (adreno_has_gmu_wrapper(&a6xx_gpu->base)) 2104 return; 2105 2106 /* 2107 * There is a different programming path for A6xx targets with an 2108 * mmu500 attached, so detect if that is the case 2109 */ 2110 phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0); 2111 a6xx_gpu->have_mmu500 = (phandle && 2112 of_device_is_compatible(phandle, "arm,mmu-500")); 2113 of_node_put(phandle); 2114 2115 if (is_a7xx || !a6xx_gpu->have_mmu500) 2116 a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem"); 2117 else 2118 a6xx_gpu->llc_mmio = NULL; 2119 2120 a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU); 2121 a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); 2122 2123 if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice)) 2124 a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); 2125 } 2126 2127 static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) 2128 { 2129 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 2130 struct msm_gpu *gpu = &adreno_gpu->base; 2131 u32 fuse_val; 2132 int ret; 2133 2134 if (adreno_is_a750(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) { 2135 /* 2136 * Assume that if qcom scm isn't available, that whatever 2137 * replacement allows writing the fuse register ourselves. 2138 * Users of alternative firmware need to make sure this 2139 * register is writeable or indicate that it's not somehow. 2140 * Print a warning because if you mess this up you're about to 2141 * crash horribly. 
2142 */ 2143 if (!qcom_scm_is_available()) { 2144 dev_warn_once(gpu->dev->dev, 2145 "SCM is not available, poking fuse register\n"); 2146 a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, 2147 A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | 2148 A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | 2149 A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); 2150 adreno_gpu->has_ray_tracing = true; 2151 return 0; 2152 } 2153 2154 ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ | 2155 QCOM_SCM_GPU_TSENSE_EN_REQ); 2156 if (ret) 2157 return ret; 2158 2159 /* 2160 * On A7XX_GEN3 and newer, raytracing may be disabled by the 2161 * firmware, find out whether that's the case. The scm call 2162 * above sets the fuse register. 2163 */ 2164 fuse_val = a6xx_llc_read(a6xx_gpu, 2165 REG_A7XX_CX_MISC_SW_FUSE_VALUE); 2166 adreno_gpu->has_ray_tracing = 2167 !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING); 2168 } else if (adreno_is_a740(adreno_gpu)) { 2169 /* Raytracing is always enabled on a740 */ 2170 adreno_gpu->has_ray_tracing = true; 2171 } 2172 2173 return 0; 2174 } 2175 2176 2177 #define GBIF_CLIENT_HALT_MASK BIT(0) 2178 #define GBIF_ARB_HALT_MASK BIT(1) 2179 #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0) 2180 #define VBIF_RESET_ACK_MASK 0xF0 2181 #define GPR0_GBIF_HALT_REQUEST 0x1E0 2182 2183 void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off) 2184 { 2185 struct msm_gpu *gpu = &adreno_gpu->base; 2186 2187 if (adreno_is_a619_holi(adreno_gpu)) { 2188 gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, GPR0_GBIF_HALT_REQUEST); 2189 spin_until((gpu_read(gpu, REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) & 2190 (VBIF_RESET_ACK_MASK)) == VBIF_RESET_ACK_MASK); 2191 } else if (!a6xx_has_gbif(adreno_gpu)) { 2192 gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, VBIF_XIN_HALT_CTRL0_MASK); 2193 spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & 2194 (VBIF_XIN_HALT_CTRL0_MASK)) == VBIF_XIN_HALT_CTRL0_MASK); 2195 gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0); 2196 2197 return; 2198 } 2199 2200 if 
(gx_off) { 2201 /* Halt the gx side of GBIF */ 2202 gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1); 2203 spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1); 2204 } 2205 2206 /* Halt new client requests on GBIF */ 2207 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); 2208 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 2209 (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK); 2210 2211 /* Halt all AXI requests on GBIF */ 2212 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK); 2213 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 2214 (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK); 2215 2216 /* The GBIF halt needs to be explicitly cleared */ 2217 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0); 2218 } 2219 2220 void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert) 2221 { 2222 /* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */ 2223 if (adreno_is_a610(to_adreno_gpu(gpu)) || adreno_is_a8xx(to_adreno_gpu(gpu))) 2224 return; 2225 2226 gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert); 2227 /* Perform a bogus read and add a brief delay to ensure ordering. 
*/ 2228 gpu_read(gpu, REG_A6XX_RBBM_SW_RESET_CMD); 2229 udelay(1); 2230 2231 /* The reset line needs to be asserted for at least 100 us */ 2232 if (assert) 2233 udelay(100); 2234 } 2235 2236 static int a6xx_gmu_pm_resume(struct msm_gpu *gpu) 2237 { 2238 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2239 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2240 int ret; 2241 2242 gpu->needs_hw_init = true; 2243 2244 trace_msm_gpu_resume(0); 2245 2246 mutex_lock(&a6xx_gpu->gmu.lock); 2247 ret = a6xx_gmu_resume(a6xx_gpu); 2248 mutex_unlock(&a6xx_gpu->gmu.lock); 2249 if (ret) 2250 return ret; 2251 2252 msm_devfreq_resume(gpu); 2253 2254 if (adreno_is_a8xx(adreno_gpu)) 2255 a8xx_llc_activate(a6xx_gpu); 2256 else if (adreno_is_a7xx(adreno_gpu)) 2257 a7xx_llc_activate(a6xx_gpu); 2258 else 2259 a6xx_llc_activate(a6xx_gpu); 2260 2261 return ret; 2262 } 2263 2264 static int a6xx_pm_resume(struct msm_gpu *gpu) 2265 { 2266 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2267 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2268 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 2269 unsigned long freq = gpu->fast_rate; 2270 struct dev_pm_opp *opp; 2271 int ret; 2272 2273 gpu->needs_hw_init = true; 2274 2275 trace_msm_gpu_resume(0); 2276 2277 mutex_lock(&a6xx_gpu->gmu.lock); 2278 2279 opp = dev_pm_opp_find_freq_ceil(&gpu->pdev->dev, &freq); 2280 if (IS_ERR(opp)) { 2281 ret = PTR_ERR(opp); 2282 goto err_set_opp; 2283 } 2284 dev_pm_opp_put(opp); 2285 2286 /* Set the core clock and bus bw, having VDD scaling in mind */ 2287 dev_pm_opp_set_opp(&gpu->pdev->dev, opp); 2288 2289 pm_runtime_resume_and_get(gmu->dev); 2290 pm_runtime_resume_and_get(gmu->gxpd); 2291 2292 ret = clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks); 2293 if (ret) 2294 goto err_bulk_clk; 2295 2296 ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks); 2297 if (ret) { 2298 clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks); 2299 goto err_bulk_clk; 2300 } 2301 2302 if 
(adreno_is_a619_holi(adreno_gpu)) 2303 a6xx_sptprac_enable(gmu); 2304 2305 /* If anything goes south, tear the GPU down piece by piece.. */ 2306 if (ret) { 2307 err_bulk_clk: 2308 pm_runtime_put(gmu->gxpd); 2309 pm_runtime_put(gmu->dev); 2310 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL); 2311 } 2312 err_set_opp: 2313 mutex_unlock(&a6xx_gpu->gmu.lock); 2314 2315 if (!ret) { 2316 msm_devfreq_resume(gpu); 2317 a6xx_llc_activate(a6xx_gpu); 2318 } 2319 2320 return ret; 2321 } 2322 2323 static int a6xx_gmu_pm_suspend(struct msm_gpu *gpu) 2324 { 2325 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2326 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2327 int i, ret; 2328 2329 trace_msm_gpu_suspend(0); 2330 2331 a6xx_llc_deactivate(a6xx_gpu); 2332 2333 msm_devfreq_suspend(gpu); 2334 2335 mutex_lock(&a6xx_gpu->gmu.lock); 2336 ret = a6xx_gmu_stop(a6xx_gpu); 2337 mutex_unlock(&a6xx_gpu->gmu.lock); 2338 if (ret) 2339 return ret; 2340 2341 if (a6xx_gpu->shadow_bo) 2342 for (i = 0; i < gpu->nr_rings; i++) 2343 a6xx_gpu->shadow[i] = 0; 2344 2345 gpu->suspend_count++; 2346 2347 return 0; 2348 } 2349 2350 static int a6xx_pm_suspend(struct msm_gpu *gpu) 2351 { 2352 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2353 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2354 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 2355 int i; 2356 2357 trace_msm_gpu_suspend(0); 2358 2359 a6xx_llc_deactivate(a6xx_gpu); 2360 2361 msm_devfreq_suspend(gpu); 2362 2363 mutex_lock(&a6xx_gpu->gmu.lock); 2364 2365 /* Drain the outstanding traffic on memory buses */ 2366 adreno_gpu->funcs->bus_halt(adreno_gpu, true); 2367 2368 if (adreno_is_a619_holi(adreno_gpu)) 2369 a6xx_sptprac_disable(gmu); 2370 2371 clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks); 2372 clk_bulk_disable_unprepare(gmu->nr_clocks, gmu->clocks); 2373 2374 pm_runtime_put_sync(gmu->gxpd); 2375 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL); 2376 pm_runtime_put_sync(gmu->dev); 2377 2378 mutex_unlock(&a6xx_gpu->gmu.lock); 2379 2380 
if (a6xx_gpu->shadow_bo) 2381 for (i = 0; i < gpu->nr_rings; i++) 2382 a6xx_gpu->shadow[i] = 0; 2383 2384 gpu->suspend_count++; 2385 2386 return 0; 2387 } 2388 2389 static int a6xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value) 2390 { 2391 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2392 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2393 2394 *value = read_gmu_ao_counter(a6xx_gpu); 2395 2396 return 0; 2397 } 2398 2399 static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) 2400 { 2401 *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER); 2402 return 0; 2403 } 2404 2405 static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu) 2406 { 2407 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2408 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2409 2410 return a6xx_gpu->cur_ring; 2411 } 2412 2413 static void a6xx_destroy(struct msm_gpu *gpu) 2414 { 2415 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2416 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2417 2418 if (a6xx_gpu->sqe_bo) { 2419 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm); 2420 drm_gem_object_put(a6xx_gpu->sqe_bo); 2421 } 2422 2423 if (a6xx_gpu->aqe_bo) { 2424 msm_gem_unpin_iova(a6xx_gpu->aqe_bo, gpu->vm); 2425 drm_gem_object_put(a6xx_gpu->aqe_bo); 2426 } 2427 2428 if (a6xx_gpu->shadow_bo) { 2429 msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->vm); 2430 drm_gem_object_put(a6xx_gpu->shadow_bo); 2431 } 2432 2433 a6xx_llc_slices_destroy(a6xx_gpu); 2434 2435 a6xx_gmu_remove(a6xx_gpu); 2436 2437 adreno_gpu_cleanup(adreno_gpu); 2438 2439 kfree(a6xx_gpu); 2440 } 2441 2442 static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate) 2443 { 2444 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2445 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2446 u64 busy_cycles; 2447 2448 /* 19.2MHz */ 2449 *out_sample_rate = 19200000; 2450 2451 busy_cycles = gmu_read64(&a6xx_gpu->gmu, 2452 REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L, 2453 
REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H); 2454 2455 return busy_cycles; 2456 } 2457 2458 static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, 2459 bool suspended) 2460 { 2461 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2462 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2463 2464 mutex_lock(&a6xx_gpu->gmu.lock); 2465 a6xx_gmu_set_freq(gpu, opp, suspended); 2466 mutex_unlock(&a6xx_gpu->gmu.lock); 2467 } 2468 2469 static struct drm_gpuvm * 2470 a6xx_create_vm(struct msm_gpu *gpu, struct platform_device *pdev) 2471 { 2472 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2473 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2474 unsigned long quirks = 0; 2475 2476 /* 2477 * This allows GPU to set the bus attributes required to use system 2478 * cache on behalf of the iommu page table walker. 2479 */ 2480 if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice) && 2481 !device_iommu_capable(&pdev->dev, IOMMU_CAP_CACHE_COHERENCY)) 2482 quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA; 2483 2484 return adreno_iommu_create_vm(gpu, pdev, quirks); 2485 } 2486 2487 static struct drm_gpuvm * 2488 a6xx_create_private_vm(struct msm_gpu *gpu, bool kernel_managed) 2489 { 2490 struct msm_mmu *mmu; 2491 2492 mmu = msm_iommu_pagetable_create(to_msm_vm(gpu->vm)->mmu, kernel_managed); 2493 2494 if (IS_ERR(mmu)) 2495 return ERR_CAST(mmu); 2496 2497 return msm_gem_vm_create(gpu->dev, mmu, "gpu", ADRENO_VM_START, 2498 adreno_private_vm_size(gpu), kernel_managed); 2499 } 2500 2501 static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 2502 { 2503 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2504 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2505 2506 if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) 2507 return a6xx_gpu->shadow[ring->id]; 2508 2509 /* 2510 * This is true only on an A6XX_GEN1 with GMU, has IFPC enabled and a super old SQE firmware 2511 * without 'whereami' support 2512 */ 2513 
WARN_ONCE((to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC), 2514 "Can't read CP_RB_RPTR register reliably\n"); 2515 2516 return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR); 2517 } 2518 2519 static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 2520 { 2521 struct msm_cp_state cp_state; 2522 bool progress; 2523 2524 /* 2525 * With IFPC, KMD doesn't know whether GX power domain is collapsed 2526 * or not. So, we can't blindly read the below registers in GX domain. 2527 * Lets trust the hang detection in HW and lie to the caller that 2528 * there was progress. 2529 */ 2530 if (to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC) 2531 return true; 2532 2533 cp_state = (struct msm_cp_state) { 2534 .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), 2535 .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), 2536 .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), 2537 .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE), 2538 }; 2539 2540 /* 2541 * Adjust the remaining data to account for what has already been 2542 * fetched from memory, but not yet consumed by the SQE. 2543 * 2544 * This is not *technically* correct, the amount buffered could 2545 * exceed the IB size due to hw prefetching ahead, but: 2546 * 2547 * (1) We aren't trying to find the exact position, just whether 2548 * progress has been made 2549 * (2) The CP_REG_TO_MEM at the end of a submit should be enough 2550 * to prevent prefetching into an unrelated submit. (And 2551 * either way, at some point the ROQ will be full.) 
2552 */ 2553 cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB1) >> 16; 2554 cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB2) >> 16; 2555 2556 progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state)); 2557 2558 ring->last_cp_state = cp_state; 2559 2560 return progress; 2561 } 2562 2563 static u32 fuse_to_supp_hw(const struct adreno_info *info, u32 fuse) 2564 { 2565 if (!info->speedbins) 2566 return UINT_MAX; 2567 2568 for (int i = 0; info->speedbins[i].fuse != SHRT_MAX; i++) 2569 if (info->speedbins[i].fuse == fuse) 2570 return BIT(info->speedbins[i].speedbin); 2571 2572 return UINT_MAX; 2573 } 2574 2575 static int a6xx_set_supported_hw(struct device *dev, const struct adreno_info *info) 2576 { 2577 u32 supp_hw; 2578 u32 speedbin; 2579 int ret; 2580 2581 ret = adreno_read_speedbin(dev, &speedbin); 2582 /* 2583 * -ENOENT means that the platform doesn't support speedbin which is 2584 * fine 2585 */ 2586 if (ret == -ENOENT) { 2587 return 0; 2588 } else if (ret) { 2589 dev_err_probe(dev, ret, 2590 "failed to read speed-bin. Some OPPs may not be supported by hardware\n"); 2591 return ret; 2592 } 2593 2594 supp_hw = fuse_to_supp_hw(info, speedbin); 2595 2596 if (supp_hw == UINT_MAX) { 2597 DRM_DEV_ERROR(dev, 2598 "missing support for speed-bin: %u. 
Some OPPs may not be supported by hardware\n", 2599 speedbin); 2600 supp_hw = BIT(0); /* Default */ 2601 } 2602 2603 ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1); 2604 if (ret) 2605 return ret; 2606 2607 return 0; 2608 } 2609 2610 static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) 2611 { 2612 struct msm_drm_private *priv = dev->dev_private; 2613 struct platform_device *pdev = priv->gpu_pdev; 2614 struct adreno_platform_config *config = pdev->dev.platform_data; 2615 struct device_node *node; 2616 struct a6xx_gpu *a6xx_gpu; 2617 struct adreno_gpu *adreno_gpu; 2618 struct msm_gpu *gpu; 2619 extern int enable_preemption; 2620 bool is_a7xx; 2621 int ret, nr_rings = 1; 2622 2623 a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL); 2624 if (!a6xx_gpu) 2625 return ERR_PTR(-ENOMEM); 2626 2627 adreno_gpu = &a6xx_gpu->base; 2628 gpu = &adreno_gpu->base; 2629 2630 mutex_init(&a6xx_gpu->gmu.lock); 2631 2632 adreno_gpu->registers = NULL; 2633 2634 /* Check if there is a GMU phandle and set it up */ 2635 node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0); 2636 /* FIXME: How do we gracefully handle this? */ 2637 BUG_ON(!node); 2638 2639 adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper"); 2640 2641 adreno_gpu->base.hw_apriv = 2642 !!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV); 2643 2644 /* gpu->info only gets assigned in adreno_gpu_init(). 
A8x is included intentionally */ 2645 is_a7xx = config->info->family >= ADRENO_7XX_GEN1; 2646 2647 a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx); 2648 2649 ret = a6xx_set_supported_hw(&pdev->dev, config->info); 2650 if (ret) { 2651 a6xx_llc_slices_destroy(a6xx_gpu); 2652 kfree(a6xx_gpu); 2653 return ERR_PTR(ret); 2654 } 2655 2656 if ((enable_preemption == 1) || (enable_preemption == -1 && 2657 (config->info->quirks & ADRENO_QUIRK_PREEMPTION))) 2658 nr_rings = 4; 2659 2660 ret = adreno_gpu_init(dev, pdev, adreno_gpu, config->info->funcs, nr_rings); 2661 if (ret) { 2662 a6xx_destroy(&(a6xx_gpu->base.base)); 2663 return ERR_PTR(ret); 2664 } 2665 2666 /* 2667 * For now only clamp to idle freq for devices where this is known not 2668 * to cause power supply issues: 2669 */ 2670 if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu)) 2671 priv->gpu_clamp_to_idle = true; 2672 2673 if (adreno_has_gmu_wrapper(adreno_gpu) || adreno_has_rgmu(adreno_gpu)) 2674 ret = a6xx_gmu_wrapper_init(a6xx_gpu, node); 2675 else 2676 ret = a6xx_gmu_init(a6xx_gpu, node); 2677 of_node_put(node); 2678 if (ret) { 2679 a6xx_destroy(&(a6xx_gpu->base.base)); 2680 return ERR_PTR(ret); 2681 } 2682 2683 if (adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) { 2684 ret = a7xx_cx_mem_init(a6xx_gpu); 2685 if (ret) { 2686 a6xx_destroy(&(a6xx_gpu->base.base)); 2687 return ERR_PTR(ret); 2688 } 2689 } 2690 2691 adreno_gpu->uche_trap_base = 0x1fffffffff000ull; 2692 2693 msm_mmu_set_fault_handler(to_msm_vm(gpu->vm)->mmu, gpu, 2694 adreno_gpu->funcs->mmu_fault_handler); 2695 2696 ret = a6xx_calc_ubwc_config(adreno_gpu); 2697 if (ret) { 2698 a6xx_destroy(&(a6xx_gpu->base.base)); 2699 return ERR_PTR(ret); 2700 } 2701 2702 /* Set up the preemption specific bits and pieces for each ringbuffer */ 2703 a6xx_preempt_init(gpu); 2704 2705 return gpu; 2706 } 2707 2708 const struct adreno_gpu_funcs a6xx_gpu_funcs = { 2709 .base = { 2710 .get_param = adreno_get_param, 2711 .set_param = adreno_set_param, 2712 
.hw_init = a6xx_hw_init, 2713 .ucode_load = a6xx_ucode_load, 2714 .pm_suspend = a6xx_gmu_pm_suspend, 2715 .pm_resume = a6xx_gmu_pm_resume, 2716 .recover = a6xx_recover, 2717 .submit = a6xx_submit, 2718 .active_ring = a6xx_active_ring, 2719 .irq = a6xx_irq, 2720 .destroy = a6xx_destroy, 2721 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2722 .show = a6xx_show, 2723 #endif 2724 .gpu_busy = a6xx_gpu_busy, 2725 .gpu_get_freq = a6xx_gmu_get_freq, 2726 .gpu_set_freq = a6xx_gpu_set_freq, 2727 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2728 .gpu_state_get = a6xx_gpu_state_get, 2729 .gpu_state_put = a6xx_gpu_state_put, 2730 #endif 2731 .create_vm = a6xx_create_vm, 2732 .create_private_vm = a6xx_create_private_vm, 2733 .get_rptr = a6xx_get_rptr, 2734 .progress = a6xx_progress, 2735 }, 2736 .init = a6xx_gpu_init, 2737 .get_timestamp = a6xx_gmu_get_timestamp, 2738 .bus_halt = a6xx_bus_clear_pending_transactions, 2739 .mmu_fault_handler = a6xx_fault_handler, 2740 }; 2741 2742 const struct adreno_gpu_funcs a6xx_gmuwrapper_funcs = { 2743 .base = { 2744 .get_param = adreno_get_param, 2745 .set_param = adreno_set_param, 2746 .hw_init = a6xx_hw_init, 2747 .ucode_load = a6xx_ucode_load, 2748 .pm_suspend = a6xx_pm_suspend, 2749 .pm_resume = a6xx_pm_resume, 2750 .recover = a6xx_recover, 2751 .submit = a6xx_submit, 2752 .active_ring = a6xx_active_ring, 2753 .irq = a6xx_irq, 2754 .destroy = a6xx_destroy, 2755 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2756 .show = a6xx_show, 2757 #endif 2758 .gpu_busy = a6xx_gpu_busy, 2759 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2760 .gpu_state_get = a6xx_gpu_state_get, 2761 .gpu_state_put = a6xx_gpu_state_put, 2762 #endif 2763 .create_vm = a6xx_create_vm, 2764 .create_private_vm = a6xx_create_private_vm, 2765 .get_rptr = a6xx_get_rptr, 2766 .progress = a6xx_progress, 2767 }, 2768 .init = a6xx_gpu_init, 2769 .get_timestamp = a6xx_get_timestamp, 2770 .bus_halt = a6xx_bus_clear_pending_transactions, 2771 .mmu_fault_handler = a6xx_fault_handler, 2772 }; 2773 2774 const struct 
adreno_gpu_funcs a7xx_gpu_funcs = { 2775 .base = { 2776 .get_param = adreno_get_param, 2777 .set_param = adreno_set_param, 2778 .hw_init = a6xx_hw_init, 2779 .ucode_load = a6xx_ucode_load, 2780 .pm_suspend = a6xx_gmu_pm_suspend, 2781 .pm_resume = a6xx_gmu_pm_resume, 2782 .recover = a6xx_recover, 2783 .submit = a7xx_submit, 2784 .active_ring = a6xx_active_ring, 2785 .irq = a6xx_irq, 2786 .destroy = a6xx_destroy, 2787 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2788 .show = a6xx_show, 2789 #endif 2790 .gpu_busy = a6xx_gpu_busy, 2791 .gpu_get_freq = a6xx_gmu_get_freq, 2792 .gpu_set_freq = a6xx_gpu_set_freq, 2793 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2794 .gpu_state_get = a6xx_gpu_state_get, 2795 .gpu_state_put = a6xx_gpu_state_put, 2796 #endif 2797 .create_vm = a6xx_create_vm, 2798 .create_private_vm = a6xx_create_private_vm, 2799 .get_rptr = a6xx_get_rptr, 2800 .progress = a6xx_progress, 2801 }, 2802 .init = a6xx_gpu_init, 2803 .get_timestamp = a6xx_gmu_get_timestamp, 2804 .bus_halt = a6xx_bus_clear_pending_transactions, 2805 .mmu_fault_handler = a6xx_fault_handler, 2806 }; 2807 2808 const struct adreno_gpu_funcs a8xx_gpu_funcs = { 2809 .base = { 2810 .get_param = adreno_get_param, 2811 .set_param = adreno_set_param, 2812 .hw_init = a8xx_hw_init, 2813 .ucode_load = a6xx_ucode_load, 2814 .pm_suspend = a6xx_gmu_pm_suspend, 2815 .pm_resume = a6xx_gmu_pm_resume, 2816 .recover = a8xx_recover, 2817 .submit = a7xx_submit, 2818 .active_ring = a6xx_active_ring, 2819 .irq = a8xx_irq, 2820 .destroy = a6xx_destroy, 2821 .gpu_busy = a8xx_gpu_busy, 2822 .gpu_get_freq = a6xx_gmu_get_freq, 2823 .gpu_set_freq = a6xx_gpu_set_freq, 2824 .create_vm = a6xx_create_vm, 2825 .create_private_vm = a6xx_create_private_vm, 2826 .get_rptr = a6xx_get_rptr, 2827 .progress = a8xx_progress, 2828 }, 2829 .init = a6xx_gpu_init, 2830 .get_timestamp = a8xx_gmu_get_timestamp, 2831 .bus_halt = a8xx_bus_clear_pending_transactions, 2832 .mmu_fault_handler = a8xx_fault_handler, 2833 }; 2834