// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */


#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_gpu_trace.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"

#include <linux/bitfield.h>
#include <linux/devfreq.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/pm_domain.h>
#include <linux/soc/qcom/llcc-qcom.h>

#define GPU_PAS_ID 13

/*
 * Read the GMU always-on counter as a coherent 64-bit value: reread the
 * high word until it is stable across the low-word read, so a carry
 * between the two 32-bit reads cannot produce a torn result.
 */
static u64 read_gmu_ao_counter(struct a6xx_gpu *a6xx_gpu)
{
	u64 count_hi, count_lo, temp;

	do {
		count_hi = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H);
		count_lo = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L);
		temp = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H);
	} while (unlikely(count_hi != temp));

	return (count_hi << 32) | count_lo;
}

/*
 * Poll condition used by fenced_write() below: returns true once the
 * fence status indicates the write was not dropped. While the write is
 * still being dropped, re-issue it and keep polling.
 */
static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask)
{
	/* Success if !writedropped0/1 */
	if (!(status & mask))
		return true;

	udelay(10);

	/* Try to update fenced register again */
	gpu_write(gpu, offset, value);

	/* We can't do a posted write here because the power domain could be
	 * in collapse state. So use the heaviest barrier instead
	 */
	mb();
	return false;
}

/*
 * Write a fenced GPU register and confirm (via the GMU AHB fence status)
 * that the write was accepted rather than dropped due to power collapse.
 * Returns 0 on success (or when only a warning-worthy delay occurred),
 * -ETIMEDOUT if the write never landed after ~2ms of retries.
 */
static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	u32 status;

	gpu_write(gpu, offset, value);

	/* Nothing else to be done in the case of no-GMU */
	if (adreno_has_gmu_wrapper(adreno_gpu))
		return 0;

	/* We can't do a posted write here because the power domain could be
	 * in collapse state. So use the heaviest barrier instead
	 */
	mb();

	if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
			fence_status_check(gpu, offset, value, status, mask), 0, 1000))
		return 0;

	/* Try again for another 1ms before failing */
	gpu_write(gpu, offset, value);
	mb();

	if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
			fence_status_check(gpu, offset, value, status, mask), 0, 1000)) {
		/*
		 * The 'delay' warning is here because the pause to print this
		 * warning will allow gpu to move to power collapse which
		 * defeats the purpose of continuous polling for 2 ms
		 */
		dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
				offset);
		return 0;
	}

	dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n",
			offset);

	return -ETIMEDOUT;
}

/*
 * Fenced write of a 32- or 64-bit value. For 64-bit writes the high word
 * goes to the register immediately following @offset.
 */
int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b)
{
	int ret;

	ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask);
	if (ret)
		return ret;

	if (!is_64b)
		return 0;

	ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);

	return ret;
}

/* Single-shot idle check: GMU idle, CX master idle, and no hang-detect IRQ */
static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Check that the GMU is idle */
	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_isidle(&a6xx_gpu->gmu))
		return false;

	/* Check that the CX master is idle */
	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
			~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
		return false;

	return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
		A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
}

/*
 * Wait for the CP to drain @ring and the GPU to go idle; logs the RBBM
 * status and RB pointers on timeout. Returns false on timeout.
 */
static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a6xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A6XX_RBBM_STATUS),
			gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

/*
 * Ask the CP to publish its current read pointer to the shadow memory,
 * so the kernel can observe ring consumption without register reads.
 */
static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
	if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
		OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
		OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
	}
}

/*
 * Commit queued ring contents to the hardware by advancing the write
 * pointer. If a preemption is in flight, or @ring is not the ring
 * currently executing, defer the WPTR update via restore_wptr instead.
 */
void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	update_shadow_rptr(gpu, ring);

	spin_lock_irqsave(&ring->preempt_lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	/* Update HW if this is the current ring and we are not in preempt */
	if (!a6xx_in_preempt(a6xx_gpu)) {
		if (a6xx_gpu->cur_ring == ring)
			a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
		else
			ring->restore_wptr = true;
	} else {
		ring->restore_wptr = true;
	}

	spin_unlock_irqrestore(&ring->preempt_lock, flags);
}

/*
 * Emit a CP_REG_TO_MEM packet that copies a 64-bit counter register pair
 * starting at @counter into memory at @iova.
 */
static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
		u64 iova)
{
	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
	OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
		CP_REG_TO_MEM_0_CNT(2) |
		CP_REG_TO_MEM_0_64B);
	OUT_RING(ring, lower_32_bits(iova));
	OUT_RING(ring, upper_32_bits(iova));
}

/*
 * Emit the command stream to switch the GPU SMMU pagetable to the one
 * belonging to @submit's context. No-op when the context is already
 * current on @ring. On a7xx+ this includes BR/BV thread synchronization
 * around the switch; without sysprof active, perfcounter SRAM is also
 * reinitialized so counters don't leak across contexts.
 */
static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
		struct msm_ringbuffer *ring, struct msm_gem_submit *submit)
{
	bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
	struct msm_context *ctx = submit->queue->ctx;
	struct drm_gpuvm *vm = msm_context_vm(submit->dev, ctx);
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	phys_addr_t ttbr;
	u32 asid;
	u64 memptr = rbmemptr(ring, ttbr0);

	if (ctx->seqno == ring->cur_ctx_seqno)
		return;

	if (msm_iommu_pagetable_params(to_msm_vm(vm)->mmu, &ttbr, &asid))
		return;

	if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) {
		/* Wait for previous submit to complete before continuing: */
		OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
		OUT_RING(ring, 0);
		OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
		OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
		OUT_RING(ring, submit->seqno - 1);

		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH);

		/* Reset state used to synchronize BR and BV */
		OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1);
		OUT_RING(ring,
			CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS |
			CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE |
			CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER |
			CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS);

		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH);

		OUT_PKT7(ring, CP_EVENT_WRITE, 1);
		OUT_RING(ring, LRZ_FLUSH_INVALIDATE);

		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
	}

	if (!sysprof) {
		if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
			/* Turn off protected mode to write to special registers */
			OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
			OUT_RING(ring, 0);
		}

		if (adreno_is_a8xx(adreno_gpu)) {
			OUT_PKT4(ring, REG_A8XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
			OUT_RING(ring, 1);
			OUT_PKT4(ring, REG_A8XX_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1);
			OUT_RING(ring, 1);
		} else {
			OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
			OUT_RING(ring, 1);
		}
	}

	/* Execute the table update */
	OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));

	OUT_RING(ring,
		CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
		CP_SMMU_TABLE_UPDATE_1_ASID(asid));
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));

	/*
	 * Write the new TTBR0 to the memstore. This is good for debugging.
	 * Needed for preemption
	 */
	OUT_PKT7(ring, CP_MEM_WRITE, 5);
	OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_LO(lower_32_bits(memptr)));
	OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_HI(upper_32_bits(memptr)));
	OUT_RING(ring, lower_32_bits(ttbr));
	OUT_RING(ring, upper_32_bits(ttbr));
	OUT_RING(ring, ctx->seqno);

	/*
	 * Sync both threads after switching pagetables and enable BR only
	 * to make sure BV doesn't race ahead while BR is still switching
	 * pagetables.
	 */
	if (adreno_is_a7xx(&a6xx_gpu->base) || adreno_is_a8xx(&a6xx_gpu->base)) {
		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
	}

	/*
	 * And finally, trigger a uche flush to be sure there isn't anything
	 * lingering in that part of the GPU
	 */

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CACHE_INVALIDATE);

	if (!sysprof) {
		u32 reg_status = adreno_is_a8xx(adreno_gpu) ?
			REG_A8XX_RBBM_PERFCTR_SRAM_INIT_STATUS :
			REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS;
		/*
		 * Wait for SRAM clear after the pgtable update, so the
		 * two can happen in parallel:
		 */
		OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
		OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
		OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(reg_status));
		OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0));
		OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
		OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
		OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));

		if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
			/* Re-enable protected mode: */
			OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
			OUT_RING(ring, 1);
		}
	}
}

/*
 * Build and flush the a6xx command stream for @submit: pagetable switch,
 * perf/always-on counter sampling around the IBs, CCU invalidation, the
 * user IBs themselves, and a CACHE_FLUSH_TS fence write that raises the
 * completion interrupt.
 */
static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	adreno_check_and_reenable_stall(adreno_gpu);

	a6xx_set_pagetable(a6xx_gpu, ring, submit);

	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
		rbmemptr_stats(ring, index, cpcycles_start));

	/*
	 * For PM4 the GMU register offsets are calculated from the base of the
	 * GPU registers so we need to add 0x1a800 to the register value on A630
	 * to get the right value from PM4.
	 */
	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
		rbmemptr_stats(ring, index, alwayson_start));

	/* Invalidate CCU depth and color */
	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* Skip the restore buffer if the context didn't change */
			if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
			ibs++;
			break;
		}

		/*
		 * Periodically update shadow-wptr if needed, so that we
		 * can see partial progress of submits with large # of
		 * cmds.. otherwise we could needlessly stall waiting for
		 * ringbuffer state, simply due to looking at a shadow
		 * rptr value that has not been updated
		 */
		if ((ibs % 32) == 0)
			update_shadow_rptr(gpu, ring);
	}

	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
		rbmemptr_stats(ring, index, cpcycles_end));
	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
		rbmemptr_stats(ring, index, alwayson_end));

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
	 * timestamp is written to the memory and then triggers the interrupt
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
		CP_EVENT_WRITE_0_IRQ);
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu));

	a6xx_flush(gpu, ring);
}

/*
 * Emit the CP_SET_PSEUDO_REG packets describing the preemption record
 * buffers for @ring, plus the KMD postamble used to clear perfcounters
 * on preemption.
 */
static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
		struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue)
{
	u64 preempt_postamble;

	OUT_PKT7(ring, CP_SET_PSEUDO_REG, 12);

	OUT_RING(ring, SMMU_INFO);
	/* don't save SMMU, we write the record from the kernel instead */
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* privileged and non secure buffer save */
	OUT_RING(ring, NON_SECURE_SAVE_ADDR);
	OUT_RING(ring, lower_32_bits(
		a6xx_gpu->preempt_iova[ring->id]));
	OUT_RING(ring, upper_32_bits(
		a6xx_gpu->preempt_iova[ring->id]));

	/* user context buffer save, seems to be unused by fw */
	OUT_RING(ring, NON_PRIV_SAVE_ADDR);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	OUT_RING(ring, COUNTER);
	/* seems OK to set to 0 to disable it */
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* Emit postamble to clear perfcounters */
	preempt_postamble = a6xx_gpu->preempt_postamble_iova;

	OUT_PKT7(ring, CP_SET_AMBLE, 3);
	OUT_RING(ring, lower_32_bits(preempt_postamble));
	OUT_RING(ring, upper_32_bits(preempt_postamble));
	OUT_RING(ring, CP_SET_AMBLE_2_DWORDS(
		a6xx_gpu->preempt_postamble_len) |
		CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE));
}

/*
 * a7xx/a8xx variant of the submit path. Differs from a6xx_submit() by
 * managing the BR/BV concurrent-binning threads, toggling IFPC via
 * CP_SET_MARKER, optionally bracketing the IBs in an IB1LIST for
 * preemptible queues, fencing both BV and BR pipes, and emitting a
 * preemption yield point when more than one ring is in use.
 */
static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = submit->ring;
	u32 rbbm_perfctr_cp0, cp_always_on_counter;
	unsigned int i, ibs = 0;

	adreno_check_and_reenable_stall(adreno_gpu);

	/*
	 * Toggle concurrent binning for pagetable switch and set the thread to
	 * BR since only it can execute the pagetable switch packets.
	 */
	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);

	a6xx_set_pagetable(a6xx_gpu, ring, submit);

	/*
	 * If preemption is enabled, then set the pseudo register for the save
	 * sequence
	 */
	if (gpu->nr_rings > 1)
		a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue);

	if (adreno_is_a8xx(adreno_gpu)) {
		rbbm_perfctr_cp0 = REG_A8XX_RBBM_PERFCTR_CP(0);
		cp_always_on_counter = REG_A8XX_CP_ALWAYS_ON_COUNTER;
	} else {
		rbbm_perfctr_cp0 = REG_A7XX_RBBM_PERFCTR_CP(0);
		cp_always_on_counter = REG_A6XX_CP_ALWAYS_ON_COUNTER;
	}

	get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_start));
	get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, index, alwayson_start));

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BOTH);

	OUT_PKT7(ring, CP_SET_MARKER, 1);
	OUT_RING(ring, 0x101); /* IFPC disable */

	if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
		OUT_PKT7(ring, CP_SET_MARKER, 1);
		OUT_RING(ring, 0x00d); /* IB1LIST start */
	}

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* Skip the restore buffer if the context didn't change */
			if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
			ibs++;
			break;
		}

		/*
		 * Periodically update shadow-wptr if needed, so that we
		 * can see partial progress of submits with large # of
		 * cmds.. otherwise we could needlessly stall waiting for
		 * ringbuffer state, simply due to looking at a shadow
		 * rptr value that has not been updated
		 */
		if ((ibs % 32) == 0)
			update_shadow_rptr(gpu, ring);
	}

	if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
		OUT_PKT7(ring, CP_SET_MARKER, 1);
		OUT_RING(ring, 0x00e); /* IB1LIST end */
	}

	get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_end));
	get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, index, alwayson_end));

	/* Write the fence to the scratch register */
	if (adreno_is_a8xx(adreno_gpu)) {
		OUT_PKT4(ring, REG_A8XX_CP_SCRATCH_GLOBAL(2), 1);
		OUT_RING(ring, submit->seqno);
	} else {
		OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
		OUT_RING(ring, submit->seqno);
	}

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BR);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CCU_INVALIDATE_DEPTH);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CCU_INVALIDATE_COLOR);

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BV);

	/*
	 * Make sure the timestamp is committed once BV pipe is
	 * completely done with this submission.
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_CLEAN | BIT(27));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, submit->seqno);

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BR);

	/*
	 * This makes sure that BR doesn't race ahead and commit
	 * timestamp to memstore while BV is still processing
	 * this submission.
	 */
	OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
	OUT_RING(ring, 0);
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, submit->seqno);

	a6xx_gpu->last_seqno[ring->id] = submit->seqno;

	/* write the ringbuffer timestamp */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_CLEAN | CP_EVENT_WRITE_0_IRQ | BIT(27));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BOTH);

	OUT_PKT7(ring, CP_SET_MARKER, 1);
	OUT_RING(ring, 0x100); /* IFPC enable */

	/* If preemption is enabled */
	if (gpu->nr_rings > 1) {
		/* Yield the floor on command completion */
		OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);

		/*
		 * If dword[2:1] are non zero, they specify an address for
		 * the CP to write the value of dword[3] to on preemption
		 * complete. Write 0 to skip the write
		 */
		OUT_RING(ring, 0x00);
		OUT_RING(ring, 0x00);
		/* Data value - not used if the address above is 0 */
		OUT_RING(ring, 0x01);
		/* generate interrupt on preemption completion */
		OUT_RING(ring, 0x00);
	}


	trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu));

	a6xx_flush(gpu, ring);

	/* Check to see if we need to start preemption */
	a6xx_preempt_trigger(gpu);
}

/*
 * Enable or disable hardware clock gating. Programs the GMU CGC
 * mode/delay/hysteresis controls (values vary per SKU), then either the
 * a7xx global CGC load path (when no per-register hwcg table exists) or
 * the legacy per-register hwcg table, gating the SP clock around the
 * table writes where required.
 */
static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	const struct adreno_reglist *reg;
	unsigned int i;
	u32 cgc_delay, cgc_hyst;
	u32 val, clock_cntl_on;

	if (!(adreno_gpu->info->a6xx->hwcg || adreno_is_a7xx(adreno_gpu)))
		return;

	if (adreno_is_a630(adreno_gpu))
		clock_cntl_on = 0x8aa8aa02;
	else if (adreno_is_a610(adreno_gpu) || adreno_is_a612(adreno_gpu))
		clock_cntl_on = 0xaaa8aa82;
	else if (adreno_is_a702(adreno_gpu))
		clock_cntl_on = 0xaaaaaa82;
	else
		clock_cntl_on = 0x8aa8aa82;

	if (adreno_is_a612(adreno_gpu))
		cgc_delay = 0x11;
	else if (adreno_is_a615_family(adreno_gpu))
		cgc_delay = 0x111;
	else
		cgc_delay = 0x10111;

	if (adreno_is_a612(adreno_gpu))
		cgc_hyst = 0x55;
	else if (adreno_is_a615_family(adreno_gpu))
		cgc_hyst = 0x555;
	else
		cgc_hyst = 0x5555;

	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
		  state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
		  state ? cgc_delay : 0);
	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
		  state ? cgc_hyst : 0);

	if (!adreno_gpu->info->a6xx->hwcg) {
		gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
		gpu_write(gpu, REG_A7XX_RBBM_CGC_GLOBAL_LOAD_CMD, state ? 1 : 0);

		if (state) {
			gpu_write(gpu, REG_A7XX_RBBM_CGC_P2S_TRIG_CMD, 1);

			if (gpu_poll_timeout(gpu, REG_A7XX_RBBM_CGC_P2S_STATUS, val,
					     val & A7XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
				dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
				return;
			}

			gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
		}

		return;
	}

	val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);

	/* Don't re-program the registers if they are already correct */
	if ((!state && !val) || (state && (val == clock_cntl_on)))
		return;

	/* Disable SP clock before programming HWCG registers */
	if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);

	for (i = 0; (reg = &adreno_gpu->info->a6xx->hwcg[i], reg->offset); i++)
		gpu_write(gpu, reg->offset, state ? reg->value : 0);

	/* Enable SP clock */
	if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);

	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
}

/*
 * Program the CP register-protection ranges from the per-SKU protect
 * table, with a fault-on-violation policy and an "infinite" final span.
 */
static void a6xx_set_cp_protect(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
	unsigned i;

	/*
	 * Enable access protection to privileged registers, fault on an access
	 * protect violation and select the last span to protect from the start
	 * address all the way to the end of the register address space
	 */
	gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL,
		  A6XX_CP_PROTECT_CNTL_ACCESS_PROT_EN |
		  A6XX_CP_PROTECT_CNTL_ACCESS_FAULT_ON_VIOL_EN |
		  A6XX_CP_PROTECT_CNTL_LAST_SPAN_INF_RANGE);

	for (i = 0; i < protect->count - 1; i++) {
		/* Intentionally skip writing to some registers */
		if (protect->regs[i])
			gpu_write(gpu, REG_A6XX_CP_PROTECT(i), protect->regs[i]);
	}
	/* last CP_PROTECT to have "infinite" length on the last entry */
	gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]);
}

/*
 * Derive the UBWC configuration for this GPU: start from the SoC-common
 * config and apply per-SKU overrides for swizzle and highest bank bit,
 * warning once if the result disagrees with the common config.
 * Returns 0 on success or a PTR_ERR from the common-config lookup.
 */
static int a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
{
	const struct qcom_ubwc_cfg_data *common_cfg;
	struct qcom_ubwc_cfg_data *cfg = &gpu->_ubwc_config;

	/* Inherit the common config and make some necessary fixups */
	common_cfg = qcom_ubwc_config_get_data();
	if (IS_ERR(common_cfg))
		return PTR_ERR(common_cfg);

	/* Copy the data into the internal struct to drop the const qualifier (temporarily) */
	*cfg = *common_cfg;

	/* Use common config as is for A8x */
	if (!adreno_is_a8xx(gpu)) {
		cfg->ubwc_swizzle = 0x6;
		cfg->highest_bank_bit = 15;
	}

	if (adreno_is_a610(gpu)) {
		cfg->highest_bank_bit = 13;
		cfg->ubwc_swizzle = 0x7;
	}

	if (adreno_is_a612(gpu))
		cfg->highest_bank_bit = 14;

	if (adreno_is_a618(gpu))
		cfg->highest_bank_bit = 14;

	if (adreno_is_a619(gpu))
		/* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
		cfg->highest_bank_bit = 13;

	if (adreno_is_a619_holi(gpu))
		cfg->highest_bank_bit = 13;

	if (adreno_is_a621(gpu))
		cfg->highest_bank_bit = 13;

	if (adreno_is_a623(gpu))
		cfg->highest_bank_bit = 16;

	if (adreno_is_a650(gpu) ||
	    adreno_is_a660(gpu) ||
	    adreno_is_a690(gpu) ||
	    adreno_is_a730(gpu) ||
	    adreno_is_a740_family(gpu)) {
		/* TODO: get ddr type from bootloader and use 15 for LPDDR4 */
		cfg->highest_bank_bit = 16;
	}

	if (adreno_is_a663(gpu)) {
		cfg->highest_bank_bit = 13;
		cfg->ubwc_swizzle = 0x4;
	}

	if (adreno_is_7c3(gpu))
		cfg->highest_bank_bit = 14;

	if (adreno_is_a702(gpu))
		cfg->highest_bank_bit = 14;

	if (cfg->highest_bank_bit != common_cfg->highest_bank_bit)
		DRM_WARN_ONCE("Inconclusive highest_bank_bit value: %u (GPU) vs %u (UBWC_CFG)\n",
			      cfg->highest_bank_bit, common_cfg->highest_bank_bit);

	if (cfg->ubwc_swizzle != common_cfg->ubwc_swizzle)
		DRM_WARN_ONCE("Inconclusive ubwc_swizzle value: %u (GPU) vs %u (UBWC_CFG)\n",
			      cfg->ubwc_swizzle, common_cfg->ubwc_swizzle);

	gpu->ubwc_config = &gpu->_ubwc_config;

	return 0;
}

/*
 * Write the UBWC configuration computed by a6xx_calc_ubwc_config() into
 * the RB/TPL1/SP/UCHE (and, on a7xx, per-pipe GRAS) mode-control
 * registers.
 */
static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config;
	/*
	 * We subtract 13 from the highest bank bit (13 is the minimum value
	 * allowed by hw) and write the lowest two bits of the remaining value
	 * as hbb_lo and the one above it as hbb_hi to the hardware.
	 */
	BUG_ON(cfg->highest_bank_bit < 13);
	u32 hbb = cfg->highest_bank_bit - 13;
	bool rgb565_predicator = cfg->ubwc_enc_version >= UBWC_4_0;
	u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2);
	bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg);
	bool amsbc = cfg->ubwc_enc_version >= UBWC_3_0;
	bool min_acc_len_64b = false;
	u8 uavflagprd_inv = 0;
	u32 hbb_hi = hbb >> 2;
	u32 hbb_lo = hbb & 3;

	if (adreno_is_a650_family(adreno_gpu) || adreno_is_a7xx(adreno_gpu))
		uavflagprd_inv = 2;

	if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
		min_acc_len_64b = true;

	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
		  level2_swizzling_dis << 12 |
		  rgb565_predicator << 11 |
		  hbb_hi << 10 | amsbc << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);

	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
		  level2_swizzling_dis << 6 | hbb_hi << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);

	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
		  level2_swizzling_dis << 12 | hbb_hi << 10 |
		  uavflagprd_inv << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);

	if (adreno_is_a7xx(adreno_gpu)) {
		/* Program GRAS per pipe (BR and BV), then release the aperture */
		for (u32 pipe_id = PIPE_BR; pipe_id <= PIPE_BV; pipe_id++) {
			gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
				  A7XX_CP_APERTURE_CNTL_HOST_PIPE(pipe_id));
			gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL,
				  FIELD_PREP(GENMASK(8, 5), hbb_lo));
		}
		gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
			  A7XX_CP_APERTURE_CNTL_HOST_PIPE(PIPE_NONE));
	}

	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL,
		  min_acc_len_64b << 23 | hbb_lo << 21);

	gpu_write(gpu, REG_A6XX_RBBM_NC_MODE_CNTL,
		  cfg->macrotile_mode);
}

/*
 * Populate the power-up register list buffer shared with the CP: static
 * IFPC registers, static IFPC+preemption registers, and a dynamic
 * per-pipe triplet list, each captured with its current value.
 */
static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	const struct adreno_reglist_list *reglist;
	const struct adreno_reglist_pipe_list *dyn_pwrup_reglist;
	void *ptr = a6xx_gpu->pwrup_reglist_ptr;
	struct cpu_gpu_lock *lock = ptr;
	u32 *dest = (u32 *)&lock->regs[0];
	u32 dyn_pwrup_reglist_count = 0;
	int i;

	lock->gpu_req = lock->cpu_req = lock->turn = 0;

	reglist = adreno_gpu->info->a6xx->ifpc_reglist;
	if (reglist) {
		lock->ifpc_list_len = reglist->count;

		/*
		 * For each entry in each of the lists, write the offset and the current
		 * register value into the GPU buffer
		 */
		for (i = 0; i < reglist->count; i++) {
			*dest++ = reglist->regs[i];
			*dest++ = gpu_read(gpu, reglist->regs[i]);
		}
	}

	reglist = adreno_gpu->info->a6xx->pwrup_reglist;
	lock->preemption_list_len = reglist->count;

	for (i = 0; i < reglist->count; i++) {
		*dest++ = reglist->regs[i];
		*dest++ = gpu_read(gpu, reglist->regs[i]);
	}

	/*
	 * The overall register list is composed of
	 * 1. Static IFPC-only registers
	 * 2. Static IFPC + preemption registers
	 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
	 *
	 * The first two lists are static. Size of these lists are stored as
	 * number of pairs in ifpc_list_len and preemption_list_len
	 * respectively. With concurrent binning, Some of the perfcounter
	 * registers being virtualized, CP needs to know the pipe id to program
	 * the aperture inorder to restore the same. Thus, third list is a
	 * dynamic list with triplets as
	 * (<aperture, shifted 12 bits> <address> <data>), and the length is
	 * stored as number for triplets in dynamic_list_len.
	 */
	dyn_pwrup_reglist = adreno_gpu->info->a6xx->dyn_pwrup_reglist;
	if (dyn_pwrup_reglist) {
		for (u32 pipe_id = PIPE_BR; pipe_id <= PIPE_BV; pipe_id++) {
			gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
				  A7XX_CP_APERTURE_CNTL_HOST_PIPE(pipe_id));
			for (i = 0; i < dyn_pwrup_reglist->count; i++) {
				/* Only capture entries that apply to this pipe */
				if ((dyn_pwrup_reglist->regs[i].pipe & BIT(pipe_id)) == 0)
					continue;
				*dest++ = A7XX_CP_APERTURE_CNTL_HOST_PIPE(pipe_id);
				*dest++ = dyn_pwrup_reglist->regs[i].offset;
				*dest++ = gpu_read(gpu, dyn_pwrup_reglist->regs[i].offset);
				dyn_pwrup_reglist_count++;
			}
		}
		gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
			  A7XX_CP_APERTURE_CNTL_HOST_PIPE(PIPE_NONE));
	}
	lock->dynamic_list_len = dyn_pwrup_reglist_count;
}

/*
 * Prime the preemption state on ring 0 when more than one ring is in
 * use: disable CP protection, program the preemption pseudo registers,
 * and emit an initial yield point.
 * NOTE(review): the tail of this function is truncated in this chunk of
 * the file; it is preserved exactly as found.
 */
static int a7xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings <= 1)
		return 0;

	/* Turn CP protection off */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Generate interrupt on preemption completion */
	OUT_RING(ring, 0x00);

	a6xx_flush(gpu, ring);

	return a6xx_idle(gpu, ring) ?
0 : -EINVAL; 967 } 968 969 static int a6xx_cp_init(struct msm_gpu *gpu) 970 { 971 struct msm_ringbuffer *ring = gpu->rb[0]; 972 973 OUT_PKT7(ring, CP_ME_INIT, 8); 974 975 OUT_RING(ring, 0x0000002f); 976 977 /* Enable multiple hardware contexts */ 978 OUT_RING(ring, 0x00000003); 979 980 /* Enable error detection */ 981 OUT_RING(ring, 0x20000000); 982 983 /* Don't enable header dump */ 984 OUT_RING(ring, 0x00000000); 985 OUT_RING(ring, 0x00000000); 986 987 /* No workarounds enabled */ 988 OUT_RING(ring, 0x00000000); 989 990 /* Pad rest of the cmds with 0's */ 991 OUT_RING(ring, 0x00000000); 992 OUT_RING(ring, 0x00000000); 993 994 a6xx_flush(gpu, ring); 995 return a6xx_idle(gpu, ring) ? 0 : -EINVAL; 996 } 997 998 static int a7xx_cp_init(struct msm_gpu *gpu) 999 { 1000 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1001 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1002 struct msm_ringbuffer *ring = gpu->rb[0]; 1003 u32 mask; 1004 1005 /* Disable concurrent binning before sending CP init */ 1006 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 1007 OUT_RING(ring, BIT(27)); 1008 1009 OUT_PKT7(ring, CP_ME_INIT, 7); 1010 1011 /* Use multiple HW contexts */ 1012 mask = BIT(0); 1013 1014 /* Enable error detection */ 1015 mask |= BIT(1); 1016 1017 /* Set default reset state */ 1018 mask |= BIT(3); 1019 1020 /* Disable save/restore of performance counters across preemption */ 1021 mask |= BIT(6); 1022 1023 /* Enable the register init list with the spinlock */ 1024 mask |= BIT(8); 1025 1026 OUT_RING(ring, mask); 1027 1028 /* Enable multiple hardware contexts */ 1029 OUT_RING(ring, 0x00000003); 1030 1031 /* Enable error detection */ 1032 OUT_RING(ring, 0x20000000); 1033 1034 /* Operation mode mask */ 1035 OUT_RING(ring, 0x00000002); 1036 1037 /* *Don't* send a power up reg list for concurrent binning (TODO) */ 1038 /* Lo address */ 1039 OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova)); 1040 /* Hi address */ 1041 OUT_RING(ring, 
upper_32_bits(a6xx_gpu->pwrup_reglist_iova)); 1042 /* BIT(31) set => read the regs from the list */ 1043 OUT_RING(ring, BIT(31)); 1044 1045 a6xx_flush(gpu, ring); 1046 return a6xx_idle(gpu, ring) ? 0 : -EINVAL; 1047 } 1048 1049 /* 1050 * Check that the microcode version is new enough to include several key 1051 * security fixes. Return true if the ucode is safe. 1052 */ 1053 static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, 1054 struct drm_gem_object *obj) 1055 { 1056 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 1057 struct msm_gpu *gpu = &adreno_gpu->base; 1058 const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE]; 1059 u32 *buf = msm_gem_get_vaddr(obj); 1060 bool ret = false; 1061 1062 if (IS_ERR(buf)) 1063 return false; 1064 1065 /* A7xx is safe! */ 1066 if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) 1067 return true; 1068 1069 /* 1070 * Targets up to a640 (a618, a630 and a640) need to check for a 1071 * microcode version that is patched to support the whereami opcode or 1072 * one that is new enough to include it by default. 1073 * 1074 * a650 tier targets don't need whereami but still need to be 1075 * equal to or newer than 0.95 for other security fixes 1076 * 1077 * a660 targets have all the critical security fixes from the start 1078 */ 1079 if (!strcmp(sqe_name, "a630_sqe.fw")) { 1080 /* 1081 * If the lowest nibble is 0xa that is an indication that this 1082 * microcode has been patched. 
The actual version is in dword 1083 * [3] but we only care about the patchlevel which is the lowest 1084 * nibble of dword [3] 1085 * 1086 * Otherwise check that the firmware is greater than or equal 1087 * to 1.90 which was the first version that had this fix built 1088 * in 1089 */ 1090 if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) || 1091 (buf[0] & 0xfff) >= 0x190) { 1092 a6xx_gpu->has_whereami = true; 1093 ret = true; 1094 goto out; 1095 } 1096 1097 DRM_DEV_ERROR(&gpu->pdev->dev, 1098 "a630 SQE ucode is too old. Have version %x need at least %x\n", 1099 buf[0] & 0xfff, 0x190); 1100 } else if (!strcmp(sqe_name, "a650_sqe.fw")) { 1101 if ((buf[0] & 0xfff) >= 0x095) { 1102 ret = true; 1103 goto out; 1104 } 1105 1106 DRM_DEV_ERROR(&gpu->pdev->dev, 1107 "a650 SQE ucode is too old. Have version %x need at least %x\n", 1108 buf[0] & 0xfff, 0x095); 1109 } else if (!strcmp(sqe_name, "a660_sqe.fw")) { 1110 ret = true; 1111 } else { 1112 DRM_DEV_ERROR(&gpu->pdev->dev, 1113 "unknown GPU, add it to a6xx_ucode_check_version()!!\n"); 1114 } 1115 out: 1116 msm_gem_put_vaddr(obj); 1117 return ret; 1118 } 1119 1120 static int a6xx_ucode_load(struct msm_gpu *gpu) 1121 { 1122 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1123 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1124 1125 if (!a6xx_gpu->sqe_bo) { 1126 a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu, 1127 adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova); 1128 1129 if (IS_ERR(a6xx_gpu->sqe_bo)) { 1130 int ret = PTR_ERR(a6xx_gpu->sqe_bo); 1131 1132 a6xx_gpu->sqe_bo = NULL; 1133 DRM_DEV_ERROR(&gpu->pdev->dev, 1134 "Could not allocate SQE ucode: %d\n", ret); 1135 1136 return ret; 1137 } 1138 1139 msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw"); 1140 if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) { 1141 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm); 1142 drm_gem_object_put(a6xx_gpu->sqe_bo); 1143 1144 a6xx_gpu->sqe_bo = NULL; 1145 return -EPERM; 1146 } 1147 } 1148 1149 if (!a6xx_gpu->aqe_bo && 
adreno_gpu->fw[ADRENO_FW_AQE]) { 1150 a6xx_gpu->aqe_bo = adreno_fw_create_bo(gpu, 1151 adreno_gpu->fw[ADRENO_FW_AQE], &a6xx_gpu->aqe_iova); 1152 1153 if (IS_ERR(a6xx_gpu->aqe_bo)) { 1154 int ret = PTR_ERR(a6xx_gpu->aqe_bo); 1155 1156 a6xx_gpu->aqe_bo = NULL; 1157 DRM_DEV_ERROR(&gpu->pdev->dev, 1158 "Could not allocate AQE ucode: %d\n", ret); 1159 1160 return ret; 1161 } 1162 1163 msm_gem_object_set_name(a6xx_gpu->aqe_bo, "aqefw"); 1164 } 1165 1166 /* 1167 * Expanded APRIV and targets that support WHERE_AM_I both need a 1168 * privileged buffer to store the RPTR shadow 1169 */ 1170 if ((adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) && 1171 !a6xx_gpu->shadow_bo) { 1172 a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev, 1173 sizeof(u32) * gpu->nr_rings, 1174 MSM_BO_WC | MSM_BO_MAP_PRIV, 1175 gpu->vm, &a6xx_gpu->shadow_bo, 1176 &a6xx_gpu->shadow_iova); 1177 1178 if (IS_ERR(a6xx_gpu->shadow)) 1179 return PTR_ERR(a6xx_gpu->shadow); 1180 1181 msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow"); 1182 } 1183 1184 a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PAGE_SIZE, 1185 MSM_BO_WC | MSM_BO_MAP_PRIV, 1186 gpu->vm, &a6xx_gpu->pwrup_reglist_bo, 1187 &a6xx_gpu->pwrup_reglist_iova); 1188 1189 if (IS_ERR(a6xx_gpu->pwrup_reglist_ptr)) 1190 return PTR_ERR(a6xx_gpu->pwrup_reglist_ptr); 1191 1192 msm_gem_object_set_name(a6xx_gpu->pwrup_reglist_bo, "pwrup_reglist"); 1193 1194 return 0; 1195 } 1196 1197 int a6xx_zap_shader_init(struct msm_gpu *gpu) 1198 { 1199 static bool loaded; 1200 int ret; 1201 1202 if (loaded) 1203 return 0; 1204 1205 ret = adreno_zap_shader_load(gpu, GPU_PAS_ID); 1206 1207 loaded = !ret; 1208 return ret; 1209 } 1210 1211 #define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \ 1212 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \ 1213 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ 1214 A6XX_RBBM_INT_0_MASK_CP_IB2 | \ 1215 A6XX_RBBM_INT_0_MASK_CP_IB1 | \ 1216 A6XX_RBBM_INT_0_MASK_CP_RB | \ 1217 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ 1218 
A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \ 1219 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ 1220 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ 1221 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR) 1222 1223 #define A7XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \ 1224 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \ 1225 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \ 1226 A6XX_RBBM_INT_0_MASK_CP_SW | \ 1227 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ 1228 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \ 1229 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \ 1230 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ 1231 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \ 1232 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ 1233 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ 1234 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ 1235 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ 1236 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) 1237 1238 #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ 1239 A6XX_CP_APRIV_CNTL_RBFETCH | \ 1240 A6XX_CP_APRIV_CNTL_RBPRIVLEVEL | \ 1241 A6XX_CP_APRIV_CNTL_RBRPWB) 1242 1243 #define A7XX_BR_APRIVMASK (A7XX_APRIV_MASK | \ 1244 A6XX_CP_APRIV_CNTL_CDREAD | \ 1245 A6XX_CP_APRIV_CNTL_CDWRITE) 1246 1247 static int hw_init(struct msm_gpu *gpu) 1248 { 1249 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1250 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1251 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 1252 u64 gmem_range_min; 1253 unsigned int i; 1254 int ret; 1255 1256 if (!adreno_has_gmu_wrapper(adreno_gpu)) { 1257 /* Make sure the GMU keeps the GPU on while we set it up */ 1258 ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); 1259 if (ret) 1260 return ret; 1261 } 1262 1263 /* Clear GBIF halt in case GX domain was not collapsed */ 1264 if (adreno_is_a619_holi(adreno_gpu)) { 1265 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0); 1266 gpu_read(gpu, REG_A6XX_GBIF_HALT); 1267 1268 gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, 0); 1269 gpu_read(gpu, REG_A6XX_RBBM_GPR0_CNTL); 1270 } else if (a6xx_has_gbif(adreno_gpu)) { 1271 gpu_write(gpu, 
REG_A6XX_GBIF_HALT, 0); 1272 gpu_read(gpu, REG_A6XX_GBIF_HALT); 1273 1274 gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0); 1275 gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT); 1276 } 1277 1278 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0); 1279 1280 if (adreno_is_a619_holi(adreno_gpu)) 1281 a6xx_sptprac_enable(gmu); 1282 1283 /* 1284 * Disable the trusted memory range - we don't actually supported secure 1285 * memory rendering at this point in time and we don't want to block off 1286 * part of the virtual memory space. 1287 */ 1288 gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000); 1289 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); 1290 1291 if (!adreno_is_a7xx(adreno_gpu)) { 1292 /* Turn on 64 bit addressing for all blocks */ 1293 gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1); 1294 gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1); 1295 gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1); 1296 gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1); 1297 gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1); 1298 gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1); 1299 gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1); 1300 gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1); 1301 gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1); 1302 gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1); 1303 gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1); 1304 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1); 1305 } 1306 1307 /* enable hardware clockgating */ 1308 a6xx_set_hwcg(gpu, true); 1309 1310 /* For gmuwrapper implementations, do the VBIF/GBIF CX configuration here */ 1311 if (adreno_is_a610_family(adreno_gpu)) { 1312 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620); 1313 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620); 1314 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620); 1315 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620); 1316 } 1317 1318 if (adreno_is_a610_family(adreno_gpu) || 1319 adreno_is_a640_family(adreno_gpu) || 1320 
adreno_is_a650_family(adreno_gpu)) { 1321 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3); 1322 } else if (adreno_is_a7xx(adreno_gpu)) { 1323 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x2120212); 1324 } else { 1325 gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3); 1326 } 1327 1328 if (adreno_is_a630(adreno_gpu)) 1329 gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009); 1330 1331 if (adreno_is_a7xx(adreno_gpu)) 1332 gpu_write(gpu, REG_A6XX_UCHE_GBIF_GX_CONFIG, 0x10240e0); 1333 1334 /* Make all blocks contribute to the GPU BUSY perf counter */ 1335 gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff); 1336 1337 /* Disable L2 bypass in the UCHE */ 1338 if (adreno_is_a7xx(adreno_gpu)) { 1339 gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); 1340 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); 1341 } else { 1342 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0); 1343 gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); 1344 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); 1345 } 1346 1347 if (!(adreno_is_a650_family(adreno_gpu) || 1348 adreno_is_a702(adreno_gpu) || 1349 adreno_is_a730(adreno_gpu))) { 1350 gmem_range_min = adreno_is_a740_family(adreno_gpu) ? 
SZ_16M : SZ_1M; 1351 1352 /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */ 1353 gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN, gmem_range_min); 1354 1355 gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX, 1356 gmem_range_min + adreno_gpu->info->gmem - 1); 1357 } 1358 1359 if (adreno_is_a7xx(adreno_gpu)) 1360 gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, BIT(23)); 1361 else { 1362 gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804); 1363 gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4); 1364 } 1365 1366 if (adreno_is_a640_family(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) { 1367 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140); 1368 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c); 1369 } else if (adreno_is_a610_family(adreno_gpu)) { 1370 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060); 1371 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16); 1372 } else if (!adreno_is_a7xx(adreno_gpu)) { 1373 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0); 1374 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c); 1375 } 1376 1377 if (adreno_is_a660_family(adreno_gpu)) 1378 gpu_write(gpu, REG_A7XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020); 1379 1380 /* Setting the mem pool size */ 1381 if (adreno_is_a610(adreno_gpu) || adreno_is_a612(adreno_gpu)) { 1382 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 48); 1383 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 47); 1384 } else if (adreno_is_a702(adreno_gpu)) { 1385 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 64); 1386 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 63); 1387 } else if (!adreno_is_a7xx(adreno_gpu)) 1388 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128); 1389 1390 1391 /* Set the default primFifo threshold values */ 1392 if (adreno_gpu->info->a6xx->prim_fifo_threshold) 1393 gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 1394 adreno_gpu->info->a6xx->prim_fifo_threshold); 1395 1396 /* Set the AHB default slave response to "ERROR" */ 1397 gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1); 1398 1399 /* Turn 
on performance counters */ 1400 gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1); 1401 1402 if (adreno_is_a7xx(adreno_gpu)) { 1403 /* Turn on the IFPC counter (countable 4 on XOCLK4) */ 1404 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1, 1405 FIELD_PREP(GENMASK(7, 0), 0x4)); 1406 } 1407 1408 /* Select CP0 to always count cycles */ 1409 gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT); 1410 1411 a6xx_set_ubwc_config(gpu); 1412 1413 /* Enable fault detection */ 1414 if (adreno_is_a612(adreno_gpu) || 1415 adreno_is_a730(adreno_gpu) || 1416 adreno_is_a740_family(adreno_gpu)) 1417 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff); 1418 else if (adreno_is_a690(adreno_gpu)) 1419 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x4fffff); 1420 else if (adreno_is_a619(adreno_gpu)) 1421 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff); 1422 else if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu)) 1423 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3ffff); 1424 else 1425 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x1fffff); 1426 1427 gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1); 1428 1429 /* Set weights for bicubic filtering */ 1430 if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) { 1431 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0); 1432 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 1433 0x3fe05ff4); 1434 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 1435 0x3fa0ebee); 1436 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 1437 0x3f5193ed); 1438 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 1439 0x3f0243f0); 1440 } 1441 1442 /* Set up the CX GMU counter 0 to count busy ticks */ 1443 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000); 1444 1445 /* Enable the power counter */ 1446 gmu_rmw(gmu, 
REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, BIT(5)); 1447 gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1); 1448 1449 /* Protect registers from the CP */ 1450 a6xx_set_cp_protect(gpu); 1451 1452 if (adreno_is_a660_family(adreno_gpu)) { 1453 if (adreno_is_a690(adreno_gpu)) 1454 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x00028801); 1455 else 1456 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1); 1457 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0); 1458 } else if (adreno_is_a702(adreno_gpu)) { 1459 /* Something to do with the HLSQ cluster */ 1460 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(24)); 1461 } 1462 1463 if (adreno_is_a690(adreno_gpu)) 1464 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90); 1465 /* Set dualQ + disable afull for A660 GPU */ 1466 else if (adreno_is_a660(adreno_gpu) || adreno_is_a663(adreno_gpu)) 1467 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906); 1468 else if (adreno_is_a7xx(adreno_gpu)) 1469 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 1470 FIELD_PREP(GENMASK(19, 16), 6) | 1471 FIELD_PREP(GENMASK(15, 12), 6) | 1472 FIELD_PREP(GENMASK(11, 8), 9) | 1473 BIT(3) | BIT(2) | 1474 FIELD_PREP(GENMASK(1, 0), 2)); 1475 1476 /* Enable expanded apriv for targets that support it */ 1477 if (gpu->hw_apriv) { 1478 if (adreno_is_a7xx(adreno_gpu)) { 1479 gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL, 1480 A7XX_BR_APRIVMASK); 1481 gpu_write(gpu, REG_A7XX_CP_BV_APRIV_CNTL, 1482 A7XX_APRIV_MASK); 1483 gpu_write(gpu, REG_A7XX_CP_LPAC_APRIV_CNTL, 1484 A7XX_APRIV_MASK); 1485 } else 1486 gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL, 1487 BIT(6) | BIT(5) | BIT(3) | BIT(2) | BIT(1)); 1488 } 1489 1490 if (adreno_is_a750(adreno_gpu)) { 1491 /* Disable ubwc merged UFC request feature */ 1492 gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(19), BIT(19)); 1493 1494 /* Enable TP flaghint and other performance settings */ 1495 gpu_write(gpu, REG_A6XX_TPL1_DBG_ECO_CNTL1, 0xc0700); 1496 } else if (adreno_is_a7xx(adreno_gpu)) { 1497 /* Disable non-ubwc read reqs from passing 
write reqs */ 1498 gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(11), BIT(11)); 1499 } 1500 1501 /* Enable interrupts */ 1502 gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 1503 adreno_is_a7xx(adreno_gpu) ? A7XX_INT_MASK : A6XX_INT_MASK); 1504 1505 ret = adreno_hw_init(gpu); 1506 if (ret) 1507 goto out; 1508 1509 gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova); 1510 1511 /* Set the ringbuffer address */ 1512 gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova); 1513 1514 /* Targets that support extended APRIV can use the RPTR shadow from 1515 * hardware but all the other ones need to disable the feature. Targets 1516 * that support the WHERE_AM_I opcode can use that instead 1517 */ 1518 if (adreno_gpu->base.hw_apriv) 1519 gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT); 1520 else 1521 gpu_write(gpu, REG_A6XX_CP_RB_CNTL, 1522 MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); 1523 1524 /* Configure the RPTR shadow if needed: */ 1525 if (a6xx_gpu->shadow_bo) { 1526 gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, 1527 shadowptr(a6xx_gpu, gpu->rb[0])); 1528 for (unsigned int i = 0; i < gpu->nr_rings; i++) 1529 a6xx_gpu->shadow[i] = 0; 1530 } 1531 1532 /* ..which means "always" on A7xx, also for BV shadow */ 1533 if (adreno_is_a7xx(adreno_gpu)) { 1534 gpu_write64(gpu, REG_A7XX_CP_BV_RB_RPTR_ADDR, 1535 rbmemptr(gpu->rb[0], bv_rptr)); 1536 } 1537 1538 a6xx_preempt_hw_init(gpu); 1539 1540 /* Always come up on rb 0 */ 1541 a6xx_gpu->cur_ring = gpu->rb[0]; 1542 1543 for (i = 0; i < gpu->nr_rings; i++) 1544 gpu->rb[i]->cur_ctx_seqno = 0; 1545 1546 /* Enable the SQE_to start the CP engine */ 1547 gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1); 1548 1549 if (adreno_is_a7xx(adreno_gpu) && !a6xx_gpu->pwrup_reglist_emitted) { 1550 a7xx_patch_pwrup_reglist(gpu); 1551 a6xx_gpu->pwrup_reglist_emitted = true; 1552 } 1553 1554 ret = adreno_is_a7xx(adreno_gpu) ? 
a7xx_cp_init(gpu) : a6xx_cp_init(gpu); 1555 if (ret) 1556 goto out; 1557 1558 /* 1559 * Try to load a zap shader into the secure world. If successful 1560 * we can use the CP to switch out of secure mode. If not then we 1561 * have no resource but to try to switch ourselves out manually. If we 1562 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will 1563 * be blocked and a permissions violation will soon follow. 1564 */ 1565 ret = a6xx_zap_shader_init(gpu); 1566 if (!ret) { 1567 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1); 1568 OUT_RING(gpu->rb[0], 0x00000000); 1569 1570 a6xx_flush(gpu, gpu->rb[0]); 1571 if (!a6xx_idle(gpu, gpu->rb[0])) 1572 return -EINVAL; 1573 } else if (ret == -ENODEV) { 1574 /* 1575 * This device does not use zap shader (but print a warning 1576 * just in case someone got their dt wrong.. hopefully they 1577 * have a debug UART to realize the error of their ways... 1578 * if you mess this up you are about to crash horribly) 1579 */ 1580 dev_warn_once(gpu->dev->dev, 1581 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n"); 1582 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0); 1583 ret = 0; 1584 } else { 1585 return ret; 1586 } 1587 1588 out: 1589 if (adreno_has_gmu_wrapper(adreno_gpu)) 1590 return ret; 1591 1592 /* Last step - yield the ringbuffer */ 1593 a7xx_preempt_start(gpu); 1594 1595 /* 1596 * Tell the GMU that we are done touching the GPU and it can start power 1597 * management 1598 */ 1599 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); 1600 1601 if (a6xx_gpu->gmu.legacy) { 1602 /* Take the GMU out of its special boot mode */ 1603 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER); 1604 } 1605 1606 return ret; 1607 } 1608 1609 static int a6xx_hw_init(struct msm_gpu *gpu) 1610 { 1611 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1612 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1613 int ret; 1614 1615 mutex_lock(&a6xx_gpu->gmu.lock); 1616 ret = hw_init(gpu); 1617 
mutex_unlock(&a6xx_gpu->gmu.lock); 1618 1619 return ret; 1620 } 1621 1622 static void a6xx_dump(struct msm_gpu *gpu) 1623 { 1624 DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n", 1625 gpu_read(gpu, REG_A6XX_RBBM_STATUS)); 1626 adreno_dump(gpu); 1627 } 1628 1629 static void a6xx_recover(struct msm_gpu *gpu) 1630 { 1631 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1632 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1633 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 1634 int active_submits; 1635 1636 adreno_dump_info(gpu); 1637 1638 if (a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) { 1639 /* Sometimes crashstate capture is skipped, so SQE should be halted here again */ 1640 gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); 1641 1642 if (hang_debug) 1643 a6xx_dump(gpu); 1644 1645 } 1646 1647 /* 1648 * To handle recovery specific sequences during the rpm suspend we are 1649 * about to trigger 1650 */ 1651 1652 a6xx_gpu->hung = true; 1653 1654 pm_runtime_dont_use_autosuspend(&gpu->pdev->dev); 1655 1656 /* active_submit won't change until we make a submission */ 1657 mutex_lock(&gpu->active_lock); 1658 active_submits = gpu->active_submits; 1659 1660 /* 1661 * Temporarily clear active_submits count to silence a WARN() in the 1662 * runtime suspend cb 1663 */ 1664 gpu->active_submits = 0; 1665 1666 if (adreno_has_gmu_wrapper(adreno_gpu) || adreno_has_rgmu(adreno_gpu)) { 1667 /* Drain the outstanding traffic on memory buses */ 1668 adreno_gpu->funcs->bus_halt(adreno_gpu, true); 1669 1670 /* Reset the GPU to a clean state */ 1671 a6xx_gpu_sw_reset(gpu, true); 1672 a6xx_gpu_sw_reset(gpu, false); 1673 } 1674 1675 reinit_completion(&gmu->pd_gate); 1676 dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb); 1677 dev_pm_genpd_synced_poweroff(gmu->cxpd); 1678 1679 /* Drop the rpm refcount from active submits */ 1680 if (active_submits) 1681 pm_runtime_put(&gpu->pdev->dev); 1682 1683 /* And the final one from recover worker */ 1684 pm_runtime_put_sync(&gpu->pdev->dev); 1685 1686 if 
(!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000))) 1687 DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n"); 1688 1689 dev_pm_genpd_remove_notifier(gmu->cxpd); 1690 1691 pm_runtime_use_autosuspend(&gpu->pdev->dev); 1692 1693 if (active_submits) 1694 pm_runtime_get(&gpu->pdev->dev); 1695 1696 pm_runtime_get_sync(&gpu->pdev->dev); 1697 1698 gpu->active_submits = active_submits; 1699 mutex_unlock(&gpu->active_lock); 1700 1701 msm_gpu_hw_init(gpu); 1702 a6xx_gpu->hung = false; 1703 } 1704 1705 static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid) 1706 { 1707 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1708 static const char *uche_clients[7] = { 1709 "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", 1710 }; 1711 u32 val; 1712 1713 if (adreno_is_a7xx(adreno_gpu)) { 1714 if (mid != 1 && mid != 2 && mid != 3 && mid != 8) 1715 return "UNKNOWN"; 1716 } else { 1717 if (mid < 1 || mid > 3) 1718 return "UNKNOWN"; 1719 } 1720 1721 /* 1722 * The source of the data depends on the mid ID read from FSYNR1. 1723 * and the client ID read from the UCHE block 1724 */ 1725 val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF); 1726 1727 if (adreno_is_a7xx(adreno_gpu)) { 1728 /* Bit 3 for mid=3 indicates BR or BV */ 1729 static const char *uche_clients_a7xx[16] = { 1730 "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", 1731 "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP", 1732 "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", 1733 "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP", 1734 }; 1735 1736 /* LPAC has the same clients as BR and BV, but because it is 1737 * compute-only some of them do not exist and there are holes 1738 * in the array. 
1739 */ 1740 static const char *uche_clients_lpac_a7xx[8] = { 1741 "-", "LPAC_SP", "-", "-", 1742 "LPAC_HLSQ", "-", "-", "LPAC_TP", 1743 }; 1744 1745 val &= GENMASK(6, 0); 1746 1747 /* mid=3 refers to BR or BV */ 1748 if (mid == 3) { 1749 if (val < ARRAY_SIZE(uche_clients_a7xx)) 1750 return uche_clients_a7xx[val]; 1751 else 1752 return "UCHE"; 1753 } 1754 1755 /* mid=8 refers to LPAC */ 1756 if (mid == 8) { 1757 if (val < ARRAY_SIZE(uche_clients_lpac_a7xx)) 1758 return uche_clients_lpac_a7xx[val]; 1759 else 1760 return "UCHE_LPAC"; 1761 } 1762 1763 /* mid=2 is a catchall for everything else in LPAC */ 1764 if (mid == 2) 1765 return "UCHE_LPAC"; 1766 1767 /* mid=1 is a catchall for everything else in BR/BV */ 1768 return "UCHE"; 1769 } else if (adreno_is_a660_family(adreno_gpu)) { 1770 static const char *uche_clients_a660[8] = { 1771 "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", "TP", 1772 }; 1773 1774 static const char *uche_clients_a660_not[8] = { 1775 "not VFD", "not SP", "not VSC", "not VPC", 1776 "not HLSQ", "not PC", "not LRZ", "not TP", 1777 }; 1778 1779 val &= GENMASK(6, 0); 1780 1781 if (mid == 3 && val < ARRAY_SIZE(uche_clients_a660)) 1782 return uche_clients_a660[val]; 1783 1784 if (mid == 1 && val < ARRAY_SIZE(uche_clients_a660_not)) 1785 return uche_clients_a660_not[val]; 1786 1787 return "UCHE"; 1788 } else { 1789 /* mid = 3 is most precise and refers to only one block per client */ 1790 if (mid == 3) 1791 return uche_clients[val & 7]; 1792 1793 /* For mid=2 the source is TP or VFD except when the client id is 0 */ 1794 if (mid == 2) 1795 return ((val & 7) == 0) ? 
"TP" : "TP|VFD"; 1796 1797 /* For mid=1 just return "UCHE" as a catchall for everything else */ 1798 return "UCHE"; 1799 } 1800 } 1801 1802 static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id) 1803 { 1804 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1805 1806 if (id == 0) 1807 return "CP"; 1808 else if (id == 4) 1809 return "CCU"; 1810 else if (id == 6) 1811 return "CDP Prefetch"; 1812 else if (id == 7) 1813 return "GMU"; 1814 else if (id == 5 && adreno_is_a7xx(adreno_gpu)) 1815 return "Flag cache"; 1816 1817 return a6xx_uche_fault_block(gpu, id); 1818 } 1819 1820 static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data) 1821 { 1822 struct msm_gpu *gpu = arg; 1823 struct adreno_smmu_fault_info *info = data; 1824 const char *block = "unknown"; 1825 1826 u32 scratch[] = { 1827 gpu_read(gpu, REG_A6XX_CP_SCRATCH(4)), 1828 gpu_read(gpu, REG_A6XX_CP_SCRATCH(5)), 1829 gpu_read(gpu, REG_A6XX_CP_SCRATCH(6)), 1830 gpu_read(gpu, REG_A6XX_CP_SCRATCH(7)), 1831 }; 1832 1833 if (info) 1834 block = a6xx_fault_block(gpu, info->fsynr1 & 0xff); 1835 1836 return adreno_fault_handler(gpu, iova, flags, info, block, scratch); 1837 } 1838 1839 static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu) 1840 { 1841 u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS); 1842 1843 if (status & A6XX_CP_INT_CP_OPCODE_ERROR) { 1844 u32 val; 1845 1846 gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1); 1847 val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA); 1848 dev_err_ratelimited(&gpu->pdev->dev, 1849 "CP | opcode error | possible opcode=0x%8.8X\n", 1850 val); 1851 } 1852 1853 if (status & A6XX_CP_INT_CP_UCODE_ERROR) 1854 dev_err_ratelimited(&gpu->pdev->dev, 1855 "CP ucode error interrupt\n"); 1856 1857 if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR) 1858 dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n", 1859 gpu_read(gpu, REG_A6XX_CP_HW_FAULT)); 1860 1861 if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) { 1862 u32 val = gpu_read(gpu, 
/*
 * Handle the RBBM hang-detect interrupt: log a snapshot of the CP state
 * and kick off GPU recovery.  Called from the a6xx IRQ handler, so this
 * runs in hard-irq context.
 */
static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

	/*
	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
	 * but the fault handler will trigger the devcore dump, and we want
	 * to otherwise resume normally rather than killing the submit, so
	 * just bail.
	 */
	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
		return;

	/*
	 * Dump the ring/fence state plus the raw CP pointers and IB state so
	 * the hang can be correlated with the offending submit.  ring may be
	 * NULL if nothing is active, hence the ternaries.
	 */
	DRM_DEV_ERROR(&gpu->pdev->dev,
		"gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
		ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
		gpu_read(gpu, REG_A6XX_RBBM_STATUS),
		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
		gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
		gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));

	/* Turn off the hangcheck timer to keep it from bothering us */
	timer_delete(&gpu->hangcheck_timer);

	/* Turn off interrupts to avoid triggering recovery again */
	gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 0);

	kthread_queue_work(gpu->worker, &gpu->recover_work);
}
/*
 * Top-level a6xx interrupt handler.  Reads and clears RBBM_INT_0_STATUS
 * and dispatches each asserted source to its handler.
 *
 * The keepalive vote is taken first so the GX domain cannot power-collapse
 * (IFPC) between reading the status register and servicing the sources;
 * it is dropped again on every exit path via the 'done' label.
 */
static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;

	/* Set keepalive vote to avoid power collapse after RBBM_INT_0_STATUS is read */
	a6xx_gpu_keepalive_vote(gpu, true);

	/* If the AHB fence isn't owned by us yet, register reads are unsafe */
	if (irq_poll_fence(gpu))
		goto done;

	u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);

	/* Ack everything we observed before handling it */
	gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);

	/* Debug knob: optionally ignore everything but retire interrupts */
	if (priv->disable_err_irq)
		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
		a6xx_fault_detect_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR)
		dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n");

	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a6xx_cp_hw_err_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");

	if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
		a7xx_sw_fuse_violation_irq(gpu);

	/* Retire interrupt: reap finished submits and maybe switch rings */
	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		msm_gpu_retire(gpu);
		a6xx_preempt_trigger(gpu);
	}

	if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
		a6xx_preempt_irq(gpu);

done:
	a6xx_gpu_keepalive_vote(gpu, false);

	return IRQ_HANDLED;
}
* register. The XBL will take care of that. 2053 */ 2054 if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) { 2055 if (!a6xx_gpu->have_mmu500) { 2056 u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice); 2057 2058 gpuhtw_scid &= 0x1f; 2059 cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid); 2060 } 2061 } 2062 2063 if (!cntl1_regval) 2064 return; 2065 2066 /* 2067 * Program the slice IDs for the various GPU blocks and GPU MMU 2068 * pagetables 2069 */ 2070 if (!a6xx_gpu->have_mmu500) { 2071 a6xx_llc_write(a6xx_gpu, 2072 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval); 2073 2074 /* 2075 * Program cacheability overrides to not allocate cache 2076 * lines on a write miss 2077 */ 2078 a6xx_llc_rmw(a6xx_gpu, 2079 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); 2080 return; 2081 } 2082 2083 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval); 2084 } 2085 2086 static void a7xx_llc_activate(struct a6xx_gpu *a6xx_gpu) 2087 { 2088 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 2089 struct msm_gpu *gpu = &adreno_gpu->base; 2090 2091 if (IS_ERR(a6xx_gpu->llc_mmio)) 2092 return; 2093 2094 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { 2095 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); 2096 2097 gpu_scid &= GENMASK(4, 0); 2098 2099 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, 2100 FIELD_PREP(GENMASK(29, 25), gpu_scid) | 2101 FIELD_PREP(GENMASK(24, 20), gpu_scid) | 2102 FIELD_PREP(GENMASK(19, 15), gpu_scid) | 2103 FIELD_PREP(GENMASK(14, 10), gpu_scid) | 2104 FIELD_PREP(GENMASK(9, 5), gpu_scid) | 2105 FIELD_PREP(GENMASK(4, 0), gpu_scid)); 2106 2107 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, 2108 FIELD_PREP(GENMASK(14, 10), gpu_scid) | 2109 BIT(8)); 2110 } 2111 2112 llcc_slice_activate(a6xx_gpu->htw_llc_slice); 2113 } 2114 2115 static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu) 2116 { 2117 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */ 2118 if (adreno_has_gmu_wrapper(&a6xx_gpu->base)) 2119 return; 2120 2121 
llcc_slice_putd(a6xx_gpu->llc_slice); 2122 llcc_slice_putd(a6xx_gpu->htw_llc_slice); 2123 } 2124 2125 static void a6xx_llc_slices_init(struct platform_device *pdev, 2126 struct a6xx_gpu *a6xx_gpu, bool is_a7xx) 2127 { 2128 struct device_node *phandle; 2129 2130 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */ 2131 if (adreno_has_gmu_wrapper(&a6xx_gpu->base)) 2132 return; 2133 2134 /* 2135 * There is a different programming path for A6xx targets with an 2136 * mmu500 attached, so detect if that is the case 2137 */ 2138 phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0); 2139 a6xx_gpu->have_mmu500 = (phandle && 2140 of_device_is_compatible(phandle, "arm,mmu-500")); 2141 of_node_put(phandle); 2142 2143 if (is_a7xx || !a6xx_gpu->have_mmu500) 2144 a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem"); 2145 else 2146 a6xx_gpu->llc_mmio = NULL; 2147 2148 a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU); 2149 a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); 2150 2151 if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice)) 2152 a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); 2153 } 2154 2155 static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) 2156 { 2157 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 2158 struct msm_gpu *gpu = &adreno_gpu->base; 2159 u32 fuse_val; 2160 int ret; 2161 2162 if (adreno_is_a750(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) { 2163 /* 2164 * Assume that if qcom scm isn't available, that whatever 2165 * replacement allows writing the fuse register ourselves. 2166 * Users of alternative firmware need to make sure this 2167 * register is writeable or indicate that it's not somehow. 2168 * Print a warning because if you mess this up you're about to 2169 * crash horribly. 
/* GBIF halt request/ack bits */
#define GBIF_CLIENT_HALT_MASK		BIT(0)
#define GBIF_ARB_HALT_MASK		BIT(1)
/* VBIF (pre-GBIF targets) halt request/ack masks */
#define VBIF_XIN_HALT_CTRL0_MASK	GENMASK(3, 0)
#define VBIF_RESET_ACK_MASK		0xF0
/* a619_holi drives the GBIF halt through RBBM_GPR0_CNTL instead */
#define GPR0_GBIF_HALT_REQUEST		0x1E0

/*
 * Drain all outstanding traffic on the memory buses before a reset or
 * power collapse.  The halt/ack sequence is order-critical: GX-side
 * first (if requested), then client requests, then the AXI arbiter,
 * and finally the halt must be explicitly released.
 */
void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
{
	struct msm_gpu *gpu = &adreno_gpu->base;

	if (adreno_is_a619_holi(adreno_gpu)) {
		/* a619_holi: request the halt via GPR0 and wait for the VBIF ack */
		gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, GPR0_GBIF_HALT_REQUEST);
		spin_until((gpu_read(gpu, REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) &
			   (VBIF_RESET_ACK_MASK)) == VBIF_RESET_ACK_MASK);
	} else if (!a6xx_has_gbif(adreno_gpu)) {
		/* Older targets with a VBIF: halt, wait for ack, release, done */
		gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, VBIF_XIN_HALT_CTRL0_MASK);
		spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) &
			   (VBIF_XIN_HALT_CTRL0_MASK)) == VBIF_XIN_HALT_CTRL0_MASK);
		gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);

		return;
	}

	if (gx_off) {
		/* Halt the gx side of GBIF */
		gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1);
		spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1);
	}

	/* Halt new client requests on GBIF */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
			(GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);

	/* Halt all AXI requests on GBIF */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
			(GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);

	/* The GBIF halt needs to be explicitly cleared */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
}
*/ 2256 gpu_read(gpu, REG_A6XX_RBBM_SW_RESET_CMD); 2257 udelay(1); 2258 2259 /* The reset line needs to be asserted for at least 100 us */ 2260 if (assert) 2261 udelay(100); 2262 } 2263 2264 static int a6xx_gmu_pm_resume(struct msm_gpu *gpu) 2265 { 2266 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2267 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2268 int ret; 2269 2270 gpu->needs_hw_init = true; 2271 2272 trace_msm_gpu_resume(0); 2273 2274 mutex_lock(&a6xx_gpu->gmu.lock); 2275 ret = a6xx_gmu_resume(a6xx_gpu); 2276 mutex_unlock(&a6xx_gpu->gmu.lock); 2277 if (ret) 2278 return ret; 2279 2280 msm_devfreq_resume(gpu); 2281 2282 if (adreno_is_a8xx(adreno_gpu)) 2283 a8xx_llc_activate(a6xx_gpu); 2284 else if (adreno_is_a7xx(adreno_gpu)) 2285 a7xx_llc_activate(a6xx_gpu); 2286 else 2287 a6xx_llc_activate(a6xx_gpu); 2288 2289 return ret; 2290 } 2291 2292 static int a6xx_pm_resume(struct msm_gpu *gpu) 2293 { 2294 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2295 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2296 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 2297 unsigned long freq = gpu->fast_rate; 2298 struct dev_pm_opp *opp; 2299 int ret; 2300 2301 gpu->needs_hw_init = true; 2302 2303 trace_msm_gpu_resume(0); 2304 2305 mutex_lock(&a6xx_gpu->gmu.lock); 2306 2307 opp = dev_pm_opp_find_freq_ceil(&gpu->pdev->dev, &freq); 2308 if (IS_ERR(opp)) { 2309 ret = PTR_ERR(opp); 2310 goto err_set_opp; 2311 } 2312 dev_pm_opp_put(opp); 2313 2314 /* Set the core clock and bus bw, having VDD scaling in mind */ 2315 dev_pm_opp_set_opp(&gpu->pdev->dev, opp); 2316 2317 pm_runtime_resume_and_get(gmu->dev); 2318 pm_runtime_resume_and_get(gmu->gxpd); 2319 2320 ret = clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks); 2321 if (ret) 2322 goto err_bulk_clk; 2323 2324 ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks); 2325 if (ret) { 2326 clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks); 2327 goto err_bulk_clk; 2328 } 2329 2330 if 
(adreno_is_a619_holi(adreno_gpu)) 2331 a6xx_sptprac_enable(gmu); 2332 2333 /* If anything goes south, tear the GPU down piece by piece.. */ 2334 if (ret) { 2335 err_bulk_clk: 2336 pm_runtime_put(gmu->gxpd); 2337 pm_runtime_put(gmu->dev); 2338 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL); 2339 } 2340 err_set_opp: 2341 mutex_unlock(&a6xx_gpu->gmu.lock); 2342 2343 if (!ret) { 2344 msm_devfreq_resume(gpu); 2345 a6xx_llc_activate(a6xx_gpu); 2346 } 2347 2348 return ret; 2349 } 2350 2351 static int a6xx_gmu_pm_suspend(struct msm_gpu *gpu) 2352 { 2353 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2354 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2355 int i, ret; 2356 2357 trace_msm_gpu_suspend(0); 2358 2359 a6xx_llc_deactivate(a6xx_gpu); 2360 2361 msm_devfreq_suspend(gpu); 2362 2363 mutex_lock(&a6xx_gpu->gmu.lock); 2364 ret = a6xx_gmu_stop(a6xx_gpu); 2365 mutex_unlock(&a6xx_gpu->gmu.lock); 2366 if (ret) 2367 return ret; 2368 2369 if (a6xx_gpu->shadow_bo) 2370 for (i = 0; i < gpu->nr_rings; i++) 2371 a6xx_gpu->shadow[i] = 0; 2372 2373 gpu->suspend_count++; 2374 2375 return 0; 2376 } 2377 2378 static int a6xx_pm_suspend(struct msm_gpu *gpu) 2379 { 2380 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2381 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2382 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 2383 int i; 2384 2385 trace_msm_gpu_suspend(0); 2386 2387 a6xx_llc_deactivate(a6xx_gpu); 2388 2389 msm_devfreq_suspend(gpu); 2390 2391 mutex_lock(&a6xx_gpu->gmu.lock); 2392 2393 /* Drain the outstanding traffic on memory buses */ 2394 adreno_gpu->funcs->bus_halt(adreno_gpu, true); 2395 2396 if (adreno_is_a619_holi(adreno_gpu)) 2397 a6xx_sptprac_disable(gmu); 2398 2399 clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks); 2400 clk_bulk_disable_unprepare(gmu->nr_clocks, gmu->clocks); 2401 2402 pm_runtime_put_sync(gmu->gxpd); 2403 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL); 2404 pm_runtime_put_sync(gmu->dev); 2405 2406 mutex_unlock(&a6xx_gpu->gmu.lock); 2407 2408 
/*
 * Tear down the a6xx GPU instance: release the firmware/shadow GEM
 * objects, drop the LLCC slices, remove the GMU and free the adreno
 * core state.  Order matters: GEM objects go first (they reference the
 * GPU VM), the GMU is removed before the core cleanup, and the struct
 * itself is freed last.
 */
static void a6xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* SQE (CP microcode) buffer */
	if (a6xx_gpu->sqe_bo) {
		msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm);
		drm_gem_object_put(a6xx_gpu->sqe_bo);
	}

	/* AQE buffer (only allocated on targets that use it) */
	if (a6xx_gpu->aqe_bo) {
		msm_gem_unpin_iova(a6xx_gpu->aqe_bo, gpu->vm);
		drm_gem_object_put(a6xx_gpu->aqe_bo);
	}

	/* Shadow rptr buffer ("whereami") */
	if (a6xx_gpu->shadow_bo) {
		msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->vm);
		drm_gem_object_put(a6xx_gpu->shadow_bo);
	}

	a6xx_llc_slices_destroy(a6xx_gpu);

	a6xx_gmu_remove(a6xx_gpu);

	adreno_gpu_cleanup(adreno_gpu);

	kfree(a6xx_gpu);
}
REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H); 2482 2483 return busy_cycles; 2484 } 2485 2486 static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, 2487 bool suspended) 2488 { 2489 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2490 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2491 2492 mutex_lock(&a6xx_gpu->gmu.lock); 2493 a6xx_gmu_set_freq(gpu, opp, suspended); 2494 mutex_unlock(&a6xx_gpu->gmu.lock); 2495 } 2496 2497 static struct drm_gpuvm * 2498 a6xx_create_vm(struct msm_gpu *gpu, struct platform_device *pdev) 2499 { 2500 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2501 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2502 unsigned long quirks = 0; 2503 2504 /* 2505 * This allows GPU to set the bus attributes required to use system 2506 * cache on behalf of the iommu page table walker. 2507 */ 2508 if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice) && 2509 !device_iommu_capable(&pdev->dev, IOMMU_CAP_CACHE_COHERENCY)) 2510 quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA; 2511 2512 return adreno_iommu_create_vm(gpu, pdev, quirks); 2513 } 2514 2515 static struct drm_gpuvm * 2516 a6xx_create_private_vm(struct msm_gpu *gpu, bool kernel_managed) 2517 { 2518 struct msm_mmu *mmu; 2519 2520 mmu = msm_iommu_pagetable_create(to_msm_vm(gpu->vm)->mmu, kernel_managed); 2521 2522 if (IS_ERR(mmu)) 2523 return ERR_CAST(mmu); 2524 2525 return msm_gem_vm_create(gpu->dev, mmu, "gpu", ADRENO_VM_START, 2526 adreno_private_vm_size(gpu), kernel_managed); 2527 } 2528 2529 static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 2530 { 2531 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2532 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2533 2534 if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) 2535 return a6xx_gpu->shadow[ring->id]; 2536 2537 /* 2538 * This is true only on an A6XX_GEN1 with GMU, has IFPC enabled and a super old SQE firmware 2539 * without 'whereami' support 2540 */ 2541 
/*
 * Hangcheck helper: decide whether the CP has made forward progress
 * since the last check by comparing a snapshot of the IB pointers and
 * remaining sizes against the previous snapshot stored on the ring.
 * Returns true if the state changed (progress) or cannot be sampled.
 */
static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct msm_cp_state cp_state;
	bool progress;

	/*
	 * With IFPC, KMD doesn't know whether GX power domain is collapsed
	 * or not. So, we can't blindly read the below registers in GX domain.
	 * Lets trust the hang detection in HW and lie to the caller that
	 * there was progress.
	 */
	if (to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC)
		return true;

	/* Snapshot the current CP indirect-buffer state */
	cp_state = (struct msm_cp_state) {
		.ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
		.ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
		.ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
		.ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
	};

	/*
	 * Adjust the remaining data to account for what has already been
	 * fetched from memory, but not yet consumed by the SQE.
	 *
	 * This is not *technically* correct, the amount buffered could
	 * exceed the IB size due to hw prefetching ahead, but:
	 *
	 * (1) We aren't trying to find the exact position, just whether
	 *     progress has been made
	 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
	 *     to prevent prefetching into an unrelated submit.  (And
	 *     either way, at some point the ROQ will be full.)
	 */
	cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB1) >> 16;
	cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB2) >> 16;

	/* Any change in the snapshot counts as progress */
	progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));

	ring->last_cp_state = cp_state;

	return progress;
}
/*
 * Probe-time constructor for a6xx/a7xx/a8xx GPUs.  Allocates the a6xx
 * instance, discovers the GMU (or GMU wrapper), sets up LLCC slices,
 * speedbin-based OPP filtering, rings/preemption and the fault handler.
 * Returns the embedded &struct msm_gpu or an ERR_PTR; on any failure
 * after adreno_gpu_init() the teardown goes through a6xx_destroy().
 */
static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
{
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct adreno_platform_config *config = pdev->dev.platform_data;
	struct device_node *node;
	struct a6xx_gpu *a6xx_gpu;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	extern int enable_preemption;
	bool is_a7xx;
	int ret, nr_rings = 1;

	a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL);
	if (!a6xx_gpu)
		return ERR_PTR(-ENOMEM);

	adreno_gpu = &a6xx_gpu->base;
	gpu = &adreno_gpu->base;

	mutex_init(&a6xx_gpu->gmu.lock);

	/* Register dumping is handled via GPU state, not a static list */
	adreno_gpu->registers = NULL;

	/* Check if there is a GMU phandle and set it up */
	node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);

	/* FIXME: How do we gracefully handle this? */
	BUG_ON(!node);

	adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper");

	adreno_gpu->base.hw_apriv =
		!!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV);

	/* gpu->info only gets assigned in adreno_gpu_init(). A8x is included intentionally */
	is_a7xx = config->info->family >= ADRENO_7XX_GEN1;

	a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx);

	/* Restrict the OPP table to what the fused speedbin supports */
	ret = a6xx_set_supported_hw(&pdev->dev, config->info);
	if (ret) {
		a6xx_llc_slices_destroy(a6xx_gpu);
		kfree(a6xx_gpu);
		return ERR_PTR(ret);
	}

	/* Preemption needs one ring per priority level */
	if ((enable_preemption == 1) || (enable_preemption == -1 &&
	    (config->info->quirks & ADRENO_QUIRK_PREEMPTION)))
		nr_rings = 4;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, config->info->funcs, nr_rings);
	if (ret) {
		a6xx_destroy(&(a6xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	/*
	 * For now only clamp to idle freq for devices where this is known not
	 * to cause power supply issues:
	 */
	if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
		priv->gpu_clamp_to_idle = true;

	if (adreno_has_gmu_wrapper(adreno_gpu) || adreno_has_rgmu(adreno_gpu))
		ret = a6xx_gmu_wrapper_init(a6xx_gpu, node);
	else
		ret = a6xx_gmu_init(a6xx_gpu, node);
	of_node_put(node);
	if (ret) {
		a6xx_destroy(&(a6xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	/* a7xx and newer need the CX MISC SW fuse configured via SCM */
	if (adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) {
		ret = a7xx_cx_mem_init(a6xx_gpu);
		if (ret) {
			a6xx_destroy(&(a6xx_gpu->base.base));
			return ERR_PTR(ret);
		}
	}

	adreno_gpu->uche_trap_base = 0x1fffffffff000ull;

	msm_mmu_set_fault_handler(to_msm_vm(gpu->vm)->mmu, gpu,
			adreno_gpu->funcs->mmu_fault_handler);

	ret = a6xx_calc_ubwc_config(adreno_gpu);
	if (ret) {
		a6xx_destroy(&(a6xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	/* Set up the preemption specific bits and pieces for each ringbuffer */
	a6xx_preempt_init(gpu);

	return gpu;
}
/*
 * Function table for a6xx targets with only a GMU wrapper (no firmware-
 * driven GMU): suspend/resume and timestamps use the non-GMU variants
 * (a6xx_pm_*, CP always-on counter), and there are no GMU frequency ops.
 */
const struct adreno_gpu_funcs a6xx_gmuwrapper_funcs = {
	.base = {
		.get_param = adreno_get_param,
		.set_param = adreno_set_param,
		.hw_init = a6xx_hw_init,
		.ucode_load = a6xx_ucode_load,
		.pm_suspend = a6xx_pm_suspend,
		.pm_resume = a6xx_pm_resume,
		.recover = a6xx_recover,
		.submit = a6xx_submit,
		.active_ring = a6xx_active_ring,
		.irq = a6xx_irq,
		.destroy = a6xx_destroy,
#if defined(CONFIG_DRM_MSM_GPU_STATE)
		.show = a6xx_show,
#endif
		.gpu_busy = a6xx_gpu_busy,
#if defined(CONFIG_DRM_MSM_GPU_STATE)
		.gpu_state_get = a6xx_gpu_state_get,
		.gpu_state_put = a6xx_gpu_state_put,
#endif
		.create_vm = a6xx_create_vm,
		.create_private_vm = a6xx_create_private_vm,
		.get_rptr = a6xx_get_rptr,
		.progress = a6xx_progress,
	},
	.init = a6xx_gpu_init,
	.get_timestamp = a6xx_get_timestamp,
	.bus_halt = a6xx_bus_clear_pending_transactions,
	.mmu_fault_handler = a6xx_fault_handler,
};
/*
 * Function table for a8xx targets: shares the a6xx probe/teardown and VM
 * ops, but uses a8xx-specific hw_init/recover/irq/busy/progress handlers
 * and the a7xx submit path.
 */
const struct adreno_gpu_funcs a8xx_gpu_funcs = {
	.base = {
		.get_param = adreno_get_param,
		.set_param = adreno_set_param,
		.hw_init = a8xx_hw_init,
		.ucode_load = a6xx_ucode_load,
		.pm_suspend = a6xx_gmu_pm_suspend,
		.pm_resume = a6xx_gmu_pm_resume,
		.recover = a8xx_recover,
		.submit = a7xx_submit,
		.active_ring = a6xx_active_ring,
		.irq = a8xx_irq,
		.destroy = a6xx_destroy,
		.gpu_busy = a8xx_gpu_busy,
		.gpu_get_freq = a6xx_gmu_get_freq,
		.gpu_set_freq = a6xx_gpu_set_freq,
		.create_vm = a6xx_create_vm,
		.create_private_vm = a6xx_create_private_vm,
		.get_rptr = a6xx_get_rptr,
		.progress = a8xx_progress,
	},
	.init = a6xx_gpu_init,
	.get_timestamp = a8xx_gmu_get_timestamp,
	.bus_halt = a8xx_bus_clear_pending_transactions,
	.mmu_fault_handler = a8xx_fault_handler,
};