// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */


#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_gpu_trace.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"

#include <linux/bitfield.h>
#include <linux/devfreq.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/pm_domain.h>
#include <linux/soc/qcom/llcc-qcom.h>

#define GPU_PAS_ID 13

static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Check that the GMU is idle */
	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_isidle(&a6xx_gpu->gmu))
		return false;

	/* Check that the CX master is idle */
	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
			~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
		return false;

	return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
		A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
}

static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a6xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A6XX_RBBM_STATUS),
			gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
	if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
		OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
		OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
	}
}

static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	update_shadow_rptr(gpu, ring);

	spin_lock_irqsave(&ring->preempt_lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	/* Update HW if this is the current ring and we are not in preempt */
	if (!a6xx_in_preempt(a6xx_gpu)) {
		if (a6xx_gpu->cur_ring == ring)
			gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
		else
			ring->restore_wptr = true;
	} else {
		ring->restore_wptr = true;
	}

	spin_unlock_irqrestore(&ring->preempt_lock, flags);
}

static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
		u64 iova)
{
	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
	OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
		CP_REG_TO_MEM_0_CNT(2) |
		CP_REG_TO_MEM_0_64B);
	OUT_RING(ring, lower_32_bits(iova));
	OUT_RING(ring, upper_32_bits(iova));
}

static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
		struct msm_ringbuffer *ring, struct msm_gem_submit *submit)
{
	bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
	struct msm_context *ctx = submit->queue->ctx;
	struct drm_gpuvm *vm =
msm_context_vm(submit->dev, ctx); 116 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 117 phys_addr_t ttbr; 118 u32 asid; 119 u64 memptr = rbmemptr(ring, ttbr0); 120 121 if (ctx->seqno == ring->cur_ctx_seqno) 122 return; 123 124 if (msm_iommu_pagetable_params(to_msm_vm(vm)->mmu, &ttbr, &asid)) 125 return; 126 127 if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) { 128 /* Wait for previous submit to complete before continuing: */ 129 OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4); 130 OUT_RING(ring, 0); 131 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); 132 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); 133 OUT_RING(ring, submit->seqno - 1); 134 135 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 136 OUT_RING(ring, CP_SET_THREAD_BOTH); 137 138 /* Reset state used to synchronize BR and BV */ 139 OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1); 140 OUT_RING(ring, 141 CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS | 142 CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE | 143 CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER | 144 CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS); 145 146 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 147 OUT_RING(ring, CP_SET_THREAD_BR); 148 } 149 150 if (!sysprof) { 151 if (!adreno_is_a7xx(adreno_gpu)) { 152 /* Turn off protected mode to write to special registers */ 153 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 154 OUT_RING(ring, 0); 155 } 156 157 OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1); 158 OUT_RING(ring, 1); 159 } 160 161 /* Execute the table update */ 162 OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4); 163 OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr))); 164 165 OUT_RING(ring, 166 CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) | 167 CP_SMMU_TABLE_UPDATE_1_ASID(asid)); 168 OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0)); 169 OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0)); 170 171 /* 172 * Write the new TTBR0 to the memstore. This is good for debugging. 173 * Needed for preemption 174 */ 175 OUT_PKT7(ring, CP_MEM_WRITE, 5); 176 OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr))); 177 OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr))); 178 OUT_RING(ring, lower_32_bits(ttbr)); 179 OUT_RING(ring, upper_32_bits(ttbr)); 180 OUT_RING(ring, ctx->seqno); 181 182 /* 183 * Sync both threads after switching pagetables and enable BR only 184 * to make sure BV doesn't race ahead while BR is still switching 185 * pagetables. 
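 * (only emitted for a7xx below, since a6xx has no BV pipe to synchronize)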
186 */ 187 if (adreno_is_a7xx(&a6xx_gpu->base)) { 188 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 189 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR); 190 } 191 192 /* 193 * And finally, trigger a uche flush to be sure there isn't anything 194 * lingering in that part of the GPU 195 */ 196 197 OUT_PKT7(ring, CP_EVENT_WRITE, 1); 198 OUT_RING(ring, CACHE_INVALIDATE); 199 200 if (!sysprof) { 201 /* 202 * Wait for SRAM clear after the pgtable update, so the 203 * two can happen in parallel: 204 */ 205 OUT_PKT7(ring, CP_WAIT_REG_MEM, 6); 206 OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ)); 207 OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO( 208 REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS)); 209 OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0)); 210 OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1)); 211 OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1)); 212 OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0)); 213 214 if (!adreno_is_a7xx(adreno_gpu)) { 215 /* Re-enable protected mode: */ 216 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 217 OUT_RING(ring, 1); 218 } 219 } 220 } 221 222 static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) 223 { 224 unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT; 225 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 226 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 227 struct msm_ringbuffer *ring = submit->ring; 228 unsigned int i, ibs = 0; 229 230 adreno_check_and_reenable_stall(adreno_gpu); 231 232 a6xx_set_pagetable(a6xx_gpu, ring, submit); 233 234 get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0), 235 rbmemptr_stats(ring, index, cpcycles_start)); 236 237 /* 238 * For PM4 the GMU register offsets are calculated from the base of the 239 * GPU registers so we need to add 0x1a800 to the register value on A630 240 * to get the right value from PM4. 241 */ 242 get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER, 243 rbmemptr_stats(ring, index, alwayson_start)); 244 245 /* Invalidate CCU depth and color */ 246 OUT_PKT7(ring, CP_EVENT_WRITE, 1); 247 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH)); 248 249 OUT_PKT7(ring, CP_EVENT_WRITE, 1); 250 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR)); 251 252 /* Submit the commands */ 253 for (i = 0; i < submit->nr_cmds; i++) { 254 switch (submit->cmd[i].type) { 255 case MSM_SUBMIT_CMD_IB_TARGET_BUF: 256 break; 257 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: 258 if (ring->cur_ctx_seqno == submit->queue->ctx->seqno) 259 break; 260 fallthrough; 261 case MSM_SUBMIT_CMD_BUF: 262 OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); 263 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); 264 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); 265 OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); 266 ibs++; 267 break; 268 } 269 270 /* 271 * Periodically update shadow-wptr if needed, so that we 272 * can see partial progress of submits with large # of 273 * cmds.. 
otherwise we could needlessly stall waiting for 274 * ringbuffer state, simply due to looking at a shadow 275 * rptr value that has not been updated 276 */ 277 if ((ibs % 32) == 0) 278 update_shadow_rptr(gpu, ring); 279 } 280 281 get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0), 282 rbmemptr_stats(ring, index, cpcycles_end)); 283 get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER, 284 rbmemptr_stats(ring, index, alwayson_end)); 285 286 /* Write the fence to the scratch register */ 287 OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1); 288 OUT_RING(ring, submit->seqno); 289 290 /* 291 * Execute a CACHE_FLUSH_TS event. This will ensure that the 292 * timestamp is written to the memory and then triggers the interrupt 293 */ 294 OUT_PKT7(ring, CP_EVENT_WRITE, 4); 295 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) | 296 CP_EVENT_WRITE_0_IRQ); 297 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); 298 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); 299 OUT_RING(ring, submit->seqno); 300 301 trace_msm_gpu_submit_flush(submit, 302 gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER)); 303 304 a6xx_flush(gpu, ring); 305 } 306 307 static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring, 308 struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue) 309 { 310 u64 preempt_postamble; 311 312 OUT_PKT7(ring, CP_SET_PSEUDO_REG, 12); 313 314 OUT_RING(ring, SMMU_INFO); 315 /* don't save SMMU, we write the record from the kernel instead */ 316 OUT_RING(ring, 0); 317 OUT_RING(ring, 0); 318 319 /* privileged and non secure buffer save */ 320 OUT_RING(ring, NON_SECURE_SAVE_ADDR); 321 OUT_RING(ring, lower_32_bits( 322 a6xx_gpu->preempt_iova[ring->id])); 323 OUT_RING(ring, upper_32_bits( 324 a6xx_gpu->preempt_iova[ring->id])); 325 326 /* user context buffer save, seems to be unnused by fw */ 327 OUT_RING(ring, NON_PRIV_SAVE_ADDR); 328 OUT_RING(ring, 0); 329 OUT_RING(ring, 0); 330 331 OUT_RING(ring, COUNTER); 332 /* seems OK to set to 0 to disable it */ 333 OUT_RING(ring, 0); 334 OUT_RING(ring, 0); 335 336 /* Emit postamble to clear perfcounters */ 337 preempt_postamble = a6xx_gpu->preempt_postamble_iova; 338 339 OUT_PKT7(ring, CP_SET_AMBLE, 3); 340 OUT_RING(ring, lower_32_bits(preempt_postamble)); 341 OUT_RING(ring, upper_32_bits(preempt_postamble)); 342 OUT_RING(ring, CP_SET_AMBLE_2_DWORDS( 343 a6xx_gpu->preempt_postamble_len) | 344 CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE)); 345 } 346 347 static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) 348 { 349 unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT; 350 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 351 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 352 struct msm_ringbuffer *ring = submit->ring; 353 unsigned int i, ibs = 0; 354 355 adreno_check_and_reenable_stall(adreno_gpu); 356 357 /* 358 * Toggle concurrent binning for pagetable switch and set the thread to 359 * BR since only it can execute the pagetable switch packets. 
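 * (the CP_THREAD_CONTROL packet below also syncs the BR and BV threads before the switch)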
360 */ 361 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 362 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR); 363 364 a6xx_set_pagetable(a6xx_gpu, ring, submit); 365 366 /* 367 * If preemption is enabled, then set the pseudo register for the save 368 * sequence 369 */ 370 if (gpu->nr_rings > 1) 371 a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue); 372 373 get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0), 374 rbmemptr_stats(ring, index, cpcycles_start)); 375 get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER, 376 rbmemptr_stats(ring, index, alwayson_start)); 377 378 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 379 OUT_RING(ring, CP_SET_THREAD_BOTH); 380 381 OUT_PKT7(ring, CP_SET_MARKER, 1); 382 OUT_RING(ring, 0x101); /* IFPC disable */ 383 384 if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) { 385 OUT_PKT7(ring, CP_SET_MARKER, 1); 386 OUT_RING(ring, 0x00d); /* IB1LIST start */ 387 } 388 389 /* Submit the commands */ 390 for (i = 0; i < submit->nr_cmds; i++) { 391 switch (submit->cmd[i].type) { 392 case MSM_SUBMIT_CMD_IB_TARGET_BUF: 393 break; 394 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: 395 if (ring->cur_ctx_seqno == submit->queue->ctx->seqno) 396 break; 397 fallthrough; 398 case MSM_SUBMIT_CMD_BUF: 399 OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); 400 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); 401 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); 402 OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); 403 ibs++; 404 break; 405 } 406 407 /* 408 * Periodically update shadow-wptr if needed, so that we 409 * can see partial progress of submits with large # of 410 * cmds.. otherwise we could needlessly stall waiting for 411 * ringbuffer state, simply due to looking at a shadow 412 * rptr value that has not been updated 413 */ 414 if ((ibs % 32) == 0) 415 update_shadow_rptr(gpu, ring); 416 } 417 418 if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) { 419 OUT_PKT7(ring, CP_SET_MARKER, 1); 420 OUT_RING(ring, 0x00e); /* IB1LIST end */ 421 } 422 423 get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0), 424 rbmemptr_stats(ring, index, cpcycles_end)); 425 get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER, 426 rbmemptr_stats(ring, index, alwayson_end)); 427 428 /* Write the fence to the scratch register */ 429 OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1); 430 OUT_RING(ring, submit->seqno); 431 432 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 433 OUT_RING(ring, CP_SET_THREAD_BR); 434 435 OUT_PKT7(ring, CP_EVENT_WRITE, 1); 436 OUT_RING(ring, CCU_INVALIDATE_DEPTH); 437 438 OUT_PKT7(ring, CP_EVENT_WRITE, 1); 439 OUT_RING(ring, CCU_INVALIDATE_COLOR); 440 441 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 442 OUT_RING(ring, CP_SET_THREAD_BV); 443 444 /* 445 * Make sure the timestamp is committed once BV pipe is 446 * completely done with this submission. 447 */ 448 OUT_PKT7(ring, CP_EVENT_WRITE, 4); 449 OUT_RING(ring, CACHE_CLEAN | BIT(27)); 450 OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence))); 451 OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence))); 452 OUT_RING(ring, submit->seqno); 453 454 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 455 OUT_RING(ring, CP_SET_THREAD_BR); 456 457 /* 458 * This makes sure that BR doesn't race ahead and commit 459 * timestamp to memstore while BV is still processing 460 * this submission. 
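 * (BR waits below on the bv_fence value that BV wrote via the CACHE_CLEAN event above)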
461 */ 462 OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4); 463 OUT_RING(ring, 0); 464 OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence))); 465 OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence))); 466 OUT_RING(ring, submit->seqno); 467 468 a6xx_gpu->last_seqno[ring->id] = submit->seqno; 469 470 /* write the ringbuffer timestamp */ 471 OUT_PKT7(ring, CP_EVENT_WRITE, 4); 472 OUT_RING(ring, CACHE_CLEAN | CP_EVENT_WRITE_0_IRQ | BIT(27)); 473 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); 474 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); 475 OUT_RING(ring, submit->seqno); 476 477 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 478 OUT_RING(ring, CP_SET_THREAD_BOTH); 479 480 OUT_PKT7(ring, CP_SET_MARKER, 1); 481 OUT_RING(ring, 0x100); /* IFPC enable */ 482 483 /* If preemption is enabled */ 484 if (gpu->nr_rings > 1) { 485 /* Yield the floor on command completion */ 486 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); 487 488 /* 489 * If dword[2:1] are non zero, they specify an address for 490 * the CP to write the value of dword[3] to on preemption 491 * complete. Write 0 to skip the write 492 */ 493 OUT_RING(ring, 0x00); 494 OUT_RING(ring, 0x00); 495 /* Data value - not used if the address above is 0 */ 496 OUT_RING(ring, 0x01); 497 /* generate interrupt on preemption completion */ 498 OUT_RING(ring, 0x00); 499 } 500 501 502 trace_msm_gpu_submit_flush(submit, 503 gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER)); 504 505 a6xx_flush(gpu, ring); 506 507 /* Check to see if we need to start preemption */ 508 a6xx_preempt_trigger(gpu); 509 } 510 511 static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state) 512 { 513 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 514 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 515 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 516 const struct adreno_reglist *reg; 517 unsigned int i; 518 u32 cgc_delay, cgc_hyst; 519 u32 val, clock_cntl_on; 520 521 if (!(adreno_gpu->info->a6xx->hwcg || adreno_is_a7xx(adreno_gpu))) 522 return; 523 524 if (adreno_is_a630(adreno_gpu)) 525 clock_cntl_on = 0x8aa8aa02; 526 else if (adreno_is_a610(adreno_gpu)) 527 clock_cntl_on = 0xaaa8aa82; 528 else if (adreno_is_a702(adreno_gpu)) 529 clock_cntl_on = 0xaaaaaa82; 530 else 531 clock_cntl_on = 0x8aa8aa82; 532 533 cgc_delay = adreno_is_a615_family(adreno_gpu) ? 0x111 : 0x10111; 534 cgc_hyst = adreno_is_a615_family(adreno_gpu) ? 0x555 : 0x5555; 535 536 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL, 537 state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0); 538 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, 539 state ? cgc_delay : 0); 540 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL, 541 state ? cgc_hyst : 0); 542 543 if (!adreno_gpu->info->a6xx->hwcg) { 544 gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 1); 545 gpu_write(gpu, REG_A7XX_RBBM_CGC_GLOBAL_LOAD_CMD, state ? 
1 : 0); 546 547 if (state) { 548 gpu_write(gpu, REG_A7XX_RBBM_CGC_P2S_TRIG_CMD, 1); 549 550 if (gpu_poll_timeout(gpu, REG_A7XX_RBBM_CGC_P2S_STATUS, val, 551 val & A7XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) { 552 dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n"); 553 return; 554 } 555 556 gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 0); 557 } 558 559 return; 560 } 561 562 val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL); 563 564 /* Don't re-program the registers if they are already correct */ 565 if ((!state && !val) || (state && (val == clock_cntl_on))) 566 return; 567 568 /* Disable SP clock before programming HWCG registers */ 569 if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu)) 570 gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0); 571 572 for (i = 0; (reg = &adreno_gpu->info->a6xx->hwcg[i], reg->offset); i++) 573 gpu_write(gpu, reg->offset, state ? reg->value : 0); 574 575 /* Enable SP clock */ 576 if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu)) 577 gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1); 578 579 gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0); 580 } 581 582 static void a6xx_set_cp_protect(struct msm_gpu *gpu) 583 { 584 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 585 const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect; 586 unsigned i; 587 588 /* 589 * Enable access protection to privileged registers, fault on an access 590 * protect violation and select the last span to protect from the start 591 * address all the way to the end of the register address space 592 */ 593 gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL, 594 A6XX_CP_PROTECT_CNTL_ACCESS_PROT_EN | 595 A6XX_CP_PROTECT_CNTL_ACCESS_FAULT_ON_VIOL_EN | 596 A6XX_CP_PROTECT_CNTL_LAST_SPAN_INF_RANGE); 597 598 for (i = 0; i < protect->count - 1; i++) { 599 /* Intentionally skip writing to some registers */ 600 if (protect->regs[i]) 601 gpu_write(gpu, REG_A6XX_CP_PROTECT(i), protect->regs[i]); 602 } 603 /* last CP_PROTECT to have "infinite" length on the last entry */ 604 gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]); 605 } 606 607 static int a6xx_calc_ubwc_config(struct adreno_gpu *gpu) 608 { 609 const struct qcom_ubwc_cfg_data *common_cfg; 610 struct qcom_ubwc_cfg_data *cfg = &gpu->_ubwc_config; 611 612 /* Inherit the common config and make some necessary fixups */ 613 common_cfg = qcom_ubwc_config_get_data(); 614 if (IS_ERR(common_cfg)) 615 return PTR_ERR(common_cfg); 616 617 /* Copy the data into the internal struct to drop the const qualifier (temporarily) */ 618 *cfg = *common_cfg; 619 620 cfg->ubwc_swizzle = 0x6; 621 cfg->highest_bank_bit = 15; 622 623 if (adreno_is_a610(gpu)) { 624 cfg->highest_bank_bit = 13; 625 cfg->ubwc_swizzle = 0x7; 626 } 627 628 if (adreno_is_a618(gpu)) 629 cfg->highest_bank_bit = 14; 630 631 if (adreno_is_a619(gpu)) 632 /* TODO: Should be 14 but causes corruption at e.g. 
1920x1200 on DP */ 633 cfg->highest_bank_bit = 13; 634 635 if (adreno_is_a619_holi(gpu)) 636 cfg->highest_bank_bit = 13; 637 638 if (adreno_is_a621(gpu)) 639 cfg->highest_bank_bit = 13; 640 641 if (adreno_is_a623(gpu)) 642 cfg->highest_bank_bit = 16; 643 644 if (adreno_is_a650(gpu) || 645 adreno_is_a660(gpu) || 646 adreno_is_a690(gpu) || 647 adreno_is_a730(gpu) || 648 adreno_is_a740_family(gpu)) { 649 /* TODO: get ddr type from bootloader and use 15 for LPDDR4 */ 650 cfg->highest_bank_bit = 16; 651 } 652 653 if (adreno_is_a663(gpu)) { 654 cfg->highest_bank_bit = 13; 655 cfg->ubwc_swizzle = 0x4; 656 } 657 658 if (adreno_is_7c3(gpu)) 659 cfg->highest_bank_bit = 14; 660 661 if (adreno_is_a702(gpu)) 662 cfg->highest_bank_bit = 14; 663 664 if (cfg->highest_bank_bit != common_cfg->highest_bank_bit) 665 DRM_WARN_ONCE("Inconclusive highest_bank_bit value: %u (GPU) vs %u (UBWC_CFG)\n", 666 cfg->highest_bank_bit, common_cfg->highest_bank_bit); 667 668 if (cfg->ubwc_swizzle != common_cfg->ubwc_swizzle) 669 DRM_WARN_ONCE("Inconclusive ubwc_swizzle value: %u (GPU) vs %u (UBWC_CFG)\n", 670 cfg->ubwc_swizzle, common_cfg->ubwc_swizzle); 671 672 gpu->ubwc_config = &gpu->_ubwc_config; 673 674 return 0; 675 } 676 677 static void a6xx_set_ubwc_config(struct msm_gpu *gpu) 678 { 679 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 680 const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config; 681 /* 682 * We subtract 13 from the highest bank bit (13 is the minimum value 683 * allowed by hw) and write the lowest two bits of the remaining value 684 * as hbb_lo and the one above it as hbb_hi to the hardware. 685 */ 686 BUG_ON(cfg->highest_bank_bit < 13); 687 u32 hbb = cfg->highest_bank_bit - 13; 688 bool rgb565_predicator = cfg->ubwc_enc_version >= UBWC_4_0; 689 u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2); 690 bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg); 691 bool amsbc = cfg->ubwc_enc_version >= UBWC_3_0; 692 bool min_acc_len_64b = false; 693 u8 uavflagprd_inv = 0; 694 u32 hbb_hi = hbb >> 2; 695 u32 hbb_lo = hbb & 3; 696 697 if (adreno_is_a650_family(adreno_gpu) || adreno_is_a7xx(adreno_gpu)) 698 uavflagprd_inv = 2; 699 700 if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu)) 701 min_acc_len_64b = true; 702 703 gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, 704 level2_swizzling_dis << 12 | 705 rgb565_predicator << 11 | 706 hbb_hi << 10 | amsbc << 4 | 707 min_acc_len_64b << 3 | 708 hbb_lo << 1 | ubwc_mode); 709 710 gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, 711 level2_swizzling_dis << 6 | hbb_hi << 4 | 712 min_acc_len_64b << 3 | 713 hbb_lo << 1 | ubwc_mode); 714 715 gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, 716 level2_swizzling_dis << 12 | hbb_hi << 10 | 717 uavflagprd_inv << 4 | 718 min_acc_len_64b << 3 | 719 hbb_lo << 1 | ubwc_mode); 720 721 if (adreno_is_a7xx(adreno_gpu)) 722 gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL, 723 FIELD_PREP(GENMASK(8, 5), hbb_lo)); 724 725 gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, 726 min_acc_len_64b << 23 | hbb_lo << 21); 727 728 gpu_write(gpu, REG_A6XX_RBBM_NC_MODE_CNTL, 729 cfg->macrotile_mode); 730 } 731 732 static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) 733 { 734 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 735 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 736 const struct adreno_reglist_list *reglist; 737 void *ptr = a6xx_gpu->pwrup_reglist_ptr; 738 struct cpu_gpu_lock *lock = ptr; 739 u32 *dest = (u32 *)&lock->regs[0]; 740 int i; 741 742 reglist = adreno_gpu->info->a6xx->pwrup_reglist; 743 744 lock->gpu_req = 
lock->cpu_req = lock->turn = 0;
	lock->ifpc_list_len = 0;
	lock->preemption_list_len = reglist->count;

	/*
	 * For each entry in each of the lists, write the offset and the current
	 * register value into the GPU buffer
	 */
	for (i = 0; i < reglist->count; i++) {
		*dest++ = reglist->regs[i];
		*dest++ = gpu_read(gpu, reglist->regs[i]);
	}

	/*
	 * The overall register list is composed of
	 * 1. Static IFPC-only registers
	 * 2. Static IFPC + preemption registers
	 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
	 *
	 * The first two lists are static. Their sizes are stored as the number
	 * of pairs in ifpc_list_len and preemption_list_len respectively. With
	 * concurrent binning, some of the perfcounter registers are
	 * virtualized, so the CP needs to know the pipe id to program the
	 * aperture in order to restore them. Thus, the third list is a dynamic
	 * list of triplets (<aperture, shifted 12 bits> <address> <data>), and
	 * its length is stored as the number of triplets in dynamic_list_len.
	 */
	lock->dynamic_list_len = 0;
}

static int a7xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings <= 1)
		return 0;

	/* Turn CP protection off */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Generate interrupt on preemption completion */
	OUT_RING(ring, 0x00);

	a6xx_flush(gpu, ring);

	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a6xx_cp_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002f);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* No workarounds enabled */
	OUT_RING(ring, 0x00000000);

	/* Pad rest of the cmds with 0's */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	a6xx_flush(gpu, ring);
	return a6xx_idle(gpu, ring) ?
0 : -EINVAL; 830 } 831 832 static int a7xx_cp_init(struct msm_gpu *gpu) 833 { 834 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 835 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 836 struct msm_ringbuffer *ring = gpu->rb[0]; 837 u32 mask; 838 839 /* Disable concurrent binning before sending CP init */ 840 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 841 OUT_RING(ring, BIT(27)); 842 843 OUT_PKT7(ring, CP_ME_INIT, 7); 844 845 /* Use multiple HW contexts */ 846 mask = BIT(0); 847 848 /* Enable error detection */ 849 mask |= BIT(1); 850 851 /* Set default reset state */ 852 mask |= BIT(3); 853 854 /* Disable save/restore of performance counters across preemption */ 855 mask |= BIT(6); 856 857 /* Enable the register init list with the spinlock */ 858 mask |= BIT(8); 859 860 OUT_RING(ring, mask); 861 862 /* Enable multiple hardware contexts */ 863 OUT_RING(ring, 0x00000003); 864 865 /* Enable error detection */ 866 OUT_RING(ring, 0x20000000); 867 868 /* Operation mode mask */ 869 OUT_RING(ring, 0x00000002); 870 871 /* *Don't* send a power up reg list for concurrent binning (TODO) */ 872 /* Lo address */ 873 OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova)); 874 /* Hi address */ 875 OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova)); 876 /* BIT(31) set => read the regs from the list */ 877 OUT_RING(ring, BIT(31)); 878 879 a6xx_flush(gpu, ring); 880 return a6xx_idle(gpu, ring) ? 0 : -EINVAL; 881 } 882 883 /* 884 * Check that the microcode version is new enough to include several key 885 * security fixes. Return true if the ucode is safe. 886 */ 887 static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, 888 struct drm_gem_object *obj) 889 { 890 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 891 struct msm_gpu *gpu = &adreno_gpu->base; 892 const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE]; 893 u32 *buf = msm_gem_get_vaddr(obj); 894 bool ret = false; 895 896 if (IS_ERR(buf)) 897 return false; 898 899 /* A7xx is safe! */ 900 if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu)) 901 return true; 902 903 /* 904 * Targets up to a640 (a618, a630 and a640) need to check for a 905 * microcode version that is patched to support the whereami opcode or 906 * one that is new enough to include it by default. 907 * 908 * a650 tier targets don't need whereami but still need to be 909 * equal to or newer than 0.95 for other security fixes 910 * 911 * a660 targets have all the critical security fixes from the start 912 */ 913 if (!strcmp(sqe_name, "a630_sqe.fw")) { 914 /* 915 * If the lowest nibble is 0xa that is an indication that this 916 * microcode has been patched. The actual version is in dword 917 * [3] but we only care about the patchlevel which is the lowest 918 * nibble of dword [3] 919 * 920 * Otherwise check that the firmware is greater than or equal 921 * to 1.90 which was the first version that had this fix built 922 * in 923 */ 924 if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) || 925 (buf[0] & 0xfff) >= 0x190) { 926 a6xx_gpu->has_whereami = true; 927 ret = true; 928 goto out; 929 } 930 931 DRM_DEV_ERROR(&gpu->pdev->dev, 932 "a630 SQE ucode is too old. Have version %x need at least %x\n", 933 buf[0] & 0xfff, 0x190); 934 } else if (!strcmp(sqe_name, "a650_sqe.fw")) { 935 if ((buf[0] & 0xfff) >= 0x095) { 936 ret = true; 937 goto out; 938 } 939 940 DRM_DEV_ERROR(&gpu->pdev->dev, 941 "a650 SQE ucode is too old. 
Have version %x need at least %x\n", 942 buf[0] & 0xfff, 0x095); 943 } else if (!strcmp(sqe_name, "a660_sqe.fw")) { 944 ret = true; 945 } else { 946 DRM_DEV_ERROR(&gpu->pdev->dev, 947 "unknown GPU, add it to a6xx_ucode_check_version()!!\n"); 948 } 949 out: 950 msm_gem_put_vaddr(obj); 951 return ret; 952 } 953 954 static int a6xx_ucode_load(struct msm_gpu *gpu) 955 { 956 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 957 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 958 959 if (!a6xx_gpu->sqe_bo) { 960 a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu, 961 adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova); 962 963 if (IS_ERR(a6xx_gpu->sqe_bo)) { 964 int ret = PTR_ERR(a6xx_gpu->sqe_bo); 965 966 a6xx_gpu->sqe_bo = NULL; 967 DRM_DEV_ERROR(&gpu->pdev->dev, 968 "Could not allocate SQE ucode: %d\n", ret); 969 970 return ret; 971 } 972 973 msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw"); 974 if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) { 975 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm); 976 drm_gem_object_put(a6xx_gpu->sqe_bo); 977 978 a6xx_gpu->sqe_bo = NULL; 979 return -EPERM; 980 } 981 } 982 983 /* 984 * Expanded APRIV and targets that support WHERE_AM_I both need a 985 * privileged buffer to store the RPTR shadow 986 */ 987 if ((adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) && 988 !a6xx_gpu->shadow_bo) { 989 a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev, 990 sizeof(u32) * gpu->nr_rings, 991 MSM_BO_WC | MSM_BO_MAP_PRIV, 992 gpu->vm, &a6xx_gpu->shadow_bo, 993 &a6xx_gpu->shadow_iova); 994 995 if (IS_ERR(a6xx_gpu->shadow)) 996 return PTR_ERR(a6xx_gpu->shadow); 997 998 msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow"); 999 } 1000 1001 a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PAGE_SIZE, 1002 MSM_BO_WC | MSM_BO_MAP_PRIV, 1003 gpu->vm, &a6xx_gpu->pwrup_reglist_bo, 1004 &a6xx_gpu->pwrup_reglist_iova); 1005 1006 if (IS_ERR(a6xx_gpu->pwrup_reglist_ptr)) 1007 return PTR_ERR(a6xx_gpu->pwrup_reglist_ptr); 1008 1009 msm_gem_object_set_name(a6xx_gpu->pwrup_reglist_bo, "pwrup_reglist"); 1010 1011 return 0; 1012 } 1013 1014 static int a6xx_zap_shader_init(struct msm_gpu *gpu) 1015 { 1016 static bool loaded; 1017 int ret; 1018 1019 if (loaded) 1020 return 0; 1021 1022 ret = adreno_zap_shader_load(gpu, GPU_PAS_ID); 1023 1024 loaded = !ret; 1025 return ret; 1026 } 1027 1028 #define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \ 1029 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \ 1030 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ 1031 A6XX_RBBM_INT_0_MASK_CP_IB2 | \ 1032 A6XX_RBBM_INT_0_MASK_CP_IB1 | \ 1033 A6XX_RBBM_INT_0_MASK_CP_RB | \ 1034 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ 1035 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \ 1036 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ 1037 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ 1038 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR) 1039 1040 #define A7XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \ 1041 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \ 1042 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \ 1043 A6XX_RBBM_INT_0_MASK_CP_SW | \ 1044 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ 1045 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \ 1046 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \ 1047 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ 1048 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \ 1049 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ 1050 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ 1051 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ 1052 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ 1053 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) 1054 1055 #define A7XX_APRIV_MASK 
(A6XX_CP_APRIV_CNTL_ICACHE | \ 1056 A6XX_CP_APRIV_CNTL_RBFETCH | \ 1057 A6XX_CP_APRIV_CNTL_RBPRIVLEVEL | \ 1058 A6XX_CP_APRIV_CNTL_RBRPWB) 1059 1060 #define A7XX_BR_APRIVMASK (A7XX_APRIV_MASK | \ 1061 A6XX_CP_APRIV_CNTL_CDREAD | \ 1062 A6XX_CP_APRIV_CNTL_CDWRITE) 1063 1064 static int hw_init(struct msm_gpu *gpu) 1065 { 1066 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1067 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1068 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 1069 u64 gmem_range_min; 1070 unsigned int i; 1071 int ret; 1072 1073 if (!adreno_has_gmu_wrapper(adreno_gpu)) { 1074 /* Make sure the GMU keeps the GPU on while we set it up */ 1075 ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); 1076 if (ret) 1077 return ret; 1078 } 1079 1080 /* Clear GBIF halt in case GX domain was not collapsed */ 1081 if (adreno_is_a619_holi(adreno_gpu)) { 1082 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0); 1083 gpu_read(gpu, REG_A6XX_GBIF_HALT); 1084 1085 gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, 0); 1086 gpu_read(gpu, REG_A6XX_RBBM_GPR0_CNTL); 1087 } else if (a6xx_has_gbif(adreno_gpu)) { 1088 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0); 1089 gpu_read(gpu, REG_A6XX_GBIF_HALT); 1090 1091 gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0); 1092 gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT); 1093 } 1094 1095 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0); 1096 1097 if (adreno_is_a619_holi(adreno_gpu)) 1098 a6xx_sptprac_enable(gmu); 1099 1100 /* 1101 * Disable the trusted memory range - we don't actually supported secure 1102 * memory rendering at this point in time and we don't want to block off 1103 * part of the virtual memory space. 1104 */ 1105 gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000); 1106 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); 1107 1108 if (!adreno_is_a7xx(adreno_gpu)) { 1109 /* Turn on 64 bit addressing for all blocks */ 1110 gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1); 1111 gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1); 1112 gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1); 1113 gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1); 1114 gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1); 1115 gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1); 1116 gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1); 1117 gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1); 1118 gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1); 1119 gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1); 1120 gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1); 1121 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1); 1122 } 1123 1124 /* enable hardware clockgating */ 1125 a6xx_set_hwcg(gpu, true); 1126 1127 /* VBIF/GBIF start*/ 1128 if (adreno_is_a610_family(adreno_gpu) || 1129 adreno_is_a640_family(adreno_gpu) || 1130 adreno_is_a650_family(adreno_gpu) || 1131 adreno_is_a7xx(adreno_gpu)) { 1132 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620); 1133 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620); 1134 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620); 1135 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620); 1136 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 1137 adreno_is_a7xx(adreno_gpu) ? 
0x2120212 : 0x3); 1138 } else { 1139 gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3); 1140 } 1141 1142 if (adreno_is_a630(adreno_gpu)) 1143 gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009); 1144 1145 if (adreno_is_a7xx(adreno_gpu)) 1146 gpu_write(gpu, REG_A6XX_UCHE_GBIF_GX_CONFIG, 0x10240e0); 1147 1148 /* Make all blocks contribute to the GPU BUSY perf counter */ 1149 gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff); 1150 1151 /* Disable L2 bypass in the UCHE */ 1152 if (adreno_is_a7xx(adreno_gpu)) { 1153 gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); 1154 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); 1155 } else { 1156 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0); 1157 gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); 1158 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); 1159 } 1160 1161 if (!(adreno_is_a650_family(adreno_gpu) || 1162 adreno_is_a702(adreno_gpu) || 1163 adreno_is_a730(adreno_gpu))) { 1164 gmem_range_min = adreno_is_a740_family(adreno_gpu) ? SZ_16M : SZ_1M; 1165 1166 /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */ 1167 gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN, gmem_range_min); 1168 1169 gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX, 1170 gmem_range_min + adreno_gpu->info->gmem - 1); 1171 } 1172 1173 if (adreno_is_a7xx(adreno_gpu)) 1174 gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, BIT(23)); 1175 else { 1176 gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804); 1177 gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4); 1178 } 1179 1180 if (adreno_is_a640_family(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) { 1181 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140); 1182 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c); 1183 } else if (adreno_is_a610_family(adreno_gpu)) { 1184 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060); 1185 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16); 1186 } else if (!adreno_is_a7xx(adreno_gpu)) { 1187 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0); 1188 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c); 1189 } 1190 1191 if (adreno_is_a660_family(adreno_gpu)) 1192 gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020); 1193 1194 /* Setting the mem pool size */ 1195 if (adreno_is_a610(adreno_gpu)) { 1196 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 48); 1197 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 47); 1198 } else if (adreno_is_a702(adreno_gpu)) { 1199 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 64); 1200 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 63); 1201 } else if (!adreno_is_a7xx(adreno_gpu)) 1202 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128); 1203 1204 1205 /* Set the default primFifo threshold values */ 1206 if (adreno_gpu->info->a6xx->prim_fifo_threshold) 1207 gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 1208 adreno_gpu->info->a6xx->prim_fifo_threshold); 1209 1210 /* Set the AHB default slave response to "ERROR" */ 1211 gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1); 1212 1213 /* Turn on performance counters */ 1214 gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1); 1215 1216 if (adreno_is_a7xx(adreno_gpu)) { 1217 /* Turn on the IFPC counter (countable 4 on XOCLK4) */ 1218 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1, 1219 FIELD_PREP(GENMASK(7, 0), 0x4)); 1220 } 1221 1222 /* Select CP0 to always count cycles */ 1223 gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT); 1224 
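	/* Program the UBWC mode registers from the config derived in a6xx_calc_ubwc_config() */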
1225 a6xx_set_ubwc_config(gpu); 1226 1227 /* Enable fault detection */ 1228 if (adreno_is_a730(adreno_gpu) || 1229 adreno_is_a740_family(adreno_gpu)) 1230 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff); 1231 else if (adreno_is_a690(adreno_gpu)) 1232 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x4fffff); 1233 else if (adreno_is_a619(adreno_gpu)) 1234 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff); 1235 else if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu)) 1236 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3ffff); 1237 else 1238 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x1fffff); 1239 1240 gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1); 1241 1242 /* Set weights for bicubic filtering */ 1243 if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) { 1244 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0); 1245 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1, 1246 0x3fe05ff4); 1247 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2, 1248 0x3fa0ebee); 1249 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3, 1250 0x3f5193ed); 1251 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4, 1252 0x3f0243f0); 1253 } 1254 1255 /* Set up the CX GMU counter 0 to count busy ticks */ 1256 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000); 1257 1258 /* Enable the power counter */ 1259 gmu_rmw(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, BIT(5)); 1260 gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1); 1261 1262 /* Protect registers from the CP */ 1263 a6xx_set_cp_protect(gpu); 1264 1265 if (adreno_is_a660_family(adreno_gpu)) { 1266 if (adreno_is_a690(adreno_gpu)) 1267 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x00028801); 1268 else 1269 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1); 1270 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0); 1271 } else if (adreno_is_a702(adreno_gpu)) { 1272 /* Something to do with the HLSQ cluster */ 1273 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(24)); 1274 } 1275 1276 if (adreno_is_a690(adreno_gpu)) 1277 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90); 1278 /* Set dualQ + disable afull for A660 GPU */ 1279 else if (adreno_is_a660(adreno_gpu) || adreno_is_a663(adreno_gpu)) 1280 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906); 1281 else if (adreno_is_a7xx(adreno_gpu)) 1282 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 1283 FIELD_PREP(GENMASK(19, 16), 6) | 1284 FIELD_PREP(GENMASK(15, 12), 6) | 1285 FIELD_PREP(GENMASK(11, 8), 9) | 1286 BIT(3) | BIT(2) | 1287 FIELD_PREP(GENMASK(1, 0), 2)); 1288 1289 /* Enable expanded apriv for targets that support it */ 1290 if (gpu->hw_apriv) { 1291 if (adreno_is_a7xx(adreno_gpu)) { 1292 gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL, 1293 A7XX_BR_APRIVMASK); 1294 gpu_write(gpu, REG_A7XX_CP_BV_APRIV_CNTL, 1295 A7XX_APRIV_MASK); 1296 gpu_write(gpu, REG_A7XX_CP_LPAC_APRIV_CNTL, 1297 A7XX_APRIV_MASK); 1298 } else 1299 gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL, 1300 BIT(6) | BIT(5) | BIT(3) | BIT(2) | BIT(1)); 1301 } 1302 1303 if (adreno_is_a750(adreno_gpu)) { 1304 /* Disable ubwc merged UFC request feature */ 1305 gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(19), BIT(19)); 1306 1307 /* Enable TP flaghint and other performance settings */ 1308 gpu_write(gpu, REG_A6XX_TPL1_DBG_ECO_CNTL1, 0xc0700); 1309 } else if (adreno_is_a7xx(adreno_gpu)) { 1310 /* Disable non-ubwc read reqs from passing write reqs */ 1311 gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, 
BIT(11), BIT(11));
	}

	/* Enable interrupts */
	gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK,
		adreno_is_a7xx(adreno_gpu) ? A7XX_INT_MASK : A6XX_INT_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		goto out;

	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);

	/* Targets that support extended APRIV can use the RPTR shadow from
	 * hardware but all the other ones need to disable the feature. Targets
	 * that support the WHERE_AM_I opcode can use that instead
	 */
	if (adreno_gpu->base.hw_apriv)
		gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
	else
		gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
			MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Configure the RPTR shadow if needed: */
	if (a6xx_gpu->shadow_bo) {
		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR,
			shadowptr(a6xx_gpu, gpu->rb[0]));
		for (unsigned int i = 0; i < gpu->nr_rings; i++)
			a6xx_gpu->shadow[i] = 0;
	}

	/* ..which means "always" on A7xx, also for BV shadow */
	if (adreno_is_a7xx(adreno_gpu)) {
		gpu_write64(gpu, REG_A7XX_CP_BV_RB_RPTR_ADDR,
			rbmemptr(gpu->rb[0], bv_rptr));
	}

	a6xx_preempt_hw_init(gpu);

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];

	for (i = 0; i < gpu->nr_rings; i++)
		gpu->rb[i]->cur_ctx_seqno = 0;

	/* Enable the SQE to start the CP engine */
	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);

	if (adreno_is_a7xx(adreno_gpu) && !a6xx_gpu->pwrup_reglist_emitted) {
		a7xx_patch_pwrup_reglist(gpu);
		a6xx_gpu->pwrup_reglist_emitted = true;
	}

	ret = adreno_is_a7xx(adreno_gpu) ? a7xx_cp_init(gpu) : a6xx_cp_init(gpu);
	if (ret)
		goto out;

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a6xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a6xx_flush(gpu, gpu->rb[0]);
		if (!a6xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
1391 * if you mess this up you are about to crash horribly) 1392 */ 1393 dev_warn_once(gpu->dev->dev, 1394 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n"); 1395 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0); 1396 ret = 0; 1397 } else { 1398 return ret; 1399 } 1400 1401 out: 1402 if (adreno_has_gmu_wrapper(adreno_gpu)) 1403 return ret; 1404 1405 /* Last step - yield the ringbuffer */ 1406 a7xx_preempt_start(gpu); 1407 1408 /* 1409 * Tell the GMU that we are done touching the GPU and it can start power 1410 * management 1411 */ 1412 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); 1413 1414 if (a6xx_gpu->gmu.legacy) { 1415 /* Take the GMU out of its special boot mode */ 1416 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER); 1417 } 1418 1419 return ret; 1420 } 1421 1422 static int a6xx_hw_init(struct msm_gpu *gpu) 1423 { 1424 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1425 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1426 int ret; 1427 1428 mutex_lock(&a6xx_gpu->gmu.lock); 1429 ret = hw_init(gpu); 1430 mutex_unlock(&a6xx_gpu->gmu.lock); 1431 1432 return ret; 1433 } 1434 1435 static void a6xx_dump(struct msm_gpu *gpu) 1436 { 1437 DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n", 1438 gpu_read(gpu, REG_A6XX_RBBM_STATUS)); 1439 adreno_dump(gpu); 1440 } 1441 1442 static void a6xx_recover(struct msm_gpu *gpu) 1443 { 1444 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1445 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1446 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 1447 int i, active_submits; 1448 1449 adreno_dump_info(gpu); 1450 1451 for (i = 0; i < 8; i++) 1452 DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i, 1453 gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i))); 1454 1455 if (hang_debug) 1456 a6xx_dump(gpu); 1457 1458 /* 1459 * To handle recovery specific sequences during the rpm suspend we are 1460 * about to trigger 1461 */ 1462 a6xx_gpu->hung = true; 1463 1464 /* Halt SQE first */ 1465 gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); 1466 1467 pm_runtime_dont_use_autosuspend(&gpu->pdev->dev); 1468 1469 /* active_submit won't change until we make a submission */ 1470 mutex_lock(&gpu->active_lock); 1471 active_submits = gpu->active_submits; 1472 1473 /* 1474 * Temporarily clear active_submits count to silence a WARN() in the 1475 * runtime suspend cb 1476 */ 1477 gpu->active_submits = 0; 1478 1479 if (adreno_has_gmu_wrapper(adreno_gpu)) { 1480 /* Drain the outstanding traffic on memory buses */ 1481 a6xx_bus_clear_pending_transactions(adreno_gpu, true); 1482 1483 /* Reset the GPU to a clean state */ 1484 a6xx_gpu_sw_reset(gpu, true); 1485 a6xx_gpu_sw_reset(gpu, false); 1486 } 1487 1488 reinit_completion(&gmu->pd_gate); 1489 dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb); 1490 dev_pm_genpd_synced_poweroff(gmu->cxpd); 1491 1492 /* Drop the rpm refcount from active submits */ 1493 if (active_submits) 1494 pm_runtime_put(&gpu->pdev->dev); 1495 1496 /* And the final one from recover worker */ 1497 pm_runtime_put_sync(&gpu->pdev->dev); 1498 1499 if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000))) 1500 DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n"); 1501 1502 dev_pm_genpd_remove_notifier(gmu->cxpd); 1503 1504 pm_runtime_use_autosuspend(&gpu->pdev->dev); 1505 1506 if (active_submits) 1507 pm_runtime_get(&gpu->pdev->dev); 1508 1509 pm_runtime_get_sync(&gpu->pdev->dev); 1510 1511 gpu->active_submits = active_submits; 1512 mutex_unlock(&gpu->active_lock); 1513 1514 msm_gpu_hw_init(gpu); 1515 a6xx_gpu->hung = false; 
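	/* The GPU has been reinitialized by msm_gpu_hw_init() above; recovery is complete */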
1516 } 1517 1518 static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid) 1519 { 1520 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1521 static const char *uche_clients[7] = { 1522 "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", 1523 }; 1524 u32 val; 1525 1526 if (adreno_is_a7xx(adreno_gpu)) { 1527 if (mid != 1 && mid != 2 && mid != 3 && mid != 8) 1528 return "UNKNOWN"; 1529 } else { 1530 if (mid < 1 || mid > 3) 1531 return "UNKNOWN"; 1532 } 1533 1534 /* 1535 * The source of the data depends on the mid ID read from FSYNR1. 1536 * and the client ID read from the UCHE block 1537 */ 1538 val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF); 1539 1540 if (adreno_is_a7xx(adreno_gpu)) { 1541 /* Bit 3 for mid=3 indicates BR or BV */ 1542 static const char *uche_clients_a7xx[16] = { 1543 "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", 1544 "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP", 1545 "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", 1546 "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP", 1547 }; 1548 1549 /* LPAC has the same clients as BR and BV, but because it is 1550 * compute-only some of them do not exist and there are holes 1551 * in the array. 1552 */ 1553 static const char *uche_clients_lpac_a7xx[8] = { 1554 "-", "LPAC_SP", "-", "-", 1555 "LPAC_HLSQ", "-", "-", "LPAC_TP", 1556 }; 1557 1558 val &= GENMASK(6, 0); 1559 1560 /* mid=3 refers to BR or BV */ 1561 if (mid == 3) { 1562 if (val < ARRAY_SIZE(uche_clients_a7xx)) 1563 return uche_clients_a7xx[val]; 1564 else 1565 return "UCHE"; 1566 } 1567 1568 /* mid=8 refers to LPAC */ 1569 if (mid == 8) { 1570 if (val < ARRAY_SIZE(uche_clients_lpac_a7xx)) 1571 return uche_clients_lpac_a7xx[val]; 1572 else 1573 return "UCHE_LPAC"; 1574 } 1575 1576 /* mid=2 is a catchall for everything else in LPAC */ 1577 if (mid == 2) 1578 return "UCHE_LPAC"; 1579 1580 /* mid=1 is a catchall for everything else in BR/BV */ 1581 return "UCHE"; 1582 } else if (adreno_is_a660_family(adreno_gpu)) { 1583 static const char *uche_clients_a660[8] = { 1584 "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", "TP", 1585 }; 1586 1587 static const char *uche_clients_a660_not[8] = { 1588 "not VFD", "not SP", "not VSC", "not VPC", 1589 "not HLSQ", "not PC", "not LRZ", "not TP", 1590 }; 1591 1592 val &= GENMASK(6, 0); 1593 1594 if (mid == 3 && val < ARRAY_SIZE(uche_clients_a660)) 1595 return uche_clients_a660[val]; 1596 1597 if (mid == 1 && val < ARRAY_SIZE(uche_clients_a660_not)) 1598 return uche_clients_a660_not[val]; 1599 1600 return "UCHE"; 1601 } else { 1602 /* mid = 3 is most precise and refers to only one block per client */ 1603 if (mid == 3) 1604 return uche_clients[val & 7]; 1605 1606 /* For mid=2 the source is TP or VFD except when the client id is 0 */ 1607 if (mid == 2) 1608 return ((val & 7) == 0) ? 
"TP" : "TP|VFD"; 1609 1610 /* For mid=1 just return "UCHE" as a catchall for everything else */ 1611 return "UCHE"; 1612 } 1613 } 1614 1615 static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id) 1616 { 1617 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1618 1619 if (id == 0) 1620 return "CP"; 1621 else if (id == 4) 1622 return "CCU"; 1623 else if (id == 6) 1624 return "CDP Prefetch"; 1625 else if (id == 7) 1626 return "GMU"; 1627 else if (id == 5 && adreno_is_a7xx(adreno_gpu)) 1628 return "Flag cache"; 1629 1630 return a6xx_uche_fault_block(gpu, id); 1631 } 1632 1633 static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data) 1634 { 1635 struct msm_gpu *gpu = arg; 1636 struct adreno_smmu_fault_info *info = data; 1637 const char *block = "unknown"; 1638 1639 u32 scratch[] = { 1640 gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)), 1641 gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)), 1642 gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)), 1643 gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)), 1644 }; 1645 1646 if (info) 1647 block = a6xx_fault_block(gpu, info->fsynr1 & 0xff); 1648 1649 return adreno_fault_handler(gpu, iova, flags, info, block, scratch); 1650 } 1651 1652 static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu) 1653 { 1654 u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS); 1655 1656 if (status & A6XX_CP_INT_CP_OPCODE_ERROR) { 1657 u32 val; 1658 1659 gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1); 1660 val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA); 1661 dev_err_ratelimited(&gpu->pdev->dev, 1662 "CP | opcode error | possible opcode=0x%8.8X\n", 1663 val); 1664 } 1665 1666 if (status & A6XX_CP_INT_CP_UCODE_ERROR) 1667 dev_err_ratelimited(&gpu->pdev->dev, 1668 "CP ucode error interrupt\n"); 1669 1670 if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR) 1671 dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n", 1672 gpu_read(gpu, REG_A6XX_CP_HW_FAULT)); 1673 1674 if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) { 1675 u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS); 1676 1677 dev_err_ratelimited(&gpu->pdev->dev, 1678 "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n", 1679 val & (1 << 20) ? "READ" : "WRITE", 1680 (val & 0x3ffff), val); 1681 } 1682 1683 if (status & A6XX_CP_INT_CP_AHB_ERROR && !adreno_is_a7xx(to_adreno_gpu(gpu))) 1684 dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n"); 1685 1686 if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR) 1687 dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n"); 1688 1689 if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR) 1690 dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n"); 1691 1692 } 1693 1694 static void a6xx_fault_detect_irq(struct msm_gpu *gpu) 1695 { 1696 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1697 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1698 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); 1699 1700 /* 1701 * If stalled on SMMU fault, we could trip the GPU's hang detection, 1702 * but the fault handler will trigger the devcore dump, and we want 1703 * to otherwise resume normally rather than killing the submit, so 1704 * just bail. 
1705 */ 1706 if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT) 1707 return; 1708 1709 /* 1710 * Force the GPU to stay on until after we finish 1711 * collecting information 1712 */ 1713 if (!adreno_has_gmu_wrapper(adreno_gpu)) 1714 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1); 1715 1716 DRM_DEV_ERROR(&gpu->pdev->dev, 1717 "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", 1718 ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0, 1719 gpu_read(gpu, REG_A6XX_RBBM_STATUS), 1720 gpu_read(gpu, REG_A6XX_CP_RB_RPTR), 1721 gpu_read(gpu, REG_A6XX_CP_RB_WPTR), 1722 gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), 1723 gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), 1724 gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), 1725 gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE)); 1726 1727 /* Turn off the hangcheck timer to keep it from bothering us */ 1728 timer_delete(&gpu->hangcheck_timer); 1729 1730 kthread_queue_work(gpu->worker, &gpu->recover_work); 1731 } 1732 1733 static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) 1734 { 1735 u32 status; 1736 1737 status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); 1738 gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); 1739 1740 dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); 1741 1742 /* 1743 * Ignore FASTBLEND violations, because the HW will silently fall back 1744 * to legacy blending. 1745 */ 1746 if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | 1747 A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { 1748 timer_delete(&gpu->hangcheck_timer); 1749 1750 kthread_queue_work(gpu->worker, &gpu->recover_work); 1751 } 1752 } 1753 1754 static irqreturn_t a6xx_irq(struct msm_gpu *gpu) 1755 { 1756 struct msm_drm_private *priv = gpu->dev->dev_private; 1757 u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS); 1758 1759 gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status); 1760 1761 if (priv->disable_err_irq) 1762 status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS; 1763 1764 if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT) 1765 a6xx_fault_detect_irq(gpu); 1766 1767 if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) 1768 dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n"); 1769 1770 if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR) 1771 a6xx_cp_hw_err_irq(gpu); 1772 1773 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW) 1774 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n"); 1775 1776 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW) 1777 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n"); 1778 1779 if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) 1780 dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); 1781 1782 if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) 1783 a7xx_sw_fuse_violation_irq(gpu); 1784 1785 if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { 1786 msm_gpu_retire(gpu); 1787 a6xx_preempt_trigger(gpu); 1788 } 1789 1790 if (status & A6XX_RBBM_INT_0_MASK_CP_SW) 1791 a6xx_preempt_irq(gpu); 1792 1793 return IRQ_HANDLED; 1794 } 1795 1796 static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu) 1797 { 1798 llcc_slice_deactivate(a6xx_gpu->llc_slice); 1799 llcc_slice_deactivate(a6xx_gpu->htw_llc_slice); 1800 } 1801 1802 static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) 1803 { 1804 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 1805 struct msm_gpu *gpu = &adreno_gpu->base; 1806 u32 cntl1_regval = 0; 1807 1808 if (IS_ERR(a6xx_gpu->llc_mmio)) 1809 return; 1810 1811 if 
(!llcc_slice_activate(a6xx_gpu->llc_slice)) { 1812 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); 1813 1814 gpu_scid &= 0x1f; 1815 cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) | 1816 (gpu_scid << 15) | (gpu_scid << 20); 1817 1818 /* On A660, the SCID programming for UCHE traffic is done in 1819 * A6XX_GBIF_SCACHE_CNTL0[14:10] 1820 */ 1821 if (adreno_is_a660_family(adreno_gpu)) 1822 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) | 1823 (1 << 8), (gpu_scid << 10) | (1 << 8)); 1824 } 1825 1826 /* 1827 * For targets with a MMU500, activate the slice but don't program the 1828 * register. The XBL will take care of that. 1829 */ 1830 if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) { 1831 if (!a6xx_gpu->have_mmu500) { 1832 u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice); 1833 1834 gpuhtw_scid &= 0x1f; 1835 cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid); 1836 } 1837 } 1838 1839 if (!cntl1_regval) 1840 return; 1841 1842 /* 1843 * Program the slice IDs for the various GPU blocks and GPU MMU 1844 * pagetables 1845 */ 1846 if (!a6xx_gpu->have_mmu500) { 1847 a6xx_llc_write(a6xx_gpu, 1848 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval); 1849 1850 /* 1851 * Program cacheability overrides to not allocate cache 1852 * lines on a write miss 1853 */ 1854 a6xx_llc_rmw(a6xx_gpu, 1855 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); 1856 return; 1857 } 1858 1859 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval); 1860 } 1861 1862 static void a7xx_llc_activate(struct a6xx_gpu *a6xx_gpu) 1863 { 1864 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 1865 struct msm_gpu *gpu = &adreno_gpu->base; 1866 1867 if (IS_ERR(a6xx_gpu->llc_mmio)) 1868 return; 1869 1870 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { 1871 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); 1872 1873 gpu_scid &= GENMASK(4, 0); 1874 1875 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, 1876 FIELD_PREP(GENMASK(29, 25), gpu_scid) | 1877 FIELD_PREP(GENMASK(24, 20), gpu_scid) | 1878 FIELD_PREP(GENMASK(19, 15), gpu_scid) | 1879 FIELD_PREP(GENMASK(14, 10), gpu_scid) | 1880 FIELD_PREP(GENMASK(9, 5), gpu_scid) | 1881 FIELD_PREP(GENMASK(4, 0), gpu_scid)); 1882 1883 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, 1884 FIELD_PREP(GENMASK(14, 10), gpu_scid) | 1885 BIT(8)); 1886 } 1887 1888 llcc_slice_activate(a6xx_gpu->htw_llc_slice); 1889 } 1890 1891 static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu) 1892 { 1893 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */ 1894 if (adreno_has_gmu_wrapper(&a6xx_gpu->base)) 1895 return; 1896 1897 llcc_slice_putd(a6xx_gpu->llc_slice); 1898 llcc_slice_putd(a6xx_gpu->htw_llc_slice); 1899 } 1900 1901 static void a6xx_llc_slices_init(struct platform_device *pdev, 1902 struct a6xx_gpu *a6xx_gpu, bool is_a7xx) 1903 { 1904 struct device_node *phandle; 1905 1906 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */ 1907 if (adreno_has_gmu_wrapper(&a6xx_gpu->base)) 1908 return; 1909 1910 /* 1911 * There is a different programming path for A6xx targets with an 1912 * mmu500 attached, so detect if that is the case 1913 */ 1914 phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0); 1915 a6xx_gpu->have_mmu500 = (phandle && 1916 of_device_is_compatible(phandle, "arm,mmu-500")); 1917 of_node_put(phandle); 1918 1919 if (is_a7xx || !a6xx_gpu->have_mmu500) 1920 a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem"); 1921 else 1922 a6xx_gpu->llc_mmio = NULL; 1923 1924 a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU); 1925 
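	/*
	 * The GPU and the GPU pagetable-walker (HTW) slices are requested
	 * independently; if neither is available, llc_mmio is turned into an
	 * ERR_PTR below so that a6xx_llc_activate() bails out early.
	 */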
a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); 1926 1927 if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice)) 1928 a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); 1929 } 1930 1931 static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) 1932 { 1933 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 1934 struct msm_gpu *gpu = &adreno_gpu->base; 1935 u32 fuse_val; 1936 int ret; 1937 1938 if (adreno_is_a750(adreno_gpu)) { 1939 /* 1940 * Assume that if qcom scm isn't available, that whatever 1941 * replacement allows writing the fuse register ourselves. 1942 * Users of alternative firmware need to make sure this 1943 * register is writeable or indicate that it's not somehow. 1944 * Print a warning because if you mess this up you're about to 1945 * crash horribly. 1946 */ 1947 if (!qcom_scm_is_available()) { 1948 dev_warn_once(gpu->dev->dev, 1949 "SCM is not available, poking fuse register\n"); 1950 a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, 1951 A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | 1952 A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | 1953 A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); 1954 adreno_gpu->has_ray_tracing = true; 1955 return 0; 1956 } 1957 1958 ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ | 1959 QCOM_SCM_GPU_TSENSE_EN_REQ); 1960 if (ret) 1961 return ret; 1962 1963 /* 1964 * On a750 raytracing may be disabled by the firmware, find out 1965 * whether that's the case. The scm call above sets the fuse 1966 * register. 1967 */ 1968 fuse_val = a6xx_llc_read(a6xx_gpu, 1969 REG_A7XX_CX_MISC_SW_FUSE_VALUE); 1970 adreno_gpu->has_ray_tracing = 1971 !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING); 1972 } else if (adreno_is_a740(adreno_gpu)) { 1973 /* Raytracing is always enabled on a740 */ 1974 adreno_gpu->has_ray_tracing = true; 1975 } 1976 1977 return 0; 1978 } 1979 1980 1981 #define GBIF_CLIENT_HALT_MASK BIT(0) 1982 #define GBIF_ARB_HALT_MASK BIT(1) 1983 #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0) 1984 #define VBIF_RESET_ACK_MASK 0xF0 1985 #define GPR0_GBIF_HALT_REQUEST 0x1E0 1986 1987 void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off) 1988 { 1989 struct msm_gpu *gpu = &adreno_gpu->base; 1990 1991 if (adreno_is_a619_holi(adreno_gpu)) { 1992 gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, GPR0_GBIF_HALT_REQUEST); 1993 spin_until((gpu_read(gpu, REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) & 1994 (VBIF_RESET_ACK_MASK)) == VBIF_RESET_ACK_MASK); 1995 } else if (!a6xx_has_gbif(adreno_gpu)) { 1996 gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, VBIF_XIN_HALT_CTRL0_MASK); 1997 spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & 1998 (VBIF_XIN_HALT_CTRL0_MASK)) == VBIF_XIN_HALT_CTRL0_MASK); 1999 gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0); 2000 2001 return; 2002 } 2003 2004 if (gx_off) { 2005 /* Halt the gx side of GBIF */ 2006 gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1); 2007 spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1); 2008 } 2009 2010 /* Halt new client requests on GBIF */ 2011 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); 2012 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 2013 (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK); 2014 2015 /* Halt all AXI requests on GBIF */ 2016 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK); 2017 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 2018 (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK); 2019 2020 /* The GBIF halt needs to be explicitly cleared */ 2021 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0); 2022 } 2023 2024 void a6xx_gpu_sw_reset(struct msm_gpu 
*gpu, bool assert) 2025 { 2026 /* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */ 2027 if (adreno_is_a610(to_adreno_gpu(gpu))) 2028 return; 2029 2030 gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert); 2031 /* Perform a bogus read and add a brief delay to ensure ordering. */ 2032 gpu_read(gpu, REG_A6XX_RBBM_SW_RESET_CMD); 2033 udelay(1); 2034 2035 /* The reset line needs to be asserted for at least 100 us */ 2036 if (assert) 2037 udelay(100); 2038 } 2039 2040 static int a6xx_gmu_pm_resume(struct msm_gpu *gpu) 2041 { 2042 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2043 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2044 int ret; 2045 2046 gpu->needs_hw_init = true; 2047 2048 trace_msm_gpu_resume(0); 2049 2050 mutex_lock(&a6xx_gpu->gmu.lock); 2051 ret = a6xx_gmu_resume(a6xx_gpu); 2052 mutex_unlock(&a6xx_gpu->gmu.lock); 2053 if (ret) 2054 return ret; 2055 2056 msm_devfreq_resume(gpu); 2057 2058 adreno_is_a7xx(adreno_gpu) ? a7xx_llc_activate(a6xx_gpu) : a6xx_llc_activate(a6xx_gpu); 2059 2060 return ret; 2061 } 2062 2063 static int a6xx_pm_resume(struct msm_gpu *gpu) 2064 { 2065 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2066 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2067 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 2068 unsigned long freq = gpu->fast_rate; 2069 struct dev_pm_opp *opp; 2070 int ret; 2071 2072 gpu->needs_hw_init = true; 2073 2074 trace_msm_gpu_resume(0); 2075 2076 mutex_lock(&a6xx_gpu->gmu.lock); 2077 2078 opp = dev_pm_opp_find_freq_ceil(&gpu->pdev->dev, &freq); 2079 if (IS_ERR(opp)) { 2080 ret = PTR_ERR(opp); 2081 goto err_set_opp; 2082 } 2083 dev_pm_opp_put(opp); 2084 2085 /* Set the core clock and bus bw, having VDD scaling in mind */ 2086 dev_pm_opp_set_opp(&gpu->pdev->dev, opp); 2087 2088 pm_runtime_resume_and_get(gmu->dev); 2089 pm_runtime_resume_and_get(gmu->gxpd); 2090 2091 ret = clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks); 2092 if (ret) 2093 goto err_bulk_clk; 2094 2095 if (adreno_is_a619_holi(adreno_gpu)) 2096 a6xx_sptprac_enable(gmu); 2097 2098 /* If anything goes south, tear the GPU down piece by piece.. 
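	 * That means dropping the GMU device and GX power-domain runtime PM
	 * references and the OPP vote taken above.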
*/ 2099 if (ret) { 2100 err_bulk_clk: 2101 pm_runtime_put(gmu->gxpd); 2102 pm_runtime_put(gmu->dev); 2103 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL); 2104 } 2105 err_set_opp: 2106 mutex_unlock(&a6xx_gpu->gmu.lock); 2107 2108 if (!ret) 2109 msm_devfreq_resume(gpu); 2110 2111 return ret; 2112 } 2113 2114 static int a6xx_gmu_pm_suspend(struct msm_gpu *gpu) 2115 { 2116 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2117 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2118 int i, ret; 2119 2120 trace_msm_gpu_suspend(0); 2121 2122 a6xx_llc_deactivate(a6xx_gpu); 2123 2124 msm_devfreq_suspend(gpu); 2125 2126 mutex_lock(&a6xx_gpu->gmu.lock); 2127 ret = a6xx_gmu_stop(a6xx_gpu); 2128 mutex_unlock(&a6xx_gpu->gmu.lock); 2129 if (ret) 2130 return ret; 2131 2132 if (a6xx_gpu->shadow_bo) 2133 for (i = 0; i < gpu->nr_rings; i++) 2134 a6xx_gpu->shadow[i] = 0; 2135 2136 gpu->suspend_count++; 2137 2138 return 0; 2139 } 2140 2141 static int a6xx_pm_suspend(struct msm_gpu *gpu) 2142 { 2143 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2144 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2145 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 2146 int i; 2147 2148 trace_msm_gpu_suspend(0); 2149 2150 msm_devfreq_suspend(gpu); 2151 2152 mutex_lock(&a6xx_gpu->gmu.lock); 2153 2154 /* Drain the outstanding traffic on memory buses */ 2155 a6xx_bus_clear_pending_transactions(adreno_gpu, true); 2156 2157 if (adreno_is_a619_holi(adreno_gpu)) 2158 a6xx_sptprac_disable(gmu); 2159 2160 clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks); 2161 2162 pm_runtime_put_sync(gmu->gxpd); 2163 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL); 2164 pm_runtime_put_sync(gmu->dev); 2165 2166 mutex_unlock(&a6xx_gpu->gmu.lock); 2167 2168 if (a6xx_gpu->shadow_bo) 2169 for (i = 0; i < gpu->nr_rings; i++) 2170 a6xx_gpu->shadow[i] = 0; 2171 2172 gpu->suspend_count++; 2173 2174 return 0; 2175 } 2176 2177 static int a6xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value) 2178 { 2179 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2180 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2181 2182 mutex_lock(&a6xx_gpu->gmu.lock); 2183 2184 /* Force the GPU power on so we can read this register */ 2185 a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); 2186 2187 *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER); 2188 2189 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); 2190 2191 mutex_unlock(&a6xx_gpu->gmu.lock); 2192 2193 return 0; 2194 } 2195 2196 static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) 2197 { 2198 *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER); 2199 return 0; 2200 } 2201 2202 static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu) 2203 { 2204 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2205 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2206 2207 return a6xx_gpu->cur_ring; 2208 } 2209 2210 static void a6xx_destroy(struct msm_gpu *gpu) 2211 { 2212 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2213 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2214 2215 if (a6xx_gpu->sqe_bo) { 2216 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm); 2217 drm_gem_object_put(a6xx_gpu->sqe_bo); 2218 } 2219 2220 if (a6xx_gpu->shadow_bo) { 2221 msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->vm); 2222 drm_gem_object_put(a6xx_gpu->shadow_bo); 2223 } 2224 2225 a6xx_llc_slices_destroy(a6xx_gpu); 2226 2227 a6xx_gmu_remove(a6xx_gpu); 2228 2229 adreno_gpu_cleanup(adreno_gpu); 2230 2231 kfree(a6xx_gpu); 2232 } 2233 2234 static u64 a6xx_gpu_busy(struct msm_gpu *gpu, 
unsigned long *out_sample_rate) 2235 { 2236 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2237 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2238 u64 busy_cycles; 2239 2240 /* 19.2MHz */ 2241 *out_sample_rate = 19200000; 2242 2243 busy_cycles = gmu_read64(&a6xx_gpu->gmu, 2244 REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L, 2245 REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H); 2246 2247 return busy_cycles; 2248 } 2249 2250 static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, 2251 bool suspended) 2252 { 2253 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2254 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2255 2256 mutex_lock(&a6xx_gpu->gmu.lock); 2257 a6xx_gmu_set_freq(gpu, opp, suspended); 2258 mutex_unlock(&a6xx_gpu->gmu.lock); 2259 } 2260 2261 static struct drm_gpuvm * 2262 a6xx_create_vm(struct msm_gpu *gpu, struct platform_device *pdev) 2263 { 2264 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2265 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2266 unsigned long quirks = 0; 2267 2268 /* 2269 * This allows GPU to set the bus attributes required to use system 2270 * cache on behalf of the iommu page table walker. 2271 */ 2272 if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice) && 2273 !device_iommu_capable(&pdev->dev, IOMMU_CAP_CACHE_COHERENCY)) 2274 quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA; 2275 2276 return adreno_iommu_create_vm(gpu, pdev, quirks); 2277 } 2278 2279 static struct drm_gpuvm * 2280 a6xx_create_private_vm(struct msm_gpu *gpu, bool kernel_managed) 2281 { 2282 struct msm_mmu *mmu; 2283 2284 mmu = msm_iommu_pagetable_create(to_msm_vm(gpu->vm)->mmu, kernel_managed); 2285 2286 if (IS_ERR(mmu)) 2287 return ERR_CAST(mmu); 2288 2289 return msm_gem_vm_create(gpu->dev, mmu, "gpu", ADRENO_VM_START, 2290 adreno_private_vm_size(gpu), kernel_managed); 2291 } 2292 2293 static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 2294 { 2295 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2296 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2297 2298 if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) 2299 return a6xx_gpu->shadow[ring->id]; 2300 2301 return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR); 2302 } 2303 2304 static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 2305 { 2306 struct msm_cp_state cp_state = { 2307 .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), 2308 .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), 2309 .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), 2310 .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE), 2311 }; 2312 bool progress; 2313 2314 /* 2315 * Adjust the remaining data to account for what has already been 2316 * fetched from memory, but not yet consumed by the SQE. 2317 * 2318 * This is not *technically* correct, the amount buffered could 2319 * exceed the IB size due to hw prefetching ahead, but: 2320 * 2321 * (1) We aren't trying to find the exact position, just whether 2322 * progress has been made 2323 * (2) The CP_REG_TO_MEM at the end of a submit should be enough 2324 * to prevent prefetching into an unrelated submit. (And 2325 * either way, at some point the ROQ will be full.) 
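	 *
	 * The ROQ_AVAIL registers report the currently buffered amount in
	 * their upper 16 bits, hence the shift below.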
2326 */ 2327 cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB1) >> 16; 2328 cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB2) >> 16; 2329 2330 progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state)); 2331 2332 ring->last_cp_state = cp_state; 2333 2334 return progress; 2335 } 2336 2337 static u32 fuse_to_supp_hw(const struct adreno_info *info, u32 fuse) 2338 { 2339 if (!info->speedbins) 2340 return UINT_MAX; 2341 2342 for (int i = 0; info->speedbins[i].fuse != SHRT_MAX; i++) 2343 if (info->speedbins[i].fuse == fuse) 2344 return BIT(info->speedbins[i].speedbin); 2345 2346 return UINT_MAX; 2347 } 2348 2349 static int a6xx_set_supported_hw(struct device *dev, const struct adreno_info *info) 2350 { 2351 u32 supp_hw; 2352 u32 speedbin; 2353 int ret; 2354 2355 ret = adreno_read_speedbin(dev, &speedbin); 2356 /* 2357 * -ENOENT means that the platform doesn't support speedbin which is 2358 * fine 2359 */ 2360 if (ret == -ENOENT) { 2361 return 0; 2362 } else if (ret) { 2363 dev_err_probe(dev, ret, 2364 "failed to read speed-bin. Some OPPs may not be supported by hardware\n"); 2365 return ret; 2366 } 2367 2368 supp_hw = fuse_to_supp_hw(info, speedbin); 2369 2370 if (supp_hw == UINT_MAX) { 2371 DRM_DEV_ERROR(dev, 2372 "missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n", 2373 speedbin); 2374 supp_hw = BIT(0); /* Default */ 2375 } 2376 2377 ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1); 2378 if (ret) 2379 return ret; 2380 2381 return 0; 2382 } 2383 2384 static const struct adreno_gpu_funcs funcs = { 2385 .base = { 2386 .get_param = adreno_get_param, 2387 .set_param = adreno_set_param, 2388 .hw_init = a6xx_hw_init, 2389 .ucode_load = a6xx_ucode_load, 2390 .pm_suspend = a6xx_gmu_pm_suspend, 2391 .pm_resume = a6xx_gmu_pm_resume, 2392 .recover = a6xx_recover, 2393 .submit = a6xx_submit, 2394 .active_ring = a6xx_active_ring, 2395 .irq = a6xx_irq, 2396 .destroy = a6xx_destroy, 2397 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2398 .show = a6xx_show, 2399 #endif 2400 .gpu_busy = a6xx_gpu_busy, 2401 .gpu_get_freq = a6xx_gmu_get_freq, 2402 .gpu_set_freq = a6xx_gpu_set_freq, 2403 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2404 .gpu_state_get = a6xx_gpu_state_get, 2405 .gpu_state_put = a6xx_gpu_state_put, 2406 #endif 2407 .create_vm = a6xx_create_vm, 2408 .create_private_vm = a6xx_create_private_vm, 2409 .get_rptr = a6xx_get_rptr, 2410 .progress = a6xx_progress, 2411 }, 2412 .get_timestamp = a6xx_gmu_get_timestamp, 2413 }; 2414 2415 static const struct adreno_gpu_funcs funcs_gmuwrapper = { 2416 .base = { 2417 .get_param = adreno_get_param, 2418 .set_param = adreno_set_param, 2419 .hw_init = a6xx_hw_init, 2420 .ucode_load = a6xx_ucode_load, 2421 .pm_suspend = a6xx_pm_suspend, 2422 .pm_resume = a6xx_pm_resume, 2423 .recover = a6xx_recover, 2424 .submit = a6xx_submit, 2425 .active_ring = a6xx_active_ring, 2426 .irq = a6xx_irq, 2427 .destroy = a6xx_destroy, 2428 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2429 .show = a6xx_show, 2430 #endif 2431 .gpu_busy = a6xx_gpu_busy, 2432 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2433 .gpu_state_get = a6xx_gpu_state_get, 2434 .gpu_state_put = a6xx_gpu_state_put, 2435 #endif 2436 .create_vm = a6xx_create_vm, 2437 .create_private_vm = a6xx_create_private_vm, 2438 .get_rptr = a6xx_get_rptr, 2439 .progress = a6xx_progress, 2440 }, 2441 .get_timestamp = a6xx_get_timestamp, 2442 }; 2443 2444 static const struct adreno_gpu_funcs funcs_a7xx = { 2445 .base = { 2446 .get_param = adreno_get_param, 2447 .set_param = adreno_set_param, 2448 
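		/*
		 * This table matches 'funcs' above except that .submit uses
		 * the a7xx-specific path.
		 */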
.hw_init = a6xx_hw_init, 2449 .ucode_load = a6xx_ucode_load, 2450 .pm_suspend = a6xx_gmu_pm_suspend, 2451 .pm_resume = a6xx_gmu_pm_resume, 2452 .recover = a6xx_recover, 2453 .submit = a7xx_submit, 2454 .active_ring = a6xx_active_ring, 2455 .irq = a6xx_irq, 2456 .destroy = a6xx_destroy, 2457 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2458 .show = a6xx_show, 2459 #endif 2460 .gpu_busy = a6xx_gpu_busy, 2461 .gpu_get_freq = a6xx_gmu_get_freq, 2462 .gpu_set_freq = a6xx_gpu_set_freq, 2463 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2464 .gpu_state_get = a6xx_gpu_state_get, 2465 .gpu_state_put = a6xx_gpu_state_put, 2466 #endif 2467 .create_vm = a6xx_create_vm, 2468 .create_private_vm = a6xx_create_private_vm, 2469 .get_rptr = a6xx_get_rptr, 2470 .progress = a6xx_progress, 2471 }, 2472 .get_timestamp = a6xx_gmu_get_timestamp, 2473 }; 2474 2475 struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) 2476 { 2477 struct msm_drm_private *priv = dev->dev_private; 2478 struct platform_device *pdev = priv->gpu_pdev; 2479 struct adreno_platform_config *config = pdev->dev.platform_data; 2480 struct device_node *node; 2481 struct a6xx_gpu *a6xx_gpu; 2482 struct adreno_gpu *adreno_gpu; 2483 struct msm_gpu *gpu; 2484 extern int enable_preemption; 2485 bool is_a7xx; 2486 int ret; 2487 2488 a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL); 2489 if (!a6xx_gpu) 2490 return ERR_PTR(-ENOMEM); 2491 2492 adreno_gpu = &a6xx_gpu->base; 2493 gpu = &adreno_gpu->base; 2494 2495 mutex_init(&a6xx_gpu->gmu.lock); 2496 2497 adreno_gpu->registers = NULL; 2498 2499 /* Check if there is a GMU phandle and set it up */ 2500 node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0); 2501 /* FIXME: How do we gracefully handle this? */ 2502 BUG_ON(!node); 2503 2504 adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper"); 2505 2506 adreno_gpu->base.hw_apriv = 2507 !!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV); 2508 2509 /* gpu->info only gets assigned in adreno_gpu_init() */ 2510 is_a7xx = config->info->family == ADRENO_7XX_GEN1 || 2511 config->info->family == ADRENO_7XX_GEN2 || 2512 config->info->family == ADRENO_7XX_GEN3; 2513 2514 a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx); 2515 2516 ret = a6xx_set_supported_hw(&pdev->dev, config->info); 2517 if (ret) { 2518 a6xx_llc_slices_destroy(a6xx_gpu); 2519 kfree(a6xx_gpu); 2520 return ERR_PTR(ret); 2521 } 2522 2523 if ((enable_preemption == 1) || (enable_preemption == -1 && 2524 (config->info->quirks & ADRENO_QUIRK_PREEMPTION))) 2525 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 4); 2526 else if (is_a7xx) 2527 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 1); 2528 else if (adreno_has_gmu_wrapper(adreno_gpu)) 2529 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_gmuwrapper, 1); 2530 else 2531 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); 2532 if (ret) { 2533 a6xx_destroy(&(a6xx_gpu->base.base)); 2534 return ERR_PTR(ret); 2535 } 2536 2537 /* 2538 * For now only clamp to idle freq for devices where this is known not 2539 * to cause power supply issues: 2540 */ 2541 if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu)) 2542 priv->gpu_clamp_to_idle = true; 2543 2544 if (adreno_has_gmu_wrapper(adreno_gpu)) 2545 ret = a6xx_gmu_wrapper_init(a6xx_gpu, node); 2546 else 2547 ret = a6xx_gmu_init(a6xx_gpu, node); 2548 of_node_put(node); 2549 if (ret) { 2550 a6xx_destroy(&(a6xx_gpu->base.base)); 2551 return ERR_PTR(ret); 2552 } 2553 2554 if (adreno_is_a7xx(adreno_gpu)) { 2555 ret = a7xx_cx_mem_init(a6xx_gpu); 2556 if (ret) { 2557 
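			/* a6xx_destroy() also tears down the GMU and LLCC slices set up above */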
a6xx_destroy(&(a6xx_gpu->base.base)); 2558 return ERR_PTR(ret); 2559 } 2560 } 2561 2562 adreno_gpu->uche_trap_base = 0x1fffffffff000ull; 2563 2564 msm_mmu_set_fault_handler(to_msm_vm(gpu->vm)->mmu, gpu, 2565 a6xx_fault_handler); 2566 2567 ret = a6xx_calc_ubwc_config(adreno_gpu); 2568 if (ret) { 2569 a6xx_destroy(&(a6xx_gpu->base.base)); 2570 return ERR_PTR(ret); 2571 } 2572 2573 /* Set up the preemption specific bits and pieces for each ringbuffer */ 2574 a6xx_preempt_init(gpu); 2575 2576 return gpu; 2577 } 2578
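/*
 * a6xx_gpu_init() is not invoked from within this file; the adreno core calls
 * it through the ->init() hook of the matching 'struct adreno_info' catalog
 * entry, roughly along the lines of this hypothetical sketch (the name
 * 'sketch_a6xx_info' and the elided fields are illustrative only):
 *
 *	static const struct adreno_info sketch_a6xx_info = {
 *		...
 *		.init = a6xx_gpu_init,
 *	};
 */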