1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
3
4
5 #include "msm_gem.h"
6 #include "msm_mmu.h"
7 #include "msm_gpu_trace.h"
8 #include "a6xx_gpu.h"
9 #include "a6xx_gmu.xml.h"
10
11 #include <linux/bitfield.h>
12 #include <linux/devfreq.h>
13 #include <linux/firmware/qcom/qcom_scm.h>
14 #include <linux/pm_domain.h>
15 #include <linux/soc/qcom/llcc-qcom.h>
16
17 #define GPU_PAS_ID 13
18
19 static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
20 {
21 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
22 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
23
24 /* Check that the GMU is idle */
25 if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_isidle(&a6xx_gpu->gmu))
26 return false;
27
28 /* Check that the CX master is idle */
29 if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
30 ~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
31 return false;
32
33 return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
34 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
35 }
36
37 static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
38 {
39 /* wait for CP to drain ringbuffer: */
40 if (!adreno_idle(gpu, ring))
41 return false;
42
43 if (spin_until(_a6xx_check_idle(gpu))) {
44 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
45 gpu->name, __builtin_return_address(0),
46 gpu_read(gpu, REG_A6XX_RBBM_STATUS),
47 gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
48 gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
49 gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
50 return false;
51 }
52
53 return true;
54 }
55
56 static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
57 {
58 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
59 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
60
61 /* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
62 if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
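/* Ask the CP to write its current read pointer to the shadow address that follows */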
63 OUT_PKT7(ring, CP_WHERE_AM_I, 2);
64 OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
65 OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
66 }
67 }
68
69 static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
70 {
71 uint32_t wptr;
72 unsigned long flags;
73
74 update_shadow_rptr(gpu, ring);
75
76 spin_lock_irqsave(&ring->preempt_lock, flags);
77
78 /* Copy the shadow to the actual register */
79 ring->cur = ring->next;
80
81 /* Make sure to wrap wptr if we need to */
82 wptr = get_wptr(ring);
83
84 spin_unlock_irqrestore(&ring->preempt_lock, flags);
85
86 /* Make sure everything is posted before making a decision */
87 mb();
88
89 gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
90 }
91
92 static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
93 u64 iova)
94 {
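/* Copy a 64-bit counter value (CNT=2 dwords, 64-bit destination) from @counter into @iova */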
95 OUT_PKT7(ring, CP_REG_TO_MEM, 3);
96 OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
97 CP_REG_TO_MEM_0_CNT(2) |
98 CP_REG_TO_MEM_0_64B);
99 OUT_RING(ring, lower_32_bits(iova));
100 OUT_RING(ring, upper_32_bits(iova));
101 }
102
103 static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
104 struct msm_ringbuffer *ring, struct msm_gem_submit *submit)
105 {
106 bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
107 struct msm_file_private *ctx = submit->queue->ctx;
108 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
109 phys_addr_t ttbr;
110 u32 asid;
111 u64 memptr = rbmemptr(ring, ttbr0);
112
113 if (ctx->seqno == a6xx_gpu->base.base.cur_ctx_seqno)
114 return;
115
116 if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
117 return;
118
119 if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) {
120 /* Wait for previous submit to complete before continuing: */
121 OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
122 OUT_RING(ring, 0);
123 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
124 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
125 OUT_RING(ring, submit->seqno - 1);
126 }
127
128 if (!sysprof) {
129 if (!adreno_is_a7xx(adreno_gpu)) {
130 /* Turn off protected mode to write to special registers */
131 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
132 OUT_RING(ring, 0);
133 }
134
135 OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
136 OUT_RING(ring, 1);
137 }
138
139 /* Execute the table update */
140 OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
141 OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));
142
143 OUT_RING(ring,
144 CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
145 CP_SMMU_TABLE_UPDATE_1_ASID(asid));
146 OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
147 OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));
148
149 /*
150 * Write the new TTBR0 to the memstore. This is good for debugging.
151 */
152 OUT_PKT7(ring, CP_MEM_WRITE, 4);
153 OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
154 OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
155 OUT_RING(ring, lower_32_bits(ttbr));
156 OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr));
157
158 /*
159 * Sync both threads after switching pagetables and enable BR only
160 * to make sure BV doesn't race ahead while BR is still switching
161 * pagetables.
162 */
163 if (adreno_is_a7xx(&a6xx_gpu->base)) {
164 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
165 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
166 }
167
168 /*
169 * And finally, trigger a uche flush to be sure there isn't anything
170 * lingering in that part of the GPU
171 */
172
173 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
174 OUT_RING(ring, CACHE_INVALIDATE);
175
176 if (!sysprof) {
177 /*
178 * Wait for SRAM clear after the pgtable update, so the
179 * two can happen in parallel:
180 */
181 OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
182 OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
183 OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
184 REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
185 OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0));
186 OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
187 OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
188 OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
189
190 if (!adreno_is_a7xx(adreno_gpu)) {
191 /* Re-enable protected mode: */
192 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
193 OUT_RING(ring, 1);
194 }
195 }
196 }
197
198 static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
199 {
200 unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
201 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
202 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
203 struct msm_ringbuffer *ring = submit->ring;
204 unsigned int i, ibs = 0;
205
206 a6xx_set_pagetable(a6xx_gpu, ring, submit);
207
208 get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
209 rbmemptr_stats(ring, index, cpcycles_start));
210
211 /*
212 * For PM4 the GMU register offsets are calculated from the base of the
213 * GPU registers so we need to add 0x1a800 to the register value on A630
214 * to get the right value from PM4.
215 */
216 get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
217 rbmemptr_stats(ring, index, alwayson_start));
218
219 /* Invalidate CCU depth and color */
220 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
221 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));
222
223 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
224 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));
225
226 /* Submit the commands */
227 for (i = 0; i < submit->nr_cmds; i++) {
228 switch (submit->cmd[i].type) {
229 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
230 break;
231 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
232 if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
233 break;
234 fallthrough;
235 case MSM_SUBMIT_CMD_BUF:
236 OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
237 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
238 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
239 OUT_RING(ring, submit->cmd[i].size);
240 ibs++;
241 break;
242 }
243
244 /*
245 * Periodically update shadow-wptr if needed, so that we
246 * can see partial progress of submits with large # of
247 * cmds.. otherwise we could needlessly stall waiting for
248 * ringbuffer state, simply due to looking at a shadow
249 * rptr value that has not been updated
250 */
251 if ((ibs % 32) == 0)
252 update_shadow_rptr(gpu, ring);
253 }
254
255 get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
256 rbmemptr_stats(ring, index, cpcycles_end));
257 get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
258 rbmemptr_stats(ring, index, alwayson_end));
259
260 /* Write the fence to the scratch register */
261 OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
262 OUT_RING(ring, submit->seqno);
263
264 /*
265 * Execute a CACHE_FLUSH_TS event. This will ensure that the
266 * timestamp is written to the memory and then triggers the interrupt
267 */
268 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
269 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
270 CP_EVENT_WRITE_0_IRQ);
271 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
272 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
273 OUT_RING(ring, submit->seqno);
274
275 trace_msm_gpu_submit_flush(submit,
276 gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));
277
278 a6xx_flush(gpu, ring);
279 }
280
281 static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
282 {
283 unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
284 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
285 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
286 struct msm_ringbuffer *ring = submit->ring;
287 unsigned int i, ibs = 0;
288
289 /*
290 * Toggle concurrent binning for pagetable switch and set the thread to
291 * BR since only it can execute the pagetable switch packets.
292 */
293 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
294 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
295
296 a6xx_set_pagetable(a6xx_gpu, ring, submit);
297
298 get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
299 rbmemptr_stats(ring, index, cpcycles_start));
300 get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
301 rbmemptr_stats(ring, index, alwayson_start));
302
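/* Direct the following stats and marker packets at both the BR and BV pipes */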
303 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
304 OUT_RING(ring, CP_SET_THREAD_BOTH);
305
306 OUT_PKT7(ring, CP_SET_MARKER, 1);
307 OUT_RING(ring, 0x101); /* IFPC disable */
308
309 OUT_PKT7(ring, CP_SET_MARKER, 1);
310 OUT_RING(ring, 0x00d); /* IB1LIST start */
311
312 /* Submit the commands */
313 for (i = 0; i < submit->nr_cmds; i++) {
314 switch (submit->cmd[i].type) {
315 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
316 break;
317 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
318 if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
319 break;
320 fallthrough;
321 case MSM_SUBMIT_CMD_BUF:
322 OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
323 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
324 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
325 OUT_RING(ring, submit->cmd[i].size);
326 ibs++;
327 break;
328 }
329
330 /*
331 * Periodically update shadow-wptr if needed, so that we
332 * can see partial progress of submits with large # of
333 * cmds.. otherwise we could needlessly stall waiting for
334 * ringbuffer state, simply due to looking at a shadow
335 * rptr value that has not been updated
336 */
337 if ((ibs % 32) == 0)
338 update_shadow_rptr(gpu, ring);
339 }
340
341 OUT_PKT7(ring, CP_SET_MARKER, 1);
342 OUT_RING(ring, 0x00e); /* IB1LIST end */
343
344 get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
345 rbmemptr_stats(ring, index, cpcycles_end));
346 get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
347 rbmemptr_stats(ring, index, alwayson_end));
348
349 /* Write the fence to the scratch register */
350 OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
351 OUT_RING(ring, submit->seqno);
352
353 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
354 OUT_RING(ring, CP_SET_THREAD_BR);
355
356 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
357 OUT_RING(ring, CCU_INVALIDATE_DEPTH);
358
359 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
360 OUT_RING(ring, CCU_INVALIDATE_COLOR);
361
362 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
363 OUT_RING(ring, CP_SET_THREAD_BV);
364
365 /*
366 * Make sure the timestamp is committed once BV pipe is
367 * completely done with this submission.
368 */
369 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
370 OUT_RING(ring, CACHE_CLEAN | BIT(27));
371 OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
372 OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
373 OUT_RING(ring, submit->seqno);
374
375 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
376 OUT_RING(ring, CP_SET_THREAD_BR);
377
378 /*
379 * This makes sure that BR doesn't race ahead and commit
380 * timestamp to memstore while BV is still processing
381 * this submission.
382 */
383 OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
384 OUT_RING(ring, 0);
385 OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
386 OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
387 OUT_RING(ring, submit->seqno);
388
389 /* write the ringbuffer timestamp */
390 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
391 OUT_RING(ring, CACHE_CLEAN | CP_EVENT_WRITE_0_IRQ | BIT(27));
392 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
393 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
394 OUT_RING(ring, submit->seqno);
395
396 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
397 OUT_RING(ring, CP_SET_THREAD_BOTH);
398
399 OUT_PKT7(ring, CP_SET_MARKER, 1);
400 OUT_RING(ring, 0x100); /* IFPC enable */
401
402 trace_msm_gpu_submit_flush(submit,
403 gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));
404
405 a6xx_flush(gpu, ring);
406 }
407
408 static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
409 {
410 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
411 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
412 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
413 const struct adreno_reglist *reg;
414 unsigned int i;
415 u32 cgc_delay, cgc_hyst;
416 u32 val, clock_cntl_on;
417
418 if (!(adreno_gpu->info->a6xx->hwcg || adreno_is_a7xx(adreno_gpu)))
419 return;
420
421 if (adreno_is_a630(adreno_gpu))
422 clock_cntl_on = 0x8aa8aa02;
423 else if (adreno_is_a610(adreno_gpu))
424 clock_cntl_on = 0xaaa8aa82;
425 else if (adreno_is_a702(adreno_gpu))
426 clock_cntl_on = 0xaaaaaa82;
427 else
428 clock_cntl_on = 0x8aa8aa82;
429
430 cgc_delay = adreno_is_a615_family(adreno_gpu) ? 0x111 : 0x10111;
431 cgc_hyst = adreno_is_a615_family(adreno_gpu) ? 0x555 : 0x5555;
432
433 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
434 state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
435 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
436 state ? cgc_delay : 0);
437 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
438 state ? cgc_hyst : 0);
439
440 if (!adreno_gpu->info->a6xx->hwcg) {
441 gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
442 gpu_write(gpu, REG_A7XX_RBBM_CGC_GLOBAL_LOAD_CMD, state ? 1 : 0);
443
444 if (state) {
445 gpu_write(gpu, REG_A7XX_RBBM_CGC_P2S_TRIG_CMD, 1);
446
447 if (gpu_poll_timeout(gpu, REG_A7XX_RBBM_CGC_P2S_STATUS, val,
448 val & A7XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
449 dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
450 return;
451 }
452
453 gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
454 }
455
456 return;
457 }
458
459 val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);
460
461 /* Don't re-program the registers if they are already correct */
462 if ((!state && !val) || (state && (val == clock_cntl_on)))
463 return;
464
465 /* Disable SP clock before programming HWCG registers */
466 if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
467 gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
468
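/* Walk the per-target HWCG register list; an entry with a zero offset terminates the table */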
469 for (i = 0; (reg = &adreno_gpu->info->a6xx->hwcg[i], reg->offset); i++)
470 gpu_write(gpu, reg->offset, state ? reg->value : 0);
471
472 /* Enable SP clock */
473 if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
474 gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
475
476 gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
477 }
478
479 static void a6xx_set_cp_protect(struct msm_gpu *gpu)
480 {
481 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
482 const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
483 unsigned i;
484
485 /*
486 * Enable access protection to privileged registers, fault on an access
487 * protect violation and select the last span to protect from the start
488 * address all the way to the end of the register address space
489 */
490 gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL,
491 A6XX_CP_PROTECT_CNTL_ACCESS_PROT_EN |
492 A6XX_CP_PROTECT_CNTL_ACCESS_FAULT_ON_VIOL_EN |
493 A6XX_CP_PROTECT_CNTL_LAST_SPAN_INF_RANGE);
494
495 for (i = 0; i < protect->count - 1; i++) {
496 /* Intentionally skip writing to some registers */
497 if (protect->regs[i])
498 gpu_write(gpu, REG_A6XX_CP_PROTECT(i), protect->regs[i]);
499 }
500 /* last CP_PROTECT to have "infinite" length on the last entry */
501 gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]);
502 }
503
504 static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
505 {
506 gpu->ubwc_config.rgb565_predicator = 0;
507 gpu->ubwc_config.uavflagprd_inv = 0;
508 gpu->ubwc_config.min_acc_len = 0;
509 gpu->ubwc_config.ubwc_swizzle = 0x6;
510 gpu->ubwc_config.macrotile_mode = 0;
511 gpu->ubwc_config.highest_bank_bit = 15;
512
513 if (adreno_is_a610(gpu)) {
514 gpu->ubwc_config.highest_bank_bit = 13;
515 gpu->ubwc_config.min_acc_len = 1;
516 gpu->ubwc_config.ubwc_swizzle = 0x7;
517 }
518
519 if (adreno_is_a618(gpu))
520 gpu->ubwc_config.highest_bank_bit = 14;
521
522 if (adreno_is_a619(gpu))
523 /* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
524 gpu->ubwc_config.highest_bank_bit = 13;
525
526 if (adreno_is_a619_holi(gpu))
527 gpu->ubwc_config.highest_bank_bit = 13;
528
529 if (adreno_is_a621(gpu)) {
530 gpu->ubwc_config.highest_bank_bit = 13;
531 gpu->ubwc_config.amsbc = 1;
532 gpu->ubwc_config.uavflagprd_inv = 2;
533 }
534
535 if (adreno_is_a640_family(gpu))
536 gpu->ubwc_config.amsbc = 1;
537
538 if (adreno_is_a680(gpu))
539 gpu->ubwc_config.macrotile_mode = 1;
540
541 if (adreno_is_a650(gpu) ||
542 adreno_is_a660(gpu) ||
543 adreno_is_a690(gpu) ||
544 adreno_is_a730(gpu) ||
545 adreno_is_a740_family(gpu)) {
546 /* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
547 gpu->ubwc_config.highest_bank_bit = 16;
548 gpu->ubwc_config.amsbc = 1;
549 gpu->ubwc_config.rgb565_predicator = 1;
550 gpu->ubwc_config.uavflagprd_inv = 2;
551 gpu->ubwc_config.macrotile_mode = 1;
552 }
553
554 if (adreno_is_7c3(gpu)) {
555 gpu->ubwc_config.highest_bank_bit = 14;
556 gpu->ubwc_config.amsbc = 1;
557 gpu->ubwc_config.rgb565_predicator = 1;
558 gpu->ubwc_config.uavflagprd_inv = 2;
559 gpu->ubwc_config.macrotile_mode = 1;
560 }
561
562 if (adreno_is_a702(gpu)) {
563 gpu->ubwc_config.highest_bank_bit = 14;
564 gpu->ubwc_config.min_acc_len = 1;
565 }
566 }
567
568 static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
569 {
570 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
571 /*
572 * We subtract 13 from the highest bank bit (13 is the minimum value
573 * allowed by hw) and write the lowest two bits of the remaining value
574 * as hbb_lo and the one above it as hbb_hi to the hardware.
575 */
576 BUG_ON(adreno_gpu->ubwc_config.highest_bank_bit < 13);
577 u32 hbb = adreno_gpu->ubwc_config.highest_bank_bit - 13;
578 u32 hbb_hi = hbb >> 2;
579 u32 hbb_lo = hbb & 3;
580 u32 ubwc_mode = adreno_gpu->ubwc_config.ubwc_swizzle & 1;
581 u32 level2_swizzling_dis = !(adreno_gpu->ubwc_config.ubwc_swizzle & 2);
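/*
 * For example, with the default highest_bank_bit of 15: hbb = 2, so
 * hbb_hi = 0 and hbb_lo = 2 end up in the *_NC_MODE_CNTL fields below.
 */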
582
583 gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
584 level2_swizzling_dis << 12 |
585 adreno_gpu->ubwc_config.rgb565_predicator << 11 |
586 hbb_hi << 10 | adreno_gpu->ubwc_config.amsbc << 4 |
587 adreno_gpu->ubwc_config.min_acc_len << 3 |
588 hbb_lo << 1 | ubwc_mode);
589
590 gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
591 level2_swizzling_dis << 6 | hbb_hi << 4 |
592 adreno_gpu->ubwc_config.min_acc_len << 3 |
593 hbb_lo << 1 | ubwc_mode);
594
595 gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
596 level2_swizzling_dis << 12 | hbb_hi << 10 |
597 adreno_gpu->ubwc_config.uavflagprd_inv << 4 |
598 adreno_gpu->ubwc_config.min_acc_len << 3 |
599 hbb_lo << 1 | ubwc_mode);
600
601 if (adreno_is_a7xx(adreno_gpu))
602 gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL,
603 FIELD_PREP(GENMASK(8, 5), hbb_lo));
604
605 gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL,
606 adreno_gpu->ubwc_config.min_acc_len << 23 | hbb_lo << 21);
607
608 gpu_write(gpu, REG_A6XX_RBBM_NC_MODE_CNTL,
609 adreno_gpu->ubwc_config.macrotile_mode);
610 }
611
612 static int a6xx_cp_init(struct msm_gpu *gpu)
613 {
614 struct msm_ringbuffer *ring = gpu->rb[0];
615
616 OUT_PKT7(ring, CP_ME_INIT, 8);
617
618 OUT_RING(ring, 0x0000002f);
619
620 /* Enable multiple hardware contexts */
621 OUT_RING(ring, 0x00000003);
622
623 /* Enable error detection */
624 OUT_RING(ring, 0x20000000);
625
626 /* Don't enable header dump */
627 OUT_RING(ring, 0x00000000);
628 OUT_RING(ring, 0x00000000);
629
630 /* No workarounds enabled */
631 OUT_RING(ring, 0x00000000);
632
633 /* Pad rest of the cmds with 0's */
634 OUT_RING(ring, 0x00000000);
635 OUT_RING(ring, 0x00000000);
636
637 a6xx_flush(gpu, ring);
638 return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
639 }
640
641 static int a7xx_cp_init(struct msm_gpu *gpu)
642 {
643 struct msm_ringbuffer *ring = gpu->rb[0];
644 u32 mask;
645
646 /* Disable concurrent binning before sending CP init */
647 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
648 OUT_RING(ring, BIT(27));
649
650 OUT_PKT7(ring, CP_ME_INIT, 7);
651
652 /* Use multiple HW contexts */
653 mask = BIT(0);
654
655 /* Enable error detection */
656 mask |= BIT(1);
657
658 /* Set default reset state */
659 mask |= BIT(3);
660
661 /* Disable save/restore of performance counters across preemption */
662 mask |= BIT(6);
663
664 /* Enable the register init list with the spinlock */
665 mask |= BIT(8);
666
667 OUT_RING(ring, mask);
668
669 /* Enable multiple hardware contexts */
670 OUT_RING(ring, 0x00000003);
671
672 /* Enable error detection */
673 OUT_RING(ring, 0x20000000);
674
675 /* Operation mode mask */
676 OUT_RING(ring, 0x00000002);
677
678 /* *Don't* send a power up reg list for concurrent binning (TODO) */
679 /* Lo address */
680 OUT_RING(ring, 0x00000000);
681 /* Hi address */
682 OUT_RING(ring, 0x00000000);
683 /* BIT(31) set => read the regs from the list */
684 OUT_RING(ring, 0x00000000);
685
686 a6xx_flush(gpu, ring);
687 return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
688 }
689
690 /*
691 * Check that the microcode version is new enough to include several key
692 * security fixes. Return true if the ucode is safe.
693 */
694 static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
695 struct drm_gem_object *obj)
696 {
697 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
698 struct msm_gpu *gpu = &adreno_gpu->base;
699 const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE];
700 u32 *buf = msm_gem_get_vaddr(obj);
701 bool ret = false;
702
703 if (IS_ERR(buf))
704 return false;
705
706 /* A7xx is safe! */
707 if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu))
708 return true;
709
710 /*
711 * Targets up to a640 (a618, a630 and a640) need to check for a
712 * microcode version that is patched to support the whereami opcode or
713 * one that is new enough to include it by default.
714 *
715 * a650 tier targets don't need whereami but still need to be
716 * equal to or newer than 0.95 for other security fixes
717 *
718 * a660 targets have all the critical security fixes from the start
719 */
720 if (!strcmp(sqe_name, "a630_sqe.fw")) {
721 /*
722 * If the lowest nibble is 0xa that is an indication that this
723 * microcode has been patched. The actual version is in dword
724 * [3] but we only care about the patchlevel which is the lowest
725 * nibble of dword [3]
726 *
727 * Otherwise check that the firmware is greater than or equal
728 * to 1.90 which was the first version that had this fix built
729 * in
730 */
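/* e.g. (buf[0] & 0xfff) == 0x195 reads as version 1.95 and passes the 0x190 check */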
731 if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
732 (buf[0] & 0xfff) >= 0x190) {
733 a6xx_gpu->has_whereami = true;
734 ret = true;
735 goto out;
736 }
737
738 DRM_DEV_ERROR(&gpu->pdev->dev,
739 "a630 SQE ucode is too old. Have version %x need at least %x\n",
740 buf[0] & 0xfff, 0x190);
741 } else if (!strcmp(sqe_name, "a650_sqe.fw")) {
742 if ((buf[0] & 0xfff) >= 0x095) {
743 ret = true;
744 goto out;
745 }
746
747 DRM_DEV_ERROR(&gpu->pdev->dev,
748 "a650 SQE ucode is too old. Have version %x need at least %x\n",
749 buf[0] & 0xfff, 0x095);
750 } else if (!strcmp(sqe_name, "a660_sqe.fw")) {
751 ret = true;
752 } else {
753 DRM_DEV_ERROR(&gpu->pdev->dev,
754 "unknown GPU, add it to a6xx_ucode_check_version()!!\n");
755 }
756 out:
757 msm_gem_put_vaddr(obj);
758 return ret;
759 }
760
761 static int a6xx_ucode_load(struct msm_gpu *gpu)
762 {
763 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
764 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
765
766 if (!a6xx_gpu->sqe_bo) {
767 a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu,
768 adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova);
769
770 if (IS_ERR(a6xx_gpu->sqe_bo)) {
771 int ret = PTR_ERR(a6xx_gpu->sqe_bo);
772
773 a6xx_gpu->sqe_bo = NULL;
774 DRM_DEV_ERROR(&gpu->pdev->dev,
775 "Could not allocate SQE ucode: %d\n", ret);
776
777 return ret;
778 }
779
780 msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
781 if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
782 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
783 drm_gem_object_put(a6xx_gpu->sqe_bo);
784
785 a6xx_gpu->sqe_bo = NULL;
786 return -EPERM;
787 }
788 }
789
790 /*
791 * Expanded APRIV and targets that support WHERE_AM_I both need a
792 * privileged buffer to store the RPTR shadow
793 */
794 if ((adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) &&
795 !a6xx_gpu->shadow_bo) {
796 a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
797 sizeof(u32) * gpu->nr_rings,
798 MSM_BO_WC | MSM_BO_MAP_PRIV,
799 gpu->aspace, &a6xx_gpu->shadow_bo,
800 &a6xx_gpu->shadow_iova);
801
802 if (IS_ERR(a6xx_gpu->shadow))
803 return PTR_ERR(a6xx_gpu->shadow);
804
805 msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow");
806 }
807
808 return 0;
809 }
810
811 static int a6xx_zap_shader_init(struct msm_gpu *gpu)
812 {
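/* Only attempt to load the zap firmware once per driver lifetime */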
813 static bool loaded;
814 int ret;
815
816 if (loaded)
817 return 0;
818
819 ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
820
821 loaded = !ret;
822 return ret;
823 }
824
825 #define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
826 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
827 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
828 A6XX_RBBM_INT_0_MASK_CP_IB2 | \
829 A6XX_RBBM_INT_0_MASK_CP_IB1 | \
830 A6XX_RBBM_INT_0_MASK_CP_RB | \
831 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
832 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
833 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
834 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
835 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
836
837 #define A7XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
838 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
839 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
840 A6XX_RBBM_INT_0_MASK_CP_SW | \
841 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
842 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
843 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
844 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
845 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
846 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
847 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
848 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
849 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
850 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
851
852 #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \
853 A6XX_CP_APRIV_CNTL_RBFETCH | \
854 A6XX_CP_APRIV_CNTL_RBPRIVLEVEL | \
855 A6XX_CP_APRIV_CNTL_RBRPWB)
856
857 #define A7XX_BR_APRIVMASK (A7XX_APRIV_MASK | \
858 A6XX_CP_APRIV_CNTL_CDREAD | \
859 A6XX_CP_APRIV_CNTL_CDWRITE)
860
861 static int hw_init(struct msm_gpu *gpu)
862 {
863 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
864 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
865 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
866 u64 gmem_range_min;
867 int ret;
868
869 if (!adreno_has_gmu_wrapper(adreno_gpu)) {
870 /* Make sure the GMU keeps the GPU on while we set it up */
871 ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
872 if (ret)
873 return ret;
874 }
875
876 /* Clear GBIF halt in case GX domain was not collapsed */
877 if (adreno_is_a619_holi(adreno_gpu)) {
878 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
879 gpu_read(gpu, REG_A6XX_GBIF_HALT);
880
881 gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, 0);
882 gpu_read(gpu, REG_A6XX_RBBM_GPR0_CNTL);
883 } else if (a6xx_has_gbif(adreno_gpu)) {
884 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
885 gpu_read(gpu, REG_A6XX_GBIF_HALT);
886
887 gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
888 gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT);
889 }
890
891 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
892
893 if (adreno_is_a619_holi(adreno_gpu))
894 a6xx_sptprac_enable(gmu);
895
896 /*
897 * Disable the trusted memory range - we don't actually support secure
898 * memory rendering at this point in time and we don't want to block off
899 * part of the virtual memory space.
900 */
901 gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
902 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
903
904 if (!adreno_is_a7xx(adreno_gpu)) {
905 /* Turn on 64 bit addressing for all blocks */
906 gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
907 gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
908 gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
909 gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
910 gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
911 gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
912 gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
913 gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
914 gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
915 gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
916 gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
917 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
918 }
919
920 /* enable hardware clockgating */
921 a6xx_set_hwcg(gpu, true);
922
923 /* VBIF/GBIF start */
924 if (adreno_is_a610_family(adreno_gpu) ||
925 adreno_is_a640_family(adreno_gpu) ||
926 adreno_is_a650_family(adreno_gpu) ||
927 adreno_is_a7xx(adreno_gpu)) {
928 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
929 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
930 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
931 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
932 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL,
933 adreno_is_a7xx(adreno_gpu) ? 0x2120212 : 0x3);
934 } else {
935 gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
936 }
937
938 if (adreno_is_a630(adreno_gpu))
939 gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
940
941 if (adreno_is_a7xx(adreno_gpu))
942 gpu_write(gpu, REG_A6XX_UCHE_GBIF_GX_CONFIG, 0x10240e0);
943
944 /* Make all blocks contribute to the GPU BUSY perf counter */
945 gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
946
947 /* Disable L2 bypass in the UCHE */
948 if (adreno_is_a7xx(adreno_gpu)) {
949 gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu);
950 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu);
951 } else {
952 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, 0x0001ffffffffffc0llu);
953 gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu);
954 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu);
955 }
956
957 if (!(adreno_is_a650_family(adreno_gpu) ||
958 adreno_is_a702(adreno_gpu) ||
959 adreno_is_a730(adreno_gpu))) {
960 gmem_range_min = adreno_is_a740_family(adreno_gpu) ? SZ_16M : SZ_1M;
961
962 /* Set the GMEM VA range [gmem_range_min : gmem_range_min + gmem size - 1] */
963 gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN, gmem_range_min);
964
965 gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX,
966 gmem_range_min + adreno_gpu->info->gmem - 1);
967 }
968
969 if (adreno_is_a7xx(adreno_gpu))
970 gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, BIT(23));
971 else {
972 gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
973 gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);
974 }
975
976 if (adreno_is_a640_family(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) {
977 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
978 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
979 } else if (adreno_is_a610_family(adreno_gpu)) {
980 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060);
981 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16);
982 } else if (!adreno_is_a7xx(adreno_gpu)) {
983 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
984 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
985 }
986
987 if (adreno_is_a660_family(adreno_gpu))
988 gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
989
990 /* Setting the mem pool size */
991 if (adreno_is_a610(adreno_gpu)) {
992 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 48);
993 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 47);
994 } else if (adreno_is_a702(adreno_gpu)) {
995 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 64);
996 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 63);
997 } else if (!adreno_is_a7xx(adreno_gpu))
998 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);
999
1000
1001 /* Set the default primFifo threshold values */
1002 if (adreno_gpu->info->a6xx->prim_fifo_threshold)
1003 gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL,
1004 adreno_gpu->info->a6xx->prim_fifo_threshold);
1005
1006 /* Set the AHB default slave response to "ERROR" */
1007 gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);
1008
1009 /* Turn on performance counters */
1010 gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);
1011
1012 if (adreno_is_a7xx(adreno_gpu)) {
1013 /* Turn on the IFPC counter (countable 4 on XOCLK4) */
1014 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1,
1015 FIELD_PREP(GENMASK(7, 0), 0x4));
1016 }
1017
1018 /* Select CP0 to always count cycles */
1019 gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT);
1020
1021 a6xx_set_ubwc_config(gpu);
1022
1023 /* Enable fault detection */
1024 if (adreno_is_a730(adreno_gpu) ||
1025 adreno_is_a740_family(adreno_gpu))
1026 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff);
1027 else if (adreno_is_a690(adreno_gpu))
1028 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x4fffff);
1029 else if (adreno_is_a619(adreno_gpu))
1030 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff);
1031 else if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
1032 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3ffff);
1033 else
1034 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x1fffff);
1035
1036 gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1);
1037
1038 /* Set weights for bicubic filtering */
1039 if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) {
1040 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0);
1041 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
1042 0x3fe05ff4);
1043 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2,
1044 0x3fa0ebee);
1045 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3,
1046 0x3f5193ed);
1047 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4,
1048 0x3f0243f0);
1049 }
1050
1051 /* Set up the CX GMU counter 0 to count busy ticks */
1052 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);
1053
1054 /* Enable the power counter */
1055 gmu_rmw(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, BIT(5));
1056 gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);
1057
1058 /* Protect registers from the CP */
1059 a6xx_set_cp_protect(gpu);
1060
1061 if (adreno_is_a660_family(adreno_gpu)) {
1062 if (adreno_is_a690(adreno_gpu))
1063 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x00028801);
1064 else
1065 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
1066 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
1067 } else if (adreno_is_a702(adreno_gpu)) {
1068 /* Something to do with the HLSQ cluster */
1069 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(24));
1070 }
1071
1072 if (adreno_is_a690(adreno_gpu))
1073 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90);
1074 /* Set dualQ + disable afull for A660 GPU */
1075 else if (adreno_is_a660(adreno_gpu))
1076 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
1077 else if (adreno_is_a7xx(adreno_gpu))
1078 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG,
1079 FIELD_PREP(GENMASK(19, 16), 6) |
1080 FIELD_PREP(GENMASK(15, 12), 6) |
1081 FIELD_PREP(GENMASK(11, 8), 9) |
1082 BIT(3) | BIT(2) |
1083 FIELD_PREP(GENMASK(1, 0), 2));
1084
1085 /* Enable expanded apriv for targets that support it */
1086 if (gpu->hw_apriv) {
1087 if (adreno_is_a7xx(adreno_gpu)) {
1088 gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
1089 A7XX_BR_APRIVMASK);
1090 gpu_write(gpu, REG_A7XX_CP_BV_APRIV_CNTL,
1091 A7XX_APRIV_MASK);
1092 gpu_write(gpu, REG_A7XX_CP_LPAC_APRIV_CNTL,
1093 A7XX_APRIV_MASK);
1094 } else
1095 gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
1096 BIT(6) | BIT(5) | BIT(3) | BIT(2) | BIT(1));
1097 }
1098
1099 if (adreno_is_a750(adreno_gpu)) {
1100 /* Disable ubwc merged UFC request feature */
1101 gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(19), BIT(19));
1102
1103 /* Enable TP flaghint and other performance settings */
1104 gpu_write(gpu, REG_A6XX_TPL1_DBG_ECO_CNTL1, 0xc0700);
1105 } else if (adreno_is_a7xx(adreno_gpu)) {
1106 /* Disable non-ubwc read reqs from passing write reqs */
1107 gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(11), BIT(11));
1108 }
1109
1110 /* Enable interrupts */
1111 gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK,
1112 adreno_is_a7xx(adreno_gpu) ? A7XX_INT_MASK : A6XX_INT_MASK);
1113
1114 ret = adreno_hw_init(gpu);
1115 if (ret)
1116 goto out;
1117
1118 gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
1119
1120 /* Set the ringbuffer address */
1121 gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
1122
1123 /* Targets that support extended APRIV can use the RPTR shadow from
1124 * hardware but all the other ones need to disable the feature. Targets
1125 * that support the WHERE_AM_I opcode can use that instead
1126 */
1127 if (adreno_gpu->base.hw_apriv)
1128 gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
1129 else
1130 gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
1131 MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
1132
1133 /* Configure the RPTR shadow if needed: */
1134 if (a6xx_gpu->shadow_bo) {
1135 gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR,
1136 shadowptr(a6xx_gpu, gpu->rb[0]));
1137 }
1138
1139 /* ..which means "always" on A7xx, also for BV shadow */
1140 if (adreno_is_a7xx(adreno_gpu)) {
1141 gpu_write64(gpu, REG_A7XX_CP_BV_RB_RPTR_ADDR,
1142 rbmemptr(gpu->rb[0], bv_fence));
1143 }
1144
1145 /* Always come up on rb 0 */
1146 a6xx_gpu->cur_ring = gpu->rb[0];
1147
1148 gpu->cur_ctx_seqno = 0;
1149
1150 /* Enable the SQE to start the CP engine */
1151 gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);
1152
1153 ret = adreno_is_a7xx(adreno_gpu) ? a7xx_cp_init(gpu) : a6xx_cp_init(gpu);
1154 if (ret)
1155 goto out;
1156
1157 /*
1158 * Try to load a zap shader into the secure world. If successful
1159 * we can use the CP to switch out of secure mode. If not then we
1160 * have no recourse but to try to switch ourselves out manually. If we
1161 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
1162 * be blocked and a permissions violation will soon follow.
1163 */
1164 ret = a6xx_zap_shader_init(gpu);
1165 if (!ret) {
1166 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
1167 OUT_RING(gpu->rb[0], 0x00000000);
1168
1169 a6xx_flush(gpu, gpu->rb[0]);
1170 if (!a6xx_idle(gpu, gpu->rb[0]))
1171 return -EINVAL;
1172 } else if (ret == -ENODEV) {
1173 /*
1174 * This device does not use zap shader (but print a warning
1175 * just in case someone got their dt wrong.. hopefully they
1176 * have a debug UART to realize the error of their ways...
1177 * if you mess this up you are about to crash horribly)
1178 */
1179 dev_warn_once(gpu->dev->dev,
1180 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
1181 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
1182 ret = 0;
1183 } else {
1184 return ret;
1185 }
1186
1187 out:
1188 if (adreno_has_gmu_wrapper(adreno_gpu))
1189 return ret;
1190 /*
1191 * Tell the GMU that we are done touching the GPU and it can start power
1192 * management
1193 */
1194 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
1195
1196 if (a6xx_gpu->gmu.legacy) {
1197 /* Take the GMU out of its special boot mode */
1198 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
1199 }
1200
1201 return ret;
1202 }
1203
1204 static int a6xx_hw_init(struct msm_gpu *gpu)
1205 {
1206 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1207 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1208 int ret;
1209
1210 mutex_lock(&a6xx_gpu->gmu.lock);
1211 ret = hw_init(gpu);
1212 mutex_unlock(&a6xx_gpu->gmu.lock);
1213
1214 return ret;
1215 }
1216
1217 static void a6xx_dump(struct msm_gpu *gpu)
1218 {
1219 DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n",
1220 gpu_read(gpu, REG_A6XX_RBBM_STATUS));
1221 adreno_dump(gpu);
1222 }
1223
1224 static void a6xx_recover(struct msm_gpu *gpu)
1225 {
1226 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1227 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1228 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1229 int i, active_submits;
1230
1231 adreno_dump_info(gpu);
1232
1233 for (i = 0; i < 8; i++)
1234 DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
1235 gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i)));
1236
1237 if (hang_debug)
1238 a6xx_dump(gpu);
1239
1240 /*
1241 * To handle recovery specific sequences during the rpm suspend we are
1242 * about to trigger
1243 */
1244 a6xx_gpu->hung = true;
1245
1246 /* Halt SQE first */
1247 gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
1248
1249 pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);
1250
1251 /* active_submits won't change until we make a submission */
1252 mutex_lock(&gpu->active_lock);
1253 active_submits = gpu->active_submits;
1254
1255 /*
1256 * Temporarily clear active_submits count to silence a WARN() in the
1257 * runtime suspend cb
1258 */
1259 gpu->active_submits = 0;
1260
1261 if (adreno_has_gmu_wrapper(adreno_gpu)) {
1262 /* Drain the outstanding traffic on memory buses */
1263 a6xx_bus_clear_pending_transactions(adreno_gpu, true);
1264
1265 /* Reset the GPU to a clean state */
1266 a6xx_gpu_sw_reset(gpu, true);
1267 a6xx_gpu_sw_reset(gpu, false);
1268 }
1269
1270 reinit_completion(&gmu->pd_gate);
1271 dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
1272 dev_pm_genpd_synced_poweroff(gmu->cxpd);
1273
1274 /* Drop the rpm refcount from active submits */
1275 if (active_submits)
1276 pm_runtime_put(&gpu->pdev->dev);
1277
1278 /* And the final one from recover worker */
1279 pm_runtime_put_sync(&gpu->pdev->dev);
1280
1281 if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
1282 DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");
1283
1284 dev_pm_genpd_remove_notifier(gmu->cxpd);
1285
1286 pm_runtime_use_autosuspend(&gpu->pdev->dev);
1287
1288 if (active_submits)
1289 pm_runtime_get(&gpu->pdev->dev);
1290
1291 pm_runtime_get_sync(&gpu->pdev->dev);
1292
1293 gpu->active_submits = active_submits;
1294 mutex_unlock(&gpu->active_lock);
1295
1296 msm_gpu_hw_init(gpu);
1297 a6xx_gpu->hung = false;
1298 }
1299
1300 static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
1301 {
1302 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1303 static const char *uche_clients[7] = {
1304 "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
1305 };
1306 u32 val;
1307
1308 if (adreno_is_a7xx(adreno_gpu)) {
1309 if (mid != 1 && mid != 2 && mid != 3 && mid != 8)
1310 return "UNKNOWN";
1311 } else {
1312 if (mid < 1 || mid > 3)
1313 return "UNKNOWN";
1314 }
1315
1316 /*
1317 * The source of the data depends on the mid ID read from FSYNR1
1318 * and the client ID read from the UCHE block.
1319 */
1320 val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);
1321
1322 if (adreno_is_a7xx(adreno_gpu)) {
1323 /* Bit 3 for mid=3 indicates BR or BV */
1324 static const char *uche_clients_a7xx[16] = {
1325 "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC",
1326 "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
1327 "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC",
1328 "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
1329 };
1330
1331 /* LPAC has the same clients as BR and BV, but because it is
1332 * compute-only some of them do not exist and there are holes
1333 * in the array.
1334 */
1335 static const char *uche_clients_lpac_a7xx[8] = {
1336 "-", "LPAC_SP", "-", "-",
1337 "LPAC_HLSQ", "-", "-", "LPAC_TP",
1338 };
1339
1340 val &= GENMASK(6, 0);
1341
1342 /* mid=3 refers to BR or BV */
1343 if (mid == 3) {
1344 if (val < ARRAY_SIZE(uche_clients_a7xx))
1345 return uche_clients_a7xx[val];
1346 else
1347 return "UCHE";
1348 }
1349
1350 /* mid=8 refers to LPAC */
1351 if (mid == 8) {
1352 if (val < ARRAY_SIZE(uche_clients_lpac_a7xx))
1353 return uche_clients_lpac_a7xx[val];
1354 else
1355 return "UCHE_LPAC";
1356 }
1357
1358 /* mid=2 is a catchall for everything else in LPAC */
1359 if (mid == 2)
1360 return "UCHE_LPAC";
1361
1362 /* mid=1 is a catchall for everything else in BR/BV */
1363 return "UCHE";
1364 } else if (adreno_is_a660_family(adreno_gpu)) {
1365 static const char *uche_clients_a660[8] = {
1366 "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", "TP",
1367 };
1368
1369 static const char *uche_clients_a660_not[8] = {
1370 "not VFD", "not SP", "not VSC", "not VPC",
1371 "not HLSQ", "not PC", "not LRZ", "not TP",
1372 };
1373
1374 val &= GENMASK(6, 0);
1375
1376 if (mid == 3 && val < ARRAY_SIZE(uche_clients_a660))
1377 return uche_clients_a660[val];
1378
1379 if (mid == 1 && val < ARRAY_SIZE(uche_clients_a660_not))
1380 return uche_clients_a660_not[val];
1381
1382 return "UCHE";
1383 } else {
1384 /* mid = 3 is most precise and refers to only one block per client */
1385 if (mid == 3)
1386 return uche_clients[val & 7];
1387
1388 /* For mid=2 the source is TP or VFD except when the client id is 0 */
1389 if (mid == 2)
1390 return ((val & 7) == 0) ? "TP" : "TP|VFD";
1391
1392 /* For mid=1 just return "UCHE" as a catchall for everything else */
1393 return "UCHE";
1394 }
1395 }
1396
1397 static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
1398 {
1399 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1400
1401 if (id == 0)
1402 return "CP";
1403 else if (id == 4)
1404 return "CCU";
1405 else if (id == 6)
1406 return "CDP Prefetch";
1407 else if (id == 7)
1408 return "GMU";
1409 else if (id == 5 && adreno_is_a7xx(adreno_gpu))
1410 return "Flag cache";
1411
1412 return a6xx_uche_fault_block(gpu, id);
1413 }
1414
1415 static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1416 {
1417 struct msm_gpu *gpu = arg;
1418 struct adreno_smmu_fault_info *info = data;
1419 const char *block = "unknown";
1420
1421 u32 scratch[] = {
1422 gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
1423 gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
1424 gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
1425 gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)),
1426 };
1427
1428 if (info)
1429 block = a6xx_fault_block(gpu, info->fsynr1 & 0xff);
1430
1431 return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
1432 }
1433
1434 static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
1435 {
1436 u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS);
1437
1438 if (status & A6XX_CP_INT_CP_OPCODE_ERROR) {
1439 u32 val;
1440
1441 gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1);
1442 val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA);
1443 dev_err_ratelimited(&gpu->pdev->dev,
1444 "CP | opcode error | possible opcode=0x%8.8X\n",
1445 val);
1446 }
1447
1448 if (status & A6XX_CP_INT_CP_UCODE_ERROR)
1449 dev_err_ratelimited(&gpu->pdev->dev,
1450 "CP ucode error interrupt\n");
1451
1452 if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR)
1453 dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n",
1454 gpu_read(gpu, REG_A6XX_CP_HW_FAULT));
1455
1456 if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1457 u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS);
1458
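/* Bit 20 encodes the access direction, bits 17:0 the offending register offset */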
1459 dev_err_ratelimited(&gpu->pdev->dev,
1460 "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1461 val & (1 << 20) ? "READ" : "WRITE",
1462 (val & 0x3ffff), val);
1463 }
1464
1465 if (status & A6XX_CP_INT_CP_AHB_ERROR && !adreno_is_a7xx(to_adreno_gpu(gpu)))
1466 dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n");
1467
1468 if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR)
1469 dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n");
1470
1471 if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR)
1472 dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n");
1473
1474 }
1475
1476 static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
1477 {
1478 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1479 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1480 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1481
1482 /*
1483 * If stalled on SMMU fault, we could trip the GPU's hang detection,
1484 * but the fault handler will trigger the devcore dump, and we want
1485 * to otherwise resume normally rather than killing the submit, so
1486 * just bail.
1487 */
1488 if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
1489 return;
1490
1491 /*
1492 * Force the GPU to stay on until after we finish
1493 * collecting information
1494 */
1495 if (!adreno_has_gmu_wrapper(adreno_gpu))
1496 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);
1497
1498 DRM_DEV_ERROR(&gpu->pdev->dev,
1499 "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1500 ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
1501 gpu_read(gpu, REG_A6XX_RBBM_STATUS),
1502 gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
1503 gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
1504 gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
1505 gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
1506 gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
1507 gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
1508
1509 /* Turn off the hangcheck timer to keep it from bothering us */
1510 del_timer(&gpu->hangcheck_timer);
1511
1512 kthread_queue_work(gpu->worker, &gpu->recover_work);
1513 }
1514
1515 static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
1516 {
1517 u32 status;
1518
1519 status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS);
1520 gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0);
1521
1522 dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);
1523
1524 /*
1525 * Ignore FASTBLEND violations, because the HW will silently fall back
1526 * to legacy blending.
1527 */
1528 if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
1529 A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
1530 del_timer(&gpu->hangcheck_timer);
1531
1532 kthread_queue_work(gpu->worker, &gpu->recover_work);
1533 }
1534 }
1535
1536 static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
1537 {
1538 struct msm_drm_private *priv = gpu->dev->dev_private;
1539 u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
1540
1541 gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
1542
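/* When error interrupts are disabled (debug option), only retire IRQs are serviced below */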
1543 if (priv->disable_err_irq)
1544 status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
1545
1546 if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
1547 a6xx_fault_detect_irq(gpu);
1548
1549 if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR)
1550 dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n");
1551
1552 if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1553 a6xx_cp_hw_err_irq(gpu);
1554
1555 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
1556 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");
1557
1558 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1559 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");
1560
1561 if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1562 dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");
1563
1564 if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
1565 a7xx_sw_fuse_violation_irq(gpu);
1566
1567 if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS)
1568 msm_gpu_retire(gpu);
1569
1570 return IRQ_HANDLED;
1571 }
1572
1573 static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
1574 {
1575 llcc_slice_deactivate(a6xx_gpu->llc_slice);
1576 llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
1577 }
1578
1579 static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1580 {
1581 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1582 struct msm_gpu *gpu = &adreno_gpu->base;
1583 u32 cntl1_regval = 0;
1584
1585 if (IS_ERR(a6xx_gpu->llc_mmio))
1586 return;
1587
1588 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1589 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1590
1591 gpu_scid &= 0x1f;
1592 cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
1593 (gpu_scid << 15) | (gpu_scid << 20);
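/* e.g. a slice id of 2 packs to 0x00210842: the same 5-bit SCID in five fields */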
1594
1595 /* On A660, the SCID programming for UCHE traffic is done in
1596 * A6XX_GBIF_SCACHE_CNTL0[14:10]
1597 */
1598 if (adreno_is_a660_family(adreno_gpu))
1599 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
1600 (1 << 8), (gpu_scid << 10) | (1 << 8));
1601 }
1602
1603 /*
1604 * For targets with a MMU500, activate the slice but don't program the
1605 * register. The XBL will take care of that.
1606 */
1607 if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
1608 if (!a6xx_gpu->have_mmu500) {
1609 u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
1610
1611 gpuhtw_scid &= 0x1f;
1612 cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
1613 }
1614 }
1615
1616 if (!cntl1_regval)
1617 return;
1618
1619 /*
1620 * Program the slice IDs for the various GPU blocks and GPU MMU
1621 * pagetables
1622 */
1623 if (!a6xx_gpu->have_mmu500) {
1624 a6xx_llc_write(a6xx_gpu,
1625 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
1626
1627 /*
1628 * Program cacheability overrides to not allocate cache
1629 * lines on a write miss
1630 */
1631 a6xx_llc_rmw(a6xx_gpu,
1632 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
1633 return;
1634 }
1635
1636 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval);
1637 }
1638
1639 static void a7xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1640 {
1641 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1642 struct msm_gpu *gpu = &adreno_gpu->base;
1643
1644 if (IS_ERR(a6xx_gpu->llc_mmio))
1645 return;
1646
1647 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1648 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1649
1650 gpu_scid &= GENMASK(4, 0);
1651
1652 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1,
1653 FIELD_PREP(GENMASK(29, 25), gpu_scid) |
1654 FIELD_PREP(GENMASK(24, 20), gpu_scid) |
1655 FIELD_PREP(GENMASK(19, 15), gpu_scid) |
1656 FIELD_PREP(GENMASK(14, 10), gpu_scid) |
1657 FIELD_PREP(GENMASK(9, 5), gpu_scid) |
1658 FIELD_PREP(GENMASK(4, 0), gpu_scid));
1659
1660 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0,
1661 FIELD_PREP(GENMASK(14, 10), gpu_scid) |
1662 BIT(8));
1663 }
1664
1665 llcc_slice_activate(a6xx_gpu->htw_llc_slice);
1666 }
1667
1668 static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
1669 {
1670 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */
1671 if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
1672 return;
1673
1674 llcc_slice_putd(a6xx_gpu->llc_slice);
1675 llcc_slice_putd(a6xx_gpu->htw_llc_slice);
1676 }
1677
1678 static void a6xx_llc_slices_init(struct platform_device *pdev,
1679 struct a6xx_gpu *a6xx_gpu, bool is_a7xx)
1680 {
1681 struct device_node *phandle;
1682
1683 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */
1684 if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
1685 return;
1686
1687 /*
1688 * There is a different programming path for A6xx targets with an
1689 * mmu500 attached, so detect if that is the case
1690 */
1691 phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0);
1692 a6xx_gpu->have_mmu500 = (phandle &&
1693 of_device_is_compatible(phandle, "arm,mmu-500"));
1694 of_node_put(phandle);
1695
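	/*
	 * The CX_MISC (cx_mem) region is only needed when the driver programs
	 * the system cache registers itself; on A6xx parts with an MMU-500
	 * that is left to earlier firmware.
	 */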
1696 if (is_a7xx || !a6xx_gpu->have_mmu500)
1697 a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem");
1698 else
1699 a6xx_gpu->llc_mmio = NULL;
1700
1701 a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
1702 a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
1703
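	/*
	 * Poison llc_mmio if neither slice is usable so the activate paths
	 * bail out early.
	 */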
1704 if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
1705 a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
1706 }
1707
1708 static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu)
1709 {
1710 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1711 struct msm_gpu *gpu = &adreno_gpu->base;
1712 u32 fuse_val;
1713 int ret;
1714
1715 if (adreno_is_a750(adreno_gpu)) {
1716 /*
1717 		 * Assume that if qcom scm isn't available, whatever replaces it
1718 		 * allows writing the fuse register ourselves.
1719 * Users of alternative firmware need to make sure this
1720 * register is writeable or indicate that it's not somehow.
1721 * Print a warning because if you mess this up you're about to
1722 * crash horribly.
1723 */
1724 if (!qcom_scm_is_available()) {
1725 dev_warn_once(gpu->dev->dev,
1726 "SCM is not available, poking fuse register\n");
1727 a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE,
1728 A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
1729 A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND |
1730 A7XX_CX_MISC_SW_FUSE_VALUE_LPAC);
1731 adreno_gpu->has_ray_tracing = true;
1732 return 0;
1733 }
1734
1735 ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ |
1736 QCOM_SCM_GPU_TSENSE_EN_REQ);
1737 if (ret)
1738 return ret;
1739
1740 /*
1741 		 * On a750 raytracing may be disabled by the firmware; find out
1742 		 * whether that's the case. The scm call above sets the fuse
1743 * register.
1744 */
1745 fuse_val = a6xx_llc_read(a6xx_gpu,
1746 REG_A7XX_CX_MISC_SW_FUSE_VALUE);
1747 adreno_gpu->has_ray_tracing =
1748 !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING);
1749 } else if (adreno_is_a740(adreno_gpu)) {
1750 /* Raytracing is always enabled on a740 */
1751 adreno_gpu->has_ray_tracing = true;
1752 }
1753
1754 return 0;
1755 }
1756
1757
1758 #define GBIF_CLIENT_HALT_MASK BIT(0)
1759 #define GBIF_ARB_HALT_MASK BIT(1)
1760 #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0)
1761 #define VBIF_RESET_ACK_MASK 0xF0
1762 #define GPR0_GBIF_HALT_REQUEST 0x1E0
1763
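/*
 * Halt new GPU-initiated bus traffic and wait for any outstanding
 * transactions to drain, typically ahead of a reset or power collapse.
 */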
1764 void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
1765 {
1766 struct msm_gpu *gpu = &adreno_gpu->base;
1767
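	/*
	 * a619_holi uses a GPR0-based halt request, targets without a GBIF
	 * drain through the VBIF, and everything else uses the GBIF halt
	 * sequence below.
	 */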
1768 if (adreno_is_a619_holi(adreno_gpu)) {
1769 gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, GPR0_GBIF_HALT_REQUEST);
1770 spin_until((gpu_read(gpu, REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) &
1771 (VBIF_RESET_ACK_MASK)) == VBIF_RESET_ACK_MASK);
1772 } else if (!a6xx_has_gbif(adreno_gpu)) {
1773 gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, VBIF_XIN_HALT_CTRL0_MASK);
1774 spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) &
1775 (VBIF_XIN_HALT_CTRL0_MASK)) == VBIF_XIN_HALT_CTRL0_MASK);
1776 gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
1777
1778 return;
1779 }
1780
1781 if (gx_off) {
1782 /* Halt the gx side of GBIF */
1783 gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1);
1784 spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1);
1785 }
1786
1787 /* Halt new client requests on GBIF */
1788 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
1789 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
1790 (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
1791
1792 /* Halt all AXI requests on GBIF */
1793 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
1794 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
1795 (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
1796
1797 /* The GBIF halt needs to be explicitly cleared */
1798 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
1799 }
1800
1801 void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert)
1802 {
1803 /* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */
1804 if (adreno_is_a610(to_adreno_gpu(gpu)))
1805 return;
1806
1807 gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert);
1808 /* Perform a bogus read and add a brief delay to ensure ordering. */
1809 gpu_read(gpu, REG_A6XX_RBBM_SW_RESET_CMD);
1810 udelay(1);
1811
1812 /* The reset line needs to be asserted for at least 100 us */
1813 if (assert)
1814 udelay(100);
1815 }
1816
1817 static int a6xx_gmu_pm_resume(struct msm_gpu *gpu)
1818 {
1819 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1820 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1821 int ret;
1822
1823 gpu->needs_hw_init = true;
1824
1825 trace_msm_gpu_resume(0);
1826
1827 mutex_lock(&a6xx_gpu->gmu.lock);
1828 ret = a6xx_gmu_resume(a6xx_gpu);
1829 mutex_unlock(&a6xx_gpu->gmu.lock);
1830 if (ret)
1831 return ret;
1832
1833 msm_devfreq_resume(gpu);
1834
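	/* Reactivate the LLC slices now that the GPU is powered back up */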
1835 	if (adreno_is_a7xx(adreno_gpu))
		a7xx_llc_activate(a6xx_gpu);
	else
		a6xx_llc_activate(a6xx_gpu);
1836
1837 return ret;
1838 }
1839
1840 static int a6xx_pm_resume(struct msm_gpu *gpu)
1841 {
1842 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1843 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1844 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1845 unsigned long freq = gpu->fast_rate;
1846 struct dev_pm_opp *opp;
1847 int ret;
1848
1849 gpu->needs_hw_init = true;
1850
1851 trace_msm_gpu_resume(0);
1852
1853 mutex_lock(&a6xx_gpu->gmu.lock);
1854
1855 opp = dev_pm_opp_find_freq_ceil(&gpu->pdev->dev, &freq);
1856 if (IS_ERR(opp)) {
1857 ret = PTR_ERR(opp);
1858 goto err_set_opp;
1859 }
1860 dev_pm_opp_put(opp);
1861
1862 	/* Set the core clock and bus bandwidth, keeping VDD scaling in mind */
1863 dev_pm_opp_set_opp(&gpu->pdev->dev, opp);
1864
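	/* Bring the GMU wrapper and GX power domains up before enabling clocks */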
1865 pm_runtime_resume_and_get(gmu->dev);
1866 pm_runtime_resume_and_get(gmu->gxpd);
1867
1868 ret = clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
1869 if (ret)
1870 goto err_bulk_clk;
1871
1872 if (adreno_is_a619_holi(adreno_gpu))
1873 a6xx_sptprac_enable(gmu);
1874
1875 	/* If anything goes south, tear the GPU down piece by piece... */
1876 if (ret) {
1877 err_bulk_clk:
1878 pm_runtime_put(gmu->gxpd);
1879 pm_runtime_put(gmu->dev);
1880 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
1881 }
1882 err_set_opp:
1883 mutex_unlock(&a6xx_gpu->gmu.lock);
1884
1885 if (!ret)
1886 msm_devfreq_resume(gpu);
1887
1888 return ret;
1889 }
1890
1891 static int a6xx_gmu_pm_suspend(struct msm_gpu *gpu)
1892 {
1893 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1894 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1895 int i, ret;
1896
1897 trace_msm_gpu_suspend(0);
1898
1899 a6xx_llc_deactivate(a6xx_gpu);
1900
1901 msm_devfreq_suspend(gpu);
1902
1903 mutex_lock(&a6xx_gpu->gmu.lock);
1904 ret = a6xx_gmu_stop(a6xx_gpu);
1905 mutex_unlock(&a6xx_gpu->gmu.lock);
1906 if (ret)
1907 return ret;
1908
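	/*
	 * Clear the cached shadow rptr values; the CP rewrites them once the
	 * GPU is brought back up.
	 */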
1909 if (a6xx_gpu->shadow_bo)
1910 for (i = 0; i < gpu->nr_rings; i++)
1911 a6xx_gpu->shadow[i] = 0;
1912
1913 gpu->suspend_count++;
1914
1915 return 0;
1916 }
1917
1918 static int a6xx_pm_suspend(struct msm_gpu *gpu)
1919 {
1920 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1921 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1922 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1923 int i;
1924
1925 trace_msm_gpu_suspend(0);
1926
1927 msm_devfreq_suspend(gpu);
1928
1929 mutex_lock(&a6xx_gpu->gmu.lock);
1930
1931 /* Drain the outstanding traffic on memory buses */
1932 a6xx_bus_clear_pending_transactions(adreno_gpu, true);
1933
1934 if (adreno_is_a619_holi(adreno_gpu))
1935 a6xx_sptprac_disable(gmu);
1936
1937 clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);
1938
1939 pm_runtime_put_sync(gmu->gxpd);
1940 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
1941 pm_runtime_put_sync(gmu->dev);
1942
1943 mutex_unlock(&a6xx_gpu->gmu.lock);
1944
1945 if (a6xx_gpu->shadow_bo)
1946 for (i = 0; i < gpu->nr_rings; i++)
1947 a6xx_gpu->shadow[i] = 0;
1948
1949 gpu->suspend_count++;
1950
1951 return 0;
1952 }
1953
1954 static int a6xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1955 {
1956 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1957 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1958
1959 mutex_lock(&a6xx_gpu->gmu.lock);
1960
1961 /* Force the GPU power on so we can read this register */
1962 a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
1963
1964 *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER);
1965
1966 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
1967
1968 mutex_unlock(&a6xx_gpu->gmu.lock);
1969
1970 return 0;
1971 }
1972
1973 static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1974 {
1975 *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER);
1976 return 0;
1977 }
1978
1979 static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
1980 {
1981 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1982 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1983
1984 return a6xx_gpu->cur_ring;
1985 }
1986
1987 static void a6xx_destroy(struct msm_gpu *gpu)
1988 {
1989 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1990 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1991
1992 if (a6xx_gpu->sqe_bo) {
1993 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
1994 drm_gem_object_put(a6xx_gpu->sqe_bo);
1995 }
1996
1997 if (a6xx_gpu->shadow_bo) {
1998 msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->aspace);
1999 drm_gem_object_put(a6xx_gpu->shadow_bo);
2000 }
2001
2002 a6xx_llc_slices_destroy(a6xx_gpu);
2003
2004 a6xx_gmu_remove(a6xx_gpu);
2005
2006 adreno_gpu_cleanup(adreno_gpu);
2007
2008 kfree(a6xx_gpu);
2009 }
2010
2011 static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
2012 {
2013 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2014 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2015 u64 busy_cycles;
2016
2017 	/* Busy cycles are counted on the 19.2 MHz always-on XO clock */
2018 *out_sample_rate = 19200000;
2019
2020 busy_cycles = gmu_read64(&a6xx_gpu->gmu,
2021 REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
2022 REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
2023
2024 return busy_cycles;
2025 }
2026
2027 static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
2028 bool suspended)
2029 {
2030 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2031 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2032
2033 mutex_lock(&a6xx_gpu->gmu.lock);
2034 a6xx_gmu_set_freq(gpu, opp, suspended);
2035 mutex_unlock(&a6xx_gpu->gmu.lock);
2036 }
2037
2038 static struct msm_gem_address_space *
2039 a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
2040 {
2041 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2042 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2043 unsigned long quirks = 0;
2044
2045 /*
2046 	 * This allows the GPU to set the bus attributes required to use the
2047 	 * system cache on behalf of the IOMMU page table walker.
2048 */
2049 if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice) &&
2050 !device_iommu_capable(&pdev->dev, IOMMU_CAP_CACHE_COHERENCY))
2051 quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
2052
2053 return adreno_iommu_create_address_space(gpu, pdev, quirks);
2054 }
2055
2056 static struct msm_gem_address_space *
2057 a6xx_create_private_address_space(struct msm_gpu *gpu)
2058 {
2059 struct msm_mmu *mmu;
2060
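	/*
	 * Create a per-context pagetable under the GPU's global MMU; the user
	 * VA range starts at 0x100000000.
	 */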
2061 mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
2062
2063 if (IS_ERR(mmu))
2064 return ERR_CAST(mmu);
2065
2066 return msm_gem_address_space_create(mmu,
2067 "gpu", 0x100000000ULL,
2068 adreno_private_address_space_size(gpu));
2069 }
2070
2071 static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
2072 {
2073 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2074 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2075
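	/* Prefer the CP-maintained shadow rptr; fall back to an MMIO read */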
2076 if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
2077 return a6xx_gpu->shadow[ring->id];
2078
2079 return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
2080 }
2081
2082 static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
2083 {
2084 struct msm_cp_state cp_state = {
2085 .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
2086 .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
2087 .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
2088 .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
2089 };
2090 bool progress;
2091
2092 /*
2093 * Adjust the remaining data to account for what has already been
2094 * fetched from memory, but not yet consumed by the SQE.
2095 *
2096 * This is not *technically* correct, the amount buffered could
2097 * exceed the IB size due to hw prefetching ahead, but:
2098 *
2099 * (1) We aren't trying to find the exact position, just whether
2100 * progress has been made
2101 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
2102 * to prevent prefetching into an unrelated submit. (And
2103 * either way, at some point the ROQ will be full.)
2104 */
2105 cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB1) >> 16;
2106 cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB2) >> 16;
2107
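	/* Any change in the sampled CP state since the last check counts as progress */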
2108 progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
2109
2110 ring->last_cp_state = cp_state;
2111
2112 return progress;
2113 }
2114
2115 static u32 fuse_to_supp_hw(const struct adreno_info *info, u32 fuse)
2116 {
2117 if (!info->speedbins)
2118 return UINT_MAX;
2119
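	/* The speedbin table is terminated by an entry with fuse == SHRT_MAX */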
2120 for (int i = 0; info->speedbins[i].fuse != SHRT_MAX; i++)
2121 if (info->speedbins[i].fuse == fuse)
2122 return BIT(info->speedbins[i].speedbin);
2123
2124 return UINT_MAX;
2125 }
2126
2127 static int a6xx_set_supported_hw(struct device *dev, const struct adreno_info *info)
2128 {
2129 u32 supp_hw;
2130 u32 speedbin;
2131 int ret;
2132
2133 ret = adreno_read_speedbin(dev, &speedbin);
2134 /*
2135 	 * -ENOENT means that the platform doesn't support speedbin, which is
2136 	 * fine.
2137 */
2138 if (ret == -ENOENT) {
2139 return 0;
2140 } else if (ret) {
2141 dev_err_probe(dev, ret,
2142 "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
2143 return ret;
2144 }
2145
2146 supp_hw = fuse_to_supp_hw(info, speedbin);
2147
2148 if (supp_hw == UINT_MAX) {
2149 DRM_DEV_ERROR(dev,
2150 "missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
2151 speedbin);
2152 supp_hw = BIT(0); /* Default */
2153 }
2154
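	/*
	 * supp_hw is matched against each OPP's opp-supported-hw value so
	 * that OPPs the fused part cannot run are pruned.
	 */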
2155 ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
2156 if (ret)
2157 return ret;
2158
2159 return 0;
2160 }
2161
2162 static const struct adreno_gpu_funcs funcs = {
2163 .base = {
2164 .get_param = adreno_get_param,
2165 .set_param = adreno_set_param,
2166 .hw_init = a6xx_hw_init,
2167 .ucode_load = a6xx_ucode_load,
2168 .pm_suspend = a6xx_gmu_pm_suspend,
2169 .pm_resume = a6xx_gmu_pm_resume,
2170 .recover = a6xx_recover,
2171 .submit = a6xx_submit,
2172 .active_ring = a6xx_active_ring,
2173 .irq = a6xx_irq,
2174 .destroy = a6xx_destroy,
2175 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2176 .show = a6xx_show,
2177 #endif
2178 .gpu_busy = a6xx_gpu_busy,
2179 .gpu_get_freq = a6xx_gmu_get_freq,
2180 .gpu_set_freq = a6xx_gpu_set_freq,
2181 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2182 .gpu_state_get = a6xx_gpu_state_get,
2183 .gpu_state_put = a6xx_gpu_state_put,
2184 #endif
2185 .create_address_space = a6xx_create_address_space,
2186 .create_private_address_space = a6xx_create_private_address_space,
2187 .get_rptr = a6xx_get_rptr,
2188 .progress = a6xx_progress,
2189 },
2190 .get_timestamp = a6xx_gmu_get_timestamp,
2191 };
2192
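/*
 * GMU-wrapper targets manage clocks and power domains directly, hence the
 * non-GMU pm_suspend/pm_resume and get_timestamp callbacks.
 */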
2193 static const struct adreno_gpu_funcs funcs_gmuwrapper = {
2194 .base = {
2195 .get_param = adreno_get_param,
2196 .set_param = adreno_set_param,
2197 .hw_init = a6xx_hw_init,
2198 .ucode_load = a6xx_ucode_load,
2199 .pm_suspend = a6xx_pm_suspend,
2200 .pm_resume = a6xx_pm_resume,
2201 .recover = a6xx_recover,
2202 .submit = a6xx_submit,
2203 .active_ring = a6xx_active_ring,
2204 .irq = a6xx_irq,
2205 .destroy = a6xx_destroy,
2206 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2207 .show = a6xx_show,
2208 #endif
2209 .gpu_busy = a6xx_gpu_busy,
2210 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2211 .gpu_state_get = a6xx_gpu_state_get,
2212 .gpu_state_put = a6xx_gpu_state_put,
2213 #endif
2214 .create_address_space = a6xx_create_address_space,
2215 .create_private_address_space = a6xx_create_private_address_space,
2216 .get_rptr = a6xx_get_rptr,
2217 .progress = a6xx_progress,
2218 },
2219 .get_timestamp = a6xx_get_timestamp,
2220 };
2221
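/* A7xx shares most callbacks with a6xx but has its own submit path */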
2222 static const struct adreno_gpu_funcs funcs_a7xx = {
2223 .base = {
2224 .get_param = adreno_get_param,
2225 .set_param = adreno_set_param,
2226 .hw_init = a6xx_hw_init,
2227 .ucode_load = a6xx_ucode_load,
2228 .pm_suspend = a6xx_gmu_pm_suspend,
2229 .pm_resume = a6xx_gmu_pm_resume,
2230 .recover = a6xx_recover,
2231 .submit = a7xx_submit,
2232 .active_ring = a6xx_active_ring,
2233 .irq = a6xx_irq,
2234 .destroy = a6xx_destroy,
2235 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2236 .show = a6xx_show,
2237 #endif
2238 .gpu_busy = a6xx_gpu_busy,
2239 .gpu_get_freq = a6xx_gmu_get_freq,
2240 .gpu_set_freq = a6xx_gpu_set_freq,
2241 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2242 .gpu_state_get = a6xx_gpu_state_get,
2243 .gpu_state_put = a6xx_gpu_state_put,
2244 #endif
2245 .create_address_space = a6xx_create_address_space,
2246 .create_private_address_space = a6xx_create_private_address_space,
2247 .get_rptr = a6xx_get_rptr,
2248 .progress = a6xx_progress,
2249 },
2250 .get_timestamp = a6xx_gmu_get_timestamp,
2251 };
2252
2253 struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
2254 {
2255 struct msm_drm_private *priv = dev->dev_private;
2256 struct platform_device *pdev = priv->gpu_pdev;
2257 struct adreno_platform_config *config = pdev->dev.platform_data;
2258 struct device_node *node;
2259 struct a6xx_gpu *a6xx_gpu;
2260 struct adreno_gpu *adreno_gpu;
2261 struct msm_gpu *gpu;
2262 bool is_a7xx;
2263 int ret;
2264
2265 a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL);
2266 if (!a6xx_gpu)
2267 return ERR_PTR(-ENOMEM);
2268
2269 adreno_gpu = &a6xx_gpu->base;
2270 gpu = &adreno_gpu->base;
2271
2272 mutex_init(&a6xx_gpu->gmu.lock);
2273
2274 adreno_gpu->registers = NULL;
2275
2276 /* Check if there is a GMU phandle and set it up */
2277 node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
2278 /* FIXME: How do we gracefully handle this? */
2279 BUG_ON(!node);
2280
2281 adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper");
2282
2283 adreno_gpu->base.hw_apriv =
2284 !!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV);
2285
2286 /* gpu->info only gets assigned in adreno_gpu_init() */
2287 is_a7xx = config->info->family == ADRENO_7XX_GEN1 ||
2288 config->info->family == ADRENO_7XX_GEN2 ||
2289 config->info->family == ADRENO_7XX_GEN3;
2290
2291 a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx);
2292
2293 ret = a6xx_set_supported_hw(&pdev->dev, config->info);
2294 if (ret) {
2295 a6xx_llc_slices_destroy(a6xx_gpu);
2296 kfree(a6xx_gpu);
2297 return ERR_PTR(ret);
2298 }
2299
2300 if (is_a7xx)
2301 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 1);
2302 else if (adreno_has_gmu_wrapper(adreno_gpu))
2303 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_gmuwrapper, 1);
2304 else
2305 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
2306 if (ret) {
2307 a6xx_destroy(&(a6xx_gpu->base.base));
2308 return ERR_PTR(ret);
2309 }
2310
2311 /*
2312 * For now only clamp to idle freq for devices where this is known not
2313 * to cause power supply issues:
2314 */
2315 if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
2316 priv->gpu_clamp_to_idle = true;
2317
2318 if (adreno_has_gmu_wrapper(adreno_gpu))
2319 ret = a6xx_gmu_wrapper_init(a6xx_gpu, node);
2320 else
2321 ret = a6xx_gmu_init(a6xx_gpu, node);
2322 of_node_put(node);
2323 if (ret) {
2324 a6xx_destroy(&(a6xx_gpu->base.base));
2325 return ERR_PTR(ret);
2326 }
2327
2328 if (adreno_is_a7xx(adreno_gpu)) {
2329 ret = a7xx_cx_mem_init(a6xx_gpu);
2330 if (ret) {
2331 a6xx_destroy(&(a6xx_gpu->base.base));
2332 return ERR_PTR(ret);
2333 }
2334 }
2335
2336 if (gpu->aspace)
2337 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
2338 a6xx_fault_handler);
2339
2340 a6xx_calc_ubwc_config(adreno_gpu);
2341
2342 return gpu;
2343 }
2344