1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
3
4
5 #include "msm_gem.h"
6 #include "msm_mmu.h"
7 #include "msm_gpu_trace.h"
8 #include "a6xx_gpu.h"
9 #include "a6xx_gmu.xml.h"
10
11 #include <linux/bitfield.h>
12 #include <linux/devfreq.h>
13 #include <linux/firmware/qcom/qcom_scm.h>
14 #include <linux/pm_domain.h>
15 #include <linux/soc/qcom/llcc-qcom.h>
16
17 #define GPU_PAS_ID 13
18
19 static u64 read_gmu_ao_counter(struct a6xx_gpu *a6xx_gpu)
20 {
21 u64 count_hi, count_lo, temp;
22
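	/*
	 * Read high, then low, then high again and retry if the high word
	 * changed in between, so a low-word rollover between the two reads
	 * cannot produce a torn 64-bit value.
	 */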
23 do {
24 count_hi = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H);
25 count_lo = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L);
26 temp = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H);
27 } while (unlikely(count_hi != temp));
28
29 return (count_hi << 32) | count_lo;
30 }
31
32 static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask)
33 {
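	/*
	 * Poll condition used by gmu_poll_timeout() in fenced_write(): returns
	 * true once the fenced write is no longer dropped, otherwise re-issues
	 * the write and lets the poll loop continue.
	 */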
34 /* Success if !writedropped0/1 */
35 if (!(status & mask))
36 return true;
37
38 udelay(10);
39
40 /* Try to update fenced register again */
41 gpu_write(gpu, offset, value);
42
43 /* We can't do a posted write here because the power domain could be
44 * in collapse state. So use the heaviest barrier instead
45 */
46 mb();
47 return false;
48 }
49
50 static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask)
51 {
52 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
53 struct msm_gpu *gpu = &adreno_gpu->base;
54 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
55 u32 status;
56
57 gpu_write(gpu, offset, value);
58
59 /* Nothing else to be done in the case of no-GMU */
60 if (adreno_has_gmu_wrapper(adreno_gpu))
61 return 0;
62
63 /* We can't do a posted write here because the power domain could be
64 * in collapse state. So use the heaviest barrier instead
65 */
66 mb();
67
68 if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
69 fence_status_check(gpu, offset, value, status, mask), 0, 1000))
70 return 0;
71
72 /* Try again for another 1ms before failing */
73 gpu_write(gpu, offset, value);
74 mb();
75
76 if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
77 fence_status_check(gpu, offset, value, status, mask), 0, 1000)) {
78 /*
79 * The 'delay' warning is here because the pause to print this
80 * warning will allow gpu to move to power collapse which
81 * defeats the purpose of continuous polling for 2 ms
82 */
83 dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
84 offset);
85 return 0;
86 }
87
88 dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n",
89 offset);
90
91 return -ETIMEDOUT;
92 }
93
94 int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b)
95 {
96 int ret;
97
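	/*
	 * A 64-bit value is emitted as two fenced 32-bit writes to consecutive
	 * dword registers; bail out early if the low dword write timed out.
	 */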
98 ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask);
99 if (ret)
100 return ret;
101
102 if (!is_64b)
103 return 0;
104
105 ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);
106
107 return ret;
108 }
109
110 static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
111 {
112 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
113 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
114
115 /* Check that the GMU is idle */
116 if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_isidle(&a6xx_gpu->gmu))
117 return false;
118
119 /* Check that the CX master is idle */
120 if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
121 ~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
122 return false;
123
124 return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
125 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
126 }
127
128 static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
129 {
130 /* wait for CP to drain ringbuffer: */
131 if (!adreno_idle(gpu, ring))
132 return false;
133
134 if (spin_until(_a6xx_check_idle(gpu))) {
135 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
136 gpu->name, __builtin_return_address(0),
137 gpu_read(gpu, REG_A6XX_RBBM_STATUS),
138 gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
139 gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
140 gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
141 return false;
142 }
143
144 return true;
145 }
146
147 static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
148 {
149 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
150 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
151
152 /* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
153 if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
154 OUT_PKT7(ring, CP_WHERE_AM_I, 2);
155 OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
156 OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
157 }
158 }
159
160 void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
161 {
162 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
163 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
164 uint32_t wptr;
165 unsigned long flags;
166
167 update_shadow_rptr(gpu, ring);
168
169 spin_lock_irqsave(&ring->preempt_lock, flags);
170
171 /* Copy the shadow to the actual register */
172 ring->cur = ring->next;
173
174 /* Make sure to wrap wptr if we need to */
175 wptr = get_wptr(ring);
176
177 /* Update HW if this is the current ring and we are not in preempt */
178 if (!a6xx_in_preempt(a6xx_gpu)) {
179 if (a6xx_gpu->cur_ring == ring)
180 a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
181 else
182 ring->restore_wptr = true;
183 } else {
184 ring->restore_wptr = true;
185 }
186
187 spin_unlock_irqrestore(&ring->preempt_lock, flags);
188 }
189
190 static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
191 u64 iova)
192 {
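	/* Ask the CP to copy the 64-bit counter value into the ring's stats buffer */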
193 OUT_PKT7(ring, CP_REG_TO_MEM, 3);
194 OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
195 CP_REG_TO_MEM_0_CNT(2) |
196 CP_REG_TO_MEM_0_64B);
197 OUT_RING(ring, lower_32_bits(iova));
198 OUT_RING(ring, upper_32_bits(iova));
199 }
200
201 static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
202 struct msm_ringbuffer *ring, struct msm_gem_submit *submit)
203 {
204 bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
205 struct msm_context *ctx = submit->queue->ctx;
206 struct drm_gpuvm *vm = msm_context_vm(submit->dev, ctx);
207 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
208 phys_addr_t ttbr;
209 u32 asid;
210 u64 memptr = rbmemptr(ring, ttbr0);
211
212 if (ctx->seqno == ring->cur_ctx_seqno)
213 return;
214
215 if (msm_iommu_pagetable_params(to_msm_vm(vm)->mmu, &ttbr, &asid))
216 return;
217
218 if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) {
219 /* Wait for previous submit to complete before continuing: */
220 OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
221 OUT_RING(ring, 0);
222 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
223 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
224 OUT_RING(ring, submit->seqno - 1);
225
226 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
227 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH);
228
229 /* Reset state used to synchronize BR and BV */
230 OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1);
231 OUT_RING(ring,
232 CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS |
233 CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE |
234 CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER |
235 CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS);
236
237 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
238 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH);
239
240 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
241 OUT_RING(ring, LRZ_FLUSH_INVALIDATE);
242
243 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
244 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
245 }
246
247 if (!sysprof) {
248 if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
249 /* Turn off protected mode to write to special registers */
250 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
251 OUT_RING(ring, 0);
252 }
253
254 if (adreno_is_a8xx(adreno_gpu)) {
255 OUT_PKT4(ring, REG_A8XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
256 OUT_RING(ring, 1);
257 OUT_PKT4(ring, REG_A8XX_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1);
258 OUT_RING(ring, 1);
259 } else {
260 OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
261 OUT_RING(ring, 1);
262 }
263 }
264
265 /* Execute the table update */
266 OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
267 OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));
268
269 OUT_RING(ring,
270 CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
271 CP_SMMU_TABLE_UPDATE_1_ASID(asid));
272 OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
273 OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));
274
275 /*
276 * Write the new TTBR0 to the memstore. This is good for debugging.
277 * Needed for preemption
278 */
279 OUT_PKT7(ring, CP_MEM_WRITE, 5);
280 OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_LO(lower_32_bits(memptr)));
281 OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_HI(upper_32_bits(memptr)));
282 OUT_RING(ring, lower_32_bits(ttbr));
283 OUT_RING(ring, upper_32_bits(ttbr));
284 OUT_RING(ring, ctx->seqno);
285
286 /*
287 * Sync both threads after switching pagetables and enable BR only
288 * to make sure BV doesn't race ahead while BR is still switching
289 * pagetables.
290 */
291 if (adreno_is_a7xx(&a6xx_gpu->base) || adreno_is_a8xx(&a6xx_gpu->base)) {
292 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
293 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
294 }
295
296 /*
297 * And finally, trigger a uche flush to be sure there isn't anything
298 * lingering in that part of the GPU
299 */
300
301 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
302 OUT_RING(ring, CACHE_INVALIDATE);
303
304 if (!sysprof) {
305 u32 reg_status = adreno_is_a8xx(adreno_gpu) ?
306 REG_A8XX_RBBM_PERFCTR_SRAM_INIT_STATUS :
307 REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS;
308 /*
309 * Wait for SRAM clear after the pgtable update, so the
310 * two can happen in parallel:
311 */
312 OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
313 OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
314 OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(reg_status));
315 OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0));
316 OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
317 OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
318 OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
319
320 if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
321 /* Re-enable protected mode: */
322 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
323 OUT_RING(ring, 1);
324 }
325 }
326 }
327
328 static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
329 {
330 unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
331 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
332 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
333 struct msm_ringbuffer *ring = submit->ring;
334 unsigned int i, ibs = 0;
335
336 adreno_check_and_reenable_stall(adreno_gpu);
337
338 a6xx_set_pagetable(a6xx_gpu, ring, submit);
339
340 get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
341 rbmemptr_stats(ring, index, cpcycles_start));
342
343 /*
344 * For PM4 the GMU register offsets are calculated from the base of the
345 * GPU registers so we need to add 0x1a800 to the register value on A630
346 * to get the right value from PM4.
347 */
348 get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
349 rbmemptr_stats(ring, index, alwayson_start));
350
351 /* Invalidate CCU depth and color */
352 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
353 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));
354
355 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
356 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));
357
358 /* Submit the commands */
359 for (i = 0; i < submit->nr_cmds; i++) {
360 switch (submit->cmd[i].type) {
361 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
362 break;
363 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
364 if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
365 break;
366 fallthrough;
367 case MSM_SUBMIT_CMD_BUF:
368 OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
369 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
370 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
371 OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
372 ibs++;
373 break;
374 }
375
376 /*
377 * Periodically update shadow-wptr if needed, so that we
378 * can see partial progress of submits with large # of
379 * cmds.. otherwise we could needlessly stall waiting for
380 * ringbuffer state, simply due to looking at a shadow
381 * rptr value that has not been updated
382 */
383 if ((ibs % 32) == 0)
384 update_shadow_rptr(gpu, ring);
385 }
386
387 get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
388 rbmemptr_stats(ring, index, cpcycles_end));
389 get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
390 rbmemptr_stats(ring, index, alwayson_end));
391
392 /* Write the fence to the scratch register */
393 OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
394 OUT_RING(ring, submit->seqno);
395
396 /*
397 * Execute a CACHE_FLUSH_TS event. This will ensure that the
398 * timestamp is written to the memory and then triggers the interrupt
399 */
400 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
401 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
402 CP_EVENT_WRITE_0_IRQ);
403 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
404 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
405 OUT_RING(ring, submit->seqno);
406
407 trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu));
408
409 a6xx_flush(gpu, ring);
410 }
411
412 static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
413 struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue)
414 {
415 u64 preempt_postamble;
416
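	/*
	 * CP_SET_PSEUDO_REG takes (id, lo, hi) triplets; the 12 dwords below
	 * program the four buffers the CP uses when it preempts: SMMU info,
	 * the privileged save buffer, the user context save buffer and the
	 * counter buffer.
	 */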
417 OUT_PKT7(ring, CP_SET_PSEUDO_REG, 12);
418
419 OUT_RING(ring, SMMU_INFO);
420 /* don't save SMMU, we write the record from the kernel instead */
421 OUT_RING(ring, 0);
422 OUT_RING(ring, 0);
423
424 /* privileged and non secure buffer save */
425 OUT_RING(ring, NON_SECURE_SAVE_ADDR);
426 OUT_RING(ring, lower_32_bits(
427 a6xx_gpu->preempt_iova[ring->id]));
428 OUT_RING(ring, upper_32_bits(
429 a6xx_gpu->preempt_iova[ring->id]));
430
431 /* user context buffer save, seems to be unused by fw */
432 OUT_RING(ring, NON_PRIV_SAVE_ADDR);
433 OUT_RING(ring, 0);
434 OUT_RING(ring, 0);
435
436 OUT_RING(ring, COUNTER);
437 /* seems OK to set to 0 to disable it */
438 OUT_RING(ring, 0);
439 OUT_RING(ring, 0);
440
441 /* Emit postamble to clear perfcounters */
442 preempt_postamble = a6xx_gpu->preempt_postamble_iova;
443
444 OUT_PKT7(ring, CP_SET_AMBLE, 3);
445 OUT_RING(ring, lower_32_bits(preempt_postamble));
446 OUT_RING(ring, upper_32_bits(preempt_postamble));
447 OUT_RING(ring, CP_SET_AMBLE_2_DWORDS(
448 a6xx_gpu->preempt_postamble_len) |
449 CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE));
450 }
451
452 static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
453 {
454 unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
455 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
456 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
457 struct msm_ringbuffer *ring = submit->ring;
458 u32 rbbm_perfctr_cp0, cp_always_on_counter;
459 unsigned int i, ibs = 0;
460
461 adreno_check_and_reenable_stall(adreno_gpu);
462
463 /*
464 * Toggle concurrent binning for pagetable switch and set the thread to
465 * BR since only it can execute the pagetable switch packets.
466 */
467 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
468 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
469
470 a6xx_set_pagetable(a6xx_gpu, ring, submit);
471
472 /*
473 * If preemption is enabled, then set the pseudo register for the save
474 * sequence
475 */
476 if (gpu->nr_rings > 1)
477 a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue);
478
479 if (adreno_is_a8xx(adreno_gpu)) {
480 rbbm_perfctr_cp0 = REG_A8XX_RBBM_PERFCTR_CP(0);
481 cp_always_on_counter = REG_A8XX_CP_ALWAYS_ON_COUNTER;
482 } else {
483 rbbm_perfctr_cp0 = REG_A7XX_RBBM_PERFCTR_CP(0);
484 cp_always_on_counter = REG_A6XX_CP_ALWAYS_ON_COUNTER;
485 }
486
487 get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_start));
488 get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, index, alwayson_start));
489
490 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
491 OUT_RING(ring, CP_SET_THREAD_BOTH);
492
493 OUT_PKT7(ring, CP_SET_MARKER, 1);
494 OUT_RING(ring, 0x101); /* IFPC disable */
495
496 if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
497 OUT_PKT7(ring, CP_SET_MARKER, 1);
498 OUT_RING(ring, 0x00d); /* IB1LIST start */
499 }
500
501 /* Submit the commands */
502 for (i = 0; i < submit->nr_cmds; i++) {
503 switch (submit->cmd[i].type) {
504 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
505 break;
506 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
507 if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
508 break;
509 fallthrough;
510 case MSM_SUBMIT_CMD_BUF:
511 OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
512 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
513 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
514 OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
515 ibs++;
516 break;
517 }
518
519 /*
520 * Periodically update shadow-wptr if needed, so that we
521 * can see partial progress of submits with large # of
522 * cmds.. otherwise we could needlessly stall waiting for
523 * ringbuffer state, simply due to looking at a shadow
524 * rptr value that has not been updated
525 */
526 if ((ibs % 32) == 0)
527 update_shadow_rptr(gpu, ring);
528 }
529
530 if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
531 OUT_PKT7(ring, CP_SET_MARKER, 1);
532 OUT_RING(ring, 0x00e); /* IB1LIST end */
533 }
534
535 get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_end));
536 get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, index, alwayson_end));
537
538 /* Write the fence to the scratch register */
539 if (adreno_is_a8xx(adreno_gpu)) {
540 OUT_PKT4(ring, REG_A8XX_CP_SCRATCH_GLOBAL(2), 1);
541 OUT_RING(ring, submit->seqno);
542 } else {
543 OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
544 OUT_RING(ring, submit->seqno);
545 }
546
547 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
548 OUT_RING(ring, CP_SET_THREAD_BR);
549
550 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
551 OUT_RING(ring, CCU_INVALIDATE_DEPTH);
552
553 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
554 OUT_RING(ring, CCU_INVALIDATE_COLOR);
555
556 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
557 OUT_RING(ring, CP_SET_THREAD_BV);
558
559 /*
560 * Make sure the timestamp is committed once BV pipe is
561 * completely done with this submission.
562 */
563 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
564 OUT_RING(ring, CACHE_CLEAN | BIT(27));
565 OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
566 OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
567 OUT_RING(ring, submit->seqno);
568
569 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
570 OUT_RING(ring, CP_SET_THREAD_BR);
571
572 /*
573 * This makes sure that BR doesn't race ahead and commit
574 * timestamp to memstore while BV is still processing
575 * this submission.
576 */
577 OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
578 OUT_RING(ring, 0);
579 OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
580 OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
581 OUT_RING(ring, submit->seqno);
582
583 a6xx_gpu->last_seqno[ring->id] = submit->seqno;
584
585 /* write the ringbuffer timestamp */
586 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
587 OUT_RING(ring, CACHE_CLEAN | CP_EVENT_WRITE_0_IRQ | BIT(27));
588 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
589 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
590 OUT_RING(ring, submit->seqno);
591
592 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
593 OUT_RING(ring, CP_SET_THREAD_BOTH);
594
595 OUT_PKT7(ring, CP_SET_MARKER, 1);
596 OUT_RING(ring, 0x100); /* IFPC enable */
597
598 /* If preemption is enabled */
599 if (gpu->nr_rings > 1) {
600 /* Yield the floor on command completion */
601 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
602
603 /*
604 * If dword[2:1] are non zero, they specify an address for
605 * the CP to write the value of dword[3] to on preemption
606 * complete. Write 0 to skip the write
607 */
608 OUT_RING(ring, 0x00);
609 OUT_RING(ring, 0x00);
610 /* Data value - not used if the address above is 0 */
611 OUT_RING(ring, 0x01);
612 /* generate interrupt on preemption completion */
613 OUT_RING(ring, 0x00);
614 }
615
616
617 trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu));
618
619 a6xx_flush(gpu, ring);
620
621 /* Check to see if we need to start preemption */
622 a6xx_preempt_trigger(gpu);
623 }
624
625 static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
626 {
627 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
628 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
629 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
630 const struct adreno_reglist *reg;
631 unsigned int i;
632 u32 cgc_delay, cgc_hyst;
633 u32 val, clock_cntl_on;
634
635 if (!(adreno_gpu->info->a6xx->hwcg || adreno_is_a7xx(adreno_gpu)))
636 return;
637
638 if (adreno_is_a630(adreno_gpu))
639 clock_cntl_on = 0x8aa8aa02;
640 else if (adreno_is_a610(adreno_gpu) || adreno_is_a612(adreno_gpu))
641 clock_cntl_on = 0xaaa8aa82;
642 else if (adreno_is_a702(adreno_gpu))
643 clock_cntl_on = 0xaaaaaa82;
644 else
645 clock_cntl_on = 0x8aa8aa82;
646
647 if (adreno_is_a612(adreno_gpu))
648 cgc_delay = 0x11;
649 else if (adreno_is_a615_family(adreno_gpu))
650 cgc_delay = 0x111;
651 else
652 cgc_delay = 0x10111;
653
654 if (adreno_is_a612(adreno_gpu))
655 cgc_hyst = 0x55;
656 else if (adreno_is_a615_family(adreno_gpu))
657 cgc_hyst = 0x555;
658 else
659 cgc_hyst = 0x5555;
660
661 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
662 state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
663 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
664 state ? cgc_delay : 0);
665 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
666 state ? cgc_hyst : 0);
667
668 if (!adreno_gpu->info->a6xx->hwcg) {
669 gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
670 gpu_write(gpu, REG_A7XX_RBBM_CGC_GLOBAL_LOAD_CMD, state ? 1 : 0);
671
672 if (state) {
673 gpu_write(gpu, REG_A7XX_RBBM_CGC_P2S_TRIG_CMD, 1);
674
675 if (gpu_poll_timeout(gpu, REG_A7XX_RBBM_CGC_P2S_STATUS, val,
676 val & A7XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
677 dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
678 return;
679 }
680
681 gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
682 }
683
684 return;
685 }
686
687 val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);
688
689 /* Don't re-program the registers if they are already correct */
690 if ((!state && !val) || (state && (val == clock_cntl_on)))
691 return;
692
693 /* Disable SP clock before programming HWCG registers */
694 if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
695 gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
696
697 for (i = 0; (reg = &adreno_gpu->info->a6xx->hwcg[i], reg->offset); i++)
698 gpu_write(gpu, reg->offset, state ? reg->value : 0);
699
700 /* Enable SP clock */
701 if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
702 gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
703
704 gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
705 }
706
707 static void a6xx_set_cp_protect(struct msm_gpu *gpu)
708 {
709 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
710 const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
711 unsigned i;
712
713 /*
714 * Enable access protection to privileged registers, fault on an access
715 * protect violation and select the last span to protect from the start
716 * address all the way to the end of the register address space
717 */
718 gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL,
719 A6XX_CP_PROTECT_CNTL_ACCESS_PROT_EN |
720 A6XX_CP_PROTECT_CNTL_ACCESS_FAULT_ON_VIOL_EN |
721 A6XX_CP_PROTECT_CNTL_LAST_SPAN_INF_RANGE);
722
723 for (i = 0; i < protect->count - 1; i++) {
724 /* Intentionally skip writing to some registers */
725 if (protect->regs[i])
726 gpu_write(gpu, REG_A6XX_CP_PROTECT(i), protect->regs[i]);
727 }
728 /* last CP_PROTECT to have "infinite" length on the last entry */
729 gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]);
730 }
731
732 static int a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
733 {
734 const struct qcom_ubwc_cfg_data *common_cfg;
735 struct qcom_ubwc_cfg_data *cfg = &gpu->_ubwc_config;
736
737 /* Inherit the common config and make some necessary fixups */
738 common_cfg = qcom_ubwc_config_get_data();
739 if (IS_ERR(common_cfg))
740 return PTR_ERR(common_cfg);
741
742 /* Copy the data into the internal struct to drop the const qualifier (temporarily) */
743 *cfg = *common_cfg;
744
745 /* Use the common config as-is for A8xx */
746 if (!adreno_is_a8xx(gpu)) {
747 cfg->ubwc_swizzle = 0x6;
748 cfg->highest_bank_bit = 15;
749 }
750
751 if (adreno_is_a610(gpu)) {
752 cfg->highest_bank_bit = 13;
753 cfg->ubwc_swizzle = 0x7;
754 }
755
756 if (adreno_is_a612(gpu))
757 cfg->highest_bank_bit = 14;
758
759 if (adreno_is_a618(gpu))
760 cfg->highest_bank_bit = 14;
761
762 if (adreno_is_a619(gpu))
763 /* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
764 cfg->highest_bank_bit = 13;
765
766 if (adreno_is_a619_holi(gpu))
767 cfg->highest_bank_bit = 13;
768
769 if (adreno_is_a621(gpu))
770 cfg->highest_bank_bit = 13;
771
772 if (adreno_is_a623(gpu))
773 cfg->highest_bank_bit = 16;
774
775 if (adreno_is_a650(gpu) ||
776 adreno_is_a660(gpu) ||
777 adreno_is_a690(gpu) ||
778 adreno_is_a730(gpu) ||
779 adreno_is_a740_family(gpu)) {
780 /* TODO: get ddr type from bootloader and use 15 for LPDDR4 */
781 cfg->highest_bank_bit = 16;
782 }
783
784 if (adreno_is_a663(gpu)) {
785 cfg->highest_bank_bit = 13;
786 cfg->ubwc_swizzle = 0x4;
787 }
788
789 if (adreno_is_7c3(gpu))
790 cfg->highest_bank_bit = 14;
791
792 if (adreno_is_a702(gpu))
793 cfg->highest_bank_bit = 14;
794
795 if (cfg->highest_bank_bit != common_cfg->highest_bank_bit)
796 DRM_WARN_ONCE("Inconclusive highest_bank_bit value: %u (GPU) vs %u (UBWC_CFG)\n",
797 cfg->highest_bank_bit, common_cfg->highest_bank_bit);
798
799 if (cfg->ubwc_swizzle != common_cfg->ubwc_swizzle)
800 DRM_WARN_ONCE("Inconclusive ubwc_swizzle value: %u (GPU) vs %u (UBWC_CFG)\n",
801 cfg->ubwc_swizzle, common_cfg->ubwc_swizzle);
802
803 gpu->ubwc_config = &gpu->_ubwc_config;
804
805 return 0;
806 }
807
808 static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
809 {
810 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
811 const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config;
812 /*
813 * We subtract 13 from the highest bank bit (13 is the minimum value
814 * allowed by hw) and write the lowest two bits of the remaining value
815 * as hbb_lo and the one above it as hbb_hi to the hardware.
816 */
817 BUG_ON(cfg->highest_bank_bit < 13);
818 u32 hbb = cfg->highest_bank_bit - 13;
819 bool rgb565_predicator = cfg->ubwc_enc_version >= UBWC_4_0;
820 u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2);
821 bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg);
822 bool amsbc = cfg->ubwc_enc_version >= UBWC_3_0;
823 bool min_acc_len_64b = false;
824 u8 uavflagprd_inv = 0;
825 u32 hbb_hi = hbb >> 2;
826 u32 hbb_lo = hbb & 3;
827
828 if (adreno_is_a650_family(adreno_gpu) || adreno_is_a7xx(adreno_gpu))
829 uavflagprd_inv = 2;
830
831 if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
832 min_acc_len_64b = true;
833
834 gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
835 level2_swizzling_dis << 12 |
836 rgb565_predicator << 11 |
837 hbb_hi << 10 | amsbc << 4 |
838 min_acc_len_64b << 3 |
839 hbb_lo << 1 | ubwc_mode);
840
841 gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
842 level2_swizzling_dis << 6 | hbb_hi << 4 |
843 min_acc_len_64b << 3 |
844 hbb_lo << 1 | ubwc_mode);
845
846 gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
847 level2_swizzling_dis << 12 | hbb_hi << 10 |
848 uavflagprd_inv << 4 |
849 min_acc_len_64b << 3 |
850 hbb_lo << 1 | ubwc_mode);
851
852 if (adreno_is_a7xx(adreno_gpu)) {
853 for (u32 pipe_id = PIPE_BR; pipe_id <= PIPE_BV; pipe_id++) {
854 gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
855 A7XX_CP_APERTURE_CNTL_HOST_PIPE(pipe_id));
856 gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL,
857 FIELD_PREP(GENMASK(8, 5), hbb_lo));
858 }
859 gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
860 A7XX_CP_APERTURE_CNTL_HOST_PIPE(PIPE_NONE));
861 }
862
863 gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL,
864 min_acc_len_64b << 23 | hbb_lo << 21);
865
866 gpu_write(gpu, REG_A6XX_RBBM_NC_MODE_CNTL,
867 cfg->macrotile_mode);
868 }
869
870 static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
871 {
872 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
873 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
874 const struct adreno_reglist_list *reglist;
875 const struct adreno_reglist_pipe_list *dyn_pwrup_reglist;
876 void *ptr = a6xx_gpu->pwrup_reglist_ptr;
877 struct cpu_gpu_lock *lock = ptr;
878 u32 *dest = (u32 *)&lock->regs[0];
879 u32 dyn_pwrup_reglist_count = 0;
880 int i;
881
882 lock->gpu_req = lock->cpu_req = lock->turn = 0;
883
884 reglist = adreno_gpu->info->a6xx->ifpc_reglist;
885 if (reglist) {
886 lock->ifpc_list_len = reglist->count;
887
888 /*
889 * For each entry in each of the lists, write the offset and the current
890 * register value into the GPU buffer
891 */
892 for (i = 0; i < reglist->count; i++) {
893 *dest++ = reglist->regs[i];
894 *dest++ = gpu_read(gpu, reglist->regs[i]);
895 }
896 }
897
898 reglist = adreno_gpu->info->a6xx->pwrup_reglist;
899 lock->preemption_list_len = reglist->count;
900
901 for (i = 0; i < reglist->count; i++) {
902 *dest++ = reglist->regs[i];
903 *dest++ = gpu_read(gpu, reglist->regs[i]);
904 }
905
906 /*
907 * The overall register list is composed of
908 * 1. Static IFPC-only registers
909 * 2. Static IFPC + preemption registers
910 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
911 *
912 * The first two lists are static. Size of these lists are stored as
913 * number of pairs in ifpc_list_len and preemption_list_len
914 * respectively. With concurrent binning, Some of the perfcounter
915 * registers being virtualized, CP needs to know the pipe id to program
916 * the aperture inorder to restore the same. Thus, third list is a
917 * dynamic list with triplets as
918 * (<aperture, shifted 12 bits> <address> <data>), and the length is
919 * stored as number for triplets in dynamic_list_len.
920 */
921 dyn_pwrup_reglist = adreno_gpu->info->a6xx->dyn_pwrup_reglist;
922 if (dyn_pwrup_reglist) {
923 for (u32 pipe_id = PIPE_BR; pipe_id <= PIPE_BV; pipe_id++) {
924 gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
925 A7XX_CP_APERTURE_CNTL_HOST_PIPE(pipe_id));
926 for (i = 0; i < dyn_pwrup_reglist->count; i++) {
927 if ((dyn_pwrup_reglist->regs[i].pipe & BIT(pipe_id)) == 0)
928 continue;
929 *dest++ = A7XX_CP_APERTURE_CNTL_HOST_PIPE(pipe_id);
930 *dest++ = dyn_pwrup_reglist->regs[i].offset;
931 *dest++ = gpu_read(gpu, dyn_pwrup_reglist->regs[i].offset);
932 dyn_pwrup_reglist_count++;
933 }
934 }
935 gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
936 A7XX_CP_APERTURE_CNTL_HOST_PIPE(PIPE_NONE));
937 }
938 lock->dynamic_list_len = dyn_pwrup_reglist_count;
939 }
940
941 static int a7xx_preempt_start(struct msm_gpu *gpu)
942 {
943 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
944 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
945 struct msm_ringbuffer *ring = gpu->rb[0];
946
947 if (gpu->nr_rings <= 1)
948 return 0;
949
950 /* Turn CP protection off */
951 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
952 OUT_RING(ring, 0);
953
954 a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL);
955
956 /* Yield the floor on command completion */
957 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
958 OUT_RING(ring, 0x00);
959 OUT_RING(ring, 0x00);
960 OUT_RING(ring, 0x00);
961 /* Generate interrupt on preemption completion */
962 OUT_RING(ring, 0x00);
963
964 a6xx_flush(gpu, ring);
965
966 return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
967 }
968
969 static int a6xx_cp_init(struct msm_gpu *gpu)
970 {
971 struct msm_ringbuffer *ring = gpu->rb[0];
972
973 OUT_PKT7(ring, CP_ME_INIT, 8);
974
975 OUT_RING(ring, 0x0000002f);
976
977 /* Enable multiple hardware contexts */
978 OUT_RING(ring, 0x00000003);
979
980 /* Enable error detection */
981 OUT_RING(ring, 0x20000000);
982
983 /* Don't enable header dump */
984 OUT_RING(ring, 0x00000000);
985 OUT_RING(ring, 0x00000000);
986
987 /* No workarounds enabled */
988 OUT_RING(ring, 0x00000000);
989
990 /* Pad rest of the cmds with 0's */
991 OUT_RING(ring, 0x00000000);
992 OUT_RING(ring, 0x00000000);
993
994 a6xx_flush(gpu, ring);
995 return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
996 }
997
998 static int a7xx_cp_init(struct msm_gpu *gpu)
999 {
1000 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1001 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1002 struct msm_ringbuffer *ring = gpu->rb[0];
1003 u32 mask;
1004
1005 /* Disable concurrent binning before sending CP init */
1006 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
1007 OUT_RING(ring, BIT(27));
1008
1009 OUT_PKT7(ring, CP_ME_INIT, 7);
1010
1011 /* Use multiple HW contexts */
1012 mask = BIT(0);
1013
1014 /* Enable error detection */
1015 mask |= BIT(1);
1016
1017 /* Set default reset state */
1018 mask |= BIT(3);
1019
1020 /* Disable save/restore of performance counters across preemption */
1021 mask |= BIT(6);
1022
1023 /* Enable the register init list with the spinlock */
1024 mask |= BIT(8);
1025
1026 OUT_RING(ring, mask);
1027
1028 /* Enable multiple hardware contexts */
1029 OUT_RING(ring, 0x00000003);
1030
1031 /* Enable error detection */
1032 OUT_RING(ring, 0x20000000);
1033
1034 /* Operation mode mask */
1035 OUT_RING(ring, 0x00000002);
1036
1037 /* *Don't* send a power up reg list for concurrent binning (TODO) */
1038 /* Lo address */
1039 OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova));
1040 /* Hi address */
1041 OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova));
1042 /* BIT(31) set => read the regs from the list */
1043 OUT_RING(ring, BIT(31));
1044
1045 a6xx_flush(gpu, ring);
1046 return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
1047 }
1048
1049 /*
1050 * Check that the microcode version is new enough to include several key
1051 * security fixes. Return true if the ucode is safe.
1052 */
1053 static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
1054 struct drm_gem_object *obj)
1055 {
1056 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1057 struct msm_gpu *gpu = &adreno_gpu->base;
1058 const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE];
1059 u32 *buf = msm_gem_get_vaddr(obj);
1060 bool ret = false;
1061
1062 if (IS_ERR(buf))
1063 return false;
1064
1065 /* A7xx is safe! */
1066 if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu) || adreno_is_a8xx(adreno_gpu))
1067 return true;
1068
1069 /*
1070 * Targets up to a640 (a618, a630 and a640) need to check for a
1071 * microcode version that is patched to support the whereami opcode or
1072 * one that is new enough to include it by default.
1073 *
1074 * a650 tier targets don't need whereami but still need to be
1075 * equal to or newer than 0.95 for other security fixes
1076 *
1077 * a660 targets have all the critical security fixes from the start
1078 */
1079 if (!strcmp(sqe_name, "a630_sqe.fw")) {
1080 /*
1081 * If the lowest nibble is 0xa that is an indication that this
1082 * microcode has been patched. The actual version is in dword
1083 * [3] but we only care about the patchlevel which is the lowest
1084 * nibble of dword [3]
1085 *
1086 * Otherwise check that the firmware is greater than or equal
1087 * to 1.90 which was the first version that had this fix built
1088 * in
1089 */
1090 if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
1091 (buf[0] & 0xfff) >= 0x190) {
1092 a6xx_gpu->has_whereami = true;
1093 ret = true;
1094 goto out;
1095 }
1096
1097 DRM_DEV_ERROR(&gpu->pdev->dev,
1098 "a630 SQE ucode is too old. Have version %x need at least %x\n",
1099 buf[0] & 0xfff, 0x190);
1100 } else if (!strcmp(sqe_name, "a650_sqe.fw")) {
1101 if ((buf[0] & 0xfff) >= 0x095) {
1102 ret = true;
1103 goto out;
1104 }
1105
1106 DRM_DEV_ERROR(&gpu->pdev->dev,
1107 "a650 SQE ucode is too old. Have version %x need at least %x\n",
1108 buf[0] & 0xfff, 0x095);
1109 } else if (!strcmp(sqe_name, "a660_sqe.fw")) {
1110 ret = true;
1111 } else {
1112 DRM_DEV_ERROR(&gpu->pdev->dev,
1113 "unknown GPU, add it to a6xx_ucode_check_version()!!\n");
1114 }
1115 out:
1116 msm_gem_put_vaddr(obj);
1117 return ret;
1118 }
1119
1120 static int a6xx_ucode_load(struct msm_gpu *gpu)
1121 {
1122 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1123 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1124
1125 if (!a6xx_gpu->sqe_bo) {
1126 a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu,
1127 adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova);
1128
1129 if (IS_ERR(a6xx_gpu->sqe_bo)) {
1130 int ret = PTR_ERR(a6xx_gpu->sqe_bo);
1131
1132 a6xx_gpu->sqe_bo = NULL;
1133 DRM_DEV_ERROR(&gpu->pdev->dev,
1134 "Could not allocate SQE ucode: %d\n", ret);
1135
1136 return ret;
1137 }
1138
1139 msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
1140 if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
1141 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm);
1142 drm_gem_object_put(a6xx_gpu->sqe_bo);
1143
1144 a6xx_gpu->sqe_bo = NULL;
1145 return -EPERM;
1146 }
1147 }
1148
1149 if (!a6xx_gpu->aqe_bo && adreno_gpu->fw[ADRENO_FW_AQE]) {
1150 a6xx_gpu->aqe_bo = adreno_fw_create_bo(gpu,
1151 adreno_gpu->fw[ADRENO_FW_AQE], &a6xx_gpu->aqe_iova);
1152
1153 if (IS_ERR(a6xx_gpu->aqe_bo)) {
1154 int ret = PTR_ERR(a6xx_gpu->aqe_bo);
1155
1156 a6xx_gpu->aqe_bo = NULL;
1157 DRM_DEV_ERROR(&gpu->pdev->dev,
1158 "Could not allocate AQE ucode: %d\n", ret);
1159
1160 return ret;
1161 }
1162
1163 msm_gem_object_set_name(a6xx_gpu->aqe_bo, "aqefw");
1164 }
1165
1166 /*
1167 * Expanded APRIV and targets that support WHERE_AM_I both need a
1168 * privileged buffer to store the RPTR shadow
1169 */
1170 if ((adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) &&
1171 !a6xx_gpu->shadow_bo) {
1172 a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
1173 sizeof(u32) * gpu->nr_rings,
1174 MSM_BO_WC | MSM_BO_MAP_PRIV,
1175 gpu->vm, &a6xx_gpu->shadow_bo,
1176 &a6xx_gpu->shadow_iova);
1177
1178 if (IS_ERR(a6xx_gpu->shadow))
1179 return PTR_ERR(a6xx_gpu->shadow);
1180
1181 msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow");
1182 }
1183
1184 a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PAGE_SIZE,
1185 MSM_BO_WC | MSM_BO_MAP_PRIV,
1186 gpu->vm, &a6xx_gpu->pwrup_reglist_bo,
1187 &a6xx_gpu->pwrup_reglist_iova);
1188
1189 if (IS_ERR(a6xx_gpu->pwrup_reglist_ptr))
1190 return PTR_ERR(a6xx_gpu->pwrup_reglist_ptr);
1191
1192 msm_gem_object_set_name(a6xx_gpu->pwrup_reglist_bo, "pwrup_reglist");
1193
1194 return 0;
1195 }
1196
1197 int a6xx_zap_shader_init(struct msm_gpu *gpu)
1198 {
1199 static bool loaded;
1200 int ret;
1201
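	/* The zap shader only needs to be loaded into the secure world once */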
1202 if (loaded)
1203 return 0;
1204
1205 ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
1206
1207 loaded = !ret;
1208 return ret;
1209 }
1210
1211 #define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
1212 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
1213 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
1214 A6XX_RBBM_INT_0_MASK_CP_IB2 | \
1215 A6XX_RBBM_INT_0_MASK_CP_IB1 | \
1216 A6XX_RBBM_INT_0_MASK_CP_RB | \
1217 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
1218 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
1219 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
1220 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
1221 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
1222
1223 #define A7XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
1224 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
1225 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
1226 A6XX_RBBM_INT_0_MASK_CP_SW | \
1227 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
1228 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
1229 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
1230 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
1231 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
1232 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
1233 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
1234 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
1235 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
1236 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
1237
1238 #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \
1239 A6XX_CP_APRIV_CNTL_RBFETCH | \
1240 A6XX_CP_APRIV_CNTL_RBPRIVLEVEL | \
1241 A6XX_CP_APRIV_CNTL_RBRPWB)
1242
1243 #define A7XX_BR_APRIVMASK (A7XX_APRIV_MASK | \
1244 A6XX_CP_APRIV_CNTL_CDREAD | \
1245 A6XX_CP_APRIV_CNTL_CDWRITE)
1246
1247 static int hw_init(struct msm_gpu *gpu)
1248 {
1249 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1250 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1251 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1252 u64 gmem_range_min;
1253 unsigned int i;
1254 int ret;
1255
1256 if (!adreno_has_gmu_wrapper(adreno_gpu)) {
1257 /* Make sure the GMU keeps the GPU on while we set it up */
1258 ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
1259 if (ret)
1260 return ret;
1261 }
1262
1263 /* Clear GBIF halt in case GX domain was not collapsed */
1264 if (adreno_is_a619_holi(adreno_gpu)) {
1265 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
1266 gpu_read(gpu, REG_A6XX_GBIF_HALT);
1267
1268 gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, 0);
1269 gpu_read(gpu, REG_A6XX_RBBM_GPR0_CNTL);
1270 } else if (a6xx_has_gbif(adreno_gpu)) {
1271 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
1272 gpu_read(gpu, REG_A6XX_GBIF_HALT);
1273
1274 gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
1275 gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT);
1276 }
1277
1278 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
1279
1280 if (adreno_is_a619_holi(adreno_gpu))
1281 a6xx_sptprac_enable(gmu);
1282
1283 /*
1284 * Disable the trusted memory range - we don't actually support secure
1285 * memory rendering at this point in time and we don't want to block off
1286 * part of the virtual memory space.
1287 */
1288 gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
1289 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
1290
1291 if (!adreno_is_a7xx(adreno_gpu)) {
1292 /* Turn on 64 bit addressing for all blocks */
1293 gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
1294 gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
1295 gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
1296 gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
1297 gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
1298 gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
1299 gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
1300 gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
1301 gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
1302 gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
1303 gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
1304 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
1305 }
1306
1307 /* enable hardware clockgating */
1308 a6xx_set_hwcg(gpu, true);
1309
1310 /* For gmuwrapper implementations, do the VBIF/GBIF CX configuration here */
1311 if (adreno_is_a610_family(adreno_gpu)) {
1312 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
1313 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
1314 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
1315 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
1316 }
1317
1318 if (adreno_is_a610_family(adreno_gpu) ||
1319 adreno_is_a640_family(adreno_gpu) ||
1320 adreno_is_a650_family(adreno_gpu)) {
1321 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3);
1322 } else if (adreno_is_a7xx(adreno_gpu)) {
1323 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x2120212);
1324 } else {
1325 gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
1326 }
1327
1328 if (adreno_is_a630(adreno_gpu))
1329 gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
1330
1331 if (adreno_is_a7xx(adreno_gpu))
1332 gpu_write(gpu, REG_A6XX_UCHE_GBIF_GX_CONFIG, 0x10240e0);
1333
1334 /* Make all blocks contribute to the GPU BUSY perf counter */
1335 gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
1336
1337 /* Disable L2 bypass in the UCHE */
1338 if (adreno_is_a7xx(adreno_gpu)) {
1339 gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
1340 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
1341 } else {
1342 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0);
1343 gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
1344 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
1345 }
1346
1347 if (!(adreno_is_a650_family(adreno_gpu) ||
1348 adreno_is_a702(adreno_gpu) ||
1349 adreno_is_a730(adreno_gpu))) {
1350 gmem_range_min = adreno_is_a740_family(adreno_gpu) ? SZ_16M : SZ_1M;
1351
1352 /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
1353 gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN, gmem_range_min);
1354
1355 gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX,
1356 gmem_range_min + adreno_gpu->info->gmem - 1);
1357 }
1358
1359 if (adreno_is_a7xx(adreno_gpu))
1360 gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, BIT(23));
1361 else {
1362 gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
1363 gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);
1364 }
1365
1366 if (adreno_is_a640_family(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) {
1367 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
1368 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
1369 } else if (adreno_is_a610_family(adreno_gpu)) {
1370 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060);
1371 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16);
1372 } else if (!adreno_is_a7xx(adreno_gpu)) {
1373 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
1374 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
1375 }
1376
1377 if (adreno_is_a660_family(adreno_gpu))
1378 gpu_write(gpu, REG_A7XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
1379
1380 /* Setting the mem pool size */
1381 if (adreno_is_a610(adreno_gpu) || adreno_is_a612(adreno_gpu)) {
1382 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 48);
1383 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 47);
1384 } else if (adreno_is_a702(adreno_gpu)) {
1385 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 64);
1386 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 63);
1387 } else if (!adreno_is_a7xx(adreno_gpu))
1388 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);
1389
1390
1391 /* Set the default primFifo threshold values */
1392 if (adreno_gpu->info->a6xx->prim_fifo_threshold)
1393 gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL,
1394 adreno_gpu->info->a6xx->prim_fifo_threshold);
1395
1396 /* Set the AHB default slave response to "ERROR" */
1397 gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);
1398
1399 /* Turn on performance counters */
1400 gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);
1401
1402 if (adreno_is_a7xx(adreno_gpu)) {
1403 /* Turn on the IFPC counter (countable 4 on XOCLK4) */
1404 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1,
1405 FIELD_PREP(GENMASK(7, 0), 0x4));
1406 }
1407
1408 /* Select CP0 to always count cycles */
1409 gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT);
1410
1411 a6xx_set_ubwc_config(gpu);
1412
1413 /* Enable fault detection */
1414 if (adreno_is_a612(adreno_gpu) ||
1415 adreno_is_a730(adreno_gpu) ||
1416 adreno_is_a740_family(adreno_gpu))
1417 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff);
1418 else if (adreno_is_a690(adreno_gpu))
1419 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x4fffff);
1420 else if (adreno_is_a619(adreno_gpu))
1421 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff);
1422 else if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
1423 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3ffff);
1424 else
1425 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x1fffff);
1426
1427 gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1);
1428
1429 /* Set weights for bicubic filtering */
1430 if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) {
1431 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0);
1432 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1),
1433 0x3fe05ff4);
1434 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2),
1435 0x3fa0ebee);
1436 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3),
1437 0x3f5193ed);
1438 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4),
1439 0x3f0243f0);
1440 }
1441
1442 /* Set up the CX GMU counter 0 to count busy ticks */
1443 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);
1444
1445 /* Enable the power counter */
1446 gmu_rmw(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, BIT(5));
1447 gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);
1448
1449 /* Protect registers from the CP */
1450 a6xx_set_cp_protect(gpu);
1451
1452 if (adreno_is_a660_family(adreno_gpu)) {
1453 if (adreno_is_a690(adreno_gpu))
1454 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x00028801);
1455 else
1456 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
1457 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
1458 } else if (adreno_is_a702(adreno_gpu)) {
1459 /* Something to do with the HLSQ cluster */
1460 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(24));
1461 }
1462
1463 if (adreno_is_a690(adreno_gpu))
1464 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90);
1465 /* Set dualQ + disable afull for A660 GPU */
1466 else if (adreno_is_a660(adreno_gpu) || adreno_is_a663(adreno_gpu))
1467 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
1468 else if (adreno_is_a7xx(adreno_gpu))
1469 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG,
1470 FIELD_PREP(GENMASK(19, 16), 6) |
1471 FIELD_PREP(GENMASK(15, 12), 6) |
1472 FIELD_PREP(GENMASK(11, 8), 9) |
1473 BIT(3) | BIT(2) |
1474 FIELD_PREP(GENMASK(1, 0), 2));
1475
1476 /* Enable expanded apriv for targets that support it */
1477 if (gpu->hw_apriv) {
1478 if (adreno_is_a7xx(adreno_gpu)) {
1479 gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
1480 A7XX_BR_APRIVMASK);
1481 gpu_write(gpu, REG_A7XX_CP_BV_APRIV_CNTL,
1482 A7XX_APRIV_MASK);
1483 gpu_write(gpu, REG_A7XX_CP_LPAC_APRIV_CNTL,
1484 A7XX_APRIV_MASK);
1485 } else
1486 gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
1487 BIT(6) | BIT(5) | BIT(3) | BIT(2) | BIT(1));
1488 }
1489
1490 if (adreno_is_a750(adreno_gpu)) {
1491 /* Disable ubwc merged UFC request feature */
1492 gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(19), BIT(19));
1493
1494 /* Enable TP flaghint and other performance settings */
1495 gpu_write(gpu, REG_A6XX_TPL1_DBG_ECO_CNTL1, 0xc0700);
1496 } else if (adreno_is_a7xx(adreno_gpu)) {
1497 /* Disable non-ubwc read reqs from passing write reqs */
1498 gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(11), BIT(11));
1499 }
1500
1501 /* Enable interrupts */
1502 gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK,
1503 adreno_is_a7xx(adreno_gpu) ? A7XX_INT_MASK : A6XX_INT_MASK);
1504
1505 ret = adreno_hw_init(gpu);
1506 if (ret)
1507 goto out;
1508
1509 gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
1510
1511 /* Set the ringbuffer address */
1512 gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
1513
1514 /* Targets that support extended APRIV can use the RPTR shadow from
1515 * hardware but all the other ones need to disable the feature. Targets
1516 * that support the WHERE_AM_I opcode can use that instead
1517 */
1518 if (adreno_gpu->base.hw_apriv)
1519 gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
1520 else
1521 gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
1522 MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
1523
1524 /* Configure the RPTR shadow if needed: */
1525 if (a6xx_gpu->shadow_bo) {
1526 gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR,
1527 shadowptr(a6xx_gpu, gpu->rb[0]));
1528 for (unsigned int i = 0; i < gpu->nr_rings; i++)
1529 a6xx_gpu->shadow[i] = 0;
1530 }
1531
1532 /* ..which means "always" on A7xx, also for BV shadow */
1533 if (adreno_is_a7xx(adreno_gpu)) {
1534 gpu_write64(gpu, REG_A7XX_CP_BV_RB_RPTR_ADDR,
1535 rbmemptr(gpu->rb[0], bv_rptr));
1536 }
1537
1538 a6xx_preempt_hw_init(gpu);
1539
1540 /* Always come up on rb 0 */
1541 a6xx_gpu->cur_ring = gpu->rb[0];
1542
1543 for (i = 0; i < gpu->nr_rings; i++)
1544 gpu->rb[i]->cur_ctx_seqno = 0;
1545
1546 /* Enable the SQE to start the CP engine */
1547 gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);
1548
1549 if (adreno_is_a7xx(adreno_gpu) && !a6xx_gpu->pwrup_reglist_emitted) {
1550 a7xx_patch_pwrup_reglist(gpu);
1551 a6xx_gpu->pwrup_reglist_emitted = true;
1552 }
1553
1554 ret = adreno_is_a7xx(adreno_gpu) ? a7xx_cp_init(gpu) : a6xx_cp_init(gpu);
1555 if (ret)
1556 goto out;
1557
1558 /*
1559 * Try to load a zap shader into the secure world. If successful
1560 * we can use the CP to switch out of secure mode. If not then we
1561 * have no recourse but to try to switch ourselves out manually. If we
1562 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
1563 * be blocked and a permissions violation will soon follow.
1564 */
1565 ret = a6xx_zap_shader_init(gpu);
1566 if (!ret) {
1567 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
1568 OUT_RING(gpu->rb[0], 0x00000000);
1569
1570 a6xx_flush(gpu, gpu->rb[0]);
1571 if (!a6xx_idle(gpu, gpu->rb[0]))
1572 return -EINVAL;
1573 } else if (ret == -ENODEV) {
1574 /*
1575 * This device does not use zap shader (but print a warning
1576 * just in case someone got their dt wrong.. hopefully they
1577 * have a debug UART to realize the error of their ways...
1578 * if you mess this up you are about to crash horribly)
1579 */
1580 dev_warn_once(gpu->dev->dev,
1581 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
1582 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
1583 ret = 0;
1584 } else {
1585 return ret;
1586 }
1587
1588 out:
1589 if (adreno_has_gmu_wrapper(adreno_gpu))
1590 return ret;
1591
1592 /* Last step - yield the ringbuffer */
1593 a7xx_preempt_start(gpu);
1594
1595 /*
1596 * Tell the GMU that we are done touching the GPU and it can start power
1597 * management
1598 */
1599 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
1600
1601 if (a6xx_gpu->gmu.legacy) {
1602 /* Take the GMU out of its special boot mode */
1603 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
1604 }
1605
1606 return ret;
1607 }
1608
1609 static int a6xx_hw_init(struct msm_gpu *gpu)
1610 {
1611 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1612 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1613 int ret;
1614
1615 mutex_lock(&a6xx_gpu->gmu.lock);
1616 ret = hw_init(gpu);
1617 mutex_unlock(&a6xx_gpu->gmu.lock);
1618
1619 return ret;
1620 }
1621
1622 static void a6xx_dump(struct msm_gpu *gpu)
1623 {
1624 DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n",
1625 gpu_read(gpu, REG_A6XX_RBBM_STATUS));
1626 adreno_dump(gpu);
1627 }
1628
1629 static void a6xx_recover(struct msm_gpu *gpu)
1630 {
1631 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1632 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1633 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1634 int active_submits;
1635
1636 adreno_dump_info(gpu);
1637
1638 if (a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) {
1639 /* Sometimes crashstate capture is skipped, so SQE should be halted here again */
1640 gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
1641
1642 if (hang_debug)
1643 a6xx_dump(gpu);
1644
1645 }
1646
1647 /*
1648 * Set the hung flag to handle recovery specific sequences during the
1649 * rpm suspend we are about to trigger
1650 */
1651
1652 a6xx_gpu->hung = true;
1653
1654 pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);
1655
1656 /* active_submits won't change until we make a submission */
1657 mutex_lock(&gpu->active_lock);
1658 active_submits = gpu->active_submits;
1659
1660 /*
1661 * Temporarily clear active_submits count to silence a WARN() in the
1662 * runtime suspend cb
1663 */
1664 gpu->active_submits = 0;
1665
1666 if (adreno_has_gmu_wrapper(adreno_gpu) || adreno_has_rgmu(adreno_gpu)) {
1667 /* Drain the outstanding traffic on memory buses */
1668 adreno_gpu->funcs->bus_halt(adreno_gpu, true);
1669
1670 /* Reset the GPU to a clean state */
1671 a6xx_gpu_sw_reset(gpu, true);
1672 a6xx_gpu_sw_reset(gpu, false);
1673 }
1674
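/*
 * Arm a notifier on the CX power domain and request a synchronous
 * poweroff, so that below we can wait for CX to actually collapse
 * before powering everything back up.
 */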
1675 reinit_completion(&gmu->pd_gate);
1676 dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
1677 dev_pm_genpd_synced_poweroff(gmu->cxpd);
1678
1679 /* Drop the rpm refcount from active submits */
1680 if (active_submits)
1681 pm_runtime_put(&gpu->pdev->dev);
1682
1683 /* And the final one from recover worker */
1684 pm_runtime_put_sync(&gpu->pdev->dev);
1685
1686 if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
1687 DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");
1688
1689 dev_pm_genpd_remove_notifier(gmu->cxpd);
1690
1691 pm_runtime_use_autosuspend(&gpu->pdev->dev);
1692
1693 if (active_submits)
1694 pm_runtime_get(&gpu->pdev->dev);
1695
1696 pm_runtime_get_sync(&gpu->pdev->dev);
1697
1698 gpu->active_submits = active_submits;
1699 mutex_unlock(&gpu->active_lock);
1700
1701 msm_gpu_hw_init(gpu);
1702 a6xx_gpu->hung = false;
1703 }
1704
1705 static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
1706 {
1707 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1708 static const char *uche_clients[7] = {
1709 "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
1710 };
1711 u32 val;
1712
1713 if (adreno_is_a7xx(adreno_gpu)) {
1714 if (mid != 1 && mid != 2 && mid != 3 && mid != 8)
1715 return "UNKNOWN";
1716 } else {
1717 if (mid < 1 || mid > 3)
1718 return "UNKNOWN";
1719 }
1720
1721 /*
1722 * The source of the data depends on the mid ID read from FSYNR1
1723 * and the client ID read from the UCHE block
1724 */
1725 val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);
1726
1727 if (adreno_is_a7xx(adreno_gpu)) {
1728 /* Bit 3 for mid=3 indicates BR or BV */
1729 static const char *uche_clients_a7xx[16] = {
1730 "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC",
1731 "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
1732 "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC",
1733 "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
1734 };
1735
1736 /* LPAC has the same clients as BR and BV, but because it is
1737 * compute-only some of them do not exist and there are holes
1738 * in the array.
1739 */
1740 static const char *uche_clients_lpac_a7xx[8] = {
1741 "-", "LPAC_SP", "-", "-",
1742 "LPAC_HLSQ", "-", "-", "LPAC_TP",
1743 };
1744
1745 val &= GENMASK(6, 0);
1746
1747 /* mid=3 refers to BR or BV */
1748 if (mid == 3) {
1749 if (val < ARRAY_SIZE(uche_clients_a7xx))
1750 return uche_clients_a7xx[val];
1751 else
1752 return "UCHE";
1753 }
1754
1755 /* mid=8 refers to LPAC */
1756 if (mid == 8) {
1757 if (val < ARRAY_SIZE(uche_clients_lpac_a7xx))
1758 return uche_clients_lpac_a7xx[val];
1759 else
1760 return "UCHE_LPAC";
1761 }
1762
1763 /* mid=2 is a catchall for everything else in LPAC */
1764 if (mid == 2)
1765 return "UCHE_LPAC";
1766
1767 /* mid=1 is a catchall for everything else in BR/BV */
1768 return "UCHE";
1769 } else if (adreno_is_a660_family(adreno_gpu)) {
1770 static const char *uche_clients_a660[8] = {
1771 "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", "TP",
1772 };
1773
1774 static const char *uche_clients_a660_not[8] = {
1775 "not VFD", "not SP", "not VSC", "not VPC",
1776 "not HLSQ", "not PC", "not LRZ", "not TP",
1777 };
1778
1779 val &= GENMASK(6, 0);
1780
1781 if (mid == 3 && val < ARRAY_SIZE(uche_clients_a660))
1782 return uche_clients_a660[val];
1783
1784 if (mid == 1 && val < ARRAY_SIZE(uche_clients_a660_not))
1785 return uche_clients_a660_not[val];
1786
1787 return "UCHE";
1788 } else {
1789 /* mid = 3 is most precise and refers to only one block per client */
1790 if (mid == 3)
1791 return uche_clients[val & 7];
1792
1793 /* For mid=2 the source is TP or VFD except when the client id is 0 */
1794 if (mid == 2)
1795 return ((val & 7) == 0) ? "TP" : "TP|VFD";
1796
1797 /* For mid=1 just return "UCHE" as a catchall for everything else */
1798 return "UCHE";
1799 }
1800 }
1801
1802 static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
1803 {
1804 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1805
1806 if (id == 0)
1807 return "CP";
1808 else if (id == 4)
1809 return "CCU";
1810 else if (id == 6)
1811 return "CDP Prefetch";
1812 else if (id == 7)
1813 return "GMU";
1814 else if (id == 5 && adreno_is_a7xx(adreno_gpu))
1815 return "Flag cache";
1816
1817 return a6xx_uche_fault_block(gpu, id);
1818 }
1819
1820 static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1821 {
1822 struct msm_gpu *gpu = arg;
1823 struct adreno_smmu_fault_info *info = data;
1824 const char *block = "unknown";
1825
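/* Snapshot the CP scratch registers so they can be included in the fault report */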
1826 u32 scratch[] = {
1827 gpu_read(gpu, REG_A6XX_CP_SCRATCH(4)),
1828 gpu_read(gpu, REG_A6XX_CP_SCRATCH(5)),
1829 gpu_read(gpu, REG_A6XX_CP_SCRATCH(6)),
1830 gpu_read(gpu, REG_A6XX_CP_SCRATCH(7)),
1831 };
1832
1833 if (info)
1834 block = a6xx_fault_block(gpu, info->fsynr1 & 0xff);
1835
1836 return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
1837 }
1838
1839 static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
1840 {
1841 u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS);
1842
1843 if (status & A6XX_CP_INT_CP_OPCODE_ERROR) {
1844 u32 val;
1845
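/* Select SQE status slot 1 and read back what is likely the offending opcode */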
1846 gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1);
1847 val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA);
1848 dev_err_ratelimited(&gpu->pdev->dev,
1849 "CP | opcode error | possible opcode=0x%8.8X\n",
1850 val);
1851 }
1852
1853 if (status & A6XX_CP_INT_CP_UCODE_ERROR)
1854 dev_err_ratelimited(&gpu->pdev->dev,
1855 "CP ucode error interrupt\n");
1856
1857 if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR)
1858 dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n",
1859 gpu_read(gpu, REG_A6XX_CP_HW_FAULT));
1860
1861 if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1862 u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS);
1863
1864 dev_err_ratelimited(&gpu->pdev->dev,
1865 "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1866 val & (1 << 20) ? "READ" : "WRITE",
1867 (val & 0x3ffff), val);
1868 }
1869
1870 if (status & A6XX_CP_INT_CP_AHB_ERROR && !adreno_is_a7xx(to_adreno_gpu(gpu)))
1871 dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n");
1872
1873 if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR)
1874 dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n");
1875
1876 if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR)
1877 dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n");
1878
1879 }
1880
1881 static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
1882 {
1883 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1884
1885 /*
1886 * If stalled on SMMU fault, we could trip the GPU's hang detection,
1887 * but the fault handler will trigger the devcore dump, and we want
1888 * to otherwise resume normally rather than killing the submit, so
1889 * just bail.
1890 */
1891 if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
1892 return;
1893
1894 DRM_DEV_ERROR(&gpu->pdev->dev,
1895 "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1896 ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
1897 gpu_read(gpu, REG_A6XX_RBBM_STATUS),
1898 gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
1899 gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
1900 gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
1901 gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
1902 gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
1903 gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
1904
1905 /* Turn off the hangcheck timer to keep it from bothering us */
1906 timer_delete(&gpu->hangcheck_timer);
1907
1908 /* Turn off interrupts to avoid triggering recovery again */
1909 gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 0);
1910
1911 kthread_queue_work(gpu->worker, &gpu->recover_work);
1912 }
1913
1914 static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
1915 {
1916 u32 status;
1917
1918 status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS);
1919 gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0);
1920
1921 dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);
1922
1923 /*
1924 * Ignore FASTBLEND violations, because the HW will silently fall back
1925 * to legacy blending.
1926 */
1927 if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
1928 A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
1929 timer_delete(&gpu->hangcheck_timer);
1930
1931 kthread_queue_work(gpu->worker, &gpu->recover_work);
1932 }
1933 }
1934
1935 static void a6xx_gpu_keepalive_vote(struct msm_gpu *gpu, bool on)
1936 {
1937 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1938 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1939
1940 if (adreno_has_gmu_wrapper(adreno_gpu))
1941 return;
1942
1943 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, on);
1944 }
1945
1946 static int irq_poll_fence(struct msm_gpu *gpu)
1947 {
1948 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1949 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1950 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1951 u32 status;
1952
1953 if (adreno_has_gmu_wrapper(adreno_gpu))
1954 return 0;
1955
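/*
 * Poll until the always-on AHB fence reads back as zero; while it is
 * asserted, register accesses may be dropped and the RBBM interrupt
 * status read in the IRQ handler would be unreliable.
 */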
1956 if (gmu_poll_timeout_atomic(gmu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, status, !status, 1, 100)) {
1957 u32 rbbm_unmasked = gmu_read(gmu, REG_A6XX_GMU_RBBM_INT_UNMASKED_STATUS);
1958
1959 dev_err_ratelimited(&gpu->pdev->dev,
1960 "irq fence poll timeout, fence_ctrl=0x%x, unmasked_status=0x%x\n",
1961 status, rbbm_unmasked);
1962 return -ETIMEDOUT;
1963 }
1964
1965 return 0;
1966 }
1967
1968 static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
1969 {
1970 struct msm_drm_private *priv = gpu->dev->dev_private;
1971
1972 /* Set keepalive vote to avoid power collapse after RBBM_INT_0_STATUS is read */
1973 a6xx_gpu_keepalive_vote(gpu, true);
1974
1975 if (irq_poll_fence(gpu))
1976 goto done;
1977
1978 u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
1979
1980 gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
1981
1982 if (priv->disable_err_irq)
1983 status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
1984
1985 if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
1986 a6xx_fault_detect_irq(gpu);
1987
1988 if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR)
1989 dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n");
1990
1991 if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1992 a6xx_cp_hw_err_irq(gpu);
1993
1994 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
1995 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");
1996
1997 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1998 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");
1999
2000 if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
2001 dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");
2002
2003 if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
2004 a7xx_sw_fuse_violation_irq(gpu);
2005
2006 if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
2007 msm_gpu_retire(gpu);
2008 a6xx_preempt_trigger(gpu);
2009 }
2010
2011 if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
2012 a6xx_preempt_irq(gpu);
2013
2014 done:
2015 a6xx_gpu_keepalive_vote(gpu, false);
2016
2017 return IRQ_HANDLED;
2018 }
2019
2020 static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
2021 {
2022 llcc_slice_deactivate(a6xx_gpu->llc_slice);
2023 llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
2024 }
2025
2026 static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
2027 {
2028 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
2029 struct msm_gpu *gpu = &adreno_gpu->base;
2030 u32 cntl1_regval = 0;
2031
2032 if (IS_ERR(a6xx_gpu->llc_mmio))
2033 return;
2034
2035 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
2036 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
2037
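/*
 * Duplicate the 5-bit GPU SCID across the five client fields (bits 0,
 * 5, 10, 15 and 20) of the cacheability CNTL1 register programmed below.
 */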
2038 gpu_scid &= 0x1f;
2039 cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
2040 (gpu_scid << 15) | (gpu_scid << 20);
2041
2042 /* On A660, the SCID programming for UCHE traffic is done in
2043 * A6XX_GBIF_SCACHE_CNTL0[14:10]
2044 */
2045 if (adreno_is_a660_family(adreno_gpu))
2046 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
2047 (1 << 8), (gpu_scid << 10) | (1 << 8));
2048 }
2049
2050 /*
2051 * For targets with a MMU500, activate the slice but don't program the
2052 * register. The XBL will take care of that.
2053 */
2054 if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
2055 if (!a6xx_gpu->have_mmu500) {
2056 u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
2057
2058 gpuhtw_scid &= 0x1f;
2059 cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
2060 }
2061 }
2062
2063 if (!cntl1_regval)
2064 return;
2065
2066 /*
2067 * Program the slice IDs for the various GPU blocks and GPU MMU
2068 * pagetables
2069 */
2070 if (!a6xx_gpu->have_mmu500) {
2071 a6xx_llc_write(a6xx_gpu,
2072 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
2073
2074 /*
2075 * Program cacheability overrides to not allocate cache
2076 * lines on a write miss
2077 */
2078 a6xx_llc_rmw(a6xx_gpu,
2079 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
2080 return;
2081 }
2082
2083 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval);
2084 }
2085
2086 static void a7xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
2087 {
2088 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
2089 struct msm_gpu *gpu = &adreno_gpu->base;
2090
2091 if (IS_ERR(a6xx_gpu->llc_mmio))
2092 return;
2093
2094 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
2095 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
2096
2097 gpu_scid &= GENMASK(4, 0);
2098
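/*
 * Program the GPU SCID into every client field of SCACHE_CNTL1 and
 * into the UCHE field of SCACHE_CNTL0.
 */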
2099 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1,
2100 FIELD_PREP(GENMASK(29, 25), gpu_scid) |
2101 FIELD_PREP(GENMASK(24, 20), gpu_scid) |
2102 FIELD_PREP(GENMASK(19, 15), gpu_scid) |
2103 FIELD_PREP(GENMASK(14, 10), gpu_scid) |
2104 FIELD_PREP(GENMASK(9, 5), gpu_scid) |
2105 FIELD_PREP(GENMASK(4, 0), gpu_scid));
2106
2107 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0,
2108 FIELD_PREP(GENMASK(14, 10), gpu_scid) |
2109 BIT(8));
2110 }
2111
2112 llcc_slice_activate(a6xx_gpu->htw_llc_slice);
2113 }
2114
2115 static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
2116 {
2117 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */
2118 if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
2119 return;
2120
2121 llcc_slice_putd(a6xx_gpu->llc_slice);
2122 llcc_slice_putd(a6xx_gpu->htw_llc_slice);
2123 }
2124
2125 static void a6xx_llc_slices_init(struct platform_device *pdev,
2126 struct a6xx_gpu *a6xx_gpu, bool is_a7xx)
2127 {
2128 struct device_node *phandle;
2129
2130 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */
2131 if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
2132 return;
2133
2134 /*
2135 * There is a different programming path for A6xx targets with an
2136 * mmu500 attached, so detect if that is the case
2137 */
2138 phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0);
2139 a6xx_gpu->have_mmu500 = (phandle &&
2140 of_device_is_compatible(phandle, "arm,mmu-500"));
2141 of_node_put(phandle);
2142
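/*
 * The CX_MISC region is only needed where we program the SCIDs
 * ourselves, i.e. on A7xx and on A6xx parts without an MMU-500.
 */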
2143 if (is_a7xx || !a6xx_gpu->have_mmu500)
2144 a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem");
2145 else
2146 a6xx_gpu->llc_mmio = NULL;
2147
2148 a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
2149 a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
2150
2151 if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
2152 a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
2153 }
2154
2155 static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu)
2156 {
2157 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
2158 struct msm_gpu *gpu = &adreno_gpu->base;
2159 u32 fuse_val;
2160 int ret;
2161
2162 if (adreno_is_a750(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) {
2163 /*
2164 * Assume that if qcom scm isn't available, whatever replacement
2165 * there is allows writing the fuse register ourselves.
2166 * Users of alternative firmware need to make sure this
2167 * register is writeable or indicate that it's not somehow.
2168 * Print a warning because if you mess this up you're about to
2169 * crash horribly.
2170 */
2171 if (!qcom_scm_is_available()) {
2172 dev_warn_once(gpu->dev->dev,
2173 "SCM is not available, poking fuse register\n");
2174 a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE,
2175 A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
2176 A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND |
2177 A7XX_CX_MISC_SW_FUSE_VALUE_LPAC);
2178 adreno_gpu->has_ray_tracing = true;
2179 return 0;
2180 }
2181
2182 ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ |
2183 QCOM_SCM_GPU_TSENSE_EN_REQ);
2184 if (ret)
2185 return ret;
2186
2187 /*
2188 * On A7XX_GEN3 and newer, raytracing may be disabled by the
2189 * firmware; find out whether that's the case. The scm call
2190 * above sets the fuse register.
2191 */
2192 fuse_val = a6xx_llc_read(a6xx_gpu,
2193 REG_A7XX_CX_MISC_SW_FUSE_VALUE);
2194 adreno_gpu->has_ray_tracing =
2195 !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING);
2196 } else if (adreno_is_a740(adreno_gpu)) {
2197 /* Raytracing is always enabled on a740 */
2198 adreno_gpu->has_ray_tracing = true;
2199 }
2200
2201 return 0;
2202 }
2203
2204
2205 #define GBIF_CLIENT_HALT_MASK BIT(0)
2206 #define GBIF_ARB_HALT_MASK BIT(1)
2207 #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0)
2208 #define VBIF_RESET_ACK_MASK 0xF0
2209 #define GPR0_GBIF_HALT_REQUEST 0x1E0
2210
2211 void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
2212 {
2213 struct msm_gpu *gpu = &adreno_gpu->base;
2214
2215 if (adreno_is_a619_holi(adreno_gpu)) {
2216 gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, GPR0_GBIF_HALT_REQUEST);
2217 spin_until((gpu_read(gpu, REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) &
2218 (VBIF_RESET_ACK_MASK)) == VBIF_RESET_ACK_MASK);
2219 } else if (!a6xx_has_gbif(adreno_gpu)) {
2220 gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, VBIF_XIN_HALT_CTRL0_MASK);
2221 spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) &
2222 (VBIF_XIN_HALT_CTRL0_MASK)) == VBIF_XIN_HALT_CTRL0_MASK);
2223 gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
2224
2225 return;
2226 }
2227
2228 if (gx_off) {
2229 /* Halt the gx side of GBIF */
2230 gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1);
2231 spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1);
2232 }
2233
2234 /* Halt new client requests on GBIF */
2235 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
2236 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
2237 (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
2238
2239 /* Halt all AXI requests on GBIF */
2240 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
2241 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
2242 (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
2243
2244 /* The GBIF halt needs to be explicitly cleared */
2245 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
2246 }
2247
2248 void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert)
2249 {
2250 /* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */
2251 if (adreno_is_a610(to_adreno_gpu(gpu)) || adreno_is_a8xx(to_adreno_gpu(gpu)))
2252 return;
2253
2254 gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert);
2255 /* Perform a bogus read and add a brief delay to ensure ordering. */
2256 gpu_read(gpu, REG_A6XX_RBBM_SW_RESET_CMD);
2257 udelay(1);
2258
2259 /* The reset line needs to be asserted for at least 100 us */
2260 if (assert)
2261 udelay(100);
2262 }
2263
2264 static int a6xx_gmu_pm_resume(struct msm_gpu *gpu)
2265 {
2266 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2267 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2268 int ret;
2269
2270 gpu->needs_hw_init = true;
2271
2272 trace_msm_gpu_resume(0);
2273
2274 mutex_lock(&a6xx_gpu->gmu.lock);
2275 ret = a6xx_gmu_resume(a6xx_gpu);
2276 mutex_unlock(&a6xx_gpu->gmu.lock);
2277 if (ret)
2278 return ret;
2279
2280 msm_devfreq_resume(gpu);
2281
2282 if (adreno_is_a8xx(adreno_gpu))
2283 a8xx_llc_activate(a6xx_gpu);
2284 else if (adreno_is_a7xx(adreno_gpu))
2285 a7xx_llc_activate(a6xx_gpu);
2286 else
2287 a6xx_llc_activate(a6xx_gpu);
2288
2289 return ret;
2290 }
2291
2292 static int a6xx_pm_resume(struct msm_gpu *gpu)
2293 {
2294 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2295 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2296 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
2297 unsigned long freq = gpu->fast_rate;
2298 struct dev_pm_opp *opp;
2299 int ret;
2300
2301 gpu->needs_hw_init = true;
2302
2303 trace_msm_gpu_resume(0);
2304
2305 mutex_lock(&a6xx_gpu->gmu.lock);
2306
2307 opp = dev_pm_opp_find_freq_ceil(&gpu->pdev->dev, &freq);
2308 if (IS_ERR(opp)) {
2309 ret = PTR_ERR(opp);
2310 goto err_set_opp;
2311 }
2312 dev_pm_opp_put(opp);
2313
2314 /* Set the core clock and bus bw, having VDD scaling in mind */
2315 dev_pm_opp_set_opp(&gpu->pdev->dev, opp);
2316
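/* Bring up the GMU wrapper device and the GX power domain before enabling the GPU clocks */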
2317 pm_runtime_resume_and_get(gmu->dev);
2318 pm_runtime_resume_and_get(gmu->gxpd);
2319
2320 ret = clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
2321 if (ret)
2322 goto err_bulk_clk;
2323
2324 ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks);
2325 if (ret) {
2326 clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);
2327 goto err_bulk_clk;
2328 }
2329
2330 if (adreno_is_a619_holi(adreno_gpu))
2331 a6xx_sptprac_enable(gmu);
2332
2333 /* If anything goes south, tear the GPU down piece by piece.. */
2334 if (ret) {
2335 err_bulk_clk:
2336 pm_runtime_put(gmu->gxpd);
2337 pm_runtime_put(gmu->dev);
2338 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
2339 }
2340 err_set_opp:
2341 mutex_unlock(&a6xx_gpu->gmu.lock);
2342
2343 if (!ret) {
2344 msm_devfreq_resume(gpu);
2345 a6xx_llc_activate(a6xx_gpu);
2346 }
2347
2348 return ret;
2349 }
2350
2351 static int a6xx_gmu_pm_suspend(struct msm_gpu *gpu)
2352 {
2353 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2354 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2355 int i, ret;
2356
2357 trace_msm_gpu_suspend(0);
2358
2359 a6xx_llc_deactivate(a6xx_gpu);
2360
2361 msm_devfreq_suspend(gpu);
2362
2363 mutex_lock(&a6xx_gpu->gmu.lock);
2364 ret = a6xx_gmu_stop(a6xx_gpu);
2365 mutex_unlock(&a6xx_gpu->gmu.lock);
2366 if (ret)
2367 return ret;
2368
2369 if (a6xx_gpu->shadow_bo)
2370 for (i = 0; i < gpu->nr_rings; i++)
2371 a6xx_gpu->shadow[i] = 0;
2372
2373 gpu->suspend_count++;
2374
2375 return 0;
2376 }
2377
2378 static int a6xx_pm_suspend(struct msm_gpu *gpu)
2379 {
2380 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2381 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2382 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
2383 int i;
2384
2385 trace_msm_gpu_suspend(0);
2386
2387 a6xx_llc_deactivate(a6xx_gpu);
2388
2389 msm_devfreq_suspend(gpu);
2390
2391 mutex_lock(&a6xx_gpu->gmu.lock);
2392
2393 /* Drain the outstanding traffic on memory buses */
2394 adreno_gpu->funcs->bus_halt(adreno_gpu, true);
2395
2396 if (adreno_is_a619_holi(adreno_gpu))
2397 a6xx_sptprac_disable(gmu);
2398
2399 clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);
2400 clk_bulk_disable_unprepare(gmu->nr_clocks, gmu->clocks);
2401
2402 pm_runtime_put_sync(gmu->gxpd);
2403 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
2404 pm_runtime_put_sync(gmu->dev);
2405
2406 mutex_unlock(&a6xx_gpu->gmu.lock);
2407
2408 if (a6xx_gpu->shadow_bo)
2409 for (i = 0; i < gpu->nr_rings; i++)
2410 a6xx_gpu->shadow[i] = 0;
2411
2412 gpu->suspend_count++;
2413
2414 return 0;
2415 }
2416
2417 static int a6xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
2418 {
2419 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2420 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2421
2422 *value = read_gmu_ao_counter(a6xx_gpu);
2423
2424 return 0;
2425 }
2426
2427 static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
2428 {
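/* Without a GMU, read the CP always-on counter directly */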
2429 *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER);
2430 return 0;
2431 }
2432
2433 static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
2434 {
2435 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2436 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2437
2438 return a6xx_gpu->cur_ring;
2439 }
2440
2441 static void a6xx_destroy(struct msm_gpu *gpu)
2442 {
2443 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2444 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2445
2446 if (a6xx_gpu->sqe_bo) {
2447 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm);
2448 drm_gem_object_put(a6xx_gpu->sqe_bo);
2449 }
2450
2451 if (a6xx_gpu->aqe_bo) {
2452 msm_gem_unpin_iova(a6xx_gpu->aqe_bo, gpu->vm);
2453 drm_gem_object_put(a6xx_gpu->aqe_bo);
2454 }
2455
2456 if (a6xx_gpu->shadow_bo) {
2457 msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->vm);
2458 drm_gem_object_put(a6xx_gpu->shadow_bo);
2459 }
2460
2461 a6xx_llc_slices_destroy(a6xx_gpu);
2462
2463 a6xx_gmu_remove(a6xx_gpu);
2464
2465 adreno_gpu_cleanup(adreno_gpu);
2466
2467 kfree(a6xx_gpu);
2468 }
2469
2470 static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
2471 {
2472 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2473 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2474 u64 busy_cycles;
2475
2476 /* The busy counter runs off the 19.2 MHz always-on XO clock */
2477 *out_sample_rate = 19200000;
2478
2479 busy_cycles = gmu_read64(&a6xx_gpu->gmu,
2480 REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
2481 REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
2482
2483 return busy_cycles;
2484 }
2485
2486 static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
2487 bool suspended)
2488 {
2489 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2490 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2491
2492 mutex_lock(&a6xx_gpu->gmu.lock);
2493 a6xx_gmu_set_freq(gpu, opp, suspended);
2494 mutex_unlock(&a6xx_gpu->gmu.lock);
2495 }
2496
2497 static struct drm_gpuvm *
2498 a6xx_create_vm(struct msm_gpu *gpu, struct platform_device *pdev)
2499 {
2500 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2501 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2502 unsigned long quirks = 0;
2503
2504 /*
2505 * This allows the GPU to set the bus attributes required to use the
2506 * system cache on behalf of the IOMMU page table walker.
2507 */
2508 if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice) &&
2509 !device_iommu_capable(&pdev->dev, IOMMU_CAP_CACHE_COHERENCY))
2510 quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
2511
2512 return adreno_iommu_create_vm(gpu, pdev, quirks);
2513 }
2514
2515 static struct drm_gpuvm *
2516 a6xx_create_private_vm(struct msm_gpu *gpu, bool kernel_managed)
2517 {
2518 struct msm_mmu *mmu;
2519
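/*
 * Allocate a new page table from the GPU's parent MMU to back a
 * per-process GPU address space.
 */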
2520 mmu = msm_iommu_pagetable_create(to_msm_vm(gpu->vm)->mmu, kernel_managed);
2521
2522 if (IS_ERR(mmu))
2523 return ERR_CAST(mmu);
2524
2525 return msm_gem_vm_create(gpu->dev, mmu, "gpu", ADRENO_VM_START,
2526 adreno_private_vm_size(gpu), kernel_managed);
2527 }
2528
2529 static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
2530 {
2531 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2532 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2533
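/* Prefer the rptr shadow in memory when the HW or SQE firmware keeps it updated */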
2534 if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
2535 return a6xx_gpu->shadow[ring->id];
2536
2537 /*
2538 * This is true only on an A6XX_GEN1 with a GMU, IFPC enabled and a
2539 * super old SQE firmware without 'whereami' support
2540 */
2541 WARN_ONCE((to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC),
2542 "Can't read CP_RB_RPTR register reliably\n");
2543
2544 return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
2545 }
2546
2547 static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
2548 {
2549 struct msm_cp_state cp_state;
2550 bool progress;
2551
2552 /*
2553 * With IFPC, the KMD doesn't know whether the GX power domain is
2554 * collapsed or not, so we can't blindly read the registers below in the
2555 * GX domain. Let's trust the hang detection in HW and lie to the caller
2556 * that there was progress.
2557 */
2558 if (to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC)
2559 return true;
2560
2561 cp_state = (struct msm_cp_state) {
2562 .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
2563 .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
2564 .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
2565 .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
2566 };
2567
2568 /*
2569 * Adjust the remaining data to account for what has already been
2570 * fetched from memory, but not yet consumed by the SQE.
2571 *
2572 * This is not *technically* correct, the amount buffered could
2573 * exceed the IB size due to hw prefetching ahead, but:
2574 *
2575 * (1) We aren't trying to find the exact position, just whether
2576 * progress has been made
2577 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
2578 * to prevent prefetching into an unrelated submit. (And
2579 * either way, at some point the ROQ will be full.)
2580 */
2581 cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB1) >> 16;
2582 cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB2) >> 16;
2583
2584 progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
2585
2586 ring->last_cp_state = cp_state;
2587
2588 return progress;
2589 }
2590
2591 static u32 fuse_to_supp_hw(const struct adreno_info *info, u32 fuse)
2592 {
2593 if (!info->speedbins)
2594 return UINT_MAX;
2595
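/*
 * The speedbin table is terminated by a SHRT_MAX fuse entry; map the
 * fuse value to a supported-hw bitmask for the OPP core.
 */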
2596 for (int i = 0; info->speedbins[i].fuse != SHRT_MAX; i++)
2597 if (info->speedbins[i].fuse == fuse)
2598 return BIT(info->speedbins[i].speedbin);
2599
2600 return UINT_MAX;
2601 }
2602
2603 static int a6xx_set_supported_hw(struct device *dev, const struct adreno_info *info)
2604 {
2605 u32 supp_hw;
2606 u32 speedbin;
2607 int ret;
2608
2609 ret = adreno_read_speedbin(dev, &speedbin);
2610 /*
2611 * -ENOENT means that the platform doesn't support speedbin, which is
2612 * fine
2613 */
2614 if (ret == -ENOENT) {
2615 return 0;
2616 } else if (ret) {
2617 dev_err_probe(dev, ret,
2618 "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
2619 return ret;
2620 }
2621
2622 supp_hw = fuse_to_supp_hw(info, speedbin);
2623
2624 if (supp_hw == UINT_MAX) {
2625 DRM_DEV_ERROR(dev,
2626 "missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
2627 speedbin);
2628 supp_hw = BIT(0); /* Default */
2629 }
2630
2631 ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
2632 if (ret)
2633 return ret;
2634
2635 return 0;
2636 }
2637
2638 static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
2639 {
2640 struct msm_drm_private *priv = dev->dev_private;
2641 struct platform_device *pdev = priv->gpu_pdev;
2642 struct adreno_platform_config *config = pdev->dev.platform_data;
2643 struct device_node *node;
2644 struct a6xx_gpu *a6xx_gpu;
2645 struct adreno_gpu *adreno_gpu;
2646 struct msm_gpu *gpu;
2647 extern int enable_preemption;
2648 bool is_a7xx;
2649 int ret, nr_rings = 1;
2650
2651 a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL);
2652 if (!a6xx_gpu)
2653 return ERR_PTR(-ENOMEM);
2654
2655 adreno_gpu = &a6xx_gpu->base;
2656 gpu = &adreno_gpu->base;
2657
2658 mutex_init(&a6xx_gpu->gmu.lock);
2659
2660 adreno_gpu->registers = NULL;
2661
2662 /* Check if there is a GMU phandle and set it up */
2663 node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
2664 /* FIXME: How do we gracefully handle this? */
2665 BUG_ON(!node);
2666
2667 adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper");
2668
2669 adreno_gpu->base.hw_apriv =
2670 !!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV);
2671
2672 /* gpu->info only gets assigned in adreno_gpu_init(). A8xx is included intentionally */
2673 is_a7xx = config->info->family >= ADRENO_7XX_GEN1;
2674
2675 a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx);
2676
2677 ret = a6xx_set_supported_hw(&pdev->dev, config->info);
2678 if (ret) {
2679 a6xx_llc_slices_destroy(a6xx_gpu);
2680 kfree(a6xx_gpu);
2681 return ERR_PTR(ret);
2682 }
2683
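/* Preemption switches between ringbuffers, so use one ring per priority level */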
2684 if ((enable_preemption == 1) || (enable_preemption == -1 &&
2685 (config->info->quirks & ADRENO_QUIRK_PREEMPTION)))
2686 nr_rings = 4;
2687
2688 ret = adreno_gpu_init(dev, pdev, adreno_gpu, config->info->funcs, nr_rings);
2689 if (ret) {
2690 a6xx_destroy(&(a6xx_gpu->base.base));
2691 return ERR_PTR(ret);
2692 }
2693
2694 /*
2695 * For now only clamp to idle freq for devices where this is known not
2696 * to cause power supply issues:
2697 */
2698 if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
2699 priv->gpu_clamp_to_idle = true;
2700
2701 if (adreno_has_gmu_wrapper(adreno_gpu) || adreno_has_rgmu(adreno_gpu))
2702 ret = a6xx_gmu_wrapper_init(a6xx_gpu, node);
2703 else
2704 ret = a6xx_gmu_init(a6xx_gpu, node);
2705 of_node_put(node);
2706 if (ret) {
2707 a6xx_destroy(&(a6xx_gpu->base.base));
2708 return ERR_PTR(ret);
2709 }
2710
2711 if (adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) {
2712 ret = a7xx_cx_mem_init(a6xx_gpu);
2713 if (ret) {
2714 a6xx_destroy(&(a6xx_gpu->base.base));
2715 return ERR_PTR(ret);
2716 }
2717 }
2718
2719 adreno_gpu->uche_trap_base = 0x1fffffffff000ull;
2720
2721 msm_mmu_set_fault_handler(to_msm_vm(gpu->vm)->mmu, gpu,
2722 adreno_gpu->funcs->mmu_fault_handler);
2723
2724 ret = a6xx_calc_ubwc_config(adreno_gpu);
2725 if (ret) {
2726 a6xx_destroy(&(a6xx_gpu->base.base));
2727 return ERR_PTR(ret);
2728 }
2729
2730 /* Set up the preemption specific bits and pieces for each ringbuffer */
2731 a6xx_preempt_init(gpu);
2732
2733 return gpu;
2734 }
2735
2736 const struct adreno_gpu_funcs a6xx_gpu_funcs = {
2737 .base = {
2738 .get_param = adreno_get_param,
2739 .set_param = adreno_set_param,
2740 .hw_init = a6xx_hw_init,
2741 .ucode_load = a6xx_ucode_load,
2742 .pm_suspend = a6xx_gmu_pm_suspend,
2743 .pm_resume = a6xx_gmu_pm_resume,
2744 .recover = a6xx_recover,
2745 .submit = a6xx_submit,
2746 .active_ring = a6xx_active_ring,
2747 .irq = a6xx_irq,
2748 .destroy = a6xx_destroy,
2749 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2750 .show = a6xx_show,
2751 #endif
2752 .gpu_busy = a6xx_gpu_busy,
2753 .gpu_get_freq = a6xx_gmu_get_freq,
2754 .gpu_set_freq = a6xx_gpu_set_freq,
2755 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2756 .gpu_state_get = a6xx_gpu_state_get,
2757 .gpu_state_put = a6xx_gpu_state_put,
2758 #endif
2759 .create_vm = a6xx_create_vm,
2760 .create_private_vm = a6xx_create_private_vm,
2761 .get_rptr = a6xx_get_rptr,
2762 .progress = a6xx_progress,
2763 },
2764 .init = a6xx_gpu_init,
2765 .get_timestamp = a6xx_gmu_get_timestamp,
2766 .bus_halt = a6xx_bus_clear_pending_transactions,
2767 .mmu_fault_handler = a6xx_fault_handler,
2768 };
2769
2770 const struct adreno_gpu_funcs a6xx_gmuwrapper_funcs = {
2771 .base = {
2772 .get_param = adreno_get_param,
2773 .set_param = adreno_set_param,
2774 .hw_init = a6xx_hw_init,
2775 .ucode_load = a6xx_ucode_load,
2776 .pm_suspend = a6xx_pm_suspend,
2777 .pm_resume = a6xx_pm_resume,
2778 .recover = a6xx_recover,
2779 .submit = a6xx_submit,
2780 .active_ring = a6xx_active_ring,
2781 .irq = a6xx_irq,
2782 .destroy = a6xx_destroy,
2783 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2784 .show = a6xx_show,
2785 #endif
2786 .gpu_busy = a6xx_gpu_busy,
2787 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2788 .gpu_state_get = a6xx_gpu_state_get,
2789 .gpu_state_put = a6xx_gpu_state_put,
2790 #endif
2791 .create_vm = a6xx_create_vm,
2792 .create_private_vm = a6xx_create_private_vm,
2793 .get_rptr = a6xx_get_rptr,
2794 .progress = a6xx_progress,
2795 },
2796 .init = a6xx_gpu_init,
2797 .get_timestamp = a6xx_get_timestamp,
2798 .bus_halt = a6xx_bus_clear_pending_transactions,
2799 .mmu_fault_handler = a6xx_fault_handler,
2800 };
2801
2802 const struct adreno_gpu_funcs a7xx_gpu_funcs = {
2803 .base = {
2804 .get_param = adreno_get_param,
2805 .set_param = adreno_set_param,
2806 .hw_init = a6xx_hw_init,
2807 .ucode_load = a6xx_ucode_load,
2808 .pm_suspend = a6xx_gmu_pm_suspend,
2809 .pm_resume = a6xx_gmu_pm_resume,
2810 .recover = a6xx_recover,
2811 .submit = a7xx_submit,
2812 .active_ring = a6xx_active_ring,
2813 .irq = a6xx_irq,
2814 .destroy = a6xx_destroy,
2815 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2816 .show = a6xx_show,
2817 #endif
2818 .gpu_busy = a6xx_gpu_busy,
2819 .gpu_get_freq = a6xx_gmu_get_freq,
2820 .gpu_set_freq = a6xx_gpu_set_freq,
2821 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2822 .gpu_state_get = a6xx_gpu_state_get,
2823 .gpu_state_put = a6xx_gpu_state_put,
2824 #endif
2825 .create_vm = a6xx_create_vm,
2826 .create_private_vm = a6xx_create_private_vm,
2827 .get_rptr = a6xx_get_rptr,
2828 .progress = a6xx_progress,
2829 },
2830 .init = a6xx_gpu_init,
2831 .get_timestamp = a6xx_gmu_get_timestamp,
2832 .bus_halt = a6xx_bus_clear_pending_transactions,
2833 .mmu_fault_handler = a6xx_fault_handler,
2834 };
2835
2836 const struct adreno_gpu_funcs a8xx_gpu_funcs = {
2837 .base = {
2838 .get_param = adreno_get_param,
2839 .set_param = adreno_set_param,
2840 .hw_init = a8xx_hw_init,
2841 .ucode_load = a6xx_ucode_load,
2842 .pm_suspend = a6xx_gmu_pm_suspend,
2843 .pm_resume = a6xx_gmu_pm_resume,
2844 .recover = a8xx_recover,
2845 .submit = a7xx_submit,
2846 .active_ring = a6xx_active_ring,
2847 .irq = a8xx_irq,
2848 .destroy = a6xx_destroy,
2849 .gpu_busy = a8xx_gpu_busy,
2850 .gpu_get_freq = a6xx_gmu_get_freq,
2851 .gpu_set_freq = a6xx_gpu_set_freq,
2852 .create_vm = a6xx_create_vm,
2853 .create_private_vm = a6xx_create_private_vm,
2854 .get_rptr = a6xx_get_rptr,
2855 .progress = a8xx_progress,
2856 },
2857 .init = a6xx_gpu_init,
2858 .get_timestamp = a8xx_gmu_get_timestamp,
2859 .bus_halt = a8xx_bus_clear_pending_transactions,
2860 .mmu_fault_handler = a8xx_fault_handler,
2861 };
2862