1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
3
4
5 #include "msm_gem.h"
6 #include "msm_mmu.h"
7 #include "msm_gpu_trace.h"
8 #include "a6xx_gpu.h"
9 #include "a6xx_gmu.xml.h"
10
11 #include <linux/bitfield.h>
12 #include <linux/devfreq.h>
13 #include <linux/pm_domain.h>
14 #include <linux/soc/qcom/llcc-qcom.h>
15
16 #define GPU_PAS_ID 13
17
a6xx_gmu_get_timestamp(struct msm_gpu * gpu)18 static u64 a6xx_gmu_get_timestamp(struct msm_gpu *gpu)
19 {
20 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
21 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
22 u64 count_hi, count_lo, temp;
23
24 do {
25 count_hi = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H);
26 count_lo = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L);
27 temp = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H);
28 } while (unlikely(count_hi != temp));
29
30 return (count_hi << 32) | count_lo;
31 }
32
fence_status_check(struct msm_gpu * gpu,u32 offset,u32 value,u32 status,u32 mask)33 static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask)
34 {
35 /* Success if !writedropped0/1 */
36 if (!(status & mask))
37 return true;
38
39 udelay(10);
40
41 /* Try to update fenced register again */
42 gpu_write(gpu, offset, value);
43
44 /* We can't do a posted write here because the power domain could be
45 * in collapse state. So use the heaviest barrier instead
46 */
47 mb();
48 return false;
49 }
50
/*
 * Write @value to the fenced GPU register at @offset and wait until the
 * GMU AHB fence status no longer reports the write as dropped (@mask).
 *
 * With a GMU wrapper the plain register write is all that is done.
 * Otherwise the fence status is polled in two windows (timeout argument
 * 1000 each), re-issuing the write before the second window.
 *
 * Returns 0 on success (a success in the second window is logged as a
 * delay), or -ETIMEDOUT when both windows expire.
 */
static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask)
{
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	struct msm_gpu *gpu = &a6xx_gpu->base.base;
	u32 status;
	int err;

	gpu_write(gpu, offset, value);

	/* Without a real GMU there is no fence to wait for */
	if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
		return 0;

	/*
	 * The power domain could be in collapse state, so a posted write is
	 * not sufficient: use the heaviest barrier.
	 */
	mb();

	err = gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
			fence_status_check(gpu, offset, value, status, mask), 0, 1000);
	if (err == 0)
		return 0;

	/* First window expired: write once more and poll for another 1ms */
	gpu_write(gpu, offset, value);
	mb();

	err = gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
			fence_status_check(gpu, offset, value, status, mask), 0, 1000);
	if (err != 0) {
		dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n",
				offset);
		return -ETIMEDOUT;
	}

	/*
	 * Only a 'delay' is reported, and only after polling finished:
	 * pausing to print earlier would let the gpu move to power collapse,
	 * defeating the purpose of polling continuously for 2 ms.
	 */
	dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
			offset);

	return 0;
}
94
/*
 * Fenced write of a 32-bit or 64-bit value.
 *
 * The low 32 bits of @value go to @offset. When @is_64b is set and the
 * first write succeeded, the high 32 bits go to @offset + 1.
 *
 * Returns 0 on success or the error of the first fenced write that failed.
 */
int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b)
{
	int ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask);

	/* Done on error, or when there is no upper half to write */
	if (ret || !is_64b)
		return ret;

	return fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);
}
110
_a6xx_check_idle(struct msm_gpu * gpu)111 static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
112 {
113 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
114 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
115
116 /* Check that the GMU is idle */
117 if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_isidle(&a6xx_gpu->gmu))
118 return false;
119
120 /* Check tha the CX master is idle */
121 if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
122 ~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
123 return false;
124
125 return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
126 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
127 }
128
a6xx_idle(struct msm_gpu * gpu,struct msm_ringbuffer * ring)129 static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
130 {
131 /* wait for CP to drain ringbuffer: */
132 if (!adreno_idle(gpu, ring))
133 return false;
134
135 if (spin_until(_a6xx_check_idle(gpu))) {
136 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
137 gpu->name, __builtin_return_address(0),
138 gpu_read(gpu, REG_A6XX_RBBM_STATUS),
139 gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
140 gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
141 gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
142 return false;
143 }
144
145 return true;
146 }
147
update_shadow_rptr(struct msm_gpu * gpu,struct msm_ringbuffer * ring)148 static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
149 {
150 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
151 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
152
153 /* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
154 if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
155 OUT_PKT7(ring, CP_WHERE_AM_I, 2);
156 OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
157 OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
158 }
159 }
160
a6xx_flush(struct msm_gpu * gpu,struct msm_ringbuffer * ring)161 void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
162 {
163 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
164 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
165 uint32_t wptr;
166 unsigned long flags;
167
168 update_shadow_rptr(gpu, ring);
169
170 spin_lock_irqsave(&ring->preempt_lock, flags);
171
172 /* Copy the shadow to the actual register */
173 ring->cur = ring->next;
174
175 /* Make sure to wrap wptr if we need to */
176 wptr = get_wptr(ring);
177
178 /* Update HW if this is the current ring and we are not in preempt*/
179 if (!a6xx_in_preempt(a6xx_gpu)) {
180 if (a6xx_gpu->cur_ring == ring)
181 a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
182 else
183 ring->restore_wptr = true;
184 } else {
185 ring->restore_wptr = true;
186 }
187
188 spin_unlock_irqrestore(&ring->preempt_lock, flags);
189 }
190
/*
 * Queue a CP_REG_TO_MEM packet that copies register @counter to the GPU
 * address @iova, with the 64B flag set and a count of 2.
 */
static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
		u64 iova)
{
	u32 reg_to_mem = CP_REG_TO_MEM_0_REG(counter) |
			 CP_REG_TO_MEM_0_CNT(2) |
			 CP_REG_TO_MEM_0_64B;

	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
	OUT_RING(ring, reg_to_mem);
	OUT_RING(ring, lower_32_bits(iova));
	OUT_RING(ring, upper_32_bits(iova));
}
201
a6xx_set_pagetable(struct a6xx_gpu * a6xx_gpu,struct msm_ringbuffer * ring,struct msm_gem_submit * submit)202 static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
203 struct msm_ringbuffer *ring, struct msm_gem_submit *submit)
204 {
205 bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
206 struct msm_context *ctx = submit->queue->ctx;
207 struct drm_gpuvm *vm = msm_context_vm(submit->dev, ctx);
208 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
209 phys_addr_t ttbr;
210 u32 asid;
211 u64 memptr = rbmemptr(ring, ttbr0);
212
213 if (ctx->seqno == ring->cur_ctx_seqno)
214 return;
215
216 if (msm_iommu_pagetable_params(to_msm_vm(vm)->mmu, &ttbr, &asid))
217 return;
218
219 if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) {
220 /* Wait for previous submit to complete before continuing: */
221 OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
222 OUT_RING(ring, 0);
223 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
224 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
225 OUT_RING(ring, submit->seqno - 1);
226
227 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
228 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH);
229
230 /* Reset state used to synchronize BR and BV */
231 OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1);
232 OUT_RING(ring,
233 CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS |
234 CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE |
235 CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER |
236 CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS);
237
238 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
239 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH);
240
241 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
242 OUT_RING(ring, LRZ_FLUSH_INVALIDATE);
243
244 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
245 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
246 }
247
248 if (!sysprof) {
249 if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
250 /* Turn off protected mode to write to special registers */
251 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
252 OUT_RING(ring, 0);
253 }
254
255 if (adreno_is_a8xx(adreno_gpu)) {
256 OUT_PKT4(ring, REG_A8XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
257 OUT_RING(ring, 1);
258 OUT_PKT4(ring, REG_A8XX_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1);
259 OUT_RING(ring, 1);
260 } else {
261 OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
262 OUT_RING(ring, 1);
263 }
264 }
265
266 /* Execute the table update */
267 OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
268 OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));
269
270 OUT_RING(ring,
271 CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
272 CP_SMMU_TABLE_UPDATE_1_ASID(asid));
273 OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
274 OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));
275
276 /*
277 * Write the new TTBR0 to the memstore. This is good for debugging.
278 * Needed for preemption
279 */
280 OUT_PKT7(ring, CP_MEM_WRITE, 5);
281 OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_LO(lower_32_bits(memptr)));
282 OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_HI(upper_32_bits(memptr)));
283 OUT_RING(ring, lower_32_bits(ttbr));
284 OUT_RING(ring, upper_32_bits(ttbr));
285 OUT_RING(ring, ctx->seqno);
286
287 /*
288 * Sync both threads after switching pagetables and enable BR only
289 * to make sure BV doesn't race ahead while BR is still switching
290 * pagetables.
291 */
292 if (adreno_is_a7xx(&a6xx_gpu->base) || adreno_is_a8xx(&a6xx_gpu->base)) {
293 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
294 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
295 }
296
297 /*
298 * And finally, trigger a uche flush to be sure there isn't anything
299 * lingering in that part of the GPU
300 */
301
302 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
303 OUT_RING(ring, CACHE_INVALIDATE);
304
305 if (!sysprof) {
306 u32 reg_status = adreno_is_a8xx(adreno_gpu) ?
307 REG_A8XX_RBBM_PERFCTR_SRAM_INIT_STATUS :
308 REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS;
309 /*
310 * Wait for SRAM clear after the pgtable update, so the
311 * two can happen in parallel:
312 */
313 OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
314 OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
315 OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(reg_status));
316 OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0));
317 OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
318 OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
319 OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
320
321 if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
322 /* Re-enable protected mode: */
323 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
324 OUT_RING(ring, 1);
325 }
326 }
327 }
328
a6xx_submit(struct msm_gpu * gpu,struct msm_gem_submit * submit)329 static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
330 {
331 unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
332 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
333 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
334 struct msm_ringbuffer *ring = submit->ring;
335 unsigned int i, ibs = 0;
336
337 adreno_check_and_reenable_stall(adreno_gpu);
338
339 a6xx_set_pagetable(a6xx_gpu, ring, submit);
340
341 get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
342 rbmemptr_stats(ring, index, cpcycles_start));
343
344 /*
345 * For PM4 the GMU register offsets are calculated from the base of the
346 * GPU registers so we need to add 0x1a800 to the register value on A630
347 * to get the right value from PM4.
348 */
349 get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_CONTEXT,
350 rbmemptr_stats(ring, index, alwayson_start));
351
352 /* Invalidate CCU depth and color */
353 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
354 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));
355
356 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
357 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));
358
359 /* Submit the commands */
360 for (i = 0; i < submit->nr_cmds; i++) {
361 switch (submit->cmd[i].type) {
362 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
363 break;
364 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
365 if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
366 break;
367 fallthrough;
368 case MSM_SUBMIT_CMD_BUF:
369 OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
370 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
371 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
372 OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
373 ibs++;
374 break;
375 }
376
377 /*
378 * Periodically update shadow-wptr if needed, so that we
379 * can see partial progress of submits with large # of
380 * cmds.. otherwise we could needlessly stall waiting for
381 * ringbuffer state, simply due to looking at a shadow
382 * rptr value that has not been updated
383 */
384 if ((ibs % 32) == 0)
385 update_shadow_rptr(gpu, ring);
386 }
387
388 get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
389 rbmemptr_stats(ring, index, cpcycles_end));
390 get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_CONTEXT,
391 rbmemptr_stats(ring, index, alwayson_end));
392
393 /* Write the fence to the scratch register */
394 OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
395 OUT_RING(ring, submit->seqno);
396
397 /*
398 * Execute a CACHE_FLUSH_TS event. This will ensure that the
399 * timestamp is written to the memory and then triggers the interrupt
400 */
401 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
402 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
403 CP_EVENT_WRITE_0_IRQ);
404 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
405 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
406 OUT_RING(ring, submit->seqno);
407
408 trace_msm_gpu_submit_flush(submit, adreno_gpu->funcs->get_timestamp(gpu));
409
410 a6xx_flush(gpu, ring);
411 }
412
a6xx_emit_set_pseudo_reg(struct msm_ringbuffer * ring,struct a6xx_gpu * a6xx_gpu,struct msm_gpu_submitqueue * queue)413 void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
414 struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue)
415 {
416 u64 preempt_postamble;
417
418 OUT_PKT7(ring, CP_SET_PSEUDO_REG, 12);
419
420 OUT_RING(ring, SMMU_INFO);
421 /* don't save SMMU, we write the record from the kernel instead */
422 OUT_RING(ring, 0);
423 OUT_RING(ring, 0);
424
425 /* privileged and non secure buffer save */
426 OUT_RING(ring, NON_SECURE_SAVE_ADDR);
427 OUT_RING(ring, lower_32_bits(
428 a6xx_gpu->preempt_iova[ring->id]));
429 OUT_RING(ring, upper_32_bits(
430 a6xx_gpu->preempt_iova[ring->id]));
431
432 /* user context buffer save, seems to be unnused by fw */
433 OUT_RING(ring, NON_PRIV_SAVE_ADDR);
434 OUT_RING(ring, 0);
435 OUT_RING(ring, 0);
436
437 OUT_RING(ring, COUNTER);
438 /* seems OK to set to 0 to disable it */
439 OUT_RING(ring, 0);
440 OUT_RING(ring, 0);
441
442 /* Emit postamble to clear perfcounters */
443 preempt_postamble = a6xx_gpu->preempt_postamble_iova;
444
445 OUT_PKT7(ring, CP_SET_AMBLE, 3);
446 OUT_RING(ring, lower_32_bits(preempt_postamble));
447 OUT_RING(ring, upper_32_bits(preempt_postamble));
448 OUT_RING(ring, CP_SET_AMBLE_2_DWORDS(
449 a6xx_gpu->preempt_postamble_len) |
450 CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE));
451 }
452
a7xx_submit(struct msm_gpu * gpu,struct msm_gem_submit * submit)453 static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
454 {
455 unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
456 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
457 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
458 struct msm_ringbuffer *ring = submit->ring;
459 u32 rbbm_perfctr_cp0, cp_always_on_context;
460 unsigned int i, ibs = 0;
461
462 adreno_check_and_reenable_stall(adreno_gpu);
463
464 /*
465 * Toggle concurrent binning for pagetable switch and set the thread to
466 * BR since only it can execute the pagetable switch packets.
467 */
468 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
469 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
470
471 a6xx_set_pagetable(a6xx_gpu, ring, submit);
472
473 /*
474 * If preemption is enabled, then set the pseudo register for the save
475 * sequence
476 */
477 if (gpu->nr_rings > 1)
478 a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue);
479
480 if (adreno_is_a8xx(adreno_gpu)) {
481 rbbm_perfctr_cp0 = REG_A8XX_RBBM_PERFCTR_CP(0);
482 cp_always_on_context = REG_A8XX_CP_ALWAYS_ON_CONTEXT;
483 } else {
484 rbbm_perfctr_cp0 = REG_A7XX_RBBM_PERFCTR_CP(0);
485 cp_always_on_context = REG_A6XX_CP_ALWAYS_ON_CONTEXT;
486 }
487
488 get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_start));
489 get_stats_counter(ring, cp_always_on_context, rbmemptr_stats(ring, index, alwayson_start));
490
491 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
492 OUT_RING(ring, CP_SET_THREAD_BOTH);
493
494 OUT_PKT7(ring, CP_SET_MARKER, 1);
495 OUT_RING(ring, 0x101); /* IFPC disable */
496
497 if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
498 OUT_PKT7(ring, CP_SET_MARKER, 1);
499 OUT_RING(ring, 0x00d); /* IB1LIST start */
500 }
501
502 /* Submit the commands */
503 for (i = 0; i < submit->nr_cmds; i++) {
504 switch (submit->cmd[i].type) {
505 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
506 break;
507 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
508 if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
509 break;
510 fallthrough;
511 case MSM_SUBMIT_CMD_BUF:
512 OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
513 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
514 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
515 OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
516 ibs++;
517 break;
518 }
519
520 /*
521 * Periodically update shadow-wptr if needed, so that we
522 * can see partial progress of submits with large # of
523 * cmds.. otherwise we could needlessly stall waiting for
524 * ringbuffer state, simply due to looking at a shadow
525 * rptr value that has not been updated
526 */
527 if ((ibs % 32) == 0)
528 update_shadow_rptr(gpu, ring);
529 }
530
531 if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
532 OUT_PKT7(ring, CP_SET_MARKER, 1);
533 OUT_RING(ring, 0x00e); /* IB1LIST end */
534 }
535
536 get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_end));
537 get_stats_counter(ring, cp_always_on_context, rbmemptr_stats(ring, index, alwayson_end));
538
539 /* Write the fence to the scratch register */
540 if (adreno_is_a8xx(adreno_gpu)) {
541 OUT_PKT4(ring, REG_A8XX_CP_SCRATCH_GLOBAL(2), 1);
542 OUT_RING(ring, submit->seqno);
543 } else {
544 OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
545 OUT_RING(ring, submit->seqno);
546 }
547
548 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
549 OUT_RING(ring, CP_SET_THREAD_BR);
550
551 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
552 OUT_RING(ring, CCU_INVALIDATE_DEPTH);
553
554 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
555 OUT_RING(ring, CCU_INVALIDATE_COLOR);
556
557 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
558 OUT_RING(ring, CP_SET_THREAD_BV);
559
560 /*
561 * Make sure the timestamp is committed once BV pipe is
562 * completely done with this submission.
563 */
564 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
565 OUT_RING(ring, CACHE_CLEAN | BIT(27));
566 OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
567 OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
568 OUT_RING(ring, submit->seqno);
569
570 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
571 OUT_RING(ring, CP_SET_THREAD_BR);
572
573 /*
574 * This makes sure that BR doesn't race ahead and commit
575 * timestamp to memstore while BV is still processing
576 * this submission.
577 */
578 OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
579 OUT_RING(ring, 0);
580 OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
581 OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
582 OUT_RING(ring, submit->seqno);
583
584 a6xx_gpu->last_seqno[ring->id] = submit->seqno;
585
586 /* write the ringbuffer timestamp */
587 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
588 OUT_RING(ring, CACHE_CLEAN | CP_EVENT_WRITE_0_IRQ | BIT(27));
589 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
590 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
591 OUT_RING(ring, submit->seqno);
592
593 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
594 OUT_RING(ring, CP_SET_THREAD_BOTH);
595
596 OUT_PKT7(ring, CP_SET_MARKER, 1);
597 OUT_RING(ring, 0x100); /* IFPC enable */
598
599 /* If preemption is enabled */
600 if (gpu->nr_rings > 1) {
601 /* Yield the floor on command completion */
602 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
603
604 /*
605 * If dword[2:1] are non zero, they specify an address for
606 * the CP to write the value of dword[3] to on preemption
607 * complete. Write 0 to skip the write
608 */
609 OUT_RING(ring, 0x00);
610 OUT_RING(ring, 0x00);
611 /* Data value - not used if the address above is 0 */
612 OUT_RING(ring, 0x01);
613 /* generate interrupt on preemption completion */
614 OUT_RING(ring, 0x00);
615 }
616
617
618 trace_msm_gpu_submit_flush(submit, adreno_gpu->funcs->get_timestamp(gpu));
619
620 a6xx_flush(gpu, ring);
621
622 /* Check to see if we need to start preemption */
623 if (adreno_is_a8xx(adreno_gpu))
624 a8xx_preempt_trigger(gpu);
625 else
626 a6xx_preempt_trigger(gpu);
627 }
628
/*
 * Enable (@state true) or disable (@state false) hardware clock gating.
 *
 * Does nothing for GPUs that have neither a hwcg register list nor are
 * a7xx. The GMU CGC mode/delay/hysteresis registers are always programmed.
 * GPUs without a hwcg list then use the A7XX global load command; all
 * others get their hwcg list written, unless RBBM_CLOCK_CNTL already holds
 * the wanted value.
 */
static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	const struct adreno_reglist *reg;
	u32 clock_cntl_on, cgc_delay, cgc_hyst;
	bool toggle_sp_clock;
	u32 val;

	if (!adreno_gpu->info->a6xx->hwcg && !adreno_is_a7xx(adreno_gpu))
		return;

	/* Value of RBBM_CLOCK_CNTL when clock gating is on */
	if (adreno_is_a630(adreno_gpu))
		clock_cntl_on = 0x8aa8aa02;
	else if (adreno_is_a610(adreno_gpu) || adreno_is_a612(adreno_gpu))
		clock_cntl_on = 0xaaa8aa82;
	else if (adreno_is_a702(adreno_gpu))
		clock_cntl_on = 0xaaaaaa82;
	else
		clock_cntl_on = 0x8aa8aa82;

	/* GMU CGC delay and hysteresis are picked by the same criteria */
	if (adreno_is_a612(adreno_gpu)) {
		cgc_delay = 0x11;
		cgc_hyst = 0x55;
	} else if (adreno_is_a615_family(adreno_gpu)) {
		cgc_delay = 0x111;
		cgc_hyst = 0x555;
	} else {
		cgc_delay = 0x10111;
		cgc_hyst = 0x5555;
	}

	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
			state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
			state ? cgc_delay : 0);
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
			state ? cgc_hyst : 0);

	/* No register list: go through the global load command instead */
	if (!adreno_gpu->info->a6xx->hwcg) {
		gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
		gpu_write(gpu, REG_A7XX_RBBM_CGC_GLOBAL_LOAD_CMD, state ? 1 : 0);

		if (!state)
			return;

		gpu_write(gpu, REG_A7XX_RBBM_CGC_P2S_TRIG_CMD, 1);

		if (gpu_poll_timeout(gpu, REG_A7XX_RBBM_CGC_P2S_STATUS, val,
				     val & A7XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
			dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
			return;
		}

		gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
		return;
	}

	val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);

	/* Already in the requested state: leave the registers alone */
	if (state ? (val == clock_cntl_on) : !val)
		return;

	toggle_sp_clock = !adreno_is_a610_family(adreno_gpu) &&
			  !adreno_is_a7xx(adreno_gpu);

	/* The SP clock is turned off while the HWCG registers are written */
	if (toggle_sp_clock)
		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);

	/* The list ends at the first entry with a zero offset */
	for (reg = adreno_gpu->info->a6xx->hwcg; reg->offset; reg++)
		gpu_write(gpu, reg->offset, state ? reg->value : 0);

	/* SP clock back on */
	if (toggle_sp_clock)
		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);

	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
}
710
a6xx_set_cp_protect(struct msm_gpu * gpu)711 static void a6xx_set_cp_protect(struct msm_gpu *gpu)
712 {
713 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
714 const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
715 unsigned i;
716
717 /*
718 * Enable access protection to privileged registers, fault on an access
719 * protect violation and select the last span to protect from the start
720 * address all the way to the end of the register address space
721 */
722 gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL,
723 A6XX_CP_PROTECT_CNTL_ACCESS_PROT_EN |
724 A6XX_CP_PROTECT_CNTL_ACCESS_FAULT_ON_VIOL_EN |
725 A6XX_CP_PROTECT_CNTL_LAST_SPAN_INF_RANGE);
726
727 for (i = 0; i < protect->count - 1; i++) {
728 /* Intentionally skip writing to some registers */
729 if (protect->regs[i])
730 gpu_write(gpu, REG_A6XX_CP_PROTECT(i), protect->regs[i]);
731 }
732 /* last CP_PROTECT to have "infinite" length on the last entry */
733 gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]);
734 }
735
a6xx_calc_ubwc_config(struct adreno_gpu * gpu)736 static int a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
737 {
738 const struct qcom_ubwc_cfg_data *common_cfg;
739 struct qcom_ubwc_cfg_data *cfg = &gpu->_ubwc_config;
740
741 /* Inherit the common config and make some necessary fixups */
742 common_cfg = qcom_ubwc_config_get_data();
743 if (IS_ERR(common_cfg))
744 return PTR_ERR(common_cfg);
745
746 /* Copy the data into the internal struct to drop the const qualifier (temporarily) */
747 *cfg = *common_cfg;
748
749 /* Use common config as is for A8x */
750 if (!adreno_is_a8xx(gpu)) {
751 cfg->ubwc_swizzle = 0x6;
752 cfg->highest_bank_bit = 15;
753 }
754
755 if (adreno_is_a610(gpu)) {
756 cfg->highest_bank_bit = 13;
757 cfg->ubwc_swizzle = 0x7;
758 }
759
760 if (adreno_is_a612(gpu))
761 cfg->highest_bank_bit = 14;
762
763 if (adreno_is_a618(gpu))
764 cfg->highest_bank_bit = 14;
765
766 if (adreno_is_a619(gpu))
767 /* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
768 cfg->highest_bank_bit = 13;
769
770 if (adreno_is_a619_holi(gpu))
771 cfg->highest_bank_bit = 13;
772
773 if (adreno_is_a621(gpu))
774 cfg->highest_bank_bit = 13;
775
776 if (adreno_is_a623(gpu))
777 cfg->highest_bank_bit = 16;
778
779 if (adreno_is_a650(gpu) ||
780 adreno_is_a660(gpu) ||
781 adreno_is_a690(gpu) ||
782 adreno_is_a730(gpu) ||
783 adreno_is_a740_family(gpu)) {
784 /* TODO: get ddr type from bootloader and use 15 for LPDDR4 */
785 cfg->highest_bank_bit = 16;
786 }
787
788 if (adreno_is_a663(gpu)) {
789 cfg->highest_bank_bit = 13;
790 cfg->ubwc_swizzle = 0x4;
791 }
792
793 if (adreno_is_7c3(gpu))
794 cfg->highest_bank_bit = 14;
795
796 if (adreno_is_a702(gpu))
797 cfg->highest_bank_bit = 14;
798
799 if (cfg->highest_bank_bit != common_cfg->highest_bank_bit)
800 DRM_WARN_ONCE("Inconclusive highest_bank_bit value: %u (GPU) vs %u (UBWC_CFG)\n",
801 cfg->highest_bank_bit, common_cfg->highest_bank_bit);
802
803 if (cfg->ubwc_swizzle != common_cfg->ubwc_swizzle)
804 DRM_WARN_ONCE("Inconclusive ubwc_swizzle value: %u (GPU) vs %u (UBWC_CFG)\n",
805 cfg->ubwc_swizzle, common_cfg->ubwc_swizzle);
806
807 gpu->ubwc_config = &gpu->_ubwc_config;
808
809 return 0;
810 }
811
a6xx_set_ubwc_config(struct msm_gpu * gpu)812 static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
813 {
814 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
815 const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config;
816 /*
817 * We subtract 13 from the highest bank bit (13 is the minimum value
818 * allowed by hw) and write the lowest two bits of the remaining value
819 * as hbb_lo and the one above it as hbb_hi to the hardware.
820 */
821 BUG_ON(cfg->highest_bank_bit < 13);
822 u32 hbb = cfg->highest_bank_bit - 13;
823 bool rgb565_predicator = cfg->ubwc_enc_version >= UBWC_4_0;
824 u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2);
825 bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg);
826 bool amsbc = cfg->ubwc_enc_version >= UBWC_3_0;
827 bool min_acc_len_64b = false;
828 u8 uavflagprd_inv = 0;
829 u32 hbb_hi = hbb >> 2;
830 u32 hbb_lo = hbb & 3;
831
832 if (adreno_is_a650_family(adreno_gpu) || adreno_is_a7xx(adreno_gpu))
833 uavflagprd_inv = 2;
834
835 if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
836 min_acc_len_64b = true;
837
838 gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
839 level2_swizzling_dis << 12 |
840 rgb565_predicator << 11 |
841 hbb_hi << 10 | amsbc << 4 |
842 min_acc_len_64b << 3 |
843 hbb_lo << 1 | ubwc_mode);
844
845 gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
846 level2_swizzling_dis << 6 | hbb_hi << 4 |
847 min_acc_len_64b << 3 |
848 hbb_lo << 1 | ubwc_mode);
849
850 gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
851 level2_swizzling_dis << 12 | hbb_hi << 10 |
852 uavflagprd_inv << 4 |
853 min_acc_len_64b << 3 |
854 hbb_lo << 1 | ubwc_mode);
855
856 if (adreno_is_a7xx(adreno_gpu)) {
857 for (u32 pipe_id = PIPE_BR; pipe_id <= PIPE_BV; pipe_id++) {
858 gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
859 A7XX_CP_APERTURE_CNTL_HOST_PIPE(pipe_id));
860 gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL,
861 FIELD_PREP(GENMASK(8, 5), hbb_lo));
862 }
863 gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
864 A7XX_CP_APERTURE_CNTL_HOST_PIPE(PIPE_NONE));
865 }
866
867 gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL,
868 min_acc_len_64b << 23 | hbb_lo << 21);
869
870 gpu_write(gpu, REG_A6XX_RBBM_NC_MODE_CNTL,
871 cfg->macrotile_mode);
872 }
873
a7xx_patch_pwrup_reglist(struct msm_gpu * gpu)874 static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
875 {
876 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
877 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
878 const struct adreno_reglist_list *reglist;
879 const struct adreno_reglist_pipe_list *dyn_pwrup_reglist;
880 void *ptr = a6xx_gpu->pwrup_reglist_ptr;
881 struct cpu_gpu_lock *lock = ptr;
882 u32 *dest = (u32 *)&lock->regs[0];
883 u32 dyn_pwrup_reglist_count = 0;
884 int i;
885
886 lock->gpu_req = lock->cpu_req = lock->turn = 0;
887
888 reglist = adreno_gpu->info->a6xx->ifpc_reglist;
889 if (reglist) {
890 lock->ifpc_list_len = reglist->count;
891
892 /*
893 * For each entry in each of the lists, write the offset and the current
894 * register value into the GPU buffer
895 */
896 for (i = 0; i < reglist->count; i++) {
897 *dest++ = reglist->regs[i];
898 *dest++ = gpu_read(gpu, reglist->regs[i]);
899 }
900 }
901
902 reglist = adreno_gpu->info->a6xx->pwrup_reglist;
903 lock->preemption_list_len = reglist->count;
904
905 for (i = 0; i < reglist->count; i++) {
906 *dest++ = reglist->regs[i];
907 *dest++ = gpu_read(gpu, reglist->regs[i]);
908 }
909
910 /*
911 * The overall register list is composed of
912 * 1. Static IFPC-only registers
913 * 2. Static IFPC + preemption registers
914 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
915 *
916 * The first two lists are static. Size of these lists are stored as
917 * number of pairs in ifpc_list_len and preemption_list_len
918 * respectively. With concurrent binning, Some of the perfcounter
919 * registers being virtualized, CP needs to know the pipe id to program
920 * the aperture inorder to restore the same. Thus, third list is a
921 * dynamic list with triplets as
922 * (<aperture, shifted 12 bits> <address> <data>), and the length is
923 * stored as number for triplets in dynamic_list_len.
924 */
925 dyn_pwrup_reglist = adreno_gpu->info->a6xx->dyn_pwrup_reglist;
926 if (dyn_pwrup_reglist) {
927 for (u32 pipe_id = PIPE_BR; pipe_id <= PIPE_BV; pipe_id++) {
928 gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
929 A7XX_CP_APERTURE_CNTL_HOST_PIPE(pipe_id));
930 for (i = 0; i < dyn_pwrup_reglist->count; i++) {
931 if ((dyn_pwrup_reglist->regs[i].pipe & BIT(pipe_id)) == 0)
932 continue;
933 *dest++ = A7XX_CP_APERTURE_CNTL_HOST_PIPE(pipe_id);
934 *dest++ = dyn_pwrup_reglist->regs[i].offset;
935 *dest++ = gpu_read(gpu, dyn_pwrup_reglist->regs[i].offset);
936 dyn_pwrup_reglist_count++;
937 }
938 }
939 gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
940 A7XX_CP_APERTURE_CNTL_HOST_PIPE(PIPE_NONE));
941 }
942 lock->dynamic_list_len = dyn_pwrup_reglist_count;
943 }
944
a7xx_preempt_start(struct msm_gpu * gpu)945 static int a7xx_preempt_start(struct msm_gpu *gpu)
946 {
947 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
948 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
949 struct msm_ringbuffer *ring = gpu->rb[0];
950
951 if (gpu->nr_rings <= 1)
952 return 0;
953
954 /* Turn CP protection off */
955 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
956 OUT_RING(ring, 0);
957
958 a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL);
959
960 /* Yield the floor on command completion */
961 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
962 OUT_RING(ring, 0x00);
963 OUT_RING(ring, 0x00);
964 OUT_RING(ring, 0x00);
965 /* Generate interrupt on preemption completion */
966 OUT_RING(ring, 0x00);
967
968 a6xx_flush(gpu, ring);
969
970 return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
971 }
972
a6xx_cp_init(struct msm_gpu * gpu)973 static int a6xx_cp_init(struct msm_gpu *gpu)
974 {
975 struct msm_ringbuffer *ring = gpu->rb[0];
976
977 OUT_PKT7(ring, CP_ME_INIT, 8);
978
979 OUT_RING(ring, 0x0000002f);
980
981 /* Enable multiple hardware contexts */
982 OUT_RING(ring, 0x00000003);
983
984 /* Enable error detection */
985 OUT_RING(ring, 0x20000000);
986
987 /* Don't enable header dump */
988 OUT_RING(ring, 0x00000000);
989 OUT_RING(ring, 0x00000000);
990
991 /* No workarounds enabled */
992 OUT_RING(ring, 0x00000000);
993
994 /* Pad rest of the cmds with 0's */
995 OUT_RING(ring, 0x00000000);
996 OUT_RING(ring, 0x00000000);
997
998 a6xx_flush(gpu, ring);
999 return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
1000 }
1001
a7xx_cp_init(struct msm_gpu * gpu)1002 static int a7xx_cp_init(struct msm_gpu *gpu)
1003 {
1004 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1005 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1006 struct msm_ringbuffer *ring = gpu->rb[0];
1007 u32 mask;
1008
1009 /* Disable concurrent binning before sending CP init */
1010 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
1011 OUT_RING(ring, BIT(27));
1012
1013 OUT_PKT7(ring, CP_ME_INIT, 7);
1014
1015 /* Use multiple HW contexts */
1016 mask = BIT(0);
1017
1018 /* Enable error detection */
1019 mask |= BIT(1);
1020
1021 /* Set default reset state */
1022 mask |= BIT(3);
1023
1024 /* Disable save/restore of performance counters across preemption */
1025 mask |= BIT(6);
1026
1027 /* Enable the register init list with the spinlock */
1028 mask |= BIT(8);
1029
1030 OUT_RING(ring, mask);
1031
1032 /* Enable multiple hardware contexts */
1033 OUT_RING(ring, 0x00000003);
1034
1035 /* Enable error detection */
1036 OUT_RING(ring, 0x20000000);
1037
1038 /* Operation mode mask */
1039 OUT_RING(ring, 0x00000002);
1040
1041 /* *Don't* send a power up reg list for concurrent binning (TODO) */
1042 /* Lo address */
1043 OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova));
1044 /* Hi address */
1045 OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova));
1046 /* BIT(31) set => read the regs from the list */
1047 OUT_RING(ring, BIT(31));
1048
1049 a6xx_flush(gpu, ring);
1050 return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
1051 }
1052
1053 /*
1054 * Check that the microcode version is new enough to include several key
1055 * security fixes. Return true if the ucode is safe.
1056 */
a6xx_ucode_check_version(struct a6xx_gpu * a6xx_gpu,struct drm_gem_object * obj)1057 static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
1058 struct drm_gem_object *obj)
1059 {
1060 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1061 struct msm_gpu *gpu = &adreno_gpu->base;
1062 const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE];
1063 u32 *buf = msm_gem_get_vaddr(obj);
1064 bool ret = false;
1065
1066 if (IS_ERR(buf))
1067 return false;
1068
1069 /* A7xx is safe! */
1070 if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu) || adreno_is_a8xx(adreno_gpu))
1071 return true;
1072
1073 /*
1074 * Targets up to a640 (a618, a630 and a640) need to check for a
1075 * microcode version that is patched to support the whereami opcode or
1076 * one that is new enough to include it by default.
1077 *
1078 * a650 tier targets don't need whereami but still need to be
1079 * equal to or newer than 0.95 for other security fixes
1080 *
1081 * a660 targets have all the critical security fixes from the start
1082 */
1083 if (!strcmp(sqe_name, "a630_sqe.fw")) {
1084 /*
1085 * If the lowest nibble is 0xa that is an indication that this
1086 * microcode has been patched. The actual version is in dword
1087 * [3] but we only care about the patchlevel which is the lowest
1088 * nibble of dword [3]
1089 *
1090 * Otherwise check that the firmware is greater than or equal
1091 * to 1.90 which was the first version that had this fix built
1092 * in
1093 */
1094 if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
1095 (buf[0] & 0xfff) >= 0x190) {
1096 a6xx_gpu->has_whereami = true;
1097 ret = true;
1098 goto out;
1099 }
1100
1101 DRM_DEV_ERROR(&gpu->pdev->dev,
1102 "a630 SQE ucode is too old. Have version %x need at least %x\n",
1103 buf[0] & 0xfff, 0x190);
1104 } else if (!strcmp(sqe_name, "a650_sqe.fw")) {
1105 if ((buf[0] & 0xfff) >= 0x095) {
1106 ret = true;
1107 goto out;
1108 }
1109
1110 DRM_DEV_ERROR(&gpu->pdev->dev,
1111 "a650 SQE ucode is too old. Have version %x need at least %x\n",
1112 buf[0] & 0xfff, 0x095);
1113 } else if (!strcmp(sqe_name, "a660_sqe.fw")) {
1114 ret = true;
1115 } else {
1116 DRM_DEV_ERROR(&gpu->pdev->dev,
1117 "unknown GPU, add it to a6xx_ucode_check_version()!!\n");
1118 }
1119 out:
1120 msm_gem_put_vaddr(obj);
1121 return ret;
1122 }
1123
a6xx_ucode_load(struct msm_gpu * gpu)1124 static int a6xx_ucode_load(struct msm_gpu *gpu)
1125 {
1126 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1127 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1128
1129 if (!a6xx_gpu->sqe_bo) {
1130 a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu,
1131 adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova);
1132
1133 if (IS_ERR(a6xx_gpu->sqe_bo)) {
1134 int ret = PTR_ERR(a6xx_gpu->sqe_bo);
1135
1136 a6xx_gpu->sqe_bo = NULL;
1137 DRM_DEV_ERROR(&gpu->pdev->dev,
1138 "Could not allocate SQE ucode: %d\n", ret);
1139
1140 return ret;
1141 }
1142
1143 msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
1144 if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
1145 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm);
1146 drm_gem_object_put(a6xx_gpu->sqe_bo);
1147
1148 a6xx_gpu->sqe_bo = NULL;
1149 return -EPERM;
1150 }
1151 }
1152
1153 if (!a6xx_gpu->aqe_bo && adreno_gpu->fw[ADRENO_FW_AQE]) {
1154 a6xx_gpu->aqe_bo = adreno_fw_create_bo(gpu,
1155 adreno_gpu->fw[ADRENO_FW_AQE], &a6xx_gpu->aqe_iova);
1156
1157 if (IS_ERR(a6xx_gpu->aqe_bo)) {
1158 int ret = PTR_ERR(a6xx_gpu->aqe_bo);
1159
1160 a6xx_gpu->aqe_bo = NULL;
1161 DRM_DEV_ERROR(&gpu->pdev->dev,
1162 "Could not allocate AQE ucode: %d\n", ret);
1163
1164 return ret;
1165 }
1166
1167 msm_gem_object_set_name(a6xx_gpu->aqe_bo, "aqefw");
1168 }
1169
1170 /*
1171 * Expanded APRIV and targets that support WHERE_AM_I both need a
1172 * privileged buffer to store the RPTR shadow
1173 */
1174 if ((adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) &&
1175 !a6xx_gpu->shadow_bo) {
1176 a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
1177 sizeof(u32) * gpu->nr_rings,
1178 MSM_BO_WC | MSM_BO_MAP_PRIV,
1179 gpu->vm, &a6xx_gpu->shadow_bo,
1180 &a6xx_gpu->shadow_iova);
1181
1182 if (IS_ERR(a6xx_gpu->shadow))
1183 return PTR_ERR(a6xx_gpu->shadow);
1184
1185 msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow");
1186 }
1187
1188 a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PAGE_SIZE,
1189 MSM_BO_WC | MSM_BO_MAP_PRIV,
1190 gpu->vm, &a6xx_gpu->pwrup_reglist_bo,
1191 &a6xx_gpu->pwrup_reglist_iova);
1192
1193 if (IS_ERR(a6xx_gpu->pwrup_reglist_ptr))
1194 return PTR_ERR(a6xx_gpu->pwrup_reglist_ptr);
1195
1196 msm_gem_object_set_name(a6xx_gpu->pwrup_reglist_bo, "pwrup_reglist");
1197
1198 return 0;
1199 }
1200
a6xx_zap_shader_init(struct msm_gpu * gpu)1201 int a6xx_zap_shader_init(struct msm_gpu *gpu)
1202 {
1203 static bool loaded;
1204 int ret;
1205
1206 if (loaded)
1207 return 0;
1208
1209 ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
1210
1211 loaded = !ret;
1212 return ret;
1213 }
1214
/*
 * RBBM_INT_0 sources enabled on non-A7xx targets: hw_init() writes this
 * value to REG_A6XX_RBBM_INT_0_MASK when adreno_is_a7xx() is false.
 */
#define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
		       A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
		       A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
		       A6XX_RBBM_INT_0_MASK_CP_IB2 | \
		       A6XX_RBBM_INT_0_MASK_CP_IB1 | \
		       A6XX_RBBM_INT_0_MASK_CP_RB | \
		       A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
		       A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
		       A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
		       A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
		       A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)

/*
 * RBBM_INT_0 sources enabled on A7xx targets: hw_init() writes this value
 * to REG_A6XX_RBBM_INT_0_MASK when adreno_is_a7xx() is true.
 */
#define A7XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
		       A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
		       A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
		       A6XX_RBBM_INT_0_MASK_CP_SW | \
		       A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
		       A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
		       A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
		       A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
		       A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
		       A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
		       A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
		       A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
		       A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
		       A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)

/*
 * Base APRIV control bits for A7xx. hw_init() writes this value to
 * REG_A7XX_CP_BV_APRIV_CNTL and REG_A7XX_CP_LPAC_APRIV_CNTL.
 */
#define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \
			 A6XX_CP_APRIV_CNTL_RBFETCH | \
			 A6XX_CP_APRIV_CNTL_RBPRIVLEVEL | \
			 A6XX_CP_APRIV_CNTL_RBRPWB)

/*
 * The base A7xx APRIV bits plus CDREAD and CDWRITE. hw_init() writes this
 * value to REG_A6XX_CP_APRIV_CNTL on A7xx.
 */
#define A7XX_BR_APRIVMASK (A7XX_APRIV_MASK | \
			   A6XX_CP_APRIV_CNTL_CDREAD | \
			   A6XX_CP_APRIV_CNTL_CDWRITE)
1250
/*
 * Program the GPU registers, start the CP and leave secure mode.
 *
 * The sequence is:
 *  1. unless a GMU wrapper is used, ask the GMU to keep the GPU powered
 *     (GMU_OOB_GPU_SET),
 *  2. write the per-target static register configuration,
 *  3. run adreno_hw_init(), point the CP at the SQE firmware and ring 0 and
 *     start the SQE,
 *  4. send the CP init packet (a7xx_cp_init() or a6xx_cp_init()),
 *  5. leave secure mode, through the zap shader when it loads or by writing
 *     RBBM_SECVID_TRUST_CNTL directly when it reports -ENODEV,
 *  6. at the out: label, start preemption and release the GMU OOB votes.
 *
 * a6xx_hw_init() calls this with the GMU lock held.
 *
 * Return: 0 on success or a negative error code.
 */
static int hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	u64 gmem_range_min;
	unsigned int i;
	int ret;

	if (!adreno_has_gmu_wrapper(adreno_gpu)) {
		/* Make sure the GMU keeps the GPU on while we set it up */
		ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
		if (ret)
			return ret;
	}

	/*
	 * Clear GBIF halt in case GX domain was not collapsed. Each write is
	 * followed by a read of the same register (presumably so the write
	 * has landed before continuing - confirm).
	 */
	if (adreno_is_a619_holi(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
		gpu_read(gpu, REG_A6XX_GBIF_HALT);

		gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, 0);
		gpu_read(gpu, REG_A6XX_RBBM_GPR0_CNTL);
	} else if (a6xx_has_gbif(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
		gpu_read(gpu, REG_A6XX_GBIF_HALT);

		gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
		gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT);
	}

	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);

	if (adreno_is_a619_holi(adreno_gpu))
		a6xx_sptprac_enable(gmu);

	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	if (!adreno_is_a7xx(adreno_gpu)) {
		/* Turn on 64 bit addressing for all blocks */
		gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
	}

	/* enable hardware clockgating */
	a6xx_set_hwcg(gpu, true);

	/* For gmuwrapper implementations, do the VBIF/GBIF CX configuration here */
	if (adreno_is_a610_family(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
	}

	/* Client QoS control: which register and value depends on the target */
	if (adreno_is_a610_family(adreno_gpu) ||
	    adreno_is_a640_family(adreno_gpu) ||
	    adreno_is_a650_family(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3);
	} else if (adreno_is_a7xx(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x2120212);
	} else {
		gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
	}

	if (adreno_is_a630(adreno_gpu))
		gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);

	if (adreno_is_a7xx(adreno_gpu))
		gpu_write(gpu, REG_A6XX_UCHE_GBIF_GX_CONFIG, 0x10240e0);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);

	/* Disable L2 bypass in the UCHE */
	if (adreno_is_a7xx(adreno_gpu)) {
		gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
	} else {
		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0);
		gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
	}

	/* a650 family, a702 and a730 skip the GMEM range setup */
	if (!(adreno_is_a650_family(adreno_gpu) ||
	      adreno_is_a702(adreno_gpu) ||
	      adreno_is_a730(adreno_gpu))) {
		gmem_range_min = adreno_is_a740_family(adreno_gpu) ? SZ_16M : SZ_1M;

		/* Set the GMEM VA range [gmem_range_min:gmem_range_min + gmem - 1] */
		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN, gmem_range_min);

		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX,
			gmem_range_min + adreno_gpu->info->gmem - 1);
	}

	if (adreno_is_a7xx(adreno_gpu))
		gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, BIT(23));
	else {
		gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
		gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);
	}

	/* CP ROQ thresholds; A7xx targets are left untouched here */
	if (adreno_is_a640_family(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
	} else if (adreno_is_a610_family(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060);
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16);
	} else if (!adreno_is_a7xx(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
	}

	if (adreno_is_a660_family(adreno_gpu))
		gpu_write(gpu, REG_A7XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);

	/* Setting the mem pool size */
	if (adreno_is_a610(adreno_gpu) || adreno_is_a612(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 48);
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 47);
	} else if (adreno_is_a702(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 64);
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 63);
	} else if (!adreno_is_a7xx(adreno_gpu))
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);


	/* Set the default primFifo threshold values */
	if (adreno_gpu->info->a6xx->prim_fifo_threshold)
		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL,
			  adreno_gpu->info->a6xx->prim_fifo_threshold);

	/* Set the AHB default slave response to "ERROR" */
	gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);

	if (adreno_is_a7xx(adreno_gpu)) {
		/* Turn on the IFPC counter (countable 4 on XOCLK4) */
		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1,
			  FIELD_PREP(GENMASK(7, 0), 0x4));
	}

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT);

	a6xx_set_ubwc_config(gpu);

	/*
	 * Enable fault detection. Bit 30 is set for every target; only the
	 * low bits of the value differ per target.
	 */
	if (adreno_is_a612(adreno_gpu) ||
	    adreno_is_a730(adreno_gpu) ||
	    adreno_is_a740_family(adreno_gpu))
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff);
	else if (adreno_is_a690(adreno_gpu))
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x4fffff);
	else if (adreno_is_a619(adreno_gpu))
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff);
	else if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3ffff);
	else
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x1fffff);

	gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1);

	/* Set weights for bicubic filtering */
	if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0);
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1),
			0x3fe05ff4);
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2),
			0x3fa0ebee);
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3),
			0x3f5193ed);
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4),
			0x3f0243f0);
	}

	/* Set up the CX GMU counter 0 to count busy ticks */
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);

	/* Enable the power counter */
	gmu_rmw(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, BIT(5));
	gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);

	/* Protect registers from the CP */
	a6xx_set_cp_protect(gpu);

	if (adreno_is_a660_family(adreno_gpu)) {
		if (adreno_is_a690(adreno_gpu))
			gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x00028801);
		else
			gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
	} else if (adreno_is_a702(adreno_gpu)) {
		/* Something to do with the HLSQ cluster */
		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(24));
	}

	if (adreno_is_a690(adreno_gpu))
		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90);
	/* Set dualQ + disable afull for A660 GPU */
	else if (adreno_is_a660(adreno_gpu) || adreno_is_a663(adreno_gpu))
		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
	else if (adreno_is_a7xx(adreno_gpu))
		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG,
			  FIELD_PREP(GENMASK(19, 16), 6) |
			  FIELD_PREP(GENMASK(15, 12), 6) |
			  FIELD_PREP(GENMASK(11, 8), 9) |
			  BIT(3) | BIT(2) |
			  FIELD_PREP(GENMASK(1, 0), 2));

	/* Enable expanded apriv for targets that support it */
	if (gpu->hw_apriv) {
		if (adreno_is_a7xx(adreno_gpu)) {
			gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
				  A7XX_BR_APRIVMASK);
			gpu_write(gpu, REG_A7XX_CP_BV_APRIV_CNTL,
				  A7XX_APRIV_MASK);
			gpu_write(gpu, REG_A7XX_CP_LPAC_APRIV_CNTL,
				  A7XX_APRIV_MASK);
		} else
			gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
				  BIT(6) | BIT(5) | BIT(3) | BIT(2) | BIT(1));
	}

	if (adreno_is_a750(adreno_gpu)) {
		/* Disable ubwc merged UFC request feature */
		gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(19), BIT(19));

		/* Enable TP flaghint and other performance settings */
		gpu_write(gpu, REG_A6XX_TPL1_DBG_ECO_CNTL1, 0xc0700);
	} else if (adreno_is_a7xx(adreno_gpu)) {
		/* Disable non-ubwc read reqs from passing write reqs */
		gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(11), BIT(11));
	}

	/* Enable interrupts */
	gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK,
		  adreno_is_a7xx(adreno_gpu) ? A7XX_INT_MASK : A6XX_INT_MASK);

	/* From here on, failures leave through out: so the OOB vote is dropped */
	ret = adreno_hw_init(gpu);
	if (ret)
		goto out;

	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);

	/* Targets that support extended APRIV can use the RPTR shadow from
	 * hardware but all the other ones need to disable the feature. Targets
	 * that support the WHERE_AM_I opcode can use that instead
	 */
	if (adreno_gpu->base.hw_apriv)
		gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
	else
		gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
			MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Configure the RPTR shadow if needed: */
	if (a6xx_gpu->shadow_bo) {
		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR,
			shadowptr(a6xx_gpu, gpu->rb[0]));
		/* Reset every ring's shadow entry; this i shadows the outer one */
		for (unsigned int i = 0; i < gpu->nr_rings; i++)
			a6xx_gpu->shadow[i] = 0;
	}

	/* ..which means "always" on A7xx, also for BV shadow */
	if (adreno_is_a7xx(adreno_gpu)) {
		gpu_write64(gpu, REG_A7XX_CP_BV_RB_RPTR_ADDR,
			    rbmemptr(gpu->rb[0], bv_rptr));
	}

	a6xx_preempt_hw_init(gpu);

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];

	for (i = 0; i < gpu->nr_rings; i++)
		gpu->rb[i]->cur_ctx_seqno = 0;

	/* Enable the SQE to start the CP engine */
	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);

	/* The power-up register list is filled in only once per a6xx_gpu */
	if (adreno_is_a7xx(adreno_gpu) && !a6xx_gpu->pwrup_reglist_emitted) {
		a7xx_patch_pwrup_reglist(gpu);
		a6xx_gpu->pwrup_reglist_emitted = true;
	}

	ret = adreno_is_a7xx(adreno_gpu) ? a7xx_cp_init(gpu) : a6xx_cp_init(gpu);
	if (ret)
		goto out;

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a6xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a6xx_flush(gpu, gpu->rb[0]);
		/*
		 * NOTE(review): this return bypasses the out: label, so the
		 * a6xx_gmu_clear_oob() calls below are not reached on this
		 * path - confirm that is intended.
		 */
		if (!a6xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
		 * if you mess this up you are about to crash horribly)
		 */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
		ret = 0;
	} else {
		/* NOTE(review): like the return above, this skips out: */
		return ret;
	}

out:
	/* With a GMU wrapper no OOB vote was taken above, so we are done */
	if (adreno_has_gmu_wrapper(adreno_gpu))
		return ret;

	/* Last step - yield the ringbuffer (the return value is not checked) */
	a7xx_preempt_start(gpu);

	/*
	 * Tell the GMU that we are done touching the GPU and it can start power
	 * management
	 */
	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);

	if (a6xx_gpu->gmu.legacy) {
		/* Take the GMU out of its special boot mode */
		a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
	}

	/*
	 * On success, and when sysprof_active reads greater than 1, take the
	 * perfcounter OOB vote and record that in gmu->status.
	 */
	if (!ret && (refcount_read(&gpu->sysprof_active) > 1)) {
		ret = a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET);
		if (!ret)
			set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status);
	}

	return ret;
}
1618
a6xx_hw_init(struct msm_gpu * gpu)1619 static int a6xx_hw_init(struct msm_gpu *gpu)
1620 {
1621 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1622 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1623 int ret;
1624
1625 mutex_lock(&a6xx_gpu->gmu.lock);
1626 ret = hw_init(gpu);
1627 mutex_unlock(&a6xx_gpu->gmu.lock);
1628
1629 return ret;
1630 }
1631
a6xx_dump(struct msm_gpu * gpu)1632 static void a6xx_dump(struct msm_gpu *gpu)
1633 {
1634 DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n",
1635 gpu_read(gpu, REG_A6XX_RBBM_STATUS));
1636 adreno_dump(gpu);
1637 }
1638
/*
 * Recover the GPU after a hang.
 *
 * The function dumps state, halts the SQE when GX is on, drops the runtime
 * PM references so that the device suspends, waits up to one second for the
 * gmu->pd_gate completion (signalled through the genpd notifier registered
 * on the CX power domain), then takes the references back and re-runs
 * msm_gpu_hw_init(). a6xx_gpu->hung is set for the duration so that the
 * suspend path can apply its recovery-specific handling.
 */
static void a6xx_recover(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int active_submits;

	adreno_dump_info(gpu);

	if (adreno_gpu->funcs->gx_is_on(adreno_gpu)) {
		/* Sometimes crashstate capture is skipped, so SQE should be halted here again */
		gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);

		/* Extra register dump, only when the hang_debug option is set */
		if (hang_debug)
			a6xx_dump(gpu);

	}

	/*
	 * To handle recovery specific sequences during the rpm suspend we are
	 * about to trigger
	 */

	a6xx_gpu->hung = true;

	/* Autosuspend is switched back on further down, after the wait */
	pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);

	/* active_submit won't change until we make a submission */
	mutex_lock(&gpu->active_lock);
	active_submits = gpu->active_submits;

	/*
	 * Temporarily clear active_submits count to silence a WARN() in the
	 * runtime suspend cb
	 */
	gpu->active_submits = 0;

	if (adreno_has_gmu_wrapper(adreno_gpu) || adreno_has_rgmu(adreno_gpu)) {
		/* Drain the outstanding traffic on memory buses */
		adreno_gpu->funcs->bus_halt(adreno_gpu, true);

		/* Reset the GPU to a clean state */
		a6xx_gpu_sw_reset(gpu, true);
		a6xx_gpu_sw_reset(gpu, false);
	}

	/* Arm the completion and notifier before the references are dropped */
	reinit_completion(&gmu->pd_gate);
	dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
	dev_pm_genpd_synced_poweroff(gmu->cxpd);

	/* Drop the rpm refcount from active submits */
	if (active_submits)
		pm_runtime_put(&gpu->pdev->dev);

	/* And the final one from recover worker */
	pm_runtime_put_sync(&gpu->pdev->dev);

	/* A timeout is only logged; recovery carries on regardless */
	if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
		DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");

	dev_pm_genpd_remove_notifier(gmu->cxpd);

	pm_runtime_use_autosuspend(&gpu->pdev->dev);

	/* Take back the references dropped above, in the same pairing */
	if (active_submits)
		pm_runtime_get(&gpu->pdev->dev);

	pm_runtime_get_sync(&gpu->pdev->dev);

	gpu->active_submits = active_submits;
	mutex_unlock(&gpu->active_lock);

	/* Bring the hardware back up, then leave the "hung" state */
	msm_gpu_hw_init(gpu);
	a6xx_gpu->hung = false;
}
1714
/*
 * Translate a UCHE-side fault "mid" into the name of the faulting block.
 *
 * @gpu: GPU whose REG_A6XX_UCHE_CLIENT_PF register supplies the client id
 * @mid: mid value extracted from FSYNR1 by the caller
 *
 * Return: a constant string. "UNKNOWN" is returned when @mid is not one of
 * the values handled for this GPU generation; "UCHE" or "UCHE_LPAC" is
 * returned when the client id cannot be resolved to a more precise block.
 */
static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	static const char * const uche_clients[7] = {
		"VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
	};
	u32 val;

	/* Reject mids that no branch below knows how to decode */
	if (adreno_is_a7xx(adreno_gpu)) {
		if (mid != 1 && mid != 2 && mid != 3 && mid != 8)
			return "UNKNOWN";
	} else {
		if (mid < 1 || mid > 3)
			return "UNKNOWN";
	}

	/*
	 * The source of the data depends on the mid ID read from FSYNR1.
	 * and the client ID read from the UCHE block
	 */
	val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);

	if (adreno_is_a7xx(adreno_gpu)) {
		/* Bit 3 for mid=3 indicates BR or BV */
		static const char * const uche_clients_a7xx[16] = {
			"BR_VFD", "BR_SP", "BR_VSC", "BR_VPC",
			"BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
			"BV_VFD", "BV_SP", "BV_VSC", "BV_VPC",
			"BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
		};

		/* LPAC has the same clients as BR and BV, but because it is
		 * compute-only some of them do not exist and there are holes
		 * in the array.
		 */
		static const char * const uche_clients_lpac_a7xx[8] = {
			"-", "LPAC_SP", "-", "-",
			"LPAC_HLSQ", "-", "-", "LPAC_TP",
		};

		val &= GENMASK(6, 0);

		/* mid=3 refers to BR or BV */
		if (mid == 3) {
			if (val < ARRAY_SIZE(uche_clients_a7xx))
				return uche_clients_a7xx[val];
			else
				return "UCHE";
		}

		/* mid=8 refers to LPAC */
		if (mid == 8) {
			if (val < ARRAY_SIZE(uche_clients_lpac_a7xx))
				return uche_clients_lpac_a7xx[val];
			else
				return "UCHE_LPAC";
		}

		/* mid=2 is a catchall for everything else in LPAC */
		if (mid == 2)
			return "UCHE_LPAC";

		/* mid=1 is a catchall for everything else in BR/BV */
		return "UCHE";
	} else if (adreno_is_a660_family(adreno_gpu)) {
		static const char * const uche_clients_a660[8] = {
			"VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", "TP",
		};

		static const char * const uche_clients_a660_not[8] = {
			"not VFD", "not SP", "not VSC", "not VPC",
			"not HLSQ", "not PC", "not LRZ", "not TP",
		};

		val &= GENMASK(6, 0);

		if (mid == 3 && val < ARRAY_SIZE(uche_clients_a660))
			return uche_clients_a660[val];

		if (mid == 1 && val < ARRAY_SIZE(uche_clients_a660_not))
			return uche_clients_a660_not[val];

		return "UCHE";
	} else {
		u32 client = val & 7;

		/*
		 * mid = 3 is most precise and refers to only one block per
		 * client. The table has seven entries while the masked client
		 * id can be 0..7, so an id without a table entry falls back to
		 * the generic name instead of reading past the array.
		 */
		if (mid == 3) {
			if (client < ARRAY_SIZE(uche_clients))
				return uche_clients[client];

			return "UCHE";
		}

		/* For mid=2 the source is TP or VFD except when the client id is 0 */
		if (mid == 2)
			return (client == 0) ? "TP" : "TP|VFD";

		/* For mid=1 just return "UCHE" as a catchall for everything else */
		return "UCHE";
	}
}
1811
/*
 * Map a fault source id to a block name. Ids without a fixed name here are
 * passed on to a6xx_uche_fault_block().
 */
static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
{
	switch (id) {
	case 0:
		return "CP";
	case 4:
		return "CCU";
	case 5:
		/* Id 5 has a fixed name only on A7xx */
		if (adreno_is_a7xx(to_adreno_gpu(gpu)))
			return "Flag cache";
		break;
	case 6:
		return "CDP Prefetch";
	case 7:
		return "GMU";
	default:
		break;
	}

	return a6xx_uche_fault_block(gpu, id);
}
1829
a6xx_fault_handler(void * arg,unsigned long iova,int flags,void * data)1830 static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1831 {
1832 struct msm_gpu *gpu = arg;
1833 struct adreno_smmu_fault_info *info = data;
1834 const char *block = "unknown";
1835
1836 u32 scratch[] = {
1837 gpu_read(gpu, REG_A6XX_CP_SCRATCH(4)),
1838 gpu_read(gpu, REG_A6XX_CP_SCRATCH(5)),
1839 gpu_read(gpu, REG_A6XX_CP_SCRATCH(6)),
1840 gpu_read(gpu, REG_A6XX_CP_SCRATCH(7)),
1841 };
1842
1843 if (info)
1844 block = a6xx_fault_block(gpu, info->fsynr1 & 0xff);
1845
1846 return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
1847 }
1848
a6xx_cp_hw_err_irq(struct msm_gpu * gpu)1849 static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
1850 {
1851 u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS);
1852
1853 if (status & A6XX_CP_INT_CP_OPCODE_ERROR) {
1854 u32 val;
1855
1856 gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1);
1857 val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA);
1858 dev_err_ratelimited(&gpu->pdev->dev,
1859 "CP | opcode error | possible opcode=0x%8.8X\n",
1860 val);
1861 }
1862
1863 if (status & A6XX_CP_INT_CP_UCODE_ERROR)
1864 dev_err_ratelimited(&gpu->pdev->dev,
1865 "CP ucode error interrupt\n");
1866
1867 if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR)
1868 dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n",
1869 gpu_read(gpu, REG_A6XX_CP_HW_FAULT));
1870
1871 if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1872 u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS);
1873
1874 dev_err_ratelimited(&gpu->pdev->dev,
1875 "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1876 val & (1 << 20) ? "READ" : "WRITE",
1877 (val & 0x3ffff), val);
1878 }
1879
1880 if (status & A6XX_CP_INT_CP_AHB_ERROR && !adreno_is_a7xx(to_adreno_gpu(gpu)))
1881 dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n");
1882
1883 if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR)
1884 dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n");
1885
1886 if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR)
1887 dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n");
1888
1889 }
1890
a6xx_fault_detect_irq(struct msm_gpu * gpu)1891 static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
1892 {
1893 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1894
1895 /*
1896 * If stalled on SMMU fault, we could trip the GPU's hang detection,
1897 * but the fault handler will trigger the devcore dump, and we want
1898 * to otherwise resume normally rather than killing the submit, so
1899 * just bail.
1900 */
1901 if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
1902 return;
1903
1904 DRM_DEV_ERROR(&gpu->pdev->dev,
1905 "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1906 ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
1907 gpu_read(gpu, REG_A6XX_RBBM_STATUS),
1908 gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
1909 gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
1910 gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
1911 gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
1912 gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
1913 gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
1914
1915 /* Turn off the hangcheck timer to keep it from bothering us */
1916 timer_delete(&gpu->hangcheck_timer);
1917
1918 /* Turn off interrupts to avoid triggering recovery again */
1919 gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 0);
1920
1921 kthread_queue_work(gpu->worker, &gpu->recover_work);
1922 }
1923
a7xx_sw_fuse_violation_irq(struct msm_gpu * gpu)1924 static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
1925 {
1926 u32 status;
1927
1928 status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS);
1929 gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0);
1930
1931 dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);
1932
1933 /*
1934 * Ignore FASTBLEND violations, because the HW will silently fall back
1935 * to legacy blending.
1936 */
1937 if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
1938 A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
1939 timer_delete(&gpu->hangcheck_timer);
1940
1941 kthread_queue_work(gpu->worker, &gpu->recover_work);
1942 }
1943 }
1944
/*
 * Write @on to the GMU power-collapse keepalive register.  Does nothing on
 * targets that only have a GMU wrapper.
 */
static void a6xx_gpu_keepalive_vote(struct msm_gpu *gpu, bool on)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;

	if (!adreno_has_gmu_wrapper(adreno_gpu))
		gmu_write(gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, on);
}
1955
irq_poll_fence(struct msm_gpu * gpu)1956 static int irq_poll_fence(struct msm_gpu *gpu)
1957 {
1958 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1959 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1960 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1961 u32 status;
1962
1963 if (adreno_has_gmu_wrapper(adreno_gpu))
1964 return 0;
1965
1966 if (gmu_poll_timeout_atomic(gmu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, status, !status, 1, 100)) {
1967 u32 rbbm_unmasked = gmu_read(gmu, REG_A6XX_GMU_RBBM_INT_UNMASKED_STATUS);
1968
1969 dev_err_ratelimited(&gpu->pdev->dev,
1970 "irq fence poll timeout, fence_ctrl=0x%x, unmasked_status=0x%x\n",
1971 status, rbbm_unmasked);
1972 return -ETIMEDOUT;
1973 }
1974
1975 return 0;
1976 }
1977
a6xx_irq(struct msm_gpu * gpu)1978 static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
1979 {
1980 struct msm_drm_private *priv = gpu->dev->dev_private;
1981
1982 /* Set keepalive vote to avoid power collapse after RBBM_INT_0_STATUS is read */
1983 a6xx_gpu_keepalive_vote(gpu, true);
1984
1985 if (irq_poll_fence(gpu))
1986 goto done;
1987
1988 u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
1989
1990 gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
1991
1992 if (priv->disable_err_irq)
1993 status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
1994
1995 if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
1996 a6xx_fault_detect_irq(gpu);
1997
1998 if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR)
1999 dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n");
2000
2001 if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
2002 a6xx_cp_hw_err_irq(gpu);
2003
2004 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
2005 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");
2006
2007 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
2008 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");
2009
2010 if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
2011 dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");
2012
2013 if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
2014 a7xx_sw_fuse_violation_irq(gpu);
2015
2016 if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
2017 msm_gpu_retire(gpu);
2018 a6xx_preempt_trigger(gpu);
2019 }
2020
2021 if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
2022 a6xx_preempt_irq(gpu);
2023
2024 done:
2025 a6xx_gpu_keepalive_vote(gpu, false);
2026
2027 return IRQ_HANDLED;
2028 }
2029
/* Deactivate both LLCC slices held by the GPU: the GPU slice and the HTW slice. */
static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
{
	llcc_slice_deactivate(a6xx_gpu->llc_slice);
	llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
}
2035
a6xx_llc_activate(struct a6xx_gpu * a6xx_gpu)2036 static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
2037 {
2038 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
2039 struct msm_gpu *gpu = &adreno_gpu->base;
2040 u32 cntl1_regval = 0;
2041
2042 if (IS_ERR(a6xx_gpu->llc_mmio))
2043 return;
2044
2045 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
2046 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
2047
2048 gpu_scid &= 0x1f;
2049 cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
2050 (gpu_scid << 15) | (gpu_scid << 20);
2051
2052 /* On A660, the SCID programming for UCHE traffic is done in
2053 * A6XX_GBIF_SCACHE_CNTL0[14:10]
2054 */
2055 if (adreno_is_a660_family(adreno_gpu))
2056 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
2057 (1 << 8), (gpu_scid << 10) | (1 << 8));
2058 }
2059
2060 /*
2061 * For targets with a MMU500, activate the slice but don't program the
2062 * register. The XBL will take care of that.
2063 */
2064 if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
2065 if (!a6xx_gpu->have_mmu500) {
2066 u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
2067
2068 gpuhtw_scid &= 0x1f;
2069 cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
2070 }
2071 }
2072
2073 if (!cntl1_regval)
2074 return;
2075
2076 /*
2077 * Program the slice IDs for the various GPU blocks and GPU MMU
2078 * pagetables
2079 */
2080 if (!a6xx_gpu->have_mmu500) {
2081 a6xx_llc_write(a6xx_gpu,
2082 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
2083
2084 /*
2085 * Program cacheability overrides to not allocate cache
2086 * lines on a write miss
2087 */
2088 a6xx_llc_rmw(a6xx_gpu,
2089 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
2090 return;
2091 }
2092
2093 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval);
2094 }
2095
a7xx_llc_activate(struct a6xx_gpu * a6xx_gpu)2096 static void a7xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
2097 {
2098 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
2099 struct msm_gpu *gpu = &adreno_gpu->base;
2100
2101 if (IS_ERR(a6xx_gpu->llc_mmio))
2102 return;
2103
2104 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
2105 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
2106
2107 gpu_scid &= GENMASK(4, 0);
2108
2109 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1,
2110 FIELD_PREP(GENMASK(29, 25), gpu_scid) |
2111 FIELD_PREP(GENMASK(24, 20), gpu_scid) |
2112 FIELD_PREP(GENMASK(19, 15), gpu_scid) |
2113 FIELD_PREP(GENMASK(14, 10), gpu_scid) |
2114 FIELD_PREP(GENMASK(9, 5), gpu_scid) |
2115 FIELD_PREP(GENMASK(4, 0), gpu_scid));
2116
2117 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0,
2118 FIELD_PREP(GENMASK(14, 10), gpu_scid) |
2119 BIT(8));
2120 }
2121
2122 llcc_slice_activate(a6xx_gpu->htw_llc_slice);
2123 }
2124
a6xx_llc_slices_destroy(struct a6xx_gpu * a6xx_gpu)2125 static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
2126 {
2127 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */
2128 if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
2129 return;
2130
2131 llcc_slice_putd(a6xx_gpu->llc_slice);
2132 llcc_slice_putd(a6xx_gpu->htw_llc_slice);
2133 }
2134
/*
 * Detect whether the GPU sits behind an MMU-500, map the "cx_mem" region
 * when it is needed, and acquire the GPU and HTW LLCC slices.  If neither
 * slice could be obtained, llc_mmio is set to ERR_PTR(-EINVAL).
 */
static void a6xx_llc_slices_init(struct platform_device *pdev,
		struct a6xx_gpu *a6xx_gpu, bool is_a7xx)
{
	struct device_node *iommu_np;
	bool need_cx_mem;

	/* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */
	if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
		return;

	/*
	 * There is a different programming path for A6xx targets with an
	 * mmu500 attached, so detect if that is the case
	 */
	iommu_np = of_parse_phandle(pdev->dev.of_node, "iommus", 0);
	a6xx_gpu->have_mmu500 = iommu_np &&
		of_device_is_compatible(iommu_np, "arm,mmu-500");
	of_node_put(iommu_np);

	need_cx_mem = is_a7xx || !a6xx_gpu->have_mmu500;
	a6xx_gpu->llc_mmio = need_cx_mem ? msm_ioremap(pdev, "cx_mem") : NULL;

	a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
	a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);

	/* Without any usable slice, flag the LLC as unavailable */
	if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) &&
	    IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
		a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
}
2164
2165 #define GBIF_CLIENT_HALT_MASK BIT(0)
2166 #define GBIF_ARB_HALT_MASK BIT(1)
2167 #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0)
2168 #define VBIF_RESET_ACK_MASK 0xF0
2169 #define GPR0_GBIF_HALT_REQUEST 0x1E0
2170
a6xx_bus_clear_pending_transactions(struct adreno_gpu * adreno_gpu,bool gx_off)2171 void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
2172 {
2173 struct msm_gpu *gpu = &adreno_gpu->base;
2174
2175 if (adreno_is_a619_holi(adreno_gpu)) {
2176 gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, GPR0_GBIF_HALT_REQUEST);
2177 spin_until((gpu_read(gpu, REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) &
2178 (VBIF_RESET_ACK_MASK)) == VBIF_RESET_ACK_MASK);
2179 } else if (!a6xx_has_gbif(adreno_gpu)) {
2180 gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, VBIF_XIN_HALT_CTRL0_MASK);
2181 spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) &
2182 (VBIF_XIN_HALT_CTRL0_MASK)) == VBIF_XIN_HALT_CTRL0_MASK);
2183 gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
2184
2185 return;
2186 }
2187
2188 if (gx_off) {
2189 /* Halt the gx side of GBIF */
2190 gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1);
2191 spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1);
2192 }
2193
2194 /* Halt new client requests on GBIF */
2195 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
2196 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
2197 (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
2198
2199 /* Halt all AXI requests on GBIF */
2200 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
2201 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
2202 (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
2203
2204 /* The GBIF halt needs to be explicitly cleared */
2205 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
2206 }
2207
/*
 * Assert (@assert == true) or de-assert the GPU software reset via
 * REG_A6XX_RBBM_SW_RESET_CMD.  Skipped entirely on A610 and A8xx.
 */
void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	/* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */
	if (adreno_is_a610(adreno_gpu) || adreno_is_a8xx(adreno_gpu))
		return;

	gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert);

	/* Perform a bogus read and add a brief delay to ensure ordering. */
	gpu_read(gpu, REG_A6XX_RBBM_SW_RESET_CMD);
	udelay(1);

	if (!assert)
		return;

	/* The reset line needs to be asserted for at least 100 us */
	udelay(100);
}
2223
a6xx_gmu_pm_resume(struct msm_gpu * gpu)2224 static int a6xx_gmu_pm_resume(struct msm_gpu *gpu)
2225 {
2226 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2227 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2228 int ret;
2229
2230 gpu->needs_hw_init = true;
2231
2232 trace_msm_gpu_resume(0);
2233
2234 mutex_lock(&a6xx_gpu->gmu.lock);
2235 ret = a6xx_gmu_resume(a6xx_gpu);
2236 mutex_unlock(&a6xx_gpu->gmu.lock);
2237 if (ret)
2238 return ret;
2239
2240 msm_devfreq_resume(gpu);
2241
2242 if (adreno_is_a8xx(adreno_gpu))
2243 a8xx_llc_activate(a6xx_gpu);
2244 else if (adreno_is_a7xx(adreno_gpu))
2245 a7xx_llc_activate(a6xx_gpu);
2246 else
2247 a6xx_llc_activate(a6xx_gpu);
2248
2249 return ret;
2250 }
2251
a6xx_pm_resume(struct msm_gpu * gpu)2252 static int a6xx_pm_resume(struct msm_gpu *gpu)
2253 {
2254 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2255 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2256 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
2257 unsigned long freq = gpu->fast_rate;
2258 struct dev_pm_opp *opp;
2259 int ret;
2260
2261 gpu->needs_hw_init = true;
2262
2263 trace_msm_gpu_resume(0);
2264
2265 mutex_lock(&a6xx_gpu->gmu.lock);
2266
2267 opp = dev_pm_opp_find_freq_ceil(&gpu->pdev->dev, &freq);
2268 if (IS_ERR(opp)) {
2269 ret = PTR_ERR(opp);
2270 goto err_set_opp;
2271 }
2272 dev_pm_opp_put(opp);
2273
2274 /* Set the core clock and bus bw, having VDD scaling in mind */
2275 dev_pm_opp_set_opp(&gpu->pdev->dev, opp);
2276
2277 pm_runtime_resume_and_get(gmu->dev);
2278 pm_runtime_resume_and_get(gmu->gxpd);
2279
2280 ret = clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
2281 if (ret)
2282 goto err_bulk_clk;
2283
2284 ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks);
2285 if (ret) {
2286 clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);
2287 goto err_bulk_clk;
2288 }
2289
2290 if (adreno_is_a619_holi(adreno_gpu))
2291 a6xx_sptprac_enable(gmu);
2292
2293 /* If anything goes south, tear the GPU down piece by piece.. */
2294 if (ret) {
2295 err_bulk_clk:
2296 pm_runtime_put(gmu->gxpd);
2297 pm_runtime_put(gmu->dev);
2298 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
2299 }
2300 err_set_opp:
2301 mutex_unlock(&a6xx_gpu->gmu.lock);
2302
2303 if (!ret) {
2304 msm_devfreq_resume(gpu);
2305 a6xx_llc_activate(a6xx_gpu);
2306 }
2307
2308 return ret;
2309 }
2310
a6xx_gmu_pm_suspend(struct msm_gpu * gpu)2311 static int a6xx_gmu_pm_suspend(struct msm_gpu *gpu)
2312 {
2313 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2314 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2315 int i, ret;
2316
2317 trace_msm_gpu_suspend(0);
2318
2319 a6xx_llc_deactivate(a6xx_gpu);
2320
2321 msm_devfreq_suspend(gpu);
2322
2323 mutex_lock(&a6xx_gpu->gmu.lock);
2324 ret = a6xx_gmu_stop(a6xx_gpu);
2325 mutex_unlock(&a6xx_gpu->gmu.lock);
2326 if (ret)
2327 return ret;
2328
2329 if (a6xx_gpu->shadow_bo)
2330 for (i = 0; i < gpu->nr_rings; i++)
2331 a6xx_gpu->shadow[i] = 0;
2332
2333 gpu->suspend_count++;
2334
2335 return 0;
2336 }
2337
a6xx_pm_suspend(struct msm_gpu * gpu)2338 static int a6xx_pm_suspend(struct msm_gpu *gpu)
2339 {
2340 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2341 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2342 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
2343 int i;
2344
2345 trace_msm_gpu_suspend(0);
2346
2347 a6xx_llc_deactivate(a6xx_gpu);
2348
2349 msm_devfreq_suspend(gpu);
2350
2351 mutex_lock(&a6xx_gpu->gmu.lock);
2352
2353 /* Drain the outstanding traffic on memory buses */
2354 adreno_gpu->funcs->bus_halt(adreno_gpu, true);
2355
2356 if (adreno_is_a619_holi(adreno_gpu))
2357 a6xx_sptprac_disable(gmu);
2358
2359 clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);
2360 clk_bulk_disable_unprepare(gmu->nr_clocks, gmu->clocks);
2361
2362 pm_runtime_put_sync(gmu->gxpd);
2363 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
2364 pm_runtime_put_sync(gmu->dev);
2365
2366 mutex_unlock(&a6xx_gpu->gmu.lock);
2367
2368 if (a6xx_gpu->shadow_bo)
2369 for (i = 0; i < gpu->nr_rings; i++)
2370 a6xx_gpu->shadow[i] = 0;
2371
2372 gpu->suspend_count++;
2373
2374 return 0;
2375 }
2376
a6xx_get_timestamp(struct msm_gpu * gpu)2377 static u64 a6xx_get_timestamp(struct msm_gpu *gpu)
2378 {
2379 return gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER);
2380 }
2381
a6xx_active_ring(struct msm_gpu * gpu)2382 static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
2383 {
2384 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2385 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2386
2387 return a6xx_gpu->cur_ring;
2388 }
2389
a6xx_destroy(struct msm_gpu * gpu)2390 static void a6xx_destroy(struct msm_gpu *gpu)
2391 {
2392 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2393 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2394
2395 if (a6xx_gpu->sqe_bo) {
2396 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm);
2397 drm_gem_object_put(a6xx_gpu->sqe_bo);
2398 }
2399
2400 if (a6xx_gpu->aqe_bo) {
2401 msm_gem_unpin_iova(a6xx_gpu->aqe_bo, gpu->vm);
2402 drm_gem_object_put(a6xx_gpu->aqe_bo);
2403 }
2404
2405 if (a6xx_gpu->shadow_bo) {
2406 msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->vm);
2407 drm_gem_object_put(a6xx_gpu->shadow_bo);
2408 }
2409
2410 a6xx_llc_slices_destroy(a6xx_gpu);
2411
2412 a6xx_gmu_remove(a6xx_gpu);
2413
2414 adreno_gpu_cleanup(adreno_gpu);
2415
2416 kfree(a6xx_gpu);
2417 }
2418
a6xx_gpu_busy(struct msm_gpu * gpu,unsigned long * out_sample_rate)2419 static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
2420 {
2421 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2422 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2423 u64 busy_cycles;
2424
2425 /* 19.2MHz */
2426 *out_sample_rate = 19200000;
2427
2428 busy_cycles = gmu_read64(&a6xx_gpu->gmu,
2429 REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
2430 REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
2431
2432 return busy_cycles;
2433 }
2434
/* Forward a frequency change to a6xx_gmu_set_freq() while holding gmu.lock. */
static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
			      bool suspended)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct mutex *gmu_lock = &a6xx_gpu->gmu.lock;

	mutex_lock(gmu_lock);
	a6xx_gmu_set_freq(gpu, opp, suspended);
	mutex_unlock(gmu_lock);
}
2445
2446 static struct drm_gpuvm *
a6xx_create_vm(struct msm_gpu * gpu,struct platform_device * pdev)2447 a6xx_create_vm(struct msm_gpu *gpu, struct platform_device *pdev)
2448 {
2449 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2450 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2451 unsigned long quirks = 0;
2452
2453 /*
2454 * This allows GPU to set the bus attributes required to use system
2455 * cache on behalf of the iommu page table walker.
2456 */
2457 if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice) &&
2458 !device_iommu_capable(&pdev->dev, IOMMU_CAP_CACHE_COHERENCY))
2459 quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
2460
2461 return adreno_iommu_create_vm(gpu, pdev, quirks);
2462 }
2463
2464 static struct drm_gpuvm *
a6xx_create_private_vm(struct msm_gpu * gpu,bool kernel_managed)2465 a6xx_create_private_vm(struct msm_gpu *gpu, bool kernel_managed)
2466 {
2467 struct msm_mmu *mmu;
2468
2469 mmu = msm_iommu_pagetable_create(to_msm_vm(gpu->vm)->mmu, kernel_managed);
2470
2471 if (IS_ERR(mmu))
2472 return ERR_CAST(mmu);
2473
2474 return msm_gem_vm_create(gpu->dev, mmu, "gpu", ADRENO_VM_START,
2475 adreno_private_vm_size(gpu), kernel_managed);
2476 }
2477
a6xx_get_rptr(struct msm_gpu * gpu,struct msm_ringbuffer * ring)2478 static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
2479 {
2480 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2481 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2482
2483 if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
2484 return a6xx_gpu->shadow[ring->id];
2485
2486 /*
2487 * This is true only on an A6XX_GEN1 with GMU, has IFPC enabled and a super old SQE firmware
2488 * without 'whereami' support
2489 */
2490 WARN_ONCE((to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC),
2491 "Can't read CP_RB_RPTR register reliably\n");
2492
2493 return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
2494 }
2495
a6xx_progress(struct msm_gpu * gpu,struct msm_ringbuffer * ring)2496 static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
2497 {
2498 struct msm_cp_state cp_state;
2499 bool progress;
2500
2501 /*
2502 * With IFPC, KMD doesn't know whether GX power domain is collapsed
2503 * or not. So, we can't blindly read the below registers in GX domain.
2504 * Lets trust the hang detection in HW and lie to the caller that
2505 * there was progress.
2506 */
2507 if (to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC)
2508 return true;
2509
2510 cp_state = (struct msm_cp_state) {
2511 .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
2512 .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
2513 .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
2514 .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
2515 };
2516
2517 /*
2518 * Adjust the remaining data to account for what has already been
2519 * fetched from memory, but not yet consumed by the SQE.
2520 *
2521 * This is not *technically* correct, the amount buffered could
2522 * exceed the IB size due to hw prefetching ahead, but:
2523 *
2524 * (1) We aren't trying to find the exact position, just whether
2525 * progress has been made
2526 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
2527 * to prevent prefetching into an unrelated submit. (And
2528 * either way, at some point the ROQ will be full.)
2529 */
2530 cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB1) >> 16;
2531 cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB2) >> 16;
2532
2533 progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
2534
2535 ring->last_cp_state = cp_state;
2536
2537 return progress;
2538 }
2539
/*
 * Translate a speed-bin fuse value into a supported-hw bitmask using the
 * info->speedbins table (terminated by an entry whose fuse is SHRT_MAX).
 * Returns UINT_MAX when there is no table or no entry matches @fuse.
 */
static u32 fuse_to_supp_hw(const struct adreno_info *info, u32 fuse)
{
	u32 supp_hw = UINT_MAX;

	if (info->speedbins) {
		for (int idx = 0; info->speedbins[idx].fuse != SHRT_MAX; idx++) {
			if (info->speedbins[idx].fuse != fuse)
				continue;

			supp_hw = BIT(info->speedbins[idx].speedbin);
			break;
		}
	}

	return supp_hw;
}
2551
/*
 * Read the speed-bin value for this GPU into @speedbin.
 *
 * The speed-bin fuse read by adreno_read_speedbin() is preferred.  Only if
 * that reports -ENOENT, and the target has ADRENO_QUIRK_SOFTFUSE, is the
 * value taken from the SW fuse frequency-limit status register instead.
 *
 * Returns 0 on success, -ENOENT when no speed-bin source is available, or
 * another negative errno propagated from adreno_read_speedbin().
 */
static int a6xx_read_speedbin(struct device *dev, struct a6xx_gpu *a6xx_gpu,
		const struct adreno_info *info, u32 *speedbin)
{
	int ret;

	/* Use speedbin fuse if present. Otherwise, fallback to softfuse */
	ret = adreno_read_speedbin(dev, speedbin);
	if (ret != -ENOENT)
		return ret;

	if (!(info->quirks & ADRENO_QUIRK_SOFTFUSE))
		return -ENOENT;

	/*
	 * a6xx_llc_slices_init() may leave llc_mmio as NULL or as an error
	 * pointer, so do not read the softfuse through it in that case.
	 * NOTE(review): assumes a6xx_llc_read() accesses llc_mmio - confirm.
	 */
	if (IS_ERR_OR_NULL(a6xx_gpu->llc_mmio))
		return -ENOENT;

	*speedbin = a6xx_llc_read(a6xx_gpu, REG_A8XX_CX_MISC_SW_FUSE_FREQ_LIMIT_STATUS);
	*speedbin = A8XX_CX_MISC_SW_FUSE_FREQ_LIMIT_STATUS_FINALFREQLIMIT(*speedbin);

	return 0;
}
2570
a6xx_set_supported_hw(struct device * dev,struct a6xx_gpu * a6xx_gpu,const struct adreno_info * info)2571 static int a6xx_set_supported_hw(struct device *dev, struct a6xx_gpu *a6xx_gpu,
2572 const struct adreno_info *info)
2573 {
2574 u32 supp_hw;
2575 u32 speedbin;
2576 int ret;
2577
2578 ret = a6xx_read_speedbin(dev, a6xx_gpu, info, &speedbin);
2579 /*
2580 * -ENOENT means that the platform doesn't support speedbin which is
2581 * fine
2582 */
2583 if (ret == -ENOENT) {
2584 return 0;
2585 } else if (ret) {
2586 dev_err_probe(dev, ret,
2587 "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
2588 return ret;
2589 }
2590
2591 supp_hw = fuse_to_supp_hw(info, speedbin);
2592
2593 if (supp_hw == UINT_MAX) {
2594 DRM_DEV_ERROR(dev,
2595 "missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
2596 speedbin);
2597 supp_hw = BIT(0); /* Default */
2598 }
2599
2600 ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
2601 if (ret)
2602 return ret;
2603
2604 return 0;
2605 }
2606
a6xx_aqe_is_enabled(struct adreno_gpu * adreno_gpu)2607 static bool a6xx_aqe_is_enabled(struct adreno_gpu *adreno_gpu)
2608 {
2609 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2610
2611 /*
2612 * AQE uses preemption context record as scratch pad, so check if
2613 * preemption is enabled
2614 */
2615 return (adreno_gpu->base.nr_rings > 1) && !!a6xx_gpu->aqe_bo;
2616 }
2617
a6xx_gpu_init(struct drm_device * dev)2618 static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
2619 {
2620 struct msm_drm_private *priv = dev->dev_private;
2621 struct platform_device *pdev = priv->gpu_pdev;
2622 struct adreno_platform_config *config = pdev->dev.platform_data;
2623 const struct adreno_info *info = config->info;
2624 struct a6xx_gpu *a6xx_gpu;
2625 struct adreno_gpu *adreno_gpu;
2626 struct msm_gpu *gpu;
2627 extern int enable_preemption;
2628 u32 speedbin;
2629 bool is_a7xx;
2630 int ret, nr_rings = 1;
2631
2632 a6xx_gpu = kzalloc_obj(*a6xx_gpu);
2633 if (!a6xx_gpu)
2634 return ERR_PTR(-ENOMEM);
2635
2636 adreno_gpu = &a6xx_gpu->base;
2637 gpu = &adreno_gpu->base;
2638
2639 mutex_init(&a6xx_gpu->gmu.lock);
2640 spin_lock_init(&a6xx_gpu->aperture_lock);
2641
2642 adreno_gpu->registers = NULL;
2643
2644 /* Check if there is a GMU phandle and set it up */
2645 struct device_node *node __free(device_node) =
2646 of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
2647 /* FIXME: How do we gracefully handle this? */
2648 BUG_ON(!node);
2649
2650 adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper");
2651
2652 adreno_gpu->base.hw_apriv =
2653 !!(info->quirks & ADRENO_QUIRK_HAS_HW_APRIV);
2654
2655 /* gpu->info only gets assigned in adreno_gpu_init(). A8x is included intentionally */
2656 is_a7xx = info->family >= ADRENO_7XX_GEN1;
2657
2658 a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx);
2659
2660 ret = a6xx_set_supported_hw(&pdev->dev, a6xx_gpu, info);
2661 if (ret) {
2662 a6xx_llc_slices_destroy(a6xx_gpu);
2663 kfree(a6xx_gpu);
2664 return ERR_PTR(ret);
2665 }
2666
2667 if ((enable_preemption == 1) || (enable_preemption == -1 &&
2668 (info->quirks & ADRENO_QUIRK_PREEMPTION)))
2669 nr_rings = 4;
2670
2671 ret = adreno_gpu_init(dev, pdev, adreno_gpu, info->funcs, nr_rings);
2672 if (ret) {
2673 a6xx_destroy(&(a6xx_gpu->base.base));
2674 return ERR_PTR(ret);
2675 }
2676
2677 /* Set the speedbin value that is passed to userspace */
2678 if (a6xx_read_speedbin(&pdev->dev, a6xx_gpu, info, &speedbin) || !speedbin)
2679 speedbin = 0xffff;
2680 adreno_gpu->speedbin = (uint16_t) (0xffff & speedbin);
2681
2682 /*
2683 * For now only clamp to idle freq for devices where this is known not
2684 * to cause power supply issues:
2685 */
2686 if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
2687 priv->gpu_clamp_to_idle = true;
2688
2689 if (adreno_has_gmu_wrapper(adreno_gpu) || adreno_has_rgmu(adreno_gpu))
2690 ret = a6xx_gmu_wrapper_init(a6xx_gpu, node);
2691 else
2692 ret = a6xx_gmu_init(a6xx_gpu, node);
2693 if (ret) {
2694 a6xx_destroy(&(a6xx_gpu->base.base));
2695 return ERR_PTR(ret);
2696 }
2697
2698 adreno_gpu->uche_trap_base = 0x1fffffffff000ull;
2699
2700 msm_mmu_set_fault_handler(to_msm_vm(gpu->vm)->mmu, gpu,
2701 adreno_gpu->funcs->mmu_fault_handler);
2702
2703 ret = a6xx_calc_ubwc_config(adreno_gpu);
2704 if (ret) {
2705 a6xx_destroy(&(a6xx_gpu->base.base));
2706 return ERR_PTR(ret);
2707 }
2708
2709 /* Set up the preemption specific bits and pieces for each ringbuffer */
2710 a6xx_preempt_init(gpu);
2711
2712 return gpu;
2713 }
2714
2715 const struct adreno_gpu_funcs a6xx_gpu_funcs = {
2716 .base = {
2717 .get_param = adreno_get_param,
2718 .set_param = adreno_set_param,
2719 .hw_init = a6xx_hw_init,
2720 .ucode_load = a6xx_ucode_load,
2721 .pm_suspend = a6xx_gmu_pm_suspend,
2722 .pm_resume = a6xx_gmu_pm_resume,
2723 .recover = a6xx_recover,
2724 .submit = a6xx_submit,
2725 .active_ring = a6xx_active_ring,
2726 .irq = a6xx_irq,
2727 .destroy = a6xx_destroy,
2728 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2729 .show = a6xx_show,
2730 #endif
2731 .gpu_busy = a6xx_gpu_busy,
2732 .gpu_get_freq = a6xx_gmu_get_freq,
2733 .gpu_set_freq = a6xx_gpu_set_freq,
2734 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2735 .gpu_state_get = a6xx_gpu_state_get,
2736 .gpu_state_put = a6xx_gpu_state_put,
2737 #endif
2738 .create_vm = a6xx_create_vm,
2739 .create_private_vm = a6xx_create_private_vm,
2740 .get_rptr = a6xx_get_rptr,
2741 .progress = a6xx_progress,
2742 .sysprof_setup = a6xx_gmu_sysprof_setup,
2743 },
2744 .init = a6xx_gpu_init,
2745 .get_timestamp = a6xx_gmu_get_timestamp,
2746 .bus_halt = a6xx_bus_clear_pending_transactions,
2747 .mmu_fault_handler = a6xx_fault_handler,
2748 .gx_is_on = a6xx_gmu_gx_is_on,
2749 };
2750
2751 const struct adreno_gpu_funcs a6xx_gmuwrapper_funcs = {
2752 .base = {
2753 .get_param = adreno_get_param,
2754 .set_param = adreno_set_param,
2755 .hw_init = a6xx_hw_init,
2756 .ucode_load = a6xx_ucode_load,
2757 .pm_suspend = a6xx_pm_suspend,
2758 .pm_resume = a6xx_pm_resume,
2759 .recover = a6xx_recover,
2760 .submit = a6xx_submit,
2761 .active_ring = a6xx_active_ring,
2762 .irq = a6xx_irq,
2763 .destroy = a6xx_destroy,
2764 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2765 .show = a6xx_show,
2766 #endif
2767 .gpu_busy = a6xx_gpu_busy,
2768 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2769 .gpu_state_get = a6xx_gpu_state_get,
2770 .gpu_state_put = a6xx_gpu_state_put,
2771 #endif
2772 .create_vm = a6xx_create_vm,
2773 .create_private_vm = a6xx_create_private_vm,
2774 .get_rptr = a6xx_get_rptr,
2775 .progress = a6xx_progress,
2776 },
2777 .init = a6xx_gpu_init,
2778 .get_timestamp = a6xx_get_timestamp,
2779 .bus_halt = a6xx_bus_clear_pending_transactions,
2780 .mmu_fault_handler = a6xx_fault_handler,
2781 .gx_is_on = a6xx_gmu_gx_is_on,
2782 };
2783
2784 const struct adreno_gpu_funcs a7xx_gpu_funcs = {
2785 .base = {
2786 .get_param = adreno_get_param,
2787 .set_param = adreno_set_param,
2788 .hw_init = a6xx_hw_init,
2789 .ucode_load = a6xx_ucode_load,
2790 .pm_suspend = a6xx_gmu_pm_suspend,
2791 .pm_resume = a6xx_gmu_pm_resume,
2792 .recover = a6xx_recover,
2793 .submit = a7xx_submit,
2794 .active_ring = a6xx_active_ring,
2795 .irq = a6xx_irq,
2796 .destroy = a6xx_destroy,
2797 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2798 .show = a6xx_show,
2799 #endif
2800 .gpu_busy = a6xx_gpu_busy,
2801 .gpu_get_freq = a6xx_gmu_get_freq,
2802 .gpu_set_freq = a6xx_gpu_set_freq,
2803 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2804 .gpu_state_get = a6xx_gpu_state_get,
2805 .gpu_state_put = a6xx_gpu_state_put,
2806 #endif
2807 .create_vm = a6xx_create_vm,
2808 .create_private_vm = a6xx_create_private_vm,
2809 .get_rptr = a6xx_get_rptr,
2810 .progress = a6xx_progress,
2811 .sysprof_setup = a6xx_gmu_sysprof_setup,
2812 },
2813 .init = a6xx_gpu_init,
2814 .get_timestamp = a6xx_gmu_get_timestamp,
2815 .bus_halt = a6xx_bus_clear_pending_transactions,
2816 .mmu_fault_handler = a6xx_fault_handler,
2817 .gx_is_on = a7xx_gmu_gx_is_on,
2818 .aqe_is_enabled = a6xx_aqe_is_enabled,
2819 };
2820
2821 const struct adreno_gpu_funcs a8xx_gpu_funcs = {
2822 .base = {
2823 .get_param = adreno_get_param,
2824 .set_param = adreno_set_param,
2825 .hw_init = a8xx_hw_init,
2826 .ucode_load = a6xx_ucode_load,
2827 .pm_suspend = a6xx_gmu_pm_suspend,
2828 .pm_resume = a6xx_gmu_pm_resume,
2829 .recover = a8xx_recover,
2830 .submit = a7xx_submit,
2831 .active_ring = a6xx_active_ring,
2832 .irq = a8xx_irq,
2833 .destroy = a6xx_destroy,
2834 .gpu_busy = a8xx_gpu_busy,
2835 .gpu_get_freq = a6xx_gmu_get_freq,
2836 .gpu_set_freq = a6xx_gpu_set_freq,
2837 .create_vm = a6xx_create_vm,
2838 .create_private_vm = a6xx_create_private_vm,
2839 .get_rptr = a6xx_get_rptr,
2840 .progress = a8xx_progress,
2841 },
2842 .init = a6xx_gpu_init,
2843 .get_timestamp = a8xx_gmu_get_timestamp,
2844 .bus_halt = a8xx_bus_clear_pending_transactions,
2845 .mmu_fault_handler = a8xx_fault_handler,
2846 .gx_is_on = a8xx_gmu_gx_is_on,
2847 .aqe_is_enabled = a6xx_aqe_is_enabled,
2848 };
2849