// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */

#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_gpu_trace.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"

#include <linux/bitfield.h>
#include <linux/devfreq.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/pm_domain.h>
#include <linux/soc/qcom/llcc-qcom.h>

#define GPU_PAS_ID 13
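
/*
 * Route host register accesses through the CP aperture to the given pipe and
 * slice. The last programmed value is cached so redundant aperture writes are
 * skipped.
 */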
static void a8xx_aperture_slice_set(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 val;

	val = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe) | A8XX_CP_APERTURE_CNTL_HOST_SLICEID(slice);

	if (a6xx_gpu->cached_aperture == val)
		return;

	gpu_write(gpu, REG_A8XX_CP_APERTURE_CNTL_HOST, val);

	a6xx_gpu->cached_aperture = val;
}

static void a8xx_aperture_acquire(struct msm_gpu *gpu, enum adreno_pipe pipe, unsigned long *flags)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	spin_lock_irqsave(&a6xx_gpu->aperture_lock, *flags);

	a8xx_aperture_slice_set(gpu, pipe, 0);
}

static void a8xx_aperture_release(struct msm_gpu *gpu, unsigned long flags)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);
}

static void a8xx_aperture_clear(struct msm_gpu *gpu)
{
	unsigned long flags;

	a8xx_aperture_acquire(gpu, PIPE_NONE, &flags);
	a8xx_aperture_release(gpu, flags);
}

static void a8xx_write_pipe(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 offset, u32 data)
{
	unsigned long flags;

	a8xx_aperture_acquire(gpu, pipe, &flags);
	gpu_write(gpu, offset, data);
	a8xx_aperture_release(gpu, flags);
}

static u32 a8xx_read_pipe_slice(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice, u32 offset)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	unsigned long flags;
	u32 val;

	spin_lock_irqsave(&a6xx_gpu->aperture_lock, flags);
	a8xx_aperture_slice_set(gpu, pipe, slice);
	val = gpu_read(gpu, offset);
	spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);

	return val;
}
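
/*
 * Determine the mask of enabled GPU slices and fold the slice count into the
 * chip ID. This is a no-op on pre-A8xx GPUs and only runs once.
 */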
void a8xx_gpu_get_slice_info(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	const struct a6xx_info *info = adreno_gpu->info->a6xx;
	u32 slice_mask;

	if (adreno_gpu->info->family < ADRENO_8XX_GEN1)
		return;

	if (a6xx_gpu->slice_mask)
		return;

	slice_mask = GENMASK(info->max_slices - 1, 0);

	/* GEN1 doesn't support partial slice configurations */
	if (adreno_gpu->info->family == ADRENO_8XX_GEN1) {
		a6xx_gpu->slice_mask = slice_mask;
		return;
	}

	slice_mask &= a6xx_llc_read(a6xx_gpu,
			REG_A8XX_CX_MISC_SLICE_ENABLE_FINAL);

	a6xx_gpu->slice_mask = slice_mask;

	/* The chip ID depends on the number of slices available, so update it */
	adreno_gpu->chip_id |= FIELD_PREP(GENMASK(7, 4), hweight32(slice_mask));
}
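
/* Return the index of the lowest numbered enabled slice */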
static u32 a8xx_get_first_slice(struct a6xx_gpu *a6xx_gpu)
{
	return ffs(a6xx_gpu->slice_mask) - 1;
}

static inline bool _a8xx_check_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Check that the GMU is idle */
	if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
		return false;

	/* Check that the CX master is idle */
	if (gpu_read(gpu, REG_A8XX_RBBM_STATUS) &
			~A8XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
		return false;

	return !(gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS) &
		A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
}

static bool a8xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a8xx_check_idle(gpu))) {
		DRM_ERROR(
			"%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A8XX_RBBM_STATUS),
			gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

void a8xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	spin_lock_irqsave(&ring->preempt_lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	/* Update HW if this is the current ring and we are not in preempt */
	if (!a6xx_in_preempt(a6xx_gpu)) {
		if (a6xx_gpu->cur_ring == ring)
			gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
		else
			ring->restore_wptr = true;
	} else {
		ring->restore_wptr = true;
	}

	spin_unlock_irqrestore(&ring->preempt_lock, flags);
}
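
/*
 * Enable or disable hardware clock gating. When enabling, the global clock
 * gating configuration is loaded through the P2S interface and the TXDONE
 * handshake is polled before RBBM_CLOCK_CNTL_GLOBAL is cleared.
 */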
static void a8xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	u32 val;

	if (adreno_is_x285(adreno_gpu) && state)
		gpu_write(gpu, REG_A8XX_RBBM_CGC_0_PC, 0x00000702);

	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
		  state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
		  state ? 0x110111 : 0);
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
		  state ? 0x55555 : 0);

	gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
	gpu_write(gpu, REG_A8XX_RBBM_CGC_GLOBAL_LOAD_CMD, !!state);

	if (state) {
		gpu_write(gpu, REG_A8XX_RBBM_CGC_P2S_TRIG_CMD, 1);

		if (gpu_poll_timeout(gpu, REG_A8XX_RBBM_CGC_P2S_STATUS, val,
				     val & A8XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
			dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
			return;
		}

		gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
	} else {
		/*
		 * The GMU enables clock gating in GBIF during boot. Override
		 * that here when the HWCG feature is disabled.
		 */
		gpu_rmw(gpu, REG_A8XX_GBIF_CX_CONFIG, BIT(0), 0);
	}
}

static void a8xx_set_cp_protect(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
	u32 cntl, final_cfg;
	unsigned int i;

	cntl = A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_PROT_EN |
		A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_FAULT_ON_VIOL_EN |
		A8XX_CP_PROTECT_CNTL_PIPE_LAST_SPAN_INF_RANGE |
		A8XX_CP_PROTECT_CNTL_PIPE_HALT_SQE_RANGE__MASK;
	/*
	 * Enable access protection to privileged registers, fault on an access
	 * protect violation and select the last span to protect from the start
	 * address all the way to the end of the register address space
	 */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl);
	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl);

	a8xx_aperture_clear(gpu);

	for (i = 0; i < protect->count; i++) {
		/* Intentionally skip writing to some registers */
		if (protect->regs[i]) {
			gpu_write(gpu, REG_A8XX_CP_PROTECT_GLOBAL(i), protect->regs[i]);
			final_cfg = protect->regs[i];
		}
	}

	/*
	 * The last span feature is only supported on pipe-specific registers,
	 * so update those here.
	 */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg);
	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg);

	a8xx_aperture_clear(gpu);
}
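
/*
 * Program the UBWC (bandwidth compression) mode registers for the GRAS, RB,
 * SP and TPL1 blocks based on the platform's UBWC encoder version and highest
 * bank bit.
 */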
static void a8xx_set_ubwc_config(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config;
	u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2);
	u32 level3_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL3);
	bool rgba8888_lossless = false, fp16compoptdis = false;
	bool yuvnotcomptofc = false, min_acc_len_64b = false;
	bool rgb565_predicator = false, amsbc = false;
	bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg);
	u32 ubwc_version = cfg->ubwc_enc_version;
	u32 hbb, hbb_hi, hbb_lo, mode = 1;
	u8 uavflagprd_inv = 2;

	switch (ubwc_version) {
	case UBWC_6_0:
		yuvnotcomptofc = true;
		mode = 5;
		break;
	case UBWC_5_0:
		amsbc = true;
		rgb565_predicator = true;
		mode = 4;
		break;
	case UBWC_4_0:
		amsbc = true;
		rgb565_predicator = true;
		fp16compoptdis = true;
		rgba8888_lossless = true;
		mode = 2;
		break;
	case UBWC_3_0:
		amsbc = true;
		mode = 1;
		break;
	default:
		dev_err(&gpu->pdev->dev, "Unknown UBWC version: 0x%x\n", ubwc_version);
		break;
	}

	/*
	 * We subtract 13 from the highest bank bit (13 is the minimum value
	 * allowed by hw) and write the lowest two bits of the remaining value
	 * as hbb_lo and the one above it as hbb_hi to the hardware.
	 */
	WARN_ON(cfg->highest_bank_bit < 13);
	hbb = cfg->highest_bank_bit - 13;
	hbb_hi = hbb >> 2;
	hbb_lo = hbb & 3;

	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL,
			hbb << 5 |
			level3_swizzling_dis << 4 |
			level2_swizzling_dis << 3);

	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL,
			hbb << 5 |
			level3_swizzling_dis << 4 |
			level2_swizzling_dis << 3);

	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CCU_NC_MODE_CNTL,
			yuvnotcomptofc << 6 |
			level3_swizzling_dis << 5 |
			level2_swizzling_dis << 4 |
			hbb_hi << 3 |
			hbb_lo << 1);

	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CMP_NC_MODE_CNTL,
			mode << 15 |
			yuvnotcomptofc << 6 |
			rgba8888_lossless << 4 |
			fp16compoptdis << 3 |
			rgb565_predicator << 2 |
			amsbc << 1 |
			min_acc_len_64b);

	a8xx_aperture_clear(gpu);

	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
		  level3_swizzling_dis << 13 |
		  level2_swizzling_dis << 12 |
		  hbb_hi << 10 |
		  uavflagprd_inv << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);

	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
		  level3_swizzling_dis << 7 |
		  level2_swizzling_dis << 6 |
		  hbb_hi << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);
}
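
/*
 * Write the non-context register list for every hardware pipe, and capture
 * the RB_GC_GMEM_PROTECT value so it can be programmed later, once the GPU
 * has switched to non-secure mode.
 */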
static void a8xx_nonctxt_config(struct msm_gpu *gpu, u32 *gmem_protect)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct a6xx_info *info = adreno_gpu->info->a6xx;
	const struct adreno_reglist_pipe *regs = info->nonctxt_reglist;
	unsigned int pipe_id, i;
	unsigned long flags;

	for (pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		/* We don't have support for LPAC yet */
		if (pipe_id == PIPE_LPAC)
			continue;

		a8xx_aperture_acquire(gpu, pipe_id, &flags);

		for (i = 0; regs[i].offset; i++) {
			if (!(BIT(pipe_id) & regs[i].pipe))
				continue;

			if (regs[i].offset == REG_A8XX_RB_GC_GMEM_PROTECT)
				*gmem_protect = regs[i].value;

			gpu_write(gpu, regs[i].offset, regs[i].value);
		}

		a8xx_aperture_release(gpu, flags);
	}

	a8xx_aperture_clear(gpu);
}

static int a8xx_cp_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];
	u32 mask;

	/* Disable concurrent binning before sending CP init */
	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, BIT(27));

	OUT_PKT7(ring, CP_ME_INIT, 4);

	/* Use multiple HW contexts */
	mask = BIT(0);

	/* Enable error detection */
	mask |= BIT(1);

	/* Set default reset state */
	mask |= BIT(3);

	/* Disable save/restore of performance counters across preemption */
	mask |= BIT(6);

	OUT_RING(ring, mask);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Operation mode mask */
	OUT_RING(ring, 0x00000002);

	a6xx_flush(gpu, ring);
	return a8xx_idle(gpu, ring) ? 0 : -EINVAL;
}

#define A8XX_INT_MASK \
	(A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
	 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
	 A6XX_RBBM_INT_0_MASK_CP_SW | \
	 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
	 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
	 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
	 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
	 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
	 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
	 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)

#define A8XX_APRIV_MASK \
	(A8XX_CP_APRIV_CNTL_PIPE_ICACHE | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBFETCH | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBPRIVLEVEL | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBRPWB)

#define A8XX_BR_APRIV_MASK \
	(A8XX_APRIV_MASK | \
	 A8XX_CP_APRIV_CNTL_PIPE_CDREAD | \
	 A8XX_CP_APRIV_CNTL_PIPE_CDWRITE)

#define A8XX_CP_GLOBAL_INT_MASK \
	(A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTLPAC | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE0 | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE1 | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBR | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBV | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTLPAC | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE0 | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE1 | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBR | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBV)

#define A8XX_CP_INTERRUPT_STATUS_MASK_PIPE \
	(A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFRBWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB1WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB2WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB3WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFSDSWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFMRBWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFVSDWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_OPCODEERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VSDPARITYERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_REGISTERPROTECTIONERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_ILLEGALINSTRUCTION | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_SMMUFAULT | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPCLIENT | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPTYPE | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPREAD | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_RTWROVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTWROVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTREFCNTOVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTCLRRESMISS)

#define A8XX_CP_HW_FAULT_STATUS_MASK_PIPE \
	(A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFRBFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB1FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB2FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB3FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFSDSFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFMRBFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFVSDFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_SQEREADBURSTOVF | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_EVENTENGINEOVF | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_UCODEERROR)
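
/*
 * One-time hardware bring-up, performed while holding the GMU "GPU set" OOB
 * vote: clear GBIF halt state, program the GMEM/UCHE ranges and performance
 * counters, configure UBWC, non-context registers, CP protection and per-pipe
 * APRIV masks, then start the SQE, run CP init and drop out of secure mode.
 */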
static int hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	unsigned int pipe_id, i;
	u32 gmem_protect = 0;
	u64 gmem_range_min;
	int ret;

	ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
	if (ret)
		return ret;

	/* Invalidate the cached value to force aperture configuration next time */
	a6xx_gpu->cached_aperture = UINT_MAX;
	a8xx_aperture_clear(gpu);

	/* Clear GBIF halt in case GX domain was not collapsed */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
	gpu_read(gpu, REG_A6XX_GBIF_HALT);

	gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 0);
	gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT);

	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);

	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);

	/* Setup GMEM Range in UCHE */
	gmem_range_min = SZ_64M;
	/* Set the GMEM VA range [gmem_range_min:gmem_range_min + gpu->gmem - 1] */
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_GC_GMEM_RANGE_MIN, gmem_range_min);
	gpu_write64(gpu, REG_A8XX_SP_HLSQ_GC_GMEM_RANGE_MIN, gmem_range_min);

	/* Setup UCHE Trap region */
	gpu_write64(gpu, REG_A8XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_CNTL, 0x1);
	gpu_write(gpu, REG_A8XX_RBBM_SLICE_PERFCTR_CNTL, 0x1);

	/* Turn on the IFPC counter (countable 4 on XOCLK1) */
	gmu_write(&a6xx_gpu->gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_1,
		  FIELD_PREP(GENMASK(7, 0), 0x4));

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A8XX_CP_PERFCTR_CP_SEL(0), 1);

	a8xx_set_ubwc_config(gpu);

	/* Set weights for bicubic filtering */
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(5), 0x00000000);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(6), 0x3fd093e8);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(7), 0x3f4133dc);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(8), 0x3ea1dfdb);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(9), 0x3e0283e0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(10), 0x0000ac2b);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(11), 0x0000f01d);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(12), 0x00114412);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(13), 0x0021980a);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(14), 0x0051ec05);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(15), 0x0000380e);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(16), 0x3ff09001);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(17), 0x3fc10bfa);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(18), 0x3f9193f7);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(19), 0x3f7227f7);

	gpu_write(gpu, REG_A8XX_UCHE_CLIENT_PF, BIT(7) | 0x1);

	a8xx_nonctxt_config(gpu, &gmem_protect);

	/* Enable fault detection */
	gpu_write(gpu, REG_A8XX_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) | 0xcfffff);
	gpu_write(gpu, REG_A8XX_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30));

	/* Set up the CX GMU counter 0 to count busy ticks */
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);

	/* Enable the power counter */
	gmu_rmw(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_0, 0xff, BIT(5));
	gmu_write(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);

	/* Protect registers from the CP */
	a8xx_set_cp_protect(gpu);

	/* Enable the GMEM save/restore feature for preemption */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 1);

	for (pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		u32 apriv_mask = A8XX_APRIV_MASK;
		unsigned long flags;

		if (pipe_id == PIPE_LPAC)
			continue;

		if (pipe_id == PIPE_BR)
			apriv_mask = A8XX_BR_APRIV_MASK;

		a8xx_aperture_acquire(gpu, pipe_id, &flags);
		gpu_write(gpu, REG_A8XX_CP_APRIV_CNTL_PIPE, apriv_mask);
		gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_PIPE,
			  A8XX_CP_INTERRUPT_STATUS_MASK_PIPE);
		gpu_write(gpu, REG_A8XX_CP_HW_FAULT_STATUS_MASK_PIPE,
			  A8XX_CP_HW_FAULT_STATUS_MASK_PIPE);
		a8xx_aperture_release(gpu, flags);
	}

	a8xx_aperture_clear(gpu);

	/* Enable interrupts */
	gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, A8XX_CP_GLOBAL_INT_MASK);
	gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, A8XX_INT_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		goto out;

	gpu_write64(gpu, REG_A8XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
	if (a6xx_gpu->aqe_iova)
		gpu_write64(gpu, REG_A8XX_CP_AQE_INSTR_BASE_0, a6xx_gpu->aqe_iova);

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
	gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);

	/* Configure the RPTR shadow if needed: */
	gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, shadowptr(a6xx_gpu, gpu->rb[0]));
	gpu_write64(gpu, REG_A8XX_CP_RB_RPTR_ADDR_BV, rbmemptr(gpu->rb[0], bv_rptr));

	for (i = 0; i < gpu->nr_rings; i++)
		a6xx_gpu->shadow[i] = 0;

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];

	for (i = 0; i < gpu->nr_rings; i++)
		gpu->rb[i]->cur_ctx_seqno = 0;

	/* Enable the SQE to start the CP engine */
	gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 1);

	ret = a8xx_cp_init(gpu);
	if (ret)
		goto out;

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a6xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a6xx_flush(gpu, gpu->rb[0]);
		if (!a8xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
		 * if you mess this up you are about to crash horribly)
		 */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
		ret = 0;
	} else {
		return ret;
	}

	/*
	 * The GMEM_PROTECT register should be programmed after the GPU has
	 * transitioned to non-secure mode
	 */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_GC_GMEM_PROTECT, gmem_protect);
	WARN_ON(!gmem_protect);
	a8xx_aperture_clear(gpu);

	/* Enable hardware clockgating */
	a8xx_set_hwcg(gpu, true);
out:
	/*
	 * Tell the GMU that we are done touching the GPU and it can start power
	 * management
	 */
	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);

	return ret;
}

int a8xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int ret;

	mutex_lock(&a6xx_gpu->gmu.lock);
	ret = hw_init(gpu);
	mutex_unlock(&a6xx_gpu->gmu.lock);

	return ret;
}
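
/* Log the RBBM status register along with the common Adreno state dump */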
static void a8xx_dump(struct msm_gpu *gpu)
{
	DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n", gpu_read(gpu, REG_A8XX_RBBM_STATUS));
	adreno_dump(gpu);
}

void a8xx_recover(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int active_submits;

	adreno_dump_info(gpu);

	if (hang_debug)
		a8xx_dump(gpu);

	/*
	 * Flag the GPU as hung to handle recovery-specific sequences during
	 * the rpm suspend we are about to trigger
	 */
	a6xx_gpu->hung = true;

	/* Halt SQE first */
	gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 3);

	pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);

	/* active_submits won't change until we make a submission */
	mutex_lock(&gpu->active_lock);
	active_submits = gpu->active_submits;

	/*
	 * Temporarily clear active_submits count to silence a WARN() in the
	 * runtime suspend cb
	 */
	gpu->active_submits = 0;

	reinit_completion(&gmu->pd_gate);
	dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
	dev_pm_genpd_synced_poweroff(gmu->cxpd);

	/* Drop the rpm refcount from active submits */
	if (active_submits)
		pm_runtime_put(&gpu->pdev->dev);

	/* And the final one from recover worker */
	pm_runtime_put_sync(&gpu->pdev->dev);

	if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
		DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");

	dev_pm_genpd_remove_notifier(gmu->cxpd);

	pm_runtime_use_autosuspend(&gpu->pdev->dev);

	if (active_submits)
		pm_runtime_get(&gpu->pdev->dev);

	pm_runtime_get_sync(&gpu->pdev->dev);

	gpu->active_submits = active_submits;
	mutex_unlock(&gpu->active_lock);

	msm_gpu_hw_init(gpu);
	a6xx_gpu->hung = false;
}

static const char *a8xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
{
	static const char * const uche_clients[] = {
		"BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
		"BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
		"STCHE",
	};
	static const char * const uche_clients_lpac[] = {
		"-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC",
	};
	u32 val;

	/*
	 * The source of the data depends on the mid ID read from FSYNR1
	 * and the client ID read from the UCHE block
	 */
	val = gpu_read(gpu, REG_A8XX_UCHE_CLIENT_PF);

	val &= GENMASK(6, 0);

	/* mid=3 refers to BR or BV */
	if (mid == 3) {
		if (val < ARRAY_SIZE(uche_clients))
			return uche_clients[val];
		else
			return "UCHE";
	}

	/* mid=8 refers to LPAC */
	if (mid == 8) {
		if (val < ARRAY_SIZE(uche_clients_lpac))
			return uche_clients_lpac[val];
		else
			return "UCHE_LPAC";
	}

	return "Unknown";
}

static const char *a8xx_fault_block(struct msm_gpu *gpu, u32 id)
{
	switch (id) {
	case 0x0:
		return "CP";
	case 0x1:
		return "UCHE: Unknown";
	case 0x2:
		return "UCHE_LPAC: Unknown";
	case 0x3:
	case 0x8:
		return a8xx_uche_fault_block(gpu, id);
	case 0x4:
		return "CCU";
	case 0x5:
		return "Flag cache";
	case 0x6:
		return "PREFETCH";
	case 0x7:
		return "GMU";
	case 0x9:
		return "UCHE_HPAC";
	}

	return "Unknown";
}

int a8xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
{
	struct msm_gpu *gpu = arg;
	struct adreno_smmu_fault_info *info = data;
	const char *block = "unknown";

	u32 scratch[] = {
		gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(0)),
		gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(1)),
		gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(2)),
		gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(3)),
	};

	if (info)
		block = a8xx_fault_block(gpu, info->fsynr1 & 0xff);

	return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
}
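
/*
 * Decode a CP hardware error: read the global interrupt status, dump the
 * per-pipe HW/SW fault details for each affected pipe, then mask the CP and
 * RBBM interrupts and schedule recovery.
 */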
static void a8xx_cp_hw_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A8XX_CP_INTERRUPT_STATUS_GLOBAL);
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 slice = a8xx_get_first_slice(a6xx_gpu);
	u32 hw_fault_mask = GENMASK(6, 0);
	u32 sw_fault_mask = GENMASK(22, 16);
	u32 pipe = 0;

	dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Global INT status: 0x%x\n", status);

	if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR |
		      A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR))
		pipe |= BIT(PIPE_BR);

	if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV |
		      A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV))
		pipe |= BIT(PIPE_BV);

	if (!pipe) {
		dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Unknown pipe\n");
		goto out;
	}

	for (unsigned int pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		u32 val;

		if (!(BIT(pipe_id) & pipe))
			continue;

		if (hw_fault_mask & status) {
			val = a8xx_read_pipe_slice(gpu, pipe_id, slice,
					REG_A8XX_CP_HW_FAULT_STATUS_PIPE);
			dev_err_ratelimited(&gpu->pdev->dev,
					"CP HW FAULT pipe: %u status: 0x%x\n", pipe_id, val);
		}

		if (sw_fault_mask & status) {
			u32 sw_status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
					REG_A8XX_CP_INTERRUPT_STATUS_PIPE);
			dev_err_ratelimited(&gpu->pdev->dev,
					"CP SW FAULT pipe: %u status: 0x%x\n", pipe_id, sw_status);

			if (sw_status & BIT(8)) {
				a8xx_write_pipe(gpu, pipe_id, REG_A8XX_CP_SQE_STAT_ADDR_PIPE, 1);
				val = a8xx_read_pipe_slice(gpu, pipe_id, slice,
						REG_A8XX_CP_SQE_STAT_DATA_PIPE);
				dev_err_ratelimited(&gpu->pdev->dev,
						"CP Opcode error, opcode=0x%x\n", val);
			}

			if (sw_status & BIT(10)) {
				val = a8xx_read_pipe_slice(gpu, pipe_id, slice,
						REG_A8XX_CP_PROTECT_STATUS_PIPE);
				dev_err_ratelimited(&gpu->pdev->dev,
						"CP REG PROTECT error, status=0x%x\n", val);
			}
		}
	}

out:
	/* Turn off interrupts to avoid triggering recovery again */
	a8xx_aperture_clear(gpu);
	gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, 0);
	gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, 0);

	kthread_queue_work(gpu->worker, &gpu->recover_work);
}
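
/*
 * Read a CP peripheral register through the SQE ucode debug address/data
 * register pair.
 */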
static u32 gpu_periph_read(struct msm_gpu *gpu, u32 dbg_offset)
{
	gpu_write(gpu, REG_A8XX_CP_SQE_UCODE_DBG_ADDR_PIPE, dbg_offset);

	return gpu_read(gpu, REG_A8XX_CP_SQE_UCODE_DBG_DATA_PIPE);
}

static u64 gpu_periph_read64(struct msm_gpu *gpu, u32 dbg_offset)
{
	u64 lo, hi;

	lo = gpu_periph_read(gpu, dbg_offset);
	hi = gpu_periph_read(gpu, dbg_offset + 1);

	return (hi << 32) | lo;
}

#define CP_PERIPH_IB1_BASE_LO	0x7005
#define CP_PERIPH_IB1_BASE_HI	0x7006
#define CP_PERIPH_IB1_SIZE	0x7007
#define CP_PERIPH_IB1_OFFSET	0x7008
#define CP_PERIPH_IB2_BASE_LO	0x7009
#define CP_PERIPH_IB2_BASE_HI	0x700a
#define CP_PERIPH_IB2_SIZE	0x700b
#define CP_PERIPH_IB2_OFFSET	0x700c
#define CP_PERIPH_IB3_BASE_LO	0x700d
#define CP_PERIPH_IB3_BASE_HI	0x700e
#define CP_PERIPH_IB3_SIZE	0x700f
#define CP_PERIPH_IB3_OFFSET	0x7010

static void a8xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
	unsigned long flags;

	/*
	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
	 * but the fault handler will trigger the devcore dump, and we want
	 * to otherwise resume normally rather than killing the submit, so
	 * just bail.
	 */
	if (gpu_read(gpu, REG_A8XX_RBBM_MISC_STATUS) & A8XX_RBBM_MISC_STATUS_SMMU_STALLED_ON_FAULT)
		return;

	/*
	 * Force the GPU to stay on until after we finish
	 * collecting information
	 */
	if (!adreno_has_gmu_wrapper(adreno_gpu))
		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		"gpu fault ring %d fence %x status %8.8X gfx_status %8.8X\n",
		ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
		gpu_read(gpu, REG_A8XX_RBBM_STATUS), gpu_read(gpu, REG_A8XX_RBBM_GFX_STATUS));

	a8xx_aperture_acquire(gpu, PIPE_BR, &flags);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		"BR: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
		gpu_read(gpu, REG_A8XX_RBBM_GFX_BR_STATUS),
		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
		gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
		gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
		gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
		gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
		gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
		gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));

	a8xx_aperture_release(gpu, flags);
	a8xx_aperture_acquire(gpu, PIPE_BV, &flags);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		"BV: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
		gpu_read(gpu, REG_A8XX_RBBM_GFX_BV_STATUS),
		gpu_read(gpu, REG_A8XX_CP_RB_RPTR_BV),
		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
		gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
		gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
		gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
		gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
		gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
		gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));

	a8xx_aperture_release(gpu, flags);
	a8xx_aperture_clear(gpu);

	/* Turn off the hangcheck timer to keep it from bothering us */
	timer_delete(&gpu->hangcheck_timer);

	kthread_queue_work(gpu->worker, &gpu->recover_work);
}

static void a8xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
{
	u32 status;

	status = gpu_read(gpu, REG_A8XX_RBBM_SW_FUSE_INT_STATUS);
	gpu_write(gpu, REG_A8XX_RBBM_SW_FUSE_INT_MASK, 0);

	dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);

	/*
	 * Ignore FASTBLEND violations, because the HW will silently fall back
	 * to legacy blending.
	 */
	if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
		      A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
		timer_delete(&gpu->hangcheck_timer);

		kthread_queue_work(gpu->worker, &gpu->recover_work);
	}
}

irqreturn_t a8xx_irq(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	u32 status = gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS);

	gpu_write(gpu, REG_A8XX_RBBM_INT_CLEAR_CMD, status);

	if (priv->disable_err_irq)
		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
		a8xx_fault_detect_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) {
		u32 rl0, rl1;

		rl0 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_0);
		rl1 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_1);
		dev_err_ratelimited(&gpu->pdev->dev,
			"CP | AHB bus error RL_ERROR_0: %x, RL_ERROR_1: %x\n", rl0, rl1);
	}

	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a8xx_cp_hw_err_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");

	if (status & A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Trap interrupt\n");

	if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
		a8xx_sw_fuse_violation_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		msm_gpu_retire(gpu);
		a6xx_preempt_trigger(gpu);
	}

	if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
		a6xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}

void a8xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;

	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);

		gpu_scid &= GENMASK(5, 0);

		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1,
			  FIELD_PREP(GENMASK(29, 24), gpu_scid) |
			  FIELD_PREP(GENMASK(23, 18), gpu_scid) |
			  FIELD_PREP(GENMASK(17, 12), gpu_scid) |
			  FIELD_PREP(GENMASK(11, 6), gpu_scid) |
			  FIELD_PREP(GENMASK(5, 0), gpu_scid));

		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0,
			  FIELD_PREP(GENMASK(27, 22), gpu_scid) |
			  FIELD_PREP(GENMASK(21, 16), gpu_scid) |
			  FIELD_PREP(GENMASK(15, 10), gpu_scid) |
			  BIT(8));
	}

	llcc_slice_activate(a6xx_gpu->htw_llc_slice);
}

#define GBIF_CLIENT_HALT_MASK		BIT(0)
#define GBIF_ARB_HALT_MASK		BIT(1)
#define VBIF_XIN_HALT_CTRL0_MASK	GENMASK(3, 0)
#define VBIF_RESET_ACK_MASK		0xF0
#define GPR0_GBIF_HALT_REQUEST		0x1E0

void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
{
	struct msm_gpu *gpu = &adreno_gpu->base;

	if (gx_off) {
		/* Halt the gx side of GBIF */
		gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 1);
		spin_until(gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT_ACK) & 1);
	}

	/* Halt new client requests on GBIF */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
		(GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);

	/* Halt all AXI requests on GBIF */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
		(GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);

	/* The GBIF halt needs to be explicitly cleared */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
}

int a8xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	mutex_lock(&a6xx_gpu->gmu.lock);

	/* Force the GPU power on so we can read this register */
	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);

	*value = gpu_read64(gpu, REG_A8XX_CP_ALWAYS_ON_COUNTER);

	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);

	mutex_unlock(&a6xx_gpu->gmu.lock);

	return 0;
}

u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u64 busy_cycles;

	/* 19.2MHz */
	*out_sample_rate = 19200000;

	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
			REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
			REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);

	return busy_cycles;
}

bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	return true;
}