xref: /linux/drivers/gpu/drm/msm/adreno/a8xx_gpu.c (revision 3f1c07fc21c68bd3bd2df9d2c9441f6485e934d9)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */
3 
4 
5 #include "msm_gem.h"
6 #include "msm_mmu.h"
7 #include "msm_gpu_trace.h"
8 #include "a6xx_gpu.h"
9 #include "a6xx_gmu.xml.h"
10 
11 #include <linux/bitfield.h>
12 #include <linux/devfreq.h>
13 #include <linux/firmware/qcom/qcom_scm.h>
14 #include <linux/pm_domain.h>
15 #include <linux/soc/qcom/llcc-qcom.h>
16 
17 #define GPU_PAS_ID 13
18 
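/*
 * Many CP registers on A8xx appear to be banked per pipe (BR, BV, ...) and
 * per slice behind a single host aperture: CP_APERTURE_CNTL_HOST selects
 * which pipe/slice instance subsequent accesses target. The helpers below
 * serialize that selection with aperture_lock and cache the last programmed
 * value so redundant aperture writes are skipped.
 */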
19 static void a8xx_aperture_slice_set(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice)
20 {
21 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
22 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
23 	u32 val;
24 
25 	val = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe) | A8XX_CP_APERTURE_CNTL_HOST_SLICEID(slice);
26 
27 	if (a6xx_gpu->cached_aperture == val)
28 		return;
29 
30 	gpu_write(gpu, REG_A8XX_CP_APERTURE_CNTL_HOST, val);
31 
32 	a6xx_gpu->cached_aperture = val;
33 }
34 
35 static void a8xx_aperture_acquire(struct msm_gpu *gpu, enum adreno_pipe pipe, unsigned long *flags)
36 {
37 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
38 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
39 
40 	spin_lock_irqsave(&a6xx_gpu->aperture_lock, *flags);
41 
42 	a8xx_aperture_slice_set(gpu, pipe, 0);
43 }
44 
45 static void a8xx_aperture_release(struct msm_gpu *gpu, unsigned long flags)
46 {
47 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
48 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
49 
50 	spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);
51 }
52 
53 static void a8xx_aperture_clear(struct msm_gpu *gpu)
54 {
55 	unsigned long flags;
56 
57 	a8xx_aperture_acquire(gpu, PIPE_NONE, &flags);
58 	a8xx_aperture_release(gpu, flags);
59 }
60 
61 static void a8xx_write_pipe(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 offset, u32 data)
62 {
63 	unsigned long flags;
64 
65 	a8xx_aperture_acquire(gpu, pipe, &flags);
66 	gpu_write(gpu, offset, data);
67 	a8xx_aperture_release(gpu, flags);
68 }
69 
70 static u32 a8xx_read_pipe_slice(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice, u32 offset)
71 {
72 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
73 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
74 	unsigned long flags;
75 	u32 val;
76 
77 	spin_lock_irqsave(&a6xx_gpu->aperture_lock, flags);
78 	a8xx_aperture_slice_set(gpu, pipe, slice);
79 	val = gpu_read(gpu, offset);
80 	spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);
81 
82 	return val;
83 }
84 
85 void a8xx_gpu_get_slice_info(struct msm_gpu *gpu)
86 {
87 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
88 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
89 	const struct a6xx_info *info = adreno_gpu->info->a6xx;
90 	u32 slice_mask;
91 
92 	if (adreno_gpu->info->family < ADRENO_8XX_GEN1)
93 		return;
94 
95 	if (a6xx_gpu->slice_mask)
96 		return;
97 
98 	slice_mask = GENMASK(info->max_slices - 1, 0);
99 
100 	/* GEN1 doesn't support partial slice configurations */
101 	if (adreno_gpu->info->family == ADRENO_8XX_GEN1) {
102 		a6xx_gpu->slice_mask = slice_mask;
103 		return;
104 	}
105 
106 	slice_mask &= a6xx_llc_read(a6xx_gpu,
107 			REG_A8XX_CX_MISC_SLICE_ENABLE_FINAL);
108 
109 	a6xx_gpu->slice_mask = slice_mask;
110 
111 	/* Chip ID depends on the number of slices available. So update it */
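	/* e.g. slice_mask == 0x7 (three slices) ORs 0x30 into bits [7:4] of chip_id */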
112 	adreno_gpu->chip_id |= FIELD_PREP(GENMASK(7, 4), hweight32(slice_mask));
113 }
114 
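/* Index of the lowest enabled slice, used where reading a single representative slice is enough */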
115 static u32 a8xx_get_first_slice(struct a6xx_gpu *a6xx_gpu)
116 {
117 	return ffs(a6xx_gpu->slice_mask) - 1;
118 }
119 
120 static inline bool _a8xx_check_idle(struct msm_gpu *gpu)
121 {
122 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
123 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
124 
125 	/* Check that the GMU is idle */
126 	if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
127 		return false;
128 
129 	/* Check that the CX master is idle */
130 	if (gpu_read(gpu, REG_A8XX_RBBM_STATUS) &
131 			~A8XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
132 		return false;
133 
134 	return !(gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS) &
135 		 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
136 }
137 
138 static bool a8xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
139 {
140 	/* wait for CP to drain ringbuffer: */
141 	if (!adreno_idle(gpu, ring))
142 		return false;
143 
144 	if (spin_until(_a8xx_check_idle(gpu))) {
145 		DRM_ERROR(
146 			"%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
147 			gpu->name, __builtin_return_address(0),
148 			gpu_read(gpu, REG_A8XX_RBBM_STATUS),
149 			gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS),
150 			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
151 			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
152 		return false;
153 	}
154 
155 	return true;
156 }
157 
158 void a8xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
159 {
160 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
161 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
162 	uint32_t wptr;
163 	unsigned long flags;
164 
165 	spin_lock_irqsave(&ring->preempt_lock, flags);
166 
167 	/* Copy the shadow to the actual register */
168 	ring->cur = ring->next;
169 
170 	/* Make sure to wrap wptr if we need to */
171 	wptr = get_wptr(ring);
172 
173 	/* Update HW if this is the current ring and we are not in preempt */
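	/*
	 * Otherwise set restore_wptr so the write can be replayed later,
	 * presumably once this ring is switched back in
	 */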
174 	if (!a6xx_in_preempt(a6xx_gpu)) {
175 		if (a6xx_gpu->cur_ring == ring)
176 			gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
177 		else
178 			ring->restore_wptr = true;
179 	} else {
180 		ring->restore_wptr = true;
181 	}
182 
183 	spin_unlock_irqrestore(&ring->preempt_lock, flags);
184 }
185 
186 static void a8xx_set_hwcg(struct msm_gpu *gpu, bool state)
187 {
188 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
189 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
190 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
191 	u32 val;
192 
193 	if (adreno_is_x285(adreno_gpu) && state)
194 		gpu_write(gpu, REG_A8XX_RBBM_CGC_0_PC, 0x00000702);
195 
196 	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
197 			state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
198 	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
199 			state ? 0x110111 : 0);
200 	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
201 			state ? 0x55555 : 0);
202 
203 	gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
204 	gpu_write(gpu, REG_A8XX_RBBM_CGC_GLOBAL_LOAD_CMD, !!state);
205 
206 	if (state) {
207 		gpu_write(gpu, REG_A8XX_RBBM_CGC_P2S_TRIG_CMD, 1);
208 
209 		if (gpu_poll_timeout(gpu, REG_A8XX_RBBM_CGC_P2S_STATUS, val,
210 				     val & A8XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
211 			dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
212 			return;
213 		}
214 
215 		gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
216 	} else {
217 		/*
218 		 * GMU enables clk gating in GBIF during boot up. So,
219 		 * override that here when hwcg feature is disabled
220 		 */
221 		gpu_rmw(gpu, REG_A8XX_GBIF_CX_CONFIG, BIT(0), 0);
222 	}
223 }
224 
225 static void a8xx_set_cp_protect(struct msm_gpu *gpu)
226 {
227 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
228 	const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
229 	u32 cntl, final_cfg;
230 	unsigned int i;
231 
232 	cntl = A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_PROT_EN |
233 		A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_FAULT_ON_VIOL_EN |
234 		A8XX_CP_PROTECT_CNTL_PIPE_LAST_SPAN_INF_RANGE |
235 		A8XX_CP_PROTECT_CNTL_PIPE_HALT_SQE_RANGE__MASK;
236 	/*
237 	 * Enable access protection to privileged registers, fault on an access
238 	 * protect violation and select the last span to protect from the start
239 	 * address all the way to the end of the register address space
240 	 */
241 	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl);
242 	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl);
243 
244 	a8xx_aperture_clear(gpu);
245 
246 	for (i = 0; i < protect->count; i++) {
247 		/* Intentionally skip writing to some registers */
248 		if (protect->regs[i]) {
249 			gpu_write(gpu, REG_A8XX_CP_PROTECT_GLOBAL(i), protect->regs[i]);
250 			final_cfg = protect->regs[i];
251 		}
252 	}
253 
254 	/*
255 	 * The last span feature is only supported on the PIPE specific
256 	 * registers, so update those here
257 	 */
258 	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg);
259 	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg);
260 
261 	a8xx_aperture_clear(gpu);
262 }
263 
264 static void a8xx_set_ubwc_config(struct msm_gpu *gpu)
265 {
266 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
267 	const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config;
268 	u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2);
269 	u32 level3_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL3);
270 	bool rgba8888_lossless = false, fp16compoptdis = false;
271 	bool yuvnotcomptofc = false, min_acc_len_64b = false;
272 	bool rgb565_predicator = false, amsbc = false;
273 	bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg);
274 	u32 ubwc_version = cfg->ubwc_enc_version;
275 	u32 hbb, hbb_hi, hbb_lo, mode = 1;
276 	u8 uavflagprd_inv = 2;
277 
278 	switch (ubwc_version) {
279 	case UBWC_5_0:
280 		amsbc = true;
281 		rgb565_predicator = true;
282 		mode = 4;
283 		break;
284 	case UBWC_4_0:
285 		amsbc = true;
286 		rgb565_predicator = true;
287 		fp16compoptdis = true;
288 		rgba8888_lossless = true;
289 		mode = 2;
290 		break;
291 	case UBWC_3_0:
292 		amsbc = true;
293 		mode = 1;
294 		break;
295 	default:
296 		dev_err(&gpu->pdev->dev, "Unknown UBWC version: 0x%x\n", ubwc_version);
297 		break;
298 	}
299 
300 	/*
301 	 * We subtract 13 from the highest bank bit (13 is the minimum value
302 	 * allowed by hw) and write the lowest two bits of the remaining value
303 	 * as hbb_lo and the one above it as hbb_hi to the hardware.
304 	 */
305 	WARN_ON(cfg->highest_bank_bit < 13);
306 	hbb = cfg->highest_bank_bit - 13;
307 	hbb_hi = hbb >> 2;
308 	hbb_lo = hbb & 3;
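	/* e.g. highest_bank_bit == 16 gives hbb == 3, hbb_hi == 0, hbb_lo == 3 */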
309 	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL, hbb << 5);
310 	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL, hbb << 5);
311 
312 	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CCU_NC_MODE_CNTL,
313 			yuvnotcomptofc << 6 |
314 			hbb_hi << 3 |
315 			hbb_lo << 1);
316 
317 	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CMP_NC_MODE_CNTL,
318 			mode << 15 |
319 			yuvnotcomptofc << 6 |
320 			rgba8888_lossless << 4 |
321 			fp16compoptdis << 3 |
322 			rgb565_predicator << 2 |
323 			amsbc << 1 |
324 			min_acc_len_64b);
325 
326 	a8xx_aperture_clear(gpu);
327 
328 	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
329 		  level3_swizzling_dis << 13 |
330 		  level2_swizzling_dis << 12 |
331 		  hbb_hi << 10 |
332 		  uavflagprd_inv << 4 |
333 		  min_acc_len_64b << 3 |
334 		  hbb_lo << 1 | ubwc_mode);
335 
336 	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
337 		  level3_swizzling_dis << 7 |
338 		  level2_swizzling_dis << 6 |
339 		  hbb_hi << 4 |
340 		  min_acc_len_64b << 3 |
341 		  hbb_lo << 1 | ubwc_mode);
342 }
343 
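/*
 * Program the non-context (global) registers for every pipe from the device
 * reglist. The RB_GC_GMEM_PROTECT value is also captured so hw_init() can
 * rewrite it once the GPU has switched out of secure mode.
 */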
344 static void a8xx_nonctxt_config(struct msm_gpu *gpu, u32 *gmem_protect)
345 {
346 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
347 	const struct a6xx_info *info = adreno_gpu->info->a6xx;
348 	const struct adreno_reglist_pipe *regs = info->nonctxt_reglist;
349 	unsigned int pipe_id, i;
350 	unsigned long flags;
351 
352 	for (pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) {
353 		/* We don't have support for LPAC yet */
354 		if (pipe_id == PIPE_LPAC)
355 			continue;
356 
357 		a8xx_aperture_acquire(gpu, pipe_id, &flags);
358 
359 		for (i = 0; regs[i].offset; i++) {
360 			if (!(BIT(pipe_id) & regs[i].pipe))
361 				continue;
362 
363 			if (regs[i].offset == REG_A8XX_RB_GC_GMEM_PROTECT)
364 				*gmem_protect = regs[i].value;
365 
366 			gpu_write(gpu, regs[i].offset, regs[i].value);
367 		}
368 
369 		a8xx_aperture_release(gpu, flags);
370 	}
371 
372 	a8xx_aperture_clear(gpu);
373 }
374 
375 static int a8xx_cp_init(struct msm_gpu *gpu)
376 {
377 	struct msm_ringbuffer *ring = gpu->rb[0];
378 	u32 mask;
379 
380 	/* Disable concurrent binning before sending CP init */
381 	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
382 	OUT_RING(ring, BIT(27));
383 
384 	OUT_PKT7(ring, CP_ME_INIT, 4);
385 
386 	/* Use multiple HW contexts */
387 	mask = BIT(0);
388 
389 	/* Enable error detection */
390 	mask |= BIT(1);
391 
392 	/* Set default reset state */
393 	mask |= BIT(3);
394 
395 	/* Disable save/restore of performance counters across preemption */
396 	mask |= BIT(6);
397 
398 	OUT_RING(ring, mask);
399 
400 	/* Enable multiple hardware contexts */
401 	OUT_RING(ring, 0x00000003);
402 
403 	/* Enable error detection */
404 	OUT_RING(ring, 0x20000000);
405 
406 	/* Operation mode mask */
407 	OUT_RING(ring, 0x00000002);
408 
409 	a6xx_flush(gpu, ring);
410 	return a8xx_idle(gpu, ring) ? 0 : -EINVAL;
411 }
412 
413 #define A8XX_INT_MASK \
414 	(A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
415 	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
416 	 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
417 	 A6XX_RBBM_INT_0_MASK_CP_SW | \
418 	 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
419 	 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
420 	 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
421 	 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
422 	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
423 	 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
424 	 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
425 	 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
426 	 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
427 	 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
428 
429 #define A8XX_APRIV_MASK \
430 	(A8XX_CP_APRIV_CNTL_PIPE_ICACHE | \
431 	 A8XX_CP_APRIV_CNTL_PIPE_RBFETCH | \
432 	 A8XX_CP_APRIV_CNTL_PIPE_RBPRIVLEVEL | \
433 	 A8XX_CP_APRIV_CNTL_PIPE_RBRPWB)
434 
435 #define A8XX_BR_APRIV_MASK \
436 	(A8XX_APRIV_MASK | \
437 	 A8XX_CP_APRIV_CNTL_PIPE_CDREAD | \
438 	 A8XX_CP_APRIV_CNTL_PIPE_CDWRITE)
439 
440 #define A8XX_CP_GLOBAL_INT_MASK \
441 	(A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | \
442 	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | \
443 	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTLPAC | \
444 	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE0 | \
445 	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE1 | \
446 	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBR | \
447 	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBV | \
448 	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR | \
449 	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV | \
450 	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTLPAC | \
451 	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE0 | \
452 	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE1 | \
453 	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBR | \
454 	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBV)
455 
456 #define A8XX_CP_INTERRUPT_STATUS_MASK_PIPE \
457 	(A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFRBWRAP | \
458 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB1WRAP | \
459 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB2WRAP | \
460 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB3WRAP | \
461 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFSDSWRAP | \
462 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFMRBWRAP | \
463 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFVSDWRAP | \
464 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_OPCODEERROR | \
465 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VSDPARITYERROR | \
466 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_REGISTERPROTECTIONERROR | \
467 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_ILLEGALINSTRUCTION | \
468 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_SMMUFAULT | \
469 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPCLIENT | \
470 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPTYPE | \
471 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPREAD | \
472 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESP | \
473 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_RTWROVF | \
474 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTWROVF | \
475 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTREFCNTOVF | \
476 	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTCLRRESMISS)
477 
478 #define A8XX_CP_HW_FAULT_STATUS_MASK_PIPE \
479 	(A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFRBFAULT | \
480 	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB1FAULT | \
481 	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB2FAULT | \
482 	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB3FAULT | \
483 	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFSDSFAULT | \
484 	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFMRBFAULT | \
485 	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFVSDFAULT | \
486 	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_SQEREADBURSTOVF | \
487 	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_EVENTENGINEOVF | \
488 	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_UCODEERROR)
489 
490 static int hw_init(struct msm_gpu *gpu)
491 {
492 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
493 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
494 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
495 	unsigned int pipe_id, i;
496 	u32 gmem_protect = 0;
497 	u64 gmem_range_min;
498 	int ret;
499 
500 	ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
501 	if (ret)
502 		return ret;
503 
504 	/* Clear the cached value to force aperture configuration next time */
505 	a6xx_gpu->cached_aperture = UINT_MAX;
506 	a8xx_aperture_clear(gpu);
507 
508 	/* Clear GBIF halt in case GX domain was not collapsed */
509 	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
510 	gpu_read(gpu, REG_A6XX_GBIF_HALT);
511 
512 	gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 0);
513 	gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT);
514 
515 	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
516 
517 	/*
518 	 * Disable the trusted memory range - we don't actually support secure
519 	 * memory rendering at this point in time and we don't want to block off
520 	 * part of the virtual memory space.
521 	 */
522 	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
523 	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
524 
525 	/* Make all blocks contribute to the GPU BUSY perf counter */
526 	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
527 
528 	/* Setup GMEM Range in UCHE */
529 	gmem_range_min = SZ_64M;
530 	/* Set the GMEM VA range [gmem_range_min : gmem_range_min + gpu->gmem - 1] */
531 	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_GC_GMEM_RANGE_MIN, gmem_range_min);
532 	gpu_write64(gpu, REG_A8XX_SP_HLSQ_GC_GMEM_RANGE_MIN, gmem_range_min);
533 
534 	/* Setup UCHE Trap region */
535 	gpu_write64(gpu, REG_A8XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
536 	gpu_write64(gpu, REG_A8XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
537 	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
538 	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
539 
540 	/* Turn on performance counters */
541 	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_CNTL, 0x1);
542 	gpu_write(gpu, REG_A8XX_RBBM_SLICE_PERFCTR_CNTL, 0x1);
543 
544 	/* Turn on the IFPC counter (countable 4 on XOCLK1) */
545 	gmu_write(&a6xx_gpu->gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_1,
546 		  FIELD_PREP(GENMASK(7, 0), 0x4));
547 
548 	/* Select CP0 to always count cycles */
549 	gpu_write(gpu, REG_A8XX_CP_PERFCTR_CP_SEL(0), 1);
550 
551 	a8xx_set_ubwc_config(gpu);
552 
553 	/* Set weights for bicubic filtering */
554 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0);
555 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4);
556 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee);
557 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed);
558 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0);
559 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(5), 0x00000000);
560 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(6), 0x3fd093e8);
561 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(7), 0x3f4133dc);
562 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(8), 0x3ea1dfdb);
563 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(9), 0x3e0283e0);
564 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(10), 0x0000ac2b);
565 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(11), 0x0000f01d);
566 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(12), 0x00114412);
567 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(13), 0x0021980a);
568 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(14), 0x0051ec05);
569 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(15), 0x0000380e);
570 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(16), 0x3ff09001);
571 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(17), 0x3fc10bfa);
572 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(18), 0x3f9193f7);
573 	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(19), 0x3f7227f7);
574 
575 	gpu_write(gpu, REG_A8XX_UCHE_CLIENT_PF, BIT(7) | 0x1);
576 
577 	a8xx_nonctxt_config(gpu, &gmem_protect);
578 
579 	/* Enable fault detection */
580 	gpu_write(gpu, REG_A8XX_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) | 0xcfffff);
581 	gpu_write(gpu, REG_A8XX_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30));
582 
583 	/* Set up the CX GMU counter 0 to count busy ticks */
584 	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);
585 
586 	/* Enable the power counter */
587 	gmu_rmw(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_0, 0xff, BIT(5));
588 	gmu_write(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);
589 
590 	/* Protect registers from the CP */
591 	a8xx_set_cp_protect(gpu);
592 
593 	/* Enable the GMEM save/restore feature for preemption */
594 	a8xx_write_pipe(gpu, PIPE_BR, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 1);
595 
596 	for (pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) {
597 		u32 apriv_mask = A8XX_APRIV_MASK;
598 		unsigned long flags;
599 
600 		if (pipe_id == PIPE_LPAC)
601 			continue;
602 
603 		if (pipe_id == PIPE_BR)
604 			apriv_mask = A8XX_BR_APRIV_MASK;
605 
606 		a8xx_aperture_acquire(gpu, pipe_id, &flags);
607 		gpu_write(gpu, REG_A8XX_CP_APRIV_CNTL_PIPE, apriv_mask);
608 		gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_PIPE,
609 				A8XX_CP_INTERRUPT_STATUS_MASK_PIPE);
610 		gpu_write(gpu, REG_A8XX_CP_HW_FAULT_STATUS_MASK_PIPE,
611 				A8XX_CP_HW_FAULT_STATUS_MASK_PIPE);
612 		a8xx_aperture_release(gpu, flags);
613 	}
614 
615 	a8xx_aperture_clear(gpu);
616 
617 	/* Enable interrupts */
618 	gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, A8XX_CP_GLOBAL_INT_MASK);
619 	gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, A8XX_INT_MASK);
620 
621 	ret = adreno_hw_init(gpu);
622 	if (ret)
623 		goto out;
624 
625 	gpu_write64(gpu, REG_A8XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
626 	if (a6xx_gpu->aqe_iova)
627 		gpu_write64(gpu, REG_A8XX_CP_AQE_INSTR_BASE_0, a6xx_gpu->aqe_iova);
628 
629 	/* Set the ringbuffer address */
630 	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
631 	gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
632 
633 	/* Configure the RPTR shadow if needed: */
634 	gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, shadowptr(a6xx_gpu, gpu->rb[0]));
635 	gpu_write64(gpu, REG_A8XX_CP_RB_RPTR_ADDR_BV, rbmemptr(gpu->rb[0], bv_rptr));
636 
637 	for (i = 0; i < gpu->nr_rings; i++)
638 		a6xx_gpu->shadow[i] = 0;
639 
640 	/* Always come up on rb 0 */
641 	a6xx_gpu->cur_ring = gpu->rb[0];
642 
643 	for (i = 0; i < gpu->nr_rings; i++)
644 		gpu->rb[i]->cur_ctx_seqno = 0;
645 
646 	/* Enable the SQE to start the CP engine */
647 	gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 1);
648 
649 	ret = a8xx_cp_init(gpu);
650 	if (ret)
651 		goto out;
652 
653 	/*
654 	 * Try to load a zap shader into the secure world. If successful
655 	 * we can use the CP to switch out of secure mode. If not then we
656 	 * have no recourse but to try to switch ourselves out manually. If we
657 	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
658 	 * be blocked and a permissions violation will soon follow.
659 	 */
660 	ret = a6xx_zap_shader_init(gpu);
661 	if (!ret) {
662 		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
663 		OUT_RING(gpu->rb[0], 0x00000000);
664 
665 		a6xx_flush(gpu, gpu->rb[0]);
666 		if (!a8xx_idle(gpu, gpu->rb[0]))
667 			return -EINVAL;
668 	} else if (ret == -ENODEV) {
669 		/*
670 		 * This device does not use zap shader (but print a warning
671 		 * just in case someone got their dt wrong.. hopefully they
672 		 * have a debug UART to realize the error of their ways...
673 		 * if you mess this up you are about to crash horribly)
674 		 */
675 		dev_warn_once(gpu->dev->dev,
676 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
677 		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
678 		ret = 0;
679 	} else {
680 		return ret;
681 	}
682 
683 	/*
684 	 * GMEM_PROTECT register should be programmed after GPU is transitioned to
685 	 * non-secure mode
686 	 */
687 	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_GC_GMEM_PROTECT, gmem_protect);
688 	WARN_ON(!gmem_protect);
689 	a8xx_aperture_clear(gpu);
690 
691 	/* Enable hardware clockgating */
692 	a8xx_set_hwcg(gpu, true);
693 out:
694 	/*
695 	 * Tell the GMU that we are done touching the GPU and it can start power
696 	 * management
697 	 */
698 	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
699 
700 	return ret;
701 }
702 
703 int a8xx_hw_init(struct msm_gpu *gpu)
704 {
705 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
706 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
707 	int ret;
708 
709 	mutex_lock(&a6xx_gpu->gmu.lock);
710 	ret = hw_init(gpu);
711 	mutex_unlock(&a6xx_gpu->gmu.lock);
712 
713 	return ret;
714 }
715 
716 static void a8xx_dump(struct msm_gpu *gpu)
717 {
718 	DRM_DEV_INFO(&gpu->pdev->dev, "status:   %08x\n", gpu_read(gpu, REG_A8XX_RBBM_STATUS));
719 	adreno_dump(gpu);
720 }
721 
722 void a8xx_recover(struct msm_gpu *gpu)
723 {
724 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
725 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
726 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
727 	int active_submits;
728 
729 	adreno_dump_info(gpu);
730 
731 	if (hang_debug)
732 		a8xx_dump(gpu);
733 
734 	/*
735 	 * Set the hung flag to handle recovery specific sequences during the
736 	 * rpm suspend we are about to trigger
737 	 */
738 	a6xx_gpu->hung = true;
739 
740 	/* Halt SQE first */
741 	gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 3);
742 
743 	pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);
744 
745 	/* active_submits won't change until we make a submission */
746 	mutex_lock(&gpu->active_lock);
747 	active_submits = gpu->active_submits;
748 
749 	/*
750 	 * Temporarily clear active_submits count to silence a WARN() in the
751 	 * runtime suspend cb
752 	 */
753 	gpu->active_submits = 0;
754 
755 	reinit_completion(&gmu->pd_gate);
756 	dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
757 	dev_pm_genpd_synced_poweroff(gmu->cxpd);
758 
759 	/* Drop the rpm refcount from active submits */
760 	if (active_submits)
761 		pm_runtime_put(&gpu->pdev->dev);
762 
763 	/* And the final one from recover worker */
764 	pm_runtime_put_sync(&gpu->pdev->dev);
765 
766 	if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
767 		DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");
768 
769 	dev_pm_genpd_remove_notifier(gmu->cxpd);
770 
771 	pm_runtime_use_autosuspend(&gpu->pdev->dev);
772 
773 	if (active_submits)
774 		pm_runtime_get(&gpu->pdev->dev);
775 
776 	pm_runtime_get_sync(&gpu->pdev->dev);
777 
778 	gpu->active_submits = active_submits;
779 	mutex_unlock(&gpu->active_lock);
780 
781 	msm_gpu_hw_init(gpu);
782 	a6xx_gpu->hung = false;
783 }
784 
785 static const char *a8xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
786 {
787 	static const char * const uche_clients[] = {
788 		"BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
789 		"BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
790 		"STCHE",
791 	};
792 	static const char * const uche_clients_lpac[] = {
793 		"-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC",
794 	};
795 	u32 val;
796 
797 	/*
798 	 * The source of the data depends on the mid ID read from FSYNR1
799 	 * and the client ID read from the UCHE block
800 	 */
801 	val = gpu_read(gpu, REG_A8XX_UCHE_CLIENT_PF);
802 
803 	val &= GENMASK(6, 0);
804 
805 	/* mid=3 refers to BR or BV */
806 	if (mid == 3) {
807 		if (val < ARRAY_SIZE(uche_clients))
808 			return uche_clients[val];
809 		else
810 			return "UCHE";
811 	}
812 
813 	/* mid=8 refers to LPAC */
814 	if (mid == 8) {
815 		if (val < ARRAY_SIZE(uche_clients_lpac))
816 			return uche_clients_lpac[val];
817 		else
818 			return "UCHE_LPAC";
819 	}
820 
821 	return "Unknown";
822 }
823 
824 static const char *a8xx_fault_block(struct msm_gpu *gpu, u32 id)
825 {
826 	switch (id) {
827 	case 0x0:
828 		return "CP";
829 	case 0x1:
830 		return "UCHE: Unknown";
831 	case 0x2:
832 		return "UCHE_LPAC: Unknown";
833 	case 0x3:
834 	case 0x8:
835 		return a8xx_uche_fault_block(gpu, id);
836 	case 0x4:
837 		return "CCU";
838 	case 0x5:
839 		return "Flag cache";
840 	case 0x6:
841 		return "PREFETCH";
842 	case 0x7:
843 		return "GMU";
844 	case 0x9:
845 		return "UCHE_HPAC";
846 	}
847 
848 	return "Unknown";
849 }
850 
851 int a8xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
852 {
853 	struct msm_gpu *gpu = arg;
854 	struct adreno_smmu_fault_info *info = data;
855 	const char *block = "unknown";
856 
857 	u32 scratch[] = {
858 			gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(0)),
859 			gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(1)),
860 			gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(2)),
861 			gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(3)),
862 	};
863 
864 	if (info)
865 		block = a8xx_fault_block(gpu, info->fsynr1 & 0xff);
866 
867 	return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
868 }
869 
870 static void a8xx_cp_hw_err_irq(struct msm_gpu *gpu)
871 {
872 	u32 status = gpu_read(gpu, REG_A8XX_CP_INTERRUPT_STATUS_GLOBAL);
873 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
874 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
875 	u32 slice = a8xx_get_first_slice(a6xx_gpu);
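	/*
	 * hw_fault_mask/sw_fault_mask pick out the HWFAULT* ([6:0]) and
	 * SWFAULT* ([22:16]) groups of the global interrupt status
	 */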
876 	u32 hw_fault_mask = GENMASK(6, 0);
877 	u32 sw_fault_mask = GENMASK(22, 16);
878 	u32 pipe = 0;
879 
880 	dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Global INT status: 0x%x\n", status);
881 
882 	if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR |
883 		      A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR))
884 		pipe |= BIT(PIPE_BR);
885 
886 	if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV |
887 		      A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV))
888 		pipe |= BIT(PIPE_BV);
889 
890 	if (!pipe) {
891 		dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Unknown pipe\n");
892 		goto out;
893 	}
894 
895 	for (unsigned int pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) {
896 		if (!(BIT(pipe_id) & pipe))
897 			continue;
898 
899 		if (hw_fault_mask & status) {
900 			status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
901 					REG_A8XX_CP_HW_FAULT_STATUS_PIPE);
902 			dev_err_ratelimited(&gpu->pdev->dev,
903 					"CP HW FAULT pipe: %u status: 0x%x\n", pipe_id, status);
904 		}
905 
906 		if (sw_fault_mask & status) {
907 			status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
908 					REG_A8XX_CP_INTERRUPT_STATUS_PIPE);
909 			dev_err_ratelimited(&gpu->pdev->dev,
910 					"CP SW FAULT pipe: %u status: 0x%x\n", pipe_id, status);
911 
912 			if (status & BIT(8)) {
913 				a8xx_write_pipe(gpu, pipe_id, REG_A8XX_CP_SQE_STAT_ADDR_PIPE, 1);
914 				status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
915 						REG_A8XX_CP_SQE_STAT_DATA_PIPE);
916 				dev_err_ratelimited(&gpu->pdev->dev,
917 						"CP Opcode error, opcode=0x%x\n", status);
918 			}
919 
920 			if (status & BIT(10)) {
921 				status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
922 						REG_A8XX_CP_PROTECT_STATUS_PIPE);
923 				dev_err_ratelimited(&gpu->pdev->dev,
924 						"CP REG PROTECT error, status=0x%x\n", status);
925 			}
926 		}
927 	}
928 
929 out:
930 	/* Turn off interrupts to avoid triggering recovery again */
931 	a8xx_aperture_clear(gpu);
932 	gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, 0);
933 	gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, 0);
934 
935 	kthread_queue_work(gpu->worker, &gpu->recover_work);
936 }
937 
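/*
 * Indirect read of CP SQE peripheral state: write the peripheral offset to
 * the SQE ucode debug address register, then read the value back through the
 * matching data register.
 */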
938 static u32 gpu_periph_read(struct msm_gpu *gpu, u32 dbg_offset)
939 {
940 	gpu_write(gpu, REG_A8XX_CP_SQE_UCODE_DBG_ADDR_PIPE, dbg_offset);
941 
942 	return gpu_read(gpu, REG_A8XX_CP_SQE_UCODE_DBG_DATA_PIPE);
943 }
944 
945 static u64 gpu_periph_read64(struct msm_gpu *gpu, u32 dbg_offset)
946 {
947 	u64 lo, hi;
948 
949 	lo = gpu_periph_read(gpu, dbg_offset);
950 	hi = gpu_periph_read(gpu, dbg_offset + 1);
951 
952 	return (hi << 32) | lo;
953 }
954 
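/*
 * Per-pipe IB1/IB2/IB3 base/size/offset as exposed through the SQE debug
 * window above; read via gpu_periph_read()/gpu_periph_read64() in
 * a8xx_fault_detect_irq() below.
 */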
955 #define CP_PERIPH_IB1_BASE_LO   0x7005
956 #define CP_PERIPH_IB1_BASE_HI   0x7006
957 #define CP_PERIPH_IB1_SIZE      0x7007
958 #define CP_PERIPH_IB1_OFFSET    0x7008
959 #define CP_PERIPH_IB2_BASE_LO   0x7009
960 #define CP_PERIPH_IB2_BASE_HI   0x700a
961 #define CP_PERIPH_IB2_SIZE      0x700b
962 #define CP_PERIPH_IB2_OFFSET    0x700c
963 #define CP_PERIPH_IB3_BASE_LO   0x700d
964 #define CP_PERIPH_IB3_BASE_HI   0x700e
965 #define CP_PERIPH_IB3_SIZE      0x700f
966 #define CP_PERIPH_IB3_OFFSET    0x7010
967 
968 static void a8xx_fault_detect_irq(struct msm_gpu *gpu)
969 {
970 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
971 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
972 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
973 	unsigned long flags;
974 
975 	/*
976 	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
977 	 * but the fault handler will trigger the devcore dump, and we want
978 	 * to otherwise resume normally rather than killing the submit, so
979 	 * just bail.
980 	 */
981 	if (gpu_read(gpu, REG_A8XX_RBBM_MISC_STATUS) & A8XX_RBBM_MISC_STATUS_SMMU_STALLED_ON_FAULT)
982 		return;
983 
984 	/*
985 	 * Force the GPU to stay on until after we finish
986 	 * collecting information
987 	 */
988 	if (!adreno_has_gmu_wrapper(adreno_gpu))
989 		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);
990 
991 	DRM_DEV_ERROR(&gpu->pdev->dev,
992 		"gpu fault ring %d fence %x status %8.8X gfx_status %8.8X\n",
993 		ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
994 		gpu_read(gpu, REG_A8XX_RBBM_STATUS), gpu_read(gpu, REG_A8XX_RBBM_GFX_STATUS));
995 
996 	a8xx_aperture_acquire(gpu, PIPE_BR, &flags);
997 
998 	DRM_DEV_ERROR(&gpu->pdev->dev,
999 		"BR: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
1000 		gpu_read(gpu, REG_A8XX_RBBM_GFX_BR_STATUS),
1001 		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
1002 		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
1003 		gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
1004 		gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
1005 		gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
1006 		gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
1007 		gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
1008 		gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));
1009 
1010 	a8xx_aperture_release(gpu, flags);
1011 	a8xx_aperture_acquire(gpu, PIPE_BV, &flags);
1012 
1013 	DRM_DEV_ERROR(&gpu->pdev->dev,
1014 		"BV: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
1015 		gpu_read(gpu, REG_A8XX_RBBM_GFX_BV_STATUS),
1016 		gpu_read(gpu, REG_A8XX_CP_RB_RPTR_BV),
1017 		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
1018 		gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
1019 		gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
1020 		gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
1021 		gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
1022 		gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
1023 		gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));
1024 
1025 	a8xx_aperture_release(gpu, flags);
1026 	a8xx_aperture_clear(gpu);
1027 
1028 	/* Turn off the hangcheck timer to keep it from bothering us */
1029 	timer_delete(&gpu->hangcheck_timer);
1030 
1031 	kthread_queue_work(gpu->worker, &gpu->recover_work);
1032 }
1033 
1034 static void a8xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
1035 {
1036 	u32 status;
1037 
1038 	status = gpu_read(gpu, REG_A8XX_RBBM_SW_FUSE_INT_STATUS);
1039 	gpu_write(gpu, REG_A8XX_RBBM_SW_FUSE_INT_MASK, 0);
1040 
1041 	dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);
1042 
1043 	/*
1044 	 * Ignore FASTBLEND violations, because the HW will silently fall back
1045 	 * to legacy blending.
1046 	 */
1047 	if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
1048 		      A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
1049 		timer_delete(&gpu->hangcheck_timer);
1050 
1051 		kthread_queue_work(gpu->worker, &gpu->recover_work);
1052 	}
1053 }
1054 
1055 irqreturn_t a8xx_irq(struct msm_gpu *gpu)
1056 {
1057 	struct msm_drm_private *priv = gpu->dev->dev_private;
1058 	u32 status = gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS);
1059 
1060 	gpu_write(gpu, REG_A8XX_RBBM_INT_CLEAR_CMD, status);
1061 
1062 	if (priv->disable_err_irq)
1063 		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
1064 
1065 	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
1066 		a8xx_fault_detect_irq(gpu);
1067 
1068 	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) {
1069 		u32 rl0, rl1;
1070 
1071 		rl0 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_0);
1072 		rl1 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_1);
1073 		dev_err_ratelimited(&gpu->pdev->dev,
1074 				"CP | AHB bus error RL_ERROR_0: %x, RL_ERROR_1: %x\n", rl0, rl1);
1075 	}
1076 
1077 	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1078 		a8xx_cp_hw_err_irq(gpu);
1079 
1080 	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
1081 		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");
1082 
1083 	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1084 		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");
1085 
1086 	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1087 		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");
1088 
1089 	if (status & A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
1090 		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Trap interrupt\n");
1091 
1092 	if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
1093 		a8xx_sw_fuse_violation_irq(gpu);
1094 
1095 	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1096 		msm_gpu_retire(gpu);
1097 		a6xx_preempt_trigger(gpu);
1098 	}
1099 
1100 	if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
1101 		a6xx_preempt_irq(gpu);
1102 
1103 	return IRQ_HANDLED;
1104 }
1105 
1106 void a8xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1107 {
1108 	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1109 	struct msm_gpu *gpu = &adreno_gpu->base;
1110 
1111 	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1112 		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1113 
1114 		gpu_scid &= GENMASK(5, 0);
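		/* Replicate the 6-bit LLCC slice id into every client field of SCACHE_CNTL1/CNTL0 */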
1115 
1116 		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1,
1117 			  FIELD_PREP(GENMASK(29, 24), gpu_scid) |
1118 			  FIELD_PREP(GENMASK(23, 18), gpu_scid) |
1119 			  FIELD_PREP(GENMASK(17, 12), gpu_scid) |
1120 			  FIELD_PREP(GENMASK(11, 6), gpu_scid)  |
1121 			  FIELD_PREP(GENMASK(5, 0), gpu_scid));
1122 
1123 		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0,
1124 			  FIELD_PREP(GENMASK(27, 22), gpu_scid) |
1125 			  FIELD_PREP(GENMASK(21, 16), gpu_scid) |
1126 			  FIELD_PREP(GENMASK(15, 10), gpu_scid) |
1127 			  BIT(8));
1128 	}
1129 
1130 	llcc_slice_activate(a6xx_gpu->htw_llc_slice);
1131 }
1132 
1133 #define GBIF_CLIENT_HALT_MASK		BIT(0)
1134 #define GBIF_ARB_HALT_MASK		BIT(1)
1135 #define VBIF_XIN_HALT_CTRL0_MASK	GENMASK(3, 0)
1136 #define VBIF_RESET_ACK_MASK		0xF0
1137 #define GPR0_GBIF_HALT_REQUEST		0x1E0
1138 
1139 void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
1140 {
1141 	struct msm_gpu *gpu = &adreno_gpu->base;
1142 
1143 	if (gx_off) {
1144 		/* Halt the gx side of GBIF */
1145 		gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 1);
1146 		spin_until(gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT_ACK) & 1);
1147 	}
1148 
1149 	/* Halt new client requests on GBIF */
1150 	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
1151 	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
1152 			(GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
1153 
1154 	/* Halt all AXI requests on GBIF */
1155 	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
1156 	spin_until((gpu_read(gpu,  REG_A6XX_GBIF_HALT_ACK) &
1157 			(GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
1158 
1159 	/* The GBIF halt needs to be explicitly cleared */
1160 	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
1161 }
1162 
1163 int a8xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1164 {
1165 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1166 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1167 
1168 	mutex_lock(&a6xx_gpu->gmu.lock);
1169 
1170 	/* Force the GPU power on so we can read this register */
1171 	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
1172 
1173 	*value = gpu_read64(gpu, REG_A8XX_CP_ALWAYS_ON_COUNTER);
1174 
1175 	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
1176 
1177 	mutex_unlock(&a6xx_gpu->gmu.lock);
1178 
1179 	return 0;
1180 }
1181 
1182 u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
1183 {
1184 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1185 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1186 	u64 busy_cycles;
1187 
1188 	/* 19.2MHz */
1189 	*out_sample_rate = 19200000;
1190 
1191 	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
1192 			REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
1193 			REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
1194 
1195 	return busy_cycles;
1196 }
1197 
1198 bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1199 {
1200 	return true;
1201 }
1202