xref: /linux/drivers/gpu/drm/msm/adreno/a8xx_gpu.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */
3 
4 
5 #include "msm_gem.h"
6 #include "msm_mmu.h"
7 #include "msm_gpu_trace.h"
8 #include "a6xx_gpu.h"
9 #include "a6xx_gmu.xml.h"
10 
11 #include <linux/bitfield.h>
12 #include <linux/devfreq.h>
13 #include <linux/firmware/qcom/qcom_scm.h>
14 #include <linux/pm_domain.h>
15 #include <linux/soc/qcom/llcc-qcom.h>
16 
/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * the peripheral authentication ID handed to the secure world when loading
 * GPU firmware - confirm against the users of this macro.
 */
#define GPU_PAS_ID 13
18 
19 static void a8xx_aperture_slice_set(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice)
20 {
21 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
22 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
23 	u32 val;
24 
25 	val = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe) | A8XX_CP_APERTURE_CNTL_HOST_SLICEID(slice);
26 
27 	if (a6xx_gpu->cached_aperture == val)
28 		return;
29 
30 	gpu_write(gpu, REG_A8XX_CP_APERTURE_CNTL_HOST, val);
31 
32 	a6xx_gpu->cached_aperture = val;
33 }
34 
35 static void a8xx_aperture_acquire(struct msm_gpu *gpu, enum adreno_pipe pipe, unsigned long *flags)
36 {
37 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
38 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
39 
40 	spin_lock_irqsave(&a6xx_gpu->aperture_lock, *flags);
41 
42 	a8xx_aperture_slice_set(gpu, pipe, 0);
43 }
44 
45 static void a8xx_aperture_release(struct msm_gpu *gpu, unsigned long flags)
46 {
47 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
48 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
49 
50 	spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);
51 }
52 
53 static void a8xx_aperture_clear(struct msm_gpu *gpu)
54 {
55 	unsigned long flags;
56 
57 	a8xx_aperture_acquire(gpu, PIPE_NONE, &flags);
58 	a8xx_aperture_release(gpu, flags);
59 }
60 
61 static void a8xx_write_pipe(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 offset, u32 data)
62 {
63 	unsigned long flags;
64 
65 	a8xx_aperture_acquire(gpu, pipe, &flags);
66 	gpu_write(gpu, offset, data);
67 	a8xx_aperture_release(gpu, flags);
68 }
69 
70 static u32 a8xx_read_pipe_slice(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice, u32 offset)
71 {
72 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
73 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
74 	unsigned long flags;
75 	u32 val;
76 
77 	spin_lock_irqsave(&a6xx_gpu->aperture_lock, flags);
78 	a8xx_aperture_slice_set(gpu, pipe, slice);
79 	val = gpu_read(gpu, offset);
80 	spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);
81 
82 	return val;
83 }
84 
85 void a8xx_gpu_get_slice_info(struct msm_gpu *gpu)
86 {
87 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
88 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
89 	const struct a6xx_info *info = adreno_gpu->info->a6xx;
90 	struct device *dev = &gpu->pdev->dev;
91 	u32 slice_mask;
92 
93 	if (adreno_gpu->info->family < ADRENO_8XX_GEN1)
94 		return;
95 
96 	if (a6xx_gpu->slice_mask)
97 		return;
98 
99 	slice_mask = GENMASK(info->max_slices - 1, 0);
100 
101 	/* GEN1 doesn't support partial slice configurations */
102 	if (adreno_gpu->info->family == ADRENO_8XX_GEN1) {
103 		a6xx_gpu->slice_mask = slice_mask;
104 		return;
105 	}
106 
107 	slice_mask &= a6xx_cx_misc_read(a6xx_gpu,
108 			REG_A8XX_CX_MISC_SLICE_ENABLE_FINAL);
109 
110 	a6xx_gpu->slice_mask = slice_mask;
111 
112 	/* Chip ID depends on the number of slices available. So update it */
113 	adreno_gpu->chip_id |= FIELD_PREP(GENMASK(7, 4), hweight32(slice_mask));
114 
115 	/* Update the gpu-name to reflect the slice config: */
116 	const char *name = devm_kasprintf(dev, GFP_KERNEL,
117 			"%"ADRENO_CHIPID_FMT,
118 			ADRENO_CHIPID_ARGS(adreno_gpu->chip_id));
119 	if (name) {
120 		devm_kfree(dev, adreno_gpu->base.name);
121 		adreno_gpu->base.name = name;
122 	}
123 }
124 
125 static u32 a8xx_get_first_slice(struct a6xx_gpu *a6xx_gpu)
126 {
127 	return ffs(a6xx_gpu->slice_mask) - 1;
128 }
129 
130 static inline bool _a8xx_check_idle(struct msm_gpu *gpu)
131 {
132 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
133 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
134 
135 	/* Check that the GMU is idle */
136 	if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
137 		return false;
138 
139 	/* Check that the CX master is idle */
140 	if (gpu_read(gpu, REG_A8XX_RBBM_STATUS) &
141 			~A8XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
142 		return false;
143 
144 	return !(gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS) &
145 		 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
146 }
147 
148 static bool a8xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
149 {
150 	/* wait for CP to drain ringbuffer: */
151 	if (!adreno_idle(gpu, ring))
152 		return false;
153 
154 	if (spin_until(_a8xx_check_idle(gpu))) {
155 		DRM_ERROR(
156 			"%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
157 			gpu->name, __builtin_return_address(0),
158 			gpu_read(gpu, REG_A8XX_RBBM_STATUS),
159 			gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS),
160 			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
161 			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
162 		return false;
163 	}
164 
165 	return true;
166 }
167 
168 void a8xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
169 {
170 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
171 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
172 	uint32_t wptr;
173 	unsigned long flags;
174 
175 	spin_lock_irqsave(&ring->preempt_lock, flags);
176 
177 	/* Copy the shadow to the actual register */
178 	ring->cur = ring->next;
179 
180 	/* Make sure to wrap wptr if we need to */
181 	wptr = get_wptr(ring);
182 
183 	/* Update HW if this is the current ring and we are not in preempt*/
184 	if (!a6xx_in_preempt(a6xx_gpu)) {
185 		if (a6xx_gpu->cur_ring == ring)
186 			a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
187 		else
188 			ring->restore_wptr = true;
189 	} else {
190 		ring->restore_wptr = true;
191 	}
192 
193 	spin_unlock_irqrestore(&ring->preempt_lock, flags);
194 }
195 
196 static void a8xx_set_hwcg(struct msm_gpu *gpu, bool state)
197 {
198 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
199 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
200 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
201 	u32 val;
202 
203 	if (adreno_is_x285(adreno_gpu) && state)
204 		gpu_write(gpu, REG_A8XX_RBBM_CGC_0_PC, 0x00000702);
205 
206 	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
207 			state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
208 	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
209 			state ? 0x110111 : 0);
210 	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
211 			state ? 0x55555 : 0);
212 
213 	gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
214 	gpu_write(gpu, REG_A8XX_RBBM_CGC_GLOBAL_LOAD_CMD, !!state);
215 
216 	if (state) {
217 		gpu_write(gpu, REG_A8XX_RBBM_CGC_P2S_TRIG_CMD, 1);
218 
219 		if (gpu_poll_timeout(gpu, REG_A8XX_RBBM_CGC_P2S_STATUS, val,
220 				     val & A8XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
221 			dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
222 			return;
223 		}
224 
225 		gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
226 	} else {
227 		/*
228 		 * GMU enables clk gating in GBIF during boot up. So,
229 		 * override that here when hwcg feature is disabled
230 		 */
231 		gpu_rmw(gpu, REG_A8XX_GBIF_CX_CONFIG, BIT(0), 0);
232 	}
233 }
234 
235 static void a8xx_set_cp_protect(struct msm_gpu *gpu)
236 {
237 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
238 	const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
239 	u32 cntl, final_cfg;
240 	unsigned int i;
241 
242 	cntl = A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_PROT_EN |
243 		A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_FAULT_ON_VIOL_EN |
244 		A8XX_CP_PROTECT_CNTL_PIPE_LAST_SPAN_INF_RANGE |
245 		A8XX_CP_PROTECT_CNTL_PIPE_HALT_SQE_RANGE__MASK;
246 	/*
247 	 * Enable access protection to privileged registers, fault on an access
248 	 * protect violation and select the last span to protect from the start
249 	 * address all the way to the end of the register address space
250 	 */
251 	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl);
252 	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl);
253 
254 	a8xx_aperture_clear(gpu);
255 
256 	for (i = 0; i < protect->count; i++) {
257 		/* Intentionally skip writing to some registers */
258 		if (protect->regs[i]) {
259 			gpu_write(gpu, REG_A8XX_CP_PROTECT_GLOBAL(i), protect->regs[i]);
260 			final_cfg = protect->regs[i];
261 		}
262 	}
263 
264 	/*
265 	 * Last span feature is only supported on PIPE specific register.
266 	 * So update those here
267 	 */
268 	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_PIPE(15), final_cfg);
269 	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_PIPE(15), final_cfg);
270 
271 	a8xx_aperture_clear(gpu);
272 }
273 
274 static void a8xx_set_ubwc_config(struct msm_gpu *gpu)
275 {
276 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
277 	const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config;
278 	u32 level2_swizzling_dis = !(qcom_ubwc_swizzle(cfg) & UBWC_SWIZZLE_ENABLE_LVL2);
279 	u32 level3_swizzling_dis = !(qcom_ubwc_swizzle(cfg) & UBWC_SWIZZLE_ENABLE_LVL3);
280 	bool rgba8888_lossless = false, fp16compoptdis = false;
281 	bool yuvnotcomptofc = false, min_acc_len_64b = false;
282 	bool rgb565_predicator = false;
283 	bool amsbc = qcom_ubwc_enable_amsbc(cfg);
284 	bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg);
285 	u32 ubwc_version = cfg->ubwc_enc_version;
286 	u32 hbb, hbb_hi, hbb_lo, mode;
287 	u8 uavflagprd_inv = 2;
288 
289 	if (ubwc_version > UBWC_6_0)
290 		dev_err(&gpu->pdev->dev, "Unknown UBWC version: 0x%x\n", ubwc_version);
291 
292 	if (ubwc_version == UBWC_6_0)
293 		yuvnotcomptofc = true;
294 
295 	if (ubwc_version < UBWC_5_0 &&
296 	    ubwc_version >= UBWC_4_0)
297 		rgba8888_lossless = true;
298 
299 	if (ubwc_version < UBWC_4_3)
300 		fp16compoptdis = true;
301 
302 	if (cfg->ubwc_enc_version >= UBWC_4_0)
303 		rgb565_predicator = true;
304 
305 	if (ubwc_version < UBWC_3_0)
306 		dev_err(&gpu->pdev->dev, "Unsupported UBWC version: 0x%x\n", ubwc_version);
307 
308 	mode = qcom_ubwc_version_tag(cfg);
309 
310 	/*
311 	 * We subtract 13 from the highest bank bit (13 is the minimum value
312 	 * allowed by hw) and write the lowest two bits of the remaining value
313 	 * as hbb_lo and the one above it as hbb_hi to the hardware.
314 	 */
315 	WARN_ON(cfg->highest_bank_bit < 13);
316 	hbb = cfg->highest_bank_bit - 13;
317 	hbb_hi = hbb >> 2;
318 	hbb_lo = hbb & 3;
319 
320 	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL,
321 			hbb << 5 |
322 			level3_swizzling_dis << 4 |
323 			level2_swizzling_dis << 3);
324 
325 	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL,
326 			hbb << 5 |
327 			level3_swizzling_dis << 4 |
328 			level2_swizzling_dis << 3);
329 
330 	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CCU_NC_MODE_CNTL,
331 			yuvnotcomptofc << 6 |
332 			level3_swizzling_dis << 5 |
333 			level2_swizzling_dis << 4 |
334 			hbb_hi << 3 |
335 			hbb_lo << 1);
336 
337 	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CMP_NC_MODE_CNTL,
338 			mode << 15 |
339 			yuvnotcomptofc << 6 |
340 			rgba8888_lossless << 4 |
341 			fp16compoptdis << 3 |
342 			rgb565_predicator << 2 |
343 			amsbc << 1 |
344 			min_acc_len_64b);
345 
346 	a8xx_aperture_clear(gpu);
347 
348 	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
349 		  level3_swizzling_dis << 13 |
350 		  level2_swizzling_dis << 12 |
351 		  hbb_hi << 10 |
352 		  uavflagprd_inv << 4 |
353 		  min_acc_len_64b << 3 |
354 		  hbb_lo << 1 | ubwc_mode);
355 
356 	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
357 		  level3_swizzling_dis << 7 |
358 		  level2_swizzling_dis << 6 |
359 		  hbb_hi << 4 |
360 		  min_acc_len_64b << 3 |
361 		  hbb_lo << 1 | ubwc_mode);
362 }
363 
364 static void a8xx_nonctxt_config(struct msm_gpu *gpu, u32 *gmem_protect)
365 {
366 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
367 	const struct a6xx_info *info = adreno_gpu->info->a6xx;
368 	const struct adreno_reglist_pipe *regs = info->nonctxt_reglist;
369 	unsigned int pipe_id, i;
370 	unsigned long flags;
371 
372 	for (pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) {
373 		/* We don't have support for LPAC yet */
374 		if (pipe_id == PIPE_LPAC)
375 			continue;
376 
377 		a8xx_aperture_acquire(gpu, pipe_id, &flags);
378 
379 		for (i = 0; regs[i].offset; i++) {
380 			if (!(BIT(pipe_id) & regs[i].pipe))
381 				continue;
382 
383 			if (regs[i].offset == REG_A8XX_RB_GC_GMEM_PROTECT)
384 				*gmem_protect = regs[i].value;
385 
386 			gpu_write(gpu, regs[i].offset, regs[i].value);
387 		}
388 
389 		a8xx_aperture_release(gpu, flags);
390 	}
391 
392 	a8xx_aperture_clear(gpu);
393 }
394 
395 static void a8xx_patch_pwrup_reglist(struct msm_gpu *gpu)
396 {
397 	const struct adreno_reglist_pipe_list *dyn_pwrup_reglist;
398 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
399 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
400 	const struct adreno_reglist_list *reglist;
401 	void *ptr = a6xx_gpu->pwrup_reglist_ptr;
402 	struct cpu_gpu_lock *lock = ptr;
403 	u32 *dest = (u32 *)&lock->regs[0];
404 	u32 dyn_pwrup_reglist_count = 0;
405 	int i;
406 
407 	lock->gpu_req = lock->cpu_req = lock->turn = 0;
408 
409 	reglist = adreno_gpu->info->a6xx->ifpc_reglist;
410 	if (reglist) {
411 		lock->ifpc_list_len = reglist->count;
412 
413 		/*
414 		 * For each entry in each of the lists, write the offset and the current
415 		 * register value into the GPU buffer
416 		 */
417 		for (i = 0; i < reglist->count; i++) {
418 			*dest++ = reglist->regs[i];
419 			*dest++ = gpu_read(gpu, reglist->regs[i]);
420 		}
421 	}
422 
423 	reglist = adreno_gpu->info->a6xx->pwrup_reglist;
424 	if (reglist) {
425 		lock->preemption_list_len = reglist->count;
426 
427 		for (i = 0; i < reglist->count; i++) {
428 			*dest++ = reglist->regs[i];
429 			*dest++ = gpu_read(gpu, reglist->regs[i]);
430 		}
431 	}
432 
433 	/*
434 	 * The overall register list is composed of
435 	 * 1. Static IFPC-only registers
436 	 * 2. Static IFPC + preemption registers
437 	 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
438 	 *
439 	 * The first two lists are static. Size of these lists are stored as
440 	 * number of pairs in ifpc_list_len and preemption_list_len
441 	 * respectively. With concurrent binning, Some of the perfcounter
442 	 * registers being virtualized, CP needs to know the pipe id to program
443 	 * the aperture inorder to restore the same. Thus, third list is a
444 	 * dynamic list with triplets as
445 	 * (<aperture, shifted 12 bits> <address> <data>), and the length is
446 	 * stored as number for triplets in dynamic_list_len.
447 	 */
448 	dyn_pwrup_reglist = adreno_gpu->info->a6xx->dyn_pwrup_reglist;
449 	if (!dyn_pwrup_reglist)
450 		goto done;
451 
452 	for (u32 pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) {
453 		for (i = 0; i < dyn_pwrup_reglist->count; i++) {
454 			if (!(dyn_pwrup_reglist->regs[i].pipe & BIT(pipe_id)))
455 				continue;
456 			*dest++ = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe_id);
457 			*dest++ = dyn_pwrup_reglist->regs[i].offset;
458 			*dest++ = a8xx_read_pipe_slice(gpu,
459 						       pipe_id,
460 						       a8xx_get_first_slice(a6xx_gpu),
461 						       dyn_pwrup_reglist->regs[i].offset);
462 			dyn_pwrup_reglist_count++;
463 		}
464 	}
465 
466 	lock->dynamic_list_len = dyn_pwrup_reglist_count;
467 	a6xx_gpu->dynamic_sel_reglist_offset = dyn_pwrup_reglist_count;
468 
469 done:
470 	a8xx_aperture_clear(gpu);
471 }
472 
473 static int a8xx_preempt_start(struct msm_gpu *gpu)
474 {
475 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
476 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
477 	struct msm_ringbuffer *ring = gpu->rb[0];
478 
479 	if (gpu->nr_rings <= 1)
480 		return 0;
481 
482 	/* Turn CP protection off */
483 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
484 	OUT_RING(ring, 0);
485 
486 	a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL);
487 
488 	a6xx_flush_yield(gpu, ring);
489 
490 	return a8xx_idle(gpu, ring) ? 0 : -EINVAL;
491 }
492 
493 static int a8xx_cp_init(struct msm_gpu *gpu)
494 {
495 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
496 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
497 	struct msm_ringbuffer *ring = gpu->rb[0];
498 	u32 mask;
499 
500 	/* Disable concurrent binning before sending CP init */
501 	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
502 	OUT_RING(ring, BIT(27));
503 
504 	OUT_PKT7(ring, CP_ME_INIT, 7);
505 
506 	/* Use multiple HW contexts */
507 	mask = BIT(0);
508 
509 	/* Enable error detection */
510 	mask |= BIT(1);
511 
512 	/* Set default reset state */
513 	mask |= BIT(3);
514 
515 	/* Disable save/restore of performance counters across preemption */
516 	mask |= BIT(6);
517 
518 	/* Enable the register init list with the spinlock */
519 	mask |= BIT(8);
520 
521 	OUT_RING(ring, mask);
522 
523 	/* Enable multiple hardware contexts */
524 	OUT_RING(ring, 0x00000003);
525 
526 	/* Enable error detection */
527 	OUT_RING(ring, 0x20000000);
528 
529 	/* Operation mode mask */
530 	OUT_RING(ring, 0x00000002);
531 
532 	/* Lo address */
533 	OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova));
534 	/* Hi address */
535 	OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova));
536 
537 	/* Enable dyn pwrup list with triplets (offset, value, pipe) */
538 	OUT_RING(ring, BIT(31));
539 
540 	a6xx_flush(gpu, ring);
541 	return a8xx_idle(gpu, ring) ? 0 : -EINVAL;
542 }
543 
/* RBBM interrupt sources enabled in RBBM_INT_0_MASK by hw_init() */
#define A8XX_INT_MASK \
	(A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
	 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
	 A6XX_RBBM_INT_0_MASK_CP_SW | \
	 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
	 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
	 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
	 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
	 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
	 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
	 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)

/* CP_APRIV_CNTL_PIPE value hw_init() programs for every pipe except BR */
#define A8XX_APRIV_MASK \
	(A8XX_CP_APRIV_CNTL_PIPE_ICACHE | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBFETCH | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBPRIVLEVEL | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBRPWB)

/* CP_APRIV_CNTL_PIPE value for the BR pipe: the common bits plus CDREAD/CDWRITE */
#define A8XX_BR_APRIV_MASK \
	(A8XX_APRIV_MASK | \
	 A8XX_CP_APRIV_CNTL_PIPE_CDREAD | \
	 A8XX_CP_APRIV_CNTL_PIPE_CDWRITE)

/* Value hw_init() writes to CP_INTERRUPT_STATUS_MASK_GLOBAL: HW and SW fault bits of every CP engine */
#define A8XX_CP_GLOBAL_INT_MASK \
	(A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTLPAC | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE0 | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE1 | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBR | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBV | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTLPAC | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE0 | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE1 | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBR | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBV)

/* Value hw_init() writes to CP_INTERRUPT_STATUS_MASK_PIPE of each pipe it configures */
#define A8XX_CP_INTERRUPT_STATUS_MASK_PIPE \
	(A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFRBWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB1WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB2WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB3WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFSDSWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFMRBWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFVSDWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_OPCODEERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VSDPARITYERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_REGISTERPROTECTIONERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_ILLEGALINSTRUCTION | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_SMMUFAULT | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPCLIENT| \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPTYPE | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPREAD | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_RTWROVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTWROVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTREFCNTOVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTCLRRESMISS)

/* Value hw_init() writes to CP_HW_FAULT_STATUS_MASK_PIPE of each pipe it configures */
#define A8XX_CP_HW_FAULT_STATUS_MASK_PIPE \
	(A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFRBFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB1FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB2FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB3FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFSDSFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFMRBFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFVSDFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_SQEREADBURSTOVF | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_EVENTENGINEOVF | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_UCODEERROR)
620 
/*
 * Full hardware bring-up sequence for the GPU.
 *
 * Runs between a GMU_OOB_GPU_SET set/clear pair: programs the static
 * configuration (UBWC, non-context registers, protection, interrupt masks),
 * sets up ring 0, starts the SQE, sends CP init, leaves secure mode, writes
 * the GMEM protect register, builds the power-up register list once and
 * enables hardware clock gating. The caller (a8xx_hw_init()) holds gmu.lock.
 *
 * Returns 0 on success or a negative errno.
 */
static int hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	unsigned int pipe_id, i;
	u32 gmem_protect = 0;
	u64 gmem_range_min;
	int ret;

	ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
	if (ret)
		return ret;

	/* Clear the cached value to force aperture configuration next time */
	a6xx_gpu->cached_aperture = UINT_MAX;
	a8xx_aperture_clear(gpu);

	/* Clear GBIF halt in case GX domain was not collapsed */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
	gpu_read(gpu, REG_A6XX_GBIF_HALT);

	gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 0);
	gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT);

	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);

	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);

	/* Setup GMEM Range in UCHE */
	gmem_range_min = SZ_64M;
	/* Only the start (SZ_64M) of the GMEM VA range is programmed here */
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_GC_GMEM_RANGE_MIN, gmem_range_min);
	gpu_write64(gpu, REG_A8XX_SP_HLSQ_GC_GMEM_RANGE_MIN, gmem_range_min);

	/* Setup UCHE Trap region */
	gpu_write64(gpu, REG_A8XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_CNTL, 0x1);
	gpu_write(gpu, REG_A8XX_RBBM_SLICE_PERFCTR_CNTL, 0x1);

	/* Turn on the IFPC counter (countable 4 on XOCLK1) */
	gmu_write(&a6xx_gpu->gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_1,
		  FIELD_PREP(GENMASK(7, 0), 0x4));

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A8XX_CP_PERFCTR_CP_SEL(0), 1);

	a8xx_set_ubwc_config(gpu);

	/* Set weights for bicubic filtering */
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(5), 0x00000000);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(6), 0x3fd093e8);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(7), 0x3f4133dc);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(8), 0x3ea1dfdb);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(9), 0x3e0283e0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(10), 0x0000ac2b);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(11), 0x0000f01d);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(12), 0x00114412);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(13), 0x0021980a);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(14), 0x0051ec05);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(15), 0x0000380e);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(16), 0x3ff09001);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(17), 0x3fc10bfa);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(18), 0x3f9193f7);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(19), 0x3f7227f7);

	gpu_write(gpu, REG_A8XX_UCHE_CLIENT_PF, BIT(7) | 0x1);

	/* Also captures the GMEM_PROTECT value, which is re-applied further down */
	a8xx_nonctxt_config(gpu, &gmem_protect);

	/* Enable fault detection */
	gpu_write(gpu, REG_A8XX_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) | 0xcfffff);
	gpu_write(gpu, REG_A8XX_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30));

	/* Set up the CX GMU counter 0 to count busy ticks */
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);

	/* Enable the power counter */
	gmu_rmw(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_0, 0xff, BIT(5));
	gmu_write(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);

	/* Protect registers from the CP */
	a8xx_set_cp_protect(gpu);

	/* Enable the GMEM save/restore feature for preemption */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 1);

	/* Per-pipe CP setup: APRIV bits plus interrupt and HW fault masks */
	for (pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		u32 apriv_mask = A8XX_APRIV_MASK;
		unsigned long flags;

		if (pipe_id == PIPE_LPAC)
			continue;

		if (pipe_id == PIPE_BR)
			apriv_mask = A8XX_BR_APRIV_MASK;

		a8xx_aperture_acquire(gpu, pipe_id, &flags);
		gpu_write(gpu, REG_A8XX_CP_APRIV_CNTL_PIPE, apriv_mask);
		gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_PIPE,
				A8XX_CP_INTERRUPT_STATUS_MASK_PIPE);
		gpu_write(gpu, REG_A8XX_CP_HW_FAULT_STATUS_MASK_PIPE,
				A8XX_CP_HW_FAULT_STATUS_MASK_PIPE);
		a8xx_aperture_release(gpu, flags);
	}

	a8xx_aperture_clear(gpu);

	/* Enable interrupts */
	gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, A8XX_CP_GLOBAL_INT_MASK);
	gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, A8XX_INT_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		goto out;

	gpu_write64(gpu, REG_A8XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
	if (a6xx_gpu->aqe_iova)
		gpu_write64(gpu, REG_A8XX_CP_AQE_INSTR_BASE_0, a6xx_gpu->aqe_iova);

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
	gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);

	/* Configure the RPTR shadow if needed: */
	gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, shadowptr(a6xx_gpu, gpu->rb[0]));
	gpu_write64(gpu, REG_A8XX_CP_RB_RPTR_ADDR_BV, rbmemptr(gpu->rb[0], bv_rptr));

	a8xx_preempt_hw_init(gpu);

	/* Reset the per-ring rptr shadow values */
	for (i = 0; i < gpu->nr_rings; i++)
		a6xx_gpu->shadow[i] = 0;

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];

	for (i = 0; i < gpu->nr_rings; i++)
		gpu->rb[i]->cur_ctx_seqno = 0;

	/* Enable the SQE to start the CP engine */
	gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 1);

	ret = a8xx_cp_init(gpu);
	if (ret)
		goto out;

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 *
	 * NOTE(review): the two plain returns in this if/else chain bypass the
	 * "out" label, so GMU_OOB_GPU_SET is not cleared on those paths -
	 * confirm this is intended.
	 */
	ret = a6xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a6xx_flush(gpu, gpu->rb[0]);
		if (!a8xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
		 * if you mess this up you are about to crash horribly)
		 */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
		ret = 0;
	} else {
		return ret;
	}

	/*
	 * GMEM_PROTECT register should be programmed after GPU is transitioned to
	 * non-secure mode
	 */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_GC_GMEM_PROTECT, gmem_protect);
	/* A zero value means the non-context list supplied no GMEM_PROTECT entry */
	WARN_ON(!gmem_protect);
	a8xx_aperture_clear(gpu);

	/* The power-up register list only needs to be built once */
	if (!a6xx_gpu->pwrup_reglist_emitted) {
		a8xx_patch_pwrup_reglist(gpu);
		a6xx_gpu->pwrup_reglist_emitted = true;
	}

	/* Enable hardware clockgating */
	a8xx_set_hwcg(gpu, true);
out:
	/* Last step - yield the ringbuffer (return value is not checked) */
	a8xx_preempt_start(gpu);

	/*
	 * Tell the GMU that we are done touching the GPU and it can start power
	 * management
	 */
	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);

	/* On success, optionally hold the perfcounter OOB and record that in gmu->status */
	if (!ret && msm_gpu_sysprof_no_perfcntr_zap(gpu)) {
		ret = a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET);
		if (!ret)
			set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status);
	}

	return ret;
}
849 
850 int a8xx_hw_init(struct msm_gpu *gpu)
851 {
852 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
853 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
854 	int ret;
855 
856 	mutex_lock(&a6xx_gpu->gmu.lock);
857 	ret = hw_init(gpu);
858 	mutex_unlock(&a6xx_gpu->gmu.lock);
859 
860 	return ret;
861 }
862 
863 static void a8xx_dump(struct msm_gpu *gpu)
864 {
865 	DRM_DEV_INFO(&gpu->pdev->dev, "status:   %08x\n", gpu_read(gpu, REG_A8XX_RBBM_STATUS));
866 	adreno_dump(gpu);
867 }
868 
/*
 * Recover the GPU after a hang.
 *
 * The sequence halts the SQE (when GX is on), drops every runtime-PM
 * reference so the device suspends, waits up to one second for the CX power
 * domain notifier to complete gmu->pd_gate, re-takes the references and
 * finally re-initializes the hardware via msm_gpu_hw_init().
 * a6xx_gpu->hung is set for the duration of the sequence.
 */
void a8xx_recover(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int active_submits;

	adreno_dump_info(gpu);

	/*
	 * To handle recovery specific sequences during the rpm suspend we are
	 * about to trigger
	 */
	a6xx_gpu->hung = true;

	if (adreno_gpu->funcs->gx_is_on(adreno_gpu)) {
		/*
		 * Sometimes crashstate capture is skipped, so SQE should be
		 * halted here again
		 */
		gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 3);

		if (hang_debug)
			a8xx_dump(gpu);
	}

	/* Disable autosuspend while the references are dropped below */
	pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);

	/* active_submit won't change until we make a submission */
	mutex_lock(&gpu->active_lock);
	active_submits = gpu->active_submits;

	/*
	 * Temporarily clear active_submits count to silence a WARN() in the
	 * runtime suspend cb
	 */
	gpu->active_submits = 0;

	/* Arm the completion and register the CX power domain notifier before powering down */
	reinit_completion(&gmu->pd_gate);
	dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
	dev_pm_genpd_synced_poweroff(gmu->cxpd);

	/* Drop the rpm refcount from active submits */
	if (active_submits)
		pm_runtime_put(&gpu->pdev->dev);

	/* And the final one from recover worker */
	pm_runtime_put_sync(&gpu->pdev->dev);

	/* A timeout is only logged; recovery carries on regardless */
	if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
		DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");

	dev_pm_genpd_remove_notifier(gmu->cxpd);

	pm_runtime_use_autosuspend(&gpu->pdev->dev);

	/* Re-take the references dropped above, mirroring the put order */
	if (active_submits)
		pm_runtime_get(&gpu->pdev->dev);

	pm_runtime_get_sync(&gpu->pdev->dev);

	/* Restore the submit count that was zeroed for the suspend */
	gpu->active_submits = active_submits;
	mutex_unlock(&gpu->active_lock);

	msm_gpu_hw_init(gpu);
	a6xx_gpu->hung = false;
}
936 
937 static const char *a8xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
938 {
939 	static const char * const uche_clients[] = {
940 		"BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
941 		"BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
942 		"STCHE",
943 	};
944 	static const char * const uche_clients_lpac[] = {
945 		"-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC",
946 	};
947 	u32 val;
948 
949 	/*
950 	 * The source of the data depends on the mid ID read from FSYNR1.
951 	 * and the client ID read from the UCHE block
952 	 */
953 	val = gpu_read(gpu, REG_A8XX_UCHE_CLIENT_PF);
954 
955 	val &= GENMASK(6, 0);
956 
957 	/* mid=3 refers to BR or BV */
958 	if (mid == 3) {
959 		if (val < ARRAY_SIZE(uche_clients))
960 			return uche_clients[val];
961 		else
962 			return "UCHE";
963 	}
964 
965 	/* mid=8 refers to LPAC */
966 	if (mid == 8) {
967 		if (val < ARRAY_SIZE(uche_clients_lpac))
968 			return uche_clients_lpac[val];
969 		else
970 			return "UCHE_LPAC";
971 	}
972 
973 	return "Unknown";
974 }
975 
976 static const char *a8xx_fault_block(struct msm_gpu *gpu, u32 id)
977 {
978 	switch (id) {
979 	case 0x0:
980 		return "CP";
981 	case 0x1:
982 		return "UCHE: Unknown";
983 	case 0x2:
984 		return "UCHE_LPAC: Unknown";
985 	case 0x3:
986 	case 0x8:
987 		return a8xx_uche_fault_block(gpu, id);
988 	case 0x4:
989 		return "CCU";
990 	case 0x5:
991 		return "Flag cache";
992 	case 0x6:
993 		return "PREFETCH";
994 	case 0x7:
995 		return "GMU";
996 	case 0x9:
997 		return "UCHE_HPAC";
998 	}
999 
1000 	return "Unknown";
1001 }
1002 
1003 int a8xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1004 {
1005 	struct msm_gpu *gpu = arg;
1006 	struct adreno_smmu_fault_info *info = data;
1007 	const char *block = "unknown";
1008 
1009 	u32 scratch[] = {
1010 			gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(0)),
1011 			gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(1)),
1012 			gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(2)),
1013 			gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(3)),
1014 	};
1015 
1016 	if (info)
1017 		block = a8xx_fault_block(gpu, info->fsynr1 & 0xff);
1018 
1019 	return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
1020 }
1021 
1022 static void a8xx_cp_hw_err_irq(struct msm_gpu *gpu)
1023 {
1024 	u32 status = gpu_read(gpu, REG_A8XX_CP_INTERRUPT_STATUS_GLOBAL);
1025 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1026 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1027 	u32 slice = a8xx_get_first_slice(a6xx_gpu);
1028 	u32 hw_fault_mask = GENMASK(6, 0);
1029 	u32 sw_fault_mask = GENMASK(22, 16);
1030 	u32 pipe = 0;
1031 
1032 	dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Global INT status: 0x%x\n", status);
1033 
1034 	if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR |
1035 		      A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR))
1036 		pipe |= BIT(PIPE_BR);
1037 
1038 	if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV |
1039 		      A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV))
1040 		pipe |= BIT(PIPE_BV);
1041 
1042 	if (!pipe) {
1043 		dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Unknown pipe\n");
1044 		goto out;
1045 	}
1046 
1047 	for (unsigned int pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) {
1048 		if (!(BIT(pipe_id) & pipe))
1049 			continue;
1050 
1051 		if (hw_fault_mask & status) {
1052 			status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
1053 					REG_A8XX_CP_HW_FAULT_STATUS_PIPE);
1054 			dev_err_ratelimited(&gpu->pdev->dev,
1055 					"CP HW FAULT pipe: %u status: 0x%x\n", pipe_id, status);
1056 		}
1057 
1058 		if (sw_fault_mask & status) {
1059 			status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
1060 					REG_A8XX_CP_INTERRUPT_STATUS_PIPE);
1061 			dev_err_ratelimited(&gpu->pdev->dev,
1062 					"CP SW FAULT pipe: %u status: 0x%x\n", pipe_id, status);
1063 
1064 			if (status & BIT(8)) {
1065 				a8xx_write_pipe(gpu, pipe_id, REG_A8XX_CP_SQE_STAT_ADDR_PIPE, 1);
1066 				status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
1067 						REG_A8XX_CP_SQE_STAT_DATA_PIPE);
1068 				dev_err_ratelimited(&gpu->pdev->dev,
1069 						"CP Opcode error, opcode=0x%x\n", status);
1070 			}
1071 
1072 			if (status & BIT(10)) {
1073 				status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
1074 						REG_A8XX_CP_PROTECT_STATUS_PIPE);
1075 				dev_err_ratelimited(&gpu->pdev->dev,
1076 						"CP REG PROTECT error, status=0x%x\n", status);
1077 			}
1078 		}
1079 	}
1080 
1081 out:
1082 	/* Turn off interrupts to avoid triggering recovery again */
1083 	a8xx_aperture_clear(gpu);
1084 	gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, 0);
1085 	gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, 0);
1086 
1087 	kthread_queue_work(gpu->worker, &gpu->recover_work);
1088 }
1089 
1090 static u32 gpu_periph_read(struct msm_gpu *gpu, u32 dbg_offset)
1091 {
1092 	gpu_write(gpu, REG_A8XX_CP_SQE_UCODE_DBG_ADDR_PIPE, dbg_offset);
1093 
1094 	return gpu_read(gpu, REG_A8XX_CP_SQE_UCODE_DBG_DATA_PIPE);
1095 }
1096 
1097 static u64 gpu_periph_read64(struct msm_gpu *gpu, u32 dbg_offset)
1098 {
1099 	u64 lo, hi;
1100 
1101 	lo = gpu_periph_read(gpu, dbg_offset);
1102 	hi = gpu_periph_read(gpu, dbg_offset + 1);
1103 
1104 	return (hi << 32) | lo;
1105 }
1106 
/*
 * Offsets handed to gpu_periph_read()/gpu_periph_read64() as dbg_offset, i.e.
 * values written to CP_SQE_UCODE_DBG_ADDR_PIPE before the data register is
 * read. gpu_periph_read64() fetches the high word from dbg_offset + 1, so
 * every *_BASE_HI must stay exactly one above its *_BASE_LO.
 */
#define CP_PERIPH_IB1_BASE_LO   0x7005
#define CP_PERIPH_IB1_BASE_HI   0x7006
#define CP_PERIPH_IB1_SIZE      0x7007
#define CP_PERIPH_IB1_OFFSET    0x7008
#define CP_PERIPH_IB2_BASE_LO   0x7009
#define CP_PERIPH_IB2_BASE_HI   0x700a
#define CP_PERIPH_IB2_SIZE      0x700b
#define CP_PERIPH_IB2_OFFSET    0x700c
#define CP_PERIPH_IB3_BASE_LO   0x700d
#define CP_PERIPH_IB3_BASE_HI   0x700e
#define CP_PERIPH_IB3_SIZE      0x700f
#define CP_PERIPH_IB3_OFFSET    0x7010
1119 
1120 static void a8xx_fault_detect_irq(struct msm_gpu *gpu)
1121 {
1122 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1123 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1124 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1125 	unsigned long flags;
1126 
1127 	/*
1128 	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
1129 	 * but the fault handler will trigger the devcore dump, and we want
1130 	 * to otherwise resume normally rather than killing the submit, so
1131 	 * just bail.
1132 	 */
1133 	if (gpu_read(gpu, REG_A8XX_RBBM_MISC_STATUS) & A8XX_RBBM_MISC_STATUS_SMMU_STALLED_ON_FAULT)
1134 		return;
1135 
1136 	/*
1137 	 * Force the GPU to stay on until after we finish
1138 	 * collecting information
1139 	 */
1140 	if (!adreno_has_gmu_wrapper(adreno_gpu))
1141 		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);
1142 
1143 	DRM_DEV_ERROR(&gpu->pdev->dev,
1144 		"gpu fault ring %d fence %x status %8.8X gfx_status %8.8X\n",
1145 		ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
1146 		gpu_read(gpu, REG_A8XX_RBBM_STATUS), gpu_read(gpu, REG_A8XX_RBBM_GFX_STATUS));
1147 
1148 	a8xx_aperture_acquire(gpu, PIPE_BR, &flags);
1149 
1150 	DRM_DEV_ERROR(&gpu->pdev->dev,
1151 		"BR: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
1152 		gpu_read(gpu, REG_A8XX_RBBM_GFX_BR_STATUS),
1153 		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
1154 		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
1155 		gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
1156 		gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
1157 		gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
1158 		gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
1159 		gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
1160 		gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));
1161 
1162 	a8xx_aperture_release(gpu, flags);
1163 	a8xx_aperture_acquire(gpu, PIPE_BV, &flags);
1164 
1165 	DRM_DEV_ERROR(&gpu->pdev->dev,
1166 		"BV: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
1167 		gpu_read(gpu, REG_A8XX_RBBM_GFX_BV_STATUS),
1168 		gpu_read(gpu, REG_A8XX_CP_RB_RPTR_BV),
1169 		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
1170 		gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
1171 		gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
1172 		gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
1173 		gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
1174 		gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
1175 		gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));
1176 
1177 	a8xx_aperture_release(gpu, flags);
1178 	a8xx_aperture_clear(gpu);
1179 
1180 	/* Turn off the hangcheck timer to keep it from bothering us */
1181 	timer_delete(&gpu->hangcheck_timer);
1182 
1183 	kthread_queue_work(gpu->worker, &gpu->recover_work);
1184 }
1185 
1186 static void a8xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
1187 {
1188 	u32 status;
1189 
1190 	status = gpu_read(gpu, REG_A8XX_RBBM_SW_FUSE_INT_STATUS);
1191 	gpu_write(gpu, REG_A8XX_RBBM_SW_FUSE_INT_MASK, 0);
1192 
1193 	dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);
1194 
1195 	/*
1196 	 * Ignore FASTBLEND violations, because the HW will silently fall back
1197 	 * to legacy blending.
1198 	 */
1199 	if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
1200 		      A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
1201 		timer_delete(&gpu->hangcheck_timer);
1202 
1203 		kthread_queue_work(gpu->worker, &gpu->recover_work);
1204 	}
1205 }
1206 
1207 irqreturn_t a8xx_irq(struct msm_gpu *gpu)
1208 {
1209 	struct msm_drm_private *priv = gpu->dev->dev_private;
1210 	u32 status = gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS);
1211 
1212 	gpu_write(gpu, REG_A8XX_RBBM_INT_CLEAR_CMD, status);
1213 
1214 	if (priv->disable_err_irq)
1215 		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
1216 
1217 	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
1218 		a8xx_fault_detect_irq(gpu);
1219 
1220 	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) {
1221 		u32 rl0, rl1;
1222 
1223 		rl0 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_0);
1224 		rl1 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_1);
1225 		dev_err_ratelimited(&gpu->pdev->dev,
1226 				"CP | AHB bus error RL_ERROR_0: %x, RL_ERROR_1: %x\n", rl0, rl1);
1227 	}
1228 
1229 	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1230 		a8xx_cp_hw_err_irq(gpu);
1231 
1232 	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
1233 		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");
1234 
1235 	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1236 		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");
1237 
1238 	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1239 		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");
1240 
1241 	if (status & A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
1242 		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Trap interrupt\n");
1243 
1244 	if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
1245 		a8xx_sw_fuse_violation_irq(gpu);
1246 
1247 	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1248 		msm_gpu_retire(gpu);
1249 		a8xx_preempt_trigger(gpu);
1250 	}
1251 
1252 	if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
1253 		a8xx_preempt_irq(gpu);
1254 
1255 	return IRQ_HANDLED;
1256 }
1257 
1258 void a8xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1259 {
1260 	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1261 	struct msm_gpu *gpu = &adreno_gpu->base;
1262 
1263 	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1264 		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1265 
1266 		gpu_scid &= GENMASK(5, 0);
1267 
1268 		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1,
1269 			  FIELD_PREP(GENMASK(29, 24), gpu_scid) |
1270 			  FIELD_PREP(GENMASK(23, 18), gpu_scid) |
1271 			  FIELD_PREP(GENMASK(17, 12), gpu_scid) |
1272 			  FIELD_PREP(GENMASK(11, 6), gpu_scid)  |
1273 			  FIELD_PREP(GENMASK(5, 0), gpu_scid));
1274 
1275 		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0,
1276 			  FIELD_PREP(GENMASK(27, 22), gpu_scid) |
1277 			  FIELD_PREP(GENMASK(21, 16), gpu_scid) |
1278 			  FIELD_PREP(GENMASK(15, 10), gpu_scid) |
1279 			  BIT(8));
1280 	}
1281 
1282 	llcc_slice_activate(a6xx_gpu->htw_llc_slice);
1283 }
1284 
/*
 * Bit masks for the bus halt handshake. GBIF_CLIENT_HALT_MASK and
 * GBIF_ARB_HALT_MASK are written to GBIF_HALT and polled for in GBIF_HALT_ACK
 * by a8xx_bus_clear_pending_transactions().
 * NOTE(review): the VBIF_* and GPR0_* values are not referenced in the
 * visible part of this file - confirm whether they are still needed.
 */
#define GBIF_CLIENT_HALT_MASK		BIT(0)
#define GBIF_ARB_HALT_MASK		BIT(1)
#define VBIF_XIN_HALT_CTRL0_MASK	GENMASK(3, 0)
#define VBIF_RESET_ACK_MASK		0xF0
#define GPR0_GBIF_HALT_REQUEST		0x1E0
1290 
1291 void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
1292 {
1293 	struct msm_gpu *gpu = &adreno_gpu->base;
1294 
1295 	if (gx_off) {
1296 		/* Halt the gx side of GBIF */
1297 		gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 1);
1298 		spin_until(gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT_ACK) & 1);
1299 	}
1300 
1301 	/* Halt new client requests on GBIF */
1302 	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
1303 	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
1304 			(GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
1305 
1306 	/* Halt all AXI requests on GBIF */
1307 	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
1308 	spin_until((gpu_read(gpu,  REG_A6XX_GBIF_HALT_ACK) &
1309 			(GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
1310 
1311 	/* The GBIF halt needs to be explicitly cleared */
1312 	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
1313 }
1314 
1315 u64 a8xx_gmu_get_timestamp(struct msm_gpu *gpu)
1316 {
1317 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1318 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1319 	u64 count_hi, count_lo, temp;
1320 
1321 	do {
1322 		count_hi = gmu_read(&a6xx_gpu->gmu, REG_A8XX_GMU_ALWAYS_ON_COUNTER_H);
1323 		count_lo = gmu_read(&a6xx_gpu->gmu, REG_A8XX_GMU_ALWAYS_ON_COUNTER_L);
1324 		temp = gmu_read(&a6xx_gpu->gmu, REG_A8XX_GMU_ALWAYS_ON_COUNTER_H);
1325 	} while (unlikely(count_hi != temp));
1326 
1327 	return (count_hi << 32) | count_lo;
1328 }
1329 
1330 u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
1331 {
1332 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1333 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1334 	u64 busy_cycles;
1335 
1336 	/* 19.2MHz */
1337 	*out_sample_rate = 19200000;
1338 
1339 	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
1340 			REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
1341 			REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
1342 
1343 	return busy_cycles;
1344 }
1345 
/*
 * Unconditionally returns true; neither @gpu nor @ring is inspected.
 * NOTE(review): presumably this tells the caller the ring is always making
 * progress - confirm against the contract of the hook this is installed as.
 */
bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	return true;
}
1350 
1351 void a8xx_perfcntr_flush(struct msm_gpu *gpu)
1352 {
1353 	u32 val;
1354 
1355 	/*
1356 	 * Flush delta counters (both perf counters and pipe stats) present in
1357 	 * RBBM_S and RBBM_US to perf RAM logic to get the latest data.
1358 	 */
1359 	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_FLUSH_HOST_CMD, BIT(0));
1360 	gpu_write(gpu, REG_A8XX_RBBM_SLICE_PERFCTR_FLUSH_HOST_CMD, BIT(0));
1361 
1362 	/* Ensure all writes are posted before polling status register */
1363 	wmb();
1364 
1365 	if (gpu_poll_timeout(gpu, REG_A8XX_RBBM_PERFCTR_FLUSH_HOST_STATUS, val,
1366 			     val & BIT(0), 100, 100 * 1000)) {
1367 		dev_err(&gpu->pdev->dev, "Perfcounter flush timed out: status=0x%08x\n", val);
1368 	}
1369 }
1370