xref: /linux/drivers/gpu/drm/msm/adreno/a6xx_gpu.c (revision b150b562ec0607f068ed75eddfc31553976a874f)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
3 
4 
5 #include "msm_gem.h"
6 #include "msm_mmu.h"
7 #include "msm_gpu_trace.h"
8 #include "msm_perfcntr.h"
9 #include "a6xx_gpu.h"
10 #include "a6xx_gmu.xml.h"
11 
12 #include <linux/bitfield.h>
13 #include <linux/devfreq.h>
14 #include <linux/pm_domain.h>
15 #include <linux/soc/qcom/llcc-qcom.h>
16 
/*
 * NOTE(review): presumably the peripheral ID passed to the secure-world
 * authentication service when GPU firmware is loaded; it is not referenced in
 * this part of the file - confirm against its users.
 */
#define GPU_PAS_ID 13
18 
19 static u64 a6xx_gmu_get_timestamp(struct msm_gpu *gpu)
20 {
21 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
22 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
23 	u64 count_hi, count_lo, temp;
24 
25 	do {
26 		count_hi = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H);
27 		count_lo = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L);
28 		temp = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H);
29 	} while (unlikely(count_hi != temp));
30 
31 	return (count_hi << 32) | count_lo;
32 }
33 
34 static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask)
35 {
36 	/* Success if !writedropped0/1 */
37 	if (!(status & mask))
38 		return true;
39 
40 	udelay(10);
41 
42 	/* Try to update fenced register again */
43 	gpu_write(gpu, offset, value);
44 
45 	/* We can't do a posted write here because the power domain could be
46 	 * in collapse state. So use the heaviest barrier instead
47 	 */
48 	mb();
49 	return false;
50 }
51 
52 static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask)
53 {
54 	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
55 	struct msm_gpu *gpu = &adreno_gpu->base;
56 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
57 	u32 status;
58 
59 	gpu_write(gpu, offset, value);
60 
61 	/* Nothing else to be done in the case of no-GMU */
62 	if (adreno_has_gmu_wrapper(adreno_gpu))
63 		return 0;
64 
65 	/* We can't do a posted write here because the power domain could be
66 	 * in collapse state. So use the heaviest barrier instead
67 	 */
68 	mb();
69 
70 	if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
71 			fence_status_check(gpu, offset, value, status, mask), 0, 1000))
72 		return 0;
73 
74 	/* Try again for another 1ms before failing */
75 	gpu_write(gpu, offset, value);
76 	mb();
77 
78 	if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
79 			fence_status_check(gpu, offset, value, status, mask), 0, 1000)) {
80 		/*
81 		 * The 'delay' warning is here because the pause to print this
82 		 * warning will allow gpu to move to power collapse which
83 		 * defeats the purpose of continuous polling for 2 ms
84 		 */
85 		dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
86 				offset);
87 		return 0;
88 	}
89 
90 	dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n",
91 			offset);
92 
93 	return -ETIMEDOUT;
94 }
95 
96 int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b)
97 {
98 	int ret;
99 
100 	ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask);
101 	if (ret)
102 		return ret;
103 
104 	if (!is_64b)
105 		return 0;
106 
107 	ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);
108 
109 	return ret;
110 }
111 
112 static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
113 {
114 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
115 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
116 
117 	/* Check that the GMU is idle */
118 	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_isidle(&a6xx_gpu->gmu))
119 		return false;
120 
121 	/* Check tha the CX master is idle */
122 	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
123 			~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
124 		return false;
125 
126 	return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
127 		A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
128 }
129 
130 static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
131 {
132 	/* wait for CP to drain ringbuffer: */
133 	if (!adreno_idle(gpu, ring))
134 		return false;
135 
136 	if (spin_until(_a6xx_check_idle(gpu))) {
137 		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
138 			gpu->name, __builtin_return_address(0),
139 			gpu_read(gpu, REG_A6XX_RBBM_STATUS),
140 			gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
141 			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
142 			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
143 		return false;
144 	}
145 
146 	return true;
147 }
148 
149 static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
150 {
151 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
152 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
153 
154 	/* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
155 	if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
156 		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
157 		OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
158 		OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
159 	}
160 }
161 
162 void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
163 {
164 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
165 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
166 	uint32_t wptr;
167 	unsigned long flags;
168 
169 	update_shadow_rptr(gpu, ring);
170 
171 	spin_lock_irqsave(&ring->preempt_lock, flags);
172 
173 	/* Copy the shadow to the actual register */
174 	ring->cur = ring->next;
175 
176 	/* Make sure to wrap wptr if we need to */
177 	wptr = get_wptr(ring);
178 
179 	/* Update HW if this is the current ring and we are not in preempt*/
180 	if (!a6xx_in_preempt(a6xx_gpu)) {
181 		if (a6xx_gpu->cur_ring == ring)
182 			a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
183 		else
184 			ring->restore_wptr = true;
185 	} else {
186 		ring->restore_wptr = true;
187 	}
188 
189 	spin_unlock_irqrestore(&ring->preempt_lock, flags);
190 }
191 
192 void
193 a6xx_flush_yield(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
194 {
195 	/* If preemption is enabled */
196 	if (gpu->nr_rings > 1) {
197 		/* Yield the floor on command completion */
198 		OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
199 
200 		/*
201 		 * If dword[2:1] are non zero, they specify an address for
202 		 * the CP to write the value of dword[3] to on preemption
203 		 * complete. Write 0 to skip the write
204 		 */
205 		OUT_RING(ring, 0x00);
206 		OUT_RING(ring, 0x00);
207 		/* Data value - not used if the address above is 0 */
208 		OUT_RING(ring, 0x01);
209 		/* generate interrupt on preemption completion */
210 		OUT_RING(ring, 0x00);
211 	}
212 
213 	a6xx_flush(gpu, ring);
214 }
215 
216 static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
217 		u64 iova)
218 {
219 	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
220 	OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
221 		CP_REG_TO_MEM_0_CNT(2) |
222 		CP_REG_TO_MEM_0_64B);
223 	OUT_RING(ring, lower_32_bits(iova));
224 	OUT_RING(ring, upper_32_bits(iova));
225 }
226 
227 static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
228 		struct msm_ringbuffer *ring, struct msm_gem_submit *submit)
229 {
230 	bool sysprof = msm_gpu_sysprof_no_perfcntr_zap(&a6xx_gpu->base.base);
231 	struct msm_context *ctx = submit->queue->ctx;
232 	struct drm_gpuvm *vm = msm_context_vm(submit->dev, ctx);
233 	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
234 	phys_addr_t ttbr;
235 	u32 asid;
236 	u64 memptr = rbmemptr(ring, ttbr0);
237 
238 	if (ctx->seqno == ring->cur_ctx_seqno)
239 		return;
240 
241 	if (msm_iommu_pagetable_params(to_msm_vm(vm)->mmu, &ttbr, &asid))
242 		return;
243 
244 	if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) {
245 		/* Wait for previous submit to complete before continuing: */
246 		OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
247 		OUT_RING(ring, 0);
248 		OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
249 		OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
250 		OUT_RING(ring, submit->seqno - 1);
251 
252 		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
253 		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH);
254 
255 		/* Reset state used to synchronize BR and BV */
256 		OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1);
257 		OUT_RING(ring,
258 			 CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS |
259 			 CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE |
260 			 CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER |
261 			 CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS);
262 
263 		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
264 		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH);
265 
266 		OUT_PKT7(ring, CP_EVENT_WRITE, 1);
267 		OUT_RING(ring, LRZ_FLUSH_INVALIDATE);
268 
269 		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
270 		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
271 	}
272 
273 	if (!sysprof) {
274 		if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
275 			/* Turn off protected mode to write to special registers */
276 			OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
277 			OUT_RING(ring, 0);
278 		}
279 
280 		if (adreno_is_a8xx(adreno_gpu)) {
281 			OUT_PKT4(ring, REG_A8XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
282 			OUT_RING(ring, 1);
283 			OUT_PKT4(ring, REG_A8XX_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1);
284 			OUT_RING(ring, 1);
285 		} else {
286 			OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
287 			OUT_RING(ring, 1);
288 		}
289 	}
290 
291 	/* Execute the table update */
292 	OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
293 	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));
294 
295 	OUT_RING(ring,
296 		CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
297 		CP_SMMU_TABLE_UPDATE_1_ASID(asid));
298 	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
299 	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));
300 
301 	/*
302 	 * Write the new TTBR0 to the memstore. This is good for debugging.
303 	 * Needed for preemption
304 	 */
305 	OUT_PKT7(ring, CP_MEM_WRITE, 5);
306 	OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_LO(lower_32_bits(memptr)));
307 	OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_HI(upper_32_bits(memptr)));
308 	OUT_RING(ring, lower_32_bits(ttbr));
309 	OUT_RING(ring, upper_32_bits(ttbr));
310 	OUT_RING(ring, ctx->seqno);
311 
312 	/*
313 	 * Sync both threads after switching pagetables and enable BR only
314 	 * to make sure BV doesn't race ahead while BR is still switching
315 	 * pagetables.
316 	 */
317 	if (adreno_is_a7xx(&a6xx_gpu->base) || adreno_is_a8xx(&a6xx_gpu->base)) {
318 		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
319 		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
320 	}
321 
322 	/*
323 	 * And finally, trigger a uche flush to be sure there isn't anything
324 	 * lingering in that part of the GPU
325 	 */
326 
327 	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
328 	OUT_RING(ring, CACHE_INVALIDATE);
329 
330 	if (!sysprof) {
331 		u32 reg_status = adreno_is_a8xx(adreno_gpu) ?
332 			REG_A8XX_RBBM_PERFCTR_SRAM_INIT_STATUS :
333 			REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS;
334 		/*
335 		 * Wait for SRAM clear after the pgtable update, so the
336 		 * two can happen in parallel:
337 		 */
338 		OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
339 		OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
340 		OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(reg_status));
341 		OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0));
342 		OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
343 		OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
344 		OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
345 
346 		if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
347 			/* Re-enable protected mode: */
348 			OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
349 			OUT_RING(ring, 1);
350 		}
351 	}
352 }
353 
354 static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
355 {
356 	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
357 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
358 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
359 	struct msm_ringbuffer *ring = submit->ring;
360 	unsigned int i, ibs = 0;
361 
362 	adreno_check_and_reenable_stall(adreno_gpu);
363 
364 	a6xx_set_pagetable(a6xx_gpu, ring, submit);
365 
366 	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
367 		rbmemptr_stats(ring, index, cpcycles_start));
368 
369 	/*
370 	 * For PM4 the GMU register offsets are calculated from the base of the
371 	 * GPU registers so we need to add 0x1a800 to the register value on A630
372 	 * to get the right value from PM4.
373 	 */
374 	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_CONTEXT,
375 		rbmemptr_stats(ring, index, alwayson_start));
376 
377 	/* Invalidate CCU depth and color */
378 	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
379 	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));
380 
381 	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
382 	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));
383 
384 	/* Submit the commands */
385 	for (i = 0; i < submit->nr_cmds; i++) {
386 		switch (submit->cmd[i].type) {
387 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
388 			break;
389 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
390 			if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
391 				break;
392 			fallthrough;
393 		case MSM_SUBMIT_CMD_BUF:
394 			OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
395 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
396 			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
397 			OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
398 			ibs++;
399 			break;
400 		}
401 
402 		/*
403 		 * Periodically update shadow-wptr if needed, so that we
404 		 * can see partial progress of submits with large # of
405 		 * cmds.. otherwise we could needlessly stall waiting for
406 		 * ringbuffer state, simply due to looking at a shadow
407 		 * rptr value that has not been updated
408 		 */
409 		if ((ibs % 32) == 0)
410 			update_shadow_rptr(gpu, ring);
411 	}
412 
413 	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
414 		rbmemptr_stats(ring, index, cpcycles_end));
415 	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_CONTEXT,
416 		rbmemptr_stats(ring, index, alwayson_end));
417 
418 	/* Write the fence to the scratch register */
419 	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
420 	OUT_RING(ring, submit->seqno);
421 
422 	/*
423 	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
424 	 * timestamp is written to the memory and then triggers the interrupt
425 	 */
426 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
427 	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
428 		CP_EVENT_WRITE_0_IRQ);
429 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
430 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
431 	OUT_RING(ring, submit->seqno);
432 
433 	trace_msm_gpu_submit_flush(submit, adreno_gpu->funcs->get_timestamp(gpu));
434 
435 	a6xx_flush(gpu, ring);
436 }
437 
438 void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
439 		struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue)
440 {
441 	u64 preempt_postamble;
442 
443 	OUT_PKT7(ring, CP_SET_PSEUDO_REG, 12);
444 
445 	OUT_RING(ring, SMMU_INFO);
446 	/* don't save SMMU, we write the record from the kernel instead */
447 	OUT_RING(ring, 0);
448 	OUT_RING(ring, 0);
449 
450 	/* privileged and non secure buffer save */
451 	OUT_RING(ring, NON_SECURE_SAVE_ADDR);
452 	OUT_RING(ring, lower_32_bits(
453 		a6xx_gpu->preempt_iova[ring->id]));
454 	OUT_RING(ring, upper_32_bits(
455 		a6xx_gpu->preempt_iova[ring->id]));
456 
457 	/* user context buffer save, seems to be unnused by fw */
458 	OUT_RING(ring, NON_PRIV_SAVE_ADDR);
459 	OUT_RING(ring, 0);
460 	OUT_RING(ring, 0);
461 
462 	OUT_RING(ring, COUNTER);
463 	/* seems OK to set to 0 to disable it */
464 	OUT_RING(ring, 0);
465 	OUT_RING(ring, 0);
466 
467 	/* Emit postamble to clear perfcounters */
468 	preempt_postamble = a6xx_gpu->preempt_postamble_iova;
469 
470 	OUT_PKT7(ring, CP_SET_AMBLE, 3);
471 	OUT_RING(ring, lower_32_bits(preempt_postamble));
472 	OUT_RING(ring, upper_32_bits(preempt_postamble));
473 	OUT_RING(ring, CP_SET_AMBLE_2_DWORDS(
474 				 a6xx_gpu->preempt_postamble_len) |
475 			 CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE));
476 }
477 
478 static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
479 {
480 	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
481 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
482 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
483 	struct msm_ringbuffer *ring = submit->ring;
484 	u32 rbbm_perfctr_cp0, cp_always_on_context;
485 	unsigned int i, ibs = 0;
486 
487 	adreno_check_and_reenable_stall(adreno_gpu);
488 
489 	/*
490 	 * Toggle concurrent binning for pagetable switch and set the thread to
491 	 * BR since only it can execute the pagetable switch packets.
492 	 */
493 	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
494 	OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
495 
496 	a6xx_set_pagetable(a6xx_gpu, ring, submit);
497 
498 	/*
499 	 * If preemption is enabled, then set the pseudo register for the save
500 	 * sequence
501 	 */
502 	if (gpu->nr_rings > 1)
503 		a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue);
504 
505 	if (adreno_is_a8xx(adreno_gpu)) {
506 		rbbm_perfctr_cp0 = REG_A8XX_RBBM_PERFCTR_CP(0);
507 		cp_always_on_context = REG_A8XX_CP_ALWAYS_ON_CONTEXT;
508 	} else {
509 		rbbm_perfctr_cp0 = REG_A7XX_RBBM_PERFCTR_CP(0);
510 		cp_always_on_context = REG_A6XX_CP_ALWAYS_ON_CONTEXT;
511 	}
512 
513 	get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_start));
514 	get_stats_counter(ring, cp_always_on_context, rbmemptr_stats(ring, index, alwayson_start));
515 
516 	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
517 	OUT_RING(ring, CP_SET_THREAD_BOTH);
518 
519 	OUT_PKT7(ring, CP_SET_MARKER, 1);
520 	OUT_RING(ring, 0x101); /* IFPC disable */
521 
522 	if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
523 		OUT_PKT7(ring, CP_SET_MARKER, 1);
524 		OUT_RING(ring, 0x00d); /* IB1LIST start */
525 	}
526 
527 	/* Submit the commands */
528 	for (i = 0; i < submit->nr_cmds; i++) {
529 		switch (submit->cmd[i].type) {
530 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
531 			break;
532 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
533 			if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
534 				break;
535 			fallthrough;
536 		case MSM_SUBMIT_CMD_BUF:
537 			OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
538 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
539 			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
540 			OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
541 			ibs++;
542 			break;
543 		}
544 
545 		/*
546 		 * Periodically update shadow-wptr if needed, so that we
547 		 * can see partial progress of submits with large # of
548 		 * cmds.. otherwise we could needlessly stall waiting for
549 		 * ringbuffer state, simply due to looking at a shadow
550 		 * rptr value that has not been updated
551 		 */
552 		if ((ibs % 32) == 0)
553 			update_shadow_rptr(gpu, ring);
554 	}
555 
556 	if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
557 		OUT_PKT7(ring, CP_SET_MARKER, 1);
558 		OUT_RING(ring, 0x00e); /* IB1LIST end */
559 	}
560 
561 	get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_end));
562 	get_stats_counter(ring, cp_always_on_context, rbmemptr_stats(ring, index, alwayson_end));
563 
564 	/* Write the fence to the scratch register */
565 	if (adreno_is_a8xx(adreno_gpu)) {
566 		OUT_PKT4(ring, REG_A8XX_CP_SCRATCH_GLOBAL(2), 1);
567 		OUT_RING(ring, submit->seqno);
568 	} else {
569 		OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
570 		OUT_RING(ring, submit->seqno);
571 	}
572 
573 	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
574 	OUT_RING(ring, CP_SET_THREAD_BR);
575 
576 	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
577 	OUT_RING(ring, CCU_INVALIDATE_DEPTH);
578 
579 	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
580 	OUT_RING(ring, CCU_INVALIDATE_COLOR);
581 
582 	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
583 	OUT_RING(ring, CP_SET_THREAD_BV);
584 
585 	/*
586 	 * Make sure the timestamp is committed once BV pipe is
587 	 * completely done with this submission.
588 	 */
589 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
590 	OUT_RING(ring, CACHE_CLEAN | BIT(27));
591 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
592 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
593 	OUT_RING(ring, submit->seqno);
594 
595 	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
596 	OUT_RING(ring, CP_SET_THREAD_BR);
597 
598 	/*
599 	 * This makes sure that BR doesn't race ahead and commit
600 	 * timestamp to memstore while BV is still processing
601 	 * this submission.
602 	 */
603 	OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
604 	OUT_RING(ring, 0);
605 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
606 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
607 	OUT_RING(ring, submit->seqno);
608 
609 	a6xx_gpu->last_seqno[ring->id] = submit->seqno;
610 
611 	/* write the ringbuffer timestamp */
612 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
613 	OUT_RING(ring, CACHE_CLEAN | CP_EVENT_WRITE_0_IRQ | BIT(27));
614 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
615 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
616 	OUT_RING(ring, submit->seqno);
617 
618 	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
619 	OUT_RING(ring, CP_SET_THREAD_BOTH);
620 
621 	OUT_PKT7(ring, CP_SET_MARKER, 1);
622 	OUT_RING(ring, 0x100); /* IFPC enable */
623 
624 	trace_msm_gpu_submit_flush(submit, adreno_gpu->funcs->get_timestamp(gpu));
625 
626 	a6xx_flush_yield(gpu, ring);
627 
628 	/* Check to see if we need to start preemption */
629 	if (adreno_is_a8xx(adreno_gpu))
630 		a8xx_preempt_trigger(gpu);
631 	else
632 		a6xx_preempt_trigger(gpu);
633 }
634 
635 static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
636 {
637 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
638 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
639 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
640 	const struct adreno_reglist *reg;
641 	unsigned int i;
642 	u32 cgc_delay, cgc_hyst;
643 	u32 val, clock_cntl_on;
644 
645 	if (!(adreno_gpu->info->a6xx->hwcg || adreno_is_a7xx(adreno_gpu)))
646 		return;
647 
648 	if (adreno_is_a630(adreno_gpu))
649 		clock_cntl_on = 0x8aa8aa02;
650 	else if (adreno_is_a610(adreno_gpu) || adreno_is_a612(adreno_gpu))
651 		clock_cntl_on = 0xaaa8aa82;
652 	else if (adreno_is_a702(adreno_gpu))
653 		clock_cntl_on = 0xaaaaaa82;
654 	else
655 		clock_cntl_on = 0x8aa8aa82;
656 
657 	if (adreno_is_a612(adreno_gpu))
658 		cgc_delay = 0x11;
659 	else if (adreno_is_a615_family(adreno_gpu))
660 		cgc_delay = 0x111;
661 	else
662 		cgc_delay = 0x10111;
663 
664 	if (adreno_is_a612(adreno_gpu))
665 		cgc_hyst = 0x55;
666 	else if (adreno_is_a615_family(adreno_gpu))
667 		cgc_hyst = 0x555;
668 	else
669 		cgc_hyst = 0x5555;
670 
671 	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
672 			state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
673 	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
674 			state ? cgc_delay : 0);
675 	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
676 			state ? cgc_hyst : 0);
677 
678 	if (!adreno_gpu->info->a6xx->hwcg) {
679 		gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
680 		gpu_write(gpu, REG_A7XX_RBBM_CGC_GLOBAL_LOAD_CMD, state ? 1 : 0);
681 
682 		if (state) {
683 			gpu_write(gpu, REG_A7XX_RBBM_CGC_P2S_TRIG_CMD, 1);
684 
685 			if (gpu_poll_timeout(gpu, REG_A7XX_RBBM_CGC_P2S_STATUS, val,
686 					     val & A7XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
687 				dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
688 				return;
689 			}
690 
691 			gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
692 		}
693 
694 		return;
695 	}
696 
697 	val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);
698 
699 	/* Don't re-program the registers if they are already correct */
700 	if ((!state && !val) || (state && (val == clock_cntl_on)))
701 		return;
702 
703 	/* Disable SP clock before programming HWCG registers */
704 	if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
705 		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
706 
707 	for (i = 0; (reg = &adreno_gpu->info->a6xx->hwcg[i], reg->offset); i++)
708 		gpu_write(gpu, reg->offset, state ? reg->value : 0);
709 
710 	/* Enable SP clock */
711 	if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
712 		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
713 
714 	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
715 }
716 
717 static void a6xx_set_cp_protect(struct msm_gpu *gpu)
718 {
719 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
720 	const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
721 	unsigned i;
722 
723 	/*
724 	 * Enable access protection to privileged registers, fault on an access
725 	 * protect violation and select the last span to protect from the start
726 	 * address all the way to the end of the register address space
727 	 */
728 	gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL,
729 		  A6XX_CP_PROTECT_CNTL_ACCESS_PROT_EN |
730 		  A6XX_CP_PROTECT_CNTL_ACCESS_FAULT_ON_VIOL_EN |
731 		  A6XX_CP_PROTECT_CNTL_LAST_SPAN_INF_RANGE);
732 
733 	for (i = 0; i < protect->count - 1; i++) {
734 		/* Intentionally skip writing to some registers */
735 		if (protect->regs[i])
736 			gpu_write(gpu, REG_A6XX_CP_PROTECT(i), protect->regs[i]);
737 	}
738 	/* last CP_PROTECT to have "infinite" length on the last entry */
739 	gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]);
740 }
741 
742 static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
743 {
744 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
745 	const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config;
746 	/*
747 	 * We subtract 13 from the highest bank bit (13 is the minimum value
748 	 * allowed by hw) and write the lowest two bits of the remaining value
749 	 * as hbb_lo and the one above it as hbb_hi to the hardware.
750 	 */
751 	BUG_ON(cfg->highest_bank_bit < 13);
752 	u32 hbb = cfg->highest_bank_bit - 13;
753 	bool rgb565_predicator = cfg->ubwc_enc_version >= UBWC_4_0;
754 	u32 level2_swizzling_dis = !(qcom_ubwc_swizzle(cfg) & UBWC_SWIZZLE_ENABLE_LVL2);
755 	bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg);
756 	bool amsbc = qcom_ubwc_enable_amsbc(cfg);
757 	bool min_acc_len_64b;
758 	u8 uavflagprd_inv = 0;
759 	u32 hbb_hi = hbb >> 2;
760 	u32 hbb_lo = hbb & 3;
761 
762 	if (adreno_is_a650_family(adreno_gpu) || adreno_is_a7xx(adreno_gpu))
763 		uavflagprd_inv = 2;
764 
765 	min_acc_len_64b = qcom_ubwc_min_acc_length_64b(cfg);
766 
767 	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
768 		  level2_swizzling_dis << 12 |
769 		  rgb565_predicator << 11 |
770 		  hbb_hi << 10 | amsbc << 4 |
771 		  min_acc_len_64b << 3 |
772 		  hbb_lo << 1 | ubwc_mode);
773 
774 	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
775 		  level2_swizzling_dis << 6 | hbb_hi << 4 |
776 		  min_acc_len_64b << 3 |
777 		  hbb_lo << 1 | ubwc_mode);
778 
779 	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
780 		  level2_swizzling_dis << 12 | hbb_hi << 10 |
781 		  uavflagprd_inv << 4 |
782 		  min_acc_len_64b << 3 |
783 		  hbb_lo << 1 | ubwc_mode);
784 
785 	if (adreno_is_a7xx(adreno_gpu)) {
786 		for (u32 pipe_id = PIPE_BR; pipe_id <= PIPE_BV; pipe_id++) {
787 			gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
788 				  A7XX_CP_APERTURE_CNTL_HOST_PIPE(pipe_id));
789 			gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL,
790 				  FIELD_PREP(GENMASK(8, 5), hbb_lo));
791 		}
792 		gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
793 			  A7XX_CP_APERTURE_CNTL_HOST_PIPE(PIPE_NONE));
794 	}
795 
796 	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL,
797 		  min_acc_len_64b << 23 | hbb_lo << 21);
798 
799 	gpu_write(gpu, REG_A6XX_RBBM_NC_MODE_CNTL,
800 		  qcom_ubwc_macrotile_mode(cfg));
801 }
802 
803 static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
804 {
805 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
806 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
807 	const struct adreno_reglist_list *reglist;
808 	const struct adreno_reglist_pipe_list *dyn_pwrup_reglist;
809 	void *ptr = a6xx_gpu->pwrup_reglist_ptr;
810 	struct cpu_gpu_lock *lock = ptr;
811 	u32 *dest = (u32 *)&lock->regs[0];
812 	u32 dyn_pwrup_reglist_count = 0;
813 	int i;
814 
815 	lock->gpu_req = lock->cpu_req = lock->turn = 0;
816 
817 	reglist = adreno_gpu->info->a6xx->ifpc_reglist;
818 	if (reglist) {
819 		lock->ifpc_list_len = reglist->count;
820 
821 		/*
822 		 * For each entry in each of the lists, write the offset and the current
823 		 * register value into the GPU buffer
824 		 */
825 		for (i = 0; i < reglist->count; i++) {
826 			*dest++ = reglist->regs[i];
827 			*dest++ = gpu_read(gpu, reglist->regs[i]);
828 		}
829 	}
830 
831 	reglist = adreno_gpu->info->a6xx->pwrup_reglist;
832 	lock->preemption_list_len = reglist->count;
833 
834 	for (i = 0; i < reglist->count; i++) {
835 		*dest++ = reglist->regs[i];
836 		*dest++ = gpu_read(gpu, reglist->regs[i]);
837 	}
838 
839 	/*
840 	 * The overall register list is composed of
841 	 * 1. Static IFPC-only registers
842 	 * 2. Static IFPC + preemption registers
843 	 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
844 	 *
845 	 * The first two lists are static. Size of these lists are stored as
846 	 * number of pairs in ifpc_list_len and preemption_list_len
847 	 * respectively. With concurrent binning, Some of the perfcounter
848 	 * registers being virtualized, CP needs to know the pipe id to program
849 	 * the aperture inorder to restore the same. Thus, third list is a
850 	 * dynamic list with triplets as
851 	 * (<aperture, shifted 12 bits> <address> <data>), and the length is
852 	 * stored as number for triplets in dynamic_list_len.
853 	 */
854 	dyn_pwrup_reglist = adreno_gpu->info->a6xx->dyn_pwrup_reglist;
855 	if (dyn_pwrup_reglist) {
856 		for (u32 pipe_id = PIPE_BR; pipe_id <= PIPE_BV; pipe_id++) {
857 			gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
858 				  A7XX_CP_APERTURE_CNTL_HOST_PIPE(pipe_id));
859 			for (i = 0; i < dyn_pwrup_reglist->count; i++) {
860 				if ((dyn_pwrup_reglist->regs[i].pipe & BIT(pipe_id)) == 0)
861 					continue;
862 				*dest++ = A7XX_CP_APERTURE_CNTL_HOST_PIPE(pipe_id);
863 				*dest++ = dyn_pwrup_reglist->regs[i].offset;
864 				*dest++ = gpu_read(gpu, dyn_pwrup_reglist->regs[i].offset);
865 				dyn_pwrup_reglist_count++;
866 			}
867 		}
868 		gpu_write(gpu, REG_A7XX_CP_APERTURE_CNTL_HOST,
869 			  A7XX_CP_APERTURE_CNTL_HOST_PIPE(PIPE_NONE));
870 	}
871 	lock->dynamic_list_len = dyn_pwrup_reglist_count;
872 	a6xx_gpu->dynamic_sel_reglist_offset = dyn_pwrup_reglist_count;
873 }
874 
875 static int a7xx_preempt_start(struct msm_gpu *gpu)
876 {
877 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
878 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
879 	struct msm_ringbuffer *ring = gpu->rb[0];
880 
881 	if (gpu->nr_rings <= 1)
882 		return 0;
883 
884 	/* Turn CP protection off */
885 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
886 	OUT_RING(ring, 0);
887 
888 	a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL);
889 
890 	a6xx_flush_yield(gpu, ring);
891 
892 	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
893 }
894 
895 static int a6xx_cp_init(struct msm_gpu *gpu)
896 {
897 	struct msm_ringbuffer *ring = gpu->rb[0];
898 
899 	OUT_PKT7(ring, CP_ME_INIT, 8);
900 
901 	OUT_RING(ring, 0x0000002f);
902 
903 	/* Enable multiple hardware contexts */
904 	OUT_RING(ring, 0x00000003);
905 
906 	/* Enable error detection */
907 	OUT_RING(ring, 0x20000000);
908 
909 	/* Don't enable header dump */
910 	OUT_RING(ring, 0x00000000);
911 	OUT_RING(ring, 0x00000000);
912 
913 	/* No workarounds enabled */
914 	OUT_RING(ring, 0x00000000);
915 
916 	/* Pad rest of the cmds with 0's */
917 	OUT_RING(ring, 0x00000000);
918 	OUT_RING(ring, 0x00000000);
919 
920 	a6xx_flush(gpu, ring);
921 	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
922 }
923 
924 static int a7xx_cp_init(struct msm_gpu *gpu)
925 {
926 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
927 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
928 	struct msm_ringbuffer *ring = gpu->rb[0];
929 	u32 mask;
930 
931 	/* Disable concurrent binning before sending CP init */
932 	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
933 	OUT_RING(ring, BIT(27));
934 
935 	OUT_PKT7(ring, CP_ME_INIT, 7);
936 
937 	/* Use multiple HW contexts */
938 	mask = BIT(0);
939 
940 	/* Enable error detection */
941 	mask |= BIT(1);
942 
943 	/* Set default reset state */
944 	mask |= BIT(3);
945 
946 	/* Disable save/restore of performance counters across preemption */
947 	mask |= BIT(6);
948 
949 	/* Enable the register init list with the spinlock */
950 	mask |= BIT(8);
951 
952 	OUT_RING(ring, mask);
953 
954 	/* Enable multiple hardware contexts */
955 	OUT_RING(ring, 0x00000003);
956 
957 	/* Enable error detection */
958 	OUT_RING(ring, 0x20000000);
959 
960 	/* Operation mode mask */
961 	OUT_RING(ring, 0x00000002);
962 
963 	/* *Don't* send a power up reg list for concurrent binning (TODO) */
964 	/* Lo address */
965 	OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova));
966 	/* Hi address */
967 	OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova));
968 	/* BIT(31) set => read the regs from the list */
969 	OUT_RING(ring, BIT(31));
970 
971 	a6xx_flush(gpu, ring);
972 	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
973 }
974 
975 /*
976  * Check that the microcode version is new enough to include several key
977  * security fixes. Return true if the ucode is safe.
978  */
979 static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
980 		struct drm_gem_object *obj)
981 {
982 	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
983 	struct msm_gpu *gpu = &adreno_gpu->base;
984 	const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE];
985 	u32 *buf = msm_gem_get_vaddr(obj);
986 	bool ret = false;
987 
988 	if (IS_ERR(buf))
989 		return false;
990 
991 	/* A7xx is safe! */
992 	if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu) || adreno_is_a8xx(adreno_gpu))
993 		return true;
994 
995 	/*
996 	 * Targets up to a640 (a618, a630 and a640) need to check for a
997 	 * microcode version that is patched to support the whereami opcode or
998 	 * one that is new enough to include it by default.
999 	 *
1000 	 * a650 tier targets don't need whereami but still need to be
1001 	 * equal to or newer than 0.95 for other security fixes
1002 	 *
1003 	 * a660 targets have all the critical security fixes from the start
1004 	 */
1005 	if (!strcmp(sqe_name, "a630_sqe.fw")) {
1006 		/*
1007 		 * If the lowest nibble is 0xa that is an indication that this
1008 		 * microcode has been patched. The actual version is in dword
1009 		 * [3] but we only care about the patchlevel which is the lowest
1010 		 * nibble of dword [3]
1011 		 *
1012 		 * Otherwise check that the firmware is greater than or equal
1013 		 * to 1.90 which was the first version that had this fix built
1014 		 * in
1015 		 */
1016 		if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
1017 			(buf[0] & 0xfff) >= 0x190) {
1018 			a6xx_gpu->has_whereami = true;
1019 			ret = true;
1020 			goto out;
1021 		}
1022 
1023 		DRM_DEV_ERROR(&gpu->pdev->dev,
1024 			"a630 SQE ucode is too old. Have version %x need at least %x\n",
1025 			buf[0] & 0xfff, 0x190);
1026 	} else if (!strcmp(sqe_name, "a650_sqe.fw")) {
1027 		if ((buf[0] & 0xfff) >= 0x095) {
1028 			ret = true;
1029 			goto out;
1030 		}
1031 
1032 		DRM_DEV_ERROR(&gpu->pdev->dev,
1033 			"a650 SQE ucode is too old. Have version %x need at least %x\n",
1034 			buf[0] & 0xfff, 0x095);
1035 	} else if (!strcmp(sqe_name, "a660_sqe.fw")) {
1036 		ret = true;
1037 	} else {
1038 		DRM_DEV_ERROR(&gpu->pdev->dev,
1039 			"unknown GPU, add it to a6xx_ucode_check_version()!!\n");
1040 	}
1041 out:
1042 	msm_gem_put_vaddr(obj);
1043 	return ret;
1044 }
1045 
1046 static int a6xx_ucode_load(struct msm_gpu *gpu)
1047 {
1048 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1049 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1050 
1051 	if (!a6xx_gpu->sqe_bo) {
1052 		a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu,
1053 			adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova);
1054 
1055 		if (IS_ERR(a6xx_gpu->sqe_bo)) {
1056 			int ret = PTR_ERR(a6xx_gpu->sqe_bo);
1057 
1058 			a6xx_gpu->sqe_bo = NULL;
1059 			DRM_DEV_ERROR(&gpu->pdev->dev,
1060 				"Could not allocate SQE ucode: %d\n", ret);
1061 
1062 			return ret;
1063 		}
1064 
1065 		msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
1066 		if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
1067 			msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm);
1068 			drm_gem_object_put(a6xx_gpu->sqe_bo);
1069 
1070 			a6xx_gpu->sqe_bo = NULL;
1071 			return -EPERM;
1072 		}
1073 	}
1074 
1075 	if (!a6xx_gpu->aqe_bo && adreno_gpu->fw[ADRENO_FW_AQE]) {
1076 		a6xx_gpu->aqe_bo = adreno_fw_create_bo(gpu,
1077 			adreno_gpu->fw[ADRENO_FW_AQE], &a6xx_gpu->aqe_iova);
1078 
1079 		if (IS_ERR(a6xx_gpu->aqe_bo)) {
1080 			int ret = PTR_ERR(a6xx_gpu->aqe_bo);
1081 
1082 			a6xx_gpu->aqe_bo = NULL;
1083 			DRM_DEV_ERROR(&gpu->pdev->dev,
1084 				"Could not allocate AQE ucode: %d\n", ret);
1085 
1086 			return ret;
1087 		}
1088 
1089 		msm_gem_object_set_name(a6xx_gpu->aqe_bo, "aqefw");
1090 	}
1091 
1092 	/*
1093 	 * Expanded APRIV and targets that support WHERE_AM_I both need a
1094 	 * privileged buffer to store the RPTR shadow
1095 	 */
1096 	if ((adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) &&
1097 	    !a6xx_gpu->shadow_bo) {
1098 		a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
1099 						      sizeof(u32) * gpu->nr_rings,
1100 						      MSM_BO_WC | MSM_BO_MAP_PRIV,
1101 						      gpu->vm, &a6xx_gpu->shadow_bo,
1102 						      &a6xx_gpu->shadow_iova);
1103 
1104 		if (IS_ERR(a6xx_gpu->shadow))
1105 			return PTR_ERR(a6xx_gpu->shadow);
1106 
1107 		msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow");
1108 	}
1109 
1110 	a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PWRUP_REGLIST_SIZE,
1111 							 MSM_BO_WC  | MSM_BO_MAP_PRIV,
1112 							 gpu->vm, &a6xx_gpu->pwrup_reglist_bo,
1113 							 &a6xx_gpu->pwrup_reglist_iova);
1114 
1115 	if (IS_ERR(a6xx_gpu->pwrup_reglist_ptr))
1116 		return PTR_ERR(a6xx_gpu->pwrup_reglist_ptr);
1117 
1118 	msm_gem_object_set_name(a6xx_gpu->pwrup_reglist_bo, "pwrup_reglist");
1119 
1120 	return 0;
1121 }
1122 
1123 int a6xx_zap_shader_init(struct msm_gpu *gpu)
1124 {
1125 	static bool loaded;
1126 	int ret;
1127 
1128 	if (loaded)
1129 		return 0;
1130 
1131 	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
1132 
1133 	loaded = !ret;
1134 	return ret;
1135 }
1136 
/*
 * Value hw_init() writes to REG_A6XX_RBBM_INT_0_MASK ("Enable interrupts")
 * on targets that are not A7xx.
 */
#define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
		       A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
		       A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
		       A6XX_RBBM_INT_0_MASK_CP_IB2 | \
		       A6XX_RBBM_INT_0_MASK_CP_IB1 | \
		       A6XX_RBBM_INT_0_MASK_CP_RB | \
		       A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
		       A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
		       A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
		       A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
		       A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)

/*
 * Value hw_init() writes to REG_A6XX_RBBM_INT_0_MASK ("Enable interrupts")
 * on A7xx targets.
 */
#define A7XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
		       A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
		       A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
		       A6XX_RBBM_INT_0_MASK_CP_SW | \
		       A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
		       A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
		       A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
		       A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
		       A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
		       A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
		       A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
		       A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
		       A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
		       A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)

/*
 * APRIV bits hw_init() programs into REG_A7XX_CP_BV_APRIV_CNTL and
 * REG_A7XX_CP_LPAC_APRIV_CNTL on A7xx when hw_apriv is set.
 */
#define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \
			 A6XX_CP_APRIV_CNTL_RBFETCH | \
			 A6XX_CP_APRIV_CNTL_RBPRIVLEVEL | \
			 A6XX_CP_APRIV_CNTL_RBRPWB)

/*
 * A7XX_APRIV_MASK plus the CDREAD/CDWRITE bits; hw_init() programs this into
 * REG_A6XX_CP_APRIV_CNTL on A7xx when hw_apriv is set.
 */
#define A7XX_BR_APRIVMASK (A7XX_APRIV_MASK | \
			   A6XX_CP_APRIV_CNTL_CDREAD | \
			   A6XX_CP_APRIV_CNTL_CDWRITE)
1172 
1173 static int hw_init(struct msm_gpu *gpu)
1174 {
1175 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1176 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1177 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1178 	u64 gmem_range_min;
1179 	unsigned int i;
1180 	int ret;
1181 
1182 	if (!adreno_has_gmu_wrapper(adreno_gpu)) {
1183 		/* Make sure the GMU keeps the GPU on while we set it up */
1184 		ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
1185 		if (ret)
1186 			return ret;
1187 	}
1188 
1189 	/* Clear GBIF halt in case GX domain was not collapsed */
1190 	if (adreno_is_a619_holi(adreno_gpu)) {
1191 		gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
1192 		gpu_read(gpu, REG_A6XX_GBIF_HALT);
1193 
1194 		gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, 0);
1195 		gpu_read(gpu, REG_A6XX_RBBM_GPR0_CNTL);
1196 	} else if (a6xx_has_gbif(adreno_gpu)) {
1197 		gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
1198 		gpu_read(gpu, REG_A6XX_GBIF_HALT);
1199 
1200 		gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
1201 		gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT);
1202 	}
1203 
1204 	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
1205 
1206 	if (adreno_is_a619_holi(adreno_gpu))
1207 		a6xx_sptprac_enable(gmu);
1208 
1209 	/*
1210 	 * Disable the trusted memory range - we don't actually supported secure
1211 	 * memory rendering at this point in time and we don't want to block off
1212 	 * part of the virtual memory space.
1213 	 */
1214 	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
1215 	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
1216 
1217 	if (!adreno_is_a7xx(adreno_gpu)) {
1218 		/* Turn on 64 bit addressing for all blocks */
1219 		gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
1220 		gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
1221 		gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
1222 		gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
1223 		gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
1224 		gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
1225 		gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
1226 		gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
1227 		gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
1228 		gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
1229 		gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
1230 		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
1231 	}
1232 
1233 	/* enable hardware clockgating */
1234 	a6xx_set_hwcg(gpu, true);
1235 
1236 	/* For gmuwrapper implementations, do the VBIF/GBIF CX configuration here */
1237 	if (adreno_is_a610_family(adreno_gpu)) {
1238 		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
1239 		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
1240 		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
1241 		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
1242 	}
1243 
1244 	if (adreno_is_a610_family(adreno_gpu) ||
1245 	    adreno_is_a640_family(adreno_gpu) ||
1246 	    adreno_is_a650_family(adreno_gpu)) {
1247 		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3);
1248 	} else if (adreno_is_a7xx(adreno_gpu)) {
1249 		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x2120212);
1250 	} else {
1251 		gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
1252 	}
1253 
1254 	if (adreno_is_a630(adreno_gpu))
1255 		gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
1256 
1257 	if (adreno_is_a7xx(adreno_gpu))
1258 		gpu_write(gpu, REG_A6XX_UCHE_GBIF_GX_CONFIG, 0x10240e0);
1259 
1260 	/* Make all blocks contribute to the GPU BUSY perf counter */
1261 	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
1262 
1263 	/* Disable L2 bypass in the UCHE */
1264 	if (adreno_is_a7xx(adreno_gpu)) {
1265 		gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
1266 		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
1267 	} else {
1268 		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0);
1269 		gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
1270 		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
1271 	}
1272 
1273 	if (!(adreno_is_a650_family(adreno_gpu) ||
1274 	      adreno_is_a702(adreno_gpu) ||
1275 	      adreno_is_a730(adreno_gpu))) {
1276 		gmem_range_min = adreno_is_a740_family(adreno_gpu) ? SZ_16M : SZ_1M;
1277 
1278 		/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
1279 		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN, gmem_range_min);
1280 
1281 		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX,
1282 			gmem_range_min + adreno_gpu->info->gmem - 1);
1283 	}
1284 
1285 	if (adreno_is_a7xx(adreno_gpu))
1286 		gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, BIT(23));
1287 	else {
1288 		gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
1289 		gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);
1290 	}
1291 
1292 	if (adreno_is_a640_family(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) {
1293 		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
1294 		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
1295 	} else if (adreno_is_a610_family(adreno_gpu)) {
1296 		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060);
1297 		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16);
1298 	} else if (!adreno_is_a7xx(adreno_gpu)) {
1299 		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
1300 		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
1301 	}
1302 
1303 	if (adreno_is_a660_family(adreno_gpu))
1304 		gpu_write(gpu, REG_A7XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
1305 
1306 	/* Setting the mem pool size */
1307 	if (adreno_is_a610(adreno_gpu) || adreno_is_a612(adreno_gpu)) {
1308 		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 48);
1309 		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 47);
1310 	} else if (adreno_is_a702(adreno_gpu)) {
1311 		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 64);
1312 		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 63);
1313 	} else if (!adreno_is_a7xx(adreno_gpu))
1314 		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);
1315 
1316 
1317 	/* Set the default primFifo threshold values */
1318 	if (adreno_gpu->info->a6xx->prim_fifo_threshold)
1319 		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL,
1320 			  adreno_gpu->info->a6xx->prim_fifo_threshold);
1321 
1322 	/* Set the AHB default slave response to "ERROR" */
1323 	gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);
1324 
1325 	/* Turn on performance counters */
1326 	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);
1327 
1328 	if (adreno_is_a7xx(adreno_gpu)) {
1329 		/* Turn on the IFPC counter (countable 4 on XOCLK4) */
1330 		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1,
1331 			  FIELD_PREP(GENMASK(7, 0), 0x4));
1332 	}
1333 
1334 	/* Select CP0 to always count cycles */
1335 	gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT);
1336 
1337 	a6xx_set_ubwc_config(gpu);
1338 
1339 	/* Enable fault detection */
1340 	if (adreno_is_a612(adreno_gpu) ||
1341 	    adreno_is_a730(adreno_gpu) ||
1342 	    adreno_is_a740_family(adreno_gpu))
1343 		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff);
1344 	else if (adreno_is_a690(adreno_gpu))
1345 		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x4fffff);
1346 	else if (adreno_is_a619(adreno_gpu))
1347 		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff);
1348 	else if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
1349 		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3ffff);
1350 	else
1351 		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x1fffff);
1352 
1353 	gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1);
1354 
1355 	/* Set weights for bicubic filtering */
1356 	if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) {
1357 		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0);
1358 		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1),
1359 			0x3fe05ff4);
1360 		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2),
1361 			0x3fa0ebee);
1362 		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3),
1363 			0x3f5193ed);
1364 		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4),
1365 			0x3f0243f0);
1366 	}
1367 
1368 	/* Set up the CX GMU counter 0 to count busy ticks */
1369 	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);
1370 
1371 	/* Enable the power counter */
1372 	gmu_rmw(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, BIT(5));
1373 	gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);
1374 
1375 	/* Protect registers from the CP */
1376 	a6xx_set_cp_protect(gpu);
1377 
1378 	if (adreno_is_a660_family(adreno_gpu)) {
1379 		if (adreno_is_a690(adreno_gpu))
1380 			gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x00028801);
1381 		else
1382 			gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
1383 		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
1384 	} else if (adreno_is_a702(adreno_gpu)) {
1385 		/* Something to do with the HLSQ cluster */
1386 		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(24));
1387 	}
1388 
1389 	if (adreno_is_a690(adreno_gpu))
1390 		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90);
1391 	/* Set dualQ + disable afull for A660 GPU */
1392 	else if (adreno_is_a660(adreno_gpu) || adreno_is_a663(adreno_gpu))
1393 		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
1394 	else if (adreno_is_a7xx(adreno_gpu))
1395 		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG,
1396 			  FIELD_PREP(GENMASK(19, 16), 6) |
1397 			  FIELD_PREP(GENMASK(15, 12), 6) |
1398 			  FIELD_PREP(GENMASK(11, 8), 9) |
1399 			  BIT(3) | BIT(2) |
1400 			  FIELD_PREP(GENMASK(1, 0), 2));
1401 
1402 	/* Enable expanded apriv for targets that support it */
1403 	if (gpu->hw_apriv) {
1404 		if (adreno_is_a7xx(adreno_gpu)) {
1405 			gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
1406 				  A7XX_BR_APRIVMASK);
1407 			gpu_write(gpu, REG_A7XX_CP_BV_APRIV_CNTL,
1408 				  A7XX_APRIV_MASK);
1409 			gpu_write(gpu, REG_A7XX_CP_LPAC_APRIV_CNTL,
1410 				  A7XX_APRIV_MASK);
1411 		} else
1412 			gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
1413 				  BIT(6) | BIT(5) | BIT(3) | BIT(2) | BIT(1));
1414 	}
1415 
1416 	if (adreno_is_a750(adreno_gpu)) {
1417 		/* Disable ubwc merged UFC request feature */
1418 		gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(19), BIT(19));
1419 
1420 		/* Enable TP flaghint and other performance settings */
1421 		gpu_write(gpu, REG_A6XX_TPL1_DBG_ECO_CNTL1, 0xc0700);
1422 	} else if (adreno_is_a7xx(adreno_gpu)) {
1423 		/* Disable non-ubwc read reqs from passing write reqs */
1424 		gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(11), BIT(11));
1425 	}
1426 
1427 	/* Enable interrupts */
1428 	gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK,
1429 		  adreno_is_a7xx(adreno_gpu) ? A7XX_INT_MASK : A6XX_INT_MASK);
1430 
1431 	ret = adreno_hw_init(gpu);
1432 	if (ret)
1433 		goto out;
1434 
1435 	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
1436 
1437 	/* Set the ringbuffer address */
1438 	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
1439 
1440 	/* Targets that support extended APRIV can use the RPTR shadow from
1441 	 * hardware but all the other ones need to disable the feature. Targets
1442 	 * that support the WHERE_AM_I opcode can use that instead
1443 	 */
1444 	if (adreno_gpu->base.hw_apriv)
1445 		gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
1446 	else
1447 		gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
1448 			MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
1449 
1450 	/* Configure the RPTR shadow if needed: */
1451 	if (a6xx_gpu->shadow_bo) {
1452 		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR,
1453 			shadowptr(a6xx_gpu, gpu->rb[0]));
1454 		for (unsigned int i = 0; i < gpu->nr_rings; i++)
1455 			a6xx_gpu->shadow[i] = 0;
1456 	}
1457 
1458 	/* ..which means "always" on A7xx, also for BV shadow */
1459 	if (adreno_is_a7xx(adreno_gpu)) {
1460 		gpu_write64(gpu, REG_A7XX_CP_BV_RB_RPTR_ADDR,
1461 			    rbmemptr(gpu->rb[0], bv_rptr));
1462 	}
1463 
1464 	a6xx_preempt_hw_init(gpu);
1465 
1466 	/* Always come up on rb 0 */
1467 	a6xx_gpu->cur_ring = gpu->rb[0];
1468 
1469 	for (i = 0; i < gpu->nr_rings; i++)
1470 		gpu->rb[i]->cur_ctx_seqno = 0;
1471 
1472 	/* Enable the SQE_to start the CP engine */
1473 	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);
1474 
1475 	if (adreno_is_a7xx(adreno_gpu) && !a6xx_gpu->pwrup_reglist_emitted) {
1476 		a7xx_patch_pwrup_reglist(gpu);
1477 		a6xx_gpu->pwrup_reglist_emitted = true;
1478 	}
1479 
1480 	ret = adreno_is_a7xx(adreno_gpu) ? a7xx_cp_init(gpu) : a6xx_cp_init(gpu);
1481 	if (ret)
1482 		goto out;
1483 
1484 	/*
1485 	 * Try to load a zap shader into the secure world. If successful
1486 	 * we can use the CP to switch out of secure mode. If not then we
1487 	 * have no resource but to try to switch ourselves out manually. If we
1488 	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
1489 	 * be blocked and a permissions violation will soon follow.
1490 	 */
1491 	ret = a6xx_zap_shader_init(gpu);
1492 	if (!ret) {
1493 		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
1494 		OUT_RING(gpu->rb[0], 0x00000000);
1495 
1496 		a6xx_flush(gpu, gpu->rb[0]);
1497 		if (!a6xx_idle(gpu, gpu->rb[0]))
1498 			return -EINVAL;
1499 	} else if (ret == -ENODEV) {
1500 		/*
1501 		 * This device does not use zap shader (but print a warning
1502 		 * just in case someone got their dt wrong.. hopefully they
1503 		 * have a debug UART to realize the error of their ways...
1504 		 * if you mess this up you are about to crash horribly)
1505 		 */
1506 		dev_warn_once(gpu->dev->dev,
1507 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
1508 		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
1509 		ret = 0;
1510 	} else {
1511 		return ret;
1512 	}
1513 
1514 out:
1515 	if (adreno_has_gmu_wrapper(adreno_gpu))
1516 		return ret;
1517 
1518 	/* Last step - yield the ringbuffer */
1519 	a7xx_preempt_start(gpu);
1520 
1521 	/*
1522 	 * Tell the GMU that we are done touching the GPU and it can start power
1523 	 * management
1524 	 */
1525 	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
1526 
1527 	if (a6xx_gpu->gmu.legacy) {
1528 		/* Take the GMU out of its special boot mode */
1529 		a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
1530 	}
1531 
1532 	if (!ret && msm_gpu_sysprof_no_ifpc(gpu)) {
1533 		ret = a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET);
1534 		if (!ret)
1535 			set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status);
1536 	}
1537 
1538 	return ret;
1539 }
1540 
1541 static int a6xx_hw_init(struct msm_gpu *gpu)
1542 {
1543 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1544 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1545 	int ret;
1546 
1547 	mutex_lock(&a6xx_gpu->gmu.lock);
1548 	ret = hw_init(gpu);
1549 	mutex_unlock(&a6xx_gpu->gmu.lock);
1550 
1551 	return ret;
1552 }
1553 
1554 static void a6xx_dump(struct msm_gpu *gpu)
1555 {
1556 	DRM_DEV_INFO(&gpu->pdev->dev, "status:   %08x\n",
1557 			gpu_read(gpu, REG_A6XX_RBBM_STATUS));
1558 	adreno_dump(gpu);
1559 }
1560 
/*
 * Recover the GPU after a hang.
 *
 * Sequence: dump state, halt the SQE if GX is on, mark the GPU as hung, then
 * drop the runtime-PM references and request a synced power-off of the CX
 * power domain. After waiting up to 1000 ms on the pd_gate completion, the
 * references are taken again and the hardware is re-initialised through
 * msm_gpu_hw_init().
 *
 * gpu->active_lock is held from the point active_submits is sampled until it
 * is restored.
 */
static void a6xx_recover(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int active_submits;

	adreno_dump_info(gpu);

	if (adreno_gpu->funcs->gx_is_on(adreno_gpu)) {
		/* Sometimes crashstate capture is skipped, so SQE should be halted here again */
		gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);

		if (hang_debug)
			a6xx_dump(gpu);

	}

	/*
	 * To handle recovery specific sequences during the rpm suspend we are
	 * about to trigger
	 */

	a6xx_gpu->hung = true;

	/* Autosuspend is switched back on further down, before the references are re-taken */
	pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);

	/* active_submit won't change until we make a submission */
	mutex_lock(&gpu->active_lock);
	active_submits = gpu->active_submits;

	/*
	 * Temporarily clear active_submits count to silence a WARN() in the
	 * runtime suspend cb
	 */
	gpu->active_submits = 0;

	if (adreno_has_gmu_wrapper(adreno_gpu) || adreno_has_rgmu(adreno_gpu)) {
		/* Drain the outstanding traffic on memory buses */
		adreno_gpu->funcs->bus_halt(adreno_gpu, true);

		/* Reset the GPU to a clean state */
		a6xx_gpu_sw_reset(gpu, true);
		a6xx_gpu_sw_reset(gpu, false);
	}

	/*
	 * Re-arm the pd_gate completion and register the pd_nb notifier on the
	 * CX power domain before requesting the synced power-off.
	 * NOTE(review): pd_nb presumably completes pd_gate when the domain
	 * powers off - the notifier callback is not visible here, confirm.
	 */
	reinit_completion(&gmu->pd_gate);
	dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
	dev_pm_genpd_synced_poweroff(gmu->cxpd);

	/* Drop the rpm refcount from active submits */
	if (active_submits)
		pm_runtime_put(&gpu->pdev->dev);

	/* And the final one from recover worker */
	pm_runtime_put_sync(&gpu->pdev->dev);

	/* Give the CX domain up to one second to collapse; only log if it does not */
	if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
		DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");

	dev_pm_genpd_remove_notifier(gmu->cxpd);

	pm_runtime_use_autosuspend(&gpu->pdev->dev);

	/* Re-take the references dropped above, in the same order */
	if (active_submits)
		pm_runtime_get(&gpu->pdev->dev);

	pm_runtime_get_sync(&gpu->pdev->dev);

	/* Restore the count that was cleared for the suspend */
	gpu->active_submits = active_submits;
	mutex_unlock(&gpu->active_lock);

	msm_gpu_hw_init(gpu);
	a6xx_gpu->hung = false;
}
1636 
1637 static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
1638 {
1639 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1640 	static const char *uche_clients[7] = {
1641 		"VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
1642 	};
1643 	u32 val;
1644 
1645 	if (adreno_is_a7xx(adreno_gpu)) {
1646 		if (mid != 1 && mid != 2 && mid != 3 && mid != 8)
1647 			return "UNKNOWN";
1648 	} else {
1649 		if (mid < 1 || mid > 3)
1650 			return "UNKNOWN";
1651 	}
1652 
1653 	/*
1654 	 * The source of the data depends on the mid ID read from FSYNR1.
1655 	 * and the client ID read from the UCHE block
1656 	 */
1657 	val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);
1658 
1659 	if (adreno_is_a7xx(adreno_gpu)) {
1660 		/* Bit 3 for mid=3 indicates BR or BV */
1661 		static const char *uche_clients_a7xx[16] = {
1662 			"BR_VFD", "BR_SP", "BR_VSC", "BR_VPC",
1663 			"BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
1664 			"BV_VFD", "BV_SP", "BV_VSC", "BV_VPC",
1665 			"BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
1666 		};
1667 
1668 		/* LPAC has the same clients as BR and BV, but because it is
1669 		 * compute-only some of them do not exist and there are holes
1670 		 * in the array.
1671 		 */
1672 		static const char *uche_clients_lpac_a7xx[8] = {
1673 			"-", "LPAC_SP", "-", "-",
1674 			"LPAC_HLSQ", "-", "-", "LPAC_TP",
1675 		};
1676 
1677 		val &= GENMASK(6, 0);
1678 
1679 		/* mid=3 refers to BR or BV */
1680 		if (mid == 3) {
1681 			if (val < ARRAY_SIZE(uche_clients_a7xx))
1682 				return uche_clients_a7xx[val];
1683 			else
1684 				return "UCHE";
1685 		}
1686 
1687 		/* mid=8 refers to LPAC */
1688 		if (mid == 8) {
1689 			if (val < ARRAY_SIZE(uche_clients_lpac_a7xx))
1690 				return uche_clients_lpac_a7xx[val];
1691 			else
1692 				return "UCHE_LPAC";
1693 		}
1694 
1695 		/* mid=2 is a catchall for everything else in LPAC */
1696 		if (mid == 2)
1697 			return "UCHE_LPAC";
1698 
1699 		/* mid=1 is a catchall for everything else in BR/BV */
1700 		return "UCHE";
1701 	} else if (adreno_is_a660_family(adreno_gpu)) {
1702 		static const char *uche_clients_a660[8] = {
1703 			"VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", "TP",
1704 		};
1705 
1706 		static const char *uche_clients_a660_not[8] = {
1707 			"not VFD", "not SP", "not VSC", "not VPC",
1708 			"not HLSQ", "not PC", "not LRZ", "not TP",
1709 		};
1710 
1711 		val &= GENMASK(6, 0);
1712 
1713 		if (mid == 3 && val < ARRAY_SIZE(uche_clients_a660))
1714 			return uche_clients_a660[val];
1715 
1716 		if (mid == 1 && val < ARRAY_SIZE(uche_clients_a660_not))
1717 			return uche_clients_a660_not[val];
1718 
1719 		return "UCHE";
1720 	} else {
1721 		/* mid = 3 is most precise and refers to only one block per client */
1722 		if (mid == 3)
1723 			return uche_clients[val & 7];
1724 
1725 		/* For mid=2 the source is TP or VFD except when the client id is 0 */
1726 		if (mid == 2)
1727 			return ((val & 7) == 0) ? "TP" : "TP|VFD";
1728 
1729 		/* For mid=1 just return "UCHE" as a catchall for everything else */
1730 		return "UCHE";
1731 	}
1732 }
1733 
1734 static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
1735 {
1736 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1737 
1738 	if (id == 0)
1739 		return "CP";
1740 	else if (id == 4)
1741 		return "CCU";
1742 	else if (id == 6)
1743 		return "CDP Prefetch";
1744 	else if (id == 7)
1745 		return "GMU";
1746 	else if (id == 5 && adreno_is_a7xx(adreno_gpu))
1747 		return "Flag cache";
1748 
1749 	return a6xx_uche_fault_block(gpu, id);
1750 }
1751 
1752 static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1753 {
1754 	struct msm_gpu *gpu = arg;
1755 	struct adreno_smmu_fault_info *info = data;
1756 	const char *block = "unknown";
1757 
1758 	u32 scratch[] = {
1759 			gpu_read(gpu, REG_A6XX_CP_SCRATCH(4)),
1760 			gpu_read(gpu, REG_A6XX_CP_SCRATCH(5)),
1761 			gpu_read(gpu, REG_A6XX_CP_SCRATCH(6)),
1762 			gpu_read(gpu, REG_A6XX_CP_SCRATCH(7)),
1763 	};
1764 
1765 	if (info)
1766 		block = a6xx_fault_block(gpu, info->fsynr1 & 0xff);
1767 
1768 	return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
1769 }
1770 
1771 static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
1772 {
1773 	u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS);
1774 
1775 	if (status & A6XX_CP_INT_CP_OPCODE_ERROR) {
1776 		u32 val;
1777 
1778 		gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1);
1779 		val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA);
1780 		dev_err_ratelimited(&gpu->pdev->dev,
1781 			"CP | opcode error | possible opcode=0x%8.8X\n",
1782 			val);
1783 	}
1784 
1785 	if (status & A6XX_CP_INT_CP_UCODE_ERROR)
1786 		dev_err_ratelimited(&gpu->pdev->dev,
1787 			"CP ucode error interrupt\n");
1788 
1789 	if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR)
1790 		dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n",
1791 			gpu_read(gpu, REG_A6XX_CP_HW_FAULT));
1792 
1793 	if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1794 		u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS);
1795 
1796 		dev_err_ratelimited(&gpu->pdev->dev,
1797 			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1798 			val & (1 << 20) ? "READ" : "WRITE",
1799 			(val & 0x3ffff), val);
1800 	}
1801 
1802 	if (status & A6XX_CP_INT_CP_AHB_ERROR && !adreno_is_a7xx(to_adreno_gpu(gpu)))
1803 		dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n");
1804 
1805 	if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR)
1806 		dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n");
1807 
1808 	if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR)
1809 		dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n");
1810 
1811 }
1812 
1813 static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
1814 {
1815 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1816 
1817 	/*
1818 	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
1819 	 * but the fault handler will trigger the devcore dump, and we want
1820 	 * to otherwise resume normally rather than killing the submit, so
1821 	 * just bail.
1822 	 */
1823 	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
1824 		return;
1825 
1826 	DRM_DEV_ERROR(&gpu->pdev->dev,
1827 		"gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1828 		ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
1829 		gpu_read(gpu, REG_A6XX_RBBM_STATUS),
1830 		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
1831 		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
1832 		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
1833 		gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
1834 		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
1835 		gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
1836 
1837 	/* Turn off the hangcheck timer to keep it from bothering us */
1838 	timer_delete(&gpu->hangcheck_timer);
1839 
1840 	/* Turn off interrupts to avoid triggering recovery again */
1841 	gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 0);
1842 
1843 	kthread_queue_work(gpu->worker, &gpu->recover_work);
1844 }
1845 
1846 static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
1847 {
1848 	u32 status;
1849 
1850 	status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS);
1851 	gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0);
1852 
1853 	dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);
1854 
1855 	/*
1856 	 * Ignore FASTBLEND violations, because the HW will silently fall back
1857 	 * to legacy blending.
1858 	 */
1859 	if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
1860 		      A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
1861 		timer_delete(&gpu->hangcheck_timer);
1862 
1863 		kthread_queue_work(gpu->worker, &gpu->recover_work);
1864 	}
1865 }
1866 
1867 static void a6xx_gpu_keepalive_vote(struct msm_gpu *gpu, bool on)
1868 {
1869 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1870 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1871 
1872 	if (adreno_has_gmu_wrapper(adreno_gpu))
1873 		return;
1874 
1875 	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, on);
1876 }
1877 
1878 static int irq_poll_fence(struct msm_gpu *gpu)
1879 {
1880 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1881 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1882 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1883 	u32 status;
1884 
1885 	if (adreno_has_gmu_wrapper(adreno_gpu))
1886 		return 0;
1887 
1888 	if (gmu_poll_timeout_atomic(gmu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, status, !status, 1, 100)) {
1889 		u32 rbbm_unmasked = gmu_read(gmu, REG_A6XX_GMU_RBBM_INT_UNMASKED_STATUS);
1890 
1891 		dev_err_ratelimited(&gpu->pdev->dev,
1892 				"irq fence poll timeout, fence_ctrl=0x%x, unmasked_status=0x%x\n",
1893 				status, rbbm_unmasked);
1894 		return -ETIMEDOUT;
1895 	}
1896 
1897 	return 0;
1898 }
1899 
1900 static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
1901 {
1902 	struct msm_drm_private *priv = gpu->dev->dev_private;
1903 
1904 	/* Set keepalive vote to avoid power collapse after RBBM_INT_0_STATUS is read */
1905 	a6xx_gpu_keepalive_vote(gpu, true);
1906 
1907 	if (irq_poll_fence(gpu))
1908 		goto done;
1909 
1910 	u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
1911 
1912 	gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
1913 
1914 	if (priv->disable_err_irq)
1915 		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
1916 
1917 	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
1918 		a6xx_fault_detect_irq(gpu);
1919 
1920 	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR)
1921 		dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n");
1922 
1923 	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1924 		a6xx_cp_hw_err_irq(gpu);
1925 
1926 	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
1927 		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");
1928 
1929 	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1930 		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");
1931 
1932 	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1933 		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");
1934 
1935 	if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
1936 		a7xx_sw_fuse_violation_irq(gpu);
1937 
1938 	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1939 		msm_gpu_retire(gpu);
1940 		a6xx_preempt_trigger(gpu);
1941 	}
1942 
1943 	if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
1944 		a6xx_preempt_irq(gpu);
1945 
1946 done:
1947 	a6xx_gpu_keepalive_vote(gpu, false);
1948 
1949 	return IRQ_HANDLED;
1950 }
1951 
1952 static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
1953 {
1954 	llcc_slice_deactivate(a6xx_gpu->llc_slice);
1955 	llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
1956 }
1957 
1958 static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1959 {
1960 	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1961 	struct msm_gpu *gpu = &adreno_gpu->base;
1962 	u32 cntl1_regval = 0;
1963 
1964 	if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
1965 		return;
1966 
1967 	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1968 		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1969 
1970 		gpu_scid &= 0x1f;
1971 		cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
1972 			       (gpu_scid << 15) | (gpu_scid << 20);
1973 
1974 		/* On A660, the SCID programming for UCHE traffic is done in
1975 		 * A6XX_GBIF_SCACHE_CNTL0[14:10]
1976 		 */
1977 		if (adreno_is_a660_family(adreno_gpu))
1978 			gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
1979 				(1 << 8), (gpu_scid << 10) | (1 << 8));
1980 	}
1981 
1982 	/*
1983 	 * For targets with a MMU500, activate the slice but don't program the
1984 	 * register.  The XBL will take care of that.
1985 	 */
1986 	if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
1987 		if (!a6xx_gpu->have_mmu500) {
1988 			u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
1989 
1990 			gpuhtw_scid &= 0x1f;
1991 			cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
1992 		}
1993 	}
1994 
1995 	if (!cntl1_regval)
1996 		return;
1997 
1998 	/*
1999 	 * Program the slice IDs for the various GPU blocks and GPU MMU
2000 	 * pagetables
2001 	 */
2002 	if (!a6xx_gpu->have_mmu500) {
2003 		a6xx_cx_misc_write(a6xx_gpu,
2004 			REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
2005 
2006 		/*
2007 		 * Program cacheability overrides to not allocate cache
2008 		 * lines on a write miss
2009 		 */
2010 		a6xx_cx_misc_rmw(a6xx_gpu,
2011 			REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
2012 		return;
2013 	}
2014 
2015 	gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval);
2016 }
2017 
2018 static void a7xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
2019 {
2020 	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
2021 	struct msm_gpu *gpu = &adreno_gpu->base;
2022 
2023 	if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
2024 		return;
2025 
2026 	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
2027 		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
2028 
2029 		gpu_scid &= GENMASK(4, 0);
2030 
2031 		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1,
2032 			  FIELD_PREP(GENMASK(29, 25), gpu_scid) |
2033 			  FIELD_PREP(GENMASK(24, 20), gpu_scid) |
2034 			  FIELD_PREP(GENMASK(19, 15), gpu_scid) |
2035 			  FIELD_PREP(GENMASK(14, 10), gpu_scid) |
2036 			  FIELD_PREP(GENMASK(9, 5), gpu_scid) |
2037 			  FIELD_PREP(GENMASK(4, 0), gpu_scid));
2038 
2039 		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0,
2040 			  FIELD_PREP(GENMASK(14, 10), gpu_scid) |
2041 			  BIT(8));
2042 	}
2043 
2044 	llcc_slice_activate(a6xx_gpu->htw_llc_slice);
2045 }
2046 
2047 static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
2048 {
2049 	/* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */
2050 	if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
2051 		return;
2052 
2053 	llcc_slice_putd(a6xx_gpu->llc_slice);
2054 	llcc_slice_putd(a6xx_gpu->htw_llc_slice);
2055 }
2056 
2057 static void a6xx_llc_slices_init(struct platform_device *pdev,
2058 		struct a6xx_gpu *a6xx_gpu, bool is_a7xx)
2059 {
2060 	/* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */
2061 	if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
2062 		return;
2063 
2064 	a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
2065 	a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
2066 }
2067 
2068 #define GBIF_CLIENT_HALT_MASK		BIT(0)
2069 #define GBIF_ARB_HALT_MASK		BIT(1)
2070 #define VBIF_XIN_HALT_CTRL0_MASK	GENMASK(3, 0)
2071 #define VBIF_RESET_ACK_MASK		0xF0
2072 #define GPR0_GBIF_HALT_REQUEST		0x1E0
2073 
2074 void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
2075 {
2076 	struct msm_gpu *gpu = &adreno_gpu->base;
2077 
2078 	if (adreno_is_a619_holi(adreno_gpu)) {
2079 		gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, GPR0_GBIF_HALT_REQUEST);
2080 		spin_until((gpu_read(gpu, REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) &
2081 				(VBIF_RESET_ACK_MASK)) == VBIF_RESET_ACK_MASK);
2082 	} else if (!a6xx_has_gbif(adreno_gpu)) {
2083 		gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, VBIF_XIN_HALT_CTRL0_MASK);
2084 		spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) &
2085 				(VBIF_XIN_HALT_CTRL0_MASK)) == VBIF_XIN_HALT_CTRL0_MASK);
2086 		gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
2087 
2088 		return;
2089 	}
2090 
2091 	if (gx_off) {
2092 		/* Halt the gx side of GBIF */
2093 		gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1);
2094 		spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1);
2095 	}
2096 
2097 	/* Halt new client requests on GBIF */
2098 	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
2099 	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
2100 			(GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
2101 
2102 	/* Halt all AXI requests on GBIF */
2103 	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
2104 	spin_until((gpu_read(gpu,  REG_A6XX_GBIF_HALT_ACK) &
2105 			(GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
2106 
2107 	/* The GBIF halt needs to be explicitly cleared */
2108 	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
2109 }
2110 
2111 void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert)
2112 {
2113 	/* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */
2114 	if (adreno_is_a610(to_adreno_gpu(gpu)) || adreno_is_a8xx(to_adreno_gpu(gpu)))
2115 		return;
2116 
2117 	gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert);
2118 	/* Perform a bogus read and add a brief delay to ensure ordering. */
2119 	gpu_read(gpu, REG_A6XX_RBBM_SW_RESET_CMD);
2120 	udelay(1);
2121 
2122 	/* The reset line needs to be asserted for at least 100 us */
2123 	if (assert)
2124 		udelay(100);
2125 }
2126 
2127 static int a6xx_gmu_pm_resume(struct msm_gpu *gpu)
2128 {
2129 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2130 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2131 	int ret;
2132 
2133 	gpu->needs_hw_init = true;
2134 
2135 	trace_msm_gpu_resume(0);
2136 
2137 	mutex_lock(&a6xx_gpu->gmu.lock);
2138 	ret = a6xx_gmu_resume(a6xx_gpu);
2139 	mutex_unlock(&a6xx_gpu->gmu.lock);
2140 	if (ret)
2141 		return ret;
2142 
2143 	msm_devfreq_resume(gpu);
2144 
2145 	if (adreno_is_a8xx(adreno_gpu))
2146 		a8xx_llc_activate(a6xx_gpu);
2147 	else if (adreno_is_a7xx(adreno_gpu))
2148 		a7xx_llc_activate(a6xx_gpu);
2149 	else
2150 		a6xx_llc_activate(a6xx_gpu);
2151 
2152 	return ret;
2153 }
2154 
2155 static int a6xx_pm_resume(struct msm_gpu *gpu)
2156 {
2157 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2158 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2159 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
2160 	unsigned long freq = gpu->fast_rate;
2161 	struct dev_pm_opp *opp;
2162 	int ret;
2163 
2164 	gpu->needs_hw_init = true;
2165 
2166 	trace_msm_gpu_resume(0);
2167 
2168 	mutex_lock(&a6xx_gpu->gmu.lock);
2169 
2170 	opp = dev_pm_opp_find_freq_ceil(&gpu->pdev->dev, &freq);
2171 	if (IS_ERR(opp)) {
2172 		ret = PTR_ERR(opp);
2173 		goto err_set_opp;
2174 	}
2175 	dev_pm_opp_put(opp);
2176 
2177 	/* Set the core clock and bus bw, having VDD scaling in mind */
2178 	dev_pm_opp_set_opp(&gpu->pdev->dev, opp);
2179 
2180 	pm_runtime_resume_and_get(gmu->dev);
2181 	pm_runtime_resume_and_get(gmu->gxpd);
2182 
2183 	ret = clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
2184 	if (ret)
2185 		goto err_bulk_clk;
2186 
2187 	ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks);
2188 	if (ret) {
2189 		clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);
2190 		goto err_bulk_clk;
2191 	}
2192 
2193 	if (adreno_is_a619_holi(adreno_gpu))
2194 		a6xx_sptprac_enable(gmu);
2195 
2196 	/* If anything goes south, tear the GPU down piece by piece.. */
2197 	if (ret) {
2198 err_bulk_clk:
2199 		pm_runtime_put(gmu->gxpd);
2200 		pm_runtime_put(gmu->dev);
2201 		dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
2202 	}
2203 err_set_opp:
2204 	mutex_unlock(&a6xx_gpu->gmu.lock);
2205 
2206 	if (!ret) {
2207 		msm_devfreq_resume(gpu);
2208 		a6xx_llc_activate(a6xx_gpu);
2209 	}
2210 
2211 	return ret;
2212 }
2213 
2214 static int a6xx_gmu_pm_suspend(struct msm_gpu *gpu)
2215 {
2216 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2217 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2218 	int i, ret;
2219 
2220 	trace_msm_gpu_suspend(0);
2221 
2222 	a6xx_llc_deactivate(a6xx_gpu);
2223 
2224 	msm_devfreq_suspend(gpu);
2225 
2226 	mutex_lock(&a6xx_gpu->gmu.lock);
2227 	ret = a6xx_gmu_stop(a6xx_gpu);
2228 	mutex_unlock(&a6xx_gpu->gmu.lock);
2229 	if (ret)
2230 		return ret;
2231 
2232 	if (a6xx_gpu->shadow_bo)
2233 		for (i = 0; i < gpu->nr_rings; i++)
2234 			a6xx_gpu->shadow[i] = 0;
2235 
2236 	gpu->suspend_count++;
2237 
2238 	return 0;
2239 }
2240 
2241 static int a6xx_pm_suspend(struct msm_gpu *gpu)
2242 {
2243 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2244 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2245 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
2246 	int i;
2247 
2248 	trace_msm_gpu_suspend(0);
2249 
2250 	a6xx_llc_deactivate(a6xx_gpu);
2251 
2252 	msm_devfreq_suspend(gpu);
2253 
2254 	mutex_lock(&a6xx_gpu->gmu.lock);
2255 
2256 	/* Drain the outstanding traffic on memory buses */
2257 	adreno_gpu->funcs->bus_halt(adreno_gpu, true);
2258 
2259 	if (adreno_is_a619_holi(adreno_gpu))
2260 		a6xx_sptprac_disable(gmu);
2261 
2262 	clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);
2263 	clk_bulk_disable_unprepare(gmu->nr_clocks, gmu->clocks);
2264 
2265 	pm_runtime_put_sync(gmu->gxpd);
2266 	dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
2267 	pm_runtime_put_sync(gmu->dev);
2268 
2269 	mutex_unlock(&a6xx_gpu->gmu.lock);
2270 
2271 	if (a6xx_gpu->shadow_bo)
2272 		for (i = 0; i < gpu->nr_rings; i++)
2273 			a6xx_gpu->shadow[i] = 0;
2274 
2275 	gpu->suspend_count++;
2276 
2277 	return 0;
2278 }
2279 
2280 static u64 a6xx_get_timestamp(struct msm_gpu *gpu)
2281 {
2282 	return gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER);
2283 }
2284 
2285 static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
2286 {
2287 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2288 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2289 
2290 	return a6xx_gpu->cur_ring;
2291 }
2292 
2293 static void a6xx_destroy(struct msm_gpu *gpu)
2294 {
2295 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2296 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2297 
2298 	if (a6xx_gpu->sqe_bo) {
2299 		msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm);
2300 		drm_gem_object_put(a6xx_gpu->sqe_bo);
2301 	}
2302 
2303 	if (a6xx_gpu->aqe_bo) {
2304 		msm_gem_unpin_iova(a6xx_gpu->aqe_bo, gpu->vm);
2305 		drm_gem_object_put(a6xx_gpu->aqe_bo);
2306 	}
2307 
2308 	if (a6xx_gpu->shadow_bo) {
2309 		msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->vm);
2310 		drm_gem_object_put(a6xx_gpu->shadow_bo);
2311 	}
2312 
2313 	a6xx_llc_slices_destroy(a6xx_gpu);
2314 
2315 	a6xx_gmu_remove(a6xx_gpu);
2316 
2317 	adreno_gpu_cleanup(adreno_gpu);
2318 
2319 	kfree(a6xx_gpu);
2320 }
2321 
2322 static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
2323 {
2324 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2325 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2326 	u64 busy_cycles;
2327 
2328 	/* 19.2MHz */
2329 	*out_sample_rate = 19200000;
2330 
2331 	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
2332 			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
2333 			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
2334 
2335 	return busy_cycles;
2336 }
2337 
2338 static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
2339 			      bool suspended)
2340 {
2341 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2342 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2343 
2344 	mutex_lock(&a6xx_gpu->gmu.lock);
2345 	a6xx_gmu_set_freq(gpu, opp, suspended);
2346 	mutex_unlock(&a6xx_gpu->gmu.lock);
2347 }
2348 
2349 static struct drm_gpuvm *
2350 a6xx_create_vm(struct msm_gpu *gpu, struct platform_device *pdev)
2351 {
2352 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2353 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2354 	unsigned long quirks = 0;
2355 
2356 	/*
2357 	 * This allows GPU to set the bus attributes required to use system
2358 	 * cache on behalf of the iommu page table walker.
2359 	 */
2360 	if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice) &&
2361 	    !device_iommu_capable(&pdev->dev, IOMMU_CAP_CACHE_COHERENCY))
2362 		quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
2363 
2364 	return adreno_iommu_create_vm(gpu, pdev, quirks);
2365 }
2366 
2367 static struct drm_gpuvm *
2368 a6xx_create_private_vm(struct msm_gpu *gpu, bool kernel_managed)
2369 {
2370 	struct msm_mmu *mmu;
2371 
2372 	mmu = msm_iommu_pagetable_create(to_msm_vm(gpu->vm)->mmu, kernel_managed);
2373 
2374 	if (IS_ERR(mmu))
2375 		return ERR_CAST(mmu);
2376 
2377 	return msm_gem_vm_create(gpu->dev, mmu, "gpu", ADRENO_VM_START,
2378 				 adreno_private_vm_size(gpu), kernel_managed);
2379 }
2380 
2381 static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
2382 {
2383 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2384 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2385 
2386 	if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
2387 		return a6xx_gpu->shadow[ring->id];
2388 
2389 	/*
2390 	 * This is true only on an A6XX_GEN1 with GMU, has IFPC enabled and a super old SQE firmware
2391 	 * without 'whereami' support
2392 	 */
2393 	WARN_ONCE((to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC),
2394 		"Can't read CP_RB_RPTR register reliably\n");
2395 
2396 	return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
2397 }
2398 
2399 static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
2400 {
2401 	struct msm_cp_state cp_state;
2402 	bool progress;
2403 
2404 	/*
2405 	 * With IFPC, KMD doesn't know whether GX power domain is collapsed
2406 	 * or not. So, we can't blindly read the below registers in GX domain.
2407 	 * Lets trust the hang detection in HW and lie to the caller that
2408 	 * there was progress.
2409 	 */
2410 	if (to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC)
2411 		return true;
2412 
2413 	cp_state = (struct msm_cp_state) {
2414 		.ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
2415 		.ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
2416 		.ib1_rem  = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
2417 		.ib2_rem  = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
2418 	};
2419 
2420 	/*
2421 	 * Adjust the remaining data to account for what has already been
2422 	 * fetched from memory, but not yet consumed by the SQE.
2423 	 *
2424 	 * This is not *technically* correct, the amount buffered could
2425 	 * exceed the IB size due to hw prefetching ahead, but:
2426 	 *
2427 	 * (1) We aren't trying to find the exact position, just whether
2428 	 *     progress has been made
2429 	 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
2430 	 *     to prevent prefetching into an unrelated submit.  (And
2431 	 *     either way, at some point the ROQ will be full.)
2432 	 */
2433 	cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB1) >> 16;
2434 	cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB2) >> 16;
2435 
2436 	progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
2437 
2438 	ring->last_cp_state = cp_state;
2439 
2440 	return progress;
2441 }
2442 
2443 static void
2444 perfcntr_select(struct msm_ringbuffer *ring, enum adreno_pipe pipe,
2445 		uint32_t regidx, uint32_t *countables, uint32_t nr,
2446 		uint32_t **reglist)
2447 {
2448 	OUT_PKT4(ring, regidx, nr);
2449 	for (unsigned i = 0; i < nr; i++)
2450 		OUT_RING(ring, countables[i]);
2451 
2452 	if (!*reglist)
2453 		return;
2454 
2455 	for (unsigned i = 0; i < nr; i++) {
2456 		/*
2457 		 * Bitfield is in same position on a7xx, but only 2 bits..
2458 		 * which is sufficient for NONE/BR/BV:
2459 		 */
2460 		*(*reglist)++ = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe);
2461 		*(*reglist)++ = regidx + i;
2462 		*(*reglist)++ = countables[i];
2463 	}
2464 }
2465 
2466 static void
2467 a6xx_perfcntr_configure(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
2468 			const struct msm_perfcntr_stream *stream)
2469 {
2470 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2471 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2472 	enum adreno_pipe pipe = PIPE_NONE;
2473 	uint32_t *reglist = NULL;
2474 	uint32_t *reglist_sel_start;
2475 
2476 	if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) {
2477 		WARN_ON(!a6xx_gpu->pwrup_reglist_emitted);
2478 
2479 		struct cpu_gpu_lock *lock = a6xx_gpu->pwrup_reglist_ptr;
2480 		int off = (2 * lock->ifpc_list_len) +
2481 			  (2 * lock->preemption_list_len) +
2482 			  (3 * a6xx_gpu->dynamic_sel_reglist_offset);
2483 
2484 		reglist = (uint32_t *)&lock->regs[0];
2485 		reglist += off;
2486 		reglist_sel_start = reglist;
2487 
2488 		/* Clear any previously configured SEL reg entries: */
2489 		lock->dynamic_list_len = a6xx_gpu->dynamic_sel_reglist_offset;
2490 
2491 		/*
2492 		 * Ensure CP sees the dynamic_list_len update before we
2493 		 * start modifying the SEL entries:
2494 		 */
2495 		dma_wmb();
2496 	}
2497 
2498 	for (unsigned i = 0; i < stream->nr_groups; i++) {
2499 		unsigned group_idx = msm_perfcntr_group_idx(stream, i);
2500 		unsigned base = msm_perfcntr_counter_base(stream, group_idx);
2501 
2502 		const struct msm_perfcntr_group *group =
2503 			&gpu->perfcntr_groups[group_idx];
2504 
2505 		struct msm_perfcntr_group_state *group_state =
2506 			gpu->perfcntrs->groups[group_idx];
2507 
2508 		if (group->pipe != pipe) {
2509 			pipe = group->pipe;
2510 
2511 			OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
2512 
2513 			if (pipe == PIPE_BR) {
2514 				OUT_RING(ring, CP_SET_THREAD_BR);
2515 			} else if (pipe == PIPE_BV) {
2516 				OUT_RING(ring, CP_SET_THREAD_BV);
2517 			} else {
2518 				OUT_RING(ring, CP_SET_THREAD_BOTH);
2519 			}
2520 		}
2521 
2522 		const struct msm_perfcntr_counter *counter = &group->counters[base];
2523 		unsigned nr = group_state->allocated_counters;
2524 		perfcntr_select(ring, pipe, counter->select_reg,
2525 				group_state->countables, nr, &reglist);
2526 
2527 		for (unsigned s = 0; s < ARRAY_SIZE(counter->slice_select_regs); s++) {
2528 			if (!counter->slice_select_regs[s])
2529 				break;
2530 
2531 			perfcntr_select(ring, pipe, counter->slice_select_regs[s],
2532 					group_state->countables, nr, &reglist);
2533 		}
2534 	}
2535 
2536 	if (pipe != PIPE_NONE) {
2537 		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
2538 		OUT_RING(ring, CP_SET_THREAD_BOTH);
2539 	}
2540 
2541 	OUT_PKT7(ring, CP_MEM_WRITE, 3);
2542 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, perfcntr_fence)));
2543 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, perfcntr_fence)));
2544 	OUT_RING(ring, stream->sel_fence);
2545 
2546 	/*
2547 	 * Update the pwrup reglist size before flushing.  Kgsl does a shared-
2548 	 * memory spinlock dance with SQE to avoid racing with IFPC exit.  But
2549 	 * we can skip that since the ringbuffer programming will be executed
2550 	 * by SQE after dynamic reglist size is updated.  So even if we lose
2551 	 * the race, the register programming in the rb will overwrite/correct
2552 	 * the SEL regs restored by SQE on IFPC exit, before sampling begins.
2553 	 */
2554 	if (reglist) {
2555 		struct cpu_gpu_lock *lock = a6xx_gpu->pwrup_reglist_ptr;
2556 		unsigned nr_regs = (reglist - reglist_sel_start) / 3;
2557 
2558 		/*
2559 		 * Ensure CP sees updates to the pwrup_reglist before it
2560 		 * sees the new (increased) length:
2561 		 */
2562 		dma_wmb();
2563 
2564 		/* Update dynamic reglist len to include new SEL reg programming: */
2565 		lock->dynamic_list_len = a6xx_gpu->dynamic_sel_reglist_offset + nr_regs;
2566 
2567 		WARN_ON_ONCE(reglist > (uint32_t *)((uint8_t *)lock + PWRUP_REGLIST_SIZE));
2568 	}
2569 
2570 	a6xx_flush_yield(gpu, ring);
2571 
2572 	/* Check to see if we need to start preemption */
2573 	if (adreno_is_a8xx(to_adreno_gpu(gpu)))
2574 		a8xx_preempt_trigger(gpu);
2575 	else
2576 		a6xx_preempt_trigger(gpu);
2577 }
2578 
2579 static u32 fuse_to_supp_hw(const struct adreno_info *info, u32 fuse)
2580 {
2581 	if (!info->speedbins)
2582 		return UINT_MAX;
2583 
2584 	for (int i = 0; info->speedbins[i].fuse != SHRT_MAX; i++)
2585 		if (info->speedbins[i].fuse == fuse)
2586 			return BIT(info->speedbins[i].speedbin);
2587 
2588 	return UINT_MAX;
2589 }
2590 
2591 static int a6xx_read_speedbin(struct device *dev, struct a6xx_gpu *a6xx_gpu,
2592 		const struct adreno_info *info, u32 *speedbin)
2593 {
2594 	int ret;
2595 
2596 	/* Use speedbin fuse if present. Otherwise, fallback to softfuse */
2597 	ret = adreno_read_speedbin(dev, speedbin);
2598 	if (ret != -ENOENT)
2599 		return ret;
2600 
2601 	if (info->quirks & ADRENO_QUIRK_SOFTFUSE) {
2602 		*speedbin = a6xx_cx_misc_read(a6xx_gpu, REG_A8XX_CX_MISC_SW_FUSE_FREQ_LIMIT_STATUS);
2603 		*speedbin = A8XX_CX_MISC_SW_FUSE_FREQ_LIMIT_STATUS_FINALFREQLIMIT(*speedbin);
2604 		return 0;
2605 	}
2606 
2607 	return -ENOENT;
2608 }
2609 
2610 static int a6xx_set_supported_hw(struct device *dev, struct a6xx_gpu *a6xx_gpu,
2611 		const struct adreno_info *info)
2612 {
2613 	u32 supp_hw;
2614 	u32 speedbin;
2615 	int ret;
2616 
2617 	ret = a6xx_read_speedbin(dev, a6xx_gpu, info, &speedbin);
2618 	/*
2619 	 * -ENOENT means that the platform doesn't support speedbin which is
2620 	 * fine
2621 	 */
2622 	if (ret == -ENOENT) {
2623 		return 0;
2624 	} else if (ret) {
2625 		dev_err_probe(dev, ret,
2626 			      "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
2627 		return ret;
2628 	}
2629 
2630 	supp_hw = fuse_to_supp_hw(info, speedbin);
2631 
2632 	if (supp_hw == UINT_MAX) {
2633 		DRM_DEV_ERROR(dev,
2634 			"missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
2635 			speedbin);
2636 		supp_hw = BIT(0); /* Default */
2637 	}
2638 
2639 	ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
2640 	if (ret)
2641 		return ret;
2642 
2643 	return 0;
2644 }
2645 
2646 static bool a6xx_aqe_is_enabled(struct adreno_gpu *adreno_gpu)
2647 {
2648 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2649 
2650 	/*
2651 	 * AQE uses preemption context record as scratch pad, so check if
2652 	 * preemption is enabled
2653 	 */
2654 	return (adreno_gpu->base.nr_rings > 1) && !!a6xx_gpu->aqe_bo;
2655 }
2656 
2657 static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
2658 {
2659 	struct msm_drm_private *priv = dev->dev_private;
2660 	struct platform_device *pdev = priv->gpu_pdev;
2661 	struct adreno_platform_config *config = pdev->dev.platform_data;
2662 	const struct adreno_info *info = config->info;
2663 	struct device_node *phandle;
2664 	struct a6xx_gpu *a6xx_gpu;
2665 	struct adreno_gpu *adreno_gpu;
2666 	struct msm_gpu *gpu;
2667 	extern int enable_preemption;
2668 	u32 speedbin;
2669 	bool is_a7xx;
2670 	int ret, nr_rings = 1;
2671 
2672 	a6xx_gpu = kzalloc_obj(*a6xx_gpu);
2673 	if (!a6xx_gpu)
2674 		return ERR_PTR(-ENOMEM);
2675 
2676 	adreno_gpu = &a6xx_gpu->base;
2677 	gpu = &adreno_gpu->base;
2678 
2679 	if ((ADRENO_6XX_GEN1 <= config->info->family) &&
2680 	    (config->info->family <= ADRENO_6XX_GEN4)) {
2681 		gpu->perfcntr_groups = a6xx_perfcntr_groups;
2682 		gpu->num_perfcntr_groups = a6xx_num_perfcntr_groups;
2683 	} else if ((ADRENO_7XX_GEN1 <= config->info->family) &&
2684 		   (config->info->family <= ADRENO_7XX_GEN3)) {
2685 		gpu->perfcntr_groups = a7xx_perfcntr_groups;
2686 		gpu->num_perfcntr_groups = a7xx_num_perfcntr_groups;
2687 	} else if ((ADRENO_8XX_GEN1 <= config->info->family) &&
2688 		   (config->info->family <= ADRENO_8XX_GEN2)) {
2689 		gpu->perfcntr_groups = a8xx_perfcntr_groups;
2690 		gpu->num_perfcntr_groups = a8xx_num_perfcntr_groups;
2691 	}
2692 
2693 	mutex_init(&a6xx_gpu->gmu.lock);
2694 	spin_lock_init(&a6xx_gpu->aperture_lock);
2695 
2696 	adreno_gpu->registers = NULL;
2697 
2698 	/* Check if there is a GMU phandle and set it up */
2699 	struct device_node *node __free(device_node) =
2700 		of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
2701 	/* FIXME: How do we gracefully handle this? */
2702 	BUG_ON(!node);
2703 
2704 	adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper");
2705 
2706 	adreno_gpu->base.hw_apriv =
2707 		!!(info->quirks & ADRENO_QUIRK_HAS_HW_APRIV);
2708 
2709 	/* gpu->info only gets assigned in adreno_gpu_init(). A8x is included intentionally */
2710 	is_a7xx = info->family >= ADRENO_7XX_GEN1;
2711 
2712 	a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx);
2713 
2714 	/*
2715 	 * There is a different programming path for A6xx targets with an
2716 	 * mmu500 attached, so detect if that is the case
2717 	 */
2718 	phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0);
2719 	a6xx_gpu->have_mmu500 = (phandle &&
2720 		of_device_is_compatible(phandle, "arm,mmu-500"));
2721 	of_node_put(phandle);
2722 
2723 	if (is_a7xx || !a6xx_gpu->have_mmu500)
2724 		a6xx_gpu->cx_misc_mmio = msm_ioremap(pdev, "cx_mem");
2725 	else
2726 		a6xx_gpu->cx_misc_mmio = NULL;
2727 
2728 	ret = a6xx_set_supported_hw(&pdev->dev, a6xx_gpu, info);
2729 	if (ret) {
2730 		a6xx_llc_slices_destroy(a6xx_gpu);
2731 		kfree(a6xx_gpu);
2732 		return ERR_PTR(ret);
2733 	}
2734 
2735 	if ((enable_preemption == 1) || (enable_preemption == -1 &&
2736 	    (info->quirks & ADRENO_QUIRK_PREEMPTION)))
2737 		nr_rings = 4;
2738 
2739 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, info->funcs, nr_rings);
2740 	if (ret) {
2741 		a6xx_destroy(&(a6xx_gpu->base.base));
2742 		return ERR_PTR(ret);
2743 	}
2744 
2745 	/* Set the speedbin value that is passed to userspace */
2746 	if (a6xx_read_speedbin(&pdev->dev, a6xx_gpu, info, &speedbin) || !speedbin)
2747 		speedbin = 0xffff;
2748 	adreno_gpu->speedbin = (uint16_t) (0xffff & speedbin);
2749 
2750 	/*
2751 	 * For now only clamp to idle freq for devices where this is known not
2752 	 * to cause power supply issues:
2753 	 */
2754 	if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
2755 		priv->gpu_clamp_to_idle = true;
2756 
2757 	if (adreno_has_gmu_wrapper(adreno_gpu) || adreno_has_rgmu(adreno_gpu))
2758 		ret = a6xx_gmu_wrapper_init(a6xx_gpu, node);
2759 	else
2760 		ret = a6xx_gmu_init(a6xx_gpu, node);
2761 	if (ret) {
2762 		a6xx_destroy(&(a6xx_gpu->base.base));
2763 		return ERR_PTR(ret);
2764 	}
2765 
2766 	adreno_gpu->uche_trap_base = 0x1fffffffff000ull;
2767 
2768 	msm_mmu_set_fault_handler(to_msm_vm(gpu->vm)->mmu, gpu,
2769 				  adreno_gpu->funcs->mmu_fault_handler);
2770 
2771 	adreno_gpu->ubwc_config = qcom_ubwc_config_get_data();
2772 	if (IS_ERR(adreno_gpu->ubwc_config)) {
2773 		a6xx_destroy(&(a6xx_gpu->base.base));
2774 		return ERR_CAST(adreno_gpu->ubwc_config);
2775 	}
2776 
2777 	/* Set up the preemption specific bits and pieces for each ringbuffer */
2778 	a6xx_preempt_init(gpu);
2779 
2780 	return gpu;
2781 }
2782 
2783 const struct adreno_gpu_funcs a6xx_gpu_funcs = {
2784 	.base = {
2785 		.get_param = adreno_get_param,
2786 		.set_param = adreno_set_param,
2787 		.hw_init = a6xx_hw_init,
2788 		.ucode_load = a6xx_ucode_load,
2789 		.pm_suspend = a6xx_gmu_pm_suspend,
2790 		.pm_resume = a6xx_gmu_pm_resume,
2791 		.recover = a6xx_recover,
2792 		.submit = a6xx_submit,
2793 		.active_ring = a6xx_active_ring,
2794 		.irq = a6xx_irq,
2795 		.destroy = a6xx_destroy,
2796 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2797 		.show = a6xx_show,
2798 #endif
2799 		.gpu_busy = a6xx_gpu_busy,
2800 		.gpu_get_freq = a6xx_gmu_get_freq,
2801 		.gpu_set_freq = a6xx_gpu_set_freq,
2802 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2803 		.gpu_state_get = a6xx_gpu_state_get,
2804 		.gpu_state_put = a6xx_gpu_state_put,
2805 #endif
2806 		.create_vm = a6xx_create_vm,
2807 		.create_private_vm = a6xx_create_private_vm,
2808 		.get_rptr = a6xx_get_rptr,
2809 		.progress = a6xx_progress,
2810 		.sysprof_setup = a6xx_gmu_sysprof_setup,
2811 		.perfcntr_configure = a6xx_perfcntr_configure,
2812 	},
2813 	.init = a6xx_gpu_init,
2814 	.get_timestamp = a6xx_gmu_get_timestamp,
2815 	.bus_halt = a6xx_bus_clear_pending_transactions,
2816 	.mmu_fault_handler = a6xx_fault_handler,
2817 	.gx_is_on = a6xx_gmu_gx_is_on,
2818 };
2819 
2820 const struct adreno_gpu_funcs a6xx_gmuwrapper_funcs = {
2821 	.base = {
2822 		.get_param = adreno_get_param,
2823 		.set_param = adreno_set_param,
2824 		.hw_init = a6xx_hw_init,
2825 		.ucode_load = a6xx_ucode_load,
2826 		.pm_suspend = a6xx_pm_suspend,
2827 		.pm_resume = a6xx_pm_resume,
2828 		.recover = a6xx_recover,
2829 		.submit = a6xx_submit,
2830 		.active_ring = a6xx_active_ring,
2831 		.irq = a6xx_irq,
2832 		.destroy = a6xx_destroy,
2833 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2834 		.show = a6xx_show,
2835 #endif
2836 		.gpu_busy = a6xx_gpu_busy,
2837 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2838 		.gpu_state_get = a6xx_gpu_state_get,
2839 		.gpu_state_put = a6xx_gpu_state_put,
2840 #endif
2841 		.create_vm = a6xx_create_vm,
2842 		.create_private_vm = a6xx_create_private_vm,
2843 		.get_rptr = a6xx_get_rptr,
2844 		.progress = a6xx_progress,
2845 		.perfcntr_configure = a6xx_perfcntr_configure,
2846 	},
2847 	.init = a6xx_gpu_init,
2848 	.get_timestamp = a6xx_get_timestamp,
2849 	.bus_halt = a6xx_bus_clear_pending_transactions,
2850 	.mmu_fault_handler = a6xx_fault_handler,
2851 	.gx_is_on = a6xx_gmu_gx_is_on,
2852 };
2853 
2854 const struct adreno_gpu_funcs a7xx_gpu_funcs = {
2855 	.base = {
2856 		.get_param = adreno_get_param,
2857 		.set_param = adreno_set_param,
2858 		.hw_init = a6xx_hw_init,
2859 		.ucode_load = a6xx_ucode_load,
2860 		.pm_suspend = a6xx_gmu_pm_suspend,
2861 		.pm_resume = a6xx_gmu_pm_resume,
2862 		.recover = a6xx_recover,
2863 		.submit = a7xx_submit,
2864 		.active_ring = a6xx_active_ring,
2865 		.irq = a6xx_irq,
2866 		.destroy = a6xx_destroy,
2867 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2868 		.show = a6xx_show,
2869 #endif
2870 		.gpu_busy = a6xx_gpu_busy,
2871 		.gpu_get_freq = a6xx_gmu_get_freq,
2872 		.gpu_set_freq = a6xx_gpu_set_freq,
2873 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2874 		.gpu_state_get = a6xx_gpu_state_get,
2875 		.gpu_state_put = a6xx_gpu_state_put,
2876 #endif
2877 		.create_vm = a6xx_create_vm,
2878 		.create_private_vm = a6xx_create_private_vm,
2879 		.get_rptr = a6xx_get_rptr,
2880 		.progress = a6xx_progress,
2881 		.sysprof_setup = a6xx_gmu_sysprof_setup,
2882 		.perfcntr_configure = a6xx_perfcntr_configure,
2883 	},
2884 	.init = a6xx_gpu_init,
2885 	.get_timestamp = a6xx_gmu_get_timestamp,
2886 	.bus_halt = a6xx_bus_clear_pending_transactions,
2887 	.mmu_fault_handler = a6xx_fault_handler,
2888 	.gx_is_on = a7xx_gmu_gx_is_on,
2889 	.aqe_is_enabled = a6xx_aqe_is_enabled,
2890 };
2891 
2892 const struct adreno_gpu_funcs a8xx_gpu_funcs = {
2893 	.base = {
2894 		.get_param = adreno_get_param,
2895 		.set_param = adreno_set_param,
2896 		.hw_init = a8xx_hw_init,
2897 		.ucode_load = a6xx_ucode_load,
2898 		.pm_suspend = a6xx_gmu_pm_suspend,
2899 		.pm_resume = a6xx_gmu_pm_resume,
2900 		.recover = a8xx_recover,
2901 		.submit = a7xx_submit,
2902 		.active_ring = a6xx_active_ring,
2903 		.irq = a8xx_irq,
2904 		.destroy = a6xx_destroy,
2905 		.gpu_busy = a8xx_gpu_busy,
2906 		.gpu_get_freq = a6xx_gmu_get_freq,
2907 		.gpu_set_freq = a6xx_gpu_set_freq,
2908 		.create_vm = a6xx_create_vm,
2909 		.create_private_vm = a6xx_create_private_vm,
2910 		.get_rptr = a6xx_get_rptr,
2911 		.progress = a8xx_progress,
2912 		.sysprof_setup = a6xx_gmu_sysprof_setup,
2913 		.perfcntr_configure = a6xx_perfcntr_configure,
2914 		.perfcntr_flush = a8xx_perfcntr_flush,
2915 	},
2916 	.init = a6xx_gpu_init,
2917 	.get_timestamp = a8xx_gmu_get_timestamp,
2918 	.bus_halt = a8xx_bus_clear_pending_transactions,
2919 	.mmu_fault_handler = a8xx_fault_handler,
2920 	.gx_is_on = a8xx_gmu_gx_is_on,
2921 	.aqe_is_enabled = a6xx_aqe_is_enabled,
2922 };
2923