xref: /linux/drivers/gpu/drm/msm/adreno/a6xx_gpu.c (revision 54fd6bd42e7bd351802ff1d193a2e33e4bfb1836)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
3 
4 
5 #include "msm_gem.h"
6 #include "msm_mmu.h"
7 #include "msm_gpu_trace.h"
8 #include "a6xx_gpu.h"
9 #include "a6xx_gmu.xml.h"
10 
11 #include <linux/bitfield.h>
12 #include <linux/devfreq.h>
13 #include <linux/firmware/qcom/qcom_scm.h>
14 #include <linux/pm_domain.h>
15 #include <linux/soc/qcom/llcc-qcom.h>
16 
/*
 * NOTE(review): no user of this constant is visible in this part of the file.
 * Presumably it is the peripheral id handed to the qcom_scm firmware calls
 * (the qcom_scm header is included above) - confirm against its users.
 */
#define GPU_PAS_ID 13
18 
19 static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
20 {
21 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
22 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
23 
24 	/* Check that the GMU is idle */
25 	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_isidle(&a6xx_gpu->gmu))
26 		return false;
27 
28 	/* Check tha the CX master is idle */
29 	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
30 			~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
31 		return false;
32 
33 	return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
34 		A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
35 }
36 
37 static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
38 {
39 	/* wait for CP to drain ringbuffer: */
40 	if (!adreno_idle(gpu, ring))
41 		return false;
42 
43 	if (spin_until(_a6xx_check_idle(gpu))) {
44 		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
45 			gpu->name, __builtin_return_address(0),
46 			gpu_read(gpu, REG_A6XX_RBBM_STATUS),
47 			gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
48 			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
49 			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
50 		return false;
51 	}
52 
53 	return true;
54 }
55 
56 static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
57 {
58 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
59 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
60 
61 	/* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
62 	if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
63 		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
64 		OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
65 		OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
66 	}
67 }
68 
69 static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
70 {
71 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
72 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
73 	uint32_t wptr;
74 	unsigned long flags;
75 
76 	update_shadow_rptr(gpu, ring);
77 
78 	spin_lock_irqsave(&ring->preempt_lock, flags);
79 
80 	/* Copy the shadow to the actual register */
81 	ring->cur = ring->next;
82 
83 	/* Make sure to wrap wptr if we need to */
84 	wptr = get_wptr(ring);
85 
86 	/* Update HW if this is the current ring and we are not in preempt*/
87 	if (!a6xx_in_preempt(a6xx_gpu)) {
88 		if (a6xx_gpu->cur_ring == ring)
89 			gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
90 		else
91 			ring->restore_wptr = true;
92 	} else {
93 		ring->restore_wptr = true;
94 	}
95 
96 	spin_unlock_irqrestore(&ring->preempt_lock, flags);
97 }
98 
99 static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
100 		u64 iova)
101 {
102 	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
103 	OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
104 		CP_REG_TO_MEM_0_CNT(2) |
105 		CP_REG_TO_MEM_0_64B);
106 	OUT_RING(ring, lower_32_bits(iova));
107 	OUT_RING(ring, upper_32_bits(iova));
108 }
109 
/*
 * Queue the commands on @ring that switch the GPU to the pagetable of the
 * context owning @submit.
 *
 * Nothing is emitted when the context's seqno already matches
 * ring->cur_ctx_seqno, or when msm_iommu_pagetable_params() returns non-zero.
 *
 * Unless system profiling is active (sysprof), the perfcounter SRAM init is
 * kicked off before the table update and waited for afterwards.
 */
static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
		struct msm_ringbuffer *ring, struct msm_gem_submit *submit)
{
	/* sysprof is considered active when the refcount is above 1 */
	bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
	struct msm_context *ctx = submit->queue->ctx;
	struct drm_gpuvm *vm = msm_context_vm(submit->dev, ctx);
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	phys_addr_t ttbr;
	u32 asid;
	/* GPU address of the ring's ttbr0 memstore slot */
	u64 memptr = rbmemptr(ring, ttbr0);

	/* Same context as the ring last ran: no switch needed */
	if (ctx->seqno == ring->cur_ctx_seqno)
		return;

	/* Fetch TTBR0 and ASID for the context's VM; bail out on failure */
	if (msm_iommu_pagetable_params(to_msm_vm(vm)->mmu, &ttbr, &asid))
		return;

	if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) {
		/* Wait for previous submit to complete before continuing: */
		OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
		OUT_RING(ring, 0);
		OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
		OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
		OUT_RING(ring, submit->seqno - 1);

		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_SET_THREAD_BOTH);

		/* Reset state used to synchronize BR and BV */
		OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1);
		OUT_RING(ring,
			 CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS |
			 CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE |
			 CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER |
			 CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS);

		/* Back to the BR thread for the rest of the sequence */
		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_SET_THREAD_BR);
	}

	if (!sysprof) {
		if (!adreno_is_a7xx(adreno_gpu)) {
			/* Turn off protected mode to write to special registers */
			OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
			OUT_RING(ring, 0);
		}

		/* Start the perfcounter SRAM init; completion is polled below */
		OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
		OUT_RING(ring, 1);
	}

	/* Execute the table update */
	OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));

	OUT_RING(ring,
		CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
		CP_SMMU_TABLE_UPDATE_1_ASID(asid));
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));

	/*
	 * Write the new TTBR0 (two dwords) followed by the context seqno to
	 * the memstore. This is good for debugging and, per the original
	 * note here, needed for preemption.
	 */
	OUT_PKT7(ring, CP_MEM_WRITE, 5);
	OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
	OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
	OUT_RING(ring, lower_32_bits(ttbr));
	OUT_RING(ring, upper_32_bits(ttbr));
	OUT_RING(ring, ctx->seqno);

	/*
	 * Sync both threads after switching pagetables and enable BR only
	 * to make sure BV doesn't race ahead while BR is still switching
	 * pagetables.
	 */
	if (adreno_is_a7xx(&a6xx_gpu->base)) {
		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
	}

	/*
	 * And finally, trigger a uche flush to be sure there isn't anything
	 * lingering in that part of the GPU.
	 * NOTE(review): the event actually emitted is CACHE_INVALIDATE.
	 */

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CACHE_INVALIDATE);

	if (!sysprof) {
		/*
		 * Wait for SRAM clear after the pgtable update, so the
		 * two can happen in parallel:
		 */
		OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
		OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
		OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
				REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
		OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0));
		OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
		OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
		OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));

		if (!adreno_is_a7xx(adreno_gpu)) {
			/* Re-enable protected mode: */
			OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
			OUT_RING(ring, 1);
		}
	}
}
221 
222 static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
223 {
224 	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
225 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
226 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
227 	struct msm_ringbuffer *ring = submit->ring;
228 	unsigned int i, ibs = 0;
229 
230 	adreno_check_and_reenable_stall(adreno_gpu);
231 
232 	a6xx_set_pagetable(a6xx_gpu, ring, submit);
233 
234 	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
235 		rbmemptr_stats(ring, index, cpcycles_start));
236 
237 	/*
238 	 * For PM4 the GMU register offsets are calculated from the base of the
239 	 * GPU registers so we need to add 0x1a800 to the register value on A630
240 	 * to get the right value from PM4.
241 	 */
242 	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
243 		rbmemptr_stats(ring, index, alwayson_start));
244 
245 	/* Invalidate CCU depth and color */
246 	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
247 	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));
248 
249 	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
250 	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));
251 
252 	/* Submit the commands */
253 	for (i = 0; i < submit->nr_cmds; i++) {
254 		switch (submit->cmd[i].type) {
255 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
256 			break;
257 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
258 			if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
259 				break;
260 			fallthrough;
261 		case MSM_SUBMIT_CMD_BUF:
262 			OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
263 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
264 			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
265 			OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
266 			ibs++;
267 			break;
268 		}
269 
270 		/*
271 		 * Periodically update shadow-wptr if needed, so that we
272 		 * can see partial progress of submits with large # of
273 		 * cmds.. otherwise we could needlessly stall waiting for
274 		 * ringbuffer state, simply due to looking at a shadow
275 		 * rptr value that has not been updated
276 		 */
277 		if ((ibs % 32) == 0)
278 			update_shadow_rptr(gpu, ring);
279 	}
280 
281 	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
282 		rbmemptr_stats(ring, index, cpcycles_end));
283 	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
284 		rbmemptr_stats(ring, index, alwayson_end));
285 
286 	/* Write the fence to the scratch register */
287 	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
288 	OUT_RING(ring, submit->seqno);
289 
290 	/*
291 	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
292 	 * timestamp is written to the memory and then triggers the interrupt
293 	 */
294 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
295 	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
296 		CP_EVENT_WRITE_0_IRQ);
297 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
298 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
299 	OUT_RING(ring, submit->seqno);
300 
301 	trace_msm_gpu_submit_flush(submit,
302 		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));
303 
304 	a6xx_flush(gpu, ring);
305 }
306 
307 static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
308 		struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue)
309 {
310 	u64 preempt_postamble;
311 
312 	OUT_PKT7(ring, CP_SET_PSEUDO_REG, 12);
313 
314 	OUT_RING(ring, SMMU_INFO);
315 	/* don't save SMMU, we write the record from the kernel instead */
316 	OUT_RING(ring, 0);
317 	OUT_RING(ring, 0);
318 
319 	/* privileged and non secure buffer save */
320 	OUT_RING(ring, NON_SECURE_SAVE_ADDR);
321 	OUT_RING(ring, lower_32_bits(
322 		a6xx_gpu->preempt_iova[ring->id]));
323 	OUT_RING(ring, upper_32_bits(
324 		a6xx_gpu->preempt_iova[ring->id]));
325 
326 	/* user context buffer save, seems to be unnused by fw */
327 	OUT_RING(ring, NON_PRIV_SAVE_ADDR);
328 	OUT_RING(ring, 0);
329 	OUT_RING(ring, 0);
330 
331 	OUT_RING(ring, COUNTER);
332 	/* seems OK to set to 0 to disable it */
333 	OUT_RING(ring, 0);
334 	OUT_RING(ring, 0);
335 
336 	/* Emit postamble to clear perfcounters */
337 	preempt_postamble = a6xx_gpu->preempt_postamble_iova;
338 
339 	OUT_PKT7(ring, CP_SET_AMBLE, 3);
340 	OUT_RING(ring, lower_32_bits(preempt_postamble));
341 	OUT_RING(ring, upper_32_bits(preempt_postamble));
342 	OUT_RING(ring, CP_SET_AMBLE_2_DWORDS(
343 				 a6xx_gpu->preempt_postamble_len) |
344 			 CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE));
345 }
346 
/*
 * Write one submit into its ringbuffer for A7xx, flush it and then check
 * whether a preemption should be triggered.
 *
 * Compared to a6xx_submit() this additionally switches between the BR and BV
 * threads via CP_THREAD_CONTROL, brackets the command buffers with
 * CP_SET_MARKER packets, writes a separate bv_fence that BR waits on before
 * committing the ring fence, and, when more than one ring exists, emits the
 * preemption pseudo registers and a context-switch yield.
 */
static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	/* Slot in the per-ring stats array used for this submit */
	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	adreno_check_and_reenable_stall(adreno_gpu);

	/*
	 * Toggle concurrent binning for pagetable switch and set the thread to
	 * BR since only it can execute the pagetable switch packets.
	 */
	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);

	a6xx_set_pagetable(a6xx_gpu, ring, submit);

	/*
	 * If preemption is enabled, then set the pseudo register for the save
	 * sequence. More than one ring is what "enabled" means here.
	 */
	if (gpu->nr_rings > 1)
		a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue);

	/* Snapshot the CP cycle and always-on counters before the IBs run */
	get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
		rbmemptr_stats(ring, index, cpcycles_start));
	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
		rbmemptr_stats(ring, index, alwayson_start));

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BOTH);

	OUT_PKT7(ring, CP_SET_MARKER, 1);
	OUT_RING(ring, 0x101); /* IFPC disable */

	/* The IB1LIST markers are only emitted for preemptible queues */
	if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
		OUT_PKT7(ring, CP_SET_MARKER, 1);
		OUT_RING(ring, 0x00d); /* IB1LIST start */
	}

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* Not executed directly from the ring */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* Skip the restore when the ring last ran this context */
			if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
			ibs++;
			break;
		}

		/*
		 * Periodically update shadow-wptr if needed, so that we
		 * can see partial progress of submits with large # of
		 * cmds.. otherwise we could needlessly stall waiting for
		 * ringbuffer state, simply due to looking at a shadow
		 * rptr value that has not been updated
		 */
		if ((ibs % 32) == 0)
			update_shadow_rptr(gpu, ring);
	}

	if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
		OUT_PKT7(ring, CP_SET_MARKER, 1);
		OUT_RING(ring, 0x00e); /* IB1LIST end */
	}

	/* Matching end-of-submit counter snapshots */
	get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
		rbmemptr_stats(ring, index, cpcycles_end));
	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
		rbmemptr_stats(ring, index, alwayson_end));

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
	OUT_RING(ring, submit->seqno);

	/* CCU depth/color invalidates are issued on the BR thread */
	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BR);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CCU_INVALIDATE_DEPTH);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CCU_INVALIDATE_COLOR);

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BV);

	/*
	 * Make sure the timestamp is committed once BV pipe is
	 * completely done with this submission.
	 * NOTE(review): BIT(27) has no symbolic name here - confirm its
	 * meaning against the CP_EVENT_WRITE packet definition.
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_CLEAN | BIT(27));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, submit->seqno);

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BR);

	/*
	 * This makes sure that BR doesn't race ahead and commit
	 * timestamp to memstore while BV is still processing
	 * this submission.
	 */
	OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
	OUT_RING(ring, 0);
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, submit->seqno);

	/* Remember the newest seqno queued on this ring */
	a6xx_gpu->last_seqno[ring->id] = submit->seqno;

	/* write the ringbuffer timestamp */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_CLEAN | CP_EVENT_WRITE_0_IRQ | BIT(27));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BOTH);

	OUT_PKT7(ring, CP_SET_MARKER, 1);
	OUT_RING(ring, 0x100); /* IFPC enable */

	/* If preemption is enabled */
	if (gpu->nr_rings > 1) {
		/* Yield the floor on command completion */
		OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);

		/*
		 * If dword[2:1] are non zero, they specify an address for
		 * the CP to write the value of dword[3] to on preemption
		 * complete. Write 0 to skip the write
		 */
		OUT_RING(ring, 0x00);
		OUT_RING(ring, 0x00);
		/* Data value - not used if the address above is 0 */
		OUT_RING(ring, 0x01);
		/* generate interrupt on preemption completion */
		OUT_RING(ring, 0x00);
	}


	trace_msm_gpu_submit_flush(submit,
		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));

	a6xx_flush(gpu, ring);

	/* Check to see if we need to start preemption */
	a6xx_preempt_trigger(gpu);
}
510 
511 static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
512 {
513 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
514 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
515 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
516 	const struct adreno_reglist *reg;
517 	unsigned int i;
518 	u32 cgc_delay, cgc_hyst;
519 	u32 val, clock_cntl_on;
520 
521 	if (!(adreno_gpu->info->a6xx->hwcg || adreno_is_a7xx(adreno_gpu)))
522 		return;
523 
524 	if (adreno_is_a630(adreno_gpu))
525 		clock_cntl_on = 0x8aa8aa02;
526 	else if (adreno_is_a610(adreno_gpu))
527 		clock_cntl_on = 0xaaa8aa82;
528 	else if (adreno_is_a702(adreno_gpu))
529 		clock_cntl_on = 0xaaaaaa82;
530 	else
531 		clock_cntl_on = 0x8aa8aa82;
532 
533 	cgc_delay = adreno_is_a615_family(adreno_gpu) ? 0x111 : 0x10111;
534 	cgc_hyst = adreno_is_a615_family(adreno_gpu) ? 0x555 : 0x5555;
535 
536 	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
537 			state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
538 	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
539 			state ? cgc_delay : 0);
540 	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
541 			state ? cgc_hyst : 0);
542 
543 	if (!adreno_gpu->info->a6xx->hwcg) {
544 		gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
545 		gpu_write(gpu, REG_A7XX_RBBM_CGC_GLOBAL_LOAD_CMD, state ? 1 : 0);
546 
547 		if (state) {
548 			gpu_write(gpu, REG_A7XX_RBBM_CGC_P2S_TRIG_CMD, 1);
549 
550 			if (gpu_poll_timeout(gpu, REG_A7XX_RBBM_CGC_P2S_STATUS, val,
551 					     val & A7XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
552 				dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
553 				return;
554 			}
555 
556 			gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
557 		}
558 
559 		return;
560 	}
561 
562 	val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);
563 
564 	/* Don't re-program the registers if they are already correct */
565 	if ((!state && !val) || (state && (val == clock_cntl_on)))
566 		return;
567 
568 	/* Disable SP clock before programming HWCG registers */
569 	if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
570 		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
571 
572 	for (i = 0; (reg = &adreno_gpu->info->a6xx->hwcg[i], reg->offset); i++)
573 		gpu_write(gpu, reg->offset, state ? reg->value : 0);
574 
575 	/* Enable SP clock */
576 	if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
577 		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
578 
579 	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
580 }
581 
582 static void a6xx_set_cp_protect(struct msm_gpu *gpu)
583 {
584 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
585 	const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
586 	unsigned i;
587 
588 	/*
589 	 * Enable access protection to privileged registers, fault on an access
590 	 * protect violation and select the last span to protect from the start
591 	 * address all the way to the end of the register address space
592 	 */
593 	gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL,
594 		  A6XX_CP_PROTECT_CNTL_ACCESS_PROT_EN |
595 		  A6XX_CP_PROTECT_CNTL_ACCESS_FAULT_ON_VIOL_EN |
596 		  A6XX_CP_PROTECT_CNTL_LAST_SPAN_INF_RANGE);
597 
598 	for (i = 0; i < protect->count - 1; i++) {
599 		/* Intentionally skip writing to some registers */
600 		if (protect->regs[i])
601 			gpu_write(gpu, REG_A6XX_CP_PROTECT(i), protect->regs[i]);
602 	}
603 	/* last CP_PROTECT to have "infinite" length on the last entry */
604 	gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]);
605 }
606 
/*
 * Build the GPU's UBWC configuration.
 *
 * Starts from the common data returned by qcom_ubwc_config_get_data(), copies
 * it into gpu->_ubwc_config, applies the per-GPU overrides below, warns once
 * for each field that ends up different from the common data, and finally
 * points gpu->ubwc_config at the private copy.
 *
 * The overrides are independent sequential checks rather than an else-if
 * chain, so when more than one predicate matches the later assignment wins.
 *
 * Returns 0 on success, or the error encoded in the pointer returned by
 * qcom_ubwc_config_get_data().
 */
static int a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
{
	const struct qcom_ubwc_cfg_data *common_cfg;
	struct qcom_ubwc_cfg_data *cfg = &gpu->_ubwc_config;

	/* Inherit the common config and make some necessary fixups */
	common_cfg = qcom_ubwc_config_get_data();
	if (IS_ERR(common_cfg))
		return PTR_ERR(common_cfg);

	/* Copy the data into the internal struct to drop the const qualifier (temporarily) */
	*cfg = *common_cfg;

	/* Defaults, used unless one of the checks below overrides them */
	cfg->ubwc_swizzle = 0x6;
	cfg->highest_bank_bit = 15;

	if (adreno_is_a610(gpu)) {
		cfg->highest_bank_bit = 13;
		cfg->ubwc_swizzle = 0x7;
	}

	if (adreno_is_a618(gpu))
		cfg->highest_bank_bit = 14;

	if (adreno_is_a619(gpu))
		/* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
		cfg->highest_bank_bit = 13;

	if (adreno_is_a619_holi(gpu))
		cfg->highest_bank_bit = 13;

	if (adreno_is_a621(gpu))
		cfg->highest_bank_bit = 13;

	if (adreno_is_a623(gpu))
		cfg->highest_bank_bit = 16;

	if (adreno_is_a650(gpu) ||
	    adreno_is_a660(gpu) ||
	    adreno_is_a690(gpu) ||
	    adreno_is_a730(gpu) ||
	    adreno_is_a740_family(gpu)) {
		/* TODO: get ddr type from bootloader and use 15 for LPDDR4 */
		cfg->highest_bank_bit = 16;
	}

	if (adreno_is_a663(gpu)) {
		cfg->highest_bank_bit = 13;
		cfg->ubwc_swizzle = 0x4;
	}

	if (adreno_is_7c3(gpu))
		cfg->highest_bank_bit = 14;

	if (adreno_is_a702(gpu))
		cfg->highest_bank_bit = 14;

	/* Report (once) any disagreement with the common UBWC data */
	if (cfg->highest_bank_bit != common_cfg->highest_bank_bit)
		DRM_WARN_ONCE("Inconclusive highest_bank_bit value: %u (GPU) vs %u (UBWC_CFG)\n",
			      cfg->highest_bank_bit, common_cfg->highest_bank_bit);

	if (cfg->ubwc_swizzle != common_cfg->ubwc_swizzle)
		DRM_WARN_ONCE("Inconclusive ubwc_swizzle value: %u (GPU) vs %u (UBWC_CFG)\n",
			      cfg->ubwc_swizzle, common_cfg->ubwc_swizzle);

	/* From here on the GPU uses its own, fixed-up copy */
	gpu->ubwc_config = &gpu->_ubwc_config;

	return 0;
}
676 
677 static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
678 {
679 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
680 	const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config;
681 	/*
682 	 * We subtract 13 from the highest bank bit (13 is the minimum value
683 	 * allowed by hw) and write the lowest two bits of the remaining value
684 	 * as hbb_lo and the one above it as hbb_hi to the hardware.
685 	 */
686 	BUG_ON(cfg->highest_bank_bit < 13);
687 	u32 hbb = cfg->highest_bank_bit - 13;
688 	bool rgb565_predicator = cfg->ubwc_enc_version >= UBWC_4_0;
689 	u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2);
690 	bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg);
691 	bool amsbc = cfg->ubwc_enc_version >= UBWC_3_0;
692 	bool min_acc_len_64b = false;
693 	u8 uavflagprd_inv = 0;
694 	u32 hbb_hi = hbb >> 2;
695 	u32 hbb_lo = hbb & 3;
696 
697 	if (adreno_is_a650_family(adreno_gpu) || adreno_is_a7xx(adreno_gpu))
698 		uavflagprd_inv = 2;
699 
700 	if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
701 		min_acc_len_64b = true;
702 
703 	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
704 		  level2_swizzling_dis << 12 |
705 		  rgb565_predicator << 11 |
706 		  hbb_hi << 10 | amsbc << 4 |
707 		  min_acc_len_64b << 3 |
708 		  hbb_lo << 1 | ubwc_mode);
709 
710 	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
711 		  level2_swizzling_dis << 6 | hbb_hi << 4 |
712 		  min_acc_len_64b << 3 |
713 		  hbb_lo << 1 | ubwc_mode);
714 
715 	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
716 		  level2_swizzling_dis << 12 | hbb_hi << 10 |
717 		  uavflagprd_inv << 4 |
718 		  min_acc_len_64b << 3 |
719 		  hbb_lo << 1 | ubwc_mode);
720 
721 	if (adreno_is_a7xx(adreno_gpu))
722 		gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL,
723 			  FIELD_PREP(GENMASK(8, 5), hbb_lo));
724 
725 	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL,
726 		  min_acc_len_64b << 23 | hbb_lo << 21);
727 
728 	gpu_write(gpu, REG_A6XX_RBBM_NC_MODE_CNTL,
729 		  cfg->macrotile_mode);
730 }
731 
732 static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
733 {
734 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
735 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
736 	const struct adreno_reglist_list *reglist;
737 	void *ptr = a6xx_gpu->pwrup_reglist_ptr;
738 	struct cpu_gpu_lock *lock = ptr;
739 	u32 *dest = (u32 *)&lock->regs[0];
740 	int i;
741 
742 	reglist = adreno_gpu->info->a6xx->pwrup_reglist;
743 
744 	lock->gpu_req = lock->cpu_req = lock->turn = 0;
745 	lock->ifpc_list_len = 0;
746 	lock->preemption_list_len = reglist->count;
747 
748 	/*
749 	 * For each entry in each of the lists, write the offset and the current
750 	 * register value into the GPU buffer
751 	 */
752 	for (i = 0; i < reglist->count; i++) {
753 		*dest++ = reglist->regs[i];
754 		*dest++ = gpu_read(gpu, reglist->regs[i]);
755 	}
756 
757 	/*
758 	 * The overall register list is composed of
759 	 * 1. Static IFPC-only registers
760 	 * 2. Static IFPC + preemption registers
761 	 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
762 	 *
763 	 * The first two lists are static. Size of these lists are stored as
764 	 * number of pairs in ifpc_list_len and preemption_list_len
765 	 * respectively. With concurrent binning, Some of the perfcounter
766 	 * registers being virtualized, CP needs to know the pipe id to program
767 	 * the aperture inorder to restore the same. Thus, third list is a
768 	 * dynamic list with triplets as
769 	 * (<aperture, shifted 12 bits> <address> <data>), and the length is
770 	 * stored as number for triplets in dynamic_list_len.
771 	 */
772 	lock->dynamic_list_len = 0;
773 }
774 
775 static int a7xx_preempt_start(struct msm_gpu *gpu)
776 {
777 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
778 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
779 	struct msm_ringbuffer *ring = gpu->rb[0];
780 
781 	if (gpu->nr_rings <= 1)
782 		return 0;
783 
784 	/* Turn CP protection off */
785 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
786 	OUT_RING(ring, 0);
787 
788 	a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL);
789 
790 	/* Yield the floor on command completion */
791 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
792 	OUT_RING(ring, 0x00);
793 	OUT_RING(ring, 0x00);
794 	OUT_RING(ring, 0x00);
795 	/* Generate interrupt on preemption completion */
796 	OUT_RING(ring, 0x00);
797 
798 	a6xx_flush(gpu, ring);
799 
800 	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
801 }
802 
803 static int a6xx_cp_init(struct msm_gpu *gpu)
804 {
805 	struct msm_ringbuffer *ring = gpu->rb[0];
806 
807 	OUT_PKT7(ring, CP_ME_INIT, 8);
808 
809 	OUT_RING(ring, 0x0000002f);
810 
811 	/* Enable multiple hardware contexts */
812 	OUT_RING(ring, 0x00000003);
813 
814 	/* Enable error detection */
815 	OUT_RING(ring, 0x20000000);
816 
817 	/* Don't enable header dump */
818 	OUT_RING(ring, 0x00000000);
819 	OUT_RING(ring, 0x00000000);
820 
821 	/* No workarounds enabled */
822 	OUT_RING(ring, 0x00000000);
823 
824 	/* Pad rest of the cmds with 0's */
825 	OUT_RING(ring, 0x00000000);
826 	OUT_RING(ring, 0x00000000);
827 
828 	a6xx_flush(gpu, ring);
829 	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
830 }
831 
832 static int a7xx_cp_init(struct msm_gpu *gpu)
833 {
834 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
835 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
836 	struct msm_ringbuffer *ring = gpu->rb[0];
837 	u32 mask;
838 
839 	/* Disable concurrent binning before sending CP init */
840 	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
841 	OUT_RING(ring, BIT(27));
842 
843 	OUT_PKT7(ring, CP_ME_INIT, 7);
844 
845 	/* Use multiple HW contexts */
846 	mask = BIT(0);
847 
848 	/* Enable error detection */
849 	mask |= BIT(1);
850 
851 	/* Set default reset state */
852 	mask |= BIT(3);
853 
854 	/* Disable save/restore of performance counters across preemption */
855 	mask |= BIT(6);
856 
857 	/* Enable the register init list with the spinlock */
858 	mask |= BIT(8);
859 
860 	OUT_RING(ring, mask);
861 
862 	/* Enable multiple hardware contexts */
863 	OUT_RING(ring, 0x00000003);
864 
865 	/* Enable error detection */
866 	OUT_RING(ring, 0x20000000);
867 
868 	/* Operation mode mask */
869 	OUT_RING(ring, 0x00000002);
870 
871 	/* *Don't* send a power up reg list for concurrent binning (TODO) */
872 	/* Lo address */
873 	OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova));
874 	/* Hi address */
875 	OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova));
876 	/* BIT(31) set => read the regs from the list */
877 	OUT_RING(ring, BIT(31));
878 
879 	a6xx_flush(gpu, ring);
880 	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
881 }
882 
883 /*
884  * Check that the microcode version is new enough to include several key
885  * security fixes. Return true if the ucode is safe.
886  */
887 static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
888 		struct drm_gem_object *obj)
889 {
890 	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
891 	struct msm_gpu *gpu = &adreno_gpu->base;
892 	const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE];
893 	u32 *buf = msm_gem_get_vaddr(obj);
894 	bool ret = false;
895 
896 	if (IS_ERR(buf))
897 		return false;
898 
899 	/* A7xx is safe! */
900 	if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu))
901 		return true;
902 
903 	/*
904 	 * Targets up to a640 (a618, a630 and a640) need to check for a
905 	 * microcode version that is patched to support the whereami opcode or
906 	 * one that is new enough to include it by default.
907 	 *
908 	 * a650 tier targets don't need whereami but still need to be
909 	 * equal to or newer than 0.95 for other security fixes
910 	 *
911 	 * a660 targets have all the critical security fixes from the start
912 	 */
913 	if (!strcmp(sqe_name, "a630_sqe.fw")) {
914 		/*
915 		 * If the lowest nibble is 0xa that is an indication that this
916 		 * microcode has been patched. The actual version is in dword
917 		 * [3] but we only care about the patchlevel which is the lowest
918 		 * nibble of dword [3]
919 		 *
920 		 * Otherwise check that the firmware is greater than or equal
921 		 * to 1.90 which was the first version that had this fix built
922 		 * in
923 		 */
924 		if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
925 			(buf[0] & 0xfff) >= 0x190) {
926 			a6xx_gpu->has_whereami = true;
927 			ret = true;
928 			goto out;
929 		}
930 
931 		DRM_DEV_ERROR(&gpu->pdev->dev,
932 			"a630 SQE ucode is too old. Have version %x need at least %x\n",
933 			buf[0] & 0xfff, 0x190);
934 	} else if (!strcmp(sqe_name, "a650_sqe.fw")) {
935 		if ((buf[0] & 0xfff) >= 0x095) {
936 			ret = true;
937 			goto out;
938 		}
939 
940 		DRM_DEV_ERROR(&gpu->pdev->dev,
941 			"a650 SQE ucode is too old. Have version %x need at least %x\n",
942 			buf[0] & 0xfff, 0x095);
943 	} else if (!strcmp(sqe_name, "a660_sqe.fw")) {
944 		ret = true;
945 	} else {
946 		DRM_DEV_ERROR(&gpu->pdev->dev,
947 			"unknown GPU, add it to a6xx_ucode_check_version()!!\n");
948 	}
949 out:
950 	msm_gem_put_vaddr(obj);
951 	return ret;
952 }
953 
954 static int a6xx_ucode_load(struct msm_gpu *gpu)
955 {
956 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
957 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
958 
959 	if (!a6xx_gpu->sqe_bo) {
960 		a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu,
961 			adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova);
962 
963 		if (IS_ERR(a6xx_gpu->sqe_bo)) {
964 			int ret = PTR_ERR(a6xx_gpu->sqe_bo);
965 
966 			a6xx_gpu->sqe_bo = NULL;
967 			DRM_DEV_ERROR(&gpu->pdev->dev,
968 				"Could not allocate SQE ucode: %d\n", ret);
969 
970 			return ret;
971 		}
972 
973 		msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
974 		if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
975 			msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm);
976 			drm_gem_object_put(a6xx_gpu->sqe_bo);
977 
978 			a6xx_gpu->sqe_bo = NULL;
979 			return -EPERM;
980 		}
981 	}
982 
983 	/*
984 	 * Expanded APRIV and targets that support WHERE_AM_I both need a
985 	 * privileged buffer to store the RPTR shadow
986 	 */
987 	if ((adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) &&
988 	    !a6xx_gpu->shadow_bo) {
989 		a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
990 						      sizeof(u32) * gpu->nr_rings,
991 						      MSM_BO_WC | MSM_BO_MAP_PRIV,
992 						      gpu->vm, &a6xx_gpu->shadow_bo,
993 						      &a6xx_gpu->shadow_iova);
994 
995 		if (IS_ERR(a6xx_gpu->shadow))
996 			return PTR_ERR(a6xx_gpu->shadow);
997 
998 		msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow");
999 	}
1000 
1001 	a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PAGE_SIZE,
1002 							 MSM_BO_WC  | MSM_BO_MAP_PRIV,
1003 							 gpu->vm, &a6xx_gpu->pwrup_reglist_bo,
1004 							 &a6xx_gpu->pwrup_reglist_iova);
1005 
1006 	if (IS_ERR(a6xx_gpu->pwrup_reglist_ptr))
1007 		return PTR_ERR(a6xx_gpu->pwrup_reglist_ptr);
1008 
1009 	msm_gem_object_set_name(a6xx_gpu->pwrup_reglist_bo, "pwrup_reglist");
1010 
1011 	return 0;
1012 }
1013 
1014 static int a6xx_zap_shader_init(struct msm_gpu *gpu)
1015 {
1016 	static bool loaded;
1017 	int ret;
1018 
1019 	if (loaded)
1020 		return 0;
1021 
1022 	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
1023 
1024 	loaded = !ret;
1025 	return ret;
1026 }
1027 
/*
 * RBBM_INT_0 interrupt sources that hw_init() writes to
 * REG_A6XX_RBBM_INT_0_MASK on targets that are not A7xx.
 */
#define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
		       A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
		       A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
		       A6XX_RBBM_INT_0_MASK_CP_IB2 | \
		       A6XX_RBBM_INT_0_MASK_CP_IB1 | \
		       A6XX_RBBM_INT_0_MASK_CP_RB | \
		       A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
		       A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
		       A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
		       A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
		       A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)

/*
 * RBBM_INT_0 interrupt sources that hw_init() writes to
 * REG_A6XX_RBBM_INT_0_MASK on A7xx targets.
 */
#define A7XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
		       A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
		       A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
		       A6XX_RBBM_INT_0_MASK_CP_SW | \
		       A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
		       A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
		       A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
		       A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
		       A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
		       A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
		       A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
		       A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
		       A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
		       A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)

/*
 * APRIV control bits that hw_init() programs into the A7xx BV and LPAC
 * APRIV control registers when the target has expanded APRIV.
 */
#define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \
			 A6XX_CP_APRIV_CNTL_RBFETCH | \
			 A6XX_CP_APRIV_CNTL_RBPRIVLEVEL | \
			 A6XX_CP_APRIV_CNTL_RBRPWB)

/*
 * A7XX_APRIV_MASK plus the CDREAD/CDWRITE bits; hw_init() programs this
 * into REG_A6XX_CP_APRIV_CNTL on A7xx targets with expanded APRIV.
 */
#define A7XX_BR_APRIVMASK (A7XX_APRIV_MASK | \
			   A6XX_CP_APRIV_CNTL_CDREAD | \
			   A6XX_CP_APRIV_CNTL_CDWRITE)
1063 
1064 static int hw_init(struct msm_gpu *gpu)
1065 {
1066 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1067 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1068 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1069 	u64 gmem_range_min;
1070 	unsigned int i;
1071 	int ret;
1072 
1073 	if (!adreno_has_gmu_wrapper(adreno_gpu)) {
1074 		/* Make sure the GMU keeps the GPU on while we set it up */
1075 		ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
1076 		if (ret)
1077 			return ret;
1078 	}
1079 
1080 	/* Clear GBIF halt in case GX domain was not collapsed */
1081 	if (adreno_is_a619_holi(adreno_gpu)) {
1082 		gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
1083 		gpu_read(gpu, REG_A6XX_GBIF_HALT);
1084 
1085 		gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, 0);
1086 		gpu_read(gpu, REG_A6XX_RBBM_GPR0_CNTL);
1087 	} else if (a6xx_has_gbif(adreno_gpu)) {
1088 		gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
1089 		gpu_read(gpu, REG_A6XX_GBIF_HALT);
1090 
1091 		gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
1092 		gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT);
1093 	}
1094 
1095 	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
1096 
1097 	if (adreno_is_a619_holi(adreno_gpu))
1098 		a6xx_sptprac_enable(gmu);
1099 
1100 	/*
1101 	 * Disable the trusted memory range - we don't actually supported secure
1102 	 * memory rendering at this point in time and we don't want to block off
1103 	 * part of the virtual memory space.
1104 	 */
1105 	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
1106 	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
1107 
1108 	if (!adreno_is_a7xx(adreno_gpu)) {
1109 		/* Turn on 64 bit addressing for all blocks */
1110 		gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
1111 		gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
1112 		gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
1113 		gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
1114 		gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
1115 		gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
1116 		gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
1117 		gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
1118 		gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
1119 		gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
1120 		gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
1121 		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
1122 	}
1123 
1124 	/* enable hardware clockgating */
1125 	a6xx_set_hwcg(gpu, true);
1126 
1127 	/* VBIF/GBIF start*/
1128 	if (adreno_is_a610_family(adreno_gpu) ||
1129 	    adreno_is_a640_family(adreno_gpu) ||
1130 	    adreno_is_a650_family(adreno_gpu) ||
1131 	    adreno_is_a7xx(adreno_gpu)) {
1132 		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
1133 		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
1134 		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
1135 		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
1136 		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL,
1137 			  adreno_is_a7xx(adreno_gpu) ? 0x2120212 : 0x3);
1138 	} else {
1139 		gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
1140 	}
1141 
1142 	if (adreno_is_a630(adreno_gpu))
1143 		gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
1144 
1145 	if (adreno_is_a7xx(adreno_gpu))
1146 		gpu_write(gpu, REG_A6XX_UCHE_GBIF_GX_CONFIG, 0x10240e0);
1147 
1148 	/* Make all blocks contribute to the GPU BUSY perf counter */
1149 	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
1150 
1151 	/* Disable L2 bypass in the UCHE */
1152 	if (adreno_is_a7xx(adreno_gpu)) {
1153 		gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
1154 		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
1155 	} else {
1156 		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0);
1157 		gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
1158 		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
1159 	}
1160 
1161 	if (!(adreno_is_a650_family(adreno_gpu) ||
1162 	      adreno_is_a702(adreno_gpu) ||
1163 	      adreno_is_a730(adreno_gpu))) {
1164 		gmem_range_min = adreno_is_a740_family(adreno_gpu) ? SZ_16M : SZ_1M;
1165 
1166 		/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
1167 		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN, gmem_range_min);
1168 
1169 		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX,
1170 			gmem_range_min + adreno_gpu->info->gmem - 1);
1171 	}
1172 
1173 	if (adreno_is_a7xx(adreno_gpu))
1174 		gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, BIT(23));
1175 	else {
1176 		gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
1177 		gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);
1178 	}
1179 
1180 	if (adreno_is_a640_family(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) {
1181 		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
1182 		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
1183 	} else if (adreno_is_a610_family(adreno_gpu)) {
1184 		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060);
1185 		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16);
1186 	} else if (!adreno_is_a7xx(adreno_gpu)) {
1187 		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
1188 		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
1189 	}
1190 
1191 	if (adreno_is_a660_family(adreno_gpu))
1192 		gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
1193 
1194 	/* Setting the mem pool size */
1195 	if (adreno_is_a610(adreno_gpu)) {
1196 		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 48);
1197 		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 47);
1198 	} else if (adreno_is_a702(adreno_gpu)) {
1199 		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 64);
1200 		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 63);
1201 	} else if (!adreno_is_a7xx(adreno_gpu))
1202 		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);
1203 
1204 
1205 	/* Set the default primFifo threshold values */
1206 	if (adreno_gpu->info->a6xx->prim_fifo_threshold)
1207 		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL,
1208 			  adreno_gpu->info->a6xx->prim_fifo_threshold);
1209 
1210 	/* Set the AHB default slave response to "ERROR" */
1211 	gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);
1212 
1213 	/* Turn on performance counters */
1214 	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);
1215 
1216 	if (adreno_is_a7xx(adreno_gpu)) {
1217 		/* Turn on the IFPC counter (countable 4 on XOCLK4) */
1218 		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1,
1219 			  FIELD_PREP(GENMASK(7, 0), 0x4));
1220 	}
1221 
1222 	/* Select CP0 to always count cycles */
1223 	gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT);
1224 
1225 	a6xx_set_ubwc_config(gpu);
1226 
1227 	/* Enable fault detection */
1228 	if (adreno_is_a730(adreno_gpu) ||
1229 	    adreno_is_a740_family(adreno_gpu))
1230 		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff);
1231 	else if (adreno_is_a690(adreno_gpu))
1232 		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x4fffff);
1233 	else if (adreno_is_a619(adreno_gpu))
1234 		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff);
1235 	else if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
1236 		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3ffff);
1237 	else
1238 		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x1fffff);
1239 
1240 	gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1);
1241 
1242 	/* Set weights for bicubic filtering */
1243 	if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) {
1244 		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0);
1245 		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
1246 			0x3fe05ff4);
1247 		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2,
1248 			0x3fa0ebee);
1249 		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3,
1250 			0x3f5193ed);
1251 		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4,
1252 			0x3f0243f0);
1253 	}
1254 
1255 	/* Set up the CX GMU counter 0 to count busy ticks */
1256 	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);
1257 
1258 	/* Enable the power counter */
1259 	gmu_rmw(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, BIT(5));
1260 	gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);
1261 
1262 	/* Protect registers from the CP */
1263 	a6xx_set_cp_protect(gpu);
1264 
1265 	if (adreno_is_a660_family(adreno_gpu)) {
1266 		if (adreno_is_a690(adreno_gpu))
1267 			gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x00028801);
1268 		else
1269 			gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
1270 		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
1271 	} else if (adreno_is_a702(adreno_gpu)) {
1272 		/* Something to do with the HLSQ cluster */
1273 		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(24));
1274 	}
1275 
1276 	if (adreno_is_a690(adreno_gpu))
1277 		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90);
1278 	/* Set dualQ + disable afull for A660 GPU */
1279 	else if (adreno_is_a660(adreno_gpu) || adreno_is_a663(adreno_gpu))
1280 		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
1281 	else if (adreno_is_a7xx(adreno_gpu))
1282 		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG,
1283 			  FIELD_PREP(GENMASK(19, 16), 6) |
1284 			  FIELD_PREP(GENMASK(15, 12), 6) |
1285 			  FIELD_PREP(GENMASK(11, 8), 9) |
1286 			  BIT(3) | BIT(2) |
1287 			  FIELD_PREP(GENMASK(1, 0), 2));
1288 
1289 	/* Enable expanded apriv for targets that support it */
1290 	if (gpu->hw_apriv) {
1291 		if (adreno_is_a7xx(adreno_gpu)) {
1292 			gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
1293 				  A7XX_BR_APRIVMASK);
1294 			gpu_write(gpu, REG_A7XX_CP_BV_APRIV_CNTL,
1295 				  A7XX_APRIV_MASK);
1296 			gpu_write(gpu, REG_A7XX_CP_LPAC_APRIV_CNTL,
1297 				  A7XX_APRIV_MASK);
1298 		} else
1299 			gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
1300 				  BIT(6) | BIT(5) | BIT(3) | BIT(2) | BIT(1));
1301 	}
1302 
1303 	if (adreno_is_a750(adreno_gpu)) {
1304 		/* Disable ubwc merged UFC request feature */
1305 		gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(19), BIT(19));
1306 
1307 		/* Enable TP flaghint and other performance settings */
1308 		gpu_write(gpu, REG_A6XX_TPL1_DBG_ECO_CNTL1, 0xc0700);
1309 	} else if (adreno_is_a7xx(adreno_gpu)) {
1310 		/* Disable non-ubwc read reqs from passing write reqs */
1311 		gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(11), BIT(11));
1312 	}
1313 
1314 	/* Enable interrupts */
1315 	gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK,
1316 		  adreno_is_a7xx(adreno_gpu) ? A7XX_INT_MASK : A6XX_INT_MASK);
1317 
1318 	ret = adreno_hw_init(gpu);
1319 	if (ret)
1320 		goto out;
1321 
1322 	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
1323 
1324 	/* Set the ringbuffer address */
1325 	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
1326 
1327 	/* Targets that support extended APRIV can use the RPTR shadow from
1328 	 * hardware but all the other ones need to disable the feature. Targets
1329 	 * that support the WHERE_AM_I opcode can use that instead
1330 	 */
1331 	if (adreno_gpu->base.hw_apriv)
1332 		gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
1333 	else
1334 		gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
1335 			MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
1336 
1337 	/* Configure the RPTR shadow if needed: */
1338 	if (a6xx_gpu->shadow_bo) {
1339 		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR,
1340 			shadowptr(a6xx_gpu, gpu->rb[0]));
1341 		for (unsigned int i = 0; i < gpu->nr_rings; i++)
1342 			a6xx_gpu->shadow[i] = 0;
1343 	}
1344 
1345 	/* ..which means "always" on A7xx, also for BV shadow */
1346 	if (adreno_is_a7xx(adreno_gpu)) {
1347 		gpu_write64(gpu, REG_A7XX_CP_BV_RB_RPTR_ADDR,
1348 			    rbmemptr(gpu->rb[0], bv_rptr));
1349 	}
1350 
1351 	a6xx_preempt_hw_init(gpu);
1352 
1353 	/* Always come up on rb 0 */
1354 	a6xx_gpu->cur_ring = gpu->rb[0];
1355 
1356 	for (i = 0; i < gpu->nr_rings; i++)
1357 		gpu->rb[i]->cur_ctx_seqno = 0;
1358 
1359 	/* Enable the SQE_to start the CP engine */
1360 	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);
1361 
1362 	if (adreno_is_a7xx(adreno_gpu) && !a6xx_gpu->pwrup_reglist_emitted) {
1363 		a7xx_patch_pwrup_reglist(gpu);
1364 		a6xx_gpu->pwrup_reglist_emitted = true;
1365 	}
1366 
1367 	ret = adreno_is_a7xx(adreno_gpu) ? a7xx_cp_init(gpu) : a6xx_cp_init(gpu);
1368 	if (ret)
1369 		goto out;
1370 
1371 	/*
1372 	 * Try to load a zap shader into the secure world. If successful
1373 	 * we can use the CP to switch out of secure mode. If not then we
1374 	 * have no resource but to try to switch ourselves out manually. If we
1375 	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
1376 	 * be blocked and a permissions violation will soon follow.
1377 	 */
1378 	ret = a6xx_zap_shader_init(gpu);
1379 	if (!ret) {
1380 		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
1381 		OUT_RING(gpu->rb[0], 0x00000000);
1382 
1383 		a6xx_flush(gpu, gpu->rb[0]);
1384 		if (!a6xx_idle(gpu, gpu->rb[0]))
1385 			return -EINVAL;
1386 	} else if (ret == -ENODEV) {
1387 		/*
1388 		 * This device does not use zap shader (but print a warning
1389 		 * just in case someone got their dt wrong.. hopefully they
1390 		 * have a debug UART to realize the error of their ways...
1391 		 * if you mess this up you are about to crash horribly)
1392 		 */
1393 		dev_warn_once(gpu->dev->dev,
1394 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
1395 		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
1396 		ret = 0;
1397 	} else {
1398 		return ret;
1399 	}
1400 
1401 out:
1402 	if (adreno_has_gmu_wrapper(adreno_gpu))
1403 		return ret;
1404 
1405 	/* Last step - yield the ringbuffer */
1406 	a7xx_preempt_start(gpu);
1407 
1408 	/*
1409 	 * Tell the GMU that we are done touching the GPU and it can start power
1410 	 * management
1411 	 */
1412 	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
1413 
1414 	if (a6xx_gpu->gmu.legacy) {
1415 		/* Take the GMU out of its special boot mode */
1416 		a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
1417 	}
1418 
1419 	return ret;
1420 }
1421 
1422 static int a6xx_hw_init(struct msm_gpu *gpu)
1423 {
1424 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1425 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1426 	int ret;
1427 
1428 	mutex_lock(&a6xx_gpu->gmu.lock);
1429 	ret = hw_init(gpu);
1430 	mutex_unlock(&a6xx_gpu->gmu.lock);
1431 
1432 	return ret;
1433 }
1434 
1435 static void a6xx_dump(struct msm_gpu *gpu)
1436 {
1437 	DRM_DEV_INFO(&gpu->pdev->dev, "status:   %08x\n",
1438 			gpu_read(gpu, REG_A6XX_RBBM_STATUS));
1439 	adreno_dump(gpu);
1440 }
1441 
1442 static void a6xx_recover(struct msm_gpu *gpu)
1443 {
1444 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1445 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1446 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1447 	int i, active_submits;
1448 
1449 	adreno_dump_info(gpu);
1450 
1451 	for (i = 0; i < 8; i++)
1452 		DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
1453 			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i)));
1454 
1455 	if (hang_debug)
1456 		a6xx_dump(gpu);
1457 
1458 	/*
1459 	 * To handle recovery specific sequences during the rpm suspend we are
1460 	 * about to trigger
1461 	 */
1462 	a6xx_gpu->hung = true;
1463 
1464 	/* Halt SQE first */
1465 	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
1466 
1467 	pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);
1468 
1469 	/* active_submit won't change until we make a submission */
1470 	mutex_lock(&gpu->active_lock);
1471 	active_submits = gpu->active_submits;
1472 
1473 	/*
1474 	 * Temporarily clear active_submits count to silence a WARN() in the
1475 	 * runtime suspend cb
1476 	 */
1477 	gpu->active_submits = 0;
1478 
1479 	if (adreno_has_gmu_wrapper(adreno_gpu)) {
1480 		/* Drain the outstanding traffic on memory buses */
1481 		a6xx_bus_clear_pending_transactions(adreno_gpu, true);
1482 
1483 		/* Reset the GPU to a clean state */
1484 		a6xx_gpu_sw_reset(gpu, true);
1485 		a6xx_gpu_sw_reset(gpu, false);
1486 	}
1487 
1488 	reinit_completion(&gmu->pd_gate);
1489 	dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
1490 	dev_pm_genpd_synced_poweroff(gmu->cxpd);
1491 
1492 	/* Drop the rpm refcount from active submits */
1493 	if (active_submits)
1494 		pm_runtime_put(&gpu->pdev->dev);
1495 
1496 	/* And the final one from recover worker */
1497 	pm_runtime_put_sync(&gpu->pdev->dev);
1498 
1499 	if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
1500 		DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");
1501 
1502 	dev_pm_genpd_remove_notifier(gmu->cxpd);
1503 
1504 	pm_runtime_use_autosuspend(&gpu->pdev->dev);
1505 
1506 	if (active_submits)
1507 		pm_runtime_get(&gpu->pdev->dev);
1508 
1509 	pm_runtime_get_sync(&gpu->pdev->dev);
1510 
1511 	gpu->active_submits = active_submits;
1512 	mutex_unlock(&gpu->active_lock);
1513 
1514 	msm_gpu_hw_init(gpu);
1515 	a6xx_gpu->hung = false;
1516 }
1517 
1518 static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
1519 {
1520 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1521 	static const char *uche_clients[7] = {
1522 		"VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
1523 	};
1524 	u32 val;
1525 
1526 	if (adreno_is_a7xx(adreno_gpu)) {
1527 		if (mid != 1 && mid != 2 && mid != 3 && mid != 8)
1528 			return "UNKNOWN";
1529 	} else {
1530 		if (mid < 1 || mid > 3)
1531 			return "UNKNOWN";
1532 	}
1533 
1534 	/*
1535 	 * The source of the data depends on the mid ID read from FSYNR1.
1536 	 * and the client ID read from the UCHE block
1537 	 */
1538 	val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);
1539 
1540 	if (adreno_is_a7xx(adreno_gpu)) {
1541 		/* Bit 3 for mid=3 indicates BR or BV */
1542 		static const char *uche_clients_a7xx[16] = {
1543 			"BR_VFD", "BR_SP", "BR_VSC", "BR_VPC",
1544 			"BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
1545 			"BV_VFD", "BV_SP", "BV_VSC", "BV_VPC",
1546 			"BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
1547 		};
1548 
1549 		/* LPAC has the same clients as BR and BV, but because it is
1550 		 * compute-only some of them do not exist and there are holes
1551 		 * in the array.
1552 		 */
1553 		static const char *uche_clients_lpac_a7xx[8] = {
1554 			"-", "LPAC_SP", "-", "-",
1555 			"LPAC_HLSQ", "-", "-", "LPAC_TP",
1556 		};
1557 
1558 		val &= GENMASK(6, 0);
1559 
1560 		/* mid=3 refers to BR or BV */
1561 		if (mid == 3) {
1562 			if (val < ARRAY_SIZE(uche_clients_a7xx))
1563 				return uche_clients_a7xx[val];
1564 			else
1565 				return "UCHE";
1566 		}
1567 
1568 		/* mid=8 refers to LPAC */
1569 		if (mid == 8) {
1570 			if (val < ARRAY_SIZE(uche_clients_lpac_a7xx))
1571 				return uche_clients_lpac_a7xx[val];
1572 			else
1573 				return "UCHE_LPAC";
1574 		}
1575 
1576 		/* mid=2 is a catchall for everything else in LPAC */
1577 		if (mid == 2)
1578 			return "UCHE_LPAC";
1579 
1580 		/* mid=1 is a catchall for everything else in BR/BV */
1581 		return "UCHE";
1582 	} else if (adreno_is_a660_family(adreno_gpu)) {
1583 		static const char *uche_clients_a660[8] = {
1584 			"VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", "TP",
1585 		};
1586 
1587 		static const char *uche_clients_a660_not[8] = {
1588 			"not VFD", "not SP", "not VSC", "not VPC",
1589 			"not HLSQ", "not PC", "not LRZ", "not TP",
1590 		};
1591 
1592 		val &= GENMASK(6, 0);
1593 
1594 		if (mid == 3 && val < ARRAY_SIZE(uche_clients_a660))
1595 			return uche_clients_a660[val];
1596 
1597 		if (mid == 1 && val < ARRAY_SIZE(uche_clients_a660_not))
1598 			return uche_clients_a660_not[val];
1599 
1600 		return "UCHE";
1601 	} else {
1602 		/* mid = 3 is most precise and refers to only one block per client */
1603 		if (mid == 3)
1604 			return uche_clients[val & 7];
1605 
1606 		/* For mid=2 the source is TP or VFD except when the client id is 0 */
1607 		if (mid == 2)
1608 			return ((val & 7) == 0) ? "TP" : "TP|VFD";
1609 
1610 		/* For mid=1 just return "UCHE" as a catchall for everything else */
1611 		return "UCHE";
1612 	}
1613 }
1614 
1615 static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
1616 {
1617 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1618 
1619 	if (id == 0)
1620 		return "CP";
1621 	else if (id == 4)
1622 		return "CCU";
1623 	else if (id == 6)
1624 		return "CDP Prefetch";
1625 	else if (id == 7)
1626 		return "GMU";
1627 	else if (id == 5 && adreno_is_a7xx(adreno_gpu))
1628 		return "Flag cache";
1629 
1630 	return a6xx_uche_fault_block(gpu, id);
1631 }
1632 
1633 static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1634 {
1635 	struct msm_gpu *gpu = arg;
1636 	struct adreno_smmu_fault_info *info = data;
1637 	const char *block = "unknown";
1638 
1639 	u32 scratch[] = {
1640 			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
1641 			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
1642 			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
1643 			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)),
1644 	};
1645 
1646 	if (info)
1647 		block = a6xx_fault_block(gpu, info->fsynr1 & 0xff);
1648 
1649 	return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
1650 }
1651 
1652 static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
1653 {
1654 	u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS);
1655 
1656 	if (status & A6XX_CP_INT_CP_OPCODE_ERROR) {
1657 		u32 val;
1658 
1659 		gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1);
1660 		val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA);
1661 		dev_err_ratelimited(&gpu->pdev->dev,
1662 			"CP | opcode error | possible opcode=0x%8.8X\n",
1663 			val);
1664 	}
1665 
1666 	if (status & A6XX_CP_INT_CP_UCODE_ERROR)
1667 		dev_err_ratelimited(&gpu->pdev->dev,
1668 			"CP ucode error interrupt\n");
1669 
1670 	if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR)
1671 		dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n",
1672 			gpu_read(gpu, REG_A6XX_CP_HW_FAULT));
1673 
1674 	if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1675 		u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS);
1676 
1677 		dev_err_ratelimited(&gpu->pdev->dev,
1678 			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1679 			val & (1 << 20) ? "READ" : "WRITE",
1680 			(val & 0x3ffff), val);
1681 	}
1682 
1683 	if (status & A6XX_CP_INT_CP_AHB_ERROR && !adreno_is_a7xx(to_adreno_gpu(gpu)))
1684 		dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n");
1685 
1686 	if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR)
1687 		dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n");
1688 
1689 	if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR)
1690 		dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n");
1691 
1692 }
1693 
1694 static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
1695 {
1696 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1697 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1698 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1699 
1700 	/*
1701 	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
1702 	 * but the fault handler will trigger the devcore dump, and we want
1703 	 * to otherwise resume normally rather than killing the submit, so
1704 	 * just bail.
1705 	 */
1706 	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
1707 		return;
1708 
1709 	/*
1710 	 * Force the GPU to stay on until after we finish
1711 	 * collecting information
1712 	 */
1713 	if (!adreno_has_gmu_wrapper(adreno_gpu))
1714 		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);
1715 
1716 	DRM_DEV_ERROR(&gpu->pdev->dev,
1717 		"gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1718 		ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
1719 		gpu_read(gpu, REG_A6XX_RBBM_STATUS),
1720 		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
1721 		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
1722 		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
1723 		gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
1724 		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
1725 		gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
1726 
1727 	/* Turn off the hangcheck timer to keep it from bothering us */
1728 	timer_delete(&gpu->hangcheck_timer);
1729 
1730 	kthread_queue_work(gpu->worker, &gpu->recover_work);
1731 }
1732 
1733 static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
1734 {
1735 	u32 status;
1736 
1737 	status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS);
1738 	gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0);
1739 
1740 	dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);
1741 
1742 	/*
1743 	 * Ignore FASTBLEND violations, because the HW will silently fall back
1744 	 * to legacy blending.
1745 	 */
1746 	if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
1747 		      A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
1748 		timer_delete(&gpu->hangcheck_timer);
1749 
1750 		kthread_queue_work(gpu->worker, &gpu->recover_work);
1751 	}
1752 }
1753 
1754 static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
1755 {
1756 	struct msm_drm_private *priv = gpu->dev->dev_private;
1757 	u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
1758 
1759 	gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
1760 
1761 	if (priv->disable_err_irq)
1762 		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
1763 
1764 	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
1765 		a6xx_fault_detect_irq(gpu);
1766 
1767 	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR)
1768 		dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n");
1769 
1770 	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1771 		a6xx_cp_hw_err_irq(gpu);
1772 
1773 	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
1774 		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");
1775 
1776 	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1777 		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");
1778 
1779 	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1780 		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");
1781 
1782 	if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
1783 		a7xx_sw_fuse_violation_irq(gpu);
1784 
1785 	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1786 		msm_gpu_retire(gpu);
1787 		a6xx_preempt_trigger(gpu);
1788 	}
1789 
1790 	if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
1791 		a6xx_preempt_irq(gpu);
1792 
1793 	return IRQ_HANDLED;
1794 }
1795 
/*
 * Deactivate both LLCC slices held by the GPU: llc_slice first, then
 * htw_llc_slice (presumably the pagetable-walker slice - confirm against
 * where the slices are acquired).
 */
static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
{
	llcc_slice_deactivate(a6xx_gpu->llc_slice);
	llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
}
1801 
1802 static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1803 {
1804 	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1805 	struct msm_gpu *gpu = &adreno_gpu->base;
1806 	u32 cntl1_regval = 0;
1807 
1808 	if (IS_ERR(a6xx_gpu->llc_mmio))
1809 		return;
1810 
1811 	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1812 		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1813 
1814 		gpu_scid &= 0x1f;
1815 		cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
1816 			       (gpu_scid << 15) | (gpu_scid << 20);
1817 
1818 		/* On A660, the SCID programming for UCHE traffic is done in
1819 		 * A6XX_GBIF_SCACHE_CNTL0[14:10]
1820 		 */
1821 		if (adreno_is_a660_family(adreno_gpu))
1822 			gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
1823 				(1 << 8), (gpu_scid << 10) | (1 << 8));
1824 	}
1825 
1826 	/*
1827 	 * For targets with a MMU500, activate the slice but don't program the
1828 	 * register.  The XBL will take care of that.
1829 	 */
1830 	if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
1831 		if (!a6xx_gpu->have_mmu500) {
1832 			u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
1833 
1834 			gpuhtw_scid &= 0x1f;
1835 			cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
1836 		}
1837 	}
1838 
1839 	if (!cntl1_regval)
1840 		return;
1841 
1842 	/*
1843 	 * Program the slice IDs for the various GPU blocks and GPU MMU
1844 	 * pagetables
1845 	 */
1846 	if (!a6xx_gpu->have_mmu500) {
1847 		a6xx_llc_write(a6xx_gpu,
1848 			REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
1849 
1850 		/*
1851 		 * Program cacheability overrides to not allocate cache
1852 		 * lines on a write miss
1853 		 */
1854 		a6xx_llc_rmw(a6xx_gpu,
1855 			REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
1856 		return;
1857 	}
1858 
1859 	gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval);
1860 }
1861 
1862 static void a7xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1863 {
1864 	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1865 	struct msm_gpu *gpu = &adreno_gpu->base;
1866 
1867 	if (IS_ERR(a6xx_gpu->llc_mmio))
1868 		return;
1869 
1870 	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1871 		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1872 
1873 		gpu_scid &= GENMASK(4, 0);
1874 
1875 		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1,
1876 			  FIELD_PREP(GENMASK(29, 25), gpu_scid) |
1877 			  FIELD_PREP(GENMASK(24, 20), gpu_scid) |
1878 			  FIELD_PREP(GENMASK(19, 15), gpu_scid) |
1879 			  FIELD_PREP(GENMASK(14, 10), gpu_scid) |
1880 			  FIELD_PREP(GENMASK(9, 5), gpu_scid) |
1881 			  FIELD_PREP(GENMASK(4, 0), gpu_scid));
1882 
1883 		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0,
1884 			  FIELD_PREP(GENMASK(14, 10), gpu_scid) |
1885 			  BIT(8));
1886 	}
1887 
1888 	llcc_slice_activate(a6xx_gpu->htw_llc_slice);
1889 }
1890 
1891 static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
1892 {
1893 	/* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */
1894 	if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
1895 		return;
1896 
1897 	llcc_slice_putd(a6xx_gpu->llc_slice);
1898 	llcc_slice_putd(a6xx_gpu->htw_llc_slice);
1899 }
1900 
1901 static void a6xx_llc_slices_init(struct platform_device *pdev,
1902 		struct a6xx_gpu *a6xx_gpu, bool is_a7xx)
1903 {
1904 	struct device_node *phandle;
1905 
1906 	/* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */
1907 	if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
1908 		return;
1909 
1910 	/*
1911 	 * There is a different programming path for A6xx targets with an
1912 	 * mmu500 attached, so detect if that is the case
1913 	 */
1914 	phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0);
1915 	a6xx_gpu->have_mmu500 = (phandle &&
1916 		of_device_is_compatible(phandle, "arm,mmu-500"));
1917 	of_node_put(phandle);
1918 
1919 	if (is_a7xx || !a6xx_gpu->have_mmu500)
1920 		a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem");
1921 	else
1922 		a6xx_gpu->llc_mmio = NULL;
1923 
1924 	a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
1925 	a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
1926 
1927 	if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
1928 		a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
1929 }
1930 
1931 static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu)
1932 {
1933 	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1934 	struct msm_gpu *gpu = &adreno_gpu->base;
1935 	u32 fuse_val;
1936 	int ret;
1937 
1938 	if (adreno_is_a750(adreno_gpu)) {
1939 		/*
1940 		 * Assume that if qcom scm isn't available, that whatever
1941 		 * replacement allows writing the fuse register ourselves.
1942 		 * Users of alternative firmware need to make sure this
1943 		 * register is writeable or indicate that it's not somehow.
1944 		 * Print a warning because if you mess this up you're about to
1945 		 * crash horribly.
1946 		 */
1947 		if (!qcom_scm_is_available()) {
1948 			dev_warn_once(gpu->dev->dev,
1949 				"SCM is not available, poking fuse register\n");
1950 			a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE,
1951 				A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
1952 				A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND |
1953 				A7XX_CX_MISC_SW_FUSE_VALUE_LPAC);
1954 			adreno_gpu->has_ray_tracing = true;
1955 			return 0;
1956 		}
1957 
1958 		ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ |
1959 					     QCOM_SCM_GPU_TSENSE_EN_REQ);
1960 		if (ret)
1961 			return ret;
1962 
1963 		/*
1964 		 * On a750 raytracing may be disabled by the firmware, find out
1965 		 * whether that's the case. The scm call above sets the fuse
1966 		 * register.
1967 		 */
1968 		fuse_val = a6xx_llc_read(a6xx_gpu,
1969 					 REG_A7XX_CX_MISC_SW_FUSE_VALUE);
1970 		adreno_gpu->has_ray_tracing =
1971 			!!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING);
1972 	} else if (adreno_is_a740(adreno_gpu)) {
1973 		/* Raytracing is always enabled on a740 */
1974 		adreno_gpu->has_ray_tracing = true;
1975 	}
1976 
1977 	return 0;
1978 }
1979 
1980 
1981 #define GBIF_CLIENT_HALT_MASK		BIT(0)
1982 #define GBIF_ARB_HALT_MASK		BIT(1)
1983 #define VBIF_XIN_HALT_CTRL0_MASK	GENMASK(3, 0)
1984 #define VBIF_RESET_ACK_MASK		0xF0
1985 #define GPR0_GBIF_HALT_REQUEST		0x1E0
1986 
1987 void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
1988 {
1989 	struct msm_gpu *gpu = &adreno_gpu->base;
1990 
1991 	if (adreno_is_a619_holi(adreno_gpu)) {
1992 		gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, GPR0_GBIF_HALT_REQUEST);
1993 		spin_until((gpu_read(gpu, REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) &
1994 				(VBIF_RESET_ACK_MASK)) == VBIF_RESET_ACK_MASK);
1995 	} else if (!a6xx_has_gbif(adreno_gpu)) {
1996 		gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, VBIF_XIN_HALT_CTRL0_MASK);
1997 		spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) &
1998 				(VBIF_XIN_HALT_CTRL0_MASK)) == VBIF_XIN_HALT_CTRL0_MASK);
1999 		gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
2000 
2001 		return;
2002 	}
2003 
2004 	if (gx_off) {
2005 		/* Halt the gx side of GBIF */
2006 		gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1);
2007 		spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1);
2008 	}
2009 
2010 	/* Halt new client requests on GBIF */
2011 	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
2012 	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
2013 			(GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
2014 
2015 	/* Halt all AXI requests on GBIF */
2016 	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
2017 	spin_until((gpu_read(gpu,  REG_A6XX_GBIF_HALT_ACK) &
2018 			(GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
2019 
2020 	/* The GBIF halt needs to be explicitly cleared */
2021 	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
2022 }
2023 
2024 void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert)
2025 {
2026 	/* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */
2027 	if (adreno_is_a610(to_adreno_gpu(gpu)))
2028 		return;
2029 
2030 	gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert);
2031 	/* Perform a bogus read and add a brief delay to ensure ordering. */
2032 	gpu_read(gpu, REG_A6XX_RBBM_SW_RESET_CMD);
2033 	udelay(1);
2034 
2035 	/* The reset line needs to be asserted for at least 100 us */
2036 	if (assert)
2037 		udelay(100);
2038 }
2039 
2040 static int a6xx_gmu_pm_resume(struct msm_gpu *gpu)
2041 {
2042 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2043 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2044 	int ret;
2045 
2046 	gpu->needs_hw_init = true;
2047 
2048 	trace_msm_gpu_resume(0);
2049 
2050 	mutex_lock(&a6xx_gpu->gmu.lock);
2051 	ret = a6xx_gmu_resume(a6xx_gpu);
2052 	mutex_unlock(&a6xx_gpu->gmu.lock);
2053 	if (ret)
2054 		return ret;
2055 
2056 	msm_devfreq_resume(gpu);
2057 
2058 	adreno_is_a7xx(adreno_gpu) ? a7xx_llc_activate(a6xx_gpu) : a6xx_llc_activate(a6xx_gpu);
2059 
2060 	return ret;
2061 }
2062 
2063 static int a6xx_pm_resume(struct msm_gpu *gpu)
2064 {
2065 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2066 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2067 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
2068 	unsigned long freq = gpu->fast_rate;
2069 	struct dev_pm_opp *opp;
2070 	int ret;
2071 
2072 	gpu->needs_hw_init = true;
2073 
2074 	trace_msm_gpu_resume(0);
2075 
2076 	mutex_lock(&a6xx_gpu->gmu.lock);
2077 
2078 	opp = dev_pm_opp_find_freq_ceil(&gpu->pdev->dev, &freq);
2079 	if (IS_ERR(opp)) {
2080 		ret = PTR_ERR(opp);
2081 		goto err_set_opp;
2082 	}
2083 	dev_pm_opp_put(opp);
2084 
2085 	/* Set the core clock and bus bw, having VDD scaling in mind */
2086 	dev_pm_opp_set_opp(&gpu->pdev->dev, opp);
2087 
2088 	pm_runtime_resume_and_get(gmu->dev);
2089 	pm_runtime_resume_and_get(gmu->gxpd);
2090 
2091 	ret = clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
2092 	if (ret)
2093 		goto err_bulk_clk;
2094 
2095 	if (adreno_is_a619_holi(adreno_gpu))
2096 		a6xx_sptprac_enable(gmu);
2097 
2098 	/* If anything goes south, tear the GPU down piece by piece.. */
2099 	if (ret) {
2100 err_bulk_clk:
2101 		pm_runtime_put(gmu->gxpd);
2102 		pm_runtime_put(gmu->dev);
2103 		dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
2104 	}
2105 err_set_opp:
2106 	mutex_unlock(&a6xx_gpu->gmu.lock);
2107 
2108 	if (!ret)
2109 		msm_devfreq_resume(gpu);
2110 
2111 	return ret;
2112 }
2113 
2114 static int a6xx_gmu_pm_suspend(struct msm_gpu *gpu)
2115 {
2116 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2117 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2118 	int i, ret;
2119 
2120 	trace_msm_gpu_suspend(0);
2121 
2122 	a6xx_llc_deactivate(a6xx_gpu);
2123 
2124 	msm_devfreq_suspend(gpu);
2125 
2126 	mutex_lock(&a6xx_gpu->gmu.lock);
2127 	ret = a6xx_gmu_stop(a6xx_gpu);
2128 	mutex_unlock(&a6xx_gpu->gmu.lock);
2129 	if (ret)
2130 		return ret;
2131 
2132 	if (a6xx_gpu->shadow_bo)
2133 		for (i = 0; i < gpu->nr_rings; i++)
2134 			a6xx_gpu->shadow[i] = 0;
2135 
2136 	gpu->suspend_count++;
2137 
2138 	return 0;
2139 }
2140 
2141 static int a6xx_pm_suspend(struct msm_gpu *gpu)
2142 {
2143 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2144 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2145 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
2146 	int i;
2147 
2148 	trace_msm_gpu_suspend(0);
2149 
2150 	msm_devfreq_suspend(gpu);
2151 
2152 	mutex_lock(&a6xx_gpu->gmu.lock);
2153 
2154 	/* Drain the outstanding traffic on memory buses */
2155 	a6xx_bus_clear_pending_transactions(adreno_gpu, true);
2156 
2157 	if (adreno_is_a619_holi(adreno_gpu))
2158 		a6xx_sptprac_disable(gmu);
2159 
2160 	clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);
2161 
2162 	pm_runtime_put_sync(gmu->gxpd);
2163 	dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
2164 	pm_runtime_put_sync(gmu->dev);
2165 
2166 	mutex_unlock(&a6xx_gpu->gmu.lock);
2167 
2168 	if (a6xx_gpu->shadow_bo)
2169 		for (i = 0; i < gpu->nr_rings; i++)
2170 			a6xx_gpu->shadow[i] = 0;
2171 
2172 	gpu->suspend_count++;
2173 
2174 	return 0;
2175 }
2176 
2177 static int a6xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
2178 {
2179 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2180 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2181 
2182 	mutex_lock(&a6xx_gpu->gmu.lock);
2183 
2184 	/* Force the GPU power on so we can read this register */
2185 	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
2186 
2187 	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER);
2188 
2189 	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
2190 
2191 	mutex_unlock(&a6xx_gpu->gmu.lock);
2192 
2193 	return 0;
2194 }
2195 
2196 static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
2197 {
2198 	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER);
2199 	return 0;
2200 }
2201 
2202 static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
2203 {
2204 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2205 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2206 
2207 	return a6xx_gpu->cur_ring;
2208 }
2209 
2210 static void a6xx_destroy(struct msm_gpu *gpu)
2211 {
2212 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2213 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2214 
2215 	if (a6xx_gpu->sqe_bo) {
2216 		msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm);
2217 		drm_gem_object_put(a6xx_gpu->sqe_bo);
2218 	}
2219 
2220 	if (a6xx_gpu->shadow_bo) {
2221 		msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->vm);
2222 		drm_gem_object_put(a6xx_gpu->shadow_bo);
2223 	}
2224 
2225 	a6xx_llc_slices_destroy(a6xx_gpu);
2226 
2227 	a6xx_gmu_remove(a6xx_gpu);
2228 
2229 	adreno_gpu_cleanup(adreno_gpu);
2230 
2231 	kfree(a6xx_gpu);
2232 }
2233 
2234 static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
2235 {
2236 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2237 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2238 	u64 busy_cycles;
2239 
2240 	/* 19.2MHz */
2241 	*out_sample_rate = 19200000;
2242 
2243 	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
2244 			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
2245 			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
2246 
2247 	return busy_cycles;
2248 }
2249 
2250 static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
2251 			      bool suspended)
2252 {
2253 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2254 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2255 
2256 	mutex_lock(&a6xx_gpu->gmu.lock);
2257 	a6xx_gmu_set_freq(gpu, opp, suspended);
2258 	mutex_unlock(&a6xx_gpu->gmu.lock);
2259 }
2260 
2261 static struct drm_gpuvm *
2262 a6xx_create_vm(struct msm_gpu *gpu, struct platform_device *pdev)
2263 {
2264 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2265 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2266 	unsigned long quirks = 0;
2267 
2268 	/*
2269 	 * This allows GPU to set the bus attributes required to use system
2270 	 * cache on behalf of the iommu page table walker.
2271 	 */
2272 	if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice) &&
2273 	    !device_iommu_capable(&pdev->dev, IOMMU_CAP_CACHE_COHERENCY))
2274 		quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
2275 
2276 	return adreno_iommu_create_vm(gpu, pdev, quirks);
2277 }
2278 
2279 static struct drm_gpuvm *
2280 a6xx_create_private_vm(struct msm_gpu *gpu, bool kernel_managed)
2281 {
2282 	struct msm_mmu *mmu;
2283 
2284 	mmu = msm_iommu_pagetable_create(to_msm_vm(gpu->vm)->mmu, kernel_managed);
2285 
2286 	if (IS_ERR(mmu))
2287 		return ERR_CAST(mmu);
2288 
2289 	return msm_gem_vm_create(gpu->dev, mmu, "gpu", ADRENO_VM_START,
2290 				 adreno_private_vm_size(gpu), kernel_managed);
2291 }
2292 
2293 static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
2294 {
2295 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2296 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2297 
2298 	if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
2299 		return a6xx_gpu->shadow[ring->id];
2300 
2301 	return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
2302 }
2303 
2304 static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
2305 {
2306 	struct msm_cp_state cp_state = {
2307 		.ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
2308 		.ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
2309 		.ib1_rem  = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
2310 		.ib2_rem  = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
2311 	};
2312 	bool progress;
2313 
2314 	/*
2315 	 * Adjust the remaining data to account for what has already been
2316 	 * fetched from memory, but not yet consumed by the SQE.
2317 	 *
2318 	 * This is not *technically* correct, the amount buffered could
2319 	 * exceed the IB size due to hw prefetching ahead, but:
2320 	 *
2321 	 * (1) We aren't trying to find the exact position, just whether
2322 	 *     progress has been made
2323 	 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
2324 	 *     to prevent prefetching into an unrelated submit.  (And
2325 	 *     either way, at some point the ROQ will be full.)
2326 	 */
2327 	cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB1) >> 16;
2328 	cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB2) >> 16;
2329 
2330 	progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
2331 
2332 	ring->last_cp_state = cp_state;
2333 
2334 	return progress;
2335 }
2336 
2337 static u32 fuse_to_supp_hw(const struct adreno_info *info, u32 fuse)
2338 {
2339 	if (!info->speedbins)
2340 		return UINT_MAX;
2341 
2342 	for (int i = 0; info->speedbins[i].fuse != SHRT_MAX; i++)
2343 		if (info->speedbins[i].fuse == fuse)
2344 			return BIT(info->speedbins[i].speedbin);
2345 
2346 	return UINT_MAX;
2347 }
2348 
2349 static int a6xx_set_supported_hw(struct device *dev, const struct adreno_info *info)
2350 {
2351 	u32 supp_hw;
2352 	u32 speedbin;
2353 	int ret;
2354 
2355 	ret = adreno_read_speedbin(dev, &speedbin);
2356 	/*
2357 	 * -ENOENT means that the platform doesn't support speedbin which is
2358 	 * fine
2359 	 */
2360 	if (ret == -ENOENT) {
2361 		return 0;
2362 	} else if (ret) {
2363 		dev_err_probe(dev, ret,
2364 			      "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
2365 		return ret;
2366 	}
2367 
2368 	supp_hw = fuse_to_supp_hw(info, speedbin);
2369 
2370 	if (supp_hw == UINT_MAX) {
2371 		DRM_DEV_ERROR(dev,
2372 			"missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
2373 			speedbin);
2374 		supp_hw = BIT(0); /* Default */
2375 	}
2376 
2377 	ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
2378 	if (ret)
2379 		return ret;
2380 
2381 	return 0;
2382 }
2383 
2384 static const struct adreno_gpu_funcs funcs = {
2385 	.base = {
2386 		.get_param = adreno_get_param,
2387 		.set_param = adreno_set_param,
2388 		.hw_init = a6xx_hw_init,
2389 		.ucode_load = a6xx_ucode_load,
2390 		.pm_suspend = a6xx_gmu_pm_suspend,
2391 		.pm_resume = a6xx_gmu_pm_resume,
2392 		.recover = a6xx_recover,
2393 		.submit = a6xx_submit,
2394 		.active_ring = a6xx_active_ring,
2395 		.irq = a6xx_irq,
2396 		.destroy = a6xx_destroy,
2397 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2398 		.show = a6xx_show,
2399 #endif
2400 		.gpu_busy = a6xx_gpu_busy,
2401 		.gpu_get_freq = a6xx_gmu_get_freq,
2402 		.gpu_set_freq = a6xx_gpu_set_freq,
2403 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2404 		.gpu_state_get = a6xx_gpu_state_get,
2405 		.gpu_state_put = a6xx_gpu_state_put,
2406 #endif
2407 		.create_vm = a6xx_create_vm,
2408 		.create_private_vm = a6xx_create_private_vm,
2409 		.get_rptr = a6xx_get_rptr,
2410 		.progress = a6xx_progress,
2411 	},
2412 	.get_timestamp = a6xx_gmu_get_timestamp,
2413 };
2414 
2415 static const struct adreno_gpu_funcs funcs_gmuwrapper = {
2416 	.base = {
2417 		.get_param = adreno_get_param,
2418 		.set_param = adreno_set_param,
2419 		.hw_init = a6xx_hw_init,
2420 		.ucode_load = a6xx_ucode_load,
2421 		.pm_suspend = a6xx_pm_suspend,
2422 		.pm_resume = a6xx_pm_resume,
2423 		.recover = a6xx_recover,
2424 		.submit = a6xx_submit,
2425 		.active_ring = a6xx_active_ring,
2426 		.irq = a6xx_irq,
2427 		.destroy = a6xx_destroy,
2428 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2429 		.show = a6xx_show,
2430 #endif
2431 		.gpu_busy = a6xx_gpu_busy,
2432 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2433 		.gpu_state_get = a6xx_gpu_state_get,
2434 		.gpu_state_put = a6xx_gpu_state_put,
2435 #endif
2436 		.create_vm = a6xx_create_vm,
2437 		.create_private_vm = a6xx_create_private_vm,
2438 		.get_rptr = a6xx_get_rptr,
2439 		.progress = a6xx_progress,
2440 	},
2441 	.get_timestamp = a6xx_get_timestamp,
2442 };
2443 
2444 static const struct adreno_gpu_funcs funcs_a7xx = {
2445 	.base = {
2446 		.get_param = adreno_get_param,
2447 		.set_param = adreno_set_param,
2448 		.hw_init = a6xx_hw_init,
2449 		.ucode_load = a6xx_ucode_load,
2450 		.pm_suspend = a6xx_gmu_pm_suspend,
2451 		.pm_resume = a6xx_gmu_pm_resume,
2452 		.recover = a6xx_recover,
2453 		.submit = a7xx_submit,
2454 		.active_ring = a6xx_active_ring,
2455 		.irq = a6xx_irq,
2456 		.destroy = a6xx_destroy,
2457 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2458 		.show = a6xx_show,
2459 #endif
2460 		.gpu_busy = a6xx_gpu_busy,
2461 		.gpu_get_freq = a6xx_gmu_get_freq,
2462 		.gpu_set_freq = a6xx_gpu_set_freq,
2463 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2464 		.gpu_state_get = a6xx_gpu_state_get,
2465 		.gpu_state_put = a6xx_gpu_state_put,
2466 #endif
2467 		.create_vm = a6xx_create_vm,
2468 		.create_private_vm = a6xx_create_private_vm,
2469 		.get_rptr = a6xx_get_rptr,
2470 		.progress = a6xx_progress,
2471 	},
2472 	.get_timestamp = a6xx_gmu_get_timestamp,
2473 };
2474 
2475 struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
2476 {
2477 	struct msm_drm_private *priv = dev->dev_private;
2478 	struct platform_device *pdev = priv->gpu_pdev;
2479 	struct adreno_platform_config *config = pdev->dev.platform_data;
2480 	struct device_node *node;
2481 	struct a6xx_gpu *a6xx_gpu;
2482 	struct adreno_gpu *adreno_gpu;
2483 	struct msm_gpu *gpu;
2484 	extern int enable_preemption;
2485 	bool is_a7xx;
2486 	int ret;
2487 
2488 	a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL);
2489 	if (!a6xx_gpu)
2490 		return ERR_PTR(-ENOMEM);
2491 
2492 	adreno_gpu = &a6xx_gpu->base;
2493 	gpu = &adreno_gpu->base;
2494 
2495 	mutex_init(&a6xx_gpu->gmu.lock);
2496 
2497 	adreno_gpu->registers = NULL;
2498 
2499 	/* Check if there is a GMU phandle and set it up */
2500 	node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
2501 	/* FIXME: How do we gracefully handle this? */
2502 	BUG_ON(!node);
2503 
2504 	adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper");
2505 
2506 	adreno_gpu->base.hw_apriv =
2507 		!!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV);
2508 
2509 	/* gpu->info only gets assigned in adreno_gpu_init() */
2510 	is_a7xx = config->info->family == ADRENO_7XX_GEN1 ||
2511 		  config->info->family == ADRENO_7XX_GEN2 ||
2512 		  config->info->family == ADRENO_7XX_GEN3;
2513 
2514 	a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx);
2515 
2516 	ret = a6xx_set_supported_hw(&pdev->dev, config->info);
2517 	if (ret) {
2518 		a6xx_llc_slices_destroy(a6xx_gpu);
2519 		kfree(a6xx_gpu);
2520 		return ERR_PTR(ret);
2521 	}
2522 
2523 	if ((enable_preemption == 1) || (enable_preemption == -1 &&
2524 	    (config->info->quirks & ADRENO_QUIRK_PREEMPTION)))
2525 		ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 4);
2526 	else if (is_a7xx)
2527 		ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 1);
2528 	else if (adreno_has_gmu_wrapper(adreno_gpu))
2529 		ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_gmuwrapper, 1);
2530 	else
2531 		ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
2532 	if (ret) {
2533 		a6xx_destroy(&(a6xx_gpu->base.base));
2534 		return ERR_PTR(ret);
2535 	}
2536 
2537 	/*
2538 	 * For now only clamp to idle freq for devices where this is known not
2539 	 * to cause power supply issues:
2540 	 */
2541 	if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
2542 		priv->gpu_clamp_to_idle = true;
2543 
2544 	if (adreno_has_gmu_wrapper(adreno_gpu))
2545 		ret = a6xx_gmu_wrapper_init(a6xx_gpu, node);
2546 	else
2547 		ret = a6xx_gmu_init(a6xx_gpu, node);
2548 	of_node_put(node);
2549 	if (ret) {
2550 		a6xx_destroy(&(a6xx_gpu->base.base));
2551 		return ERR_PTR(ret);
2552 	}
2553 
2554 	if (adreno_is_a7xx(adreno_gpu)) {
2555 		ret = a7xx_cx_mem_init(a6xx_gpu);
2556 		if (ret) {
2557 			a6xx_destroy(&(a6xx_gpu->base.base));
2558 			return ERR_PTR(ret);
2559 		}
2560 	}
2561 
2562 	adreno_gpu->uche_trap_base = 0x1fffffffff000ull;
2563 
2564 	msm_mmu_set_fault_handler(to_msm_vm(gpu->vm)->mmu, gpu,
2565 				  a6xx_fault_handler);
2566 
2567 	ret = a6xx_calc_ubwc_config(adreno_gpu);
2568 	if (ret) {
2569 		a6xx_destroy(&(a6xx_gpu->base.base));
2570 		return ERR_PTR(ret);
2571 	}
2572 
2573 	/* Set up the preemption specific bits and pieces for each ringbuffer */
2574 	a6xx_preempt_init(gpu);
2575 
2576 	return gpu;
2577 }
2578