xref: /linux/drivers/gpu/drm/msm/adreno/a5xx_gpu.c (revision ec8a42e7343234802b9054874fe01810880289ce)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3  */
4 
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15 
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
18 
19 #define GPU_PAS_ID 13
20 
21 void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
22 		bool sync)
23 {
24 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
25 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
26 	uint32_t wptr;
27 	unsigned long flags;
28 
29 	/*
30 	 * Most flush operations need to issue a WHERE_AM_I opcode to sync up
31 	 * the rptr shadow
32 	 */
33 	if (a5xx_gpu->has_whereami && sync) {
34 		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
35 		OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
36 		OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
37 	}
38 
39 	spin_lock_irqsave(&ring->preempt_lock, flags);
40 
41 	/* Copy the shadow to the actual register */
42 	ring->cur = ring->next;
43 
44 	/* Make sure to wrap wptr if we need to */
45 	wptr = get_wptr(ring);
46 
47 	spin_unlock_irqrestore(&ring->preempt_lock, flags);
48 
49 	/* Make sure everything is posted before making a decision */
50 	mb();
51 
52 	/* Update HW if this is the current ring and we are not in preempt */
53 	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
54 		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
55 }
56 
57 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
58 {
59 	struct msm_drm_private *priv = gpu->dev->dev_private;
60 	struct msm_ringbuffer *ring = submit->ring;
61 	struct msm_gem_object *obj;
62 	uint32_t *ptr, dwords;
63 	unsigned int i, j;
64 
65 	for (i = 0; i < submit->nr_cmds; i++) {
66 		switch (submit->cmd[i].type) {
67 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
68 			break;
69 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
70 			if (priv->lastctx == submit->queue->ctx)
71 				break;
72 			fallthrough;
73 		case MSM_SUBMIT_CMD_BUF:
74 			/* copy commands into RB: */
75 			obj = submit->bos[submit->cmd[i].idx].obj;
76 			dwords = submit->cmd[i].size;
77 
78 			ptr = msm_gem_get_vaddr(&obj->base);
79 
80 			/* _get_vaddr() shouldn't fail at this point,
81 			 * since we've already mapped it once in
82 			 * submit_reloc()
83 			 */
84 			if (WARN_ON(!ptr))
85 				return;
86 
87 			for (j = 0; j < dwords; j++) {
88 				/* Normally OUT_PKTn() would wait for
89 				 * space for the packet, but since we
90 				 * just OUT_RING() the whole thing we
91 				 * need to call adreno_wait_ring()
92 				 * ourselves:
93 				 */
94 				adreno_wait_ring(ring, 1);
95 				OUT_RING(ring, ptr[j]);
96 			}
97 
98 			msm_gem_put_vaddr(&obj->base);
99 
100 			break;
101 		}
102 	}
103 
104 	a5xx_flush(gpu, ring, true);
105 	a5xx_preempt_trigger(gpu);
106 
107 	/* we might not necessarily have a cmd from userspace to
108 	 * trigger an event to know that submit has completed, so
109 	 * do this manually:
110 	 */
111 	a5xx_idle(gpu, ring);
112 	ring->memptrs->fence = submit->seqno;
113 	msm_gpu_retire(gpu);
114 }
115 
116 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
117 {
118 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
119 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
120 	struct msm_drm_private *priv = gpu->dev->dev_private;
121 	struct msm_ringbuffer *ring = submit->ring;
122 	unsigned int i, ibs = 0;
123 
124 	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
125 		priv->lastctx = NULL;
126 		a5xx_submit_in_rb(gpu, submit);
127 		return;
128 	}
129 
130 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
131 	OUT_RING(ring, 0x02);
132 
133 	/* Turn off protected mode to write to special registers */
134 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
135 	OUT_RING(ring, 0);
136 
137 	/* Set the save preemption record for the ring/command */
138 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
139 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
140 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
141 
142 	/* Turn back on protected mode */
143 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
144 	OUT_RING(ring, 1);
145 
146 	/* Enable local preemption for fine-grained preemption */
147 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
148 	OUT_RING(ring, 0x02);
149 
150 	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
151 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
152 	OUT_RING(ring, 0x02);
153 
154 	/* Submit the commands */
155 	for (i = 0; i < submit->nr_cmds; i++) {
156 		switch (submit->cmd[i].type) {
157 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
158 			break;
159 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
160 			if (priv->lastctx == submit->queue->ctx)
161 				break;
162 			fallthrough;
163 		case MSM_SUBMIT_CMD_BUF:
164 			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
165 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
166 			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
167 			OUT_RING(ring, submit->cmd[i].size);
168 			ibs++;
169 			break;
170 		}
171 	}
172 
173 	/*
174 	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
175 	 * are done rendering - otherwise a lucky preemption would start
176 	 * replaying from the last checkpoint
177 	 */
178 	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
179 	OUT_RING(ring, 0);
180 	OUT_RING(ring, 0);
181 	OUT_RING(ring, 0);
182 	OUT_RING(ring, 0);
183 	OUT_RING(ring, 0);
184 
185 	/* Turn off IB level preemptions */
186 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
187 	OUT_RING(ring, 0x01);
188 
189 	/* Write the fence to the scratch register */
190 	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
191 	OUT_RING(ring, submit->seqno);
192 
193 	/*
194 	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
195 	 * timestamp is written to the memory and then triggers the interrupt
196 	 */
197 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
198 	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
199 		CP_EVENT_WRITE_0_IRQ);
200 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
201 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
202 	OUT_RING(ring, submit->seqno);
203 
204 	/* Yield the floor on command completion */
205 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
206 	/*
207 	 * If dword[2:1] are non zero, they specify an address for the CP to
208 	 * write the value of dword[3] to on preemption complete. Write 0 to
209 	 * skip the write
210 	 */
211 	OUT_RING(ring, 0x00);
212 	OUT_RING(ring, 0x00);
213 	/* Data value - not used if the address above is 0 */
214 	OUT_RING(ring, 0x01);
215 	/* Set bit 0 to trigger an interrupt on preempt complete */
216 	OUT_RING(ring, 0x01);
217 
218 	/* A WHERE_AM_I packet is not needed after a YIELD */
219 	a5xx_flush(gpu, ring, false);
220 
221 	/* Check to see if we need to start preemption */
222 	a5xx_preempt_trigger(gpu);
223 }
224 
225 static const struct {
226 	u32 offset;
227 	u32 value;
228 } a5xx_hwcg[] = {
229 	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
230 	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
231 	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
232 	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
233 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
234 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
235 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
236 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
237 	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
238 	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
239 	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
240 	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
241 	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
242 	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
243 	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
244 	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
245 	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
246 	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
247 	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
248 	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
249 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
250 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
251 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
252 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
253 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
254 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
255 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
256 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
257 	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
258 	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
259 	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
260 	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
261 	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
262 	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
263 	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
264 	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
265 	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
266 	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
267 	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
268 	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
269 	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
270 	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
271 	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
272 	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
273 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
274 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
275 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
276 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
277 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
278 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
279 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
280 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
281 	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
282 	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
283 	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
284 	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
285 	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
286 	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
287 	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
288 	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
289 	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
290 	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
291 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
292 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
293 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
294 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
295 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
296 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
297 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
298 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
299 	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
300 	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
301 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
302 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
303 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
304 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
305 	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
306 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
307 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
308 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
309 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
310 	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
311 	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
312 	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
313 	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
314 	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
315 	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
316 	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
317 	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
318 	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
319 	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
320 	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
321 };
322 
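/*
 * Toggle IP-level hardware clock gating. Callers that dump registers over
 * AHB (for example a5xx_gpu_state_get() below) temporarily turn HWCG off
 * around the reads and then re-enable it.
 */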
323 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
324 {
325 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
326 	unsigned int i;
327 
328 	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
329 		gpu_write(gpu, a5xx_hwcg[i].offset,
330 			state ? a5xx_hwcg[i].value : 0);
331 
332 	if (adreno_is_a540(adreno_gpu)) {
333 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
334 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
335 	}
336 
337 	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
338 	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
339 }
340 
341 static int a5xx_me_init(struct msm_gpu *gpu)
342 {
343 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
344 	struct msm_ringbuffer *ring = gpu->rb[0];
345 
346 	OUT_PKT7(ring, CP_ME_INIT, 8);
347 
348 	OUT_RING(ring, 0x0000002F);
349 
350 	/* Enable multiple hardware contexts */
351 	OUT_RING(ring, 0x00000003);
352 
353 	/* Enable error detection */
354 	OUT_RING(ring, 0x20000000);
355 
356 	/* Don't enable header dump */
357 	OUT_RING(ring, 0x00000000);
358 	OUT_RING(ring, 0x00000000);
359 
360 	/* Specify workarounds for various microcode issues */
361 	if (adreno_is_a530(adreno_gpu)) {
362 		/* Workaround for token end syncs
363 		 * Force a WFI after every direct-render 3D mode draw and every
364 		 * 2D mode 3 draw
365 		 */
366 		OUT_RING(ring, 0x0000000B);
367 	} else if (adreno_is_a510(adreno_gpu)) {
368 		/* Workaround for token end syncs */
369 		OUT_RING(ring, 0x00000001);
370 	} else {
371 		/* No workarounds enabled */
372 		OUT_RING(ring, 0x00000000);
373 	}
374 
375 	OUT_RING(ring, 0x00000000);
376 	OUT_RING(ring, 0x00000000);
377 
378 	a5xx_flush(gpu, ring, true);
379 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
380 }
381 
382 static int a5xx_preempt_start(struct msm_gpu *gpu)
383 {
384 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
385 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
386 	struct msm_ringbuffer *ring = gpu->rb[0];
387 
388 	if (gpu->nr_rings == 1)
389 		return 0;
390 
391 	/* Turn off protected mode to write to special registers */
392 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
393 	OUT_RING(ring, 0);
394 
395 	/* Set the save preemption record for the ring/command */
396 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
397 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
398 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
399 
400 	/* Turn back on protected mode */
401 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
402 	OUT_RING(ring, 1);
403 
404 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
405 	OUT_RING(ring, 0x00);
406 
407 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
408 	OUT_RING(ring, 0x01);
409 
410 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
411 	OUT_RING(ring, 0x01);
412 
413 	/* Yield the floor on command completion */
414 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
415 	OUT_RING(ring, 0x00);
416 	OUT_RING(ring, 0x00);
417 	OUT_RING(ring, 0x01);
418 	OUT_RING(ring, 0x01);
419 
420 	/* The WHERE_AM_I packet is not needed after a YIELD is issued */
421 	a5xx_flush(gpu, ring, false);
422 
423 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
424 }
425 
426 static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
427 		struct drm_gem_object *obj)
428 {
429 	u32 *buf = msm_gem_get_vaddr(obj);
430 
431 	if (IS_ERR(buf))
432 		return;
433 
434 	/*
435 	 * If the lowest nibble is 0xa, this microcode has been patched. The
436 	 * full version is in the third dword (buf[2]), but we only care about
437 	 * the patchlevel, which is its lowest nibble.
438 	 */
439 	if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
440 		a5xx_gpu->has_whereami = true;
441 
442 	msm_gem_put_vaddr(obj);
443 }
444 
445 static int a5xx_ucode_init(struct msm_gpu *gpu)
446 {
447 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
448 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
449 	int ret;
450 
451 	if (!a5xx_gpu->pm4_bo) {
452 		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
453 			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
454 
455 
456 		if (IS_ERR(a5xx_gpu->pm4_bo)) {
457 			ret = PTR_ERR(a5xx_gpu->pm4_bo);
458 			a5xx_gpu->pm4_bo = NULL;
459 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
460 				ret);
461 			return ret;
462 		}
463 
464 		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
465 	}
466 
467 	if (!a5xx_gpu->pfp_bo) {
468 		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
469 			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
470 
471 		if (IS_ERR(a5xx_gpu->pfp_bo)) {
472 			ret = PTR_ERR(a5xx_gpu->pfp_bo);
473 			a5xx_gpu->pfp_bo = NULL;
474 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
475 				ret);
476 			return ret;
477 		}
478 
479 		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
480 		a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
481 	}
482 
483 	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
484 		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
485 
486 	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
487 		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
488 
489 	return 0;
490 }
491 
492 #define SCM_GPU_ZAP_SHADER_RESUME 0
493 
494 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
495 {
496 	int ret;
497 
498 	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
499 	if (ret)
500 		DRM_ERROR("%s: zap-shader resume failed: %d\n",
501 			gpu->name, ret);
502 
503 	return ret;
504 }
505 
506 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
507 {
508 	static bool loaded;
509 	int ret;
510 
511 	/*
512 	 * If the zap shader is already loaded into memory we just need to kick
513 	 * the remote processor to reinitialize it
514 	 */
515 	if (loaded)
516 		return a5xx_zap_shader_resume(gpu);
517 
518 	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
519 
520 	loaded = !ret;
521 	return ret;
522 }
523 
524 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
525 	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
526 	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
527 	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
528 	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
529 	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
530 	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
531 	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
532 	  A5XX_RBBM_INT_0_MASK_CP_SW | \
533 	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
534 	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
535 	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
536 
537 static int a5xx_hw_init(struct msm_gpu *gpu)
538 {
539 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
540 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
541 	int ret;
542 
543 	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
544 
545 	if (adreno_is_a540(adreno_gpu))
546 		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
547 
548 	/* Make all blocks contribute to the GPU BUSY perf counter */
549 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
550 
551 	/* Enable RBBM error reporting bits */
552 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
553 
554 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
555 		/*
556 		 * Mask out the activity signals from RB1-3 to avoid false
557 		 * positives
558 		 */
559 
560 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
561 			0xF0000000);
562 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
563 			0xFFFFFFFF);
564 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
565 			0xFFFFFFFF);
566 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
567 			0xFFFFFFFF);
568 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
569 			0xFFFFFFFF);
570 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
571 			0xFFFFFFFF);
572 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
573 			0xFFFFFFFF);
574 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
575 			0xFFFFFFFF);
576 	}
577 
578 	/* Enable fault detection */
579 	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
580 		(1 << 30) | 0xFFFF);
581 
582 	/* Turn on performance counters */
583 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
584 
585 	/* Select CP0 to always count cycles */
586 	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
587 
588 	/* Select countable 6 on RBBM counter 0 to get the busy status for devfreq */
589 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
590 
591 	/* Increase VFD cache access so LRZ and other data gets evicted less */
592 	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
593 
594 	/* Disable L2 bypass in the UCHE */
595 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
596 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
597 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
598 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
599 
600 	/* Set the GMEM VA range (0x00100000 to 0x00100000 + gpu->gmem - 1) */
601 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
602 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
603 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
604 		0x00100000 + adreno_gpu->gmem - 1);
605 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
606 
607 	if (adreno_is_a510(adreno_gpu)) {
608 		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
609 		gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
610 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
611 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
612 		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
613 			  (0x200 << 11 | 0x200 << 22));
614 	} else {
615 		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
616 		if (adreno_is_a530(adreno_gpu))
617 			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
618 		if (adreno_is_a540(adreno_gpu))
619 			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
620 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
621 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
622 		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
623 			  (0x400 << 11 | 0x300 << 22));
624 	}
625 
626 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
627 		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
628 
629 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
630 
631 	/* Enable USE_RETENTION_FLOPS */
632 	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
633 
634 	/* Enable ME/PFP split notification */
635 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
636 
637 	/*
638 	 * On A5xx, the CCU can send the context_done event for a particular
639 	 * context to the UCHE (and ultimately the CP) while there is still a
640 	 * valid transaction for that context inside the CCU. The CP may then
641 	 * program config registers, causing the pending transaction to be
642 	 * interpreted differently and resulting in a GPU fault. This bug is
643 	 * fixed in the latest A510 revision; to enable the fix, bit[11] of
644 	 * RB_DBG_ECO_CNTL needs to be set to 0 (the default is 1, i.e. the
645 	 * fix is disabled). On older A510 revisions this bit is unused.
646 	 */
647 	if (adreno_is_a510(adreno_gpu))
648 		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
649 
650 	/* Enable HWCG */
651 	a5xx_set_hwcg(gpu, true);
652 
653 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
654 
655 	/* Set the highest bank bit */
656 	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
657 	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
658 	if (adreno_is_a540(adreno_gpu))
659 		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
660 
661 	/* Protect registers from the CP */
662 	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
663 
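	/*
	 * Each CP_PROTECT(n) entry below is built by the ADRENO_PROTECT_RW()
	 * helper in adreno_gpu.h: it encodes a base register offset and a
	 * power-of-two range length that the CP is not allowed to read or
	 * write directly.
	 */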
664 	/* RBBM */
665 	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
666 	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
667 	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
668 	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
669 	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
670 	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
671 
672 	/* Content protect */
673 	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
674 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
675 			16));
676 	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
677 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
678 
679 	/* CP */
680 	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
681 	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
682 	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
683 	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
684 
685 	/* RB */
686 	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
687 	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
688 
689 	/* VPC */
690 	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
691 	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
692 
693 	/* UCHE */
694 	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
695 
696 	if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
697 		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
698 			ADRENO_PROTECT_RW(0x10000, 0x8000));
699 
700 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
701 	/*
702 	 * Disable the trusted memory range - we don't actually support secure
703 	 * memory rendering at this point in time and we don't want to block off
704 	 * part of the virtual memory space.
705 	 */
706 	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
707 		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
708 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
709 
710 	/* Put the GPU into 64 bit by default */
711 	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
712 	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
713 	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
714 	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
715 	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
716 	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
717 	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
718 	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
719 	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
720 	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
721 	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
722 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
723 
724 	/*
725 	 * A VPC corner case with local memory load/kill leads to corrupt
726 	 * internal state. The normal disable does not work for all a5xx chips,
727 	 * so use the following settings to disable it.
728 	 */
729 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
730 		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
731 		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
732 	}
733 
734 	ret = adreno_hw_init(gpu);
735 	if (ret)
736 		return ret;
737 
738 	if (!adreno_is_a510(adreno_gpu))
739 		a5xx_gpmu_ucode_init(gpu);
740 
741 	ret = a5xx_ucode_init(gpu);
742 	if (ret)
743 		return ret;
744 
745 	/* Set the ringbuffer address */
746 	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
747 		gpu->rb[0]->iova);
748 
749 	/*
750 	 * If the microcode supports the WHERE_AM_I opcode then we can use that
751 	 * in lieu of the RPTR shadow and enable preemption. Otherwise, we
752 	 * can't safely use the RPTR shadow or preemption. In either case, the
753 	 * RPTR shadow should be disabled in hardware.
754 	 */
755 	gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
756 		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
757 
758 	/* Create a privileged buffer for the RPTR shadow */
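	/*
	 * The shadow holds one u32 slot per ring; the CP updates the slot for
	 * the ring being flushed in response to the CP_WHERE_AM_I packets
	 * emitted from a5xx_flush(), and a5xx_get_rptr() reads it back in
	 * place of the RPTR register.
	 */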
759 	if (a5xx_gpu->has_whereami) {
760 		if (!a5xx_gpu->shadow_bo) {
761 			a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
762 				sizeof(u32) * gpu->nr_rings,
763 				MSM_BO_UNCACHED | MSM_BO_MAP_PRIV,
764 				gpu->aspace, &a5xx_gpu->shadow_bo,
765 				&a5xx_gpu->shadow_iova);
766 
767 			if (IS_ERR(a5xx_gpu->shadow))
768 				return PTR_ERR(a5xx_gpu->shadow);
769 		}
770 
771 		gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
772 			REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
773 	} else if (gpu->nr_rings > 1) {
774 		/* Disable preemption if WHERE_AM_I isn't available */
775 		a5xx_preempt_fini(gpu);
776 		gpu->nr_rings = 1;
777 	}
778 
779 	a5xx_preempt_hw_init(gpu);
780 
781 	/* Program the RBBM interrupt mask with the interrupts we handle */
782 	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
783 
784 	/* Clear ME_HALT to start the micro engine */
785 	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
786 	ret = a5xx_me_init(gpu);
787 	if (ret)
788 		return ret;
789 
790 	ret = a5xx_power_init(gpu);
791 	if (ret)
792 		return ret;
793 
794 	/*
795 	 * Send a pipeline event stat to get misbehaving counters to start
796 	 * ticking correctly
797 	 */
798 	if (adreno_is_a530(adreno_gpu)) {
799 		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
800 		OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
801 
802 		a5xx_flush(gpu, gpu->rb[0], true);
803 		if (!a5xx_idle(gpu, gpu->rb[0]))
804 			return -EINVAL;
805 	}
806 
807 	/*
808 	 * If the chip supports loading a zap shader, try to load one into the
809 	 * secure world. If successful, we can use the CP to switch out of
810 	 * secure mode. If not, we have no recourse but to try to switch
811 	 * ourselves out manually. If we guessed wrong, access to the
812 	 * RBBM_SECVID_TRUST_CNTL register will be blocked and a permissions
813 	 * violation will soon follow.
814 	 */
815 	ret = a5xx_zap_shader_init(gpu);
816 	if (!ret) {
817 		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
818 		OUT_RING(gpu->rb[0], 0x00000000);
819 
820 		a5xx_flush(gpu, gpu->rb[0], true);
821 		if (!a5xx_idle(gpu, gpu->rb[0]))
822 			return -EINVAL;
823 	} else if (ret == -ENODEV) {
824 		/*
825 		 * This device does not use zap shader (but print a warning
826 		 * just in case someone got their dt wrong.. hopefully they
827 		 * have a debug UART to realize the error of their ways...
828 		 * if you mess this up you are about to crash horribly)
829 		 */
830 		dev_warn_once(gpu->dev->dev,
831 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
832 		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
833 	} else {
834 		return ret;
835 	}
836 
837 	/* Last step - yield the ringbuffer */
838 	a5xx_preempt_start(gpu);
839 
840 	return 0;
841 }
842 
843 static void a5xx_recover(struct msm_gpu *gpu)
844 {
845 	int i;
846 
847 	adreno_dump_info(gpu);
848 
849 	for (i = 0; i < 8; i++) {
850 		printk("CP_SCRATCH_REG%d: %u\n", i,
851 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
852 	}
853 
854 	if (hang_debug)
855 		a5xx_dump(gpu);
856 
857 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
858 	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
859 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
860 	adreno_recover(gpu);
861 }
862 
863 static void a5xx_destroy(struct msm_gpu *gpu)
864 {
865 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
866 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
867 
868 	DBG("%s", gpu->name);
869 
870 	a5xx_preempt_fini(gpu);
871 
872 	if (a5xx_gpu->pm4_bo) {
873 		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
874 		drm_gem_object_put(a5xx_gpu->pm4_bo);
875 	}
876 
877 	if (a5xx_gpu->pfp_bo) {
878 		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
879 		drm_gem_object_put(a5xx_gpu->pfp_bo);
880 	}
881 
882 	if (a5xx_gpu->gpmu_bo) {
883 		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
884 		drm_gem_object_put(a5xx_gpu->gpmu_bo);
885 	}
886 
887 	if (a5xx_gpu->shadow_bo) {
888 		msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
889 		drm_gem_object_put(a5xx_gpu->shadow_bo);
890 	}
891 
892 	adreno_gpu_cleanup(adreno_gpu);
893 	kfree(a5xx_gpu);
894 }
895 
896 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
897 {
898 	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
899 		return false;
900 
901 	/*
902 	 * Nearly every abnormality ends up pausing the GPU and triggering a
903 	 * fault so we can safely just watch for this one interrupt to fire
904 	 */
905 	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
906 		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
907 }
908 
909 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
910 {
911 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
912 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
913 
914 	if (ring != a5xx_gpu->cur_ring) {
915 		WARN(1, "Tried to idle a non-current ringbuffer\n");
916 		return false;
917 	}
918 
919 	/* wait for CP to drain ringbuffer: */
920 	if (!adreno_idle(gpu, ring))
921 		return false;
922 
923 	if (spin_until(_a5xx_check_idle(gpu))) {
924 		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
925 			gpu->name, __builtin_return_address(0),
926 			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
927 			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
928 			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
929 			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
930 		return false;
931 	}
932 
933 	return true;
934 }
935 
936 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
937 {
938 	struct msm_gpu *gpu = arg;
939 	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
940 			iova, flags,
941 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
942 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
943 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
944 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
945 
946 	return -EFAULT;
947 }
948 
949 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
950 {
951 	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
952 
953 	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
954 		u32 val;
955 
956 		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
957 
958 		/*
959 		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
960 		 * read it twice
961 		 */
962 
963 		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
964 		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
965 
966 		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
967 			val);
968 	}
969 
970 	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
971 		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
972 			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
973 
974 	if (status & A5XX_CP_INT_CP_DMA_ERROR)
975 		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
976 
977 	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
978 		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
979 
980 		dev_err_ratelimited(gpu->dev->dev,
981 			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
982 			val & (1 << 24) ? "WRITE" : "READ",
983 			(val & 0xFFFFF) >> 2, val);
984 	}
985 
986 	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
987 		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
988 		const char *access[16] = { "reserved", "reserved",
989 			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
990 			"", "", "me read", "me write", "", "", "crashdump read",
991 			"crashdump write" };
992 
993 		dev_err_ratelimited(gpu->dev->dev,
994 			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
995 			status & 0xFFFFF, access[(status >> 24) & 0xF],
996 			(status & (1 << 31)), status);
997 	}
998 }
999 
1000 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
1001 {
1002 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
1003 		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
1004 
1005 		dev_err_ratelimited(gpu->dev->dev,
1006 			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
1007 			val & (1 << 28) ? "WRITE" : "READ",
1008 			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
1009 			(val >> 24) & 0xF);
1010 
1011 		/* Clear the error */
1012 		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
1013 
1014 		/* Clear the interrupt */
1015 		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1016 			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1017 	}
1018 
1019 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
1020 		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
1021 
1022 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
1023 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
1024 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
1025 
1026 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
1027 		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
1028 			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
1029 
1030 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
1031 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
1032 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
1033 
1034 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1035 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
1036 
1037 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1038 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
1039 }
1040 
1041 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
1042 {
1043 	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI) << 32;
1044 
1045 	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
1046 
1047 	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
1048 		addr);
1049 }
1050 
1051 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
1052 {
1053 	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
1054 }
1055 
1056 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
1057 {
1058 	struct drm_device *dev = gpu->dev;
1059 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1060 
1061 	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1062 		ring ? ring->id : -1, ring ? ring->seqno : 0,
1063 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1064 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1065 		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1066 		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
1067 		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1068 		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
1069 		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1070 
1071 	/* Turn off the hangcheck timer to keep it from bothering us */
1072 	del_timer(&gpu->hangcheck_timer);
1073 
1074 	kthread_queue_work(gpu->worker, &gpu->recover_work);
1075 }
1076 
1077 #define RBBM_ERROR_MASK \
1078 	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1079 	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1080 	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1081 	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1082 	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1083 	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1084 
1085 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1086 {
1087 	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1088 
1089 	/*
1090 	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1091 	 * before the source is cleared the interrupt will storm.
1092 	 */
1093 	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1094 		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1095 
1096 	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1097 	if (status & RBBM_ERROR_MASK)
1098 		a5xx_rbbm_err_irq(gpu, status);
1099 
1100 	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1101 		a5xx_cp_err_irq(gpu);
1102 
1103 	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1104 		a5xx_fault_detect_irq(gpu);
1105 
1106 	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1107 		a5xx_uche_err_irq(gpu);
1108 
1109 	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1110 		a5xx_gpmu_err_irq(gpu);
1111 
1112 	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1113 		a5xx_preempt_trigger(gpu);
1114 		msm_gpu_retire(gpu);
1115 	}
1116 
1117 	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1118 		a5xx_preempt_irq(gpu);
1119 
1120 	return IRQ_HANDLED;
1121 }
1122 
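/*
 * Register ranges captured for debugfs dumps and GPU state snapshots by the
 * common adreno code: pairs of <first, last> register offsets, terminated
 * by ~0.
 */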
1123 static const u32 a5xx_registers[] = {
1124 	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1125 	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1126 	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1127 	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1128 	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1129 	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1130 	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1131 	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1132 	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1133 	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1134 	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1135 	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1136 	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1137 	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1138 	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1139 	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1140 	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1141 	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1142 	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1143 	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1144 	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1145 	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1146 	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1147 	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1148 	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1149 	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1150 	0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1151 	0xAC60, 0xAC60, ~0,
1152 };
1153 
1154 static void a5xx_dump(struct msm_gpu *gpu)
1155 {
1156 	DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1157 		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1158 	adreno_dump(gpu);
1159 }
1160 
1161 static int a5xx_pm_resume(struct msm_gpu *gpu)
1162 {
1163 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1164 	int ret;
1165 
1166 	/* Turn on the core power */
1167 	ret = msm_gpu_pm_resume(gpu);
1168 	if (ret)
1169 		return ret;
1170 
1171 	if (adreno_is_a510(adreno_gpu)) {
1172 		/* Halt the sp_input_clk at HM level */
1173 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1174 		a5xx_set_hwcg(gpu, true);
1175 		/* Turn on sp_input_clk at HM level */
1176 		gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1177 		return 0;
1178 	}
1179 
1180 	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
1181 	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1182 
1183 	/* Wait 3 usecs before polling */
1184 	udelay(3);
1185 
1186 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1187 		(1 << 20), (1 << 20));
1188 	if (ret) {
1189 		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1190 			gpu->name,
1191 			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1192 		return ret;
1193 	}
1194 
1195 	/* Turn on the SP domain */
1196 	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1197 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1198 		(1 << 20), (1 << 20));
1199 	if (ret)
1200 		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1201 			gpu->name);
1202 
1203 	return ret;
1204 }
1205 
1206 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1207 {
1208 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1209 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1210 	u32 mask = 0xf;
1211 	int i, ret;
1212 
1213 	/* A510 has 3 XIN ports in VBIF */
1214 	if (adreno_is_a510(adreno_gpu))
1215 		mask = 0x7;
1216 
1217 	/* Clear the VBIF pipe before shutting down */
1218 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1219 	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1220 				mask) == mask);
1221 
1222 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1223 
1224 	/*
1225 	 * Reset the VBIF before power collapse to avoid issue with FIFO
1226 	 * entries
1227 	 */
1228 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1229 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1230 
1231 	ret = msm_gpu_pm_suspend(gpu);
1232 	if (ret)
1233 		return ret;
1234 
1235 	if (a5xx_gpu->has_whereami)
1236 		for (i = 0; i < gpu->nr_rings; i++)
1237 			a5xx_gpu->shadow[i] = 0;
1238 
1239 	return 0;
1240 }
1241 
1242 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1243 {
1244 	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1245 		REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1246 
1247 	return 0;
1248 }
1249 
1250 struct a5xx_crashdumper {
1251 	void *ptr;
1252 	struct drm_gem_object *bo;
1253 	u64 iova;
1254 };
1255 
1256 struct a5xx_gpu_state {
1257 	struct msm_gpu_state base;
1258 	u32 *hlsqregs;
1259 };
1260 
1261 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1262 		struct a5xx_crashdumper *dumper)
1263 {
1264 	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1265 		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1266 		&dumper->bo, &dumper->iova);
1267 
1268 	if (!IS_ERR(dumper->ptr))
1269 		msm_gem_object_set_name(dumper->bo, "crashdump");
1270 
1271 	return PTR_ERR_OR_ZERO(dumper->ptr);
1272 }
1273 
1274 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1275 		struct a5xx_crashdumper *dumper)
1276 {
1277 	u32 val;
1278 
1279 	if (IS_ERR_OR_NULL(dumper->ptr))
1280 		return -EINVAL;
1281 
1282 	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1283 		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1284 
1285 	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1286 
1287 	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1288 		val & 0x04, 100, 10000);
1289 }
1290 
1291 /*
1292  * This is a list of the registers that need to be read through the HLSQ
1293  * aperture via the crashdumper, since they are not normally accessible from
1294  * the CPU on a secure platform.
1295  */
1296 static const struct {
1297 	u32 type;
1298 	u32 regoffset;
1299 	u32 count;
1300 } a5xx_hlsq_aperture_regs[] = {
1301 	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1302 	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1303 	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1304 	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1305 	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1306 	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1307 	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1308 	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1309 	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1310 	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1311 	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1312 	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1313 	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1314 	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1315 	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1316 };
1317 
1318 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1319 		struct a5xx_gpu_state *a5xx_state)
1320 {
1321 	struct a5xx_crashdumper dumper = { 0 };
1322 	u32 offset, count = 0;
1323 	u64 *ptr;
1324 	int i;
1325 
1326 	if (a5xx_crashdumper_init(gpu, &dumper))
1327 		return;
1328 
1329 	/* The script will be written at offset 0 */
1330 	ptr = dumper.ptr;
1331 
1332 	/* Start writing the data at offset 256k */
1333 	offset = dumper.iova + (256 * SZ_1K);
1334 
1335 	/* Count how many additional registers to get from the HLSQ aperture */
1336 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1337 		count += a5xx_hlsq_aperture_regs[i].count;
1338 
1339 	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1340 	if (!a5xx_state->hlsqregs)
1341 		return;
1342 
1343 	/* Build the crashdump script */
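	/*
	 * Each script entry is a pair of 64-bit words: the first holds the
	 * value to write (for a register write) or the destination buffer
	 * address (for a read-back), and the second packs the register offset
	 * into the upper bits along with a write flag and a dword count, as
	 * assembled below.
	 */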
1344 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1345 		u32 type = a5xx_hlsq_aperture_regs[i].type;
1346 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1347 
1348 		/* Write the register to select the desired bank */
1349 		*ptr++ = ((u64) type << 8);
1350 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1351 			(1 << 21) | 1;
1352 
1353 		*ptr++ = offset;
1354 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1355 			| c;
1356 
1357 		offset += c * sizeof(u32);
1358 	}
1359 
1360 	/* Write two zeros to close off the script */
1361 	*ptr++ = 0;
1362 	*ptr++ = 0;
1363 
1364 	if (a5xx_crashdumper_run(gpu, &dumper)) {
1365 		kfree(a5xx_state->hlsqregs);
1366 		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1367 		return;
1368 	}
1369 
1370 	/* Copy the data from the crashdumper to the state */
1371 	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1372 		count * sizeof(u32));
1373 
1374 	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1375 }
1376 
1377 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1378 {
1379 	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1380 			GFP_KERNEL);
1381 
1382 	if (!a5xx_state)
1383 		return ERR_PTR(-ENOMEM);
1384 
1385 	/* Temporarily disable hardware clock gating before reading the hw */
1386 	a5xx_set_hwcg(gpu, false);
1387 
1388 	/* First get the generic state from the adreno core */
1389 	adreno_gpu_state_get(gpu, &(a5xx_state->base));
1390 
1391 	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1392 
1393 	/* Get the HLSQ regs with the help of the crashdumper */
1394 	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1395 
1396 	a5xx_set_hwcg(gpu, true);
1397 
1398 	return &a5xx_state->base;
1399 }
1400 
1401 static void a5xx_gpu_state_destroy(struct kref *kref)
1402 {
1403 	struct msm_gpu_state *state = container_of(kref,
1404 		struct msm_gpu_state, ref);
1405 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1406 		struct a5xx_gpu_state, base);
1407 
1408 	kfree(a5xx_state->hlsqregs);
1409 
1410 	adreno_gpu_state_destroy(state);
1411 	kfree(a5xx_state);
1412 }
1413 
1414 static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1415 {
1416 	if (IS_ERR_OR_NULL(state))
1417 		return 1;
1418 
1419 	return kref_put(&state->ref, a5xx_gpu_state_destroy);
1420 }
1421 
1422 
1423 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1424 static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1425 		      struct drm_printer *p)
1426 {
1427 	int i, j;
1428 	u32 pos = 0;
1429 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1430 		struct a5xx_gpu_state, base);
1431 
1432 	if (IS_ERR_OR_NULL(state))
1433 		return;
1434 
1435 	adreno_show(gpu, state, p);
1436 
1437 	/* Dump the additional a5xx HLSQ registers */
1438 	if (!a5xx_state->hlsqregs)
1439 		return;
1440 
1441 	drm_printf(p, "registers-hlsq:\n");
1442 
1443 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1444 		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1445 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1446 
1447 		for (j = 0; j < c; j++, pos++, o++) {
1448 			/*
1449 			 * To keep the crashdump simple we pull the entire range
1450 			 * for each register type but not all of the registers
1451 			 * in the range are valid. Fortunately invalid registers
1452 			 * stick out like a sore thumb with a value of
1453 			 * 0xdeadbeef
1454 			 */
1455 			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1456 				continue;
1457 
1458 			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1459 				o << 2, a5xx_state->hlsqregs[pos]);
1460 		}
1461 	}
1462 }
1463 #endif
1464 
1465 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1466 {
1467 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1468 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1469 
1470 	return a5xx_gpu->cur_ring;
1471 }
1472 
1473 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1474 {
1475 	u64 busy_cycles, busy_time;
1476 
1477 	/* Only read the gpu busy if the hardware is already active */
1478 	if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
1479 		return 0;
1480 
1481 	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1482 			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1483 
1484 	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1485 	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1486 
1487 	gpu->devfreq.busy_cycles = busy_cycles;
1488 
1489 	pm_runtime_put(&gpu->pdev->dev);
1490 
1491 	if (WARN_ON(busy_time > ~0LU))
1492 		return ~0LU;
1493 
1494 	return (unsigned long)busy_time;
1495 }
1496 
1497 static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1498 {
1499 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1500 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1501 
1502 	if (a5xx_gpu->has_whereami)
1503 		return a5xx_gpu->shadow[ring->id];
1504 
1505 	return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
1506 }
1507 
1508 static const struct adreno_gpu_funcs funcs = {
1509 	.base = {
1510 		.get_param = adreno_get_param,
1511 		.hw_init = a5xx_hw_init,
1512 		.pm_suspend = a5xx_pm_suspend,
1513 		.pm_resume = a5xx_pm_resume,
1514 		.recover = a5xx_recover,
1515 		.submit = a5xx_submit,
1516 		.active_ring = a5xx_active_ring,
1517 		.irq = a5xx_irq,
1518 		.destroy = a5xx_destroy,
1519 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1520 		.show = a5xx_show,
1521 #endif
1522 #if defined(CONFIG_DEBUG_FS)
1523 		.debugfs_init = a5xx_debugfs_init,
1524 #endif
1525 		.gpu_busy = a5xx_gpu_busy,
1526 		.gpu_state_get = a5xx_gpu_state_get,
1527 		.gpu_state_put = a5xx_gpu_state_put,
1528 		.create_address_space = adreno_iommu_create_address_space,
1529 		.get_rptr = a5xx_get_rptr,
1530 	},
1531 	.get_timestamp = a5xx_get_timestamp,
1532 };
1533 
1534 static void check_speed_bin(struct device *dev)
1535 {
1536 	struct nvmem_cell *cell;
1537 	u32 val;
1538 
1539 	/*
1540 	 * If the OPP table specifies an opp-supported-hw property then we have
1541 	 * to set something with dev_pm_opp_set_supported_hw() or the table
1542 	 * doesn't get populated. So pick an arbitrary value that ensures the
1543 	 * default frequencies are selected but doesn't conflict with any
1544 	 * actual bins.
1545 	 */
1546 	val = 0x80;
1547 
1548 	cell = nvmem_cell_get(dev, "speed_bin");
1549 
1550 	if (!IS_ERR(cell)) {
1551 		void *buf = nvmem_cell_read(cell, NULL);
1552 
1553 		if (!IS_ERR(buf)) {
1554 			u8 bin = *((u8 *) buf);
1555 
1556 			val = (1 << bin);
1557 			kfree(buf);
1558 		}
1559 
1560 		nvmem_cell_put(cell);
1561 	}
1562 
1563 	dev_pm_opp_set_supported_hw(dev, &val, 1);
1564 }
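
/*
 * For reference, a purely illustrative OPP entry (not taken from any real
 * dts) that is only valid for speed bin 2 would carry:
 *
 *	opp-supported-hw = <0x4>;
 *
 * which the OPP core matches against the (1 << bin) value programmed by
 * check_speed_bin() above.
 */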
1565 
1566 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1567 {
1568 	struct msm_drm_private *priv = dev->dev_private;
1569 	struct platform_device *pdev = priv->gpu_pdev;
1570 	struct a5xx_gpu *a5xx_gpu = NULL;
1571 	struct adreno_gpu *adreno_gpu;
1572 	struct msm_gpu *gpu;
1573 	int ret;
1574 
1575 	if (!pdev) {
1576 		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1577 		return ERR_PTR(-ENXIO);
1578 	}
1579 
1580 	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1581 	if (!a5xx_gpu)
1582 		return ERR_PTR(-ENOMEM);
1583 
1584 	adreno_gpu = &a5xx_gpu->base;
1585 	gpu = &adreno_gpu->base;
1586 
1587 	adreno_gpu->registers = a5xx_registers;
1588 
1589 	a5xx_gpu->lm_leakage = 0x4E001A;
1590 
1591 	check_speed_bin(&pdev->dev);
1592 
1593 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1594 	if (ret) {
1595 		a5xx_destroy(&(a5xx_gpu->base.base));
1596 		return ERR_PTR(ret);
1597 	}
1598 
1599 	if (gpu->aspace)
1600 		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1601 
1602 	/* Set up the preemption specific bits and pieces for each ringbuffer */
1603 	a5xx_preempt_init(gpu);
1604 
1605 	return gpu;
1606 }
1607