xref: /linux/drivers/gpu/drm/msm/adreno/a3xx_gpu.c (revision 6477bd5ef0f696484fc12ae7a3902e038df1f71d)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2013 Red Hat
4  * Author: Rob Clark <robdclark@gmail.com>
5  *
6  * Copyright (c) 2014 The Linux Foundation. All rights reserved.
7  */
8 
9 #include "a3xx_gpu.h"
10 
/*
 * Set of RBBM INT0 sources that a3xx_hw_init() writes to
 * REG_A3XX_RBBM_INT_0_MASK.
 */
#define A3XX_INT0_MASK (			\
	A3XX_INT0_RBBM_AHB_ERROR		| \
	A3XX_INT0_RBBM_ATB_BUS_OVERFLOW		| \
	A3XX_INT0_CP_T0_PACKET_IN_IB		| \
	A3XX_INT0_CP_OPCODE_ERROR		| \
	A3XX_INT0_CP_RESERVED_BIT_ERROR		| \
	A3XX_INT0_CP_HW_FAULT			| \
	A3XX_INT0_CP_IB1_INT			| \
	A3XX_INT0_CP_IB2_INT			| \
	A3XX_INT0_CP_RB_INT			| \
	A3XX_INT0_CP_REG_PROTECT_FAULT		| \
	A3XX_INT0_CP_AHB_ERROR_HALT		| \
	A3XX_INT0_CACHE_FLUSH_TS		| \
	A3XX_INT0_UCHE_OOB_ACCESS)
25 
26 extern bool hang_debug;
27 
28 static void a3xx_dump(struct msm_gpu *gpu);
29 static bool a3xx_idle(struct msm_gpu *gpu);
30 
31 static void a3xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
32 {
33 	struct msm_ringbuffer *ring = submit->ring;
34 	unsigned int i;
35 
36 	for (i = 0; i < submit->nr_cmds; i++) {
37 		switch (submit->cmd[i].type) {
38 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
39 			/* ignore IB-targets */
40 			break;
41 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
42 			/* ignore if there has not been a ctx switch: */
43 			if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
44 				break;
45 			fallthrough;
46 		case MSM_SUBMIT_CMD_BUF:
47 			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
48 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
49 			OUT_RING(ring, submit->cmd[i].size);
50 			OUT_PKT2(ring);
51 			break;
52 		}
53 	}
54 
55 	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
56 	OUT_RING(ring, submit->seqno);
57 
58 	/* Flush HLSQ lazy updates to make sure there is nothing
59 	 * pending for indirect loads after the timestamp has
60 	 * passed:
61 	 */
62 	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
63 	OUT_RING(ring, HLSQ_FLUSH);
64 
65 	/* wait for idle before cache flush/interrupt */
66 	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
67 	OUT_RING(ring, 0x00000000);
68 
69 	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
70 	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
71 	OUT_RING(ring, CACHE_FLUSH_TS | CP_EVENT_WRITE_0_IRQ);
72 	OUT_RING(ring, rbmemptr(ring, fence));
73 	OUT_RING(ring, submit->seqno);
74 
75 #if 0
76 	/* Dummy set-constant to trigger context rollover */
77 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
78 	OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
79 	OUT_RING(ring, 0x00000000);
80 #endif
81 
82 	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
83 }
84 
85 static bool a3xx_me_init(struct msm_gpu *gpu)
86 {
87 	struct msm_ringbuffer *ring = gpu->rb[0];
88 
89 	OUT_PKT3(ring, CP_ME_INIT, 17);
90 	OUT_RING(ring, 0x000003f7);
91 	OUT_RING(ring, 0x00000000);
92 	OUT_RING(ring, 0x00000000);
93 	OUT_RING(ring, 0x00000000);
94 	OUT_RING(ring, 0x00000080);
95 	OUT_RING(ring, 0x00000100);
96 	OUT_RING(ring, 0x00000180);
97 	OUT_RING(ring, 0x00006600);
98 	OUT_RING(ring, 0x00000150);
99 	OUT_RING(ring, 0x0000014e);
100 	OUT_RING(ring, 0x00000154);
101 	OUT_RING(ring, 0x00000001);
102 	OUT_RING(ring, 0x00000000);
103 	OUT_RING(ring, 0x00000000);
104 	OUT_RING(ring, 0x00000000);
105 	OUT_RING(ring, 0x00000000);
106 	OUT_RING(ring, 0x00000000);
107 
108 	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
109 	return a3xx_idle(gpu);
110 }
111 
112 static int a3xx_hw_init(struct msm_gpu *gpu)
113 {
114 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
115 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
116 	uint32_t *ptr, len;
117 	int i, ret;
118 
119 	DBG("%s", gpu->name);
120 
121 	if (adreno_is_a305(adreno_gpu)) {
122 		/* Set up 16 deep read/write request queues: */
123 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
124 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
125 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
126 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
127 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
128 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
129 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
130 		/* Enable WR-REQ: */
131 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
132 		/* Set up round robin arbitration between both AXI ports: */
133 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
134 		/* Set up AOOO: */
135 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
136 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
137 	} else if (adreno_is_a305b(adreno_gpu)) {
138 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x00181818);
139 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x00181818);
140 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000018);
141 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000018);
142 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000303);
143 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
144 	} else if (adreno_is_a306(adreno_gpu)) {
145 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
146 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
147 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
148 	} else if (adreno_is_a306a(adreno_gpu)) {
149 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
150 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000010);
151 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000010);
152 	} else if (adreno_is_a320(adreno_gpu)) {
153 		/* Set up 16 deep read/write request queues: */
154 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
155 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
156 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
157 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
158 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
159 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
160 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
161 		/* Enable WR-REQ: */
162 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
163 		/* Set up round robin arbitration between both AXI ports: */
164 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
165 		/* Set up AOOO: */
166 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
167 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
168 		/* Enable 1K sort: */
169 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
170 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
171 
172 	} else if (adreno_is_a330v2(adreno_gpu)) {
173 		/*
174 		 * Most of the VBIF registers on 8974v2 have the correct
175 		 * values at power on, so we won't modify those if we don't
176 		 * need to
177 		 */
178 		/* Enable 1k sort: */
179 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
180 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
181 		/* Enable WR-REQ: */
182 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
183 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
184 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
185 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
186 
187 	} else if (adreno_is_a330(adreno_gpu)) {
188 		/* Set up 16 deep read/write request queues: */
189 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
190 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
191 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
192 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
193 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
194 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
195 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
196 		/* Enable WR-REQ: */
197 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
198 		/* Set up round robin arbitration between both AXI ports: */
199 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
200 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
201 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
202 		/* Set up AOOO: */
203 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
204 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
205 		/* Enable 1K sort: */
206 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
207 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
208 		/* Disable VBIF clock gating. This is to enable AXI running
209 		 * higher frequency than GPU:
210 		 */
211 		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);
212 
213 	} else {
214 		BUG();
215 	}
216 
217 	/* Make all blocks contribute to the GPU BUSY perf counter: */
218 	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
219 
220 	/* Tune the hystersis counters for SP and CP idle detection: */
221 	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
222 	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
223 
224 	/* Enable the RBBM error reporting bits.  This lets us get
225 	 * useful information on failure:
226 	 */
227 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);
228 
229 	/* Enable AHB error reporting: */
230 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);
231 
232 	/* Turn on the power counters: */
233 	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);
234 
235 	/* Turn on hang detection - this spews a lot of useful information
236 	 * into the RBBM registers on a hang:
237 	 */
238 	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);
239 
240 	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
241 	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
242 
243 	/* Enable Clock gating: */
244 	if (adreno_is_a305b(adreno_gpu) ||
245 	    adreno_is_a306(adreno_gpu) ||
246 	    adreno_is_a306a(adreno_gpu))
247 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
248 	else if (adreno_is_a320(adreno_gpu))
249 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
250 	else if (adreno_is_a330v2(adreno_gpu))
251 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
252 	else if (adreno_is_a330(adreno_gpu))
253 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);
254 
255 	if (adreno_is_a330v2(adreno_gpu))
256 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
257 	else if (adreno_is_a330(adreno_gpu))
258 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);
259 
260 	/* Set the OCMEM base address for A330, etc */
261 	if (a3xx_gpu->ocmem.hdl) {
262 		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
263 			(unsigned int)(a3xx_gpu->ocmem.base >> 14));
264 	}
265 
266 	/* Turn on performance counters: */
267 	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);
268 
269 	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);
270 
271 	ret = adreno_hw_init(gpu);
272 	if (ret)
273 		return ret;
274 
275 	/*
276 	 * Use the default ringbuffer size and block size but disable the RPTR
277 	 * shadow
278 	 */
279 	gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
280 		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
281 
282 	/* Set the ringbuffer address */
283 	gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));
284 
285 	/* setup access protection: */
286 	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);
287 
288 	/* RBBM registers */
289 	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
290 	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
291 	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
292 	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
293 	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
294 	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);
295 
296 	/* CP registers */
297 	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
298 	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
299 	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
300 	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
301 	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);
302 
303 	/* RB registers */
304 	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);
305 
306 	/* VBIF registers */
307 	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);
308 
309 	/* NOTE: PM4/micro-engine firmware registers look to be the same
310 	 * for a2xx and a3xx.. we could possibly push that part down to
311 	 * adreno_gpu base class.  Or push both PM4 and PFP but
312 	 * parameterize the pfp ucode addr/data registers..
313 	 */
314 
315 	/* Load PM4: */
316 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
317 	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
318 	DBG("loading PM4 ucode version: %x", ptr[1]);
319 
320 	gpu_write(gpu, REG_AXXX_CP_DEBUG,
321 			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
322 			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
323 	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
324 	for (i = 1; i < len; i++)
325 		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);
326 
327 	/* Load PFP: */
328 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
329 	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
330 	DBG("loading PFP ucode version: %x", ptr[5]);
331 
332 	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
333 	for (i = 1; i < len; i++)
334 		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
335 
336 	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
337 	if (adreno_is_a305(adreno_gpu) ||
338 	    adreno_is_a306(adreno_gpu) ||
339 	    adreno_is_a306a(adreno_gpu) ||
340 	    adreno_is_a320(adreno_gpu)) {
341 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
342 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
343 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
344 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
345 	} else if (adreno_is_a330(adreno_gpu) || adreno_is_a305b(adreno_gpu)) {
346 		/* NOTE: this (value take from downstream android driver)
347 		 * includes some bits outside of the known bitfields.  But
348 		 * A330 has this "MERCIU queue" thing too, which might
349 		 * explain a new bitfield or reshuffling:
350 		 */
351 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
352 	}
353 
354 	/* clear ME_HALT to start micro engine */
355 	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);
356 
357 	return a3xx_me_init(gpu) ? 0 : -EINVAL;
358 }
359 
360 static void a3xx_recover(struct msm_gpu *gpu)
361 {
362 	int i;
363 
364 	adreno_dump_info(gpu);
365 
366 	for (i = 0; i < 8; i++) {
367 		printk("CP_SCRATCH_REG%d: %u\n", i,
368 			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
369 	}
370 
371 	/* dump registers before resetting gpu, if enabled: */
372 	if (hang_debug)
373 		a3xx_dump(gpu);
374 
375 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
376 	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
377 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
378 	adreno_recover(gpu);
379 }
380 
381 static void a3xx_destroy(struct msm_gpu *gpu)
382 {
383 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
384 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
385 
386 	DBG("%s", gpu->name);
387 
388 	adreno_gpu_cleanup(adreno_gpu);
389 
390 	adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);
391 
392 	kfree(a3xx_gpu);
393 }
394 
395 static bool a3xx_idle(struct msm_gpu *gpu)
396 {
397 	/* wait for ringbuffer to drain: */
398 	if (!adreno_idle(gpu, gpu->rb[0]))
399 		return false;
400 
401 	/* then wait for GPU to finish: */
402 	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
403 			A3XX_RBBM_STATUS_GPU_BUSY))) {
404 		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
405 
406 		/* TODO maybe we need to reset GPU here to recover from hang? */
407 		return false;
408 	}
409 
410 	return true;
411 }
412 
413 static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
414 {
415 	uint32_t status;
416 
417 	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
418 	DBG("%s: %08x", gpu->name, status);
419 
420 	// TODO
421 
422 	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);
423 
424 	msm_gpu_retire(gpu);
425 
426 	return IRQ_HANDLED;
427 }
428 
/*
 * Register list installed as adreno_gpu->registers by a3xx_gpu_init().
 * The list is terminated by a ~0 sentinel.
 *
 * NOTE(review): the values look like inclusive (first, last) register
 * offset pairs, laid out two pairs per line below -- confirm against the
 * adreno core code that walks this table.
 */
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002,   0x0010, 0x0012,
	0x0018, 0x0018,   0x0020, 0x0027,
	0x0029, 0x002b,   0x002e, 0x0033,
	0x0040, 0x0042,   0x0050, 0x005c,
	0x0060, 0x006c,   0x0080, 0x0082,
	0x0084, 0x0088,   0x0090, 0x00e5,
	0x00ea, 0x00ed,   0x0100, 0x0100,
	0x0110, 0x0123,   0x01c0, 0x01c1,
	0x01c3, 0x01c5,   0x01c7, 0x01c7,
	0x01d5, 0x01d9,   0x01dc, 0x01dd,
	0x01ea, 0x01ea,   0x01ee, 0x01f1,
	0x01f5, 0x01f5,   0x01fc, 0x01ff,
	0x0440, 0x0440,   0x0443, 0x0443,
	0x0445, 0x0445,   0x044d, 0x044f,
	0x0452, 0x0452,   0x0454, 0x046f,
	0x047c, 0x047c,   0x047f, 0x047f,
	0x0578, 0x057f,   0x0600, 0x0602,
	0x0605, 0x0607,   0x060a, 0x060e,
	0x0612, 0x0614,   0x0c01, 0x0c02,
	0x0c06, 0x0c1d,   0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b,   0x0c80, 0x0c80,
	0x0c88, 0x0c8b,   0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1,   0x0cc6, 0x0cc7,
	0x0ce4, 0x0ce5,   0x0e00, 0x0e05,
	0x0e0c, 0x0e0c,   0x0e22, 0x0e23,
	0x0e41, 0x0e45,   0x0e64, 0x0e65,
	0x0e80, 0x0e82,   0x0e84, 0x0e89,
	0x0ea0, 0x0ea1,   0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb,   0x0ee0, 0x0ee0,
	0x0f00, 0x0f01,   0x0f03, 0x0f09,
	0x2040, 0x2040,   0x2044, 0x2044,
	0x2048, 0x204d,   0x2068, 0x2069,
	0x206c, 0x206d,   0x2070, 0x2070,
	0x2072, 0x2072,   0x2074, 0x2075,
	0x2079, 0x207a,   0x20c0, 0x20d3,
	0x20e4, 0x20ef,   0x2100, 0x2109,
	0x210c, 0x210c,   0x210e, 0x210e,
	0x2110, 0x2111,   0x2114, 0x2115,
	0x21e4, 0x21e4,   0x21ea, 0x21ea,
	0x21ec, 0x21ed,   0x21f0, 0x21f0,
	0x2200, 0x2212,   0x2214, 0x2217,
	0x221a, 0x221a,   0x2240, 0x227e,
	0x2280, 0x228b,   0x22c0, 0x22c0,
	0x22c4, 0x22ce,   0x22d0, 0x22d8,
	0x22df, 0x22e6,   0x22e8, 0x22e9,
	0x22ec, 0x22ec,   0x22f0, 0x22f7,
	0x22ff, 0x22ff,   0x2340, 0x2343,
	0x2440, 0x2440,   0x2444, 0x2444,
	0x2448, 0x244d,   0x2468, 0x2469,
	0x246c, 0x246d,   0x2470, 0x2470,
	0x2472, 0x2472,   0x2474, 0x2475,
	0x2479, 0x247a,   0x24c0, 0x24d3,
	0x24e4, 0x24ef,   0x2500, 0x2509,
	0x250c, 0x250c,   0x250e, 0x250e,
	0x2510, 0x2511,   0x2514, 0x2515,
	0x25e4, 0x25e4,   0x25ea, 0x25ea,
	0x25ec, 0x25ed,   0x25f0, 0x25f0,
	0x2600, 0x2612,   0x2614, 0x2617,
	0x261a, 0x261a,   0x2640, 0x267e,
	0x2680, 0x268b,   0x26c0, 0x26c0,
	0x26c4, 0x26ce,   0x26d0, 0x26d8,
	0x26df, 0x26e6,   0x26e8, 0x26e9,
	0x26ec, 0x26ec,   0x26f0, 0x26f7,
	0x26ff, 0x26ff,   0x2740, 0x2743,
	0x300c, 0x300e,   0x301c, 0x301d,
	0x302a, 0x302a,   0x302c, 0x302d,
	0x3030, 0x3031,   0x3034, 0x3036,
	0x303c, 0x303c,   0x305e, 0x305f,
	~0   /* sentinel */
};
466 
467 /* would be nice to not have to duplicate the _show() stuff with printk(): */
468 static void a3xx_dump(struct msm_gpu *gpu)
469 {
470 	printk("status:   %08x\n",
471 			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
472 	adreno_dump(gpu);
473 }
474 
475 static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
476 {
477 	struct msm_gpu_state *state = kzalloc_obj(*state);
478 
479 	if (!state)
480 		return ERR_PTR(-ENOMEM);
481 
482 	adreno_gpu_state_get(gpu, state);
483 
484 	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);
485 
486 	return state;
487 }
488 
489 static u64 a3xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
490 {
491 	u64 busy_cycles;
492 
493 	busy_cycles = gpu_read64(gpu, REG_A3XX_RBBM_PERFCTR_RBBM_1_LO);
494 	*out_sample_rate = clk_get_rate(gpu->core_clk);
495 
496 	return busy_cycles;
497 }
498 
499 static u32 a3xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
500 {
501 	ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR);
502 	return ring->memptrs->rptr;
503 }
504 
505 static struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
506 {
507 	struct a3xx_gpu *a3xx_gpu = NULL;
508 	struct adreno_gpu *adreno_gpu;
509 	struct msm_gpu *gpu;
510 	struct msm_drm_private *priv = dev->dev_private;
511 	struct platform_device *pdev = priv->gpu_pdev;
512 	struct adreno_platform_config *config = pdev->dev.platform_data;
513 	struct icc_path *ocmem_icc_path;
514 	struct icc_path *icc_path;
515 	int ret;
516 
517 	if (!pdev) {
518 		DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
519 		ret = -ENXIO;
520 		goto fail;
521 	}
522 
523 	a3xx_gpu = kzalloc_obj(*a3xx_gpu);
524 	if (!a3xx_gpu) {
525 		ret = -ENOMEM;
526 		goto fail;
527 	}
528 
529 	adreno_gpu = &a3xx_gpu->base;
530 	gpu = &adreno_gpu->base;
531 
532 	adreno_gpu->registers = a3xx_registers;
533 
534 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, config->info->funcs, 1);
535 	if (ret)
536 		goto fail;
537 
538 	/* if needed, allocate gmem: */
539 	if (adreno_is_a330(adreno_gpu) || adreno_is_a305b(adreno_gpu)) {
540 		ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
541 					    adreno_gpu, &a3xx_gpu->ocmem);
542 		if (ret)
543 			goto fail;
544 	}
545 
546 	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
547 	if (IS_ERR(icc_path)) {
548 		ret = PTR_ERR(icc_path);
549 		goto fail;
550 	}
551 
552 	ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
553 	if (IS_ERR(ocmem_icc_path)) {
554 		ret = PTR_ERR(ocmem_icc_path);
555 		/* allow -ENODATA, ocmem icc is optional */
556 		if (ret != -ENODATA)
557 			goto fail;
558 		ocmem_icc_path = NULL;
559 	}
560 
561 
562 	/*
563 	 * Set the ICC path to maximum speed for now by multiplying the fastest
564 	 * frequency by the bus width (8). We'll want to scale this later on to
565 	 * improve battery life.
566 	 */
567 	icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
568 	icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
569 
570 	return gpu;
571 
572 fail:
573 	if (a3xx_gpu)
574 		a3xx_destroy(&a3xx_gpu->base.base);
575 
576 	return ERR_PTR(ret);
577 }
578 
579 const struct adreno_gpu_funcs a3xx_gpu_funcs = {
580 	.base = {
581 		.get_param = adreno_get_param,
582 		.set_param = adreno_set_param,
583 		.hw_init = a3xx_hw_init,
584 		.pm_suspend = msm_gpu_pm_suspend,
585 		.pm_resume = msm_gpu_pm_resume,
586 		.recover = a3xx_recover,
587 		.submit = a3xx_submit,
588 		.active_ring = adreno_active_ring,
589 		.irq = a3xx_irq,
590 		.destroy = a3xx_destroy,
591 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
592 		.show = adreno_show,
593 #endif
594 		.gpu_busy = a3xx_gpu_busy,
595 		.gpu_state_get = a3xx_gpu_state_get,
596 		.gpu_state_put = adreno_gpu_state_put,
597 		.create_vm = adreno_create_vm,
598 		.get_rptr = a3xx_get_rptr,
599 	},
600 	.init = a3xx_gpu_init,
601 };
602