xref: /linux/drivers/gpu/drm/msm/adreno/a3xx_gpu.c (revision 3a546a67a4cc251b4ec2ba02d8f7aacf0bfc3148)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2013 Red Hat
4  * Author: Rob Clark <robdclark@gmail.com>
5  *
6  * Copyright (c) 2014 The Linux Foundation. All rights reserved.
7  */
8 
9 #include "a3xx_gpu.h"
10 
/*
 * Set of INT0 interrupt bits that a3xx_hw_init() programs into
 * REG_A3XX_RBBM_INT_0_MASK.
 */
11 #define A3XX_INT0_MASK \
12 	(A3XX_INT0_RBBM_AHB_ERROR |        \
13 	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
14 	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
15 	 A3XX_INT0_CP_OPCODE_ERROR |       \
16 	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
17 	 A3XX_INT0_CP_HW_FAULT |           \
18 	 A3XX_INT0_CP_IB1_INT |            \
19 	 A3XX_INT0_CP_IB2_INT |            \
20 	 A3XX_INT0_CP_RB_INT |             \
21 	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
22 	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
23 	 A3XX_INT0_CACHE_FLUSH_TS |        \
24 	 A3XX_INT0_UCHE_OOB_ACCESS)
25 
/*
 * Declared extern here (no definition is visible in this file);
 * a3xx_recover() checks it to decide whether to dump registers before reset.
 */
26 extern bool hang_debug;
27 
/* Forward declarations: both are called before their definitions below. */
28 static void a3xx_dump(struct msm_gpu *gpu);
29 static bool a3xx_idle(struct msm_gpu *gpu);
30 
31 static void a3xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
32 {
33 	struct msm_ringbuffer *ring = submit->ring;
34 	unsigned int i;
35 
36 	for (i = 0; i < submit->nr_cmds; i++) {
37 		switch (submit->cmd[i].type) {
38 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
39 			/* ignore IB-targets */
40 			break;
41 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
42 			/* ignore if there has not been a ctx switch: */
43 			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
44 				break;
45 			fallthrough;
46 		case MSM_SUBMIT_CMD_BUF:
47 			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
48 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
49 			OUT_RING(ring, submit->cmd[i].size);
50 			OUT_PKT2(ring);
51 			break;
52 		}
53 	}
54 
55 	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
56 	OUT_RING(ring, submit->seqno);
57 
58 	/* Flush HLSQ lazy updates to make sure there is nothing
59 	 * pending for indirect loads after the timestamp has
60 	 * passed:
61 	 */
62 	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
63 	OUT_RING(ring, HLSQ_FLUSH);
64 
65 	/* wait for idle before cache flush/interrupt */
66 	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
67 	OUT_RING(ring, 0x00000000);
68 
69 	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
70 	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
71 	OUT_RING(ring, CACHE_FLUSH_TS | CP_EVENT_WRITE_0_IRQ);
72 	OUT_RING(ring, rbmemptr(ring, fence));
73 	OUT_RING(ring, submit->seqno);
74 
75 #if 0
76 	/* Dummy set-constant to trigger context rollover */
77 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
78 	OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
79 	OUT_RING(ring, 0x00000000);
80 #endif
81 
82 	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
83 }
84 
85 static bool a3xx_me_init(struct msm_gpu *gpu)
86 {
87 	struct msm_ringbuffer *ring = gpu->rb[0];
88 
89 	OUT_PKT3(ring, CP_ME_INIT, 17);
90 	OUT_RING(ring, 0x000003f7);
91 	OUT_RING(ring, 0x00000000);
92 	OUT_RING(ring, 0x00000000);
93 	OUT_RING(ring, 0x00000000);
94 	OUT_RING(ring, 0x00000080);
95 	OUT_RING(ring, 0x00000100);
96 	OUT_RING(ring, 0x00000180);
97 	OUT_RING(ring, 0x00006600);
98 	OUT_RING(ring, 0x00000150);
99 	OUT_RING(ring, 0x0000014e);
100 	OUT_RING(ring, 0x00000154);
101 	OUT_RING(ring, 0x00000001);
102 	OUT_RING(ring, 0x00000000);
103 	OUT_RING(ring, 0x00000000);
104 	OUT_RING(ring, 0x00000000);
105 	OUT_RING(ring, 0x00000000);
106 	OUT_RING(ring, 0x00000000);
107 
108 	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
109 	return a3xx_idle(gpu);
110 }
111 
112 static int a3xx_hw_init(struct msm_gpu *gpu)
113 {
114 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
115 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
116 	uint32_t *ptr, len;
117 	int i, ret;
118 
119 	DBG("%s", gpu->name);
120 
121 	if (adreno_is_a305(adreno_gpu)) {
122 		/* Set up 16 deep read/write request queues: */
123 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
124 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
125 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
126 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
127 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
128 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
129 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
130 		/* Enable WR-REQ: */
131 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
132 		/* Set up round robin arbitration between both AXI ports: */
133 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
134 		/* Set up AOOO: */
135 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
136 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
137 	} else if (adreno_is_a305b(adreno_gpu)) {
138 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x00181818);
139 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x00181818);
140 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000018);
141 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000018);
142 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000303);
143 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
144 	} else if (adreno_is_a306(adreno_gpu)) {
145 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
146 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
147 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
148 	} else if (adreno_is_a306a(adreno_gpu)) {
149 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
150 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000010);
151 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000010);
152 	} else if (adreno_is_a320(adreno_gpu)) {
153 		/* Set up 16 deep read/write request queues: */
154 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
155 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
156 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
157 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
158 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
159 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
160 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
161 		/* Enable WR-REQ: */
162 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
163 		/* Set up round robin arbitration between both AXI ports: */
164 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
165 		/* Set up AOOO: */
166 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
167 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
168 		/* Enable 1K sort: */
169 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
170 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
171 
172 	} else if (adreno_is_a330v2(adreno_gpu)) {
173 		/*
174 		 * Most of the VBIF registers on 8974v2 have the correct
175 		 * values at power on, so we won't modify those if we don't
176 		 * need to
177 		 */
178 		/* Enable 1k sort: */
179 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
180 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
181 		/* Enable WR-REQ: */
182 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
183 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
184 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
185 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
186 
187 	} else if (adreno_is_a330(adreno_gpu)) {
188 		/* Set up 16 deep read/write request queues: */
189 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
190 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
191 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
192 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
193 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
194 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
195 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
196 		/* Enable WR-REQ: */
197 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
198 		/* Set up round robin arbitration between both AXI ports: */
199 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
200 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
201 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
202 		/* Set up AOOO: */
203 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
204 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
205 		/* Enable 1K sort: */
206 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
207 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
208 		/* Disable VBIF clock gating. This is to enable AXI running
209 		 * higher frequency than GPU:
210 		 */
211 		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);
212 
213 	} else {
214 		BUG();
215 	}
216 
217 	/* Make all blocks contribute to the GPU BUSY perf counter: */
218 	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
219 
220 	/* Tune the hystersis counters for SP and CP idle detection: */
221 	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
222 	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
223 
224 	/* Enable the RBBM error reporting bits.  This lets us get
225 	 * useful information on failure:
226 	 */
227 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);
228 
229 	/* Enable AHB error reporting: */
230 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);
231 
232 	/* Turn on the power counters: */
233 	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);
234 
235 	/* Turn on hang detection - this spews a lot of useful information
236 	 * into the RBBM registers on a hang:
237 	 */
238 	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);
239 
240 	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
241 	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
242 
243 	/* Enable Clock gating: */
244 	if (adreno_is_a305b(adreno_gpu) ||
245 	    adreno_is_a306(adreno_gpu) ||
246 	    adreno_is_a306a(adreno_gpu))
247 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
248 	else if (adreno_is_a320(adreno_gpu))
249 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
250 	else if (adreno_is_a330v2(adreno_gpu))
251 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
252 	else if (adreno_is_a330(adreno_gpu))
253 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);
254 
255 	if (adreno_is_a330v2(adreno_gpu))
256 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
257 	else if (adreno_is_a330(adreno_gpu))
258 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);
259 
260 	/* Set the OCMEM base address for A330, etc */
261 	if (a3xx_gpu->ocmem.hdl) {
262 		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
263 			(unsigned int)(a3xx_gpu->ocmem.base >> 14));
264 	}
265 
266 	/* Turn on performance counters: */
267 	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);
268 
269 	/* Enable the perfcntrs that we use.. */
270 	for (i = 0; i < gpu->num_perfcntrs; i++) {
271 		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
272 		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
273 	}
274 
275 	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);
276 
277 	ret = adreno_hw_init(gpu);
278 	if (ret)
279 		return ret;
280 
281 	/*
282 	 * Use the default ringbuffer size and block size but disable the RPTR
283 	 * shadow
284 	 */
285 	gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
286 		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
287 
288 	/* Set the ringbuffer address */
289 	gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));
290 
291 	/* setup access protection: */
292 	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);
293 
294 	/* RBBM registers */
295 	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
296 	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
297 	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
298 	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
299 	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
300 	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);
301 
302 	/* CP registers */
303 	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
304 	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
305 	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
306 	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
307 	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);
308 
309 	/* RB registers */
310 	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);
311 
312 	/* VBIF registers */
313 	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);
314 
315 	/* NOTE: PM4/micro-engine firmware registers look to be the same
316 	 * for a2xx and a3xx.. we could possibly push that part down to
317 	 * adreno_gpu base class.  Or push both PM4 and PFP but
318 	 * parameterize the pfp ucode addr/data registers..
319 	 */
320 
321 	/* Load PM4: */
322 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
323 	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
324 	DBG("loading PM4 ucode version: %x", ptr[1]);
325 
326 	gpu_write(gpu, REG_AXXX_CP_DEBUG,
327 			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
328 			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
329 	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
330 	for (i = 1; i < len; i++)
331 		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);
332 
333 	/* Load PFP: */
334 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
335 	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
336 	DBG("loading PFP ucode version: %x", ptr[5]);
337 
338 	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
339 	for (i = 1; i < len; i++)
340 		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
341 
342 	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
343 	if (adreno_is_a305(adreno_gpu) ||
344 	    adreno_is_a306(adreno_gpu) ||
345 	    adreno_is_a306a(adreno_gpu) ||
346 	    adreno_is_a320(adreno_gpu)) {
347 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
348 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
349 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
350 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
351 	} else if (adreno_is_a330(adreno_gpu) || adreno_is_a305b(adreno_gpu)) {
352 		/* NOTE: this (value take from downstream android driver)
353 		 * includes some bits outside of the known bitfields.  But
354 		 * A330 has this "MERCIU queue" thing too, which might
355 		 * explain a new bitfield or reshuffling:
356 		 */
357 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
358 	}
359 
360 	/* clear ME_HALT to start micro engine */
361 	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);
362 
363 	return a3xx_me_init(gpu) ? 0 : -EINVAL;
364 }
365 
366 static void a3xx_recover(struct msm_gpu *gpu)
367 {
368 	int i;
369 
370 	adreno_dump_info(gpu);
371 
372 	for (i = 0; i < 8; i++) {
373 		printk("CP_SCRATCH_REG%d: %u\n", i,
374 			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
375 	}
376 
377 	/* dump registers before resetting gpu, if enabled: */
378 	if (hang_debug)
379 		a3xx_dump(gpu);
380 
381 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
382 	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
383 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
384 	adreno_recover(gpu);
385 }
386 
387 static void a3xx_destroy(struct msm_gpu *gpu)
388 {
389 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
390 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
391 
392 	DBG("%s", gpu->name);
393 
394 	adreno_gpu_cleanup(adreno_gpu);
395 
396 	adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);
397 
398 	kfree(a3xx_gpu);
399 }
400 
401 static bool a3xx_idle(struct msm_gpu *gpu)
402 {
403 	/* wait for ringbuffer to drain: */
404 	if (!adreno_idle(gpu, gpu->rb[0]))
405 		return false;
406 
407 	/* then wait for GPU to finish: */
408 	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
409 			A3XX_RBBM_STATUS_GPU_BUSY))) {
410 		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
411 
412 		/* TODO maybe we need to reset GPU here to recover from hang? */
413 		return false;
414 	}
415 
416 	return true;
417 }
418 
419 static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
420 {
421 	uint32_t status;
422 
423 	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
424 	DBG("%s: %08x", gpu->name, status);
425 
426 	// TODO
427 
428 	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);
429 
430 	msm_gpu_retire(gpu);
431 
432 	return IRQ_HANDLED;
433 }
434 
/*
 * Register list handed to the adreno core through adreno_gpu->registers
 * (see a3xx_gpu_init()).  The list is terminated by a ~0 sentinel.
 * NOTE(review): entries look like inclusive (first, last) register offset
 * pairs - confirm against the code that consumes the table.
 */
435 static const unsigned int a3xx_registers[] = {
436 	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
437 	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
438 	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
439 	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
440 	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
441 	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
442 	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
443 	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
444 	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
445 	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
446 	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
447 	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
448 	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
449 	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
450 	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
451 	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
452 	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
453 	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
454 	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
455 	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
456 	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
457 	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
458 	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
459 	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2440, 0x2440, 0x2444, 0x2444,
460 	0x2448, 0x244d, 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470,
461 	0x2472, 0x2472, 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3,
462 	0x24e4, 0x24ef, 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e,
463 	0x2510, 0x2511, 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea,
464 	0x25ec, 0x25ed, 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617,
465 	0x261a, 0x261a, 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0,
466 	0x26c4, 0x26ce, 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9,
467 	0x26ec, 0x26ec, 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
468 	0x300c, 0x300e, 0x301c, 0x301d, 0x302a, 0x302a, 0x302c, 0x302d,
469 	0x3030, 0x3031, 0x3034, 0x3036, 0x303c, 0x303c, 0x305e, 0x305f,
470 	~0   /* sentinel: marks the end of the list */
471 };
472 
473 /* would be nice to not have to duplicate the _show() stuff with printk(): */
474 static void a3xx_dump(struct msm_gpu *gpu)
475 {
476 	printk("status:   %08x\n",
477 			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
478 	adreno_dump(gpu);
479 }
480 
481 static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
482 {
483 	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
484 
485 	if (!state)
486 		return ERR_PTR(-ENOMEM);
487 
488 	adreno_gpu_state_get(gpu, state);
489 
490 	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);
491 
492 	return state;
493 }
494 
495 static u64 a3xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
496 {
497 	u64 busy_cycles;
498 
499 	busy_cycles = gpu_read64(gpu, REG_A3XX_RBBM_PERFCTR_RBBM_1_LO);
500 	*out_sample_rate = clk_get_rate(gpu->core_clk);
501 
502 	return busy_cycles;
503 }
504 
505 static u32 a3xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
506 {
507 	ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR);
508 	return ring->memptrs->rptr;
509 }
510 
/*
 * Callback table that a3xx_gpu_init() passes to adreno_gpu_init().  The
 * a3xx_* handlers are defined in this file; the remaining entries are
 * shared adreno_ and msm_gpu_ helpers defined elsewhere.
 */
511 static const struct adreno_gpu_funcs funcs = {
512 	.base = {
513 		.get_param = adreno_get_param,
514 		.set_param = adreno_set_param,
515 		.hw_init = a3xx_hw_init,
516 		.pm_suspend = msm_gpu_pm_suspend,
517 		.pm_resume = msm_gpu_pm_resume,
518 		.recover = a3xx_recover,
519 		.submit = a3xx_submit,
520 		.active_ring = adreno_active_ring,
521 		.irq = a3xx_irq,
522 		.destroy = a3xx_destroy,
/* .show is only provided when debugfs or devcoredump support is built in */
523 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
524 		.show = adreno_show,
525 #endif
526 		.gpu_busy = a3xx_gpu_busy,
527 		.gpu_state_get = a3xx_gpu_state_get,
528 		.gpu_state_put = adreno_gpu_state_put,
529 		.create_address_space = adreno_create_address_space,
530 		.get_rptr = a3xx_get_rptr,
531 	},
532 };
533 
/*
 * Performance counters installed as gpu->perfcntrs by a3xx_gpu_init();
 * a3xx_hw_init() writes each entry's select_val to its select_reg.
 * NOTE(review): the initializers are positional - presumably select
 * register, counter register, select value, name; confirm against
 * struct msm_gpu_perfcntr.
 */
534 static const struct msm_gpu_perfcntr perfcntrs[] = {
535 	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
536 			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
537 	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
538 			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
539 };
540 
541 struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
542 {
543 	struct a3xx_gpu *a3xx_gpu = NULL;
544 	struct adreno_gpu *adreno_gpu;
545 	struct msm_gpu *gpu;
546 	struct msm_drm_private *priv = dev->dev_private;
547 	struct platform_device *pdev = priv->gpu_pdev;
548 	struct icc_path *ocmem_icc_path;
549 	struct icc_path *icc_path;
550 	int ret;
551 
552 	if (!pdev) {
553 		DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
554 		ret = -ENXIO;
555 		goto fail;
556 	}
557 
558 	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
559 	if (!a3xx_gpu) {
560 		ret = -ENOMEM;
561 		goto fail;
562 	}
563 
564 	adreno_gpu = &a3xx_gpu->base;
565 	gpu = &adreno_gpu->base;
566 
567 	gpu->perfcntrs = perfcntrs;
568 	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);
569 
570 	adreno_gpu->registers = a3xx_registers;
571 
572 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
573 	if (ret)
574 		goto fail;
575 
576 	/* if needed, allocate gmem: */
577 	if (adreno_is_a330(adreno_gpu) || adreno_is_a305b(adreno_gpu)) {
578 		ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
579 					    adreno_gpu, &a3xx_gpu->ocmem);
580 		if (ret)
581 			goto fail;
582 	}
583 
584 	if (!gpu->aspace) {
585 		/* TODO we think it is possible to configure the GPU to
586 		 * restrict access to VRAM carveout.  But the required
587 		 * registers are unknown.  For now just bail out and
588 		 * limp along with just modesetting.  If it turns out
589 		 * to not be possible to restrict access, then we must
590 		 * implement a cmdstream validator.
591 		 */
592 		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
593 		if (!allow_vram_carveout) {
594 			ret = -ENXIO;
595 			goto fail;
596 		}
597 	}
598 
599 	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
600 	if (IS_ERR(icc_path)) {
601 		ret = PTR_ERR(icc_path);
602 		goto fail;
603 	}
604 
605 	ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
606 	if (IS_ERR(ocmem_icc_path)) {
607 		ret = PTR_ERR(ocmem_icc_path);
608 		/* allow -ENODATA, ocmem icc is optional */
609 		if (ret != -ENODATA)
610 			goto fail;
611 		ocmem_icc_path = NULL;
612 	}
613 
614 
615 	/*
616 	 * Set the ICC path to maximum speed for now by multiplying the fastest
617 	 * frequency by the bus width (8). We'll want to scale this later on to
618 	 * improve battery life.
619 	 */
620 	icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
621 	icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
622 
623 	return gpu;
624 
625 fail:
626 	if (a3xx_gpu)
627 		a3xx_destroy(&a3xx_gpu->base.base);
628 
629 	return ERR_PTR(ret);
630 }
631