xref: /linux/drivers/gpu/drm/msm/adreno/a3xx_gpu.c (revision 6084a6e23c971ef703229ee1aec68d01688578d6)
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifdef CONFIG_MSM_OCMEM
#  include <mach/ocmem.h>
#endif

#include "a3xx_gpu.h"

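/*
 * Interrupt sources unmasked at hw_init time: mostly the various RBBM/CP
 * error sources, plus the CP ring/IB interrupts; a3xx_irq() just acks
 * whatever fired and runs retire processing:
 */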
#define A3XX_INT0_MASK \
	(A3XX_INT0_RBBM_AHB_ERROR |        \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A3XX_INT0_CP_OPCODE_ERROR |       \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A3XX_INT0_CP_HW_FAULT |           \
	 A3XX_INT0_CP_IB1_INT |            \
	 A3XX_INT0_CP_IB2_INT |            \
	 A3XX_INT0_CP_RB_INT |             \
	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
	 A3XX_INT0_UCHE_OOB_ACCESS)


/* When enabled, dump GPU registers at hang detection time; toggled via
 * module parameter (e.g. msm.hang_debug=1, assuming this file is built
 * into msm.ko):
 */
static bool hang_debug = false;
MODULE_PARM_DESC(hang_debug, "Dump registers when hang is detected (can be slow!)");
module_param_named(hang_debug, hang_debug, bool, 0600);
static void a3xx_dump(struct msm_gpu *gpu);

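/*
 * Prime the CP micro-engine: CP_ME_INIT is a type-3 packet carrying 17
 * dwords of ME defaults (the magic values below presumably originate
 * from the downstream kgsl driver), submitted as the first thing on the
 * ring; then flush and wait for the GPU to drain it.
 */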
static void a3xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb;

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu);
	gpu->funcs->idle(gpu);
}

static int a3xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	DBG("%s", gpu->name);

	if (adreno_is_a305(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);

	} else if (adreno_is_a320(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

	} else if (adreno_is_a330v2(adreno_gpu)) {
		/*
		 * Most of the VBIF registers on 8974v2 have the correct
		 * values at power on, so we won't modify those if we don't
		 * need to
		 */
		/* Enable 1k sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

	} else if (adreno_is_a330(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Disable VBIF clock gating, to allow the AXI interface to
		 * run at a higher frequency than the GPU core:
		 */
		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter: */
	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection: */
	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits.  This lets us get
	 * useful information on failure:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting: */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Turn on the power counters: */
	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

	/* Enable Clock gating: */
	if (adreno_is_a320(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
	else if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

	if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

	/* Set the OCMEM base address for A330, etc */
	if (a3xx_gpu->ocmem_hdl) {
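		/* the register presumably takes the base address in 16KB
		 * units, hence the >> 14:
		 */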
		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a3xx_gpu->ocmem_base >> 14));
	}

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

	/* Enable the perfcntrs that we use.. */
	for (i = 0; i < gpu->num_perfcntrs; i++) {
		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
	}

	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/* setup access protection: */
	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);
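	/* Each CP_PROTECT entry below appears to pack a base register
	 * offset in the low bits with a range and read/write-protect flags
	 * in the high bits; the values are inherited from the downstream
	 * driver:
	 */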

	/* RBBM registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

	/* CP registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

	/* RB registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

	/* VBIF registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

	/* NOTE: PM4/micro-engine firmware registers look to be the same
	 * for a2xx and a3xx.. we could possibly push that part down to
	 * adreno_gpu base class.  Or push both PM4 and PFP but
	 * parameterize the pfp ucode addr/data registers..
	 */

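	/* A minimal sketch of what that parameterized helper could look
	 * like (hypothetical, not wired up here -- it would live at file
	 * scope, taking per-generation addr/data register offsets):
	 */
#if 0
static void load_ucode(struct msm_gpu *gpu, uint32_t addr_reg,
		uint32_t data_reg, const struct firmware *fw)
{
	uint32_t *ptr = (uint32_t *)(fw->data);
	uint32_t i, len = fw->size / 4;

	/* dword 0 of the fw image appears to be a header (the loops below
	 * skip it too), so start writing at dword 1:
	 */
	gpu_write(gpu, addr_reg, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, data_reg, ptr[i]);
}
#endif
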
	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->pm4->data);
	len = adreno_gpu->pm4->size / 4;
	DBG("loading PM4 ucode version: %x", ptr[1]);

	gpu_write(gpu, REG_AXXX_CP_DEBUG,
			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->pfp->data);
	len = adreno_gpu->pfp->size / 4;
	DBG("loading PFP ucode version: %x", ptr[5]);

	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64.
	 * (The *_START fields appear to be offsets in 8-byte units, which
	 * is consistent with those sizes: IB1 at 2, IB2 at 6, ST at 14.)
	 */
	if (adreno_is_a305(adreno_gpu) || adreno_is_a320(adreno_gpu)) {
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
	} else if (adreno_is_a330(adreno_gpu)) {
		/* NOTE: this value (taken from the downstream android
		 * driver) includes some bits outside of the known
		 * bitfields.  But A330 has this "MERCIU queue" thing too,
		 * which might explain a new bitfield or reshuffling:
		 */
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
	}

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

	a3xx_me_init(gpu);

	return 0;
}

static void a3xx_recover(struct msm_gpu *gpu)
{
	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a3xx_dump(gpu);
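	/* pulse the SW reset; the read back presumably ensures the write
	 * has posted before the reset bit is cleared again:
	 */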
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a3xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

#ifdef CONFIG_MSM_OCMEM
	if (a3xx_gpu->ocmem_base)
		ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
#endif

	kfree(a3xx_gpu);
}

static void a3xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	adreno_idle(gpu);

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
			A3XX_RBBM_STATUS_GPU_BUSY)))
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

	/* TODO maybe we need to reset GPU here to recover from hang? */
}

static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
	DBG("%s: %08x", gpu->name, status);

	// TODO

	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

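/* Register ranges to dump, as inclusive [start, end] pairs of dword
 * offsets (walked two-at-a-time by a3xx_show()/a3xx_dump() below):
 */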
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760, 0x300c, 0x300e, 0x301c, 0x301d,
	0x302a, 0x302a, 0x302c, 0x302d, 0x3030, 0x3031, 0x3034, 0x3036,
	0x303c, 0x303c, 0x305e, 0x305f,
};

#ifdef CONFIG_DEBUG_FS
static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
{
	struct drm_device *dev = gpu->dev;
	int i;

	adreno_show(gpu, m);

	mutex_lock(&dev->struct_mutex);

	gpu->funcs->pm_resume(gpu);

	seq_printf(m, "status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));

	/* dump these out in a form that can be parsed by demsm (register
	 * offsets are in dwords, hence the <<2 to emit byte addresses):
	 */
	seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) {
		uint32_t start = a3xx_registers[i];
		uint32_t end   = a3xx_registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			seq_printf(m, "IO:R %08x %08x\n", addr<<2, val);
		}
	}

	gpu->funcs->pm_suspend(gpu);

	mutex_unlock(&dev->struct_mutex);
}
#endif

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
	int i;

	adreno_dump(gpu);
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));

	/* dump these out in a form that can be parsed by demsm: */
	printk("IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) {
		uint32_t start = a3xx_registers[i];
		uint32_t end   = a3xx_registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			printk("IO:R %08x %08x\n", addr<<2, val);
		}
	}
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a3xx_hw_init,
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.last_fence = adreno_last_fence,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.idle = a3xx_idle,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
#ifdef CONFIG_DEBUG_FS
		.show = a3xx_show,
#endif
	},
};

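/* perf counters we expose: per entry, a select register, a sample
 * register (LO half of the counter), the countable select value, and a
 * name -- see the select_reg/select_val writes in a3xx_hw_init():
 */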
static const struct msm_gpu_perfcntr perfcntrs[] = {
	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
	struct a3xx_gpu *a3xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct adreno_platform_config *config;
	int ret;

	if (!pdev) {
		dev_err(dev->dev, "no a3xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	config = pdev->dev.platform_data;

	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
	if (!a3xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a3xx_gpu->base;
	gpu = &adreno_gpu->base;

	a3xx_gpu->pdev = pdev;

	gpu->fast_rate = config->fast_rate;
	gpu->slow_rate = config->slow_rate;
	gpu->bus_freq  = config->bus_freq;
#ifdef CONFIG_MSM_BUS_SCALING
	gpu->bus_scale_table = config->bus_scale_table;
#endif

	DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u",
			gpu->fast_rate, gpu->slow_rate, gpu->bus_freq);

	gpu->perfcntrs = perfcntrs;
	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, config->rev);
	if (ret)
		goto fail;

	/* if needed, allocate gmem: */
	if (adreno_is_a330(adreno_gpu)) {
#ifdef CONFIG_MSM_OCMEM
		/* TODO this is different/missing upstream: */
		struct ocmem_buf *ocmem_hdl =
				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);

		/* defensively treat NULL or ERR_PTR returns as failure
		 * rather than dereferencing a bad handle:
		 */
		if (IS_ERR_OR_NULL(ocmem_hdl)) {
			ret = ocmem_hdl ? PTR_ERR(ocmem_hdl) : -ENOMEM;
			goto fail;
		}

		a3xx_gpu->ocmem_hdl = ocmem_hdl;
		a3xx_gpu->ocmem_base = ocmem_hdl->addr;
		adreno_gpu->gmem = ocmem_hdl->len;
		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
				a3xx_gpu->ocmem_base);
#endif
	}

	if (!gpu->mmu) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		dev_err(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	return gpu;

fail:
	if (a3xx_gpu)
		a3xx_destroy(&a3xx_gpu->base.base);

	return ERR_PTR(ret);
}

/*
 * The a3xx device:
 */

#if defined(CONFIG_MSM_BUS_SCALING) && !defined(CONFIG_OF)
#  include <mach/kgsl.h>
#endif

static void set_gpu_pdev(struct drm_device *dev,
		struct platform_device *pdev)
{
	struct msm_drm_private *priv = dev->dev_private;
	priv->gpu_pdev = pdev;
}

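/* Component bind: build up the adreno_platform_config (from DT, or from
 * legacy board data on non-OF builds) and hand our platform device to
 * the master drm device:
 */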
static int a3xx_bind(struct device *dev, struct device *master, void *data)
{
	static struct adreno_platform_config config = {};
#ifdef CONFIG_OF
	struct device_node *child, *node = dev->of_node;
	u32 val;
	int ret;

	ret = of_property_read_u32(node, "qcom,chipid", &val);
	if (ret) {
		dev_err(dev, "could not find chipid: %d\n", ret);
		return ret;
	}

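	/* qcom,chipid packs core.major.minor.patchid as one byte each,
	 * most significant byte first:
	 */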
	config.rev = ADRENO_REV((val >> 24) & 0xff,
			(val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff);

	/* find clock rates: */
	config.fast_rate = 0;
	config.slow_rate = ~0;
	for_each_child_of_node(node, child) {
		if (of_device_is_compatible(child, "qcom,gpu-pwrlevels")) {
			struct device_node *pwrlvl;
			for_each_child_of_node(child, pwrlvl) {
				ret = of_property_read_u32(pwrlvl, "qcom,gpu-freq", &val);
				if (ret) {
					dev_err(dev, "could not find gpu-freq: %d\n", ret);
					return ret;
				}
				config.fast_rate = max(config.fast_rate, val);
				config.slow_rate = min(config.slow_rate, val);
			}
		}
	}

	if (!config.fast_rate) {
		dev_err(dev, "could not find clk rates\n");
		return -ENXIO;
	}
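
	/* A minimal sketch of the DT layout the loop above expects (node
	 * names and the example values here are made up; only the
	 * compatible/property strings are taken from the code above):
	 *
	 *   gpu@fdb00000 {
	 *           compatible = "qcom,kgsl-3d0";
	 *           qcom,chipid = <0x03020002>;
	 *           qcom,gpu-pwrlevels {
	 *                   compatible = "qcom,gpu-pwrlevels";
	 *                   qcom,gpu-pwrlevel@0 {
	 *                           qcom,gpu-freq = <450000000>;
	 *                   };
	 *                   qcom,gpu-pwrlevel@1 {
	 *                           qcom,gpu-freq = <27000000>;
	 *                   };
	 *           };
	 *   };
	 */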

#else
	struct kgsl_device_platform_data *pdata = dev->platform_data;
	uint32_t version = socinfo_get_version();
	if (cpu_is_apq8064ab()) {
		config.fast_rate = 450000000;
		config.slow_rate = 27000000;
		config.bus_freq  = 4;
		config.rev = ADRENO_REV(3, 2, 1, 0);
	} else if (cpu_is_apq8064()) {
		config.fast_rate = 400000000;
		config.slow_rate = 27000000;
		config.bus_freq  = 4;

		if (SOCINFO_VERSION_MAJOR(version) == 2)
			config.rev = ADRENO_REV(3, 2, 0, 2);
		else if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
				(SOCINFO_VERSION_MINOR(version) == 1))
			config.rev = ADRENO_REV(3, 2, 0, 1);
		else
			config.rev = ADRENO_REV(3, 2, 0, 0);

	} else if (cpu_is_msm8960ab()) {
		config.fast_rate = 400000000;
		config.slow_rate = 320000000;
		config.bus_freq  = 4;

		if (SOCINFO_VERSION_MINOR(version) == 0)
			config.rev = ADRENO_REV(3, 2, 1, 0);
		else
			config.rev = ADRENO_REV(3, 2, 1, 1);

	} else if (cpu_is_msm8930()) {
		config.fast_rate = 400000000;
		config.slow_rate = 27000000;
		config.bus_freq  = 3;

		if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
			(SOCINFO_VERSION_MINOR(version) == 2))
			config.rev = ADRENO_REV(3, 0, 5, 2);
		else
			config.rev = ADRENO_REV(3, 0, 5, 0);

	}
#  ifdef CONFIG_MSM_BUS_SCALING
	config.bus_scale_table = pdata->bus_scale_table;
#  endif
#endif
	dev->platform_data = &config;
	set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev));
	return 0;
}

static void a3xx_unbind(struct device *dev, struct device *master,
		void *data)
{
	set_gpu_pdev(dev_get_drvdata(master), NULL);
}

static const struct component_ops a3xx_ops = {
		.bind   = a3xx_bind,
		.unbind = a3xx_unbind,
};

static int a3xx_probe(struct platform_device *pdev)
{
	return component_add(&pdev->dev, &a3xx_ops);
}

static int a3xx_remove(struct platform_device *pdev)
{
	component_del(&pdev->dev, &a3xx_ops);
	return 0;
}

static const struct of_device_id dt_match[] = {
	{ .compatible = "qcom,kgsl-3d0" },
	{}
};

static struct platform_driver a3xx_driver = {
	.probe = a3xx_probe,
	.remove = a3xx_remove,
	.driver = {
		.name = "kgsl-3d0",
		.of_match_table = dt_match,
	},
};

void __init a3xx_register(void)
{
	platform_driver_register(&a3xx_driver);
}

void __exit a3xx_unregister(void)
{
	platform_driver_unregister(&a3xx_driver);
}