xref: /linux/drivers/gpu/drm/msm/adreno/a4xx_gpu.c (revision 156010ed9c2ac1e9df6c11b1f688cf8a6e0152e6)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2014 The Linux Foundation. All rights reserved.
3  */
4 #include "a4xx_gpu.h"
5 
/*
 * Set of RBBM INT0 interrupt sources this driver cares about.  The value is
 * written to REG_A4XX_RBBM_INT_0_MASK by a4xx_hw_init().
 * NOTE(review): presumably a set bit enables (unmasks) the source - confirm
 * against the register description.
 */
#define A4XX_INT0_MASK \
	(A4XX_INT0_RBBM_AHB_ERROR |        \
	 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A4XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A4XX_INT0_CP_OPCODE_ERROR |       \
	 A4XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A4XX_INT0_CP_HW_FAULT |           \
	 A4XX_INT0_CP_IB1_INT |            \
	 A4XX_INT0_CP_IB2_INT |            \
	 A4XX_INT0_CP_RB_INT |             \
	 A4XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A4XX_INT0_CP_AHB_ERROR_HALT |     \
	 A4XX_INT0_CACHE_FLUSH_TS |        \
	 A4XX_INT0_UCHE_OOB_ACCESS)
20 
/* Defined outside this file; when set, a4xx_recover() dumps registers before resetting. */
extern bool hang_debug;
/* Forward declarations: both are used before their definitions further down. */
static void a4xx_dump(struct msm_gpu *gpu);
static bool a4xx_idle(struct msm_gpu *gpu);
24 
25 static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
26 {
27 	struct msm_ringbuffer *ring = submit->ring;
28 	unsigned int i;
29 
30 	for (i = 0; i < submit->nr_cmds; i++) {
31 		switch (submit->cmd[i].type) {
32 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
33 			/* ignore IB-targets */
34 			break;
35 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
36 			/* ignore if there has not been a ctx switch: */
37 			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
38 				break;
39 			fallthrough;
40 		case MSM_SUBMIT_CMD_BUF:
41 			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2);
42 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
43 			OUT_RING(ring, submit->cmd[i].size);
44 			OUT_PKT2(ring);
45 			break;
46 		}
47 	}
48 
49 	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
50 	OUT_RING(ring, submit->seqno);
51 
52 	/* Flush HLSQ lazy updates to make sure there is nothing
53 	 * pending for indirect loads after the timestamp has
54 	 * passed:
55 	 */
56 	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
57 	OUT_RING(ring, HLSQ_FLUSH);
58 
59 	/* wait for idle before cache flush/interrupt */
60 	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
61 	OUT_RING(ring, 0x00000000);
62 
63 	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
64 	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
65 	OUT_RING(ring, CACHE_FLUSH_TS | CP_EVENT_WRITE_0_IRQ);
66 	OUT_RING(ring, rbmemptr(ring, fence));
67 	OUT_RING(ring, submit->seqno);
68 
69 	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
70 }
71 
72 /*
73  * a4xx_enable_hwcg() - Program the clock control registers
74  * @device: The adreno device pointer
75  */
76 static void a4xx_enable_hwcg(struct msm_gpu *gpu)
77 {
78 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
79 	unsigned int i;
80 	for (i = 0; i < 4; i++)
81 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
82 	for (i = 0; i < 4; i++)
83 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
84 	for (i = 0; i < 4; i++)
85 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
86 	for (i = 0; i < 4; i++)
87 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
88 	for (i = 0; i < 4; i++)
89 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
90 	for (i = 0; i < 4; i++)
91 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
92 	for (i = 0; i < 4; i++)
93 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
94 	for (i = 0; i < 4; i++)
95 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
96 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
97 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
98 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
99 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
100 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
101 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
102 	for (i = 0; i < 4; i++)
103 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);
104 
105 	/* Disable L1 clocking in A420 due to CCU issues with it */
106 	for (i = 0; i < 4; i++) {
107 		if (adreno_is_a420(adreno_gpu)) {
108 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
109 					0x00002020);
110 		} else {
111 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
112 					0x00022020);
113 		}
114 	}
115 
116 	/* No CCU for A405 */
117 	if (!adreno_is_a405(adreno_gpu)) {
118 		for (i = 0; i < 4; i++) {
119 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
120 					0x00000922);
121 		}
122 
123 		for (i = 0; i < 4; i++) {
124 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
125 					0x00000000);
126 		}
127 
128 		for (i = 0; i < 4; i++) {
129 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
130 					0x00000001);
131 		}
132 	}
133 
134 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
135 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
136 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
137 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
138 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
139 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
140 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
141 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
142 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
143 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000);
144 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
145 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
146 	/* Early A430's have a timing issue with SP/TP power collapse;
147 	   disabling HW clock gating prevents it. */
148 	if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
149 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
150 	else
151 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
152 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
153 }
154 
155 
156 static bool a4xx_me_init(struct msm_gpu *gpu)
157 {
158 	struct msm_ringbuffer *ring = gpu->rb[0];
159 
160 	OUT_PKT3(ring, CP_ME_INIT, 17);
161 	OUT_RING(ring, 0x000003f7);
162 	OUT_RING(ring, 0x00000000);
163 	OUT_RING(ring, 0x00000000);
164 	OUT_RING(ring, 0x00000000);
165 	OUT_RING(ring, 0x00000080);
166 	OUT_RING(ring, 0x00000100);
167 	OUT_RING(ring, 0x00000180);
168 	OUT_RING(ring, 0x00006600);
169 	OUT_RING(ring, 0x00000150);
170 	OUT_RING(ring, 0x0000014e);
171 	OUT_RING(ring, 0x00000154);
172 	OUT_RING(ring, 0x00000001);
173 	OUT_RING(ring, 0x00000000);
174 	OUT_RING(ring, 0x00000000);
175 	OUT_RING(ring, 0x00000000);
176 	OUT_RING(ring, 0x00000000);
177 	OUT_RING(ring, 0x00000000);
178 
179 	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
180 	return a4xx_idle(gpu);
181 }
182 
183 static int a4xx_hw_init(struct msm_gpu *gpu)
184 {
185 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
186 	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
187 	uint32_t *ptr, len;
188 	int i, ret;
189 
190 	if (adreno_is_a405(adreno_gpu)) {
191 		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
192 	} else if (adreno_is_a420(adreno_gpu)) {
193 		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
194 		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
195 		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
196 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
197 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
198 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
199 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
200 		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
201 	} else if (adreno_is_a430(adreno_gpu)) {
202 		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
203 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
204 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
205 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
206 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
207 		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
208 	} else {
209 		BUG();
210 	}
211 
212 	/* Make all blocks contribute to the GPU BUSY perf counter */
213 	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
214 
215 	/* Tune the hystersis counters for SP and CP idle detection */
216 	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
217 	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
218 
219 	if (adreno_is_a430(adreno_gpu)) {
220 		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
221 	}
222 
223 	 /* Enable the RBBM error reporting bits */
224 	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);
225 
226 	/* Enable AHB error reporting*/
227 	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);
228 
229 	/* Enable power counters*/
230 	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);
231 
232 	/*
233 	 * Turn on hang detection - this spews a lot of useful information
234 	 * into the RBBM registers on a hang:
235 	 */
236 	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
237 			(1 << 30) | 0xFFFF);
238 
239 	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
240 			(unsigned int)(a4xx_gpu->ocmem.base >> 14));
241 
242 	/* Turn on performance counters: */
243 	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);
244 
245 	/* use the first CP counter for timestamp queries.. userspace may set
246 	 * this as well but it selects the same counter/countable:
247 	 */
248 	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);
249 
250 	if (adreno_is_a430(adreno_gpu))
251 		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);
252 
253 	/* Disable L2 bypass to avoid UCHE out of bounds errors */
254 	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
255 	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);
256 
257 	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
258 			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));
259 
260 	/* On A430 enable SP regfile sleep for power savings */
261 	/* TODO downstream does this for !420, so maybe applies for 405 too? */
262 	if (!adreno_is_a420(adreno_gpu)) {
263 		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
264 			0x00000441);
265 		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
266 			0x00000441);
267 	}
268 
269 	a4xx_enable_hwcg(gpu);
270 
271 	/*
272 	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
273 	 * due to timing issue with HLSQ_TP_CLK_EN
274 	 */
275 	if (adreno_is_a420(adreno_gpu)) {
276 		unsigned int val;
277 		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
278 		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
279 		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
280 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
281 	}
282 
283 	/* setup access protection: */
284 	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);
285 
286 	/* RBBM registers */
287 	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
288 	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
289 	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
290 	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
291 	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
292 	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);
293 
294 	/* CP registers */
295 	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
296 	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);
297 
298 
299 	/* RB registers */
300 	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);
301 
302 	/* HLSQ registers */
303 	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);
304 
305 	/* VPC registers */
306 	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);
307 
308 	/* SMMU registers */
309 	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);
310 
311 	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);
312 
313 	ret = adreno_hw_init(gpu);
314 	if (ret)
315 		return ret;
316 
317 	/*
318 	 * Use the default ringbuffer size and block size but disable the RPTR
319 	 * shadow
320 	 */
321 	gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
322 		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
323 
324 	/* Set the ringbuffer address */
325 	gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));
326 
327 	/* Load PM4: */
328 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
329 	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
330 	DBG("loading PM4 ucode version: %u", ptr[0]);
331 	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
332 	for (i = 1; i < len; i++)
333 		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);
334 
335 	/* Load PFP: */
336 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
337 	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
338 	DBG("loading PFP ucode version: %u", ptr[0]);
339 
340 	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
341 	for (i = 1; i < len; i++)
342 		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);
343 
344 	/* clear ME_HALT to start micro engine */
345 	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);
346 
347 	return a4xx_me_init(gpu) ? 0 : -EINVAL;
348 }
349 
350 static void a4xx_recover(struct msm_gpu *gpu)
351 {
352 	int i;
353 
354 	adreno_dump_info(gpu);
355 
356 	for (i = 0; i < 8; i++) {
357 		printk("CP_SCRATCH_REG%d: %u\n", i,
358 			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
359 	}
360 
361 	/* dump registers before resetting gpu, if enabled: */
362 	if (hang_debug)
363 		a4xx_dump(gpu);
364 
365 	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
366 	gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
367 	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
368 	adreno_recover(gpu);
369 }
370 
371 static void a4xx_destroy(struct msm_gpu *gpu)
372 {
373 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
374 	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
375 
376 	DBG("%s", gpu->name);
377 
378 	adreno_gpu_cleanup(adreno_gpu);
379 
380 	adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);
381 
382 	kfree(a4xx_gpu);
383 }
384 
385 static bool a4xx_idle(struct msm_gpu *gpu)
386 {
387 	/* wait for ringbuffer to drain: */
388 	if (!adreno_idle(gpu, gpu->rb[0]))
389 		return false;
390 
391 	/* then wait for GPU to finish: */
392 	if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
393 					A4XX_RBBM_STATUS_GPU_BUSY))) {
394 		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
395 		/* TODO maybe we need to reset GPU here to recover from hang? */
396 		return false;
397 	}
398 
399 	return true;
400 }
401 
402 static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
403 {
404 	uint32_t status;
405 
406 	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
407 	DBG("%s: Int status %08x", gpu->name, status);
408 
409 	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
410 		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);
411 		printk("CP | Protected mode error| %s | addr=%x\n",
412 			reg & (1 << 24) ? "WRITE" : "READ",
413 			(reg & 0xFFFFF) >> 2);
414 	}
415 
416 	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);
417 
418 	msm_gpu_retire(gpu);
419 
420 	return IRQ_HANDLED;
421 }
422 
/*
 * Register list installed as adreno_gpu->registers by a4xx_gpu_init() for
 * every a4xx variant except A405.  The list is terminated by ~0.
 * NOTE(review): entries look like (first, last) pairs of inclusive register
 * ranges - confirm against the consumer in the common adreno code.
 */
static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* VMIDMT */
	0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
	0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
	0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
	0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* XPU */
	0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
	0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
	0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
	/* VBIF */
	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
	0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
	0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
	0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
	0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
	0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
	0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
	0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
	0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
	0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
	0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
	0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
	0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
	0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
	0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
	0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
	0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
	0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
	~0 /* sentinel */
};
504 
/*
 * Register list installed as adreno_gpu->registers by a4xx_gpu_init() when
 * the GPU is an A405.  The list is terminated by ~0.
 * NOTE(review): entries look like (first, last) pairs of inclusive register
 * ranges - confirm against the consumer in the common adreno code.
 */
static const unsigned int a405_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* VBIF version 0x20050000 */
	0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
	0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
	0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
	0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
	0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
	0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
	0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
	0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
	~0 /* sentinel */
};
550 
551 static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
552 {
553 	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
554 
555 	if (!state)
556 		return ERR_PTR(-ENOMEM);
557 
558 	adreno_gpu_state_get(gpu, state);
559 
560 	state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);
561 
562 	return state;
563 }
564 
565 static void a4xx_dump(struct msm_gpu *gpu)
566 {
567 	printk("status:   %08x\n",
568 			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
569 	adreno_dump(gpu);
570 }
571 
572 static int a4xx_pm_resume(struct msm_gpu *gpu) {
573 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
574 	int ret;
575 
576 	ret = msm_gpu_pm_resume(gpu);
577 	if (ret)
578 		return ret;
579 
580 	if (adreno_is_a430(adreno_gpu)) {
581 		unsigned int reg;
582 		/* Set the default register values; set SW_COLLAPSE to 0 */
583 		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
584 		do {
585 			udelay(5);
586 			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
587 		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
588 	}
589 	return 0;
590 }
591 
592 static int a4xx_pm_suspend(struct msm_gpu *gpu) {
593 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
594 	int ret;
595 
596 	ret = msm_gpu_pm_suspend(gpu);
597 	if (ret)
598 		return ret;
599 
600 	if (adreno_is_a430(adreno_gpu)) {
601 		/* Set the default register values; set SW_COLLAPSE to 1 */
602 		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
603 	}
604 	return 0;
605 }
606 
607 static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
608 {
609 	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO);
610 
611 	return 0;
612 }
613 
614 static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
615 {
616 	ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR);
617 	return ring->memptrs->rptr;
618 }
619 
620 static const struct adreno_gpu_funcs funcs = {
621 	.base = {
622 		.get_param = adreno_get_param,
623 		.set_param = adreno_set_param,
624 		.hw_init = a4xx_hw_init,
625 		.pm_suspend = a4xx_pm_suspend,
626 		.pm_resume = a4xx_pm_resume,
627 		.recover = a4xx_recover,
628 		.submit = a4xx_submit,
629 		.active_ring = adreno_active_ring,
630 		.irq = a4xx_irq,
631 		.destroy = a4xx_destroy,
632 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
633 		.show = adreno_show,
634 #endif
635 		.gpu_state_get = a4xx_gpu_state_get,
636 		.gpu_state_put = adreno_gpu_state_put,
637 		.create_address_space = adreno_create_address_space,
638 		.get_rptr = a4xx_get_rptr,
639 	},
640 	.get_timestamp = a4xx_get_timestamp,
641 };
642 
643 struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
644 {
645 	struct a4xx_gpu *a4xx_gpu = NULL;
646 	struct adreno_gpu *adreno_gpu;
647 	struct msm_gpu *gpu;
648 	struct msm_drm_private *priv = dev->dev_private;
649 	struct platform_device *pdev = priv->gpu_pdev;
650 	struct icc_path *ocmem_icc_path;
651 	struct icc_path *icc_path;
652 	int ret;
653 
654 	if (!pdev) {
655 		DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
656 		ret = -ENXIO;
657 		goto fail;
658 	}
659 
660 	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
661 	if (!a4xx_gpu) {
662 		ret = -ENOMEM;
663 		goto fail;
664 	}
665 
666 	adreno_gpu = &a4xx_gpu->base;
667 	gpu = &adreno_gpu->base;
668 
669 	gpu->perfcntrs = NULL;
670 	gpu->num_perfcntrs = 0;
671 
672 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
673 	if (ret)
674 		goto fail;
675 
676 	adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
677 							     a4xx_registers;
678 
679 	/* if needed, allocate gmem: */
680 	ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
681 				    &a4xx_gpu->ocmem);
682 	if (ret)
683 		goto fail;
684 
685 	if (!gpu->aspace) {
686 		/* TODO we think it is possible to configure the GPU to
687 		 * restrict access to VRAM carveout.  But the required
688 		 * registers are unknown.  For now just bail out and
689 		 * limp along with just modesetting.  If it turns out
690 		 * to not be possible to restrict access, then we must
691 		 * implement a cmdstream validator.
692 		 */
693 		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
694 		if (!allow_vram_carveout) {
695 			ret = -ENXIO;
696 			goto fail;
697 		}
698 	}
699 
700 	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
701 	if (IS_ERR(icc_path)) {
702 		ret = PTR_ERR(icc_path);
703 		goto fail;
704 	}
705 
706 	ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
707 	if (IS_ERR(ocmem_icc_path)) {
708 		ret = PTR_ERR(ocmem_icc_path);
709 		/* allow -ENODATA, ocmem icc is optional */
710 		if (ret != -ENODATA)
711 			goto fail;
712 		ocmem_icc_path = NULL;
713 	}
714 
715 	/*
716 	 * Set the ICC path to maximum speed for now by multiplying the fastest
717 	 * frequency by the bus width (8). We'll want to scale this later on to
718 	 * improve battery life.
719 	 */
720 	icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
721 	icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
722 
723 	return gpu;
724 
725 fail:
726 	if (a4xx_gpu)
727 		a4xx_destroy(&a4xx_gpu->base.base);
728 
729 	return ERR_PTR(ret);
730 }
731