xref: /linux/drivers/gpu/drm/msm/adreno/a4xx_gpu.c (revision eb01fe7abbe2d0b38824d2a93fdb4cc3eaf2ccc1)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2014 The Linux Foundation. All rights reserved.
3  */
4 #include "a4xx_gpu.h"
5 
/*
 * Interrupt sources that a4xx_hw_init() unmasks by writing this value to
 * RBBM_INT_0_MASK.
 */
#define A4XX_INT0_MASK                      \
	(A4XX_INT0_RBBM_AHB_ERROR         | \
	 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW  | \
	 A4XX_INT0_CP_T0_PACKET_IN_IB     | \
	 A4XX_INT0_CP_OPCODE_ERROR        | \
	 A4XX_INT0_CP_RESERVED_BIT_ERROR  | \
	 A4XX_INT0_CP_HW_FAULT            | \
	 A4XX_INT0_CP_IB1_INT             | \
	 A4XX_INT0_CP_IB2_INT             | \
	 A4XX_INT0_CP_RB_INT              | \
	 A4XX_INT0_CP_REG_PROTECT_FAULT   | \
	 A4XX_INT0_CP_AHB_ERROR_HALT      | \
	 A4XX_INT0_CACHE_FLUSH_TS         | \
	 A4XX_INT0_UCHE_OOB_ACCESS)
20 
/* Defined outside this file; when set, a4xx_recover() dumps state before reset. */
extern bool hang_debug;

/* Helpers that are called before their definitions further down. */
static bool a4xx_idle(struct msm_gpu *gpu);
static void a4xx_dump(struct msm_gpu *gpu);
24 
25 static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
26 {
27 	struct msm_ringbuffer *ring = submit->ring;
28 	unsigned int i;
29 
30 	for (i = 0; i < submit->nr_cmds; i++) {
31 		switch (submit->cmd[i].type) {
32 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
33 			/* ignore IB-targets */
34 			break;
35 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
36 			/* ignore if there has not been a ctx switch: */
37 			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
38 				break;
39 			fallthrough;
40 		case MSM_SUBMIT_CMD_BUF:
41 			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2);
42 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
43 			OUT_RING(ring, submit->cmd[i].size);
44 			OUT_PKT2(ring);
45 			break;
46 		}
47 	}
48 
49 	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
50 	OUT_RING(ring, submit->seqno);
51 
52 	/* Flush HLSQ lazy updates to make sure there is nothing
53 	 * pending for indirect loads after the timestamp has
54 	 * passed:
55 	 */
56 	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
57 	OUT_RING(ring, HLSQ_FLUSH);
58 
59 	/* wait for idle before cache flush/interrupt */
60 	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
61 	OUT_RING(ring, 0x00000000);
62 
63 	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
64 	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
65 	OUT_RING(ring, CACHE_FLUSH_TS | CP_EVENT_WRITE_0_IRQ);
66 	OUT_RING(ring, rbmemptr(ring, fence));
67 	OUT_RING(ring, submit->seqno);
68 
69 	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
70 }
71 
72 /*
73  * a4xx_enable_hwcg() - Program the clock control registers
74  * @device: The adreno device pointer
75  */
76 static void a4xx_enable_hwcg(struct msm_gpu *gpu)
77 {
78 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
79 	unsigned int i;
80 	for (i = 0; i < 4; i++)
81 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
82 	for (i = 0; i < 4; i++)
83 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
84 	for (i = 0; i < 4; i++)
85 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
86 	for (i = 0; i < 4; i++)
87 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
88 	for (i = 0; i < 4; i++)
89 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
90 	for (i = 0; i < 4; i++)
91 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
92 	for (i = 0; i < 4; i++)
93 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
94 	for (i = 0; i < 4; i++)
95 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
96 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
97 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
98 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
99 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
100 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
101 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
102 	for (i = 0; i < 4; i++)
103 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);
104 
105 	/* Disable L1 clocking in A420 due to CCU issues with it */
106 	for (i = 0; i < 4; i++) {
107 		if (adreno_is_a420(adreno_gpu)) {
108 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
109 					0x00002020);
110 		} else {
111 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
112 					0x00022020);
113 		}
114 	}
115 
116 	/* No CCU for A405 */
117 	if (!adreno_is_a405(adreno_gpu)) {
118 		for (i = 0; i < 4; i++) {
119 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
120 					0x00000922);
121 		}
122 
123 		for (i = 0; i < 4; i++) {
124 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
125 					0x00000000);
126 		}
127 
128 		for (i = 0; i < 4; i++) {
129 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
130 					0x00000001);
131 		}
132 	}
133 
134 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
135 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
136 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
137 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
138 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
139 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
140 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
141 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
142 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
143 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000);
144 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
145 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
146 	/* Early A430's have a timing issue with SP/TP power collapse;
147 	   disabling HW clock gating prevents it. */
148 	if (adreno_is_a430(adreno_gpu) && adreno_patchid(adreno_gpu) < 2)
149 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
150 	else
151 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
152 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
153 }
154 
155 
156 static bool a4xx_me_init(struct msm_gpu *gpu)
157 {
158 	struct msm_ringbuffer *ring = gpu->rb[0];
159 
160 	OUT_PKT3(ring, CP_ME_INIT, 17);
161 	OUT_RING(ring, 0x000003f7);
162 	OUT_RING(ring, 0x00000000);
163 	OUT_RING(ring, 0x00000000);
164 	OUT_RING(ring, 0x00000000);
165 	OUT_RING(ring, 0x00000080);
166 	OUT_RING(ring, 0x00000100);
167 	OUT_RING(ring, 0x00000180);
168 	OUT_RING(ring, 0x00006600);
169 	OUT_RING(ring, 0x00000150);
170 	OUT_RING(ring, 0x0000014e);
171 	OUT_RING(ring, 0x00000154);
172 	OUT_RING(ring, 0x00000001);
173 	OUT_RING(ring, 0x00000000);
174 	OUT_RING(ring, 0x00000000);
175 	OUT_RING(ring, 0x00000000);
176 	OUT_RING(ring, 0x00000000);
177 	OUT_RING(ring, 0x00000000);
178 
179 	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
180 	return a4xx_idle(gpu);
181 }
182 
183 static int a4xx_hw_init(struct msm_gpu *gpu)
184 {
185 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
186 	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
187 	uint32_t *ptr, len;
188 	int i, ret;
189 
190 	if (adreno_is_a405(adreno_gpu)) {
191 		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
192 	} else if (adreno_is_a420(adreno_gpu)) {
193 		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
194 		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
195 		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
196 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
197 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
198 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
199 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
200 		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
201 	} else if (adreno_is_a430(adreno_gpu)) {
202 		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
203 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
204 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
205 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
206 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
207 		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
208 	} else {
209 		BUG();
210 	}
211 
212 	/* Make all blocks contribute to the GPU BUSY perf counter */
213 	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
214 
215 	/* Tune the hystersis counters for SP and CP idle detection */
216 	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
217 	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
218 
219 	if (adreno_is_a430(adreno_gpu)) {
220 		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
221 	}
222 
223 	 /* Enable the RBBM error reporting bits */
224 	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);
225 
226 	/* Enable AHB error reporting*/
227 	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);
228 
229 	/* Enable power counters*/
230 	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);
231 
232 	/*
233 	 * Turn on hang detection - this spews a lot of useful information
234 	 * into the RBBM registers on a hang:
235 	 */
236 	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
237 			(1 << 30) | 0xFFFF);
238 
239 	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
240 			(unsigned int)(a4xx_gpu->ocmem.base >> 14));
241 
242 	/* Turn on performance counters: */
243 	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);
244 
245 	/* use the first CP counter for timestamp queries.. userspace may set
246 	 * this as well but it selects the same counter/countable:
247 	 */
248 	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);
249 
250 	if (adreno_is_a430(adreno_gpu))
251 		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);
252 
253 	/* Disable L2 bypass to avoid UCHE out of bounds errors */
254 	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
255 	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);
256 
257 	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
258 			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));
259 
260 	/* On A430 enable SP regfile sleep for power savings */
261 	/* TODO downstream does this for !420, so maybe applies for 405 too? */
262 	if (!adreno_is_a420(adreno_gpu)) {
263 		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
264 			0x00000441);
265 		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
266 			0x00000441);
267 	}
268 
269 	a4xx_enable_hwcg(gpu);
270 
271 	/*
272 	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
273 	 * due to timing issue with HLSQ_TP_CLK_EN
274 	 */
275 	if (adreno_is_a420(adreno_gpu)) {
276 		unsigned int val;
277 		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
278 		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
279 		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
280 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
281 	}
282 
283 	/* setup access protection: */
284 	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);
285 
286 	/* RBBM registers */
287 	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
288 	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
289 	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
290 	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
291 	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
292 	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);
293 
294 	/* CP registers */
295 	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
296 	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);
297 
298 
299 	/* RB registers */
300 	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);
301 
302 	/* HLSQ registers */
303 	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);
304 
305 	/* VPC registers */
306 	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);
307 
308 	/* SMMU registers */
309 	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);
310 
311 	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);
312 
313 	ret = adreno_hw_init(gpu);
314 	if (ret)
315 		return ret;
316 
317 	/*
318 	 * Use the default ringbuffer size and block size but disable the RPTR
319 	 * shadow
320 	 */
321 	gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
322 		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
323 
324 	/* Set the ringbuffer address */
325 	gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));
326 
327 	/* Load PM4: */
328 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
329 	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
330 	DBG("loading PM4 ucode version: %u", ptr[0]);
331 	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
332 	for (i = 1; i < len; i++)
333 		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);
334 
335 	/* Load PFP: */
336 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
337 	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
338 	DBG("loading PFP ucode version: %u", ptr[0]);
339 
340 	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
341 	for (i = 1; i < len; i++)
342 		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);
343 
344 	/* clear ME_HALT to start micro engine */
345 	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);
346 
347 	return a4xx_me_init(gpu) ? 0 : -EINVAL;
348 }
349 
350 static void a4xx_recover(struct msm_gpu *gpu)
351 {
352 	int i;
353 
354 	adreno_dump_info(gpu);
355 
356 	for (i = 0; i < 8; i++) {
357 		printk("CP_SCRATCH_REG%d: %u\n", i,
358 			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
359 	}
360 
361 	/* dump registers before resetting gpu, if enabled: */
362 	if (hang_debug)
363 		a4xx_dump(gpu);
364 
365 	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
366 	gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
367 	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
368 	adreno_recover(gpu);
369 }
370 
371 static void a4xx_destroy(struct msm_gpu *gpu)
372 {
373 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
374 	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
375 
376 	DBG("%s", gpu->name);
377 
378 	adreno_gpu_cleanup(adreno_gpu);
379 
380 	adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);
381 
382 	kfree(a4xx_gpu);
383 }
384 
385 static bool a4xx_idle(struct msm_gpu *gpu)
386 {
387 	/* wait for ringbuffer to drain: */
388 	if (!adreno_idle(gpu, gpu->rb[0]))
389 		return false;
390 
391 	/* then wait for GPU to finish: */
392 	if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
393 					A4XX_RBBM_STATUS_GPU_BUSY))) {
394 		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
395 		/* TODO maybe we need to reset GPU here to recover from hang? */
396 		return false;
397 	}
398 
399 	return true;
400 }
401 
402 static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
403 {
404 	uint32_t status;
405 
406 	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
407 	DBG("%s: Int status %08x", gpu->name, status);
408 
409 	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
410 		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);
411 		printk("CP | Protected mode error| %s | addr=%x\n",
412 			reg & (1 << 24) ? "WRITE" : "READ",
413 			(reg & 0xFFFFF) >> 2);
414 	}
415 
416 	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);
417 
418 	msm_gpu_retire(gpu);
419 
420 	return IRQ_HANDLED;
421 }
422 
/*
 * Register offsets installed as adreno_gpu->registers by a4xx_gpu_init()
 * for every variant except A405.  Entries are laid out two per group and
 * the list ends with a ~0 sentinel.
 * NOTE(review): the groups look like (first, last) offset ranges; the
 * consumer lives outside this file - confirm there.
 */
static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002,  0x0004, 0x0021,  0x0023, 0x0024,  0x0026, 0x0026,
	0x0028, 0x002B,  0x002E, 0x0034,  0x0037, 0x0044,  0x0047, 0x0066,
	0x0068, 0x0095,  0x009C, 0x0170,  0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233,  0x0240, 0x0250,  0x04C0, 0x04DD,  0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03,  0x0C08, 0x0C41,  0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81,  0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0,  0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C,  0x0D10, 0x0D17,  0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61,  0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84,  0x0E88, 0x0E95,
	/* VMIDMT */
	0x1000, 0x1000,  0x1002, 0x1002,  0x1004, 0x1004,  0x1008, 0x100A,
	0x100C, 0x100D,  0x100F, 0x1010,  0x1012, 0x1016,  0x1024, 0x1024,
	0x1027, 0x1027,  0x1100, 0x1100,  0x1102, 0x1102,  0x1104, 0x1104,
	0x1110, 0x1110,  0x1112, 0x1116,  0x1124, 0x1124,  0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004,  0x2008, 0x2067,  0x2070, 0x2078,  0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6,  0x21D0, 0x21D0,  0x21D9, 0x21D9,  0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204,  0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404,  0x2408, 0x2467,  0x2470, 0x2478,  0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6,  0x25D0, 0x25D0,  0x25D9, 0x25D9,  0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604,  0x2608, 0x26A9,
	/* XPU */
	0x2C00, 0x2C01,  0x2C10, 0x2C10,  0x2C12, 0x2C16,  0x2C1D, 0x2C20,
	0x2C28, 0x2C28,  0x2C30, 0x2C30,  0x2C32, 0x2C36,  0x2C40, 0x2C40,
	0x2C50, 0x2C50,  0x2C52, 0x2C56,  0x2C80, 0x2C80,  0x2C94, 0x2C95,
	/* VBIF */
	0x3000, 0x3007,  0x300C, 0x3014,  0x3018, 0x301D,  0x3020, 0x3022,
	0x3024, 0x3026,  0x3028, 0x302A,  0x302C, 0x302D,  0x3030, 0x3031,
	0x3034, 0x3036,  0x3038, 0x3038,  0x303C, 0x303D,  0x3040, 0x3040,
	0x3049, 0x3049,  0x3058, 0x3058,  0x305B, 0x3061,  0x3064, 0x3068,
	0x306C, 0x306D,  0x3080, 0x3088,  0x308B, 0x308C,  0x3090, 0x3094,
	0x3098, 0x3098,  0x309C, 0x309C,  0x30C0, 0x30C0,  0x30C8, 0x30C8,
	0x30D0, 0x30D0,  0x30D8, 0x30D8,  0x30E0, 0x30E0,  0x3100, 0x3100,
	0x3108, 0x3108,  0x3110, 0x3110,  0x3118, 0x3118,  0x3120, 0x3120,
	0x3124, 0x3125,  0x3129, 0x3129,  0x3131, 0x3131,  0x330C, 0x330C,
	0x3310, 0x3310,  0x3400, 0x3401,  0x3410, 0x3410,  0x3412, 0x3416,
	0x341D, 0x3420,  0x3428, 0x3428,  0x3430, 0x3430,  0x3432, 0x3436,
	0x3440, 0x3440,  0x3450, 0x3450,  0x3452, 0x3456,  0x3480, 0x3480,
	0x3494, 0x3495,  0x4000, 0x4000,  0x4002, 0x4002,  0x4004, 0x4004,
	0x4008, 0x400A,  0x400C, 0x400D,  0x400F, 0x4012,  0x4014, 0x4016,
	0x401D, 0x401D,  0x4020, 0x4027,  0x4060, 0x4062,  0x4200, 0x4200,
	0x4300, 0x4300,  0x4400, 0x4400,  0x4500, 0x4500,  0x4800, 0x4802,
	0x480F, 0x480F,  0x4811, 0x4811,  0x4813, 0x4813,  0x4815, 0x4816,
	0x482B, 0x482B,  0x4857, 0x4857,  0x4883, 0x4883,  0x48AF, 0x48AF,
	0x48C5, 0x48C5,  0x48E5, 0x48E5,  0x4905, 0x4905,  0x4925, 0x4925,
	0x4945, 0x4945,  0x4950, 0x4950,  0x495B, 0x495B,  0x4980, 0x498E,
	0x4B00, 0x4B00,  0x4C00, 0x4C00,  0x4D00, 0x4D00,  0x4E00, 0x4E00,
	0x4E80, 0x4E80,  0x4F00, 0x4F00,  0x4F08, 0x4F08,  0x4F10, 0x4F10,
	0x4F18, 0x4F18,  0x4F20, 0x4F20,  0x4F30, 0x4F30,  0x4F60, 0x4F60,
	0x4F80, 0x4F81,  0x4F88, 0x4F89,  0x4FEE, 0x4FEE,  0x4FF3, 0x4FF3,
	0x6000, 0x6001,  0x6008, 0x600F,  0x6014, 0x6016,  0x6018, 0x601B,
	0x61FD, 0x61FD,  0x623C, 0x623C,  0x6380, 0x6380,  0x63A0, 0x63A0,
	0x63C0, 0x63C1,  0x63C8, 0x63C9,  0x63D0, 0x63D4,  0x63D6, 0x63D6,
	0x63EE, 0x63EE,  0x6400, 0x6401,  0x6408, 0x640F,  0x6414, 0x6416,
	0x6418, 0x641B,  0x65FD, 0x65FD,  0x663C, 0x663C,  0x6780, 0x6780,
	0x67A0, 0x67A0,  0x67C0, 0x67C1,  0x67C8, 0x67C9,  0x67D0, 0x67D4,
	0x67D6, 0x67D6,  0x67EE, 0x67EE,  0x6800, 0x6801,  0x6808, 0x680F,
	0x6814, 0x6816,  0x6818, 0x681B,  0x69FD, 0x69FD,  0x6A3C, 0x6A3C,
	0x6B80, 0x6B80,  0x6BA0, 0x6BA0,  0x6BC0, 0x6BC1,  0x6BC8, 0x6BC9,
	0x6BD0, 0x6BD4,  0x6BD6, 0x6BD6,  0x6BEE, 0x6BEE,
	~0 /* sentinel */
};
504 
/*
 * Register offsets installed as adreno_gpu->registers by a4xx_gpu_init()
 * when the GPU is an A405.  Entries are laid out two per group and the
 * list ends with a ~0 sentinel.
 * NOTE(review): the groups look like (first, last) offset ranges; the
 * consumer lives outside this file - confirm there.
 */
static const unsigned int a405_registers[] = {
	/* RBBM */
	0x0000, 0x0002,  0x0004, 0x0021,  0x0023, 0x0024,  0x0026, 0x0026,
	0x0028, 0x002B,  0x002E, 0x0034,  0x0037, 0x0044,  0x0047, 0x0066,
	0x0068, 0x0095,  0x009C, 0x0170,  0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233,  0x0240, 0x0250,  0x04C0, 0x04DD,  0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03,  0x0C08, 0x0C41,  0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81,  0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0,  0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C,  0x0D10, 0x0D17,  0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61,  0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84,  0x0E88, 0x0E95,
	/* GRAS CTX 0 */
	0x2000, 0x2004,  0x2008, 0x2067,  0x2070, 0x2078,  0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6,  0x21D0, 0x21D0,  0x21D9, 0x21D9,  0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204,  0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404,  0x2408, 0x2467,  0x2470, 0x2478,  0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6,  0x25D0, 0x25D0,  0x25D9, 0x25D9,  0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604,  0x2608, 0x26A9,
	/* VBIF version 0x20050000 */
	0x3000, 0x3007,  0x302C, 0x302C,  0x3030, 0x3030,  0x3034, 0x3036,
	0x3038, 0x3038,  0x303C, 0x303D,  0x3040, 0x3040,  0x3049, 0x3049,
	0x3058, 0x3058,  0x305B, 0x3061,  0x3064, 0x3068,  0x306C, 0x306D,
	0x3080, 0x3088,  0x308B, 0x308C,  0x3090, 0x3094,  0x3098, 0x3098,
	0x309C, 0x309C,  0x30C0, 0x30C0,  0x30C8, 0x30C8,  0x30D0, 0x30D0,
	0x30D8, 0x30D8,  0x30E0, 0x30E0,  0x3100, 0x3100,  0x3108, 0x3108,
	0x3110, 0x3110,  0x3118, 0x3118,  0x3120, 0x3120,  0x3124, 0x3125,
	0x3129, 0x3129,  0x340C, 0x340C,  0x3410, 0x3410,
	~0 /* sentinel */
};
550 
551 static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
552 {
553 	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
554 
555 	if (!state)
556 		return ERR_PTR(-ENOMEM);
557 
558 	adreno_gpu_state_get(gpu, state);
559 
560 	state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);
561 
562 	return state;
563 }
564 
565 static void a4xx_dump(struct msm_gpu *gpu)
566 {
567 	printk("status:   %08x\n",
568 			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
569 	adreno_dump(gpu);
570 }
571 
572 static int a4xx_pm_resume(struct msm_gpu *gpu) {
573 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
574 	int ret;
575 
576 	ret = msm_gpu_pm_resume(gpu);
577 	if (ret)
578 		return ret;
579 
580 	if (adreno_is_a430(adreno_gpu)) {
581 		unsigned int reg;
582 		/* Set the default register values; set SW_COLLAPSE to 0 */
583 		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
584 		do {
585 			udelay(5);
586 			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
587 		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
588 	}
589 	return 0;
590 }
591 
592 static int a4xx_pm_suspend(struct msm_gpu *gpu) {
593 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
594 	int ret;
595 
596 	ret = msm_gpu_pm_suspend(gpu);
597 	if (ret)
598 		return ret;
599 
600 	if (adreno_is_a430(adreno_gpu)) {
601 		/* Set the default register values; set SW_COLLAPSE to 1 */
602 		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
603 	}
604 	return 0;
605 }
606 
607 static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
608 {
609 	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO);
610 
611 	return 0;
612 }
613 
614 static u64 a4xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
615 {
616 	u64 busy_cycles;
617 
618 	busy_cycles = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_RBBM_1_LO);
619 	*out_sample_rate = clk_get_rate(gpu->core_clk);
620 
621 	return busy_cycles;
622 }
623 
624 static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
625 {
626 	ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR);
627 	return ring->memptrs->rptr;
628 }
629 
630 static const struct adreno_gpu_funcs funcs = {
631 	.base = {
632 		.get_param = adreno_get_param,
633 		.set_param = adreno_set_param,
634 		.hw_init = a4xx_hw_init,
635 		.pm_suspend = a4xx_pm_suspend,
636 		.pm_resume = a4xx_pm_resume,
637 		.recover = a4xx_recover,
638 		.submit = a4xx_submit,
639 		.active_ring = adreno_active_ring,
640 		.irq = a4xx_irq,
641 		.destroy = a4xx_destroy,
642 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
643 		.show = adreno_show,
644 #endif
645 		.gpu_busy = a4xx_gpu_busy,
646 		.gpu_state_get = a4xx_gpu_state_get,
647 		.gpu_state_put = adreno_gpu_state_put,
648 		.create_address_space = adreno_create_address_space,
649 		.get_rptr = a4xx_get_rptr,
650 	},
651 	.get_timestamp = a4xx_get_timestamp,
652 };
653 
654 struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
655 {
656 	struct a4xx_gpu *a4xx_gpu = NULL;
657 	struct adreno_gpu *adreno_gpu;
658 	struct msm_gpu *gpu;
659 	struct msm_drm_private *priv = dev->dev_private;
660 	struct platform_device *pdev = priv->gpu_pdev;
661 	struct icc_path *ocmem_icc_path;
662 	struct icc_path *icc_path;
663 	int ret;
664 
665 	if (!pdev) {
666 		DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
667 		ret = -ENXIO;
668 		goto fail;
669 	}
670 
671 	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
672 	if (!a4xx_gpu) {
673 		ret = -ENOMEM;
674 		goto fail;
675 	}
676 
677 	adreno_gpu = &a4xx_gpu->base;
678 	gpu = &adreno_gpu->base;
679 
680 	gpu->perfcntrs = NULL;
681 	gpu->num_perfcntrs = 0;
682 
683 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
684 	if (ret)
685 		goto fail;
686 
687 	adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
688 							     a4xx_registers;
689 
690 	/* if needed, allocate gmem: */
691 	ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
692 				    &a4xx_gpu->ocmem);
693 	if (ret)
694 		goto fail;
695 
696 	if (!gpu->aspace) {
697 		/* TODO we think it is possible to configure the GPU to
698 		 * restrict access to VRAM carveout.  But the required
699 		 * registers are unknown.  For now just bail out and
700 		 * limp along with just modesetting.  If it turns out
701 		 * to not be possible to restrict access, then we must
702 		 * implement a cmdstream validator.
703 		 */
704 		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
705 		if (!allow_vram_carveout) {
706 			ret = -ENXIO;
707 			goto fail;
708 		}
709 	}
710 
711 	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
712 	if (IS_ERR(icc_path)) {
713 		ret = PTR_ERR(icc_path);
714 		goto fail;
715 	}
716 
717 	ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
718 	if (IS_ERR(ocmem_icc_path)) {
719 		ret = PTR_ERR(ocmem_icc_path);
720 		/* allow -ENODATA, ocmem icc is optional */
721 		if (ret != -ENODATA)
722 			goto fail;
723 		ocmem_icc_path = NULL;
724 	}
725 
726 	/*
727 	 * Set the ICC path to maximum speed for now by multiplying the fastest
728 	 * frequency by the bus width (8). We'll want to scale this later on to
729 	 * improve battery life.
730 	 */
731 	icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
732 	icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
733 
734 	return gpu;
735 
736 fail:
737 	if (a4xx_gpu)
738 		a4xx_destroy(&a4xx_gpu->base.base);
739 
740 	return ERR_PTR(ret);
741 }
742