xref: /linux/drivers/gpu/drm/msm/adreno/a3xx_gpu.c (revision f6828e0c4045f03f9cf2df6c2a768102641183f4)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */
87198e6b0SRob Clark 
97198e6b0SRob Clark #include "a3xx_gpu.h"
107198e6b0SRob Clark 
/*
 * Set of RBBM interrupt-0 bits this driver enables.  a3xx_hw_init() writes
 * this value to REG_A3XX_RBBM_INT_0_MASK.
 */
#define A3XX_INT0_MASK (				\
	A3XX_INT0_RBBM_AHB_ERROR		|	\
	A3XX_INT0_RBBM_ATB_BUS_OVERFLOW		|	\
	A3XX_INT0_CP_T0_PACKET_IN_IB		|	\
	A3XX_INT0_CP_OPCODE_ERROR		|	\
	A3XX_INT0_CP_RESERVED_BIT_ERROR		|	\
	A3XX_INT0_CP_HW_FAULT			|	\
	A3XX_INT0_CP_IB1_INT			|	\
	A3XX_INT0_CP_IB2_INT			|	\
	A3XX_INT0_CP_RB_INT			|	\
	A3XX_INT0_CP_REG_PROTECT_FAULT		|	\
	A3XX_INT0_CP_AHB_ERROR_HALT		|	\
	A3XX_INT0_CACHE_FLUSH_TS		|	\
	A3XX_INT0_UCHE_OOB_ACCESS)
257198e6b0SRob Clark 
263526e9fbSRob Clark extern bool hang_debug;
275b6ef08eSRob Clark 
285b6ef08eSRob Clark static void a3xx_dump(struct msm_gpu *gpu);
29e895c7bdSJordan Crouse static bool a3xx_idle(struct msm_gpu *gpu);
305b6ef08eSRob Clark 
31c4a8d475SJordan Crouse static bool a3xx_me_init(struct msm_gpu *gpu)
327198e6b0SRob Clark {
33f97decacSJordan Crouse 	struct msm_ringbuffer *ring = gpu->rb[0];
347198e6b0SRob Clark 
357198e6b0SRob Clark 	OUT_PKT3(ring, CP_ME_INIT, 17);
367198e6b0SRob Clark 	OUT_RING(ring, 0x000003f7);
377198e6b0SRob Clark 	OUT_RING(ring, 0x00000000);
387198e6b0SRob Clark 	OUT_RING(ring, 0x00000000);
397198e6b0SRob Clark 	OUT_RING(ring, 0x00000000);
407198e6b0SRob Clark 	OUT_RING(ring, 0x00000080);
417198e6b0SRob Clark 	OUT_RING(ring, 0x00000100);
427198e6b0SRob Clark 	OUT_RING(ring, 0x00000180);
437198e6b0SRob Clark 	OUT_RING(ring, 0x00006600);
447198e6b0SRob Clark 	OUT_RING(ring, 0x00000150);
457198e6b0SRob Clark 	OUT_RING(ring, 0x0000014e);
467198e6b0SRob Clark 	OUT_RING(ring, 0x00000154);
477198e6b0SRob Clark 	OUT_RING(ring, 0x00000001);
487198e6b0SRob Clark 	OUT_RING(ring, 0x00000000);
497198e6b0SRob Clark 	OUT_RING(ring, 0x00000000);
507198e6b0SRob Clark 	OUT_RING(ring, 0x00000000);
517198e6b0SRob Clark 	OUT_RING(ring, 0x00000000);
527198e6b0SRob Clark 	OUT_RING(ring, 0x00000000);
537198e6b0SRob Clark 
54f97decacSJordan Crouse 	gpu->funcs->flush(gpu, ring);
55e895c7bdSJordan Crouse 	return a3xx_idle(gpu);
567198e6b0SRob Clark }
577198e6b0SRob Clark 
587198e6b0SRob Clark static int a3xx_hw_init(struct msm_gpu *gpu)
597198e6b0SRob Clark {
607198e6b0SRob Clark 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
6155459968SRob Clark 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
627198e6b0SRob Clark 	uint32_t *ptr, len;
637198e6b0SRob Clark 	int i, ret;
647198e6b0SRob Clark 
657198e6b0SRob Clark 	DBG("%s", gpu->name);
667198e6b0SRob Clark 
677198e6b0SRob Clark 	if (adreno_is_a305(adreno_gpu)) {
687198e6b0SRob Clark 		/* Set up 16 deep read/write request queues: */
697198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
707198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
717198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
727198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
737198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
747198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
757198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
767198e6b0SRob Clark 		/* Enable WR-REQ: */
777198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
787198e6b0SRob Clark 		/* Set up round robin arbitration between both AXI ports: */
797198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
807198e6b0SRob Clark 		/* Set up AOOO: */
817198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
827198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
83de558cd2SRob Clark 	} else if (adreno_is_a306(adreno_gpu)) {
84de558cd2SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
85de558cd2SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
86de558cd2SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
877198e6b0SRob Clark 	} else if (adreno_is_a320(adreno_gpu)) {
887198e6b0SRob Clark 		/* Set up 16 deep read/write request queues: */
897198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
907198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
917198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
927198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
937198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
947198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
957198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
967198e6b0SRob Clark 		/* Enable WR-REQ: */
977198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
987198e6b0SRob Clark 		/* Set up round robin arbitration between both AXI ports: */
997198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
1007198e6b0SRob Clark 		/* Set up AOOO: */
1017198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
1027198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
1037198e6b0SRob Clark 		/* Enable 1K sort: */
1047198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
1057198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
1067198e6b0SRob Clark 
10755459968SRob Clark 	} else if (adreno_is_a330v2(adreno_gpu)) {
10855459968SRob Clark 		/*
10955459968SRob Clark 		 * Most of the VBIF registers on 8974v2 have the correct
11055459968SRob Clark 		 * values at power on, so we won't modify those if we don't
11155459968SRob Clark 		 * need to
11255459968SRob Clark 		 */
11355459968SRob Clark 		/* Enable 1k sort: */
11455459968SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
11555459968SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
11655459968SRob Clark 		/* Enable WR-REQ: */
11755459968SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
11855459968SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
11955459968SRob Clark 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
12055459968SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
12155459968SRob Clark 
1227198e6b0SRob Clark 	} else if (adreno_is_a330(adreno_gpu)) {
1237198e6b0SRob Clark 		/* Set up 16 deep read/write request queues: */
1247198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
1257198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
1267198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
1277198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
1287198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
1297198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
1307198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
1317198e6b0SRob Clark 		/* Enable WR-REQ: */
1327198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
1337198e6b0SRob Clark 		/* Set up round robin arbitration between both AXI ports: */
1347198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
1357198e6b0SRob Clark 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
1367198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
1377198e6b0SRob Clark 		/* Set up AOOO: */
13855459968SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
13955459968SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
1407198e6b0SRob Clark 		/* Enable 1K sort: */
14155459968SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
1427198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
1437198e6b0SRob Clark 		/* Disable VBIF clock gating. This is to enable AXI running
1447198e6b0SRob Clark 		 * higher frequency than GPU:
1457198e6b0SRob Clark 		 */
1467198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);
1477198e6b0SRob Clark 
1487198e6b0SRob Clark 	} else {
1497198e6b0SRob Clark 		BUG();
1507198e6b0SRob Clark 	}
1517198e6b0SRob Clark 
1527198e6b0SRob Clark 	/* Make all blocks contribute to the GPU BUSY perf counter: */
1537198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
1547198e6b0SRob Clark 
1557198e6b0SRob Clark 	/* Tune the hystersis counters for SP and CP idle detection: */
1567198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
1577198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
1587198e6b0SRob Clark 
1597198e6b0SRob Clark 	/* Enable the RBBM error reporting bits.  This lets us get
1607198e6b0SRob Clark 	 * useful information on failure:
1617198e6b0SRob Clark 	 */
1627198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);
1637198e6b0SRob Clark 
1647198e6b0SRob Clark 	/* Enable AHB error reporting: */
1657198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);
1667198e6b0SRob Clark 
1677198e6b0SRob Clark 	/* Turn on the power counters: */
1687198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);
1697198e6b0SRob Clark 
1707198e6b0SRob Clark 	/* Turn on hang detection - this spews a lot of useful information
1717198e6b0SRob Clark 	 * into the RBBM registers on a hang:
1727198e6b0SRob Clark 	 */
1737198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);
1747198e6b0SRob Clark 
1757198e6b0SRob Clark 	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
1767198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
1777198e6b0SRob Clark 
1787198e6b0SRob Clark 	/* Enable Clock gating: */
179de558cd2SRob Clark 	if (adreno_is_a306(adreno_gpu))
180de558cd2SRob Clark 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
181de558cd2SRob Clark 	else if (adreno_is_a320(adreno_gpu))
1827198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
18355459968SRob Clark 	else if (adreno_is_a330v2(adreno_gpu))
18455459968SRob Clark 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
18555459968SRob Clark 	else if (adreno_is_a330(adreno_gpu))
18655459968SRob Clark 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);
1877198e6b0SRob Clark 
18855459968SRob Clark 	if (adreno_is_a330v2(adreno_gpu))
18955459968SRob Clark 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
19055459968SRob Clark 	else if (adreno_is_a330(adreno_gpu))
19155459968SRob Clark 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);
19255459968SRob Clark 
19355459968SRob Clark 	/* Set the OCMEM base address for A330, etc */
19426c0b26dSBrian Masney 	if (a3xx_gpu->ocmem.hdl) {
19555459968SRob Clark 		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
19626c0b26dSBrian Masney 			(unsigned int)(a3xx_gpu->ocmem.base >> 14));
19755459968SRob Clark 	}
1987198e6b0SRob Clark 
1997198e6b0SRob Clark 	/* Turn on performance counters: */
2007198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);
2017198e6b0SRob Clark 
20270c70f09SRob Clark 	/* Enable the perfcntrs that we use.. */
20370c70f09SRob Clark 	for (i = 0; i < gpu->num_perfcntrs; i++) {
20470c70f09SRob Clark 		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
20570c70f09SRob Clark 		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
20670c70f09SRob Clark 	}
2077198e6b0SRob Clark 
2087198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);
2097198e6b0SRob Clark 
2107198e6b0SRob Clark 	ret = adreno_hw_init(gpu);
2117198e6b0SRob Clark 	if (ret)
2127198e6b0SRob Clark 		return ret;
2137198e6b0SRob Clark 
214*f6828e0cSJordan Crouse 	/*
215*f6828e0cSJordan Crouse 	 * Use the default ringbuffer size and block size but disable the RPTR
216*f6828e0cSJordan Crouse 	 * shadow
217*f6828e0cSJordan Crouse 	 */
218*f6828e0cSJordan Crouse 	gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
219*f6828e0cSJordan Crouse 		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
220*f6828e0cSJordan Crouse 
221*f6828e0cSJordan Crouse 	/* Set the ringbuffer address */
222*f6828e0cSJordan Crouse 	gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));
223*f6828e0cSJordan Crouse 
2247198e6b0SRob Clark 	/* setup access protection: */
2257198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);
2267198e6b0SRob Clark 
2277198e6b0SRob Clark 	/* RBBM registers */
2287198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
2297198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
2307198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
2317198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
2327198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
2337198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);
2347198e6b0SRob Clark 
2357198e6b0SRob Clark 	/* CP registers */
2367198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
2377198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
2387198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
2397198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
2407198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);
2417198e6b0SRob Clark 
2427198e6b0SRob Clark 	/* RB registers */
2437198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);
2447198e6b0SRob Clark 
2457198e6b0SRob Clark 	/* VBIF registers */
2467198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);
2477198e6b0SRob Clark 
2487198e6b0SRob Clark 	/* NOTE: PM4/micro-engine firmware registers look to be the same
2497198e6b0SRob Clark 	 * for a2xx and a3xx.. we could possibly push that part down to
2507198e6b0SRob Clark 	 * adreno_gpu base class.  Or push both PM4 and PFP but
2517198e6b0SRob Clark 	 * parameterize the pfp ucode addr/data registers..
2527198e6b0SRob Clark 	 */
2537198e6b0SRob Clark 
2547198e6b0SRob Clark 	/* Load PM4: */
255c5e3548cSJordan Crouse 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
256c5e3548cSJordan Crouse 	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
257e529c7e6SRob Clark 	DBG("loading PM4 ucode version: %x", ptr[1]);
2587198e6b0SRob Clark 
2597198e6b0SRob Clark 	gpu_write(gpu, REG_AXXX_CP_DEBUG,
2607198e6b0SRob Clark 			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
2617198e6b0SRob Clark 			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
2627198e6b0SRob Clark 	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
2637198e6b0SRob Clark 	for (i = 1; i < len; i++)
2647198e6b0SRob Clark 		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);
2657198e6b0SRob Clark 
2667198e6b0SRob Clark 	/* Load PFP: */
267c5e3548cSJordan Crouse 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
268c5e3548cSJordan Crouse 	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
269e529c7e6SRob Clark 	DBG("loading PFP ucode version: %x", ptr[5]);
2707198e6b0SRob Clark 
2717198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
2727198e6b0SRob Clark 	for (i = 1; i < len; i++)
2737198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
2747198e6b0SRob Clark 
2757198e6b0SRob Clark 	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
276de558cd2SRob Clark 	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
277de558cd2SRob Clark 			adreno_is_a320(adreno_gpu)) {
2787198e6b0SRob Clark 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
2797198e6b0SRob Clark 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
2807198e6b0SRob Clark 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
2817198e6b0SRob Clark 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
28255459968SRob Clark 	} else if (adreno_is_a330(adreno_gpu)) {
28355459968SRob Clark 		/* NOTE: this (value take from downstream android driver)
28455459968SRob Clark 		 * includes some bits outside of the known bitfields.  But
28555459968SRob Clark 		 * A330 has this "MERCIU queue" thing too, which might
28655459968SRob Clark 		 * explain a new bitfield or reshuffling:
28755459968SRob Clark 		 */
28855459968SRob Clark 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
28955459968SRob Clark 	}
2907198e6b0SRob Clark 
2917198e6b0SRob Clark 	/* clear ME_HALT to start micro engine */
2927198e6b0SRob Clark 	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);
2937198e6b0SRob Clark 
294c4a8d475SJordan Crouse 	return a3xx_me_init(gpu) ? 0 : -EINVAL;
2957198e6b0SRob Clark }
2967198e6b0SRob Clark 
29755459968SRob Clark static void a3xx_recover(struct msm_gpu *gpu)
29855459968SRob Clark {
299398efc46SRob Clark 	int i;
300398efc46SRob Clark 
30126716185SRob Clark 	adreno_dump_info(gpu);
30226716185SRob Clark 
303398efc46SRob Clark 	for (i = 0; i < 8; i++) {
304398efc46SRob Clark 		printk("CP_SCRATCH_REG%d: %u\n", i,
305398efc46SRob Clark 			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
306398efc46SRob Clark 	}
307398efc46SRob Clark 
3085b6ef08eSRob Clark 	/* dump registers before resetting gpu, if enabled: */
3095b6ef08eSRob Clark 	if (hang_debug)
3105b6ef08eSRob Clark 		a3xx_dump(gpu);
31126716185SRob Clark 
31255459968SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
31355459968SRob Clark 	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
31455459968SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
31555459968SRob Clark 	adreno_recover(gpu);
31655459968SRob Clark }
31755459968SRob Clark 
3187198e6b0SRob Clark static void a3xx_destroy(struct msm_gpu *gpu)
3197198e6b0SRob Clark {
3207198e6b0SRob Clark 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
3217198e6b0SRob Clark 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
3227198e6b0SRob Clark 
3237198e6b0SRob Clark 	DBG("%s", gpu->name);
3247198e6b0SRob Clark 
3257198e6b0SRob Clark 	adreno_gpu_cleanup(adreno_gpu);
32655459968SRob Clark 
32726c0b26dSBrian Masney 	adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);
32855459968SRob Clark 
3297198e6b0SRob Clark 	kfree(a3xx_gpu);
3307198e6b0SRob Clark }
3317198e6b0SRob Clark 
332c4a8d475SJordan Crouse static bool a3xx_idle(struct msm_gpu *gpu)
3337198e6b0SRob Clark {
3347198e6b0SRob Clark 	/* wait for ringbuffer to drain: */
335f97decacSJordan Crouse 	if (!adreno_idle(gpu, gpu->rb[0]))
336c4a8d475SJordan Crouse 		return false;
3377198e6b0SRob Clark 
3387198e6b0SRob Clark 	/* then wait for GPU to finish: */
3390963756fSRob Clark 	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
340c4a8d475SJordan Crouse 			A3XX_RBBM_STATUS_GPU_BUSY))) {
3410963756fSRob Clark 		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
3427198e6b0SRob Clark 
3437198e6b0SRob Clark 		/* TODO maybe we need to reset GPU here to recover from hang? */
344c4a8d475SJordan Crouse 		return false;
345c4a8d475SJordan Crouse 	}
346c4a8d475SJordan Crouse 
347c4a8d475SJordan Crouse 	return true;
3487198e6b0SRob Clark }
3497198e6b0SRob Clark 
3507198e6b0SRob Clark static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
3517198e6b0SRob Clark {
3527198e6b0SRob Clark 	uint32_t status;
3537198e6b0SRob Clark 
3547198e6b0SRob Clark 	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
3557198e6b0SRob Clark 	DBG("%s: %08x", gpu->name, status);
3567198e6b0SRob Clark 
3577198e6b0SRob Clark 	// TODO
3587198e6b0SRob Clark 
3597198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);
3607198e6b0SRob Clark 
3617198e6b0SRob Clark 	msm_gpu_retire(gpu);
3627198e6b0SRob Clark 
3637198e6b0SRob Clark 	return IRQ_HANDLED;
3647198e6b0SRob Clark }
3657198e6b0SRob Clark 
/*
 * Register table installed as adreno_gpu->registers by a3xx_gpu_init().
 *
 * The values are laid out two at a time and the list ends with a ~0
 * sentinel.  NOTE(review): each pair looks like an inclusive
 * first/last register offset consumed by the common adreno dump code -
 * confirm against the consumer.
 */
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002,   0x0010, 0x0012,   0x0018, 0x0018,   0x0020, 0x0027,
	0x0029, 0x002b,   0x002e, 0x0033,   0x0040, 0x0042,   0x0050, 0x005c,
	0x0060, 0x006c,   0x0080, 0x0082,   0x0084, 0x0088,   0x0090, 0x00e5,
	0x00ea, 0x00ed,   0x0100, 0x0100,   0x0110, 0x0123,   0x01c0, 0x01c1,
	0x01c3, 0x01c5,   0x01c7, 0x01c7,   0x01d5, 0x01d9,   0x01dc, 0x01dd,
	0x01ea, 0x01ea,   0x01ee, 0x01f1,   0x01f5, 0x01f5,   0x01fc, 0x01ff,
	0x0440, 0x0440,   0x0443, 0x0443,   0x0445, 0x0445,   0x044d, 0x044f,
	0x0452, 0x0452,   0x0454, 0x046f,   0x047c, 0x047c,   0x047f, 0x047f,
	0x0578, 0x057f,   0x0600, 0x0602,   0x0605, 0x0607,   0x060a, 0x060e,
	0x0612, 0x0614,   0x0c01, 0x0c02,   0x0c06, 0x0c1d,   0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b,   0x0c80, 0x0c80,   0x0c88, 0x0c8b,   0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1,   0x0cc6, 0x0cc7,   0x0ce4, 0x0ce5,   0x0e00, 0x0e05,
	0x0e0c, 0x0e0c,   0x0e22, 0x0e23,   0x0e41, 0x0e45,   0x0e64, 0x0e65,
	0x0e80, 0x0e82,   0x0e84, 0x0e89,   0x0ea0, 0x0ea1,   0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb,   0x0ee0, 0x0ee0,   0x0f00, 0x0f01,   0x0f03, 0x0f09,
	0x2040, 0x2040,   0x2044, 0x2044,   0x2048, 0x204d,   0x2068, 0x2069,
	0x206c, 0x206d,   0x2070, 0x2070,   0x2072, 0x2072,   0x2074, 0x2075,
	0x2079, 0x207a,   0x20c0, 0x20d3,   0x20e4, 0x20ef,   0x2100, 0x2109,
	0x210c, 0x210c,   0x210e, 0x210e,   0x2110, 0x2111,   0x2114, 0x2115,
	0x21e4, 0x21e4,   0x21ea, 0x21ea,   0x21ec, 0x21ed,   0x21f0, 0x21f0,
	0x2200, 0x2212,   0x2214, 0x2217,   0x221a, 0x221a,   0x2240, 0x227e,
	0x2280, 0x228b,   0x22c0, 0x22c0,   0x22c4, 0x22ce,   0x22d0, 0x22d8,
	0x22df, 0x22e6,   0x22e8, 0x22e9,   0x22ec, 0x22ec,   0x22f0, 0x22f7,
	0x22ff, 0x22ff,   0x2340, 0x2343,   0x2440, 0x2440,   0x2444, 0x2444,
	0x2448, 0x244d,   0x2468, 0x2469,   0x246c, 0x246d,   0x2470, 0x2470,
	0x2472, 0x2472,   0x2474, 0x2475,   0x2479, 0x247a,   0x24c0, 0x24d3,
	0x24e4, 0x24ef,   0x2500, 0x2509,   0x250c, 0x250c,   0x250e, 0x250e,
	0x2510, 0x2511,   0x2514, 0x2515,   0x25e4, 0x25e4,   0x25ea, 0x25ea,
	0x25ec, 0x25ed,   0x25f0, 0x25f0,   0x2600, 0x2612,   0x2614, 0x2617,
	0x261a, 0x261a,   0x2640, 0x267e,   0x2680, 0x268b,   0x26c0, 0x26c0,
	0x26c4, 0x26ce,   0x26d0, 0x26d8,   0x26df, 0x26e6,   0x26e8, 0x26e9,
	0x26ec, 0x26ec,   0x26f0, 0x26f7,   0x26ff, 0x26ff,   0x2740, 0x2743,
	0x300c, 0x300e,   0x301c, 0x301d,   0x302a, 0x302a,   0x302c, 0x302d,
	0x3030, 0x3031,   0x3034, 0x3036,   0x303c, 0x303c,   0x305e, 0x305f,
	~0   /* sentinel */
};
4037198e6b0SRob Clark 
4045b6ef08eSRob Clark /* would be nice to not have to duplicate the _show() stuff with printk(): */
4055b6ef08eSRob Clark static void a3xx_dump(struct msm_gpu *gpu)
4065b6ef08eSRob Clark {
4075b6ef08eSRob Clark 	printk("status:   %08x\n",
4085b6ef08eSRob Clark 			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
4093bcefb04SRob Clark 	adreno_dump(gpu);
4105b6ef08eSRob Clark }
411e00e473dSJordan Crouse 
412e00e473dSJordan Crouse static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
413e00e473dSJordan Crouse {
41450f8d218SJordan Crouse 	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
415e00e473dSJordan Crouse 
41650f8d218SJordan Crouse 	if (!state)
41750f8d218SJordan Crouse 		return ERR_PTR(-ENOMEM);
41850f8d218SJordan Crouse 
41950f8d218SJordan Crouse 	adreno_gpu_state_get(gpu, state);
420e00e473dSJordan Crouse 
421e00e473dSJordan Crouse 	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);
422e00e473dSJordan Crouse 
423e00e473dSJordan Crouse 	return state;
424e00e473dSJordan Crouse }
425e00e473dSJordan Crouse 
42691b74e97SAravind Ganesan /* Register offset defines for A3XX */
42791b74e97SAravind Ganesan static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
42891b74e97SAravind Ganesan 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
429fb039981SJordan Crouse 	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
43091b74e97SAravind Ganesan 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR),
431fb039981SJordan Crouse 	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
43291b74e97SAravind Ganesan 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR),
43391b74e97SAravind Ganesan 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR),
43491b74e97SAravind Ganesan 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL),
43591b74e97SAravind Ganesan };
4365b6ef08eSRob Clark 
4377198e6b0SRob Clark static const struct adreno_gpu_funcs funcs = {
4387198e6b0SRob Clark 	.base = {
4397198e6b0SRob Clark 		.get_param = adreno_get_param,
4407198e6b0SRob Clark 		.hw_init = a3xx_hw_init,
4417198e6b0SRob Clark 		.pm_suspend = msm_gpu_pm_suspend,
4427198e6b0SRob Clark 		.pm_resume = msm_gpu_pm_resume,
44355459968SRob Clark 		.recover = a3xx_recover,
4447198e6b0SRob Clark 		.submit = adreno_submit,
4457198e6b0SRob Clark 		.flush = adreno_flush,
446f97decacSJordan Crouse 		.active_ring = adreno_active_ring,
4477198e6b0SRob Clark 		.irq = a3xx_irq,
4487198e6b0SRob Clark 		.destroy = a3xx_destroy,
449c0fec7f5SJordan Crouse #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
4504f776f45SJordan Crouse 		.show = adreno_show,
4517198e6b0SRob Clark #endif
452e00e473dSJordan Crouse 		.gpu_state_get = a3xx_gpu_state_get,
453e00e473dSJordan Crouse 		.gpu_state_put = adreno_gpu_state_put,
454ccac7ce3SJordan Crouse 		.create_address_space = adreno_iommu_create_address_space,
4557198e6b0SRob Clark 	},
4567198e6b0SRob Clark };
4577198e6b0SRob Clark 
45870c70f09SRob Clark static const struct msm_gpu_perfcntr perfcntrs[] = {
45970c70f09SRob Clark 	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
46070c70f09SRob Clark 			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
46170c70f09SRob Clark 	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
46270c70f09SRob Clark 			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
46370c70f09SRob Clark };
46470c70f09SRob Clark 
4657198e6b0SRob Clark struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
4667198e6b0SRob Clark {
4677198e6b0SRob Clark 	struct a3xx_gpu *a3xx_gpu = NULL;
46855459968SRob Clark 	struct adreno_gpu *adreno_gpu;
4697198e6b0SRob Clark 	struct msm_gpu *gpu;
470060530f1SRob Clark 	struct msm_drm_private *priv = dev->dev_private;
471060530f1SRob Clark 	struct platform_device *pdev = priv->gpu_pdev;
4727198e6b0SRob Clark 	int ret;
4737198e6b0SRob Clark 
4747198e6b0SRob Clark 	if (!pdev) {
4756a41da17SMamta Shukla 		DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
4767198e6b0SRob Clark 		ret = -ENXIO;
4777198e6b0SRob Clark 		goto fail;
4787198e6b0SRob Clark 	}
4797198e6b0SRob Clark 
4807198e6b0SRob Clark 	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
4817198e6b0SRob Clark 	if (!a3xx_gpu) {
4827198e6b0SRob Clark 		ret = -ENOMEM;
4837198e6b0SRob Clark 		goto fail;
4847198e6b0SRob Clark 	}
4857198e6b0SRob Clark 
48655459968SRob Clark 	adreno_gpu = &a3xx_gpu->base;
48755459968SRob Clark 	gpu = &adreno_gpu->base;
4887198e6b0SRob Clark 
48970c70f09SRob Clark 	gpu->perfcntrs = perfcntrs;
49070c70f09SRob Clark 	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);
49170c70f09SRob Clark 
4923bcefb04SRob Clark 	adreno_gpu->registers = a3xx_registers;
49391b74e97SAravind Ganesan 	adreno_gpu->reg_offsets = a3xx_register_offsets;
4943bcefb04SRob Clark 
495f97decacSJordan Crouse 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
4967198e6b0SRob Clark 	if (ret)
4977198e6b0SRob Clark 		goto fail;
4987198e6b0SRob Clark 
49955459968SRob Clark 	/* if needed, allocate gmem: */
50055459968SRob Clark 	if (adreno_is_a330(adreno_gpu)) {
50126c0b26dSBrian Masney 		ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
50226c0b26dSBrian Masney 					    adreno_gpu, &a3xx_gpu->ocmem);
50326c0b26dSBrian Masney 		if (ret)
50426c0b26dSBrian Masney 			goto fail;
50555459968SRob Clark 	}
50655459968SRob Clark 
507667ce33eSRob Clark 	if (!gpu->aspace) {
508871d812aSRob Clark 		/* TODO we think it is possible to configure the GPU to
509871d812aSRob Clark 		 * restrict access to VRAM carveout.  But the required
510871d812aSRob Clark 		 * registers are unknown.  For now just bail out and
511871d812aSRob Clark 		 * limp along with just modesetting.  If it turns out
512871d812aSRob Clark 		 * to not be possible to restrict access, then we must
513871d812aSRob Clark 		 * implement a cmdstream validator.
514871d812aSRob Clark 		 */
5156a41da17SMamta Shukla 		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
516871d812aSRob Clark 		ret = -ENXIO;
517871d812aSRob Clark 		goto fail;
518871d812aSRob Clark 	}
519871d812aSRob Clark 
520d163ba0bSBrian Masney 	/*
521d163ba0bSBrian Masney 	 * Set the ICC path to maximum speed for now by multiplying the fastest
522d163ba0bSBrian Masney 	 * frequency by the bus width (8). We'll want to scale this later on to
523d163ba0bSBrian Masney 	 * improve battery life.
524d163ba0bSBrian Masney 	 */
525d163ba0bSBrian Masney 	icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
526d163ba0bSBrian Masney 	icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
527d163ba0bSBrian Masney 
528871d812aSRob Clark 	return gpu;
5297198e6b0SRob Clark 
5307198e6b0SRob Clark fail:
5317198e6b0SRob Clark 	if (a3xx_gpu)
5327198e6b0SRob Clark 		a3xx_destroy(&a3xx_gpu->base.base);
5337198e6b0SRob Clark 
5347198e6b0SRob Clark 	return ERR_PTR(ret);
5357198e6b0SRob Clark }
536