xref: /linux/drivers/gpu/drm/msm/adreno/a3xx_gpu.c (revision e00e473d9817e03cddbaf181a491c42ae8373482)
17198e6b0SRob Clark /*
27198e6b0SRob Clark  * Copyright (C) 2013 Red Hat
37198e6b0SRob Clark  * Author: Rob Clark <robdclark@gmail.com>
47198e6b0SRob Clark  *
591b74e97SAravind Ganesan  * Copyright (c) 2014 The Linux Foundation. All rights reserved.
691b74e97SAravind Ganesan  *
77198e6b0SRob Clark  * This program is free software; you can redistribute it and/or modify it
87198e6b0SRob Clark  * under the terms of the GNU General Public License version 2 as published by
97198e6b0SRob Clark  * the Free Software Foundation.
107198e6b0SRob Clark  *
117198e6b0SRob Clark  * This program is distributed in the hope that it will be useful, but WITHOUT
127198e6b0SRob Clark  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
137198e6b0SRob Clark  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
147198e6b0SRob Clark  * more details.
157198e6b0SRob Clark  *
167198e6b0SRob Clark  * You should have received a copy of the GNU General Public License along with
177198e6b0SRob Clark  * this program.  If not, see <http://www.gnu.org/licenses/>.
187198e6b0SRob Clark  */
197198e6b0SRob Clark 
2055459968SRob Clark #ifdef CONFIG_MSM_OCMEM
2155459968SRob Clark #  include <mach/ocmem.h>
2255459968SRob Clark #endif
2355459968SRob Clark 
247198e6b0SRob Clark #include "a3xx_gpu.h"
257198e6b0SRob Clark 
/*
 * Interrupt sources written to REG_A3XX_RBBM_INT_0_MASK by a3xx_hw_init():
 * RBBM/CP error conditions, CP ring/IB interrupts, the cache-flush
 * timestamp interrupt and UCHE out-of-bounds access.
 */
#define A3XX_INT0_MASK                      \
	(A3XX_INT0_RBBM_AHB_ERROR         | \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW  | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB     | \
	 A3XX_INT0_CP_OPCODE_ERROR        | \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR  | \
	 A3XX_INT0_CP_HW_FAULT            | \
	 A3XX_INT0_CP_IB1_INT             | \
	 A3XX_INT0_CP_IB2_INT             | \
	 A3XX_INT0_CP_RB_INT              | \
	 A3XX_INT0_CP_REG_PROTECT_FAULT   | \
	 A3XX_INT0_CP_AHB_ERROR_HALT      | \
	 A3XX_INT0_CACHE_FLUSH_TS         | \
	 A3XX_INT0_UCHE_OOB_ACCESS)
407198e6b0SRob Clark 
/* Defined outside this file; when set, a3xx_recover() dumps registers. */
extern bool hang_debug;

/* Forward declarations for helpers defined further down in this file. */
static bool a3xx_idle(struct msm_gpu *gpu);
static void a3xx_dump(struct msm_gpu *gpu);
455b6ef08eSRob Clark 
46c4a8d475SJordan Crouse static bool a3xx_me_init(struct msm_gpu *gpu)
477198e6b0SRob Clark {
48f97decacSJordan Crouse 	struct msm_ringbuffer *ring = gpu->rb[0];
497198e6b0SRob Clark 
507198e6b0SRob Clark 	OUT_PKT3(ring, CP_ME_INIT, 17);
517198e6b0SRob Clark 	OUT_RING(ring, 0x000003f7);
527198e6b0SRob Clark 	OUT_RING(ring, 0x00000000);
537198e6b0SRob Clark 	OUT_RING(ring, 0x00000000);
547198e6b0SRob Clark 	OUT_RING(ring, 0x00000000);
557198e6b0SRob Clark 	OUT_RING(ring, 0x00000080);
567198e6b0SRob Clark 	OUT_RING(ring, 0x00000100);
577198e6b0SRob Clark 	OUT_RING(ring, 0x00000180);
587198e6b0SRob Clark 	OUT_RING(ring, 0x00006600);
597198e6b0SRob Clark 	OUT_RING(ring, 0x00000150);
607198e6b0SRob Clark 	OUT_RING(ring, 0x0000014e);
617198e6b0SRob Clark 	OUT_RING(ring, 0x00000154);
627198e6b0SRob Clark 	OUT_RING(ring, 0x00000001);
637198e6b0SRob Clark 	OUT_RING(ring, 0x00000000);
647198e6b0SRob Clark 	OUT_RING(ring, 0x00000000);
657198e6b0SRob Clark 	OUT_RING(ring, 0x00000000);
667198e6b0SRob Clark 	OUT_RING(ring, 0x00000000);
677198e6b0SRob Clark 	OUT_RING(ring, 0x00000000);
687198e6b0SRob Clark 
69f97decacSJordan Crouse 	gpu->funcs->flush(gpu, ring);
70e895c7bdSJordan Crouse 	return a3xx_idle(gpu);
717198e6b0SRob Clark }
727198e6b0SRob Clark 
737198e6b0SRob Clark static int a3xx_hw_init(struct msm_gpu *gpu)
747198e6b0SRob Clark {
757198e6b0SRob Clark 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
7655459968SRob Clark 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
777198e6b0SRob Clark 	uint32_t *ptr, len;
787198e6b0SRob Clark 	int i, ret;
797198e6b0SRob Clark 
807198e6b0SRob Clark 	DBG("%s", gpu->name);
817198e6b0SRob Clark 
827198e6b0SRob Clark 	if (adreno_is_a305(adreno_gpu)) {
837198e6b0SRob Clark 		/* Set up 16 deep read/write request queues: */
847198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
857198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
867198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
877198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
887198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
897198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
907198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
917198e6b0SRob Clark 		/* Enable WR-REQ: */
927198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
937198e6b0SRob Clark 		/* Set up round robin arbitration between both AXI ports: */
947198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
957198e6b0SRob Clark 		/* Set up AOOO: */
967198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
977198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
98de558cd2SRob Clark 	} else if (adreno_is_a306(adreno_gpu)) {
99de558cd2SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
100de558cd2SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
101de558cd2SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
1027198e6b0SRob Clark 	} else if (adreno_is_a320(adreno_gpu)) {
1037198e6b0SRob Clark 		/* Set up 16 deep read/write request queues: */
1047198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
1057198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
1067198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
1077198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
1087198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
1097198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
1107198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
1117198e6b0SRob Clark 		/* Enable WR-REQ: */
1127198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
1137198e6b0SRob Clark 		/* Set up round robin arbitration between both AXI ports: */
1147198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
1157198e6b0SRob Clark 		/* Set up AOOO: */
1167198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
1177198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
1187198e6b0SRob Clark 		/* Enable 1K sort: */
1197198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
1207198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
1217198e6b0SRob Clark 
12255459968SRob Clark 	} else if (adreno_is_a330v2(adreno_gpu)) {
12355459968SRob Clark 		/*
12455459968SRob Clark 		 * Most of the VBIF registers on 8974v2 have the correct
12555459968SRob Clark 		 * values at power on, so we won't modify those if we don't
12655459968SRob Clark 		 * need to
12755459968SRob Clark 		 */
12855459968SRob Clark 		/* Enable 1k sort: */
12955459968SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
13055459968SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
13155459968SRob Clark 		/* Enable WR-REQ: */
13255459968SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
13355459968SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
13455459968SRob Clark 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
13555459968SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
13655459968SRob Clark 
1377198e6b0SRob Clark 	} else if (adreno_is_a330(adreno_gpu)) {
1387198e6b0SRob Clark 		/* Set up 16 deep read/write request queues: */
1397198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
1407198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
1417198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
1427198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
1437198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
1447198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
1457198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
1467198e6b0SRob Clark 		/* Enable WR-REQ: */
1477198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
1487198e6b0SRob Clark 		/* Set up round robin arbitration between both AXI ports: */
1497198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
1507198e6b0SRob Clark 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
1517198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
1527198e6b0SRob Clark 		/* Set up AOOO: */
15355459968SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
15455459968SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
1557198e6b0SRob Clark 		/* Enable 1K sort: */
15655459968SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
1577198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
1587198e6b0SRob Clark 		/* Disable VBIF clock gating. This is to enable AXI running
1597198e6b0SRob Clark 		 * higher frequency than GPU:
1607198e6b0SRob Clark 		 */
1617198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);
1627198e6b0SRob Clark 
1637198e6b0SRob Clark 	} else {
1647198e6b0SRob Clark 		BUG();
1657198e6b0SRob Clark 	}
1667198e6b0SRob Clark 
1677198e6b0SRob Clark 	/* Make all blocks contribute to the GPU BUSY perf counter: */
1687198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
1697198e6b0SRob Clark 
1707198e6b0SRob Clark 	/* Tune the hystersis counters for SP and CP idle detection: */
1717198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
1727198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
1737198e6b0SRob Clark 
1747198e6b0SRob Clark 	/* Enable the RBBM error reporting bits.  This lets us get
1757198e6b0SRob Clark 	 * useful information on failure:
1767198e6b0SRob Clark 	 */
1777198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);
1787198e6b0SRob Clark 
1797198e6b0SRob Clark 	/* Enable AHB error reporting: */
1807198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);
1817198e6b0SRob Clark 
1827198e6b0SRob Clark 	/* Turn on the power counters: */
1837198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);
1847198e6b0SRob Clark 
1857198e6b0SRob Clark 	/* Turn on hang detection - this spews a lot of useful information
1867198e6b0SRob Clark 	 * into the RBBM registers on a hang:
1877198e6b0SRob Clark 	 */
1887198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);
1897198e6b0SRob Clark 
1907198e6b0SRob Clark 	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
1917198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
1927198e6b0SRob Clark 
1937198e6b0SRob Clark 	/* Enable Clock gating: */
194de558cd2SRob Clark 	if (adreno_is_a306(adreno_gpu))
195de558cd2SRob Clark 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
196de558cd2SRob Clark 	else if (adreno_is_a320(adreno_gpu))
1977198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
19855459968SRob Clark 	else if (adreno_is_a330v2(adreno_gpu))
19955459968SRob Clark 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
20055459968SRob Clark 	else if (adreno_is_a330(adreno_gpu))
20155459968SRob Clark 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);
2027198e6b0SRob Clark 
20355459968SRob Clark 	if (adreno_is_a330v2(adreno_gpu))
20455459968SRob Clark 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
20555459968SRob Clark 	else if (adreno_is_a330(adreno_gpu))
20655459968SRob Clark 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);
20755459968SRob Clark 
20855459968SRob Clark 	/* Set the OCMEM base address for A330, etc */
20955459968SRob Clark 	if (a3xx_gpu->ocmem_hdl) {
21055459968SRob Clark 		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
21155459968SRob Clark 			(unsigned int)(a3xx_gpu->ocmem_base >> 14));
21255459968SRob Clark 	}
2137198e6b0SRob Clark 
2147198e6b0SRob Clark 	/* Turn on performance counters: */
2157198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);
2167198e6b0SRob Clark 
21770c70f09SRob Clark 	/* Enable the perfcntrs that we use.. */
21870c70f09SRob Clark 	for (i = 0; i < gpu->num_perfcntrs; i++) {
21970c70f09SRob Clark 		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
22070c70f09SRob Clark 		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
22170c70f09SRob Clark 	}
2227198e6b0SRob Clark 
2237198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);
2247198e6b0SRob Clark 
2257198e6b0SRob Clark 	ret = adreno_hw_init(gpu);
2267198e6b0SRob Clark 	if (ret)
2277198e6b0SRob Clark 		return ret;
2287198e6b0SRob Clark 
2297198e6b0SRob Clark 	/* setup access protection: */
2307198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);
2317198e6b0SRob Clark 
2327198e6b0SRob Clark 	/* RBBM registers */
2337198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
2347198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
2357198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
2367198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
2377198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
2387198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);
2397198e6b0SRob Clark 
2407198e6b0SRob Clark 	/* CP registers */
2417198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
2427198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
2437198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
2447198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
2457198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);
2467198e6b0SRob Clark 
2477198e6b0SRob Clark 	/* RB registers */
2487198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);
2497198e6b0SRob Clark 
2507198e6b0SRob Clark 	/* VBIF registers */
2517198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);
2527198e6b0SRob Clark 
2537198e6b0SRob Clark 	/* NOTE: PM4/micro-engine firmware registers look to be the same
2547198e6b0SRob Clark 	 * for a2xx and a3xx.. we could possibly push that part down to
2557198e6b0SRob Clark 	 * adreno_gpu base class.  Or push both PM4 and PFP but
2567198e6b0SRob Clark 	 * parameterize the pfp ucode addr/data registers..
2577198e6b0SRob Clark 	 */
2587198e6b0SRob Clark 
2597198e6b0SRob Clark 	/* Load PM4: */
260c5e3548cSJordan Crouse 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
261c5e3548cSJordan Crouse 	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
262e529c7e6SRob Clark 	DBG("loading PM4 ucode version: %x", ptr[1]);
2637198e6b0SRob Clark 
2647198e6b0SRob Clark 	gpu_write(gpu, REG_AXXX_CP_DEBUG,
2657198e6b0SRob Clark 			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
2667198e6b0SRob Clark 			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
2677198e6b0SRob Clark 	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
2687198e6b0SRob Clark 	for (i = 1; i < len; i++)
2697198e6b0SRob Clark 		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);
2707198e6b0SRob Clark 
2717198e6b0SRob Clark 	/* Load PFP: */
272c5e3548cSJordan Crouse 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
273c5e3548cSJordan Crouse 	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
274e529c7e6SRob Clark 	DBG("loading PFP ucode version: %x", ptr[5]);
2757198e6b0SRob Clark 
2767198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
2777198e6b0SRob Clark 	for (i = 1; i < len; i++)
2787198e6b0SRob Clark 		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
2797198e6b0SRob Clark 
2807198e6b0SRob Clark 	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
281de558cd2SRob Clark 	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
282de558cd2SRob Clark 			adreno_is_a320(adreno_gpu)) {
2837198e6b0SRob Clark 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
2847198e6b0SRob Clark 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
2857198e6b0SRob Clark 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
2867198e6b0SRob Clark 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
28755459968SRob Clark 	} else if (adreno_is_a330(adreno_gpu)) {
28855459968SRob Clark 		/* NOTE: this (value take from downstream android driver)
28955459968SRob Clark 		 * includes some bits outside of the known bitfields.  But
29055459968SRob Clark 		 * A330 has this "MERCIU queue" thing too, which might
29155459968SRob Clark 		 * explain a new bitfield or reshuffling:
29255459968SRob Clark 		 */
29355459968SRob Clark 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
29455459968SRob Clark 	}
2957198e6b0SRob Clark 
2967198e6b0SRob Clark 	/* clear ME_HALT to start micro engine */
2977198e6b0SRob Clark 	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);
2987198e6b0SRob Clark 
299c4a8d475SJordan Crouse 	return a3xx_me_init(gpu) ? 0 : -EINVAL;
3007198e6b0SRob Clark }
3017198e6b0SRob Clark 
30255459968SRob Clark static void a3xx_recover(struct msm_gpu *gpu)
30355459968SRob Clark {
304398efc46SRob Clark 	int i;
305398efc46SRob Clark 
30626716185SRob Clark 	adreno_dump_info(gpu);
30726716185SRob Clark 
308398efc46SRob Clark 	for (i = 0; i < 8; i++) {
309398efc46SRob Clark 		printk("CP_SCRATCH_REG%d: %u\n", i,
310398efc46SRob Clark 			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
311398efc46SRob Clark 	}
312398efc46SRob Clark 
3135b6ef08eSRob Clark 	/* dump registers before resetting gpu, if enabled: */
3145b6ef08eSRob Clark 	if (hang_debug)
3155b6ef08eSRob Clark 		a3xx_dump(gpu);
31626716185SRob Clark 
31755459968SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
31855459968SRob Clark 	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
31955459968SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
32055459968SRob Clark 	adreno_recover(gpu);
32155459968SRob Clark }
32255459968SRob Clark 
3237198e6b0SRob Clark static void a3xx_destroy(struct msm_gpu *gpu)
3247198e6b0SRob Clark {
3257198e6b0SRob Clark 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
3267198e6b0SRob Clark 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
3277198e6b0SRob Clark 
3287198e6b0SRob Clark 	DBG("%s", gpu->name);
3297198e6b0SRob Clark 
3307198e6b0SRob Clark 	adreno_gpu_cleanup(adreno_gpu);
33155459968SRob Clark 
33255459968SRob Clark #ifdef CONFIG_MSM_OCMEM
33355459968SRob Clark 	if (a3xx_gpu->ocmem_base)
33455459968SRob Clark 		ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
33555459968SRob Clark #endif
33655459968SRob Clark 
3377198e6b0SRob Clark 	kfree(a3xx_gpu);
3387198e6b0SRob Clark }
3397198e6b0SRob Clark 
340c4a8d475SJordan Crouse static bool a3xx_idle(struct msm_gpu *gpu)
3417198e6b0SRob Clark {
3427198e6b0SRob Clark 	/* wait for ringbuffer to drain: */
343f97decacSJordan Crouse 	if (!adreno_idle(gpu, gpu->rb[0]))
344c4a8d475SJordan Crouse 		return false;
3457198e6b0SRob Clark 
3467198e6b0SRob Clark 	/* then wait for GPU to finish: */
3470963756fSRob Clark 	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
348c4a8d475SJordan Crouse 			A3XX_RBBM_STATUS_GPU_BUSY))) {
3490963756fSRob Clark 		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
3507198e6b0SRob Clark 
3517198e6b0SRob Clark 		/* TODO maybe we need to reset GPU here to recover from hang? */
352c4a8d475SJordan Crouse 		return false;
353c4a8d475SJordan Crouse 	}
354c4a8d475SJordan Crouse 
355c4a8d475SJordan Crouse 	return true;
3567198e6b0SRob Clark }
3577198e6b0SRob Clark 
3587198e6b0SRob Clark static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
3597198e6b0SRob Clark {
3607198e6b0SRob Clark 	uint32_t status;
3617198e6b0SRob Clark 
3627198e6b0SRob Clark 	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
3637198e6b0SRob Clark 	DBG("%s: %08x", gpu->name, status);
3647198e6b0SRob Clark 
3657198e6b0SRob Clark 	// TODO
3667198e6b0SRob Clark 
3677198e6b0SRob Clark 	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);
3687198e6b0SRob Clark 
3697198e6b0SRob Clark 	msm_gpu_retire(gpu);
3707198e6b0SRob Clark 
3717198e6b0SRob Clark 	return IRQ_HANDLED;
3727198e6b0SRob Clark }
3737198e6b0SRob Clark 
/*
 * Register list handed to the adreno core through adreno_gpu->registers.
 * The entries appear to be (first, last) register-offset pairs (a pair with
 * equal values naming a single register) - confirm against the consumer in
 * the adreno core.  The list is terminated by a ~0 sentinel.
 */
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760, 0x300c, 0x300e, 0x301c, 0x301d,
	0x302a, 0x302a, 0x302c, 0x302d, 0x3030, 0x3031, 0x3034, 0x3036,
	0x303c, 0x303c, 0x305e, 0x305f,
	~0   /* sentinel */
};
4137198e6b0SRob Clark 
#ifdef CONFIG_DEBUG_FS
/*
 * debugfs hook: print the current RBBM_STATUS value to the seq_file, then
 * let adreno_show() append its own output.
 */
static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
{
	uint32_t rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);

	seq_printf(m, "status:   %08x\n", rbbm_status);
	adreno_show(gpu, m);
}
#endif
4227198e6b0SRob Clark 
4235b6ef08eSRob Clark /* would be nice to not have to duplicate the _show() stuff with printk(): */
4245b6ef08eSRob Clark static void a3xx_dump(struct msm_gpu *gpu)
4255b6ef08eSRob Clark {
4265b6ef08eSRob Clark 	printk("status:   %08x\n",
4275b6ef08eSRob Clark 			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
4283bcefb04SRob Clark 	adreno_dump(gpu);
4295b6ef08eSRob Clark }
430*e00e473dSJordan Crouse 
431*e00e473dSJordan Crouse static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
432*e00e473dSJordan Crouse {
433*e00e473dSJordan Crouse 	struct msm_gpu_state *state = adreno_gpu_state_get(gpu);
434*e00e473dSJordan Crouse 
435*e00e473dSJordan Crouse 	if (IS_ERR(state))
436*e00e473dSJordan Crouse 		return state;
437*e00e473dSJordan Crouse 
438*e00e473dSJordan Crouse 	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);
439*e00e473dSJordan Crouse 
440*e00e473dSJordan Crouse 	return state;
441*e00e473dSJordan Crouse }
442*e00e473dSJordan Crouse 
44391b74e97SAravind Ganesan /* Register offset defines for A3XX */
44491b74e97SAravind Ganesan static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
44591b74e97SAravind Ganesan 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
446fb039981SJordan Crouse 	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
44791b74e97SAravind Ganesan 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR),
448fb039981SJordan Crouse 	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
44991b74e97SAravind Ganesan 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR),
45091b74e97SAravind Ganesan 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR),
45191b74e97SAravind Ganesan 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL),
45291b74e97SAravind Ganesan };
4535b6ef08eSRob Clark 
4547198e6b0SRob Clark static const struct adreno_gpu_funcs funcs = {
4557198e6b0SRob Clark 	.base = {
4567198e6b0SRob Clark 		.get_param = adreno_get_param,
4577198e6b0SRob Clark 		.hw_init = a3xx_hw_init,
4587198e6b0SRob Clark 		.pm_suspend = msm_gpu_pm_suspend,
4597198e6b0SRob Clark 		.pm_resume = msm_gpu_pm_resume,
46055459968SRob Clark 		.recover = a3xx_recover,
4617198e6b0SRob Clark 		.submit = adreno_submit,
4627198e6b0SRob Clark 		.flush = adreno_flush,
463f97decacSJordan Crouse 		.active_ring = adreno_active_ring,
4647198e6b0SRob Clark 		.irq = a3xx_irq,
4657198e6b0SRob Clark 		.destroy = a3xx_destroy,
4667198e6b0SRob Clark #ifdef CONFIG_DEBUG_FS
4677198e6b0SRob Clark 		.show = a3xx_show,
4687198e6b0SRob Clark #endif
469*e00e473dSJordan Crouse 		.gpu_state_get = a3xx_gpu_state_get,
470*e00e473dSJordan Crouse 		.gpu_state_put = adreno_gpu_state_put,
4717198e6b0SRob Clark 	},
4727198e6b0SRob Clark };
4737198e6b0SRob Clark 
47470c70f09SRob Clark static const struct msm_gpu_perfcntr perfcntrs[] = {
47570c70f09SRob Clark 	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
47670c70f09SRob Clark 			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
47770c70f09SRob Clark 	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
47870c70f09SRob Clark 			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
47970c70f09SRob Clark };
48070c70f09SRob Clark 
4817198e6b0SRob Clark struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
4827198e6b0SRob Clark {
4837198e6b0SRob Clark 	struct a3xx_gpu *a3xx_gpu = NULL;
48455459968SRob Clark 	struct adreno_gpu *adreno_gpu;
4857198e6b0SRob Clark 	struct msm_gpu *gpu;
486060530f1SRob Clark 	struct msm_drm_private *priv = dev->dev_private;
487060530f1SRob Clark 	struct platform_device *pdev = priv->gpu_pdev;
4887198e6b0SRob Clark 	int ret;
4897198e6b0SRob Clark 
4907198e6b0SRob Clark 	if (!pdev) {
4917198e6b0SRob Clark 		dev_err(dev->dev, "no a3xx device\n");
4927198e6b0SRob Clark 		ret = -ENXIO;
4937198e6b0SRob Clark 		goto fail;
4947198e6b0SRob Clark 	}
4957198e6b0SRob Clark 
4967198e6b0SRob Clark 	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
4977198e6b0SRob Clark 	if (!a3xx_gpu) {
4987198e6b0SRob Clark 		ret = -ENOMEM;
4997198e6b0SRob Clark 		goto fail;
5007198e6b0SRob Clark 	}
5017198e6b0SRob Clark 
50255459968SRob Clark 	adreno_gpu = &a3xx_gpu->base;
50355459968SRob Clark 	gpu = &adreno_gpu->base;
5047198e6b0SRob Clark 
50570c70f09SRob Clark 	gpu->perfcntrs = perfcntrs;
50670c70f09SRob Clark 	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);
50770c70f09SRob Clark 
5083bcefb04SRob Clark 	adreno_gpu->registers = a3xx_registers;
50991b74e97SAravind Ganesan 	adreno_gpu->reg_offsets = a3xx_register_offsets;
5103bcefb04SRob Clark 
511f97decacSJordan Crouse 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
5127198e6b0SRob Clark 	if (ret)
5137198e6b0SRob Clark 		goto fail;
5147198e6b0SRob Clark 
51555459968SRob Clark 	/* if needed, allocate gmem: */
51655459968SRob Clark 	if (adreno_is_a330(adreno_gpu)) {
51755459968SRob Clark #ifdef CONFIG_MSM_OCMEM
51855459968SRob Clark 		/* TODO this is different/missing upstream: */
51955459968SRob Clark 		struct ocmem_buf *ocmem_hdl =
52055459968SRob Clark 				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);
52155459968SRob Clark 
52255459968SRob Clark 		a3xx_gpu->ocmem_hdl = ocmem_hdl;
52355459968SRob Clark 		a3xx_gpu->ocmem_base = ocmem_hdl->addr;
52455459968SRob Clark 		adreno_gpu->gmem = ocmem_hdl->len;
52555459968SRob Clark 		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
52655459968SRob Clark 				a3xx_gpu->ocmem_base);
52755459968SRob Clark #endif
52855459968SRob Clark 	}
52955459968SRob Clark 
530667ce33eSRob Clark 	if (!gpu->aspace) {
531871d812aSRob Clark 		/* TODO we think it is possible to configure the GPU to
532871d812aSRob Clark 		 * restrict access to VRAM carveout.  But the required
533871d812aSRob Clark 		 * registers are unknown.  For now just bail out and
534871d812aSRob Clark 		 * limp along with just modesetting.  If it turns out
535871d812aSRob Clark 		 * to not be possible to restrict access, then we must
536871d812aSRob Clark 		 * implement a cmdstream validator.
537871d812aSRob Clark 		 */
538871d812aSRob Clark 		dev_err(dev->dev, "No memory protection without IOMMU\n");
539871d812aSRob Clark 		ret = -ENXIO;
540871d812aSRob Clark 		goto fail;
541871d812aSRob Clark 	}
542871d812aSRob Clark 
543871d812aSRob Clark 	return gpu;
5447198e6b0SRob Clark 
5457198e6b0SRob Clark fail:
5467198e6b0SRob Clark 	if (a3xx_gpu)
5477198e6b0SRob Clark 		a3xx_destroy(&a3xx_gpu->base.base);
5487198e6b0SRob Clark 
5497198e6b0SRob Clark 	return ERR_PTR(ret);
5507198e6b0SRob Clark }
551