xref: /linux/drivers/gpu/drm/msm/adreno/a3xx_gpu.c (revision caab277b1de0a22b675c4c95fc7b285ec2eb5bf5)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */

#ifdef CONFIG_MSM_OCMEM
#  include <mach/ocmem.h>
#endif

#include "a3xx_gpu.h"

#define A3XX_INT0_MASK \
	(A3XX_INT0_RBBM_AHB_ERROR |        \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A3XX_INT0_CP_OPCODE_ERROR |       \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A3XX_INT0_CP_HW_FAULT |           \
	 A3XX_INT0_CP_IB1_INT |            \
	 A3XX_INT0_CP_IB2_INT |            \
	 A3XX_INT0_CP_RB_INT |             \
	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
	 A3XX_INT0_CACHE_FLUSH_TS |        \
	 A3XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;

static void a3xx_dump(struct msm_gpu *gpu);
static bool a3xx_idle(struct msm_gpu *gpu);

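/*
 * Initialize the CP micro-engine: write the CP_ME_INIT packet into the
 * first (and only) ringbuffer, kick it off and wait for the GPU to idle.
 */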
static bool a3xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

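	/* kick the ring and wait for it to drain and the GPU to go idle: */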
	gpu->funcs->flush(gpu, ring);
	return a3xx_idle(gpu);
}

static int a3xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	DBG("%s", gpu->name);

	if (adreno_is_a305(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
	} else if (adreno_is_a306(adreno_gpu)) {
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
	} else if (adreno_is_a320(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

	} else if (adreno_is_a330v2(adreno_gpu)) {
		/*
		 * Most of the VBIF registers on 8974v2 have the correct
		 * values at power on, so we won't modify those if we don't
		 * need to
		 */
		/* Enable 1k sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

	} else if (adreno_is_a330(adreno_gpu)) {
		/* Set up 24 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Disable VBIF clock gating.  This allows AXI to run at a
		 * higher frequency than the GPU:
		 */
		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter: */
	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection: */
	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits.  This lets us get
	 * useful information on failure:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting: */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Turn on the power counters: */
	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

	/* Enable Clock gating: */
	if (adreno_is_a306(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a320(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
	else if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

	if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

	/* Set the OCMEM base address for A330, etc */
	if (a3xx_gpu->ocmem_hdl) {
		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a3xx_gpu->ocmem_base >> 14));
	}

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

	/* Enable the perfcntrs that we use.. */
	for (i = 0; i < gpu->num_perfcntrs; i++) {
		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
	}

	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/* setup access protection: */
	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);

	/* RBBM registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

	/* CP registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

	/* RB registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

	/* VBIF registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

	/* NOTE: PM4/micro-engine firmware registers look to be the same
	 * for a2xx and a3xx.. we could possibly push that part down to
	 * adreno_gpu base class.  Or push both PM4 and PFP but
	 * parameterize the pfp ucode addr/data registers..
	 */

	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %x", ptr[1]);

	gpu_write(gpu, REG_AXXX_CP_DEBUG,
			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
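	/* write the microcode, skipping the first dword of the fw image: */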
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %x", ptr[5]);

	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
			adreno_is_a320(adreno_gpu)) {
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
	} else if (adreno_is_a330(adreno_gpu)) {
		/* NOTE: this (value taken from the downstream android driver)
		 * includes some bits outside of the known bitfields.  But
		 * A330 has this "MERCIU queue" thing too, which might
		 * explain a new bitfield or reshuffling:
		 */
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
	}

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

	return a3xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a3xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

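	/* dump the CP scratch registers before resetting the GPU: */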
	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a3xx_dump(gpu);

	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a3xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

#ifdef CONFIG_MSM_OCMEM
	if (a3xx_gpu->ocmem_base)
		ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
#endif

	kfree(a3xx_gpu);
}

static bool a3xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
			A3XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
	DBG("%s: %08x", gpu->name, status);

	// TODO

	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

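/* Inclusive (start, end) pairs of register offsets to capture in register
 * dumps and GPU state snapshots:
 */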
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760, 0x300c, 0x300e, 0x301c, 0x301d,
	0x302a, 0x302a, 0x302c, 0x302d, 0x3030, 0x3031, 0x3034, 0x3036,
	0x303c, 0x303c, 0x305e, 0x305f,
	~0   /* sentinel */
};

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
	adreno_dump(gpu);
}

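/* snapshot the common adreno state plus the a3xx RBBM status register: */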
static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);

	return state;
}

/* Register offset defines for A3XX */
static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL),
};

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a3xx_hw_init,
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.active_ring = adreno_active_ring,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a3xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
	},
};

static const struct msm_gpu_perfcntr perfcntrs[] = {
	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
	struct a3xx_gpu *a3xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
	if (!a3xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a3xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = perfcntrs;
	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

	adreno_gpu->registers = a3xx_registers;
	adreno_gpu->reg_offsets = a3xx_register_offsets;

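	/* a3xx uses a single ringbuffer: */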
	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	/* if needed, allocate gmem: */
	if (adreno_is_a330(adreno_gpu)) {
#ifdef CONFIG_MSM_OCMEM
		/* TODO this is different/missing upstream: */
		struct ocmem_buf *ocmem_hdl =
				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);

		a3xx_gpu->ocmem_hdl = ocmem_hdl;
		a3xx_gpu->ocmem_base = ocmem_hdl->addr;
		adreno_gpu->gmem = ocmem_hdl->len;
		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
				a3xx_gpu->ocmem_base);
#endif
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	return gpu;

fail:
	if (a3xx_gpu)
		a3xx_destroy(&a3xx_gpu->base.base);

	return ERR_PTR(ret);
}