// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */

#include "a3xx_gpu.h"

#define A3XX_INT0_MASK \
	(A3XX_INT0_RBBM_AHB_ERROR |        \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A3XX_INT0_CP_OPCODE_ERROR |       \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A3XX_INT0_CP_HW_FAULT |           \
	 A3XX_INT0_CP_IB1_INT |            \
	 A3XX_INT0_CP_IB2_INT |            \
	 A3XX_INT0_CP_RB_INT |             \
	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
	 A3XX_INT0_CACHE_FLUSH_TS |        \
	 A3XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;

static void a3xx_dump(struct msm_gpu *gpu);
static bool a3xx_idle(struct msm_gpu *gpu);

static bool a3xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	return a3xx_idle(gpu);
}

static int a3xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	DBG("%s", gpu->name);

	if (adreno_is_a305(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
	} else if (adreno_is_a306(adreno_gpu)) {
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
	} else if (adreno_is_a320(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

	} else if (adreno_is_a330v2(adreno_gpu)) {
		/*
		 * Most of the VBIF registers on 8974v2 have the correct
		 * values at power on, so we won't modify those if we don't
		 * need to
		 */
		/* Enable 1k sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

	} else if (adreno_is_a330(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Disable VBIF clock gating. This is to allow AXI to run at
		 * a higher frequency than the GPU:
		 */
		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter: */
	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection: */
	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits. This lets us get
	 * useful information on failure:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting: */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Turn on the power counters: */
	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

	/* Enable Clock gating: */
	if (adreno_is_a306(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a320(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
	else if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

	if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

	/* Set the OCMEM base address for A330, etc */
	if (a3xx_gpu->ocmem.hdl) {
		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a3xx_gpu->ocmem.base >> 14));
	}

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

	/* Enable the perfcntrs that we use.. */
	for (i = 0; i < gpu->num_perfcntrs; i++) {
		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
	}

	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/* setup access protection: */
	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);

	/* RBBM registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

	/* CP registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

	/* RB registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

	/* VBIF registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

	/* NOTE: PM4/micro-engine firmware registers look to be the same
	 * for a2xx and a3xx.. we could possibly push that part down to
	 * adreno_gpu base class. Or push both PM4 and PFP but
	 * parameterize the pfp ucode addr/data registers..
	 */

	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %x", ptr[1]);

	gpu_write(gpu, REG_AXXX_CP_DEBUG,
			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %x", ptr[5]);

	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
			adreno_is_a320(adreno_gpu)) {
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
	} else if (adreno_is_a330(adreno_gpu)) {
		/* NOTE: this value (taken from the downstream android driver)
		 * includes some bits outside of the known bitfields. But
		 * A330 has this "MERCIU queue" thing too, which might
		 * explain a new bitfield or reshuffling:
		 */
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
	}

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

	return a3xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a3xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a3xx_dump(gpu);

	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a3xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

	adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);

	kfree(a3xx_gpu);
}

static bool a3xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
			A3XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
	DBG("%s: %08x", gpu->name, status);

	// TODO

	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2440, 0x2440, 0x2444, 0x2444,
	0x2448, 0x244d, 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470,
	0x2472, 0x2472, 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3,
	0x24e4, 0x24ef, 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e,
	0x2510, 0x2511, 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea,
	0x25ec, 0x25ed, 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617,
	0x261a, 0x261a, 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0,
	0x26c4, 0x26ce, 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9,
	0x26ec, 0x26ec, 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
	0x300c, 0x300e, 0x301c, 0x301d, 0x302a, 0x302a, 0x302c, 0x302d,
	0x3030, 0x3031, 0x3034, 0x3036, 0x303c, 0x303c, 0x305e, 0x305f,
	~0 /* sentinel */
};

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
	printk("status: %08x\n",
		gpu_read(gpu, REG_A3XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);

	return state;
}

/* Register offset defines for A3XX */
static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL),
};

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a3xx_hw_init,
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.active_ring = adreno_active_ring,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a3xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
	},
};

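/* Performance counters hooked up to the generic msm perfcntr sampling:
 * each entry pairs a counter select register (programmed in
 * a3xx_hw_init()) with the low word of the counter it drives.
 */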
static const struct msm_gpu_perfcntr perfcntrs[] = {
	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
	struct a3xx_gpu *a3xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
	if (!a3xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a3xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = perfcntrs;
	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

	adreno_gpu->registers = a3xx_registers;
	adreno_gpu->reg_offsets = a3xx_register_offsets;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	/* if needed, allocate gmem: */
	if (adreno_is_a330(adreno_gpu)) {
		ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
					    adreno_gpu, &a3xx_gpu->ocmem);
		if (ret)
			goto fail;
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout. But the required
		 * registers are unknown. For now just bail out and
		 * limp along with just modesetting. If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	if (a3xx_gpu)
		a3xx_destroy(&a3xx_gpu->base.base);

	return ERR_PTR(ret);
}