// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */
#include "a4xx_gpu.h"

/*
 * Set of RBBM INT0 interrupt sources that a4xx_hw_init() writes to
 * REG_A4XX_RBBM_INT_0_MASK.
 */
#define A4XX_INT0_MASK \
	(A4XX_INT0_RBBM_AHB_ERROR |        \
	 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A4XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A4XX_INT0_CP_OPCODE_ERROR |       \
	 A4XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A4XX_INT0_CP_HW_FAULT |           \
	 A4XX_INT0_CP_IB1_INT |            \
	 A4XX_INT0_CP_IB2_INT |            \
	 A4XX_INT0_CP_RB_INT |             \
	 A4XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A4XX_INT0_CP_AHB_ERROR_HALT |     \
	 A4XX_INT0_CACHE_FLUSH_TS |        \
	 A4XX_INT0_UCHE_OOB_ACCESS)

/* Defined outside this file; when set, a4xx_recover() dumps registers. */
extern bool hang_debug;
/* Forward declarations: both are used before their definitions below. */
static void a4xx_dump(struct msm_gpu *gpu);
static bool a4xx_idle(struct msm_gpu *gpu);

/*
 * a4xx_submit() - Emit a submit into its ringbuffer and kick the CP
 * @gpu: The GPU the submit is queued on
 * @submit: The submit whose command buffers are emitted
 *
 * Writes one CP_INDIRECT_BUFFER_PFE packet per command buffer that is not
 * skipped, then the seqno to CP scratch register 2, an HLSQ flush, a
 * wait-for-idle and a CACHE_FLUSH_TS event that writes the seqno to the
 * ring's fence memptr, and finally flushes the ring through
 * REG_A4XX_CP_RB_WPTR.
 */
static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			/* ctx changed: emit the restore buffer like a normal one */
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			/* only the low 32 bits of the iova are emitted */
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

	/* record the seqno of this submit in CP scratch register 2 */
	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->seqno);

	/* Flush HLSQ lazy updates to make sure there is nothing
	 * pending for indirect loads after the timestamp has
	 * passed:
	 */
	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, HLSQ_FLUSH);

	/* wait for idle before cache flush/interrupt */
	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
	OUT_RING(ring, 0x00000000);

	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS | CP_EVENT_WRITE_0_IRQ);
	OUT_RING(ring, rbmemptr(ring, fence));
	OUT_RING(ring, submit->seqno);

	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
}

/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @gpu: The GPU whose RBBM clock control registers are written
 *
 * Each indexed register (TP, SP, RB, MARB_CCU) is written for indices 0..3.
 * The values differ per chip only for CLOCK_CTL2_RB (A420), the CCU
 * registers (skipped on A405) and the final RBBM_CLOCK_CTL (early A430).
 */
static void a4xx_enable_hwcg(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;
	/* TP clock control / hysteresis / delay */
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
	/* SP clock control / hysteresis / delay */
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
	/* UCHE clock control / hysteresis / delay */
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
	/* RB clock control */
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);

	/* Disable L1 clocking in A420 due to CCU issues with it */
	for (i = 0; i < 4; i++) {
		if (adreno_is_a420(adreno_gpu)) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00002020);
		} else {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00022020);
		}
	}

	/* No CCU for A405 */
	if (!adreno_is_a405(adreno_gpu)) {
		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
					0x00000922);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
					0x00000000);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
					0x00000001);
		}
	}

	/* GPC, COM/DCOM, TSE/RAS/RBBM and HLSQ clock control */
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
	/*
	 * Early A430's (patchid < 2) have a timing issue with SP/TP power
	 * collapse; disabling HW clock gating prevents it.
	 */
	if (adreno_is_a430(adreno_gpu) && adreno_patchid(adreno_gpu) < 2)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
	else
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}


/*
 * a4xx_me_init() - Send the CP_ME_INIT packet and wait for the GPU to idle
 * @gpu: The GPU to initialize
 *
 * Queues CP_ME_INIT with its 17 payload dwords on ring 0, flushes the ring
 * and returns the result of a4xx_idle() (true when the GPU went idle).
 */
static bool a4xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
	return a4xx_idle(gpu);
}

/*
 * a4xx_hw_init() - Bring the GPU hardware into a usable state
 * @gpu: The GPU to initialize
 *
 * Programs VBIF, RBBM, UCHE and CP registers, enables hardware clock gating,
 * sets up CP register protection and the interrupt mask, runs the common
 * adreno_hw_init(), configures ringbuffer 0, loads the PM4 and PFP
 * microcode and starts the micro engine.
 *
 * Return: 0 on success, the error returned by adreno_hw_init(), or -EINVAL
 * when a4xx_me_init() reports that the GPU did not go idle.
 */
static int a4xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	/* Per-chip VBIF setup; any chip other than A405/A420/A430 hits BUG() */
	if (adreno_is_a405(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection */
	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
	}

	/* Enable the RBBM error reporting bits */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Enable power counters */
	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);

	/*
	 * Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 30) | 0xFFFF);

	/* GMEM base is programmed as the OCMEM base address shifted right by 14 */
	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a4xx_gpu->ocmem.base >> 14));

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);

	/* use the first CP counter for timestamp queries.. userspace may set
	 * this as well but it selects the same counter/countable:
	 */
	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);

	/* Disable L2 bypass to avoid UCHE out of bounds errors */
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);

	/* bit 25 is always set; bit 29 is additionally set on A420 only */
	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));

	/* On A430 enable SP regfile sleep for power savings */
	/* TODO downstream does this for !420, so maybe applies for 405 too? */
	if (!adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
			0x00000441);
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
			0x00000441);
	}

	a4xx_enable_hwcg(gpu);

	/*
	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
	 * due to timing issue with HLSQ_TP_CLK_EN
	 *
	 * This is a read-modify-write of the value a4xx_enable_hwcg() just
	 * programmed, so it must stay after that call.
	 */
	if (adreno_is_a420(adreno_gpu)) {
		unsigned int val;
		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
	}

	/* setup access protection: */
	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);

	/* RBBM registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);

	/* CP registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);


	/* RB registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);

	/* HLSQ registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);

	/* VPC registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);

	/* SMMU registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);

	/* Unmask the interrupt sources listed in A4XX_INT0_MASK */
	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/*
	 * Use the default ringbuffer size and block size but disable the RPTR
	 * shadow
	 */
	gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Set the ringbuffer address */
	gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

	/*
	 * Load PM4: the firmware is streamed as 32-bit words starting at
	 * index 1; ptr[0] is only reported as the ucode version.
	 */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %u", ptr[0]);
	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: same scheme as PM4, word 0 is skipped */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %u", ptr[0]);

	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);

	return a4xx_me_init(gpu) ? 0 : -EINVAL;
}

/*
 * a4xx_recover() - Dump state and reset the GPU after a hang
 * @gpu: The GPU to recover
 *
 * Prints the adreno info and CP scratch registers 0..7, dumps the register
 * state when hang_debug is set, pulses RBBM_SW_RESET_CMD and hands over to
 * adreno_recover().
 */
static void a4xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a4xx_dump(gpu);

	/*
	 * Assert then de-assert the software reset.
	 * NOTE(review): the read-back in between presumably makes sure the
	 * first write has reached the hardware - confirm.
	 */
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

/*
 * a4xx_destroy() - Tear down and free an a4xx GPU instance
 * @gpu: The GPU to destroy
 *
 * Runs the common adreno cleanup, releases the OCMEM state and frees the
 * containing struct a4xx_gpu. Also used by the error path of
 * a4xx_gpu_init().
 */
static void a4xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

	adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);

	kfree(a4xx_gpu);
}

/*
 * a4xx_idle() - Wait for ring 0 to drain and the GPU to stop being busy
 * @gpu: The GPU to wait on
 *
 * Return: true once adreno_idle() succeeded for ring 0 and the GPU_BUSY bit
 * of RBBM_STATUS is clear; false when either wait fails.
 */
static bool a4xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
					A4XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

/*
 * a4xx_irq() - GPU interrupt handler
 * @gpu: The GPU that raised the interrupt
 *
 * Reads RBBM_INT_0_STATUS, logs CP register-protection faults, clears every
 * reported status bit and calls msm_gpu_retire().
 *
 * Return: always IRQ_HANDLED.
 */
static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
	DBG("%s: Int status %08x", gpu->name, status);

	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);
		/*
		 * Bit 24 of the status selects the WRITE/READ label; the low
		 * 20 bits shifted right by 2 are printed as the address.
		 */
		printk("CP | Protected mode error| %s | addr=%x\n",
			reg & (1 << 24) ? "WRITE" : "READ",
			(reg & 0xFFFFF) >> 2);
	}

	/* acknowledge exactly the bits that were read above */
	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

/*
 * Register list installed as adreno_gpu->registers by a4xx_gpu_init() for
 * every chip except A405. The list is terminated by a ~0 sentinel.
 * NOTE(review): entries are presumably inclusive (first, last) register
 * offset pairs consumed by the common adreno dump/state code - confirm
 * against that code.
 */
static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* VMIDMT */
	0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
	0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
	0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
	0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* XPU */
	0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
	0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
	0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
	/* VBIF */
	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
	0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
	0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
	0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
	0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
	0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
	0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
	0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
	0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
	0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
	0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
	0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
	0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
	0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
	0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
	0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
	0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
	0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
	~0 /* sentinel */
};

/*
 * Register list installed as adreno_gpu->registers by a4xx_gpu_init() when
 * the chip is an A405. Compared to a4xx_registers it has no VMIDMT or XPU
 * section and a shorter VBIF section. Terminated by a ~0 sentinel.
 * NOTE(review): entries are presumably inclusive (first, last) register
 * offset pairs - confirm against the common adreno dump/state code.
 */
static const unsigned int a405_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* VBIF version 0x20050000 */
	0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
	0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
	0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
	0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
	0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
	0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
	0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
	0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
	~0 /* sentinel */
};

/*
 * a4xx_gpu_state_get() - Capture a snapshot of the GPU state
 * @gpu: The GPU to snapshot
 *
 * Allocates a zeroed msm_gpu_state, fills it through adreno_gpu_state_get()
 * and records the current RBBM_STATUS value.
 *
 * Return: the new state, or ERR_PTR(-ENOMEM) when the allocation fails.
 */
static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);

	return state;
}

/*
 * a4xx_dump() - Print RBBM_STATUS followed by the common adreno dump
 * @gpu: The GPU to dump
 */
static void a4xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
	adreno_dump(gpu);
}

/*
 * a4xx_pm_resume() - Power the GPU back up
 * @gpu: The GPU to resume
 *
 * Runs the common msm_gpu_pm_resume() and, on A430, additionally writes
 * RBBM_POWER_CNTL_IP and polls RBBM_POWER_STATUS until the SP/TP power-on
 * bit is set.
 *
 * Return: 0 on success or the error from msm_gpu_pm_resume().
 */
static int a4xx_pm_resume(struct msm_gpu *gpu) {
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		unsigned int reg;
		/* Set the default register values; set SW_COLLAPSE to 0 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
		/*
		 * Poll every 5us until SP/TP report powered on.
		 * NOTE(review): there is no timeout here; the loop spins
		 * forever if the status bit never gets set.
		 */
		do {
			udelay(5);
			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
	}
	return 0;
}

/*
 * a4xx_pm_suspend() - Power the GPU down
 * @gpu: The GPU to suspend
 *
 * Runs the common msm_gpu_pm_suspend() and, on A430, additionally writes
 * RBBM_POWER_CNTL_IP with SW_COLLAPSE set.
 *
 * Return: 0 on success or the error from msm_gpu_pm_suspend().
 */
static int a4xx_pm_suspend(struct msm_gpu *gpu) {
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_suspend(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		/* Set the default register values; set SW_COLLAPSE to 1 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
	}
	return 0;
}

/*
 * a4xx_get_timestamp() - Read the GPU timestamp counter
 * @gpu: The GPU to query
 * @value: Receives the 64-bit value of CP performance counter 0
 *
 * a4xx_hw_init() selects CP_ALWAYS_COUNT on the first CP counter for this
 * purpose.
 *
 * Return: always 0.
 */
static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO);

	return 0;
}

/*
 * a4xx_gpu_busy() - Sample the GPU busy counter
 * @gpu: The GPU to query
 * @out_sample_rate: Receives the current rate of the GPU core clock
 *
 * Return: the 64-bit value of RBBM performance counter 1.
 */
static u64 a4xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
{
	u64 busy_cycles;

	busy_cycles = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_RBBM_1_LO);
	*out_sample_rate = clk_get_rate(gpu->core_clk);

	return busy_cycles;
}

/*
 * a4xx_get_rptr() - Read the ringbuffer read pointer
 * @gpu: The GPU to query
 * @ring: The ring whose cached rptr is refreshed
 *
 * Reads the read pointer from the CP_RB_RPTR register (a4xx_hw_init()
 * disables the RPTR shadow), stores it in ring->memptrs->rptr and returns
 * it.
 */
static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR);
	return ring->memptrs->rptr;
}

/*
 * Function table handed to adreno_gpu_init() by a4xx_gpu_init(): a4xx
 * specific callbacks from this file plus common adreno_* helpers.
 */
static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.set_param = adreno_set_param,
		.hw_init = a4xx_hw_init,
		.pm_suspend = a4xx_pm_suspend,
		.pm_resume = a4xx_pm_resume,
		.recover = a4xx_recover,
		.submit = a4xx_submit,
		.active_ring = adreno_active_ring,
		.irq = a4xx_irq,
		.destroy = a4xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_busy = a4xx_gpu_busy,
		.gpu_state_get = a4xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_create_address_space,
		.get_rptr = a4xx_get_rptr,
	},
	.get_timestamp = a4xx_get_timestamp,
};

/*
 * a4xx_gpu_init() - Allocate and initialize an a4xx GPU instance
 * @dev: The DRM device the GPU belongs to
 *
 * Allocates the a4xx_gpu, runs the common adreno init, picks the register
 * list for the chip, sets up OCMEM, checks for an address space and
 * requests interconnect bandwidth.
 *
 * Return: the new msm_gpu on success, or an ERR_PTR() with the error code
 * (-ENXIO, -ENOMEM or the error from a failing helper). On failure after
 * the allocation, everything is torn down through a4xx_destroy().
 */
struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
{
	struct a4xx_gpu *a4xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct icc_path *ocmem_icc_path;
	struct icc_path *icc_path;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
	if (!a4xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a4xx_gpu->base;
	gpu = &adreno_gpu->base;

	/* no performance counters are exposed for a4xx */
	gpu->perfcntrs = NULL;
	gpu->num_perfcntrs = 0;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	/* A405 uses its own, shorter register list */
	adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
							     a4xx_registers;

	/* if needed, allocate gmem: */
	ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
				    &a4xx_gpu->ocmem);
	if (ret)
		goto fail;

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		if (!allow_vram_carveout) {
			ret = -ENXIO;
			goto fail;
		}
	}

	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
	if (IS_ERR(icc_path)) {
		ret = PTR_ERR(icc_path);
		goto fail;
	}

	ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
	if (IS_ERR(ocmem_icc_path)) {
		ret = PTR_ERR(ocmem_icc_path);
		/* allow -ENODATA, ocmem icc is optional */
		if (ret != -ENODATA)
			goto fail;
		ocmem_icc_path = NULL;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 *
	 * NOTE(review): ocmem_icc_path may be NULL here (optional path); this
	 * relies on icc_set_bw() accepting a NULL path - confirm.
	 */
	icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	/* a4xx_gpu is still NULL when no pdev was found or kzalloc failed */
	if (a4xx_gpu)
		a4xx_destroy(&a4xx_gpu->base.base);

	return ERR_PTR(ret);
}