// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/cpumask.h>
#include <linux/qcom_scm.h>
#include <linux/pm_opp.h>
#include <linux/nvmem-consumer.h>
#include <linux/slab.h>
#include "msm_gem.h"
#include "msm_mmu.h"
#include "a5xx_gpu.h"

extern bool hang_debug;
static void a5xx_dump(struct msm_gpu *gpu);

#define GPU_PAS_ID 13

void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		bool sync)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	/*
	 * Most flush operations need to issue a WHERE_AM_I opcode to sync up
	 * the rptr shadow
	 */
	if (a5xx_gpu->has_whereami && sync) {
		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
		OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
		OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
	}

	spin_lock_irqsave(&ring->preempt_lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	spin_unlock_irqrestore(&ring->preempt_lock, flags);

	/* Make sure everything is posted before making a decision */
	mb();

	/* Update HW if this is the current ring and we are not in preempt */
	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
}

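/*
 * Debug-only submit path (CONFIG_DRM_MSM_GPU_SUDO + submit->in_rb): the
 * command stream is copied word by word straight into the ringbuffer
 * instead of being referenced through CP_INDIRECT_BUFFER packets,
 * presumably so that it executes with the elevated privileges of the RB
 * itself.
 */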
static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	struct msm_gem_object *obj;
	uint32_t *ptr, dwords;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == submit->queue->ctx)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			/* copy commands into RB: */
			obj = submit->bos[submit->cmd[i].idx].obj;
			dwords = submit->cmd[i].size;

			ptr = msm_gem_get_vaddr(&obj->base);

			/* _get_vaddr() shouldn't fail at this point,
			 * since we've already mapped it once in
			 * submit_reloc()
			 */
			if (WARN_ON(!ptr))
				return;

			for (i = 0; i < dwords; i++) {
				/* normally the OUT_PKTn() would wait
				 * for space for the packet.  But since
				 * we just OUT_RING() the whole thing,
				 * need to call adreno_wait_ring()
				 * ourself:
				 */
				adreno_wait_ring(ring, 1);
				OUT_RING(ring, ptr[i]);
			}

			msm_gem_put_vaddr(&obj->base);

			break;
		}
	}

	a5xx_flush(gpu, ring, true);
	a5xx_preempt_trigger(gpu);

	/* we might not necessarily have a cmd from userspace to
	 * trigger an event to know that submit has completed, so
	 * do this manually:
	 */
	a5xx_idle(gpu, ring);
	ring->memptrs->fence = submit->seqno;
	msm_gpu_retire(gpu);
}

static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
		priv->lastctx = NULL;
		a5xx_submit_in_rb(gpu, submit);
		return;
	}

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	/* Enable local preemption for finegrain preemption */
	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x02);

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == submit->queue->ctx)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			ibs++;
			break;
		}
	}

	/*
	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
	 * are done rendering - otherwise a lucky preemption would start
	 * replaying from the last checkpoint
	 */
	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* Turn off IB level preemptions */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event.  This will ensure that the
	 * timestamp is written to the memory and then triggers the interrupt
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
		CP_EVENT_WRITE_0_IRQ);
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	/*
	 * If dword[2:1] are non zero, they specify an address for the CP to
	 * write the value of dword[3] to on preemption complete. Write 0 to
	 * skip the write
	 */
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Data value - not used if the address above is 0 */
	OUT_RING(ring, 0x01);
	/* Set bit 0 to trigger an interrupt on preempt complete */
	OUT_RING(ring, 0x01);

	/* A WHERE_AM_I packet is not needed after a YIELD */
	a5xx_flush(gpu, ring, false);

	/* Check to see if we need to start preemption */
	a5xx_preempt_trigger(gpu);
}

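/*
 * Per-block hardware clock gating (HWCG) settings.  a5xx_set_hwcg() below
 * writes the whole table when enabling clock gating and zeroes every
 * register when disabling it (for example around GPU state capture).
 */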
static const struct {
	u32 offset;
	u32 value;
} a5xx_hwcg[] = {
	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
};

void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
		gpu_write(gpu, a5xx_hwcg[i].offset,
			state ? a5xx_hwcg[i].value : 0);

	if (adreno_is_a540(adreno_gpu)) {
		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
	}

	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
}

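/*
 * One-time CP microengine init.  The CP_ME_INIT packet carries eight dwords
 * of configuration; the interesting ones are flagged in the comments below
 * (multiple hardware contexts, error detection, and per-chip microcode
 * workarounds).
 */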
static int a5xx_me_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002F);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* Specify workarounds for various microcode issues */
	if (adreno_is_a530(adreno_gpu)) {
		/* Workaround for token end syncs
		 * Force a WFI after every direct-render 3D mode draw and every
		 * 2D mode 3 draw
		 */
		OUT_RING(ring, 0x0000000B);
	} else if (adreno_is_a510(adreno_gpu)) {
		/* Workaround for token and syncs */
		OUT_RING(ring, 0x00000001);
	} else {
		/* No workarounds enabled */
		OUT_RING(ring, 0x00000000);
	}

	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	a5xx_flush(gpu, ring, true);
	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a5xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings == 1)
		return 0;

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x00);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x01);

	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x01);
	OUT_RING(ring, 0x01);

	/* The WHERE_AM_I packet is not needed after a YIELD is issued */
	a5xx_flush(gpu, ring, false);

	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

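/*
 * Check the PFP microcode for WHERE_AM_I support: patched firmware (lowest
 * nibble of dword 0 == 0xa) with a patchlevel >= 1 understands the opcode,
 * which lets the driver keep a per-ring rptr shadow and therefore enable
 * preemption.
 */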
static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
		struct drm_gem_object *obj)
{
	u32 *buf = msm_gem_get_vaddr(obj);

	if (IS_ERR(buf))
		return;

	/*
	 * If the lowest nibble is 0xa that is an indication that this microcode
	 * has been patched. The actual version is in dword [3] but we only care
	 * about the patchlevel which is the lowest nibble of dword [3]
	 */
	if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
		a5xx_gpu->has_whereami = true;

	msm_gem_put_vaddr(obj);
}

static int a5xx_ucode_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	int ret;

	if (!a5xx_gpu->pm4_bo) {
		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);

		if (IS_ERR(a5xx_gpu->pm4_bo)) {
			ret = PTR_ERR(a5xx_gpu->pm4_bo);
			a5xx_gpu->pm4_bo = NULL;
			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
				ret);
			return ret;
		}

		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
	}

	if (!a5xx_gpu->pfp_bo) {
		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);

		if (IS_ERR(a5xx_gpu->pfp_bo)) {
			ret = PTR_ERR(a5xx_gpu->pfp_bo);
			a5xx_gpu->pfp_bo = NULL;
			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
				ret);
			return ret;
		}

		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
		a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
	}

	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);

	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);

	return 0;
}

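/*
 * The zap shader is the small piece of firmware that lets the GPU leave
 * secure mode.  It only has to be loaded into the secure world once; on
 * subsequent hw_init calls the remote processor is simply asked to resume
 * it through SCM.
 */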
#define SCM_GPU_ZAP_SHADER_RESUME 0

static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
{
	int ret;

	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
	if (ret)
		DRM_ERROR("%s: zap-shader resume failed: %d\n",
			gpu->name, ret);

	return ret;
}

static int a5xx_zap_shader_init(struct msm_gpu *gpu)
{
	static bool loaded;
	int ret;

	/*
	 * If the zap shader is already loaded into memory we just need to kick
	 * the remote processor to reinitialize it
	 */
	if (loaded)
		return a5xx_zap_shader_resume(gpu);

	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);

	loaded = !ret;
	return ret;
}

#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
	  A5XX_RBBM_INT_0_MASK_CP_SW | \
	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)

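/*
 * Bring the GPU up from scratch: program the bus/QoS and counter setup,
 * carve out the GMEM range in UCHE, configure CP protection and the
 * trusted memory range, load the PM4/PFP microcode, start the
 * microengine, initialize power/GPMU state and finally (if applicable)
 * load the zap shader and drop out of secure mode.
 */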
static int a5xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	int ret;

	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);

	if (adreno_is_a540(adreno_gpu))
		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable RBBM error reporting bits */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
		/*
		 * Mask out the activity signals from RB1-3 to avoid false
		 * positives
		 */

		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
			0xFFFFFFFF);
	}

	/* Enable fault detection */
	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
		(1 << 30) | 0xFFFF);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);

	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);

	/* Increase VFD cache access so LRZ and other data gets evicted less */
	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);

	/* Disable L2 bypass in the UCHE */
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);

	/* Set the GMEM VA range (0 to gpu->gmem) */
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
		0x00100000 + adreno_gpu->gmem - 1);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);

	if (adreno_is_a510(adreno_gpu)) {
		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
		gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
			(0x200 << 11 | 0x200 << 22));
	} else {
		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
		if (adreno_is_a530(adreno_gpu))
			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
		if (adreno_is_a540(adreno_gpu))
			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
			(0x400 << 11 | 0x300 << 22));
	}

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));

	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);

	/* Enable USE_RETENTION_FLOPS */
	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);

	/* Enable ME/PFP split notification */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);

	/*
	 * In A5x, the CCU can send a context_done event for a particular
	 * context to UCHE (ultimately reaching the CP) even while there is
	 * still a valid transaction for that context inside the CCU. This
	 * can let the CP program config registers, which causes the "valid
	 * transaction" inside the CCU to be interpreted differently and can
	 * lead to a GPU fault. The bug is fixed in the latest A510 revision;
	 * to enable the fix, bit[11] of RB_DBG_ECO_CNTL needs to be set to 0
	 * (the default is 1, i.e. disabled). For older A510 revisions this
	 * bit is unused.
	 */
	if (adreno_is_a510(adreno_gpu))
		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);

	/* Enable HWCG */
	a5xx_set_hwcg(gpu, true);

	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);

	/* Set the highest bank bit */
	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
	if (adreno_is_a540(adreno_gpu))
		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);

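	/*
	 * Each ADRENO_PROTECT_RW() entry below appears to encode a base
	 * register plus a (power-of-two rounded) length - see adreno_gpu.h.
	 * Ranges programmed into the CP_PROTECT registers cannot be written
	 * from the command stream while protected mode is on; violations are
	 * reported as the "protected mode error" decoded in
	 * a5xx_cp_err_irq().
	 */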
	/* Protect registers from the CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);

	/* RBBM */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));

	/* Content protect */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
			16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));

	/* CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));

	/* RB */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));

	/* VPC */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));

	/* UCHE */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));

	if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
			ADRENO_PROTECT_RW(0x10000, 0x8000));

	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Put the GPU into 64 bit by default */
	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);

	/*
	 * VPC corner case with local memory load kill leads to corrupt
	 * internal state. Normal Disable does not work for all a5x chips.
	 * So do the following setting to disable it.
	 */
	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
	}

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	if (!adreno_is_a510(adreno_gpu))
		a5xx_gpmu_ucode_init(gpu);

	ret = a5xx_ucode_init(gpu);
	if (ret)
		return ret;

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
		gpu->rb[0]->iova);

	/*
	 * If the microcode supports the WHERE_AM_I opcode then we can use that
	 * in lieu of the RPTR shadow and enable preemption. Otherwise, we
	 * can't safely use the RPTR shadow or preemption. In either case, the
	 * RPTR shadow should be disabled in hardware.
	 */
	gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

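	/*
	 * shadowptr() (from a5xx_gpu.h) presumably points the CP at a
	 * per-ring u32 slot inside the shadow buffer allocated below, so
	 * that the WHERE_AM_I/RPTR writeback lands in
	 * a5xx_gpu->shadow[ring->id] as read back by a5xx_get_rptr().
	 */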
	/* Create a privileged buffer for the RPTR shadow */
	if (a5xx_gpu->has_whereami) {
		if (!a5xx_gpu->shadow_bo) {
			a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
				sizeof(u32) * gpu->nr_rings,
				MSM_BO_UNCACHED | MSM_BO_MAP_PRIV,
				gpu->aspace, &a5xx_gpu->shadow_bo,
				&a5xx_gpu->shadow_iova);

			if (IS_ERR(a5xx_gpu->shadow))
				return PTR_ERR(a5xx_gpu->shadow);
		}

		gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
			REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
	} else if (gpu->nr_rings > 1) {
		/* Disable preemption if WHERE_AM_I isn't available */
		a5xx_preempt_fini(gpu);
		gpu->nr_rings = 1;
	}

	a5xx_preempt_hw_init(gpu);

	/* Disable the interrupts through the initial bringup stage */
	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);

	/* Clear ME_HALT to start the micro engine */
	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
	ret = a5xx_me_init(gpu);
	if (ret)
		return ret;

	ret = a5xx_power_init(gpu);
	if (ret)
		return ret;

	/*
	 * Send a pipeline event stat to get misbehaving counters to start
	 * ticking correctly
	 */
	if (adreno_is_a530(adreno_gpu)) {
		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
		OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));

		a5xx_flush(gpu, gpu->rb[0], true);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	}

	/*
	 * If the chip that we are using does support loading one, then
	 * try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a5xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a5xx_flush(gpu, gpu->rb[0], true);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
		 * if you mess this up you are about to crash horribly)
		 */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
	} else {
		return ret;
	}

	/* Last step - yield the ringbuffer */
	a5xx_preempt_start(gpu);

	return 0;
}

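/*
 * Hang recovery: dump a little CP state for debugging, pulse the RBBM
 * software reset and then hand off to the common adreno_recover() path to
 * bring the GPU back up.
 */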
static void a5xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
	}

	if (hang_debug)
		a5xx_dump(gpu);

	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a5xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	a5xx_preempt_fini(gpu);

	if (a5xx_gpu->pm4_bo) {
		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
		drm_gem_object_put(a5xx_gpu->pm4_bo);
	}

	if (a5xx_gpu->pfp_bo) {
		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
		drm_gem_object_put(a5xx_gpu->pfp_bo);
	}

	if (a5xx_gpu->gpmu_bo) {
		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
		drm_gem_object_put(a5xx_gpu->gpmu_bo);
	}

	if (a5xx_gpu->shadow_bo) {
		msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
		drm_gem_object_put(a5xx_gpu->shadow_bo);
	}

	adreno_gpu_cleanup(adreno_gpu);
	kfree(a5xx_gpu);
}

static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
{
	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
		return false;

	/*
	 * Nearly every abnormality ends up pausing the GPU and triggering a
	 * fault so we can safely just watch for this one interrupt to fire
	 */
	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
}

bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	if (ring != a5xx_gpu->cur_ring) {
		WARN(1, "Tried to idle a non-current ringbuffer\n");
		return false;
	}

	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a5xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
{
	struct msm_gpu *gpu = arg;
	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
			iova, flags,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));

	return -EFAULT;
}

static void a5xx_cp_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);

	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
		u32 val;

		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);

		/*
		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
		 * read it twice
		 */

		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);

		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
			val);
	}

	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));

	if (status & A5XX_CP_INT_CP_DMA_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");

	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
			val & (1 << 24) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, val);
	}

	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
		const char *access[16] = { "reserved", "reserved",
			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
			"", "", "me read", "me write", "", "", "crashdump read",
			"crashdump write" };

		dev_err_ratelimited(gpu->dev->dev,
			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
			status & 0xFFFFF, access[(status >> 24) & 0xF],
			(status & (1 << 31)), status);
	}
}

static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
{
	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
			val & (1 << 28) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
			(val >> 24) & 0xF);

		/* Clear the error */
		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));

		/* Clear the interrupt */
		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
	}

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
}

static void a5xx_uche_err_irq(struct msm_gpu *gpu)
{
	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);

	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);

	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
		addr);
}

static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
{
	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
}

static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
		ring ? ring->id : -1, ring ? ring->seqno : 0,
		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));

	/* Turn off the hangcheck timer to keep it from bothering us */
	del_timer(&gpu->hangcheck_timer);

	kthread_queue_work(gpu->worker, &gpu->recover_work);
}

#define RBBM_ERROR_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)

static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);

	/*
	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
	 * before the source is cleared the interrupt will storm.
	 */
	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);

	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
	if (status & RBBM_ERROR_MASK)
		a5xx_rbbm_err_irq(gpu, status);

	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a5xx_cp_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
		a5xx_fault_detect_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		a5xx_uche_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
		a5xx_gpmu_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		a5xx_preempt_trigger(gpu);
		msm_gpu_retire(gpu);
	}

	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
		a5xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}

static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
	0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
	0xAC60, 0xAC60, ~0,
};

static void a5xx_dump(struct msm_gpu *gpu)
{
	DRM_DEV_INFO(gpu->dev->dev, "status: %08x\n",
		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
	adreno_dump(gpu);
}

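/*
 * Power-up sequence: bring the core power/clocks up through the common msm
 * helper, then (on a530/a540) raise the RBCCU and SP power domains through
 * the GPMU registers, polling each one until the GDSC reports that it is
 * on.  The a510 path only has to reconfigure clock gating.
 */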
static int a5xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	/* Turn on the core power */
	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	if (adreno_is_a510(adreno_gpu)) {
		/* Halt the sp_input_clk at HM level */
		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
		a5xx_set_hwcg(gpu, true);
		/* Turn on sp_input_clk at HM level */
		gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
		return 0;
	}

	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);

	/* Wait 3 usecs before polling */
	udelay(3);

	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret) {
		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
			gpu->name,
			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
		return ret;
	}

	/* Turn on the SP domain */
	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret)
		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
			gpu->name);

	return ret;
}

static int a5xx_pm_suspend(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	u32 mask = 0xf;
	int i, ret;

	/* A510 has 3 XIN ports in VBIF */
	if (adreno_is_a510(adreno_gpu))
		mask = 0x7;

	/* Clear the VBIF pipe before shutting down */
	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
				mask) == mask);

	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);

	/*
	 * Reset the VBIF before power collapse to avoid issue with FIFO
	 * entries
	 */
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);

	ret = msm_gpu_pm_suspend(gpu);
	if (ret)
		return ret;

	if (a5xx_gpu->has_whereami)
		for (i = 0; i < gpu->nr_rings; i++)
			a5xx_gpu->shadow[i] = 0;

	return 0;
}

static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
		REG_A5XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}

struct a5xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};

struct a5xx_gpu_state {
	struct msm_gpu_state base;
	u32 *hlsqregs;
};

static int a5xx_crashdumper_init(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
		&dumper->bo, &dumper->iova);

	if (!IS_ERR(dumper->ptr))
		msm_gem_object_set_name(dumper->bo, "crashdump");

	return PTR_ERR_OR_ZERO(dumper->ptr);
}

static int a5xx_crashdumper_run(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	u32 val;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);

	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
		val & 0x04, 100, 10000);
}

/*
 * These are a list of the registers that need to be read through the HLSQ
 * aperture through the crashdumper. These are not nominally accessible from
 * the CPU on a secure platform.
 */
static const struct {
	u32 type;
	u32 regoffset;
	u32 count;
} a5xx_hlsq_aperture_regs[] = {
	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
};

static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
		struct a5xx_gpu_state *a5xx_state)
{
	struct a5xx_crashdumper dumper = { 0 };
	u32 offset, count = 0;
	u64 *ptr;
	int i;

	if (a5xx_crashdumper_init(gpu, &dumper))
		return;

	/* The script will be written at offset 0 */
	ptr = dumper.ptr;

	/* Start writing the data at offset 256k */
	offset = dumper.iova + (256 * SZ_1K);

	/* Count how many additional registers to get from the HLSQ aperture */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
		count += a5xx_hlsq_aperture_regs[i].count;

	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
	if (!a5xx_state->hlsqregs)
		return;

	/* Build the crashdump script */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 type = a5xx_hlsq_aperture_regs[i].type;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		/* Write the register to select the desired bank */
		*ptr++ = ((u64) type << 8);
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
			(1 << 21) | 1;

		*ptr++ = offset;
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
			| c;

		offset += c * sizeof(u32);
	}

	/* Write two zeros to close off the script */
	*ptr++ = 0;
	*ptr++ = 0;

	if (a5xx_crashdumper_run(gpu, &dumper)) {
		kfree(a5xx_state->hlsqregs);
		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
		return;
	}

	/* Copy the data from the crashdumper to the state */
	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
		count * sizeof(u32));

	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
}

static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
			GFP_KERNEL);

	if (!a5xx_state)
		return ERR_PTR(-ENOMEM);

	/* Temporarily disable hardware clock gating before reading the hw */
	a5xx_set_hwcg(gpu, false);

	/* First get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &(a5xx_state->base));

	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);

	/* Get the HLSQ regs with the help of the crashdumper */
	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);

	a5xx_set_hwcg(gpu, true);

	return &a5xx_state->base;
}

static void a5xx_gpu_state_destroy(struct kref *kref)
{
	struct msm_gpu_state *state = container_of(kref,
		struct msm_gpu_state, ref);
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	kfree(a5xx_state->hlsqregs);

	adreno_gpu_state_destroy(state);
	kfree(a5xx_state);
}

static int a5xx_gpu_state_put(struct msm_gpu_state *state)
{
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, a5xx_gpu_state_destroy);
}

#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	int i, j;
	u32 pos = 0;
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	if (IS_ERR_OR_NULL(state))
		return;

	adreno_show(gpu, state, p);

	/* Dump the additional a5xx HLSQ registers */
	if (!a5xx_state->hlsqregs)
		return;

	drm_printf(p, "registers-hlsq:\n");

	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		for (j = 0; j < c; j++, pos++, o++) {
			/*
			 * To keep the crashdump simple we pull the entire range
			 * for each register type but not all of the registers
			 * in the range are valid. Fortunately invalid registers
			 * stick out like a sore thumb with a value of
			 * 0xdeadbeef
			 */
			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
				continue;

			drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n",
				o << 2, a5xx_state->hlsqregs[pos]);
		}
	}
}
#endif

static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	return a5xx_gpu->cur_ring;
}

static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
{
	u64 busy_cycles, busy_time;

	/* Only read the gpu busy if the hardware is already active */
	if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
		return 0;

	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);

	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);

	gpu->devfreq.busy_cycles = busy_cycles;

	pm_runtime_put(&gpu->pdev->dev);

	if (WARN_ON(busy_time > ~0LU))
		return ~0LU;

	return (unsigned long)busy_time;
}

static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	if (a5xx_gpu->has_whereami)
		return a5xx_gpu->shadow[ring->id];

	return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a5xx_hw_init,
		.pm_suspend = a5xx_pm_suspend,
		.pm_resume = a5xx_pm_resume,
		.recover = a5xx_recover,
		.submit = a5xx_submit,
		.active_ring = a5xx_active_ring,
		.irq = a5xx_irq,
		.destroy = a5xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = a5xx_show,
#endif
#if defined(CONFIG_DEBUG_FS)
		.debugfs_init = a5xx_debugfs_init,
#endif
		.gpu_busy = a5xx_gpu_busy,
		.gpu_state_get = a5xx_gpu_state_get,
		.gpu_state_put = a5xx_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
		.get_rptr = a5xx_get_rptr,
	},
	.get_timestamp = a5xx_get_timestamp,
};

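/*
 * Speed-bin handling: the fuse value read from the "speed_bin" nvmem cell
 * selects which OPPs are usable by turning the bin number into a bitmask
 * for dev_pm_opp_set_supported_hw().  For example, a part fused as bin 2
 * would pass (1 << 2) = 0x4; if no fuse is present, the arbitrary value
 * 0x80 keeps the default OPPs available without claiming a real bin.
 */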
static void check_speed_bin(struct device *dev)
{
	struct nvmem_cell *cell;
	u32 val;

	/*
	 * If the OPP table specifies an opp-supported-hw property then we have
	 * to set something with dev_pm_opp_set_supported_hw() or the table
	 * doesn't get populated so pick an arbitrary value that should
	 * ensure the default frequencies are selected but not conflict with any
	 * actual bins
	 */
	val = 0x80;

	cell = nvmem_cell_get(dev, "speed_bin");

	if (!IS_ERR(cell)) {
		void *buf = nvmem_cell_read(cell, NULL);

		if (!IS_ERR(buf)) {
			u8 bin = *((u8 *) buf);

			val = (1 << bin);
			kfree(buf);
		}

		nvmem_cell_put(cell);
	}

	dev_pm_opp_set_supported_hw(dev, &val, 1);
}

struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
{
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct a5xx_gpu *a5xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
		return ERR_PTR(-ENXIO);
	}

	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
	if (!a5xx_gpu)
		return ERR_PTR(-ENOMEM);

	adreno_gpu = &a5xx_gpu->base;
	gpu = &adreno_gpu->base;

	adreno_gpu->registers = a5xx_registers;

	a5xx_gpu->lm_leakage = 0x4E001A;

	check_speed_bin(&pdev->dev);

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
	if (ret) {
		a5xx_destroy(&(a5xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	if (gpu->aspace)
		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);

	/* Set up the preemption specific bits and pieces for each ringbuffer */
	a5xx_preempt_init(gpu);

	return gpu;
}