// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018, The Linux Foundation. All rights reserved. */
/* Copyright (c) 2023 Collabora, Ltd. */
/* Copyright (c) 2024 Valve Corporation */

#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"
#include "msm_mmu.h"
#include "msm_gpu_trace.h"

/*
 * Try to transition the preemption state from old to new. Return
 * true on success or false if the original state wasn't 'old'
 */
static inline bool try_preempt_state(struct a6xx_gpu *a6xx_gpu,
		enum a6xx_preempt_state old, enum a6xx_preempt_state new)
{
	enum a6xx_preempt_state cur = atomic_cmpxchg(&a6xx_gpu->preempt_state,
		old, new);

	return (cur == old);
}

/*
 * Force the preemption state to the specified state. This is used in cases
 * where the current state is known and won't change
 */
static inline void set_preempt_state(struct a6xx_gpu *gpu,
		enum a6xx_preempt_state new)
{
	/*
	 * preempt_state may be read by other cores trying to trigger a
	 * preemption or in the interrupt handler so barriers are needed
	 * before...
	 */
	smp_mb__before_atomic();
	atomic_set(&gpu->preempt_state, new);
	/* ... and after */
	smp_mb__after_atomic();
}

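/*
 * Overview of the preemption state machine, as driven by this file:
 *
 *   PREEMPT_NONE      -> PREEMPT_START      a6xx_preempt_trigger() starts
 *                                           evaluating the rings
 *   PREEMPT_START     -> PREEMPT_FINISH     no switch is needed, only the
 *                                           wptr gets updated
 *   PREEMPT_START     -> PREEMPT_TRIGGERED  the switch was handed to the CP
 *   PREEMPT_TRIGGERED -> PREEMPT_PENDING    a6xx_preempt_irq() is finalizing
 *                                           the switch
 *   PREEMPT_TRIGGERED -> PREEMPT_FAULTED    a6xx_preempt_timer() expired and
 *                                           recovery is scheduled
 *   PREEMPT_PENDING   -> PREEMPT_FAULTED    the CP left the STOP bit set and
 *                                           recovery is scheduled
 *   PREEMPT_PENDING   -> PREEMPT_FINISH     the next ring is now current
 *   PREEMPT_FINISH    -> PREEMPT_NONE       wptr restored, ready for the
 *                                           next trigger
 */
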
/* Write the most recent wptr for the given ring into the hardware */
static inline void update_wptr(struct a6xx_gpu *a6xx_gpu, struct msm_ringbuffer *ring)
{
	unsigned long flags;
	uint32_t wptr;

	spin_lock_irqsave(&ring->preempt_lock, flags);

	if (ring->restore_wptr) {
		wptr = get_wptr(ring);

		a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);

		ring->restore_wptr = false;
	}

	spin_unlock_irqrestore(&ring->preempt_lock, flags);
}

/* Return the highest priority ringbuffer with something in it */
static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	unsigned long flags;
	int i;

	for (i = 0; i < gpu->nr_rings; i++) {
		bool empty;
		struct msm_ringbuffer *ring = gpu->rb[i];

		spin_lock_irqsave(&ring->preempt_lock, flags);
		empty = (get_wptr(ring) == gpu->funcs->get_rptr(gpu, ring));
		if (!empty && ring == a6xx_gpu->cur_ring)
			empty = ring->memptrs->fence == a6xx_gpu->last_seqno[i];
		spin_unlock_irqrestore(&ring->preempt_lock, flags);

		if (!empty)
			return ring;
	}

	return NULL;
}

static void a6xx_preempt_timer(struct timer_list *t)
{
	struct a6xx_gpu *a6xx_gpu = timer_container_of(a6xx_gpu, t,
			preempt_timer);
	struct msm_gpu *gpu = &a6xx_gpu->base.base;
	struct drm_device *dev = gpu->dev;

	if (!try_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED, PREEMPT_FAULTED))
		return;

	dev_err(dev->dev, "%s: preemption timed out\n", gpu->name);
	kthread_queue_work(gpu->worker, &gpu->recover_work);
}

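/*
 * Build the preemption postamble: a short sequence of CP packets, run by the
 * CP as part of a preemption switch, that kicks off reinitialization of the
 * perfcounter SRAM and busy-waits for the status register to report
 * completion. The buffer itself is handed to the CP elsewhere in the driver.
 * a6xx_preempt_trigger() keeps the postamble disabled while system profiling
 * is active so that perfcounter state is not reset behind the profiler's
 * back.
 */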
static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu)
{
	u32 *postamble = a6xx_gpu->preempt_postamble_ptr;
	u32 count = 0;

	postamble[count++] = PKT7(CP_REG_RMW, 3);
	postamble[count++] = REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD;
	postamble[count++] = 0;
	postamble[count++] = 1;

	postamble[count++] = PKT7(CP_WAIT_REG_MEM, 6);
	postamble[count++] = CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ);
	postamble[count++] = CP_WAIT_REG_MEM_POLL_ADDR_LO(
				REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS);
	postamble[count++] = CP_WAIT_REG_MEM_POLL_ADDR_HI(0);
	postamble[count++] = CP_WAIT_REG_MEM_3_REF(0x1);
	postamble[count++] = CP_WAIT_REG_MEM_4_MASK(0x1);
	postamble[count++] = CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0);

	a6xx_gpu->preempt_postamble_len = count;

	a6xx_gpu->postamble_enabled = true;
}

static void preempt_disable_postamble(struct a6xx_gpu *a6xx_gpu)
{
	u32 *postamble = a6xx_gpu->preempt_postamble_ptr;

	/*
	 * Disable the postamble by replacing the first packet header with a NOP
	 * that covers the whole buffer.
	 */
	*postamble = PKT7(CP_NOP, (a6xx_gpu->preempt_postamble_len - 1));

	a6xx_gpu->postamble_enabled = false;
}

/*
 * Set preemption keepalive vote. Please note that this vote is different from the one used in
 * a6xx_irq()
 */
static void a6xx_preempt_keepalive_vote(struct msm_gpu *gpu, bool on)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	if (adreno_has_gmu_wrapper(adreno_gpu))
		return;

	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_PWR_COL_PREEMPT_KEEPALIVE, on);
}

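/*
 * Finish a context switch once the CP signals completion: stop the watchdog
 * timer, verify that the STOP bit in CP_CONTEXT_SWITCH_CNTL really cleared
 * (otherwise schedule recovery), promote next_ring to cur_ring, restore its
 * wptr, drop the GMU keepalive vote and retrigger in case another preemption
 * was requested while this one was in flight.
 */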
void a6xx_preempt_irq(struct msm_gpu *gpu)
{
	uint32_t status;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct drm_device *dev = gpu->dev;

	if (!try_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED, PREEMPT_PENDING))
		return;

	/* Delete the preemption watchdog timer */
	timer_delete(&a6xx_gpu->preempt_timer);

	/*
	 * The hardware should be setting the stop bit of CP_CONTEXT_SWITCH_CNTL
	 * to zero before firing the interrupt, but there is a non-zero chance
	 * of a hardware condition or a software race that could set it again
	 * before we have a chance to finish. If that happens, log and go for
	 * recovery
	 */
	status = gpu_read(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL);
	if (unlikely(status & A6XX_CP_CONTEXT_SWITCH_CNTL_STOP)) {
		DRM_DEV_ERROR(&gpu->pdev->dev,
			      "!!!!!!!!!!!!!!!! preemption faulted !!!!!!!!!!!!!! irq\n");
		set_preempt_state(a6xx_gpu, PREEMPT_FAULTED);
		dev_err(dev->dev, "%s: Preemption failed to complete\n",
			gpu->name);
		kthread_queue_work(gpu->worker, &gpu->recover_work);
		return;
	}

	a6xx_gpu->cur_ring = a6xx_gpu->next_ring;
	a6xx_gpu->next_ring = NULL;

	set_preempt_state(a6xx_gpu, PREEMPT_FINISH);

	update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);

	set_preempt_state(a6xx_gpu, PREEMPT_NONE);

	a6xx_preempt_keepalive_vote(gpu, false);

	trace_msm_gpu_preemption_irq(a6xx_gpu->cur_ring->id);

	/*
	 * Retrigger preemption to avoid a deadlock that might occur when preemption
	 * is skipped due to it being already in flight when requested.
	 */
	a6xx_preempt_trigger(gpu);
}

void a6xx_preempt_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int i;

	/* No preemption if we only have one ring */
	if (gpu->nr_rings == 1)
		return;

	for (i = 0; i < gpu->nr_rings; i++) {
		struct a6xx_preempt_record *record_ptr = a6xx_gpu->preempt[i];

		record_ptr->wptr = 0;
		record_ptr->rptr = 0;
		record_ptr->rptr_addr = shadowptr(a6xx_gpu, gpu->rb[i]);
		record_ptr->info = 0;
		record_ptr->data = 0;
		record_ptr->rbase = gpu->rb[i]->iova;
	}

	/* Write a 0 to signal that we aren't switching pagetables */
	gpu_write64(gpu, REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, 0);

	/* Enable the GMEM save/restore feature for preemption */
	gpu_write(gpu, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 0x1);

	/* Reset the preemption state */
	set_preempt_state(a6xx_gpu, PREEMPT_NONE);

	spin_lock_init(&a6xx_gpu->eval_lock);

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];
}

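/*
 * Figure out which ring should be running (the highest priority ring with
 * work pending) and, if it is not the current one, ask the CP to switch to
 * it: fill in the target ring's SMMU info and preemption record, take the
 * GMU keepalive vote, arm the watchdog timer, enable or disable the
 * postamble depending on system profiling, and finally write
 * CP_CONTEXT_SWITCH_CNTL. Safe to call while a preemption is already in
 * flight; the NONE -> START transition fails and we return early.
 */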
void a6xx_preempt_trigger(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	unsigned long flags;
	struct msm_ringbuffer *ring;
	unsigned int cntl;
	bool sysprof;

	if (gpu->nr_rings == 1)
		return;

	/*
	 * Lock to make sure another thread attempting preemption doesn't skip it
	 * while we are still evaluating the next ring. This makes sure the other
	 * thread does start preemption if we abort it and avoids a soft lock.
	 */
	spin_lock_irqsave(&a6xx_gpu->eval_lock, flags);

	/*
	 * Try to start preemption by moving from NONE to START. If
	 * unsuccessful, a preemption is already in flight
	 */
	if (!try_preempt_state(a6xx_gpu, PREEMPT_NONE, PREEMPT_START)) {
		spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
		return;
	}

	cntl = A6XX_CP_CONTEXT_SWITCH_CNTL_LEVEL(a6xx_gpu->preempt_level);

	if (a6xx_gpu->skip_save_restore)
		cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_SKIP_SAVE_RESTORE;

	if (a6xx_gpu->uses_gmem)
		cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_USES_GMEM;

	cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_STOP;

	/* Get the next ring to preempt to */
	ring = get_next_ring(gpu);

	/*
	 * If no ring is populated or the highest priority ring is the current
	 * one do nothing except to update the wptr to the latest and greatest
	 */
	if (!ring || (a6xx_gpu->cur_ring == ring)) {
		set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
		update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);
		set_preempt_state(a6xx_gpu, PREEMPT_NONE);
		spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
		return;
	}

	spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);

	spin_lock_irqsave(&ring->preempt_lock, flags);

	struct a7xx_cp_smmu_info *smmu_info_ptr =
		a6xx_gpu->preempt_smmu[ring->id];
	struct a6xx_preempt_record *record_ptr = a6xx_gpu->preempt[ring->id];
	u64 ttbr0 = ring->memptrs->ttbr0;
	u32 context_idr = ring->memptrs->context_idr;

	smmu_info_ptr->ttbr0 = ttbr0;
	smmu_info_ptr->context_idr = context_idr;
	record_ptr->wptr = get_wptr(ring);

	/*
	 * The GPU will write the wptr we set above when we preempt. Reset
	 * restore_wptr to make sure that we don't write WPTR to the same
	 * thing twice. It's still possible that subsequent submissions will
	 * update wptr again, in which case they will set the flag to true.
	 * This has to be done under the lock so that setting the flag and
	 * updating the wptr are atomic with respect to each other.
	 */
	ring->restore_wptr = false;

	trace_msm_gpu_preemption_trigger(a6xx_gpu->cur_ring->id, ring->id);

	spin_unlock_irqrestore(&ring->preempt_lock, flags);

	/* Set the keepalive bit to keep the GPU ON until preemption is complete */
	a6xx_preempt_keepalive_vote(gpu, true);

	a6xx_fenced_write(a6xx_gpu,
		REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, a6xx_gpu->preempt_smmu_iova[ring->id],
		BIT(1), true);

	a6xx_fenced_write(a6xx_gpu,
		REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR,
		a6xx_gpu->preempt_iova[ring->id], BIT(1), true);

	a6xx_gpu->next_ring = ring;

	/* Start a timer to catch a stuck preemption */
	mod_timer(&a6xx_gpu->preempt_timer, jiffies + msecs_to_jiffies(10000));

	/* Enable or disable postamble as needed */
	sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;

	if (!sysprof && !a6xx_gpu->postamble_enabled)
		preempt_prepare_postamble(a6xx_gpu);

	if (sysprof && a6xx_gpu->postamble_enabled)
		preempt_disable_postamble(a6xx_gpu);

	/* Set the preemption state to triggered */
	set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED);

	/* Trigger the preemption */
	a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl, BIT(1), false);
}

static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu,
		struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;
	struct drm_gem_object *bo = NULL;
	phys_addr_t ttbr;
	u64 iova = 0;
	void *ptr;
	int asid;

	ptr = msm_gem_kernel_new(gpu->dev,
		PREEMPT_RECORD_SIZE(adreno_gpu),
		MSM_BO_WC | MSM_BO_MAP_PRIV, gpu->vm, &bo, &iova);

	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	memset(ptr, 0, PREEMPT_RECORD_SIZE(adreno_gpu));

	msm_gem_object_set_name(bo, "preempt_record ring%d", ring->id);

	a6xx_gpu->preempt_bo[ring->id] = bo;
	a6xx_gpu->preempt_iova[ring->id] = iova;
	a6xx_gpu->preempt[ring->id] = ptr;

	struct a6xx_preempt_record *record_ptr = ptr;

	ptr = msm_gem_kernel_new(gpu->dev,
		PREEMPT_SMMU_INFO_SIZE,
		MSM_BO_WC | MSM_BO_MAP_PRIV | MSM_BO_GPU_READONLY,
		gpu->vm, &bo, &iova);

	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	memset(ptr, 0, PREEMPT_SMMU_INFO_SIZE);

	msm_gem_object_set_name(bo, "preempt_smmu_info ring%d", ring->id);

	a6xx_gpu->preempt_smmu_bo[ring->id] = bo;
	a6xx_gpu->preempt_smmu_iova[ring->id] = iova;
	a6xx_gpu->preempt_smmu[ring->id] = ptr;

	struct a7xx_cp_smmu_info *smmu_info_ptr = ptr;

	msm_iommu_pagetable_params(to_msm_vm(gpu->vm)->mmu, &ttbr, &asid);

	smmu_info_ptr->magic = GEN7_CP_SMMU_INFO_MAGIC;
	smmu_info_ptr->ttbr0 = ttbr;
	smmu_info_ptr->asid = 0xdecafbad;
	smmu_info_ptr->context_idr = 0;

	/* Set up the defaults on the preemption record */
	record_ptr->magic = A6XX_PREEMPT_RECORD_MAGIC;
	record_ptr->info = 0;
	record_ptr->data = 0;
	record_ptr->rptr = 0;
	record_ptr->wptr = 0;
	record_ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT;
	record_ptr->rbase = ring->iova;
	record_ptr->counter = 0;
	record_ptr->bv_rptr_addr = rbmemptr(ring, bv_rptr);

	return 0;
}

void a6xx_preempt_fini(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int i;

	for (i = 0; i < gpu->nr_rings; i++)
		msm_gem_kernel_put(a6xx_gpu->preempt_bo[i], gpu->vm);
}

void a6xx_preempt_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int i;

	/* No preemption if we only have one ring */
	if (gpu->nr_rings <= 1)
		return;

	for (i = 0; i < gpu->nr_rings; i++) {
		if (preempt_init_ring(a6xx_gpu, gpu->rb[i]))
			goto fail;
	}

	/* TODO: make this configurable? */
	a6xx_gpu->preempt_level = 1;
	a6xx_gpu->uses_gmem = 1;
	a6xx_gpu->skip_save_restore = 1;

	a6xx_gpu->preempt_postamble_ptr = msm_gem_kernel_new(gpu->dev,
			PAGE_SIZE,
			MSM_BO_WC | MSM_BO_MAP_PRIV | MSM_BO_GPU_READONLY,
			gpu->vm, &a6xx_gpu->preempt_postamble_bo,
			&a6xx_gpu->preempt_postamble_iova);

	if (IS_ERR(a6xx_gpu->preempt_postamble_ptr))
		goto fail;

	preempt_prepare_postamble(a6xx_gpu);

	timer_setup(&a6xx_gpu->preempt_timer, a6xx_preempt_timer, 0);

	return;
fail:
	/*
	 * On any failure our adventure is over. Clean up and
	 * set nr_rings to 1 to force preemption off
	 */
	a6xx_preempt_fini(gpu);
	gpu->nr_rings = 1;

	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "preemption init failed, disabling preemption\n");

	return;
}