// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018, The Linux Foundation. All rights reserved. */
/* Copyright (c) 2023 Collabora, Ltd. */
/* Copyright (c) 2024 Valve Corporation */

#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"
#include "msm_mmu.h"
#include "msm_gpu_trace.h"

/*
 * Try to transition the preemption state from old to new. Return
 * true on success or false if the original state wasn't 'old'
 */
static inline bool try_preempt_state(struct a6xx_gpu *a6xx_gpu,
		enum a6xx_preempt_state old, enum a6xx_preempt_state new)
{
	enum a6xx_preempt_state cur = atomic_cmpxchg(&a6xx_gpu->preempt_state,
		old, new);

	return (cur == old);
}

/*
 * Force the preemption state to the specified state. This is used in cases
 * where the current state is known and won't change
 */
static inline void set_preempt_state(struct a6xx_gpu *gpu,
		enum a6xx_preempt_state new)
{
	/*
	 * preempt_state may be read by other cores trying to trigger a
	 * preemption or in the interrupt handler so barriers are needed
	 * before...
	 */
	smp_mb__before_atomic();
	atomic_set(&gpu->preempt_state, new);
	/* ... and after */
	smp_mb__after_atomic();
}

/* Write the most recent wptr for the given ring into the hardware */
static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	unsigned long flags;
	uint32_t wptr;

	spin_lock_irqsave(&ring->preempt_lock, flags);

	if (ring->restore_wptr) {
		wptr = get_wptr(ring);

		gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);

		ring->restore_wptr = false;
	}

	spin_unlock_irqrestore(&ring->preempt_lock, flags);
}

/* Return the highest priority ringbuffer with something in it */
static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	unsigned long flags;
	int i;

	for (i = 0; i < gpu->nr_rings; i++) {
		bool empty;
		struct msm_ringbuffer *ring = gpu->rb[i];

		spin_lock_irqsave(&ring->preempt_lock, flags);
		empty = (get_wptr(ring) == gpu->funcs->get_rptr(gpu, ring));
		if (!empty && ring == a6xx_gpu->cur_ring)
			empty = ring->memptrs->fence == a6xx_gpu->last_seqno[i];
		spin_unlock_irqrestore(&ring->preempt_lock, flags);

		if (!empty)
			return ring;
	}

	return NULL;
}

static void a6xx_preempt_timer(struct timer_list *t)
{
	struct a6xx_gpu *a6xx_gpu = timer_container_of(a6xx_gpu, t,
		preempt_timer);
	struct msm_gpu *gpu = &a6xx_gpu->base.base;
	struct drm_device *dev = gpu->dev;

	if (!try_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED, PREEMPT_FAULTED))
		return;

	dev_err(dev->dev, "%s: preemption timed out\n", gpu->name);
	kthread_queue_work(gpu->worker, &gpu->recover_work);
}

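/*
 * Build the preemption postamble, a small command stream the CP runs after a
 * context switch: it kicks off re-initialization of the perfcounter SRAM via
 * CP_REG_RMW and then waits for the init status bit to read back 1, so stale
 * counter state is not carried between rings. It is only kept enabled while
 * system profiling is inactive (see a6xx_preempt_trigger()).
 */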
static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu)
{
	u32 *postamble = a6xx_gpu->preempt_postamble_ptr;
	u32 count = 0;

	postamble[count++] = PKT7(CP_REG_RMW, 3);
	postamble[count++] = REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD;
	postamble[count++] = 0;
	postamble[count++] = 1;

	postamble[count++] = PKT7(CP_WAIT_REG_MEM, 6);
	postamble[count++] = CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ);
	postamble[count++] = CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
				REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS);
	postamble[count++] = CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0);
	postamble[count++] = CP_WAIT_REG_MEM_3_REF(0x1);
	postamble[count++] = CP_WAIT_REG_MEM_4_MASK(0x1);
	postamble[count++] = CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0);

	a6xx_gpu->preempt_postamble_len = count;

	a6xx_gpu->postamble_enabled = true;
}

static void preempt_disable_postamble(struct a6xx_gpu *a6xx_gpu)
{
	u32 *postamble = a6xx_gpu->preempt_postamble_ptr;

	/*
	 * Disable the postamble by replacing the first packet header with a NOP
	 * that covers the whole buffer.
	 */
	*postamble = PKT7(CP_NOP, (a6xx_gpu->preempt_postamble_len - 1));

	a6xx_gpu->postamble_enabled = false;
}

void a6xx_preempt_irq(struct msm_gpu *gpu)
{
	uint32_t status;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct drm_device *dev = gpu->dev;

	if (!try_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED, PREEMPT_PENDING))
		return;

	/* Delete the preemption watchdog timer */
	timer_delete(&a6xx_gpu->preempt_timer);

	/*
	 * The hardware should be setting the stop bit of CP_CONTEXT_SWITCH_CNTL
	 * to zero before firing the interrupt, but there is a non zero chance
	 * of a hardware condition or a software race that could set it again
	 * before we have a chance to finish. If that happens, log and go for
	 * recovery
	 */
	status = gpu_read(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL);
	if (unlikely(status & A6XX_CP_CONTEXT_SWITCH_CNTL_STOP)) {
		DRM_DEV_ERROR(&gpu->pdev->dev,
			      "!!!!!!!!!!!!!!!! preemption faulted !!!!!!!!!!!!!! irq\n");
		set_preempt_state(a6xx_gpu, PREEMPT_FAULTED);
		dev_err(dev->dev, "%s: Preemption failed to complete\n",
			gpu->name);
		kthread_queue_work(gpu->worker, &gpu->recover_work);
		return;
	}

	a6xx_gpu->cur_ring = a6xx_gpu->next_ring;
	a6xx_gpu->next_ring = NULL;

	set_preempt_state(a6xx_gpu, PREEMPT_FINISH);

	update_wptr(gpu, a6xx_gpu->cur_ring);

	set_preempt_state(a6xx_gpu, PREEMPT_NONE);

	trace_msm_gpu_preemption_irq(a6xx_gpu->cur_ring->id);

	/*
	 * Retrigger preemption to avoid a deadlock that might occur when preemption
	 * is skipped due to it being already in flight when requested.
	 */
	a6xx_preempt_trigger(gpu);
}

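/*
 * Reseed the per-ring preemption records and clear the SMMU info register
 * pair when the GPU is (re)initialized, then reset the preemption state so
 * we always come back up running on ring 0.
 */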
void a6xx_preempt_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int i;

	/* No preemption if we only have one ring */
	if (gpu->nr_rings == 1)
		return;

	for (i = 0; i < gpu->nr_rings; i++) {
		struct a6xx_preempt_record *record_ptr = a6xx_gpu->preempt[i];

		record_ptr->wptr = 0;
		record_ptr->rptr = 0;
		record_ptr->rptr_addr = shadowptr(a6xx_gpu, gpu->rb[i]);
		record_ptr->info = 0;
		record_ptr->data = 0;
		record_ptr->rbase = gpu->rb[i]->iova;
	}

	/* Write a 0 to signal that we aren't switching pagetables */
	gpu_write64(gpu, REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, 0);

	/* Enable the GMEM save/restore feature for preemption */
	gpu_write(gpu, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 0x1);

	/* Reset the preemption state */
	set_preempt_state(a6xx_gpu, PREEMPT_NONE);

	spin_lock_init(&a6xx_gpu->eval_lock);

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];
}

void a6xx_preempt_trigger(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	unsigned long flags;
	struct msm_ringbuffer *ring;
	unsigned int cntl;
	bool sysprof;

	if (gpu->nr_rings == 1)
		return;

	/*
	 * Lock to make sure another thread attempting preemption doesn't skip it
	 * while we are still evaluating the next ring. This makes sure the other
	 * thread does start preemption if we abort it and avoids a soft lock.
	 */
	spin_lock_irqsave(&a6xx_gpu->eval_lock, flags);

	/*
	 * Try to start preemption by moving from NONE to START. If
	 * unsuccessful, a preemption is already in flight
	 */
	if (!try_preempt_state(a6xx_gpu, PREEMPT_NONE, PREEMPT_START)) {
		spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
		return;
	}

	cntl = A6XX_CP_CONTEXT_SWITCH_CNTL_LEVEL(a6xx_gpu->preempt_level);

	if (a6xx_gpu->skip_save_restore)
		cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_SKIP_SAVE_RESTORE;

	if (a6xx_gpu->uses_gmem)
		cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_USES_GMEM;

	cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_STOP;

	/* Get the next ring to preempt to */
	ring = get_next_ring(gpu);

	/*
	 * If no ring is populated or the highest priority ring is the current
	 * one do nothing except to update the wptr to the latest and greatest
	 */
	if (!ring || (a6xx_gpu->cur_ring == ring)) {
		set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
		update_wptr(gpu, a6xx_gpu->cur_ring);
		set_preempt_state(a6xx_gpu, PREEMPT_NONE);
		spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
		return;
	}

	spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);

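	/*
	 * Stage the target ring's pagetable info and saved wptr in its SMMU
	 * info and preemption record buffers under the ring's preempt_lock,
	 * so the CP restores a consistent context when the switch happens.
	 */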
	spin_lock_irqsave(&ring->preempt_lock, flags);

	struct a7xx_cp_smmu_info *smmu_info_ptr =
		a6xx_gpu->preempt_smmu[ring->id];
	struct a6xx_preempt_record *record_ptr = a6xx_gpu->preempt[ring->id];
	u64 ttbr0 = ring->memptrs->ttbr0;
	u32 context_idr = ring->memptrs->context_idr;

	smmu_info_ptr->ttbr0 = ttbr0;
	smmu_info_ptr->context_idr = context_idr;
	record_ptr->wptr = get_wptr(ring);

	/*
	 * The GPU will write the wptr we set above when we preempt. Reset
	 * restore_wptr to make sure that we don't write WPTR to the same
	 * thing twice. It's still possible subsequent submissions will update
	 * wptr again, in which case they will set the flag to true. This has
	 * to be protected by the lock for setting the flag and updating wptr
	 * to be atomic.
	 */
	ring->restore_wptr = false;

	trace_msm_gpu_preemption_trigger(a6xx_gpu->cur_ring->id, ring->id);

	spin_unlock_irqrestore(&ring->preempt_lock, flags);

	gpu_write64(gpu,
		REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO,
		a6xx_gpu->preempt_smmu_iova[ring->id]);

	gpu_write64(gpu,
		REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR,
		a6xx_gpu->preempt_iova[ring->id]);

	a6xx_gpu->next_ring = ring;

	/* Start a timer to catch a stuck preemption */
	mod_timer(&a6xx_gpu->preempt_timer, jiffies + msecs_to_jiffies(10000));

	/* Enable or disable postamble as needed */
	sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;

	if (!sysprof && !a6xx_gpu->postamble_enabled)
		preempt_prepare_postamble(a6xx_gpu);

	if (sysprof && a6xx_gpu->postamble_enabled)
		preempt_disable_postamble(a6xx_gpu);

	/* Set the preemption state to triggered */
	set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED);

	/* Trigger the preemption */
	gpu_write(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl);
}

static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu,
		struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;
	struct drm_gem_object *bo = NULL;
	phys_addr_t ttbr;
	u64 iova = 0;
	void *ptr;
	int asid;

	ptr = msm_gem_kernel_new(gpu->dev,
		PREEMPT_RECORD_SIZE(adreno_gpu),
		MSM_BO_WC | MSM_BO_MAP_PRIV, gpu->vm, &bo, &iova);

	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	memset(ptr, 0, PREEMPT_RECORD_SIZE(adreno_gpu));

	msm_gem_object_set_name(bo, "preempt_record ring%d", ring->id);

	a6xx_gpu->preempt_bo[ring->id] = bo;
	a6xx_gpu->preempt_iova[ring->id] = iova;
	a6xx_gpu->preempt[ring->id] = ptr;

	struct a6xx_preempt_record *record_ptr = ptr;

	ptr = msm_gem_kernel_new(gpu->dev,
		PREEMPT_SMMU_INFO_SIZE,
		MSM_BO_WC | MSM_BO_MAP_PRIV | MSM_BO_GPU_READONLY,
		gpu->vm, &bo, &iova);

	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	memset(ptr, 0, PREEMPT_SMMU_INFO_SIZE);

	msm_gem_object_set_name(bo, "preempt_smmu_info ring%d", ring->id);

	a6xx_gpu->preempt_smmu_bo[ring->id] = bo;
	a6xx_gpu->preempt_smmu_iova[ring->id] = iova;
	a6xx_gpu->preempt_smmu[ring->id] = ptr;

	struct a7xx_cp_smmu_info *smmu_info_ptr = ptr;

	msm_iommu_pagetable_params(to_msm_vm(gpu->vm)->mmu, &ttbr, &asid);

	smmu_info_ptr->magic = GEN7_CP_SMMU_INFO_MAGIC;
	smmu_info_ptr->ttbr0 = ttbr;
	smmu_info_ptr->asid = 0xdecafbad;
	smmu_info_ptr->context_idr = 0;

	/* Set up the defaults on the preemption record */
	record_ptr->magic = A6XX_PREEMPT_RECORD_MAGIC;
	record_ptr->info = 0;
	record_ptr->data = 0;
	record_ptr->rptr = 0;
	record_ptr->wptr = 0;
	record_ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT;
	record_ptr->rbase = ring->iova;
	record_ptr->counter = 0;
	record_ptr->bv_rptr_addr = rbmemptr(ring, bv_rptr);

	return 0;
}

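/* Release the per-ring preemption records (also used on the init fail path) */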
void a6xx_preempt_fini(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int i;

	for (i = 0; i < gpu->nr_rings; i++)
		msm_gem_kernel_put(a6xx_gpu->preempt_bo[i], gpu->vm);
}

void a6xx_preempt_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int i;

	/* No preemption if we only have one ring */
	if (gpu->nr_rings <= 1)
		return;

	for (i = 0; i < gpu->nr_rings; i++) {
		if (preempt_init_ring(a6xx_gpu, gpu->rb[i]))
			goto fail;
	}

	/* TODO: make this configurable? */
	a6xx_gpu->preempt_level = 1;
	a6xx_gpu->uses_gmem = 1;
	a6xx_gpu->skip_save_restore = 1;

	a6xx_gpu->preempt_postamble_ptr = msm_gem_kernel_new(gpu->dev,
			PAGE_SIZE,
			MSM_BO_WC | MSM_BO_MAP_PRIV | MSM_BO_GPU_READONLY,
			gpu->vm, &a6xx_gpu->preempt_postamble_bo,
			&a6xx_gpu->preempt_postamble_iova);

	/* Check the allocation before the postamble writes through the pointer */
	if (IS_ERR(a6xx_gpu->preempt_postamble_ptr))
		goto fail;

	preempt_prepare_postamble(a6xx_gpu);

	timer_setup(&a6xx_gpu->preempt_timer, a6xx_preempt_timer, 0);

	return;
fail:
	/*
	 * On any failure our adventure is over. Clean up and
	 * set nr_rings to 1 to force preemption off
	 */
	a6xx_preempt_fini(gpu);
	gpu->nr_rings = 1;

	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "preemption init failed, disabling preemption\n");
}