// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018, The Linux Foundation. All rights reserved. */
/* Copyright (c) 2023 Collabora, Ltd. */
/* Copyright (c) 2024 Valve Corporation */

#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"
#include "msm_mmu.h"
#include "msm_gpu_trace.h"

/*
 * Try to transition the preemption state from old to new. Return
 * true on success or false if the original state wasn't 'old'
 */
static inline bool try_preempt_state(struct a6xx_gpu *a6xx_gpu,
		enum a6xx_preempt_state old, enum a6xx_preempt_state new)
{
	enum a6xx_preempt_state cur = atomic_cmpxchg(&a6xx_gpu->preempt_state,
		old, new);

	return (cur == old);
}

/*
 * Force the preemption state to the specified state. This is used in cases
 * where the current state is known and won't change
 */
static inline void set_preempt_state(struct a6xx_gpu *gpu,
		enum a6xx_preempt_state new)
{
	/*
	 * preempt_state may be read by other cores trying to trigger a
	 * preemption or in the interrupt handler so barriers are needed
	 * before...
	 */
	smp_mb__before_atomic();
	atomic_set(&gpu->preempt_state, new);
	/* ... and after */
	smp_mb__after_atomic();
}

/* Write the most recent wptr for the given ring into the hardware */
static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	unsigned long flags;
	uint32_t wptr;

	spin_lock_irqsave(&ring->preempt_lock, flags);

	if (ring->restore_wptr) {
		wptr = get_wptr(ring);

		gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);

		ring->restore_wptr = false;
	}

	spin_unlock_irqrestore(&ring->preempt_lock, flags);
}

/* Return the highest priority ringbuffer with something in it */
static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	unsigned long flags;
	int i;

	for (i = 0; i < gpu->nr_rings; i++) {
		bool empty;
		struct msm_ringbuffer *ring = gpu->rb[i];

		spin_lock_irqsave(&ring->preempt_lock, flags);
		empty = (get_wptr(ring) == gpu->funcs->get_rptr(gpu, ring));
		if (!empty && ring == a6xx_gpu->cur_ring)
			empty = ring->memptrs->fence == a6xx_gpu->last_seqno[i];
		spin_unlock_irqrestore(&ring->preempt_lock, flags);

		if (!empty)
			return ring;
	}

	return NULL;
}

static void a6xx_preempt_timer(struct timer_list *t)
{
	struct a6xx_gpu *a6xx_gpu = from_timer(a6xx_gpu, t, preempt_timer);
	struct msm_gpu *gpu = &a6xx_gpu->base.base;
	struct drm_device *dev = gpu->dev;

	if (!try_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED, PREEMPT_FAULTED))
		return;

	dev_err(dev->dev, "%s: preemption timed out\n", gpu->name);
	kthread_queue_work(gpu->worker, &gpu->recover_work);
}

/*
 * Build the postamble packets: trigger a perfcounter SRAM init and wait
 * for it to complete. The postamble is disabled again while system
 * profiling is active (see a6xx_preempt_trigger()).
 */
static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu)
{
	u32 *postamble = a6xx_gpu->preempt_postamble_ptr;
	u32 count = 0;

	postamble[count++] = PKT7(CP_REG_RMW, 3);
	postamble[count++] = REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD;
	postamble[count++] = 0;
	postamble[count++] = 1;

	postamble[count++] = PKT7(CP_WAIT_REG_MEM, 6);
	postamble[count++] = CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ);
	postamble[count++] = CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
				REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS);
	postamble[count++] = CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0);
	postamble[count++] = CP_WAIT_REG_MEM_3_REF(0x1);
	postamble[count++] = CP_WAIT_REG_MEM_4_MASK(0x1);
	postamble[count++] = CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0);

	a6xx_gpu->preempt_postamble_len = count;

	a6xx_gpu->postamble_enabled = true;
}

static void preempt_disable_postamble(struct a6xx_gpu *a6xx_gpu)
{
	u32 *postamble = a6xx_gpu->preempt_postamble_ptr;

	/*
	 * Disable the postamble by replacing the first packet header with a NOP
	 * that covers the whole buffer.
	 */
	*postamble = PKT7(CP_NOP, (a6xx_gpu->preempt_postamble_len - 1));

	a6xx_gpu->postamble_enabled = false;
}

void a6xx_preempt_irq(struct msm_gpu *gpu)
{
	uint32_t status;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct drm_device *dev = gpu->dev;

	if (!try_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED, PREEMPT_PENDING))
		return;

	/* Delete the preemption watchdog timer */
	del_timer(&a6xx_gpu->preempt_timer);

	/*
	 * The hardware should be setting the stop bit of CP_CONTEXT_SWITCH_CNTL
	 * to zero before firing the interrupt, but there is a non zero chance
	 * of a hardware condition or a software race that could set it again
	 * before we have a chance to finish. If that happens, log and go for
	 * recovery
	 */
	status = gpu_read(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL);
	if (unlikely(status & A6XX_CP_CONTEXT_SWITCH_CNTL_STOP)) {
		DRM_DEV_ERROR(&gpu->pdev->dev,
			      "!!!!!!!!!!!!!!!! preemption faulted !!!!!!!!!!!!!! irq\n");
		set_preempt_state(a6xx_gpu, PREEMPT_FAULTED);
		dev_err(dev->dev, "%s: Preemption failed to complete\n",
			gpu->name);
		kthread_queue_work(gpu->worker, &gpu->recover_work);
		return;
	}

	a6xx_gpu->cur_ring = a6xx_gpu->next_ring;
	a6xx_gpu->next_ring = NULL;

	set_preempt_state(a6xx_gpu, PREEMPT_FINISH);

	update_wptr(gpu, a6xx_gpu->cur_ring);

	set_preempt_state(a6xx_gpu, PREEMPT_NONE);

	trace_msm_gpu_preemption_irq(a6xx_gpu->cur_ring->id);

	/*
	 * Retrigger preemption to avoid a deadlock that might occur when preemption
	 * is skipped due to it being already in flight when requested.
	 */
	a6xx_preempt_trigger(gpu);
}

void a6xx_preempt_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int i;

	/* No preemption if we only have one ring */
	if (gpu->nr_rings == 1)
		return;

	for (i = 0; i < gpu->nr_rings; i++) {
		struct a6xx_preempt_record *record_ptr = a6xx_gpu->preempt[i];

		record_ptr->wptr = 0;
		record_ptr->rptr = 0;
		record_ptr->rptr_addr = shadowptr(a6xx_gpu, gpu->rb[i]);
		record_ptr->info = 0;
		record_ptr->data = 0;
		record_ptr->rbase = gpu->rb[i]->iova;
	}

	/* Write a 0 to signal that we aren't switching pagetables */
	gpu_write64(gpu, REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, 0);

	/* Enable the GMEM save/restore feature for preemption */
	gpu_write(gpu, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, 0x1);

	/* Reset the preemption state */
	set_preempt_state(a6xx_gpu, PREEMPT_NONE);

	spin_lock_init(&a6xx_gpu->eval_lock);

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];
}

void a6xx_preempt_trigger(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	unsigned long flags;
	struct msm_ringbuffer *ring;
	unsigned int cntl;
	bool sysprof;

	if (gpu->nr_rings == 1)
		return;

	/*
	 * Lock to make sure another thread attempting preemption doesn't skip it
	 * while we are still evaluating the next ring. This makes sure the other
	 * thread does start preemption if we abort it and avoids a soft lock.
	 */
	spin_lock_irqsave(&a6xx_gpu->eval_lock, flags);

	/*
	 * Try to start preemption by moving from NONE to START. If
	 * unsuccessful, a preemption is already in flight
	 */
	if (!try_preempt_state(a6xx_gpu, PREEMPT_NONE, PREEMPT_START)) {
		spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
		return;
	}

	cntl = A6XX_CP_CONTEXT_SWITCH_CNTL_LEVEL(a6xx_gpu->preempt_level);

	if (a6xx_gpu->skip_save_restore)
		cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_SKIP_SAVE_RESTORE;

	if (a6xx_gpu->uses_gmem)
		cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_USES_GMEM;

	cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_STOP;

	/* Get the next ring to preempt to */
	ring = get_next_ring(gpu);

	/*
	 * If no ring is populated or the highest priority ring is the current
	 * one do nothing except to update the wptr to the latest and greatest
	 */
	if (!ring || (a6xx_gpu->cur_ring == ring)) {
		set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
		update_wptr(gpu, a6xx_gpu->cur_ring);
		set_preempt_state(a6xx_gpu, PREEMPT_NONE);
		spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
		return;
	}

	spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);

	spin_lock_irqsave(&ring->preempt_lock, flags);

	struct a7xx_cp_smmu_info *smmu_info_ptr =
		a6xx_gpu->preempt_smmu[ring->id];
	struct a6xx_preempt_record *record_ptr = a6xx_gpu->preempt[ring->id];
	u64 ttbr0 = ring->memptrs->ttbr0;
	u32 context_idr = ring->memptrs->context_idr;

	smmu_info_ptr->ttbr0 = ttbr0;
	smmu_info_ptr->context_idr = context_idr;
	record_ptr->wptr = get_wptr(ring);

	/*
	 * The GPU will write the wptr we set above when we preempt. Reset
	 * restore_wptr to make sure that we don't write WPTR to the same
	 * thing twice.
	 * It's still possible subsequent submissions will update wptr again,
	 * in which case they will set the flag to true. This has to be done
	 * under the lock so that setting the flag and updating the wptr are
	 * atomic.
	 */
	ring->restore_wptr = false;

	trace_msm_gpu_preemption_trigger(a6xx_gpu->cur_ring->id,
		ring ? ring->id : -1);

	spin_unlock_irqrestore(&ring->preempt_lock, flags);

	gpu_write64(gpu,
		REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO,
		a6xx_gpu->preempt_smmu_iova[ring->id]);

	gpu_write64(gpu,
		REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR,
		a6xx_gpu->preempt_iova[ring->id]);

	a6xx_gpu->next_ring = ring;

	/* Start a timer to catch a stuck preemption */
	mod_timer(&a6xx_gpu->preempt_timer, jiffies + msecs_to_jiffies(10000));

	/* Enable or disable postamble as needed */
	sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;

	if (!sysprof && !a6xx_gpu->postamble_enabled)
		preempt_prepare_postamble(a6xx_gpu);

	if (sysprof && a6xx_gpu->postamble_enabled)
		preempt_disable_postamble(a6xx_gpu);

	/* Set the preemption state to triggered */
	set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED);

	/* Trigger the preemption */
	gpu_write(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl);
}

/* Allocate and initialize the preemption record and SMMU info for one ring */
static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu,
		struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;
	struct drm_gem_object *bo = NULL;
	phys_addr_t ttbr;
	u64 iova = 0;
	void *ptr;
	int asid;

	ptr = msm_gem_kernel_new(gpu->dev,
		PREEMPT_RECORD_SIZE(adreno_gpu),
		MSM_BO_WC | MSM_BO_MAP_PRIV, gpu->aspace, &bo, &iova);

	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	memset(ptr, 0, PREEMPT_RECORD_SIZE(adreno_gpu));

	msm_gem_object_set_name(bo, "preempt_record ring%d", ring->id);

	a6xx_gpu->preempt_bo[ring->id] = bo;
	a6xx_gpu->preempt_iova[ring->id] = iova;
	a6xx_gpu->preempt[ring->id] = ptr;

	struct a6xx_preempt_record *record_ptr = ptr;

	ptr = msm_gem_kernel_new(gpu->dev,
		PREEMPT_SMMU_INFO_SIZE,
		MSM_BO_WC | MSM_BO_MAP_PRIV | MSM_BO_GPU_READONLY,
		gpu->aspace, &bo, &iova);

	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	memset(ptr, 0, PREEMPT_SMMU_INFO_SIZE);

	msm_gem_object_set_name(bo, "preempt_smmu_info ring%d", ring->id);

	a6xx_gpu->preempt_smmu_bo[ring->id] = bo;
	a6xx_gpu->preempt_smmu_iova[ring->id] = iova;
	a6xx_gpu->preempt_smmu[ring->id] = ptr;

	struct a7xx_cp_smmu_info *smmu_info_ptr = ptr;

	msm_iommu_pagetable_params(gpu->aspace->mmu, &ttbr, &asid);

	smmu_info_ptr->magic = GEN7_CP_SMMU_INFO_MAGIC;
	smmu_info_ptr->ttbr0 = ttbr;
	smmu_info_ptr->asid = 0xdecafbad;
	smmu_info_ptr->context_idr = 0;

	/* Set up the defaults on the preemption record */
	record_ptr->magic = A6XX_PREEMPT_RECORD_MAGIC;
	record_ptr->info = 0;
	record_ptr->data = 0;
	record_ptr->rptr = 0;
	record_ptr->wptr = 0;
	record_ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT;
	record_ptr->rbase = ring->iova;
	record_ptr->counter = 0;
	record_ptr->bv_rptr_addr = rbmemptr(ring, bv_rptr);

	return 0;
}

void a6xx_preempt_fini(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int i;

	for (i = 0; i < gpu->nr_rings; i++)
		msm_gem_kernel_put(a6xx_gpu->preempt_bo[i], gpu->aspace);
}

void a6xx_preempt_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int i;

	/* No preemption if we only have one ring */
	if (gpu->nr_rings <= 1)
		return;

	for (i = 0; i < gpu->nr_rings; i++) {
		if (preempt_init_ring(a6xx_gpu, gpu->rb[i]))
			goto fail;
	}

	/* TODO: make this configurable? */
	a6xx_gpu->preempt_level = 1;
	a6xx_gpu->uses_gmem = 1;
	a6xx_gpu->skip_save_restore = 1;

	a6xx_gpu->preempt_postamble_ptr = msm_gem_kernel_new(gpu->dev,
		PAGE_SIZE,
		MSM_BO_WC | MSM_BO_MAP_PRIV | MSM_BO_GPU_READONLY,
		gpu->aspace, &a6xx_gpu->preempt_postamble_bo,
		&a6xx_gpu->preempt_postamble_iova);

	/* Check for allocation failure before writing the postamble packets */
	if (IS_ERR(a6xx_gpu->preempt_postamble_ptr))
		goto fail;

	preempt_prepare_postamble(a6xx_gpu);

	timer_setup(&a6xx_gpu->preempt_timer, a6xx_preempt_timer, 0);

	return;
fail:
	/*
	 * On any failure our adventure is over. Clean up and
	 * set nr_rings to 1 to force preemption off
	 */
	a6xx_preempt_fini(gpu);
	gpu->nr_rings = 1;

	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "preemption init failed, disabling preemption\n");
}
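
/*
 * Overview of the preemption state transitions used above (the preempt_state
 * atomic lives in struct a6xx_gpu):
 *
 *   PREEMPT_NONE      -> PREEMPT_START      a6xx_preempt_trigger() starts
 *                                           evaluating the next ring
 *   PREEMPT_START     -> PREEMPT_FINISH     no switch needed; only the wptr
 *                                           of the current ring is updated
 *   PREEMPT_START     -> PREEMPT_TRIGGERED  CP_CONTEXT_SWITCH_CNTL written,
 *                                           watchdog timer armed
 *   PREEMPT_TRIGGERED -> PREEMPT_PENDING    a6xx_preempt_irq() handles the
 *                                           completion interrupt
 *   PREEMPT_PENDING   -> PREEMPT_FINISH     cur_ring switched to next_ring,
 *                                           wptr restored
 *   PREEMPT_FINISH    -> PREEMPT_NONE       preemption complete
 *   PREEMPT_TRIGGERED -> PREEMPT_FAULTED    watchdog timeout; GPU recovery
 *                                           is queued
 *   PREEMPT_PENDING   -> PREEMPT_FAULTED    stop bit still set in the IRQ
 *                                           handler; GPU recovery is queued
 */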