// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018, The Linux Foundation. All rights reserved. */
/* Copyright (c) 2023 Collabora, Ltd. */
/* Copyright (c) 2024 Valve Corporation */

#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"
#include "msm_mmu.h"
#include "msm_gpu_trace.h"

/*
 * Try to transition the preemption state from old to new. Return
 * true on success or false if the original state wasn't 'old'
 */
static inline bool try_preempt_state(struct a6xx_gpu *a6xx_gpu,
		enum a6xx_preempt_state old, enum a6xx_preempt_state new)
{
	enum a6xx_preempt_state cur = atomic_cmpxchg(&a6xx_gpu->preempt_state,
		old, new);

	return (cur == old);
}

/*
 * Force the preemption state to the specified state. This is used in cases
 * where the current state is known and won't change
 */
static inline void set_preempt_state(struct a6xx_gpu *gpu,
		enum a6xx_preempt_state new)
{
	/*
	 * preempt_state may be read by other cores trying to trigger a
	 * preemption or in the interrupt handler so barriers are needed
	 * before...
	 */
	smp_mb__before_atomic();
	atomic_set(&gpu->preempt_state, new);
	/* ... and after */
	smp_mb__after_atomic();
}
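
/*
 * Overview of the preemption state machine driven by the helpers above,
 * derived from the transitions used in this file (the states come from
 * enum a6xx_preempt_state):
 *
 *   PREEMPT_NONE      - no switch in progress; a6xx_preempt_trigger() moves
 *                       to PREEMPT_START while it evaluates the rings
 *   PREEMPT_START     - next ring is being picked; either drops back through
 *                       PREEMPT_FINISH to PREEMPT_NONE (nothing to do) or
 *                       advances to PREEMPT_TRIGGERED once the switch is
 *                       written to CP_CONTEXT_SWITCH_CNTL
 *   PREEMPT_TRIGGERED - waiting on the CP; resolved by a6xx_preempt_irq()
 *                       (-> PREEMPT_PENDING) or by the watchdog timer
 *                       (-> PREEMPT_FAULTED)
 *   PREEMPT_PENDING   - the interrupt handler is finalizing the switch,
 *                       then PREEMPT_FINISH -> PREEMPT_NONE
 *   PREEMPT_FAULTED   - the switch failed, GPU recovery is queued
 *
 * A typical guarded transition looks like:
 *
 *	if (!try_preempt_state(a6xx_gpu, PREEMPT_NONE, PREEMPT_START))
 *		return;		// a preemption is already in flight
 */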

/* Write the most recent wptr for the given ring into the hardware */
static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	unsigned long flags;
	uint32_t wptr;

	spin_lock_irqsave(&ring->preempt_lock, flags);

	if (ring->restore_wptr) {
		wptr = get_wptr(ring);

		gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);

		ring->restore_wptr = false;
	}

	spin_unlock_irqrestore(&ring->preempt_lock, flags);
}

/* Return the highest priority ringbuffer with something in it */
static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	unsigned long flags;
	int i;

	for (i = 0; i < gpu->nr_rings; i++) {
		bool empty;
		struct msm_ringbuffer *ring = gpu->rb[i];

		spin_lock_irqsave(&ring->preempt_lock, flags);
		empty = (get_wptr(ring) == gpu->funcs->get_rptr(gpu, ring));
		if (!empty && ring == a6xx_gpu->cur_ring)
			empty = ring->memptrs->fence == a6xx_gpu->last_seqno[i];
		spin_unlock_irqrestore(&ring->preempt_lock, flags);

		if (!empty)
			return ring;
	}

	return NULL;
}

static void a6xx_preempt_timer(struct timer_list *t)
{
	struct a6xx_gpu *a6xx_gpu = from_timer(a6xx_gpu, t, preempt_timer);
	struct msm_gpu *gpu = &a6xx_gpu->base.base;
	struct drm_device *dev = gpu->dev;

	if (!try_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED, PREEMPT_FAULTED))
		return;

	dev_err(dev->dev, "%s: preemption timed out\n", gpu->name);
	kthread_queue_work(gpu->worker, &gpu->recover_work);
}

static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu)
{
	u32 *postamble = a6xx_gpu->preempt_postamble_ptr;
	u32 count = 0;

	postamble[count++] = PKT7(CP_REG_RMW, 3);
	postamble[count++] = REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD;
	postamble[count++] = 0;
	postamble[count++] = 1;

	postamble[count++] = PKT7(CP_WAIT_REG_MEM, 6);
	postamble[count++] = CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ);
	postamble[count++] = CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
				REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS);
	postamble[count++] = CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0);
	postamble[count++] = CP_WAIT_REG_MEM_3_REF(0x1);
	postamble[count++] = CP_WAIT_REG_MEM_4_MASK(0x1);
	postamble[count++] = CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0);

	a6xx_gpu->preempt_postamble_len = count;

	a6xx_gpu->postamble_enabled = true;
}

static void preempt_disable_postamble(struct a6xx_gpu *a6xx_gpu)
{
	u32 *postamble = a6xx_gpu->preempt_postamble_ptr;

	/*
	 * Disable the postamble by replacing the first packet header with a NOP
	 * that covers the whole buffer.
	 */
	*postamble = PKT7(CP_NOP, (a6xx_gpu->preempt_postamble_len - 1));

	a6xx_gpu->postamble_enabled = false;
}
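
/*
 * Note on the postamble: the packets built in preempt_prepare_postamble()
 * kick off a re-init of the RBBM perfcounter SRAM and then poll
 * RBBM_PERFCTR_SRAM_INIT_STATUS until it completes, so counter state is
 * scrubbed as part of a preemption switch. a6xx_preempt_trigger() keeps the
 * postamble enabled only while no system profiler holds sysprof_active;
 * with profiling active it is NOPed out so in-flight counter collection is
 * not disturbed. The buffer itself is handed to the CP by the ring setup
 * code outside of this file.
 */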

void a6xx_preempt_irq(struct msm_gpu *gpu)
{
	uint32_t status;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct drm_device *dev = gpu->dev;

	if (!try_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED, PREEMPT_PENDING))
		return;

	/* Delete the preemption watchdog timer */
	del_timer(&a6xx_gpu->preempt_timer);

	/*
	 * The hardware should be setting the stop bit of CP_CONTEXT_SWITCH_CNTL
	 * to zero before firing the interrupt, but there is a non zero chance
	 * of a hardware condition or a software race that could set it again
	 * before we have a chance to finish. If that happens, log and go for
	 * recovery
	 */
	status = gpu_read(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL);
	if (unlikely(status & A6XX_CP_CONTEXT_SWITCH_CNTL_STOP)) {
		DRM_DEV_ERROR(&gpu->pdev->dev,
			      "!!!!!!!!!!!!!!!! preemption faulted !!!!!!!!!!!!!! irq\n");
		set_preempt_state(a6xx_gpu, PREEMPT_FAULTED);
		dev_err(dev->dev, "%s: Preemption failed to complete\n",
			gpu->name);
		kthread_queue_work(gpu->worker, &gpu->recover_work);
		return;
	}

	a6xx_gpu->cur_ring = a6xx_gpu->next_ring;
	a6xx_gpu->next_ring = NULL;

	set_preempt_state(a6xx_gpu, PREEMPT_FINISH);

	update_wptr(gpu, a6xx_gpu->cur_ring);

	set_preempt_state(a6xx_gpu, PREEMPT_NONE);

	trace_msm_gpu_preemption_irq(a6xx_gpu->cur_ring->id);

	/*
	 * Retrigger preemption to avoid a deadlock that might occur when preemption
	 * is skipped due to it being already in flight when requested.
	 */
	a6xx_preempt_trigger(gpu);
}

void a6xx_preempt_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int i;

	/* No preemption if we only have one ring */
	if (gpu->nr_rings == 1)
		return;

	for (i = 0; i < gpu->nr_rings; i++) {
		struct a6xx_preempt_record *record_ptr = a6xx_gpu->preempt[i];

		record_ptr->wptr = 0;
		record_ptr->rptr = 0;
		record_ptr->rptr_addr = shadowptr(a6xx_gpu, gpu->rb[i]);
		record_ptr->info = 0;
		record_ptr->data = 0;
		record_ptr->rbase = gpu->rb[i]->iova;
	}

	/* Write a 0 to signal that we aren't switching pagetables */
	gpu_write64(gpu, REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, 0);

	/* Enable the GMEM save/restore feature for preemption */
	gpu_write(gpu, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, 0x1);

	/* Reset the preemption state */
	set_preempt_state(a6xx_gpu, PREEMPT_NONE);

	spin_lock_init(&a6xx_gpu->eval_lock);

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];
}
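
/*
 * Trigger a switch to the highest priority non-empty ring, if it differs
 * from the ring the CP is currently executing. Roughly:
 *
 *  1. Move PREEMPT_NONE -> PREEMPT_START under eval_lock so concurrent
 *     callers don't both skip the switch.
 *  2. Pick the target ring with get_next_ring(); if there is nothing better
 *     than cur_ring, just sync WPTR and drop back to PREEMPT_NONE.
 *  3. Stage the target ring's ttbr0/context_idr in its SMMU info page and
 *     its WPTR in its preemption record, then point the CP at both buffers.
 *  4. Arm the watchdog, set PREEMPT_TRIGGERED and write
 *     CP_CONTEXT_SWITCH_CNTL to start the switch; completion is handled in
 *     a6xx_preempt_irq().
 *
 * Callers may invoke this from both submission and interrupt context, hence
 * the irqsave locking below.
 */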

void a6xx_preempt_trigger(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	unsigned long flags;
	struct msm_ringbuffer *ring;
	unsigned int cntl;
	bool sysprof;

	if (gpu->nr_rings == 1)
		return;

	/*
	 * Lock to make sure another thread attempting preemption doesn't skip it
	 * while we are still evaluating the next ring. This makes sure the other
	 * thread does start preemption if we abort it and avoids a soft lock.
	 */
	spin_lock_irqsave(&a6xx_gpu->eval_lock, flags);

	/*
	 * Try to start preemption by moving from NONE to START. If
	 * unsuccessful, a preemption is already in flight
	 */
	if (!try_preempt_state(a6xx_gpu, PREEMPT_NONE, PREEMPT_START)) {
		spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
		return;
	}

	cntl = A6XX_CP_CONTEXT_SWITCH_CNTL_LEVEL(a6xx_gpu->preempt_level);

	if (a6xx_gpu->skip_save_restore)
		cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_SKIP_SAVE_RESTORE;

	if (a6xx_gpu->uses_gmem)
		cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_USES_GMEM;

	cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_STOP;

	/* Get the next ring to preempt to */
	ring = get_next_ring(gpu);

	/*
	 * If no ring is populated or the highest priority ring is the current
	 * one do nothing except to update the wptr to the latest and greatest
	 */
	if (!ring || (a6xx_gpu->cur_ring == ring)) {
		set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
		update_wptr(gpu, a6xx_gpu->cur_ring);
		set_preempt_state(a6xx_gpu, PREEMPT_NONE);
		spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
		return;
	}

	spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);

	spin_lock_irqsave(&ring->preempt_lock, flags);

	struct a7xx_cp_smmu_info *smmu_info_ptr =
		a6xx_gpu->preempt_smmu[ring->id];
	struct a6xx_preempt_record *record_ptr = a6xx_gpu->preempt[ring->id];
	u64 ttbr0 = ring->memptrs->ttbr0;
	u32 context_idr = ring->memptrs->context_idr;

	smmu_info_ptr->ttbr0 = ttbr0;
	smmu_info_ptr->context_idr = context_idr;
	record_ptr->wptr = get_wptr(ring);

	/*
	 * The GPU will write the wptr we set above when we preempt. Reset
	 * restore_wptr to make sure that we don't write WPTR to the same
	 * thing twice. It's still possible subsequent submissions will update
	 * wptr again, in which case they will set the flag to true. This has
	 * to be protected by the lock for setting the flag and updating wptr
	 * to be atomic.
	 */
	ring->restore_wptr = false;

	trace_msm_gpu_preemption_trigger(a6xx_gpu->cur_ring->id, ring->id);

	spin_unlock_irqrestore(&ring->preempt_lock, flags);

	gpu_write64(gpu,
		REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO,
		a6xx_gpu->preempt_smmu_iova[ring->id]);

	gpu_write64(gpu,
		REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR,
		a6xx_gpu->preempt_iova[ring->id]);

	a6xx_gpu->next_ring = ring;

	/* Start a timer to catch a stuck preemption */
	mod_timer(&a6xx_gpu->preempt_timer, jiffies + msecs_to_jiffies(10000));

	/* Enable or disable postamble as needed */
	sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;

	if (!sysprof && !a6xx_gpu->postamble_enabled)
		preempt_prepare_postamble(a6xx_gpu);

	if (sysprof && a6xx_gpu->postamble_enabled)
		preempt_disable_postamble(a6xx_gpu);

	/* Set the preemption state to triggered */
	set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED);

	/* Trigger the preemption */
	gpu_write(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl);
}
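
/*
 * Per-ring preemption state lives in two GEM buffers allocated below: a
 * preemption record (PREEMPT_RECORD_SIZE bytes) that the CP uses to save
 * and restore ring state across a switch, and a GPU-read-only
 * a7xx_cp_smmu_info page carrying the TTBR0/context bank information needed
 * to switch pagetables along with the ring. Both are zeroed here and
 * re-seeded with live values in a6xx_preempt_hw_init() and
 * a6xx_preempt_trigger().
 */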

static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu,
		struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;
	struct drm_gem_object *bo = NULL;
	phys_addr_t ttbr;
	u64 iova = 0;
	void *ptr;
	int asid;

	ptr = msm_gem_kernel_new(gpu->dev,
		PREEMPT_RECORD_SIZE(adreno_gpu),
		MSM_BO_WC | MSM_BO_MAP_PRIV, gpu->aspace, &bo, &iova);

	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	memset(ptr, 0, PREEMPT_RECORD_SIZE(adreno_gpu));

	msm_gem_object_set_name(bo, "preempt_record ring%d", ring->id);

	a6xx_gpu->preempt_bo[ring->id] = bo;
	a6xx_gpu->preempt_iova[ring->id] = iova;
	a6xx_gpu->preempt[ring->id] = ptr;

	struct a6xx_preempt_record *record_ptr = ptr;

	ptr = msm_gem_kernel_new(gpu->dev,
		PREEMPT_SMMU_INFO_SIZE,
		MSM_BO_WC | MSM_BO_MAP_PRIV | MSM_BO_GPU_READONLY,
		gpu->aspace, &bo, &iova);

	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	memset(ptr, 0, PREEMPT_SMMU_INFO_SIZE);

	msm_gem_object_set_name(bo, "preempt_smmu_info ring%d", ring->id);

	a6xx_gpu->preempt_smmu_bo[ring->id] = bo;
	a6xx_gpu->preempt_smmu_iova[ring->id] = iova;
	a6xx_gpu->preempt_smmu[ring->id] = ptr;

	struct a7xx_cp_smmu_info *smmu_info_ptr = ptr;

	msm_iommu_pagetable_params(gpu->aspace->mmu, &ttbr, &asid);

	smmu_info_ptr->magic = GEN7_CP_SMMU_INFO_MAGIC;
	smmu_info_ptr->ttbr0 = ttbr;
	smmu_info_ptr->asid = 0xdecafbad;
	smmu_info_ptr->context_idr = 0;

	/* Set up the defaults on the preemption record */
	record_ptr->magic = A6XX_PREEMPT_RECORD_MAGIC;
	record_ptr->info = 0;
	record_ptr->data = 0;
	record_ptr->rptr = 0;
	record_ptr->wptr = 0;
	record_ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT;
	record_ptr->rbase = ring->iova;
	record_ptr->counter = 0;
	record_ptr->bv_rptr_addr = rbmemptr(ring, bv_rptr);

	return 0;
}

void a6xx_preempt_fini(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int i;

	for (i = 0; i < gpu->nr_rings; i++)
		msm_gem_kernel_put(a6xx_gpu->preempt_bo[i], gpu->aspace);
}

void a6xx_preempt_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int i;

	/* No preemption if we only have one ring */
	if (gpu->nr_rings <= 1)
		return;

	for (i = 0; i < gpu->nr_rings; i++) {
		if (preempt_init_ring(a6xx_gpu, gpu->rb[i]))
			goto fail;
	}

	/* TODO: make this configurable? */
	a6xx_gpu->preempt_level = 1;
	a6xx_gpu->uses_gmem = 1;
	a6xx_gpu->skip_save_restore = 1;

	a6xx_gpu->preempt_postamble_ptr = msm_gem_kernel_new(gpu->dev,
			PAGE_SIZE,
			MSM_BO_WC | MSM_BO_MAP_PRIV | MSM_BO_GPU_READONLY,
			gpu->aspace, &a6xx_gpu->preempt_postamble_bo,
			&a6xx_gpu->preempt_postamble_iova);

	preempt_prepare_postamble(a6xx_gpu);

	if (IS_ERR(a6xx_gpu->preempt_postamble_ptr))
		goto fail;

	timer_setup(&a6xx_gpu->preempt_timer, a6xx_preempt_timer, 0);

	return;
fail:
	/*
	 * On any failure our adventure is over. Clean up and
	 * set nr_rings to 1 to force preemption off
	 */
	a6xx_preempt_fini(gpu);
	gpu->nr_rings = 1;

	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "preemption init failed, disabling preemption\n");

	return;
}
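
/*
 * Informational sketch of the expected call order (the actual call sites
 * live elsewhere in the driver and may differ in detail):
 *
 *	a6xx_preempt_init(gpu);		// once at GPU init, allocate records
 *	a6xx_preempt_hw_init(gpu);	// on every hardware (re)initialization
 *	a6xx_preempt_trigger(gpu);	// after queuing new work on any ring
 *	a6xx_preempt_irq(gpu);		// from the CP IRQ when the switch lands
 *	a6xx_preempt_fini(gpu);		// on teardown, release the records
 */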