// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018, The Linux Foundation. All rights reserved. */
/* Copyright (c) 2023 Collabora, Ltd. */
/* Copyright (c) 2024 Valve Corporation */

#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"
#include "msm_mmu.h"
#include "msm_gpu_trace.h"

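/*
 * Informal summary of the preemption state transitions used in this file:
 *
 *   PREEMPT_NONE -> PREEMPT_START          a6xx_preempt_trigger() starts evaluating
 *   PREEMPT_START -> PREEMPT_FINISH -> PREEMPT_NONE
 *                                          trigger aborted, no better ring to run
 *   PREEMPT_START -> PREEMPT_TRIGGERED     the switch was handed off to the CP
 *   PREEMPT_TRIGGERED -> PREEMPT_PENDING -> PREEMPT_FINISH -> PREEMPT_NONE
 *                                          a6xx_preempt_irq() completed the switch
 *   PREEMPT_PENDING -> PREEMPT_FAULTED     the stop bit was still set in the irq
 *   PREEMPT_TRIGGERED -> PREEMPT_FAULTED   a6xx_preempt_timer() timed out; recovery
 */
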
/*
 * Try to transition the preemption state from old to new. Return
 * true on success or false if the original state wasn't 'old'.
 */
static inline bool try_preempt_state(struct a6xx_gpu *a6xx_gpu,
		enum a6xx_preempt_state old, enum a6xx_preempt_state new)
{
	enum a6xx_preempt_state cur = atomic_cmpxchg(&a6xx_gpu->preempt_state,
			old, new);

	return (cur == old);
}

/*
 * Force the preemption state to the specified state. This is used in cases
 * where the current state is known and won't change.
 */
static inline void set_preempt_state(struct a6xx_gpu *gpu,
		enum a6xx_preempt_state new)
{
	/*
	 * preempt_state may be read by other cores trying to trigger a
	 * preemption or in the interrupt handler so barriers are needed
	 * before...
	 */
	smp_mb__before_atomic();
	atomic_set(&gpu->preempt_state, new);
	/* ... and after */
	smp_mb__after_atomic();
}

/* Write the most recent wptr for the given ring into the hardware */
static inline void update_wptr(struct a6xx_gpu *a6xx_gpu, struct msm_ringbuffer *ring)
{
	unsigned long flags;
	uint32_t wptr;

	spin_lock_irqsave(&ring->preempt_lock, flags);

	if (ring->restore_wptr) {
		wptr = get_wptr(ring);

		a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);

		ring->restore_wptr = false;
	}

	spin_unlock_irqrestore(&ring->preempt_lock, flags);
}

/* Return the highest priority ringbuffer with something in it */
static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	unsigned long flags;
	int i;

	for (i = 0; i < gpu->nr_rings; i++) {
		bool empty;
		struct msm_ringbuffer *ring = gpu->rb[i];

		spin_lock_irqsave(&ring->preempt_lock, flags);
		empty = (get_wptr(ring) == gpu->funcs->get_rptr(gpu, ring));
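		/*
		 * For the ring that is currently executing, also treat it as
		 * empty once the last submitted seqno has completed.
		 */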
		if (!empty && ring == a6xx_gpu->cur_ring)
			empty = ring->memptrs->fence == a6xx_gpu->last_seqno[i];
		spin_unlock_irqrestore(&ring->preempt_lock, flags);

		if (!empty)
			return ring;
	}

	return NULL;
}

static void a6xx_preempt_timer(struct timer_list *t)
{
	struct a6xx_gpu *a6xx_gpu = timer_container_of(a6xx_gpu, t,
			preempt_timer);
	struct msm_gpu *gpu = &a6xx_gpu->base.base;
	struct drm_device *dev = gpu->dev;

	if (!try_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED, PREEMPT_FAULTED))
		return;

	dev_err(dev->dev, "%s: preemption timed out\n", gpu->name);
	kthread_queue_work(gpu->worker, &gpu->recover_work);
}

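/*
 * Build the preemption postamble. Judging from the packets below, it kicks
 * off a re-init of the RBBM perfcounter SRAM and then polls the matching
 * status register until it reads back 1. The postamble is disabled while
 * system profiling is active (see a6xx_preempt_trigger()).
 */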
static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu)
{
	u32 *postamble = a6xx_gpu->preempt_postamble_ptr;
	u32 count = 0;

	postamble[count++] = PKT7(CP_REG_RMW, 3);
	postamble[count++] = REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD;
	postamble[count++] = 0;
	postamble[count++] = 1;

	postamble[count++] = PKT7(CP_WAIT_REG_MEM, 6);
	postamble[count++] = CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ);
	postamble[count++] = CP_WAIT_REG_MEM_POLL_ADDR_LO(
				REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS);
	postamble[count++] = CP_WAIT_REG_MEM_POLL_ADDR_HI(0);
	postamble[count++] = CP_WAIT_REG_MEM_3_REF(0x1);
	postamble[count++] = CP_WAIT_REG_MEM_4_MASK(0x1);
	postamble[count++] = CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0);

	a6xx_gpu->preempt_postamble_len = count;

	a6xx_gpu->postamble_enabled = true;
}

static void preempt_disable_postamble(struct a6xx_gpu *a6xx_gpu)
{
	u32 *postamble = a6xx_gpu->preempt_postamble_ptr;

	/*
	 * Disable the postamble by replacing the first packet header with a NOP
	 * that covers the whole buffer.
	 */
	*postamble = PKT7(CP_NOP, (a6xx_gpu->preempt_postamble_len - 1));

	a6xx_gpu->postamble_enabled = false;
}

/*
 * Set the preemption keepalive vote. Note that this vote is different from
 * the one used in a6xx_irq().
 */
static void a6xx_preempt_keepalive_vote(struct msm_gpu *gpu, bool on)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	if (adreno_has_gmu_wrapper(adreno_gpu))
		return;

	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_PWR_COL_PREEMPT_KEEPALIVE, on);
}

void a6xx_preempt_irq(struct msm_gpu *gpu)
{
	uint32_t status;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct drm_device *dev = gpu->dev;

	if (!try_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED, PREEMPT_PENDING))
		return;

	/* Delete the preemption watchdog timer */
	timer_delete(&a6xx_gpu->preempt_timer);

	/*
	 * The hardware should be setting the stop bit of CP_CONTEXT_SWITCH_CNTL
	 * to zero before firing the interrupt, but there is a non-zero chance
	 * of a hardware condition or a software race that could set it again
	 * before we have a chance to finish. If that happens, log and go for
	 * recovery.
	 */
	status = gpu_read(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL);
	if (unlikely(status & A6XX_CP_CONTEXT_SWITCH_CNTL_STOP)) {
		DRM_DEV_ERROR(&gpu->pdev->dev,
			      "!!!!!!!!!!!!!!!! preemption faulted !!!!!!!!!!!!!! irq\n");
		set_preempt_state(a6xx_gpu, PREEMPT_FAULTED);
		dev_err(dev->dev, "%s: Preemption failed to complete\n",
			gpu->name);
		kthread_queue_work(gpu->worker, &gpu->recover_work);
		return;
	}

	a6xx_gpu->cur_ring = a6xx_gpu->next_ring;
	a6xx_gpu->next_ring = NULL;

	set_preempt_state(a6xx_gpu, PREEMPT_FINISH);

	update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);

	set_preempt_state(a6xx_gpu, PREEMPT_NONE);

	a6xx_preempt_keepalive_vote(gpu, false);

	trace_msm_gpu_preemption_irq(a6xx_gpu->cur_ring->id);

	/*
	 * Retrigger preemption to avoid a deadlock that might occur when preemption
	 * is skipped due to it being already in flight when requested.
	 */
	a6xx_preempt_trigger(gpu);
}

void a6xx_preempt_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int i;

	/* No preemption if we only have one ring */
	if (gpu->nr_rings == 1)
		return;

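	/* Reset every ring's preemption record to a clean starting state */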
	for (i = 0; i < gpu->nr_rings; i++) {
		struct a6xx_preempt_record *record_ptr = a6xx_gpu->preempt[i];

		record_ptr->wptr = 0;
		record_ptr->rptr = 0;
		record_ptr->rptr_addr = shadowptr(a6xx_gpu, gpu->rb[i]);
		record_ptr->info = 0;
		record_ptr->data = 0;
		record_ptr->rbase = gpu->rb[i]->iova;
	}

	/* Write a 0 to signal that we aren't switching pagetables */
	gpu_write64(gpu, REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, 0);

	/* Enable the GMEM save/restore feature for preemption */
	gpu_write(gpu, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 0x1);

	/* Reset the preemption state */
	set_preempt_state(a6xx_gpu, PREEMPT_NONE);

	spin_lock_init(&a6xx_gpu->eval_lock);

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];
}

void a6xx_preempt_trigger(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	unsigned long flags;
	struct msm_ringbuffer *ring;
	unsigned int cntl;
	bool sysprof;

	if (gpu->nr_rings == 1)
		return;

	/*
	 * Lock to make sure another thread attempting preemption doesn't skip it
	 * while we are still evaluating the next ring. This ensures the other
	 * thread does start preemption if we abort this attempt, and avoids a
	 * soft lockup.
	 */
	spin_lock_irqsave(&a6xx_gpu->eval_lock, flags);

	/*
	 * Try to start preemption by moving from NONE to START. If
	 * unsuccessful, a preemption is already in flight
	 */
	if (!try_preempt_state(a6xx_gpu, PREEMPT_NONE, PREEMPT_START)) {
		spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
		return;
	}

	cntl = A6XX_CP_CONTEXT_SWITCH_CNTL_LEVEL(a6xx_gpu->preempt_level);

	if (a6xx_gpu->skip_save_restore)
		cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_SKIP_SAVE_RESTORE;

	if (a6xx_gpu->uses_gmem)
		cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_USES_GMEM;

	cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_STOP;

	/* Get the next ring to preempt to */
	ring = get_next_ring(gpu);

	/*
	 * If no ring is populated or the highest priority ring is the current
	 * one, do nothing except update the wptr to the latest and greatest.
	 */
	if (!ring || (a6xx_gpu->cur_ring == ring)) {
		set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
		update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);
		set_preempt_state(a6xx_gpu, PREEMPT_NONE);
		spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
		return;
	}

	spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);

	spin_lock_irqsave(&ring->preempt_lock, flags);

	struct a7xx_cp_smmu_info *smmu_info_ptr =
		a6xx_gpu->preempt_smmu[ring->id];
	struct a6xx_preempt_record *record_ptr = a6xx_gpu->preempt[ring->id];
	u64 ttbr0 = ring->memptrs->ttbr0;
	u32 context_idr = ring->memptrs->context_idr;

	smmu_info_ptr->ttbr0 = ttbr0;
	smmu_info_ptr->context_idr = context_idr;
	record_ptr->wptr = get_wptr(ring);

	/*
	 * The GPU will write the wptr we set above when we preempt. Reset
	 * restore_wptr to make sure that we don't write WPTR to the same
	 * thing twice. It's still possible subsequent submissions will update
	 * wptr again, in which case they will set the flag to true. This has
	 * to be protected by the lock so that setting the flag and updating
	 * wptr are atomic.
	 */
	ring->restore_wptr = false;

	trace_msm_gpu_preemption_trigger(a6xx_gpu->cur_ring->id, ring->id);

	spin_unlock_irqrestore(&ring->preempt_lock, flags);

	/* Set the keepalive bit to keep the GPU ON until preemption is complete */
	a6xx_preempt_keepalive_vote(gpu, true);

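	/* Point the CP at the target ring's SMMU info and preemption record */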
	a6xx_fenced_write(a6xx_gpu,
		REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, a6xx_gpu->preempt_smmu_iova[ring->id],
		BIT(1), true);

	a6xx_fenced_write(a6xx_gpu,
		REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR,
		a6xx_gpu->preempt_iova[ring->id], BIT(1), true);

	a6xx_gpu->next_ring = ring;

	/* Start a timer to catch a stuck preemption */
	mod_timer(&a6xx_gpu->preempt_timer, jiffies + msecs_to_jiffies(10000));

	/* Enable or disable postamble as needed */
	sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;

	if (!sysprof && !a6xx_gpu->postamble_enabled)
		preempt_prepare_postamble(a6xx_gpu);

	if (sysprof && a6xx_gpu->postamble_enabled)
		preempt_disable_postamble(a6xx_gpu);

	/* Set the preemption state to triggered */
	set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED);

	/* Trigger the preemption */
	a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl, BIT(1), false);
}

static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu,
		struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;
	struct drm_gem_object *bo = NULL;
	phys_addr_t ttbr;
	u64 iova = 0;
	void *ptr;
	int asid;

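	/* Allocate the per-ring preemption record the CP saves/restores into */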
	ptr = msm_gem_kernel_new(gpu->dev,
		PREEMPT_RECORD_SIZE(adreno_gpu),
		MSM_BO_WC | MSM_BO_MAP_PRIV, gpu->vm, &bo, &iova);

	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	memset(ptr, 0, PREEMPT_RECORD_SIZE(adreno_gpu));

	msm_gem_object_set_name(bo, "preempt_record ring%d", ring->id);

	a6xx_gpu->preempt_bo[ring->id] = bo;
	a6xx_gpu->preempt_iova[ring->id] = iova;
	a6xx_gpu->preempt[ring->id] = ptr;

	struct a6xx_preempt_record *record_ptr = ptr;

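	/* Allocate the per-ring SMMU info block, read-only for the GPU */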
	ptr = msm_gem_kernel_new(gpu->dev,
		PREEMPT_SMMU_INFO_SIZE,
		MSM_BO_WC | MSM_BO_MAP_PRIV | MSM_BO_GPU_READONLY,
		gpu->vm, &bo, &iova);

	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	memset(ptr, 0, PREEMPT_SMMU_INFO_SIZE);

	msm_gem_object_set_name(bo, "preempt_smmu_info ring%d", ring->id);

	a6xx_gpu->preempt_smmu_bo[ring->id] = bo;
	a6xx_gpu->preempt_smmu_iova[ring->id] = iova;
	a6xx_gpu->preempt_smmu[ring->id] = ptr;

	struct a7xx_cp_smmu_info *smmu_info_ptr = ptr;

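	/*
	 * Seed the SMMU info with the current pagetable; the per-ring ttbr0
	 * and context_idr are refreshed from ring->memptrs at trigger time.
	 */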
	msm_iommu_pagetable_params(to_msm_vm(gpu->vm)->mmu, &ttbr, &asid);

	smmu_info_ptr->magic = GEN7_CP_SMMU_INFO_MAGIC;
	smmu_info_ptr->ttbr0 = ttbr;
	smmu_info_ptr->asid = 0xdecafbad;
	smmu_info_ptr->context_idr = 0;

	/* Set up the defaults on the preemption record */
	record_ptr->magic = A6XX_PREEMPT_RECORD_MAGIC;
	record_ptr->info = 0;
	record_ptr->data = 0;
	record_ptr->rptr = 0;
	record_ptr->wptr = 0;
	record_ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT;
	record_ptr->rbase = ring->iova;
	record_ptr->counter = 0;
	record_ptr->bv_rptr_addr = rbmemptr(ring, bv_rptr);

	return 0;
}

void a6xx_preempt_fini(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int i;

	for (i = 0; i < gpu->nr_rings; i++)
		msm_gem_kernel_put(a6xx_gpu->preempt_bo[i], gpu->vm);
}

void a6xx_preempt_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int i;

	/* No preemption if we only have one ring */
	if (gpu->nr_rings <= 1)
		return;

	for (i = 0; i < gpu->nr_rings; i++) {
		if (preempt_init_ring(a6xx_gpu, gpu->rb[i]))
			goto fail;
	}

	/* TODO: make this configurable? */
	a6xx_gpu->preempt_level = 1;
	a6xx_gpu->uses_gmem = 1;
	a6xx_gpu->skip_save_restore = 1;

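	/* Allocate a page for the CP postamble and build its initial contents */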
	a6xx_gpu->preempt_postamble_ptr = msm_gem_kernel_new(gpu->dev,
			PAGE_SIZE,
			MSM_BO_WC | MSM_BO_MAP_PRIV | MSM_BO_GPU_READONLY,
			gpu->vm, &a6xx_gpu->preempt_postamble_bo,
			&a6xx_gpu->preempt_postamble_iova);

	if (IS_ERR(a6xx_gpu->preempt_postamble_ptr))
		goto fail;

	preempt_prepare_postamble(a6xx_gpu);

	timer_setup(&a6xx_gpu->preempt_timer, a6xx_preempt_timer, 0);

	return;
fail:
	/*
	 * On any failure our adventure is over. Clean up and
	 * set nr_rings to 1 to force preemption off.
	 */
	a6xx_preempt_fini(gpu);
	gpu->nr_rings = 1;

	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "preemption init failed, disabling preemption\n");

	return;
}