/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Dave Airlie
 */
#include <linux/seq_file.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/firmware.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_reset.h"

/*
 * Cast helper
 */
static const struct dma_fence_ops amdgpu_fence_ops;
static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
{
	struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);

	return __f;
}

/**
 * amdgpu_fence_write - write a fence value
 *
 * @ring: ring the fence is associated with
 * @seq: sequence number to write
 *
 * Writes a fence value to memory (all asics).
 */
static void amdgpu_fence_write(struct amdgpu_ring *ring, u32 seq)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;

	if (drv->cpu_addr)
		*drv->cpu_addr = cpu_to_le32(seq);
}

/**
 * amdgpu_fence_read - read a fence value
 *
 * @ring: ring the fence is associated with
 *
 * Reads a fence value from memory (all asics).
 * Returns the value of the fence read from memory.
 */
static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	u32 seq = 0;

	if (drv->cpu_addr)
		seq = le32_to_cpu(*drv->cpu_addr);
	else
		seq = atomic_read(&drv->last_seq);

	return seq;
}

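/*
 * Record the ring wptr immediately before and after the fence packet is
 * emitted.  These bounds let the queue reset code re-emit just the fence
 * packet for fences whose other ring contents must be dropped (see
 * amdgpu_ring_backup_unprocessed_commands() below).
 */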
static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af)
{
	af->fence_wptr_start = af->ring->wptr;
}

static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af)
{
	af->fence_wptr_end = af->ring->wptr;
}

/**
 * amdgpu_fence_emit - emit a fence on the requested ring
 *
 * @ring: ring the fence is associated with
 * @af: amdgpu fence input
 * @flags: flags to pass into the subordinate .emit_fence() call
 *
 * Emits a fence command on the requested ring (all asics).
 */
void amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
		       unsigned int flags)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *fence;
	struct dma_fence __rcu **ptr;
	uint32_t seq;

	fence = &af->base;
	af->ring = ring;

	seq = ++ring->fence_drv.sync_seq;
	dma_fence_init(fence, &amdgpu_fence_ops,
		       &ring->fence_drv.lock,
		       adev->fence_context + ring->idx, seq);

	amdgpu_fence_save_fence_wptr_start(af);
	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
			       seq, flags | AMDGPU_FENCE_FLAG_INT);
	amdgpu_fence_save_fence_wptr_end(af);
	amdgpu_fence_save_wptr(af);
	pm_runtime_get_noresume(adev_to_drm(adev)->dev);
	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
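	/* The fence array has num_fences_mask + 1 slots, so a slot can only
	 * still be occupied here if the GPU has fallen a full array's worth
	 * of submissions behind; wait for the old fence before reusing it.
	 */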
	if (unlikely(rcu_dereference_protected(*ptr, 1))) {
		struct dma_fence *old;

		rcu_read_lock();
		old = dma_fence_get_rcu_safe(ptr);
		rcu_read_unlock();

		if (old) {
			/*
			 * dma_fence_wait(old, false) is not interruptible.
			 * It will not return an error in this case.
			 * So we can safely ignore the return value.
			 */
			dma_fence_wait(old, false);
			dma_fence_put(old);
		}
	}

	to_amdgpu_fence(fence)->start_timestamp = ktime_get();

	/* This function can't be called concurrently anyway, otherwise
	 * emitting the fence would mess up the hardware ring buffer.
	 */
	rcu_assign_pointer(*ptr, dma_fence_get(fence));
}

/**
 * amdgpu_fence_emit_polling - emit a fence on the requested ring
 *
 * @ring: ring the fence is associated with
 * @s: resulting sequence number
 * @timeout: the timeout for waiting in usecs
 *
 * Emits a fence command on the requested ring (all asics).
 * Used for fence polling.
 * Returns 0 on success, -EINVAL if @s is NULL, or -ETIMEDOUT if waiting
 * for an older fence to free its slot in the fence array timed out.
 */
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
			      uint32_t timeout)
{
	uint32_t seq;
	signed long r;

	if (!s)
		return -EINVAL;

	seq = ++ring->fence_drv.sync_seq;
	r = amdgpu_fence_wait_polling(ring,
				      seq - ring->fence_drv.num_fences_mask,
				      timeout);
	if (r < 1)
		return -ETIMEDOUT;

	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
			       seq, 0);

	*s = seq;

	return 0;
}

/**
 * amdgpu_fence_schedule_fallback - schedule fallback check
 *
 * @ring: pointer to struct amdgpu_ring
 *
 * Start a timer as fallback to our interrupts.
 */
static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
{
	mod_timer(&ring->fence_drv.fallback_timer,
		  jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT);
}

/**
 * amdgpu_fence_process - check for fence activity
 *
 * @ring: pointer to struct amdgpu_ring
 *
 * Checks the current fence value and calculates the last
 * signalled fence value. Wakes the fence queue if the
 * sequence number has increased.
 *
 * Returns true if fence was processed
 */
bool amdgpu_fence_process(struct amdgpu_ring *ring)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	struct amdgpu_device *adev = ring->adev;
	uint32_t seq, last_seq;

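	/* Atomically advance last_seq to the newest value read from memory;
	 * the cmpxchg loop handles concurrent callers racing to publish it.
	 */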
	do {
		last_seq = atomic_read(&ring->fence_drv.last_seq);
		seq = amdgpu_fence_read(ring);

	} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);

	if (timer_delete(&ring->fence_drv.fallback_timer) &&
	    seq != ring->fence_drv.sync_seq)
		amdgpu_fence_schedule_fallback(ring);

	if (unlikely(seq == last_seq))
		return false;

	last_seq &= drv->num_fences_mask;
	seq &= drv->num_fences_mask;

	do {
		struct dma_fence *fence, **ptr;
		struct amdgpu_fence *am_fence;

		++last_seq;
		last_seq &= drv->num_fences_mask;
		ptr = &drv->fences[last_seq];

		/* There is always exactly one thread signaling this fence slot */
		fence = rcu_dereference_protected(*ptr, 1);
		RCU_INIT_POINTER(*ptr, NULL);

		if (!fence)
			continue;

		/* Save the wptr in the fence driver so we know what the last processed
		 * wptr was.  This is required for re-emitting the ring state for
		 * queues that are reset but are not guilty and thus have no guilty fence.
		 */
		am_fence = container_of(fence, struct amdgpu_fence, base);
		drv->signalled_wptr = am_fence->wptr;
		dma_fence_signal(fence);
		dma_fence_put(fence);
		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
	} while (last_seq != seq);

	return true;
}

/**
 * amdgpu_fence_fallback - fallback for hardware interrupts
 *
 * @t: timer context used to obtain the pointer to ring structure
 *
 * Checks for fence activity.
 */
static void amdgpu_fence_fallback(struct timer_list *t)
{
	struct amdgpu_ring *ring = timer_container_of(ring, t,
						      fence_drv.fallback_timer);

	if (amdgpu_fence_process(ring))
		dev_warn(ring->adev->dev,
			 "Fence fallback timer expired on ring %s\n",
			 ring->name);
}

/**
 * amdgpu_fence_wait_empty - wait for all fences to signal
 *
 * @ring: ring the fences are associated with
 *
 * Wait for all fences on the requested ring to signal (all asics).
 * Returns 0 if the fences have passed, error for all other cases.
 */
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
{
	uint64_t seq = READ_ONCE(ring->fence_drv.sync_seq);
	struct dma_fence *fence, **ptr;
	int r;

	if (!seq)
		return 0;

	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
	rcu_read_lock();
	fence = rcu_dereference(*ptr);
	if (!fence || !dma_fence_get_rcu(fence)) {
		rcu_read_unlock();
		return 0;
	}
	rcu_read_unlock();

	r = dma_fence_wait(fence, false);
	dma_fence_put(fence);
	return r;
}

/**
 * amdgpu_fence_wait_polling - busy wait for a given sequence number
 *
 * @ring: ring the fence is associated with
 * @wait_seq: sequence number to wait for
 * @timeout: the timeout for waiting in usecs
 *
 * Busy-wait until the fence value on the requested ring reaches @wait_seq
 * (all asics).
 * Returns the remaining time if the sequence number was reached before the
 * timeout expired, 0 otherwise.
 */
signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
				      uint32_t wait_seq,
				      signed long timeout)
{
	while ((int32_t)(wait_seq - amdgpu_fence_read(ring)) > 0 && timeout > 0) {
		udelay(2);
		timeout -= 2;
	}
	return timeout > 0 ? timeout : 0;
}
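
/*
 * Usage sketch (illustrative, not part of the driver): polling fences are
 * meant for paths where fence interrupts may not be available, such as
 * early hardware init.  A hypothetical caller, with a valid ring and its
 * adev, would pair amdgpu_fence_emit_polling() with
 * amdgpu_fence_wait_polling(); the timeout is a stand-in chosen by the
 * caller:
 *
 *	uint32_t seq;
 *	int r = amdgpu_fence_emit_polling(ring, &seq, adev->usec_timeout);
 *
 *	if (r)
 *		return r;
 *	amdgpu_ring_commit(ring);
 *	if (!amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout))
 *		r = -ETIMEDOUT;
 */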

/**
 * amdgpu_fence_count_emitted - get the count of emitted fences
 *
 * @ring: ring the fence is associated with
 *
 * Get the number of fences emitted on the requested ring (all asics).
 * Returns the number of emitted fences on the ring.  Used by the
 * dynpm code to track ring activity.
 */
unsigned int amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
{
	uint64_t emitted;

	/* We are not protected by ring lock when reading the last sequence
	 * but it's ok to report slightly wrong fence count here.
	 */
	emitted = 0x100000000ull;
	emitted -= atomic_read(&ring->fence_drv.last_seq);
	emitted += READ_ONCE(ring->fence_drv.sync_seq);
	return lower_32_bits(emitted);
}

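/*
 * Worked example of the wraparound arithmetic above: with
 * last_seq == 0xfffffffe and sync_seq == 0x00000001,
 * emitted = 0x100000000 - 0xfffffffe + 0x00000001 = 3, which is the
 * correct number of in-flight fences across the 32-bit wrap.
 */
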
/**
 * amdgpu_fence_last_unsignaled_time_us - time since the earliest unsignaled fence was emitted
 * @ring: ring the fence is associated with
 *
 * Find the earliest fence that is still unsignaled and return the time
 * delta between its emission and now.
 */
u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	struct dma_fence *fence;
	uint32_t last_seq, sync_seq;

	last_seq = atomic_read(&ring->fence_drv.last_seq);
	sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
	if (last_seq == sync_seq)
		return 0;

	++last_seq;
	last_seq &= drv->num_fences_mask;
	fence = drv->fences[last_seq];
	if (!fence)
		return 0;

	return ktime_us_delta(ktime_get(),
		to_amdgpu_fence(fence)->start_timestamp);
}

/**
 * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
 * @ring: ring the fence is associated with
 * @seq: the fence seq number to update.
 * @timestamp: the start timestamp to update.
 *
 * This function is called when the fence and its related IB are about to
 * be resubmitted to the GPU in an MCBP (mid-command-buffer preemption)
 * scenario, so races with amdgpu_fence_process() modifying the same fence
 * need not be considered.
 */
void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	struct dma_fence *fence;

	seq &= drv->num_fences_mask;
	fence = drv->fences[seq];
	if (!fence)
		return;

	to_amdgpu_fence(fence)->start_timestamp = timestamp;
}

/**
 * amdgpu_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
 *
 * @ring: ring to start the fence driver on
 * @irq_src: interrupt source to use for this ring
 * @irq_type: interrupt type to use for this ring
 *
 * Make the fence driver ready for processing (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has.
 * Returns 0 for success, errors for failure.
 */
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq_src,
				   unsigned int irq_type)
{
	struct amdgpu_device *adev = ring->adev;
	uint64_t index;

	if (ring->funcs->type != AMDGPU_RING_TYPE_UVD) {
		ring->fence_drv.cpu_addr = ring->fence_cpu_addr;
		ring->fence_drv.gpu_addr = ring->fence_gpu_addr;
	} else {
		/* put fence directly behind firmware */
		index = ALIGN(adev->uvd.fw->size, 8);
		ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index;
		ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index;
	}
	amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));

	ring->fence_drv.irq_src = irq_src;
	ring->fence_drv.irq_type = irq_type;
	ring->fence_drv.initialized = true;

	DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr 0x%016llx\n",
		      ring->name, ring->fence_drv.gpu_addr);
	return 0;
}

/**
 * amdgpu_fence_driver_init_ring - init the fence driver
 * for the requested ring.
 *
 * @ring: ring to init the fence driver on
 *
 * Init the fence driver for the requested ring (all asics).
 * Helper function for amdgpu_fence_driver_sw_init().
 */
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (!adev)
		return -EINVAL;

	if (!is_power_of_2(ring->num_hw_submission))
		return -EINVAL;

	ring->fence_drv.cpu_addr = NULL;
	ring->fence_drv.gpu_addr = 0;
	ring->fence_drv.sync_seq = 0;
	atomic_set(&ring->fence_drv.last_seq, 0);
	ring->fence_drv.initialized = false;

	timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);

	ring->fence_drv.num_fences_mask = ring->num_hw_submission * 2 - 1;
	spin_lock_init(&ring->fence_drv.lock);
	ring->fence_drv.fences = kcalloc(ring->num_hw_submission * 2, sizeof(void *),
					 GFP_KERNEL);

	if (!ring->fence_drv.fences)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_fence_driver_sw_init - init the fence driver
 * for all possible rings.
 *
 * @adev: amdgpu device pointer
 *
 * Init the fence driver for all possible rings (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has using
 * amdgpu_fence_driver_start_ring().
 * Returns 0 for success.
 */
int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev)
{
	return 0;
}

/**
 * amdgpu_fence_need_ring_interrupt_restore - helper function to check whether
 * fence driver interrupts need to be restored.
 *
 * @ring: ring to be checked
 *
 * Interrupts for rings that belong to GFX IP don't need to be restored
 * when the target power state is s0ix.
 *
 * Returns true if interrupts need to be restored, false otherwise.
 */
static bool amdgpu_fence_need_ring_interrupt_restore(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	bool is_gfx_power_domain = false;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_SDMA:
		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
		if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
		    IP_VERSION(5, 0, 0))
			is_gfx_power_domain = true;
		break;
	case AMDGPU_RING_TYPE_GFX:
	case AMDGPU_RING_TYPE_COMPUTE:
	case AMDGPU_RING_TYPE_KIQ:
	case AMDGPU_RING_TYPE_MES:
		is_gfx_power_domain = true;
		break;
	default:
		break;
	}

	return !(adev->in_s0ix && is_gfx_power_domain);
}

/**
 * amdgpu_fence_driver_hw_fini - tear down the fence driver
 * for all possible rings.
 *
 * @adev: amdgpu device pointer
 *
 * Tear down the fence driver for all possible rings (all asics).
 */
void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->fence_drv.initialized)
			continue;

		/* You can't wait for HW to signal if it's gone */
		if (!drm_dev_is_unplugged(adev_to_drm(adev)))
			r = amdgpu_fence_wait_empty(ring);
		else
			r = -ENODEV;
		/* no need to trigger GPU reset as we are unloading */
		if (r)
			amdgpu_fence_driver_force_completion(ring);

		if (!drm_dev_is_unplugged(adev_to_drm(adev)) &&
		    ring->fence_drv.irq_src &&
		    amdgpu_fence_need_ring_interrupt_restore(ring))
			amdgpu_irq_put(adev, ring->fence_drv.irq_src,
				       ring->fence_drv.irq_type);

		timer_delete_sync(&ring->fence_drv.fallback_timer);
	}
}

/* Will either stop and flush handlers for the amdgpu interrupt or re-enable it */
void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop)
{
	int i;

	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->fence_drv.initialized || !ring->fence_drv.irq_src)
			continue;

		if (stop)
			disable_irq(adev->irq.irq);
		else
			enable_irq(adev->irq.irq);
	}
}

void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
{
	unsigned int i, j;

	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->fence_drv.initialized)
			continue;

		/*
		 * Notice we check for sched.ops since there's some
		 * override on the meaning of sched.ready by amdgpu.
		 * The natural check would be sched.ready, which is
		 * set as drm_sched_init() finishes...
		 */
		if (ring->sched.ops)
			drm_sched_fini(&ring->sched);

		for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
			dma_fence_put(ring->fence_drv.fences[j]);
		kfree(ring->fence_drv.fences);
		ring->fence_drv.fences = NULL;
		ring->fence_drv.initialized = false;
	}
}

/**
 * amdgpu_fence_driver_hw_init - enable the fence driver
 * for all possible rings.
 *
 * @adev: amdgpu device pointer
 *
 * Enable the fence driver for all possible rings (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has using
 * amdgpu_fence_driver_start_ring().
 */
void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->fence_drv.initialized)
			continue;

		/* enable the interrupt */
		if (ring->fence_drv.irq_src &&
		    amdgpu_fence_need_ring_interrupt_restore(ring))
			amdgpu_irq_get(adev, ring->fence_drv.irq_src,
				       ring->fence_drv.irq_type);
	}
}

/**
 * amdgpu_fence_driver_set_error - set error code on fences
 * @ring: the ring which contains the fences
 * @error: the error code to set
 *
 * Set an error code to all the fences pending on the ring.
 */
void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	unsigned long flags;

	spin_lock_irqsave(&drv->lock, flags);
	for (unsigned int i = 0; i <= drv->num_fences_mask; ++i) {
		struct dma_fence *fence;

		fence = rcu_dereference_protected(drv->fences[i],
						  lockdep_is_held(&drv->lock));
		if (fence && !dma_fence_is_signaled_locked(fence))
			dma_fence_set_error(fence, error);
	}
	spin_unlock_irqrestore(&drv->lock, flags);
}

/**
 * amdgpu_fence_driver_force_completion - force signal latest fence of ring
 *
 * @ring: ring whose pending fences should be force-completed
 */
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
{
	amdgpu_fence_driver_set_error(ring, -ECANCELED);
	amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
	amdgpu_fence_process(ring);
}

/*
 * Kernel queue reset handling
 *
 * The driver can reset individual queues for most engines, but those queues
 * may contain work from multiple contexts.  Resetting the queue will lose
 * all of that state.  In order to minimize the collateral damage, the
 * driver saves the ring contents which are not associated with the guilty
 * context prior to resetting the queue.  After resetting the queue, the
 * queue contents from the other contexts are re-emitted to the ring so that
 * they can be processed by the engine.  To handle this, we save the queue's
 * write pointer (wptr) in the fences associated with each context.  If we
 * get a queue timeout, we can then use the wptrs from the fences to
 * determine which data needs to be saved out of the queue's ring buffer.
 */

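/*
 * For illustration (the fence and context names here are hypothetical),
 * consider fences F1 (context A), F2 (context B) and F3 (context A) pending
 * on a ring when context A is found guilty.  The ring buffer is then carved
 * up as follows:
 *
 *   signalled_wptr .. F1->wptr : context A payload, dropped; only the fence
 *                                packet, delimited by fence_wptr_start and
 *                                fence_wptr_end, is re-emitted
 *   F1->wptr .. F2->wptr       : context B payload, backed up and re-emitted
 *                                in full
 *   F2->wptr .. F3->wptr       : context A payload, dropped as above
 */
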
/**
 * amdgpu_fence_driver_update_timedout_fence_state - Update fence state and set errors
 *
 * @af: the fence that timed out
 */
void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af)
{
	struct dma_fence *unprocessed;
	struct dma_fence __rcu **ptr;
	struct amdgpu_fence *fence;
	struct amdgpu_ring *ring = af->ring;
	unsigned long flags;
	u32 seq, last_seq;
	bool reemitted = false;

	last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
	seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;

	/* mark all fences from the guilty context with an error */
	spin_lock_irqsave(&ring->fence_drv.lock, flags);
	do {
		last_seq++;
		last_seq &= ring->fence_drv.num_fences_mask;

		ptr = &ring->fence_drv.fences[last_seq];
		rcu_read_lock();
		unprocessed = rcu_dereference(*ptr);

		if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
			fence = container_of(unprocessed, struct amdgpu_fence, base);

			if (fence->reemitted > 1)
				reemitted = true;
			else if (fence == af)
				dma_fence_set_error(&fence->base, -ETIME);
			else if (fence->context == af->context)
				dma_fence_set_error(&fence->base, -ECANCELED);
		}
		rcu_read_unlock();
	} while (last_seq != seq);
	spin_unlock_irqrestore(&ring->fence_drv.lock, flags);

	if (reemitted) {
		/* if we've already reemitted once then just cancel everything */
		amdgpu_fence_driver_force_completion(af->ring);
		af->ring->ring_backup_entries_to_copy = 0;
	}
}

void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
{
	af->wptr = af->ring->wptr;
}

static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
						   u64 start_wptr, u64 end_wptr)
{
	unsigned int first_idx = start_wptr & ring->buf_mask;
	unsigned int last_idx = end_wptr & ring->buf_mask;
	unsigned int i;

	/* Backup the contents of the ring buffer. */
	for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask)
		ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i];
}

void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
					     struct amdgpu_fence *guilty_fence)
{
	struct dma_fence *unprocessed;
	struct dma_fence __rcu **ptr;
	struct amdgpu_fence *fence;
	u64 wptr;
	u32 seq, last_seq;

	last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
	seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
	wptr = ring->fence_drv.signalled_wptr;
	ring->ring_backup_entries_to_copy = 0;

	do {
		last_seq++;
		last_seq &= ring->fence_drv.num_fences_mask;

		ptr = &ring->fence_drv.fences[last_seq];
		rcu_read_lock();
		unprocessed = rcu_dereference(*ptr);

		if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
			fence = container_of(unprocessed, struct amdgpu_fence, base);

			/* save everything if the fence is not from the guilty
			 * context, otherwise just save the fence packet itself.
			 */
			if (!fence->reemitted &&
			    (!guilty_fence || (fence->context != guilty_fence->context))) {
				amdgpu_ring_backup_unprocessed_command(ring, wptr,
								       fence->wptr);
			} else if (!fence->reemitted) {
				/* always save the fence */
				amdgpu_ring_backup_unprocessed_command(ring,
								       fence->fence_wptr_start,
								       fence->fence_wptr_end);
			}
			wptr = fence->wptr;
			fence->reemitted++;
		}
		rcu_read_unlock();
	} while (last_seq != seq);
}

/*
 * Common fence implementation
 */

static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence)
{
	return "amdgpu";
}

static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
{
	return (const char *)to_amdgpu_fence(f)->ring->name;
}

/**
 * amdgpu_fence_enable_signaling - enable signalling on fence
 * @f: fence
 *
 * This function is called with the fence lock held.  It arms the fallback
 * timer if it is not already pending, so the fence is guaranteed to be
 * signalled even if the fence interrupt is missed.
 */
static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
{
	if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer))
		amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring);

	return true;
}

/**
 * amdgpu_fence_free - free up the fence memory
 *
 * @rcu: RCU callback head
 *
 * Free up the fence memory after the RCU grace period.
 */
static void amdgpu_fence_free(struct rcu_head *rcu)
{
	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);

	kfree(to_amdgpu_fence(f));
}

/**
 * amdgpu_fence_release - callback that fence can be freed
 *
 * @f: fence
 *
 * This function is called when the reference count becomes zero.
 * It just RCU schedules freeing up the fence.
 */
static void amdgpu_fence_release(struct dma_fence *f)
{
	call_rcu(&f->rcu, amdgpu_fence_free);
}

static const struct dma_fence_ops amdgpu_fence_ops = {
	.get_driver_name = amdgpu_fence_get_driver_name,
	.get_timeline_name = amdgpu_fence_get_timeline_name,
	.enable_signaling = amdgpu_fence_enable_signaling,
	.release = amdgpu_fence_release,
};

/*
 * Fence debugfs
 */
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = m->private;
	int i;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->fence_drv.initialized)
			continue;

		amdgpu_fence_process(ring);

		seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
		seq_printf(m, "Last signaled fence          0x%08x\n",
			   atomic_read(&ring->fence_drv.last_seq));
		seq_printf(m, "Last emitted                 0x%08x\n",
			   ring->fence_drv.sync_seq);

		if (ring->funcs->type == AMDGPU_RING_TYPE_GFX ||
		    ring->funcs->type == AMDGPU_RING_TYPE_SDMA) {
			seq_printf(m, "Last signaled trailing fence 0x%08x\n",
				   le32_to_cpu(*ring->trail_fence_cpu_addr));
			seq_printf(m, "Last emitted                 0x%08x\n",
				   ring->trail_seq);
		}

		if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
			continue;

		/* set in CP_VMID_PREEMPT and preemption occurred */
		seq_printf(m, "Last preempted               0x%08x\n",
			   le32_to_cpu(*(ring->fence_drv.cpu_addr + 2)));
		/* set in CP_VMID_RESET and reset occurred */
		seq_printf(m, "Last reset                   0x%08x\n",
			   le32_to_cpu(*(ring->fence_drv.cpu_addr + 4)));
		/* Both preemption and reset occurred */
		seq_printf(m, "Last both                    0x%08x\n",
			   le32_to_cpu(*(ring->fence_drv.cpu_addr + 6)));
	}
	return 0;
}
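
/*
 * The file above is exposed as <debugfs>/dri/<minor>/amdgpu_fence_info (see
 * amdgpu_debugfs_fence_init() below).  A hypothetical read on a healthy GPU
 * might look like:
 *
 *	# cat /sys/kernel/debug/dri/0/amdgpu_fence_info
 *	--- ring 0 (gfx_0.0.0) ---
 *	Last signaled fence          0x00000007
 *	Last emitted                 0x00000007
 *
 * A "Last signaled fence" value stuck behind "Last emitted" points at
 * unprocessed work on that ring.  The ring name and values shown are
 * illustrative only.
 */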

/*
 * amdgpu_debugfs_gpu_recover - manually trigger a gpu reset & recover
 *
 * Manually trigger a gpu reset at the next fence wait.
 */
static int gpu_recover_get(void *data, u64 *val)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)data;
	struct drm_device *dev = adev_to_drm(adev);
	int r;

	r = pm_runtime_get_sync(dev->dev);
	if (r < 0) {
		pm_runtime_put_autosuspend(dev->dev);
		return 0;
	}

	if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
		flush_work(&adev->reset_work);

	*val = atomic_read(&adev->reset_domain->reset_res);

	pm_runtime_put_autosuspend(dev->dev);

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
			 "%lld\n");

static void amdgpu_debugfs_reset_work(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
						  reset_work);
	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));

	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = adev;
	reset_context.src = AMDGPU_RESET_SRC_USER;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
	set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);

	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}

#endif

void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;

	debugfs_create_file("amdgpu_fence_info", 0444, root, adev,
			    &amdgpu_debugfs_fence_info_fops);

	if (!amdgpu_sriov_vf(adev)) {
		INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
		debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
				    &amdgpu_debugfs_gpu_recover_fops);
	}
#endif
}