xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c (revision 009bfc5ec5c953534d0f528d1c1e4f60668b7371)
1 /*
2  * Copyright 2009 Jerome Glisse.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 /*
27  * Authors:
28  *    Jerome Glisse <glisse@freedesktop.org>
29  *    Dave Airlie
30  */
31 #include <linux/seq_file.h>
32 #include <linux/atomic.h>
33 #include <linux/wait.h>
34 #include <linux/kref.h>
35 #include <linux/slab.h>
36 #include <linux/firmware.h>
37 #include <linux/pm_runtime.h>
38 
39 #include <drm/drm_drv.h>
40 #include "amdgpu.h"
41 #include "amdgpu_trace.h"
42 #include "amdgpu_reset.h"
43 
44 /*
45  * Cast helper
46  */
47 static const struct dma_fence_ops amdgpu_fence_ops;
48 static const struct dma_fence_ops amdgpu_job_fence_ops;
49 static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
50 {
51 	struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
52 
53 	if (__f->base.ops == &amdgpu_fence_ops ||
54 	    __f->base.ops == &amdgpu_job_fence_ops)
55 		return __f;
56 
57 	return NULL;
58 }
59 
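/*
 * Note: to_amdgpu_fence() checks base.ops before trusting the container_of()
 * cast; for a dma_fence that was not created through one of the two ops
 * tables in this file it returns NULL instead of a pointer into unrelated
 * memory.
 */
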
60 /**
61  * amdgpu_fence_write - write a fence value
62  *
63  * @ring: ring the fence is associated with
64  * @seq: sequence number to write
65  *
66  * Writes a fence value to memory (all asics).
67  */
68 static void amdgpu_fence_write(struct amdgpu_ring *ring, u32 seq)
69 {
70 	struct amdgpu_fence_driver *drv = &ring->fence_drv;
71 
72 	if (drv->cpu_addr)
73 		*drv->cpu_addr = cpu_to_le32(seq);
74 }
75 
76 /**
77  * amdgpu_fence_read - read a fence value
78  *
79  * @ring: ring the fence is associated with
80  *
81  * Reads a fence value from memory (all asics).
82  * Returns the value of the fence read from memory.
83  */
84 static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
85 {
86 	struct amdgpu_fence_driver *drv = &ring->fence_drv;
87 	u32 seq = 0;
88 
89 	if (drv->cpu_addr)
90 		seq = le32_to_cpu(*drv->cpu_addr);
91 	else
92 		seq = atomic_read(&drv->last_seq);
93 
94 	return seq;
95 }
96 
97 /**
98  * amdgpu_fence_emit - emit a fence on the requested ring
99  *
100  * @ring: ring the fence is associated with
101  * @f: resulting fence object
102  * @job: job the fence is embedded in
103  * @flags: flags to pass into the subordinate .emit_fence() call
104  *
105  * Emits a fence command on the requested ring (all asics).
106  * Returns 0 on success, -ENOMEM or a fence wait error on failure.
107  */
108 int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amdgpu_job *job,
109 		      unsigned int flags)
110 {
111 	struct amdgpu_device *adev = ring->adev;
112 	struct dma_fence *fence;
113 	struct amdgpu_fence *am_fence;
114 	struct dma_fence __rcu **ptr;
115 	uint32_t seq;
116 	int r;
117 
118 	if (job == NULL) {
119 		/* create a separate hw fence */
120 		am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL);
121 		if (!am_fence)
122 			return -ENOMEM;
123 	} else {
124 		/* use the fence embedded in the job */
125 		am_fence = &job->hw_fence;
126 	}
127 	fence = &am_fence->base;
128 	am_fence->ring = ring;
129 
130 	seq = ++ring->fence_drv.sync_seq;
131 	if (job && job->job_run_counter) {
132 		/* reinit seq for resubmitted jobs */
133 		fence->seqno = seq;
134 		/* To be in line with external fence creation and other drivers */
135 		dma_fence_get(fence);
136 	} else {
137 		if (job) {
138 			dma_fence_init(fence, &amdgpu_job_fence_ops,
139 				       &ring->fence_drv.lock,
140 				       adev->fence_context + ring->idx, seq);
141 			/* Hold a reference against removal in amdgpu_job_{free, free_cb} */
142 			dma_fence_get(fence);
143 		} else {
144 			dma_fence_init(fence, &amdgpu_fence_ops,
145 				       &ring->fence_drv.lock,
146 				       adev->fence_context + ring->idx, seq);
147 		}
148 	}
149 
150 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
151 			       seq, flags | AMDGPU_FENCE_FLAG_INT);
152 	pm_runtime_get_noresume(adev_to_drm(adev)->dev);
153 	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
154 	if (unlikely(rcu_dereference_protected(*ptr, 1))) {
155 		struct dma_fence *old;
156 
157 		rcu_read_lock();
158 		old = dma_fence_get_rcu_safe(ptr);
159 		rcu_read_unlock();
160 
161 		if (old) {
162 			r = dma_fence_wait(old, false);
163 			dma_fence_put(old);
164 			if (r)
165 				return r;
166 		}
167 	}
168 
169 	to_amdgpu_fence(fence)->start_timestamp = ktime_get();
170 
171 	/* This function can't be called concurrently anyway, otherwise
172 	 * emitting the fence would mess up the hardware ring buffer.
173 	 */
174 	rcu_assign_pointer(*ptr, dma_fence_get(fence));
175 
176 	*f = fence;
177 
178 	return 0;
179 }
180 
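/*
 * Usage sketch (illustrative only, not part of the driver): a hypothetical
 * caller emitting a standalone fence (job == NULL) and waiting for it.  The
 * ring is assumed to have been prepared with amdgpu_ring_alloc() and is
 * committed with amdgpu_ring_commit() after the fence packet is emitted:
 *
 *	struct dma_fence *f;
 *	int r;
 *
 *	r = amdgpu_fence_emit(ring, &f, NULL, 0);
 *	if (r)
 *		return r;
 *	amdgpu_ring_commit(ring);
 *	r = dma_fence_wait(f, false);
 *	dma_fence_put(f);
 */
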
181 /**
182  * amdgpu_fence_emit_polling - emit a fence on the requested ring
183  *
184  * @ring: ring the fence is associated with
185  * @s: resulting sequence number
186  * @timeout: the timeout for waiting in usecs
187  *
188  * Emits a fence command on the requested ring (all asics).
189  * Used for fence polling.
190  * Returns 0 on success, -EINVAL or -ETIMEDOUT on failure.
191  */
192 int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
193 			      uint32_t timeout)
194 {
195 	uint32_t seq;
196 	signed long r;
197 
198 	if (!s)
199 		return -EINVAL;
200 
201 	seq = ++ring->fence_drv.sync_seq;
202 	r = amdgpu_fence_wait_polling(ring,
203 				      seq - ring->fence_drv.num_fences_mask,
204 				      timeout);
205 	if (r < 1)
206 		return -ETIMEDOUT;
207 
208 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
209 			       seq, 0);
210 
211 	*s = seq;
212 
213 	return 0;
214 }
215 
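/*
 * Polling usage sketch (illustrative only): emit a fence without requesting
 * an interrupt and busy-wait for it, as callers that cannot sleep (e.g. the
 * KIQ register access helpers) do.  The timeout_us value is a placeholder:
 *
 *	uint32_t seq;
 *	int r;
 *
 *	r = amdgpu_fence_emit_polling(ring, &seq, timeout_us);
 *	if (r)
 *		return r;
 *	amdgpu_ring_commit(ring);
 *	if (!amdgpu_fence_wait_polling(ring, seq, timeout_us))
 *		r = -ETIMEDOUT;
 */
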
216 /**
217  * amdgpu_fence_schedule_fallback - schedule fallback check
218  *
219  * @ring: pointer to struct amdgpu_ring
220  *
221  * Start a timer as fallback to our interrupts.
222  */
223 static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
224 {
225 	mod_timer(&ring->fence_drv.fallback_timer,
226 		  jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT);
227 }
228 
229 /**
230  * amdgpu_fence_process - check for fence activity
231  *
232  * @ring: pointer to struct amdgpu_ring
233  *
234  * Checks the current fence value and calculates the last
235  * signalled fence value. Wakes the fence queue if the
236  * sequence number has increased.
237  *
238  * Returns true if fence was processed
239  */
240 bool amdgpu_fence_process(struct amdgpu_ring *ring)
241 {
242 	struct amdgpu_fence_driver *drv = &ring->fence_drv;
243 	struct amdgpu_device *adev = ring->adev;
244 	uint32_t seq, last_seq;
245 
246 	do {
247 		last_seq = atomic_read(&ring->fence_drv.last_seq);
248 		seq = amdgpu_fence_read(ring);
249 
250 	} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
251 
252 	if (timer_delete(&ring->fence_drv.fallback_timer) &&
253 	    seq != ring->fence_drv.sync_seq)
254 		amdgpu_fence_schedule_fallback(ring);
255 
256 	if (unlikely(seq == last_seq))
257 		return false;
258 
259 	last_seq &= drv->num_fences_mask;
260 	seq &= drv->num_fences_mask;
261 
262 	do {
263 		struct dma_fence *fence, **ptr;
264 
265 		++last_seq;
266 		last_seq &= drv->num_fences_mask;
267 		ptr = &drv->fences[last_seq];
268 
269 		/* There is always exactly one thread signaling this fence slot */
270 		fence = rcu_dereference_protected(*ptr, 1);
271 		RCU_INIT_POINTER(*ptr, NULL);
272 
273 		if (!fence)
274 			continue;
275 
276 		dma_fence_signal(fence);
277 		dma_fence_put(fence);
278 		pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
279 		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
280 	} while (last_seq != seq);
281 
282 	return true;
283 }
284 
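/*
 * Note on the loop above: last_seq and seq are full 32-bit hardware sequence
 * numbers, while fences[] only has num_fences_mask + 1 (a power of two) slots.
 * Masking with num_fences_mask maps a sequence number to its slot, e.g. with
 * 256 slots (mask 0xff) seq 0x1234 lands in slot 0x34.  Walking from
 * last_seq + 1 up to seq therefore signals every fence the hardware has
 * completed since the previous call.
 */
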
285 /**
286  * amdgpu_fence_fallback - fallback for hardware interrupts
287  *
288  * @t: timer context used to obtain the pointer to ring structure
289  *
290  * Checks for fence activity.
291  */
292 static void amdgpu_fence_fallback(struct timer_list *t)
293 {
294 	struct amdgpu_ring *ring = from_timer(ring, t,
295 					      fence_drv.fallback_timer);
296 
297 	if (amdgpu_fence_process(ring))
298 		DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name);
299 }
300 
301 /**
302  * amdgpu_fence_wait_empty - wait for all fences to signal
303  *
304  * @ring: ring the fence is associated with
305  *
306  * Wait for all fences on the requested ring to signal (all asics).
307  * Returns 0 if the fences have passed, error for all other cases.
308  */
309 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
310 {
311 	uint64_t seq = READ_ONCE(ring->fence_drv.sync_seq);
312 	struct dma_fence *fence, **ptr;
313 	int r;
314 
315 	if (!seq)
316 		return 0;
317 
318 	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
319 	rcu_read_lock();
320 	fence = rcu_dereference(*ptr);
321 	if (!fence || !dma_fence_get_rcu(fence)) {
322 		rcu_read_unlock();
323 		return 0;
324 	}
325 	rcu_read_unlock();
326 
327 	r = dma_fence_wait(fence, false);
328 	dma_fence_put(fence);
329 	return r;
330 }
331 
332 /**
333  * amdgpu_fence_wait_polling - busy wait for a given sequence number
334  *
335  * @ring: ring the fence is associated with
336  * @wait_seq: sequence number to wait for
337  * @timeout: the timeout for waiting in usecs
338  *
339  * Busy-wait until the fence with the given sequence number signals (all asics).
340  * Returns the remaining timeout if the fence signaled in time, or 0 on timeout.
341  */
342 signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
343 				      uint32_t wait_seq,
344 				      signed long timeout)
345 {
346 
347 	while ((int32_t)(wait_seq - amdgpu_fence_read(ring)) > 0 && timeout > 0) {
348 		udelay(2);
349 		timeout -= 2;
350 	}
351 	return timeout > 0 ? timeout : 0;
352 }
353 /**
354  * amdgpu_fence_count_emitted - get the count of emitted fences
355  *
356  * @ring: ring the fence is associated with
357  *
358  * Get the number of fences emitted on the requested ring (all asics).
359  * Returns the number of emitted fences on the ring.  Used by the
360  * dynpm code to track ring activity.
361  */
362 unsigned int amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
363 {
364 	uint64_t emitted;
365 
366 	/* We are not protected by ring lock when reading the last sequence
367 	 * but it's ok to report slightly wrong fence count here.
368 	 */
369 	emitted = 0x100000000ull;
370 	emitted -= atomic_read(&ring->fence_drv.last_seq);
371 	emitted += READ_ONCE(ring->fence_drv.sync_seq);
372 	return lower_32_bits(emitted);
373 }
374 
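/*
 * Worked example for the arithmetic above: with last_seq = 0xfffffffe and
 * sync_seq = 0x00000001 (the 32-bit counter has wrapped), emitted becomes
 * 0x100000000 - 0xfffffffe + 0x1 = 0x3, i.e. three fences are still
 * outstanding, and lower_32_bits() returns that count unchanged.
 */
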
375 /**
376  * amdgpu_fence_last_unsignaled_time_us - time since the earliest unsignaled fence was emitted
377  * @ring: ring the fence is associated with
378  *
379  * Find the earliest fence that has not yet signaled and return the time delta,
380  * in microseconds, between when it was emitted and now.
381  */
382 u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
383 {
384 	struct amdgpu_fence_driver *drv = &ring->fence_drv;
385 	struct dma_fence *fence;
386 	uint32_t last_seq, sync_seq;
387 
388 	last_seq = atomic_read(&ring->fence_drv.last_seq);
389 	sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
390 	if (last_seq == sync_seq)
391 		return 0;
392 
393 	++last_seq;
394 	last_seq &= drv->num_fences_mask;
395 	fence = drv->fences[last_seq];
396 	if (!fence)
397 		return 0;
398 
399 	return ktime_us_delta(ktime_get(),
400 		to_amdgpu_fence(fence)->start_timestamp);
401 }
402 
403 /**
404  * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
405  * @ring: ring the fence is associated with
406  * @seq: the fence seq number to update.
407  * @timestamp: the start timestamp to update.
408  *
409  * This function is called when the fence and its related ib are about to be
410  * resubmitted to the GPU in an MCBP (mid-command-buffer preemption) scenario,
411  * so no race with amdgpu_fence_process() on the same fence needs to be considered.
412  */
413 void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp)
414 {
415 	struct amdgpu_fence_driver *drv = &ring->fence_drv;
416 	struct dma_fence *fence;
417 
418 	seq &= drv->num_fences_mask;
419 	fence = drv->fences[seq];
420 	if (!fence)
421 		return;
422 
423 	to_amdgpu_fence(fence)->start_timestamp = timestamp;
424 }
425 
426 /**
427  * amdgpu_fence_driver_start_ring - make the fence driver
428  * ready for use on the requested ring.
429  *
430  * @ring: ring to start the fence driver on
431  * @irq_src: interrupt source to use for this ring
432  * @irq_type: interrupt type to use for this ring
433  *
434  * Make the fence driver ready for processing (all asics).
435  * Not all asics have all rings, so each asic will only
436  * start the fence driver on the rings it has.
437  * Returns 0 for success, errors for failure.
438  */
439 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
440 				   struct amdgpu_irq_src *irq_src,
441 				   unsigned int irq_type)
442 {
443 	struct amdgpu_device *adev = ring->adev;
444 	uint64_t index;
445 
446 	if (ring->funcs->type != AMDGPU_RING_TYPE_UVD) {
447 		ring->fence_drv.cpu_addr = ring->fence_cpu_addr;
448 		ring->fence_drv.gpu_addr = ring->fence_gpu_addr;
449 	} else {
450 		/* put fence directly behind firmware */
451 		index = ALIGN(adev->uvd.fw->size, 8);
452 		ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index;
453 		ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index;
454 	}
455 	amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
456 
457 	ring->fence_drv.irq_src = irq_src;
458 	ring->fence_drv.irq_type = irq_type;
459 	ring->fence_drv.initialized = true;
460 
461 	DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr 0x%016llx\n",
462 		      ring->name, ring->fence_drv.gpu_addr);
463 	return 0;
464 }
465 
466 /**
467  * amdgpu_fence_driver_init_ring - init the fence driver
468  * for the requested ring.
469  *
470  * @ring: ring to init the fence driver on
471  *
472  * Init the fence driver for the requested ring (all asics).
473  * Helper function for amdgpu_fence_driver_init().
 * Returns 0 on success, -EINVAL or -ENOMEM on failure.
474  */
475 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
476 {
477 	struct amdgpu_device *adev = ring->adev;
478 
479 	if (!adev)
480 		return -EINVAL;
481 
482 	if (!is_power_of_2(ring->num_hw_submission))
483 		return -EINVAL;
484 
485 	ring->fence_drv.cpu_addr = NULL;
486 	ring->fence_drv.gpu_addr = 0;
487 	ring->fence_drv.sync_seq = 0;
488 	atomic_set(&ring->fence_drv.last_seq, 0);
489 	ring->fence_drv.initialized = false;
490 
491 	timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);
492 
493 	ring->fence_drv.num_fences_mask = ring->num_hw_submission * 2 - 1;
494 	spin_lock_init(&ring->fence_drv.lock);
495 	ring->fence_drv.fences = kcalloc(ring->num_hw_submission * 2, sizeof(void *),
496 					 GFP_KERNEL);
497 
498 	if (!ring->fence_drv.fences)
499 		return -ENOMEM;
500 
501 	return 0;
502 }
503 
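/*
 * Sizing note: fences[] holds twice num_hw_submission entries, so a slot is
 * normally recycled long before its sequence number comes around again;
 * amdgpu_fence_emit() still waits on the old fence in the rare case a slot is
 * occupied.  num_hw_submission must be a power of two so that
 * "seq & num_fences_mask" is a cheap modulo.
 */
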
504 /**
505  * amdgpu_fence_driver_sw_init - init the fence driver
506  * for all possible rings.
507  *
508  * @adev: amdgpu device pointer
509  *
510  * Init the fence driver for all possible rings (all asics).
511  * Not all asics have all rings, so each asic will only
512  * start the fence driver on the rings it has using
513  * amdgpu_fence_driver_start_ring().
514  * Returns 0 for success.
515  */
516 int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev)
517 {
518 	return 0;
519 }
520 
521 /**
522  * amdgpu_fence_need_ring_interrupt_restore - helper function to check whether
523  * fence driver interrupts need to be restored.
524  *
525  * @ring: ring to be checked
526  *
527  * Interrupts for rings that belong to GFX IP don't need to be restored
528  * when the target power state is s0ix.
529  *
530  * Return true if the interrupts need to be restored, false otherwise.
531  */
532 static bool amdgpu_fence_need_ring_interrupt_restore(struct amdgpu_ring *ring)
533 {
534 	struct amdgpu_device *adev = ring->adev;
535 	bool is_gfx_power_domain = false;
536 
537 	switch (ring->funcs->type) {
538 	case AMDGPU_RING_TYPE_SDMA:
539 	/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
540 		if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
541 		    IP_VERSION(5, 0, 0))
542 			is_gfx_power_domain = true;
543 		break;
544 	case AMDGPU_RING_TYPE_GFX:
545 	case AMDGPU_RING_TYPE_COMPUTE:
546 	case AMDGPU_RING_TYPE_KIQ:
547 	case AMDGPU_RING_TYPE_MES:
548 		is_gfx_power_domain = true;
549 		break;
550 	default:
551 		break;
552 	}
553 
554 	return !(adev->in_s0ix && is_gfx_power_domain);
555 }
556 
557 /**
558  * amdgpu_fence_driver_hw_fini - tear down the fence driver
559  * for all possible rings.
560  *
561  * @adev: amdgpu device pointer
562  *
563  * Tear down the fence driver for all possible rings (all asics).
564  */
565 void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
566 {
567 	int i, r;
568 
569 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
570 		struct amdgpu_ring *ring = adev->rings[i];
571 
572 		if (!ring || !ring->fence_drv.initialized)
573 			continue;
574 
575 		/* You can't wait for HW to signal if it's gone */
576 		if (!drm_dev_is_unplugged(adev_to_drm(adev)))
577 			r = amdgpu_fence_wait_empty(ring);
578 		else
579 			r = -ENODEV;
580 		/* no need to trigger GPU reset as we are unloading */
581 		if (r)
582 			amdgpu_fence_driver_force_completion(ring);
583 
584 		if (!drm_dev_is_unplugged(adev_to_drm(adev)) &&
585 		    ring->fence_drv.irq_src &&
586 		    amdgpu_fence_need_ring_interrupt_restore(ring))
587 			amdgpu_irq_put(adev, ring->fence_drv.irq_src,
588 				       ring->fence_drv.irq_type);
589 
590 		timer_delete_sync(&ring->fence_drv.fallback_timer);
591 	}
592 }
593 
594 /* Will either stop and flush handlers for the amdgpu interrupt or re-enable it */
595 void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop)
596 {
597 	int i;
598 
599 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
600 		struct amdgpu_ring *ring = adev->rings[i];
601 
602 		if (!ring || !ring->fence_drv.initialized || !ring->fence_drv.irq_src)
603 			continue;
604 
605 		if (stop)
606 			disable_irq(adev->irq.irq);
607 		else
608 			enable_irq(adev->irq.irq);
609 	}
610 }
611 
612 void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
613 {
614 	unsigned int i, j;
615 
616 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
617 		struct amdgpu_ring *ring = adev->rings[i];
618 
619 		if (!ring || !ring->fence_drv.initialized)
620 			continue;
621 
622 		/*
623 		 * Notice we check for sched.ops since there's some
624 		 * override on the meaning of sched.ready by amdgpu.
625 		 * The natural check would be sched.ready, which is
626 		 * set as drm_sched_init() finishes...
627 		 */
628 		if (ring->sched.ops)
629 			drm_sched_fini(&ring->sched);
630 
631 		for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
632 			dma_fence_put(ring->fence_drv.fences[j]);
633 		kfree(ring->fence_drv.fences);
634 		ring->fence_drv.fences = NULL;
635 		ring->fence_drv.initialized = false;
636 	}
637 }
638 
639 /**
640  * amdgpu_fence_driver_hw_init - enable the fence driver
641  * for all possible rings.
642  *
643  * @adev: amdgpu device pointer
644  *
645  * Enable the fence driver for all possible rings (all asics).
646  * Not all asics have all rings, so each asic will only
647  * start the fence driver on the rings it has using
648  * amdgpu_fence_driver_start_ring().
650  */
651 void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
652 {
653 	int i;
654 
655 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
656 		struct amdgpu_ring *ring = adev->rings[i];
657 
658 		if (!ring || !ring->fence_drv.initialized)
659 			continue;
660 
661 		/* enable the interrupt */
662 		if (ring->fence_drv.irq_src &&
663 		    amdgpu_fence_need_ring_interrupt_restore(ring))
664 			amdgpu_irq_get(adev, ring->fence_drv.irq_src,
665 				       ring->fence_drv.irq_type);
666 	}
667 }
668 
669 /**
670  * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring
671  *
672  * @ring: ring whose job-embedded fences are to be cleared
673  *
674  */
675 void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
676 {
677 	int i;
678 	struct dma_fence *old, **ptr;
679 
680 	for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
681 		ptr = &ring->fence_drv.fences[i];
682 		old = rcu_dereference_protected(*ptr, 1);
683 		if (old && old->ops == &amdgpu_job_fence_ops) {
684 			struct amdgpu_job *job;
685 
686 			/* For non-scheduler bad job, i.e. failed ib test, we need to signal
687 			 * it right here or we won't be able to track them in fence_drv
688 			 * and they will remain unsignaled during sa_bo free.
689 			 */
690 			job = container_of(old, struct amdgpu_job, hw_fence.base);
691 			if (!job->base.s_fence && !dma_fence_is_signaled(old))
692 				dma_fence_signal(old);
693 			RCU_INIT_POINTER(*ptr, NULL);
694 			dma_fence_put(old);
695 		}
696 	}
697 }
698 
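/*
 * Only fences using amdgpu_job_fence_ops are handled above: their memory is
 * owned by the embedding amdgpu_job rather than by fence_drv, so the driver
 * drops its reference (and, per the comment above, signals non-scheduler
 * fences such as failed IB tests) instead of leaving stale pointers in the
 * fences[] array.
 */
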
699 /**
700  * amdgpu_fence_driver_set_error - set error code on fences
701  * @ring: the ring which contains the fences
702  * @error: the error code to set
703  *
704  * Set an error code to all the fences pending on the ring.
705  */
706 void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error)
707 {
708 	struct amdgpu_fence_driver *drv = &ring->fence_drv;
709 	unsigned long flags;
710 
711 	spin_lock_irqsave(&drv->lock, flags);
712 	for (unsigned int i = 0; i <= drv->num_fences_mask; ++i) {
713 		struct dma_fence *fence;
714 
715 		fence = rcu_dereference_protected(drv->fences[i],
716 						  lockdep_is_held(&drv->lock));
717 		if (fence && !dma_fence_is_signaled_locked(fence))
718 			dma_fence_set_error(fence, error);
719 	}
720 	spin_unlock_irqrestore(&drv->lock, flags);
721 }
722 
723 /**
724  * amdgpu_fence_driver_force_completion - force signal latest fence of ring
725  *
726  * @ring: ring whose fences should be force completed
727  *
728  */
729 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
730 {
731 	amdgpu_fence_driver_set_error(ring, -ECANCELED);
732 	amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
733 	amdgpu_fence_process(ring);
734 }
735 
736 /*
737  * Common fence implementation
738  */
739 
740 static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence)
741 {
742 	return "amdgpu";
743 }
744 
745 static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
746 {
747 	return (const char *)to_amdgpu_fence(f)->ring->name;
748 }
749 
750 static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
751 {
752 	struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
753 
754 	return (const char *)to_amdgpu_ring(job->base.sched)->name;
755 }
756 
757 /**
758  * amdgpu_fence_enable_signaling - enable signalling on fence
759  * @f: fence
760  *
761  * This function is called with the fence lock held and arms the fallback
762  * timer (if it is not already pending) so that the fence still gets
763  * signaled even if the fence interrupt is missed.
764  */
765 static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
766 {
767 	if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer))
768 		amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring);
769 
770 	return true;
771 }
772 
773 /**
774  * amdgpu_job_fence_enable_signaling - enable signalling on job fence
775  * @f: fence
776  *
777  * This is similar to amdgpu_fence_enable_signaling above; it only
778  * handles the job-embedded fence.
779  */
780 static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
781 {
782 	struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
783 
784 	if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
785 		amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));
786 
787 	return true;
788 }
789 
790 /**
791  * amdgpu_fence_free - free up the fence memory
792  *
793  * @rcu: RCU callback head
794  *
795  * Free up the fence memory after the RCU grace period.
796  */
797 static void amdgpu_fence_free(struct rcu_head *rcu)
798 {
799 	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
800 
801 	/* free the standalone fence allocated in amdgpu_fence_emit() */
802 	kfree(to_amdgpu_fence(f));
803 }
804 
805 /**
806  * amdgpu_job_fence_free - free up the job with embedded fence
807  *
808  * @rcu: RCU callback head
809  *
810  * Free up the job with embedded fence after the RCU grace period.
811  */
812 static void amdgpu_job_fence_free(struct rcu_head *rcu)
813 {
814 	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
815 
816 	/* the fence is embedded in an amdgpu_job, so free the whole job */
817 	kfree(container_of(f, struct amdgpu_job, hw_fence.base));
818 }
819 
820 /**
821  * amdgpu_fence_release - callback that fence can be freed
822  *
823  * @f: fence
824  *
825  * This function is called when the reference count becomes zero.
826  * It just RCU schedules freeing up the fence.
827  */
828 static void amdgpu_fence_release(struct dma_fence *f)
829 {
830 	call_rcu(&f->rcu, amdgpu_fence_free);
831 }
832 
833 /**
834  * amdgpu_job_fence_release - callback that job embedded fence can be freed
835  *
836  * @f: fence
837  *
838  * This is similar to amdgpu_fence_release above; it only
839  * handles the job-embedded fence.
840  */
841 static void amdgpu_job_fence_release(struct dma_fence *f)
842 {
843 	call_rcu(&f->rcu, amdgpu_job_fence_free);
844 }
845 
846 static const struct dma_fence_ops amdgpu_fence_ops = {
847 	.get_driver_name = amdgpu_fence_get_driver_name,
848 	.get_timeline_name = amdgpu_fence_get_timeline_name,
849 	.enable_signaling = amdgpu_fence_enable_signaling,
850 	.release = amdgpu_fence_release,
851 };
852 
853 static const struct dma_fence_ops amdgpu_job_fence_ops = {
854 	.get_driver_name = amdgpu_fence_get_driver_name,
855 	.get_timeline_name = amdgpu_job_fence_get_timeline_name,
856 	.enable_signaling = amdgpu_job_fence_enable_signaling,
857 	.release = amdgpu_job_fence_release,
858 };
859 
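/*
 * Two ops tables exist because the hardware fence can live in two different
 * allocations: a standalone struct amdgpu_fence (amdgpu_fence_ops, freed by
 * kfree() of the fence itself in amdgpu_fence_free()) or one embedded in a
 * struct amdgpu_job (amdgpu_job_fence_ops, where the containing job is freed
 * in amdgpu_job_fence_free() instead).  to_amdgpu_fence() accepts both, while
 * container_of(f, struct amdgpu_job, hw_fence.base) is only valid for the
 * job-embedded variant.
 */
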
860 /*
861  * Fence debugfs
862  */
863 #if defined(CONFIG_DEBUG_FS)
864 static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused)
865 {
866 	struct amdgpu_device *adev = m->private;
867 	int i;
868 
869 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
870 		struct amdgpu_ring *ring = adev->rings[i];
871 
872 		if (!ring || !ring->fence_drv.initialized)
873 			continue;
874 
875 		amdgpu_fence_process(ring);
876 
877 		seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
878 		seq_printf(m, "Last signaled fence          0x%08x\n",
879 			   atomic_read(&ring->fence_drv.last_seq));
880 		seq_printf(m, "Last emitted                 0x%08x\n",
881 			   ring->fence_drv.sync_seq);
882 
883 		if (ring->funcs->type == AMDGPU_RING_TYPE_GFX ||
884 		    ring->funcs->type == AMDGPU_RING_TYPE_SDMA) {
885 			seq_printf(m, "Last signaled trailing fence 0x%08x\n",
886 				   le32_to_cpu(*ring->trail_fence_cpu_addr));
887 			seq_printf(m, "Last emitted                 0x%08x\n",
888 				   ring->trail_seq);
889 		}
890 
891 		if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
892 			continue;
893 
894 		/* set in CP_VMID_PREEMPT and preemption occurred */
895 		seq_printf(m, "Last preempted               0x%08x\n",
896 			   le32_to_cpu(*(ring->fence_drv.cpu_addr + 2)));
897 		/* set in CP_VMID_RESET and reset occurred */
898 		seq_printf(m, "Last reset                   0x%08x\n",
899 			   le32_to_cpu(*(ring->fence_drv.cpu_addr + 4)));
900 		/* Both preemption and reset occurred */
901 		seq_printf(m, "Last both                    0x%08x\n",
902 			   le32_to_cpu(*(ring->fence_drv.cpu_addr + 6)));
903 	}
904 	return 0;
905 }
906 
907 /*
908  * amdgpu_debugfs_gpu_recover - manually trigger a gpu reset & recover
909  *
910  * Reading this debugfs file schedules a GPU reset and waits for the recovery to finish.
911  */
912 static int gpu_recover_get(void *data, u64 *val)
913 {
914 	struct amdgpu_device *adev = (struct amdgpu_device *)data;
915 	struct drm_device *dev = adev_to_drm(adev);
916 	int r;
917 
918 	r = pm_runtime_get_sync(dev->dev);
919 	if (r < 0) {
920 		pm_runtime_put_autosuspend(dev->dev);
921 		return 0;
922 	}
923 
924 	if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
925 		flush_work(&adev->reset_work);
926 
927 	*val = atomic_read(&adev->reset_domain->reset_res);
928 
929 	pm_runtime_mark_last_busy(dev->dev);
930 	pm_runtime_put_autosuspend(dev->dev);
931 
932 	return 0;
933 }
934 
935 DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
936 DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
937 			 "%lld\n");
938 
939 static void amdgpu_debugfs_reset_work(struct work_struct *work)
940 {
941 	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
942 						  reset_work);
943 
944 	struct amdgpu_reset_context reset_context;
945 
946 	memset(&reset_context, 0, sizeof(reset_context));
947 
948 	reset_context.method = AMD_RESET_METHOD_NONE;
949 	reset_context.reset_req_dev = adev;
950 	reset_context.src = AMDGPU_RESET_SRC_USER;
951 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
952 	set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
953 
954 	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
955 }
956 
957 #endif
958 
959 void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
960 {
961 #if defined(CONFIG_DEBUG_FS)
962 	struct drm_minor *minor = adev_to_drm(adev)->primary;
963 	struct dentry *root = minor->debugfs_root;
964 
965 	debugfs_create_file("amdgpu_fence_info", 0444, root, adev,
966 			    &amdgpu_debugfs_fence_info_fops);
967 
968 	if (!amdgpu_sriov_vf(adev)) {
969 
970 		INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
971 		debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
972 				    &amdgpu_debugfs_gpu_recover_fops);
973 	}
974 #endif
975 }
976 
977