xref: /linux/kernel/bpf/mmap_unlock_work.h (revision 7c7e3d31e7856a8260a254f8c71db416f7f9f5a1)
1*7c7e3d31SSong Liu /* SPDX-License-Identifier: GPL-2.0-only */
2*7c7e3d31SSong Liu /* Copyright (c) 2021 Facebook
3*7c7e3d31SSong Liu  */
4*7c7e3d31SSong Liu 
5*7c7e3d31SSong Liu #ifndef __MMAP_UNLOCK_WORK_H__
6*7c7e3d31SSong Liu #define __MMAP_UNLOCK_WORK_H__
7*7c7e3d31SSong Liu #include <linux/irq_work.h>
8*7c7e3d31SSong Liu 
9*7c7e3d31SSong Liu /* irq_work to run mmap_read_unlock() in irq_work */
10*7c7e3d31SSong Liu struct mmap_unlock_irq_work {
11*7c7e3d31SSong Liu 	struct irq_work irq_work;	/* queued so the unlock runs once irqs are re-enabled */
12*7c7e3d31SSong Liu 	struct mm_struct *mm;		/* mm whose mmap_lock is still read-held; presumably released by the irq_work handler (defined elsewhere) */
13*7c7e3d31SSong Liu };
14*7c7e3d31SSong Liu 
15*7c7e3d31SSong Liu DECLARE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);
16*7c7e3d31SSong Liu 
/*
 * We cannot call mmap_read_unlock() while irqs are disabled, because
 * that risks a deadlock with rq_lock. So, to look up a vma while irqs
 * are disabled, we defer the mmap_read_unlock() to an irq_work, using
 * a percpu variable as the work item. If that irq_work is already in
 * use by another lookup, we fall back (report busy to the caller).
 */
24*7c7e3d31SSong Liu static inline bool bpf_mmap_unlock_get_irq_work(struct mmap_unlock_irq_work **work_ptr)
25*7c7e3d31SSong Liu {
26*7c7e3d31SSong Liu 	struct mmap_unlock_irq_work *work = NULL;
27*7c7e3d31SSong Liu 	bool irq_work_busy = false;
28*7c7e3d31SSong Liu 
29*7c7e3d31SSong Liu 	if (irqs_disabled()) {
30*7c7e3d31SSong Liu 		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
31*7c7e3d31SSong Liu 			work = this_cpu_ptr(&mmap_unlock_work);
32*7c7e3d31SSong Liu 			if (irq_work_is_busy(&work->irq_work)) {
33*7c7e3d31SSong Liu 				/* cannot queue more up_read, fallback */
34*7c7e3d31SSong Liu 				irq_work_busy = true;
35*7c7e3d31SSong Liu 			}
36*7c7e3d31SSong Liu 		} else {
37*7c7e3d31SSong Liu 			/*
38*7c7e3d31SSong Liu 			 * PREEMPT_RT does not allow to trylock mmap sem in
39*7c7e3d31SSong Liu 			 * interrupt disabled context. Force the fallback code.
40*7c7e3d31SSong Liu 			 */
41*7c7e3d31SSong Liu 			irq_work_busy = true;
42*7c7e3d31SSong Liu 		}
43*7c7e3d31SSong Liu 	}
44*7c7e3d31SSong Liu 
45*7c7e3d31SSong Liu 	*work_ptr = work;
46*7c7e3d31SSong Liu 	return irq_work_busy;
47*7c7e3d31SSong Liu }
48*7c7e3d31SSong Liu 
49*7c7e3d31SSong Liu static inline void bpf_mmap_unlock_mm(struct mmap_unlock_irq_work *work, struct mm_struct *mm)
50*7c7e3d31SSong Liu {
51*7c7e3d31SSong Liu 	if (!work) {
52*7c7e3d31SSong Liu 		mmap_read_unlock(mm);
53*7c7e3d31SSong Liu 	} else {
54*7c7e3d31SSong Liu 		work->mm = mm;
55*7c7e3d31SSong Liu 
56*7c7e3d31SSong Liu 		/* The lock will be released once we're out of interrupt
57*7c7e3d31SSong Liu 		 * context. Tell lockdep that we've released it now so
58*7c7e3d31SSong Liu 		 * it doesn't complain that we forgot to release it.
59*7c7e3d31SSong Liu 		 */
60*7c7e3d31SSong Liu 		rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
61*7c7e3d31SSong Liu 		irq_work_queue(&work->irq_work);
62*7c7e3d31SSong Liu 	}
63*7c7e3d31SSong Liu }
64*7c7e3d31SSong Liu 
65*7c7e3d31SSong Liu #endif /* __MMAP_UNLOCK_WORK_H__ */
66