/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2021 Facebook
 */

#ifndef __MMAP_UNLOCK_WORK_H__
#define __MMAP_UNLOCK_WORK_H__
#include <linux/irq_work.h>

/* irq_work used to run mmap_read_unlock() from irq_work context */
struct mmap_unlock_irq_work {
	struct irq_work irq_work;
	struct mm_struct *mm;
};

DECLARE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);

/*
 * We cannot do mmap_read_unlock() while irqs are disabled, because of the
 * risk of deadlocking with rq_lock. To look up a vma while irqs are
 * disabled, we need to run mmap_read_unlock() in irq_work instead. We use
 * a percpu variable for that irq_work. If the irq_work is already in use
 * by another lookup, we fall back.
 */
static inline bool bpf_mmap_unlock_get_irq_work(struct mmap_unlock_irq_work **work_ptr)
{
	struct mmap_unlock_irq_work *work = NULL;
	bool irq_work_busy = false;

	if (irqs_disabled()) {
		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
			work = this_cpu_ptr(&mmap_unlock_work);
			if (irq_work_is_busy(&work->irq_work)) {
				/* cannot queue another up_read, fall back */
				irq_work_busy = true;
			}
		} else {
			/*
			 * PREEMPT_RT does not allow trylocking the mmap sem
			 * in interrupt-disabled context. Force the fallback
			 * code.
			 */
			irq_work_busy = true;
		}
	}

	*work_ptr = work;
	return irq_work_busy;
}

static inline void bpf_mmap_unlock_mm(struct mmap_unlock_irq_work *work, struct mm_struct *mm)
{
	if (!work) {
		mmap_read_unlock(mm);
	} else {
		work->mm = mm;

		/* The lock will be released once we're out of interrupt
		 * context. Tell lockdep that we've released it now so
		 * it doesn't complain that we forgot to release it.
		 */
		rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
		irq_work_queue(&work->irq_work);
	}
}

#endif /* __MMAP_UNLOCK_WORK_H__ */
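
/*
 * Example usage (illustrative sketch only, not part of this header).
 * It mirrors the pattern used by callers such as bpf_find_vma() in
 * kernel/bpf/task_iter.c; the function and variable names below
 * (lookup_vma_example, mm, addr) are hypothetical.
 *
 *	static int lookup_vma_example(struct mm_struct *mm, unsigned long addr)
 *	{
 *		struct mmap_unlock_irq_work *work = NULL;
 *		struct vm_area_struct *vma;
 *		bool irq_work_busy;
 *
 *		// Grab the per-CPU irq_work slot if irqs are disabled. When it
 *		// is busy (or on PREEMPT_RT), do not take mmap_lock here at all.
 *		irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);
 *		if (irq_work_busy || !mmap_read_trylock(mm))
 *			return -EBUSY;
 *
 *		vma = find_vma(mm, addr);
 *		// ... use vma while mmap_lock is read-held ...
 *
 *		// Unlocks immediately when work == NULL, otherwise defers the
 *		// mmap_read_unlock() to irq_work context.
 *		bpf_mmap_unlock_mm(work, mm);
 *		return vma ? 0 : -ENOENT;
 *	}
 */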