xref: /freebsd/sys/kern/kern_rangelock.c (revision 8f0e91308a0838d4462fe26025010f99b334e990)
1*8f0e9130SKonstantin Belousov /*-
2*8f0e9130SKonstantin Belousov  * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
3*8f0e9130SKonstantin Belousov  * All rights reserved.
4*8f0e9130SKonstantin Belousov  *
5*8f0e9130SKonstantin Belousov  * Redistribution and use in source and binary forms, with or without
6*8f0e9130SKonstantin Belousov  * modification, are permitted provided that the following conditions
7*8f0e9130SKonstantin Belousov  * are met:
8*8f0e9130SKonstantin Belousov  * 1. Redistributions of source code must retain the above copyright
9*8f0e9130SKonstantin Belousov  *    notice unmodified, this list of conditions, and the following
10*8f0e9130SKonstantin Belousov  *    disclaimer.
11*8f0e9130SKonstantin Belousov  * 2. Redistributions in binary form must reproduce the above copyright
12*8f0e9130SKonstantin Belousov  *    notice, this list of conditions and the following disclaimer in the
13*8f0e9130SKonstantin Belousov  *    documentation and/or other materials provided with the distribution.
14*8f0e9130SKonstantin Belousov  *
15*8f0e9130SKonstantin Belousov  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16*8f0e9130SKonstantin Belousov  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17*8f0e9130SKonstantin Belousov  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18*8f0e9130SKonstantin Belousov  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19*8f0e9130SKonstantin Belousov  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20*8f0e9130SKonstantin Belousov  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21*8f0e9130SKonstantin Belousov  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22*8f0e9130SKonstantin Belousov  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23*8f0e9130SKonstantin Belousov  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24*8f0e9130SKonstantin Belousov  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25*8f0e9130SKonstantin Belousov  */
26*8f0e9130SKonstantin Belousov 
27*8f0e9130SKonstantin Belousov #include <sys/cdefs.h>
28*8f0e9130SKonstantin Belousov __FBSDID("$FreeBSD$");
29*8f0e9130SKonstantin Belousov 
30*8f0e9130SKonstantin Belousov #include <sys/param.h>
31*8f0e9130SKonstantin Belousov #include <sys/kernel.h>
32*8f0e9130SKonstantin Belousov #include <sys/lock.h>
33*8f0e9130SKonstantin Belousov #include <sys/mutex.h>
34*8f0e9130SKonstantin Belousov #include <sys/proc.h>
35*8f0e9130SKonstantin Belousov #include <sys/rangelock.h>
36*8f0e9130SKonstantin Belousov #include <sys/systm.h>
37*8f0e9130SKonstantin Belousov 
38*8f0e9130SKonstantin Belousov #include <vm/uma.h>
39*8f0e9130SKonstantin Belousov 
/*
 * One queued range-lock request, either granted or waiting.  Entries
 * live on a rangelock's rl_waiters queue in request order.
 */
struct rl_q_entry {
	TAILQ_ENTRY(rl_q_entry) rl_q_link;	/* Linkage in rl_waiters. */
	off_t		rl_q_start, rl_q_end;	/* Requested byte range;
						   end is treated as
						   exclusive by the overlap
						   check. */
	int		rl_q_flags;		/* RL_LOCK_TYPE_MASK mode
						   bits and RL_LOCK_GRANTED
						   state. */
};
45*8f0e9130SKonstantin Belousov 
/* UMA zone from which all rl_q_entry structures are allocated. */
static uma_zone_t rl_entry_zone;

/*
 * Create the queue-entry zone.  Runs once at boot, ordered at
 * SI_SUB_LOCK so the zone exists before any rangelock user.
 */
static void
rangelock_sys_init(void)
{

	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);
56*8f0e9130SKonstantin Belousov 
57*8f0e9130SKonstantin Belousov static struct rl_q_entry *
58*8f0e9130SKonstantin Belousov rlqentry_alloc(void)
59*8f0e9130SKonstantin Belousov {
60*8f0e9130SKonstantin Belousov 
61*8f0e9130SKonstantin Belousov 	return (uma_zalloc(rl_entry_zone, M_WAITOK));
62*8f0e9130SKonstantin Belousov }
63*8f0e9130SKonstantin Belousov 
64*8f0e9130SKonstantin Belousov void
65*8f0e9130SKonstantin Belousov rlqentry_free(struct rl_q_entry *rleq)
66*8f0e9130SKonstantin Belousov {
67*8f0e9130SKonstantin Belousov 
68*8f0e9130SKonstantin Belousov 	uma_zfree(rl_entry_zone, rleq);
69*8f0e9130SKonstantin Belousov }
70*8f0e9130SKonstantin Belousov 
71*8f0e9130SKonstantin Belousov void
72*8f0e9130SKonstantin Belousov rangelock_init(struct rangelock *lock)
73*8f0e9130SKonstantin Belousov {
74*8f0e9130SKonstantin Belousov 
75*8f0e9130SKonstantin Belousov 	TAILQ_INIT(&lock->rl_waiters);
76*8f0e9130SKonstantin Belousov 	lock->rl_currdep = NULL;
77*8f0e9130SKonstantin Belousov }
78*8f0e9130SKonstantin Belousov 
/*
 * Tear down a range lock.  The lock must be idle: any entry still on
 * the waiters queue indicates a caller bug.
 */
void
rangelock_destroy(struct rangelock *lock)
{

	KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
}
85*8f0e9130SKonstantin Belousov 
86*8f0e9130SKonstantin Belousov /*
87*8f0e9130SKonstantin Belousov  * Verifies the supplied rl_q_entries for compatibility.  Returns true
88*8f0e9130SKonstantin Belousov  * if the rangelock queue entries are not compatible, false if they are.
89*8f0e9130SKonstantin Belousov  *
90*8f0e9130SKonstantin Belousov  * Two entries are compatible if their ranges do not overlap, or both
91*8f0e9130SKonstantin Belousov  * entries are for read.
92*8f0e9130SKonstantin Belousov  */
93*8f0e9130SKonstantin Belousov static int
94*8f0e9130SKonstantin Belousov rangelock_incompatible(const struct rl_q_entry *e1,
95*8f0e9130SKonstantin Belousov     const struct rl_q_entry *e2)
96*8f0e9130SKonstantin Belousov {
97*8f0e9130SKonstantin Belousov 
98*8f0e9130SKonstantin Belousov 	if ((e1->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ &&
99*8f0e9130SKonstantin Belousov 	    (e2->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ)
100*8f0e9130SKonstantin Belousov 		return (0);
101*8f0e9130SKonstantin Belousov 	if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
102*8f0e9130SKonstantin Belousov 		return (1);
103*8f0e9130SKonstantin Belousov 	return (0);
104*8f0e9130SKonstantin Belousov }
105*8f0e9130SKonstantin Belousov 
/*
 * Recalculate the lock->rl_currdep after an unlock.
 *
 * rl_currdep marks the first queued request that has not been cleared
 * to run.  Scan forward from it, stopping at the first entry that is
 * incompatible with some entry queued ahead of it; every entry before
 * the new rl_currdep is grantable, so mark those granted and wake any
 * sleepers.  Called with the interlock held (both from the unlock path
 * and from rangelock_enqueue()).
 */
static void
rangelock_calc_block(struct rangelock *lock)
{
	struct rl_q_entry *entry, *entry1, *whead;

	/*
	 * The queue head has no entries ahead of it to conflict with,
	 * so when rl_currdep sits at the head it can be advanced past
	 * it unconditionally.
	 */
	if (lock->rl_currdep == TAILQ_FIRST(&lock->rl_waiters) &&
	    lock->rl_currdep != NULL)
		lock->rl_currdep = TAILQ_NEXT(lock->rl_currdep, rl_q_link);
	for (entry = lock->rl_currdep; entry != NULL;
	     entry = TAILQ_NEXT(entry, rl_q_link)) {
		/*
		 * Check entry against every waiter queued before it;
		 * the inner loop stops once it reaches entry itself.
		 */
		TAILQ_FOREACH(entry1, &lock->rl_waiters, rl_q_link) {
			if (rangelock_incompatible(entry, entry1))
				goto out;
			if (entry1 == entry)
				break;
		}
	}
out:
	/* entry is the first blocked waiter (or NULL if none remain). */
	lock->rl_currdep = entry;
	TAILQ_FOREACH(whead, &lock->rl_waiters, rl_q_link) {
		if (whead == lock->rl_currdep)
			break;
		/* Grant and wake each newly-unblocked waiter once. */
		if (!(whead->rl_q_flags & RL_LOCK_GRANTED)) {
			whead->rl_q_flags |= RL_LOCK_GRANTED;
			wakeup(whead);
		}
	}
}
137*8f0e9130SKonstantin Belousov 
138*8f0e9130SKonstantin Belousov static void
139*8f0e9130SKonstantin Belousov rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
140*8f0e9130SKonstantin Belousov     struct mtx *ilk)
141*8f0e9130SKonstantin Belousov {
142*8f0e9130SKonstantin Belousov 
143*8f0e9130SKonstantin Belousov 	MPASS(lock != NULL && entry != NULL && ilk != NULL);
144*8f0e9130SKonstantin Belousov 	mtx_assert(ilk, MA_OWNED);
145*8f0e9130SKonstantin Belousov 	KASSERT(entry != lock->rl_currdep, ("stuck currdep"));
146*8f0e9130SKonstantin Belousov 
147*8f0e9130SKonstantin Belousov 	TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
148*8f0e9130SKonstantin Belousov 	rangelock_calc_block(lock);
149*8f0e9130SKonstantin Belousov 	mtx_unlock(ilk);
150*8f0e9130SKonstantin Belousov 	if (curthread->td_rlqe == NULL)
151*8f0e9130SKonstantin Belousov 		curthread->td_rlqe = entry;
152*8f0e9130SKonstantin Belousov 	else
153*8f0e9130SKonstantin Belousov 		rlqentry_free(entry);
154*8f0e9130SKonstantin Belousov }
155*8f0e9130SKonstantin Belousov 
/*
 * Release a granted range lock.  The cookie is the value returned by
 * rangelock_rlock() or rangelock_wlock().  The interlock is taken here
 * and dropped inside rangelock_unlock_locked().
 */
void
rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
{

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);

	mtx_lock(ilk);
	rangelock_unlock_locked(lock, cookie, ilk);
}
165*8f0e9130SKonstantin Belousov 
/*
 * Unlock the sub-range of granted lock.
 *
 * Shrinks the granted range to end at the given offset; the start must
 * be unchanged and the new end must not extend the range (asserted
 * below).  Returns NULL when this releases the whole remaining range,
 * otherwise returns the still-valid cookie for the trimmed range.
 */
void *
rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
    off_t end, struct mtx *ilk)
{
	struct rl_q_entry *entry;

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);
	entry = cookie;
	KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
	    ("Unlocking non-granted lock"));
	KASSERT(entry->rl_q_start == start, ("wrong start"));
	KASSERT(entry->rl_q_end >= end, ("wrong end"));

	mtx_lock(ilk);
	if (entry->rl_q_end == end) {
		/* Full unlock; the interlock is dropped by the callee. */
		rangelock_unlock_locked(lock, cookie, ilk);
		return (NULL);
	}
	/* Trim the tail and let now-compatible waiters proceed. */
	entry->rl_q_end = end;
	rangelock_calc_block(lock);
	mtx_unlock(ilk);
	return (cookie);
}
192*8f0e9130SKonstantin Belousov 
/*
 * Add the lock request to the queue of the pending requests for
 * rangelock.  Sleep until the request can be granted.
 *
 * mode is RL_LOCK_READ or RL_LOCK_WRITE; the returned queue entry is
 * the opaque cookie callers pass to the unlock functions.  The
 * interlock ilk protects the queue; it is dropped while sleeping and
 * before returning.
 */
static void *
rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
    struct mtx *ilk)
{
	struct rl_q_entry *entry;
	struct thread *td;

	MPASS(lock != NULL && ilk != NULL);

	td = curthread;
	/*
	 * Prefer the per-thread cached entry over a zone allocation;
	 * rangelock_unlock_locked() refills the cache slot on unlock.
	 */
	if (td->td_rlqe != NULL) {
		entry = td->td_rlqe;
		td->td_rlqe = NULL;
	} else
		entry = rlqentry_alloc();
	MPASS(entry != NULL);
	entry->rl_q_flags = mode;
	entry->rl_q_start = start;
	entry->rl_q_end = end;

	mtx_lock(ilk);
	/*
	 * XXXKIB TODO. Check that a thread does not try to enqueue a
	 * lock that is incompatible with another request from the same
	 * thread.
	 */

	TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
	/* An idle lock has no current dependency; this entry is it now. */
	if (lock->rl_currdep == NULL)
		lock->rl_currdep = entry;
	rangelock_calc_block(lock);
	/* msleep() drops and re-acquires ilk around each sleep. */
	while (!(entry->rl_q_flags & RL_LOCK_GRANTED))
		msleep(entry, ilk, 0, "range", 0);
	mtx_unlock(ilk);
	return (entry);
}
233*8f0e9130SKonstantin Belousov 
/*
 * Acquire a shared (read) lock on the byte range [start, end) of lock.
 * May sleep; returns the cookie for rangelock_unlock().
 */
void *
rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk));
}
240*8f0e9130SKonstantin Belousov 
/*
 * Acquire an exclusive (write) lock on the byte range [start, end) of
 * lock.  May sleep; returns the cookie for rangelock_unlock().
 */
void *
rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk));
}
247