xref: /freebsd/sys/kern/kern_rangelock.c (revision 5badbeeaf0614568d37b2466d0f52676ff08a049)
18f0e9130SKonstantin Belousov /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
38a36da99SPedro F. Giffuni  *
48f0e9130SKonstantin Belousov  * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
58f0e9130SKonstantin Belousov  * All rights reserved.
68f0e9130SKonstantin Belousov  *
78f0e9130SKonstantin Belousov  * Redistribution and use in source and binary forms, with or without
88f0e9130SKonstantin Belousov  * modification, are permitted provided that the following conditions
98f0e9130SKonstantin Belousov  * are met:
108f0e9130SKonstantin Belousov  * 1. Redistributions of source code must retain the above copyright
118f0e9130SKonstantin Belousov  *    notice unmodified, this list of conditions, and the following
128f0e9130SKonstantin Belousov  *    disclaimer.
138f0e9130SKonstantin Belousov  * 2. Redistributions in binary form must reproduce the above copyright
148f0e9130SKonstantin Belousov  *    notice, this list of conditions and the following disclaimer in the
158f0e9130SKonstantin Belousov  *    documentation and/or other materials provided with the distribution.
168f0e9130SKonstantin Belousov  *
178f0e9130SKonstantin Belousov  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
188f0e9130SKonstantin Belousov  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
198f0e9130SKonstantin Belousov  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
208f0e9130SKonstantin Belousov  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
218f0e9130SKonstantin Belousov  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
228f0e9130SKonstantin Belousov  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
238f0e9130SKonstantin Belousov  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
248f0e9130SKonstantin Belousov  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
258f0e9130SKonstantin Belousov  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
268f0e9130SKonstantin Belousov  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
278f0e9130SKonstantin Belousov  */
288f0e9130SKonstantin Belousov 
298f0e9130SKonstantin Belousov #include <sys/param.h>
30c3d8a931SKonstantin Belousov #include <sys/kassert.h>
318f0e9130SKonstantin Belousov #include <sys/kernel.h>
328f0e9130SKonstantin Belousov #include <sys/lock.h>
338f0e9130SKonstantin Belousov #include <sys/mutex.h>
348f0e9130SKonstantin Belousov #include <sys/proc.h>
358f0e9130SKonstantin Belousov #include <sys/rangelock.h>
36c3d8a931SKonstantin Belousov #include <sys/sleepqueue.h>
37c3d8a931SKonstantin Belousov #include <sys/smr.h>
388f0e9130SKonstantin Belousov 
398f0e9130SKonstantin Belousov #include <vm/uma.h>
408f0e9130SKonstantin Belousov 
41c3d8a931SKonstantin Belousov /*
42c3d8a931SKonstantin Belousov  * Implementation of range locks based on the paper
43c3d8a931SKonstantin Belousov  * https://doi.org/10.1145/3342195.3387533
44c3d8a931SKonstantin Belousov  * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020
45c3d8a931SKonstantin Belousov  * Scalable Range Locks for Scalable Address Spaces and Beyond
46c3d8a931SKonstantin Belousov  * by Alex Kogan, Dave Dice, and Shady Issa
47c3d8a931SKonstantin Belousov  */
48c3d8a931SKonstantin Belousov 
49c3d8a931SKonstantin Belousov static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e);
50c3d8a931SKonstantin Belousov 
51c3d8a931SKonstantin Belousov /*
52c3d8a931SKonstantin Belousov  * rl_q_next links all granted ranges in the lock.  We cannot free an
53c3d8a931SKonstantin Belousov  * rl_q_entry while in the smr section, and cannot reuse rl_q_next
54c3d8a931SKonstantin Belousov  * linkage since other threads might follow it even after CAS removed
55c3d8a931SKonstantin Belousov  * the range.  Use rl_q_free for local list of ranges to remove after
56c3d8a931SKonstantin Belousov  * the smr section is dropped.
57c3d8a931SKonstantin Belousov  */
588f0e9130SKonstantin Belousov struct rl_q_entry {
59c3d8a931SKonstantin Belousov 	struct rl_q_entry *rl_q_next;
60c3d8a931SKonstantin Belousov 	struct rl_q_entry *rl_q_free;
618f0e9130SKonstantin Belousov 	off_t		rl_q_start, rl_q_end;
628f0e9130SKonstantin Belousov 	int		rl_q_flags;
63c3d8a931SKonstantin Belousov #ifdef INVARIANTS
64c3d8a931SKonstantin Belousov 	struct thread	*rl_q_owner;
65c3d8a931SKonstantin Belousov #endif
668f0e9130SKonstantin Belousov };
678f0e9130SKonstantin Belousov 
688f0e9130SKonstantin Belousov static uma_zone_t rl_entry_zone;
69c3d8a931SKonstantin Belousov static smr_t rl_smr;
708f0e9130SKonstantin Belousov 
718f0e9130SKonstantin Belousov static void
728f0e9130SKonstantin Belousov rangelock_sys_init(void)
738f0e9130SKonstantin Belousov {
748f0e9130SKonstantin Belousov 	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
75c3d8a931SKonstantin Belousov 	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
76c3d8a931SKonstantin Belousov 	    UMA_ZONE_SMR);
77c3d8a931SKonstantin Belousov 	rl_smr = uma_zone_get_smr(rl_entry_zone);
788f0e9130SKonstantin Belousov }
79c3d8a931SKonstantin Belousov SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);
808f0e9130SKonstantin Belousov 
818f0e9130SKonstantin Belousov static struct rl_q_entry *
82c3d8a931SKonstantin Belousov rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
838f0e9130SKonstantin Belousov {
84c3d8a931SKonstantin Belousov 	struct rl_q_entry *e;
858f0e9130SKonstantin Belousov 
86c3d8a931SKonstantin Belousov 	e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
87c3d8a931SKonstantin Belousov 	e->rl_q_next = NULL;
88c3d8a931SKonstantin Belousov 	e->rl_q_free = NULL;
89c3d8a931SKonstantin Belousov 	e->rl_q_start = start;
90c3d8a931SKonstantin Belousov 	e->rl_q_end = end;
91c3d8a931SKonstantin Belousov 	e->rl_q_flags = flags;
92c3d8a931SKonstantin Belousov #ifdef INVARIANTS
93c3d8a931SKonstantin Belousov 	e->rl_q_owner = curthread;
94c3d8a931SKonstantin Belousov #endif
95c3d8a931SKonstantin Belousov 	return (e);
968f0e9130SKonstantin Belousov }
978f0e9130SKonstantin Belousov 
988f0e9130SKonstantin Belousov void
998f0e9130SKonstantin Belousov rangelock_init(struct rangelock *lock)
1008f0e9130SKonstantin Belousov {
101c3d8a931SKonstantin Belousov 	lock->sleepers = false;
102c3d8a931SKonstantin Belousov 	atomic_store_ptr(&lock->head, NULL);
1038f0e9130SKonstantin Belousov }
1048f0e9130SKonstantin Belousov 
1058f0e9130SKonstantin Belousov void
1068f0e9130SKonstantin Belousov rangelock_destroy(struct rangelock *lock)
1078f0e9130SKonstantin Belousov {
108c3d8a931SKonstantin Belousov 	struct rl_q_entry *e, *ep;
1098f0e9130SKonstantin Belousov 
110c3d8a931SKonstantin Belousov 	MPASS(!lock->sleepers);
111c3d8a931SKonstantin Belousov 	for (e = (struct rl_q_entry *)atomic_load_ptr(&lock->head);
112c3d8a931SKonstantin Belousov 	    e != NULL; e = rl_e_unmark(ep)) {
113c3d8a931SKonstantin Belousov 		ep = atomic_load_ptr(&e->rl_q_next);
114c3d8a931SKonstantin Belousov 		uma_zfree_smr(rl_entry_zone, e);
115c3d8a931SKonstantin Belousov 	}
1168f0e9130SKonstantin Belousov }
1178f0e9130SKonstantin Belousov 
118c3d8a931SKonstantin Belousov static bool
119c3d8a931SKonstantin Belousov rl_e_is_marked(const struct rl_q_entry *e)
1208f0e9130SKonstantin Belousov {
121c3d8a931SKonstantin Belousov 	return (((uintptr_t)e & 1) != 0);
1228f0e9130SKonstantin Belousov }
1238f0e9130SKonstantin Belousov 
124c3d8a931SKonstantin Belousov static struct rl_q_entry *
125*5badbeeaSKonstantin Belousov rl_e_unmark_unchecked(const struct rl_q_entry *e)
126*5badbeeaSKonstantin Belousov {
127*5badbeeaSKonstantin Belousov 	return ((struct rl_q_entry *)((uintptr_t)e & ~1));
128*5badbeeaSKonstantin Belousov }
129*5badbeeaSKonstantin Belousov 
130*5badbeeaSKonstantin Belousov static struct rl_q_entry *
131c3d8a931SKonstantin Belousov rl_e_unmark(const struct rl_q_entry *e)
1328f0e9130SKonstantin Belousov {
133c3d8a931SKonstantin Belousov 	MPASS(rl_e_is_marked(e));
134*5badbeeaSKonstantin Belousov 	return (rl_e_unmark_unchecked(e));
135*5badbeeaSKonstantin Belousov }
136*5badbeeaSKonstantin Belousov 
137*5badbeeaSKonstantin Belousov static void
138*5badbeeaSKonstantin Belousov rl_e_mark(struct rl_q_entry *e)
139*5badbeeaSKonstantin Belousov {
140*5badbeeaSKonstantin Belousov #if defined(INVARIANTS) && defined(__LP64__)
141*5badbeeaSKonstantin Belousov 	int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0);
142*5badbeeaSKonstantin Belousov 	MPASS(r == 0);
143*5badbeeaSKonstantin Belousov #else
144*5badbeeaSKonstantin Belousov 	atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
145*5badbeeaSKonstantin Belousov #endif
1462bb93f2dSColin Percival }
1472bb93f2dSColin Percival 
148c3d8a931SKonstantin Belousov static struct rl_q_entry *
149c3d8a931SKonstantin Belousov rl_q_load(struct rl_q_entry **p)
1508f0e9130SKonstantin Belousov {
151c3d8a931SKonstantin Belousov 	return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p));
1528f0e9130SKonstantin Belousov }
1538f0e9130SKonstantin Belousov 
1546c32d89eSKonstantin Belousov static bool
1556c32d89eSKonstantin Belousov rl_e_is_rlock(const struct rl_q_entry *e)
1566c32d89eSKonstantin Belousov {
1576c32d89eSKonstantin Belousov 	return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ);
1586c32d89eSKonstantin Belousov }
1596c32d89eSKonstantin Belousov 
160*5badbeeaSKonstantin Belousov static void
161*5badbeeaSKonstantin Belousov rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e)
1628f0e9130SKonstantin Belousov {
163c3d8a931SKonstantin Belousov 	MPASS(lock != NULL && e != NULL);
164c3d8a931SKonstantin Belousov 	MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
165c3d8a931SKonstantin Belousov 	MPASS(e->rl_q_owner == curthread);
1668f0e9130SKonstantin Belousov 
167*5badbeeaSKonstantin Belousov 	rl_e_mark(e);
168c3d8a931SKonstantin Belousov 	lock->sleepers = false;
169c3d8a931SKonstantin Belousov 	sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
170*5badbeeaSKonstantin Belousov }
171*5badbeeaSKonstantin Belousov 
172*5badbeeaSKonstantin Belousov void
173*5badbeeaSKonstantin Belousov rangelock_unlock(struct rangelock *lock, void *cookie)
174*5badbeeaSKonstantin Belousov {
175*5badbeeaSKonstantin Belousov 	sleepq_lock(&lock->sleepers);
176*5badbeeaSKonstantin Belousov 	rangelock_unlock_int(lock, cookie);
177c3d8a931SKonstantin Belousov 	sleepq_release(&lock->sleepers);
1788f0e9130SKonstantin Belousov }
1798f0e9130SKonstantin Belousov 
1808f0e9130SKonstantin Belousov /*
181*5badbeeaSKonstantin Belousov  * result: -1 if e1 before e2, or both locks are readers and e1
182*5badbeeaSKonstantin Belousov  *		starts before or at e2
183*5badbeeaSKonstantin Belousov  *          1 if e1 after e2, or both locks are readers and e1
184*5badbeeaSKonstantin Belousov  *		starts after e2
185*5badbeeaSKonstantin Belousov  *          0 if e1 and e2 overlap and at least one lock is writer
1868f0e9130SKonstantin Belousov  */
187c3d8a931SKonstantin Belousov static int
188c3d8a931SKonstantin Belousov rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
1898f0e9130SKonstantin Belousov {
190*5badbeeaSKonstantin Belousov 	bool rds;
191*5badbeeaSKonstantin Belousov 
192c3d8a931SKonstantin Belousov 	if (e1 == NULL)
193c3d8a931SKonstantin Belousov 		return (1);
194c3d8a931SKonstantin Belousov 	if (e2->rl_q_start >= e1->rl_q_end)
195c3d8a931SKonstantin Belousov 		return (-1);
196*5badbeeaSKonstantin Belousov 	rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2);
197*5badbeeaSKonstantin Belousov 	if (e2->rl_q_start >= e1->rl_q_start && rds)
198*5badbeeaSKonstantin Belousov 		return (-1);
199*5badbeeaSKonstantin Belousov 	if (e1->rl_q_start >= e2->rl_q_end)
200*5badbeeaSKonstantin Belousov 		return (1);
201*5badbeeaSKonstantin Belousov 	if (e1->rl_q_start >= e2->rl_q_start && rds)
202*5badbeeaSKonstantin Belousov 		return (1);
203c3d8a931SKonstantin Belousov 	return (0);
2048f0e9130SKonstantin Belousov }
2058f0e9130SKonstantin Belousov 
206c3d8a931SKonstantin Belousov static void
207c3d8a931SKonstantin Belousov rl_insert_sleep(struct rangelock *lock)
2088f0e9130SKonstantin Belousov {
209c3d8a931SKonstantin Belousov 	smr_exit(rl_smr);
210c3d8a931SKonstantin Belousov 	DROP_GIANT();
211c3d8a931SKonstantin Belousov 	lock->sleepers = true;
212c3d8a931SKonstantin Belousov 	sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0);
213c3d8a931SKonstantin Belousov 	sleepq_wait(&lock->sleepers, PRI_USER);
214c3d8a931SKonstantin Belousov 	PICKUP_GIANT();
215c3d8a931SKonstantin Belousov 	smr_enter(rl_smr);
216c3d8a931SKonstantin Belousov }
2178f0e9130SKonstantin Belousov 
218c3d8a931SKonstantin Belousov static bool
219c3d8a931SKonstantin Belousov rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
220c3d8a931SKonstantin Belousov     struct rl_q_entry *new)
221c3d8a931SKonstantin Belousov {
222c3d8a931SKonstantin Belousov 	return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old,
223c3d8a931SKonstantin Belousov 	    (uintptr_t)new) != 0);
224c3d8a931SKonstantin Belousov }
2258f0e9130SKonstantin Belousov 
226*5badbeeaSKonstantin Belousov enum RL_INSERT_RES {
227*5badbeeaSKonstantin Belousov 	RL_TRYLOCK_FAILED,
228*5badbeeaSKonstantin Belousov 	RL_LOCK_SUCCESS,
229*5badbeeaSKonstantin Belousov 	RL_LOCK_RETRY,
230*5badbeeaSKonstantin Belousov };
231*5badbeeaSKonstantin Belousov 
232*5badbeeaSKonstantin Belousov static enum RL_INSERT_RES
233*5badbeeaSKonstantin Belousov rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
234*5badbeeaSKonstantin Belousov     struct rl_q_entry **free)
235*5badbeeaSKonstantin Belousov {
236*5badbeeaSKonstantin Belousov 	struct rl_q_entry *cur, *next, **prev;
237*5badbeeaSKonstantin Belousov 
238*5badbeeaSKonstantin Belousov 	prev = &e->rl_q_next;
239*5badbeeaSKonstantin Belousov 	cur = rl_q_load(prev);
240*5badbeeaSKonstantin Belousov 	MPASS(!rl_e_is_marked(cur));	/* nobody can unlock e yet */
241*5badbeeaSKonstantin Belousov 	for (;;) {
242*5badbeeaSKonstantin Belousov 		if (cur == NULL || cur->rl_q_start > e->rl_q_end)
243*5badbeeaSKonstantin Belousov 			return (RL_LOCK_SUCCESS);
244*5badbeeaSKonstantin Belousov 		next = rl_q_load(&cur->rl_q_next);
245*5badbeeaSKonstantin Belousov 		if (rl_e_is_marked(next)) {
246*5badbeeaSKonstantin Belousov 			next = rl_e_unmark(next);
247*5badbeeaSKonstantin Belousov 			if (rl_q_cas(prev, cur, next)) {
248*5badbeeaSKonstantin Belousov 				cur->rl_q_free = *free;
249*5badbeeaSKonstantin Belousov 				*free = cur;
250*5badbeeaSKonstantin Belousov 			}
251*5badbeeaSKonstantin Belousov 			cur = next;
252*5badbeeaSKonstantin Belousov 			continue;
253*5badbeeaSKonstantin Belousov 		}
254*5badbeeaSKonstantin Belousov 		if (rl_e_is_rlock(cur)) {
255*5badbeeaSKonstantin Belousov 			prev = &cur->rl_q_next;
256*5badbeeaSKonstantin Belousov 			cur = rl_e_unmark_unchecked(rl_q_load(prev));
257*5badbeeaSKonstantin Belousov 			continue;
258*5badbeeaSKonstantin Belousov 		}
259*5badbeeaSKonstantin Belousov 		if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
260*5badbeeaSKonstantin Belousov 			sleepq_lock(&lock->sleepers);
261*5badbeeaSKonstantin Belousov 			if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
262*5badbeeaSKonstantin Belousov 				sleepq_release(&lock->sleepers);
263*5badbeeaSKonstantin Belousov 				continue;
264*5badbeeaSKonstantin Belousov 			}
265*5badbeeaSKonstantin Belousov 			rangelock_unlock_int(lock, e);
266*5badbeeaSKonstantin Belousov 			if (trylock) {
267*5badbeeaSKonstantin Belousov 				sleepq_release(&lock->sleepers);
268*5badbeeaSKonstantin Belousov 				return (RL_TRYLOCK_FAILED);
269*5badbeeaSKonstantin Belousov 			}
270*5badbeeaSKonstantin Belousov 			rl_insert_sleep(lock);
271*5badbeeaSKonstantin Belousov 			return (RL_LOCK_RETRY);
272*5badbeeaSKonstantin Belousov 		}
273*5badbeeaSKonstantin Belousov 	}
274*5badbeeaSKonstantin Belousov }
275*5badbeeaSKonstantin Belousov 
276*5badbeeaSKonstantin Belousov static enum RL_INSERT_RES
277*5badbeeaSKonstantin Belousov rl_w_validate(struct rangelock *lock, struct rl_q_entry *e,
278*5badbeeaSKonstantin Belousov     bool trylock, struct rl_q_entry **free)
279*5badbeeaSKonstantin Belousov {
280*5badbeeaSKonstantin Belousov 	struct rl_q_entry *cur, *next, **prev;
281*5badbeeaSKonstantin Belousov 
282*5badbeeaSKonstantin Belousov 	prev = &lock->head;
283*5badbeeaSKonstantin Belousov 	cur = rl_q_load(prev);
284*5badbeeaSKonstantin Belousov 	MPASS(!rl_e_is_marked(cur));	/* head is not marked */
285*5badbeeaSKonstantin Belousov 	for (;;) {
286*5badbeeaSKonstantin Belousov 		if (cur == e)
287*5badbeeaSKonstantin Belousov 			return (RL_LOCK_SUCCESS);
288*5badbeeaSKonstantin Belousov 		next = rl_q_load(&cur->rl_q_next);
289*5badbeeaSKonstantin Belousov 		if (rl_e_is_marked(next)) {
290*5badbeeaSKonstantin Belousov 			next = rl_e_unmark(next);
291*5badbeeaSKonstantin Belousov 			if (rl_q_cas(prev, cur, next)) {
292*5badbeeaSKonstantin Belousov 				cur->rl_q_next = *free;
293*5badbeeaSKonstantin Belousov 				*free = cur;
294*5badbeeaSKonstantin Belousov 			}
295*5badbeeaSKonstantin Belousov 			cur = next;
296*5badbeeaSKonstantin Belousov 			continue;
297*5badbeeaSKonstantin Belousov 		}
298*5badbeeaSKonstantin Belousov 		if (cur->rl_q_end <= e->rl_q_start) {
299*5badbeeaSKonstantin Belousov 			prev = &cur->rl_q_next;
300*5badbeeaSKonstantin Belousov 			cur = rl_e_unmark_unchecked(rl_q_load(prev));
301*5badbeeaSKonstantin Belousov 			continue;
302*5badbeeaSKonstantin Belousov 		}
303*5badbeeaSKonstantin Belousov 		sleepq_lock(&lock->sleepers);
304*5badbeeaSKonstantin Belousov 		rangelock_unlock_int(lock, e);
305*5badbeeaSKonstantin Belousov 		if (trylock) {
306*5badbeeaSKonstantin Belousov 			sleepq_release(&lock->sleepers);
307*5badbeeaSKonstantin Belousov 			return (RL_TRYLOCK_FAILED);
308*5badbeeaSKonstantin Belousov 		}
309*5badbeeaSKonstantin Belousov 		rl_insert_sleep(lock);
310*5badbeeaSKonstantin Belousov 		return (RL_LOCK_RETRY);
311*5badbeeaSKonstantin Belousov 	}
312*5badbeeaSKonstantin Belousov }
313*5badbeeaSKonstantin Belousov 
314*5badbeeaSKonstantin Belousov static enum RL_INSERT_RES
315c3d8a931SKonstantin Belousov rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
316c3d8a931SKonstantin Belousov     struct rl_q_entry **free)
317c3d8a931SKonstantin Belousov {
318c3d8a931SKonstantin Belousov 	struct rl_q_entry *cur, *next, **prev;
319c3d8a931SKonstantin Belousov 	int r;
3208f0e9130SKonstantin Belousov 
321c3d8a931SKonstantin Belousov again:
322c3d8a931SKonstantin Belousov 	prev = &lock->head;
323*5badbeeaSKonstantin Belousov 	cur = rl_q_load(prev);
324*5badbeeaSKonstantin Belousov 	if (cur == NULL && rl_q_cas(prev, NULL, e))
325*5badbeeaSKonstantin Belousov 		return (RL_LOCK_SUCCESS);
3268f0e9130SKonstantin Belousov 
327*5badbeeaSKonstantin Belousov 	for (;;) {
328*5badbeeaSKonstantin Belousov 		if (cur != NULL) {
329c3d8a931SKonstantin Belousov 			if (rl_e_is_marked(cur))
330c3d8a931SKonstantin Belousov 				goto again;
331c3d8a931SKonstantin Belousov 
332c3d8a931SKonstantin Belousov 			next = rl_q_load(&cur->rl_q_next);
333c3d8a931SKonstantin Belousov 			if (rl_e_is_marked(next)) {
334c3d8a931SKonstantin Belousov 				next = rl_e_unmark(next);
335c3d8a931SKonstantin Belousov 				if (rl_q_cas(prev, cur, next)) {
336c3d8a931SKonstantin Belousov #ifdef INVARIANTS
337c3d8a931SKonstantin Belousov 					cur->rl_q_owner = NULL;
338c3d8a931SKonstantin Belousov #endif
339c3d8a931SKonstantin Belousov 					cur->rl_q_free = *free;
340c3d8a931SKonstantin Belousov 					*free = cur;
341c3d8a931SKonstantin Belousov 				}
342c3d8a931SKonstantin Belousov 				cur = next;
343c3d8a931SKonstantin Belousov 				continue;
344c3d8a931SKonstantin Belousov 			}
345c3d8a931SKonstantin Belousov 		}
346c3d8a931SKonstantin Belousov 
347c3d8a931SKonstantin Belousov 		r = rl_e_compare(cur, e);
348c3d8a931SKonstantin Belousov 		if (r == -1) {
349c3d8a931SKonstantin Belousov 			prev = &cur->rl_q_next;
350c3d8a931SKonstantin Belousov 			cur = rl_q_load(prev);
351c3d8a931SKonstantin Belousov 		} else if (r == 0) {
352c3d8a931SKonstantin Belousov 			sleepq_lock(&lock->sleepers);
353c3d8a931SKonstantin Belousov 			if (__predict_false(rl_e_is_marked(rl_q_load(
354c3d8a931SKonstantin Belousov 			    &cur->rl_q_next)))) {
355c3d8a931SKonstantin Belousov 				sleepq_release(&lock->sleepers);
356c3d8a931SKonstantin Belousov 				continue;
357c3d8a931SKonstantin Belousov 			}
358e3680954SRick Macklem 			if (trylock) {
359c3d8a931SKonstantin Belousov 				sleepq_release(&lock->sleepers);
360*5badbeeaSKonstantin Belousov 				return (RL_TRYLOCK_FAILED);
361e3680954SRick Macklem 			}
362c3d8a931SKonstantin Belousov 			rl_insert_sleep(lock);
363c3d8a931SKonstantin Belousov 			/* e is still valid */
364c3d8a931SKonstantin Belousov 			goto again;
365c3d8a931SKonstantin Belousov 		} else /* r == 1 */ {
366c3d8a931SKonstantin Belousov 			e->rl_q_next = cur;
367c3d8a931SKonstantin Belousov 			if (rl_q_cas(prev, cur, e)) {
368c3d8a931SKonstantin Belousov 				atomic_thread_fence_acq();
369*5badbeeaSKonstantin Belousov 				return (rl_e_is_rlock(e) ?
370*5badbeeaSKonstantin Belousov 				    rl_r_validate(lock, e, trylock, free) :
371*5badbeeaSKonstantin Belousov 				    rl_w_validate(lock, e, trylock, free));
372e3680954SRick Macklem 			}
373c3d8a931SKonstantin Belousov 			/* Reset rl_q_next in case we hit fast path. */
374c3d8a931SKonstantin Belousov 			e->rl_q_next = NULL;
375c3d8a931SKonstantin Belousov 			cur = rl_q_load(prev);
376c3d8a931SKonstantin Belousov 		}
377c3d8a931SKonstantin Belousov 	}
378c3d8a931SKonstantin Belousov }
379c3d8a931SKonstantin Belousov 
380c3d8a931SKonstantin Belousov static struct rl_q_entry *
381*5badbeeaSKonstantin Belousov rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
382*5badbeeaSKonstantin Belousov     vm_ooffset_t end, int locktype)
383c3d8a931SKonstantin Belousov {
384*5badbeeaSKonstantin Belousov 	struct rl_q_entry *e, *free, *x, *xp;
385*5badbeeaSKonstantin Belousov 	enum RL_INSERT_RES res;
386c3d8a931SKonstantin Belousov 
387*5badbeeaSKonstantin Belousov 	for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) {
388c3d8a931SKonstantin Belousov 		free = NULL;
389*5badbeeaSKonstantin Belousov 		e = rlqentry_alloc(start, end, locktype);
390c3d8a931SKonstantin Belousov 		smr_enter(rl_smr);
391c3d8a931SKonstantin Belousov 		res = rl_insert(lock, e, trylock, &free);
392c3d8a931SKonstantin Belousov 		smr_exit(rl_smr);
393*5badbeeaSKonstantin Belousov 		if (res == RL_TRYLOCK_FAILED) {
394*5badbeeaSKonstantin Belousov 			MPASS(trylock);
395c3d8a931SKonstantin Belousov 			e->rl_q_free = free;
396c3d8a931SKonstantin Belousov 			free = e;
397c3d8a931SKonstantin Belousov 			e = NULL;
398c3d8a931SKonstantin Belousov 		}
399c3d8a931SKonstantin Belousov 		for (x = free; x != NULL; x = xp) {
400c3d8a931SKonstantin Belousov 		  MPASS(!rl_e_is_marked(x));
401c3d8a931SKonstantin Belousov 		  xp = x->rl_q_free;
402c3d8a931SKonstantin Belousov 		  MPASS(!rl_e_is_marked(xp));
403c3d8a931SKonstantin Belousov 		  uma_zfree_smr(rl_entry_zone, x);
404c3d8a931SKonstantin Belousov 		}
405*5badbeeaSKonstantin Belousov 	}
406c3d8a931SKonstantin Belousov 	return (e);
4078f0e9130SKonstantin Belousov }
4088f0e9130SKonstantin Belousov 
4098f0e9130SKonstantin Belousov void *
410c3d8a931SKonstantin Belousov rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
4118f0e9130SKonstantin Belousov {
412*5badbeeaSKonstantin Belousov 	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ));
413e3680954SRick Macklem }
414e3680954SRick Macklem 
415e3680954SRick Macklem void *
416c3d8a931SKonstantin Belousov rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
417e3680954SRick Macklem {
418*5badbeeaSKonstantin Belousov 	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ));
4198f0e9130SKonstantin Belousov }
4208f0e9130SKonstantin Belousov 
4218f0e9130SKonstantin Belousov void *
422c3d8a931SKonstantin Belousov rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
4238f0e9130SKonstantin Belousov {
424*5badbeeaSKonstantin Belousov 	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
425e3680954SRick Macklem }
426e3680954SRick Macklem 
427e3680954SRick Macklem void *
428c3d8a931SKonstantin Belousov rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
429e3680954SRick Macklem {
430*5badbeeaSKonstantin Belousov 	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
4318f0e9130SKonstantin Belousov }
4323155f2f0SKyle Evans 
4333155f2f0SKyle Evans #ifdef INVARIANT_SUPPORT
4343155f2f0SKyle Evans void
4353155f2f0SKyle Evans _rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
4363155f2f0SKyle Evans {
4373155f2f0SKyle Evans }
4383155f2f0SKyle Evans #endif	/* INVARIANT_SUPPORT */
439c3d8a931SKonstantin Belousov 
440c3d8a931SKonstantin Belousov #include "opt_ddb.h"
441c3d8a931SKonstantin Belousov #ifdef DDB
442c3d8a931SKonstantin Belousov #include <ddb/ddb.h>
443c3d8a931SKonstantin Belousov 
444c3d8a931SKonstantin Belousov DB_SHOW_COMMAND(rangelock, db_show_rangelock)
445c3d8a931SKonstantin Belousov {
446c3d8a931SKonstantin Belousov 	struct rangelock *lock;
447c3d8a931SKonstantin Belousov 	struct rl_q_entry *e, *x;
448c3d8a931SKonstantin Belousov 
449c3d8a931SKonstantin Belousov 	if (!have_addr) {
450c3d8a931SKonstantin Belousov 		db_printf("show rangelock addr\n");
451c3d8a931SKonstantin Belousov 		return;
452c3d8a931SKonstantin Belousov 	}
453c3d8a931SKonstantin Belousov 
454c3d8a931SKonstantin Belousov 	lock = (struct rangelock *)addr;
455c3d8a931SKonstantin Belousov 	db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers);
456c3d8a931SKonstantin Belousov 	for (e = lock->head;;) {
457c3d8a931SKonstantin Belousov 		x = rl_e_is_marked(e) ? rl_e_unmark(e) : e;
458c3d8a931SKonstantin Belousov 		if (x == NULL)
459c3d8a931SKonstantin Belousov 			break;
460c3d8a931SKonstantin Belousov 		db_printf("  entry %p marked %d %d start %#jx end %#jx "
461c3d8a931SKonstantin Belousov 		    "flags %x next %p",
462c3d8a931SKonstantin Belousov 		    e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next),
463c3d8a931SKonstantin Belousov 		    x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next);
464c3d8a931SKonstantin Belousov #ifdef INVARIANTS
465c3d8a931SKonstantin Belousov 		db_printf(" owner %p (%d)", x->rl_q_owner,
466c3d8a931SKonstantin Belousov 		    x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1);
467c3d8a931SKonstantin Belousov #endif
468c3d8a931SKonstantin Belousov 		db_printf("\n");
469c3d8a931SKonstantin Belousov 		e = x->rl_q_next;
470c3d8a931SKonstantin Belousov 	}
471c3d8a931SKonstantin Belousov }
472c3d8a931SKonstantin Belousov 
473c3d8a931SKonstantin Belousov #endif	/* DDB */
474