xref: /freebsd/sys/kern/kern_rangelock.c (revision c31580080e055573a32d886b1ea83bebd9ef4e77)
18f0e9130SKonstantin Belousov /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
38a36da99SPedro F. Giffuni  *
48f0e9130SKonstantin Belousov  * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
58f0e9130SKonstantin Belousov  * All rights reserved.
68f0e9130SKonstantin Belousov  *
78f0e9130SKonstantin Belousov  * Redistribution and use in source and binary forms, with or without
88f0e9130SKonstantin Belousov  * modification, are permitted provided that the following conditions
98f0e9130SKonstantin Belousov  * are met:
108f0e9130SKonstantin Belousov  * 1. Redistributions of source code must retain the above copyright
118f0e9130SKonstantin Belousov  *    notice unmodified, this list of conditions, and the following
128f0e9130SKonstantin Belousov  *    disclaimer.
138f0e9130SKonstantin Belousov  * 2. Redistributions in binary form must reproduce the above copyright
148f0e9130SKonstantin Belousov  *    notice, this list of conditions and the following disclaimer in the
158f0e9130SKonstantin Belousov  *    documentation and/or other materials provided with the distribution.
168f0e9130SKonstantin Belousov  *
178f0e9130SKonstantin Belousov  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
188f0e9130SKonstantin Belousov  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
198f0e9130SKonstantin Belousov  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
208f0e9130SKonstantin Belousov  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
218f0e9130SKonstantin Belousov  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
228f0e9130SKonstantin Belousov  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
238f0e9130SKonstantin Belousov  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
248f0e9130SKonstantin Belousov  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
258f0e9130SKonstantin Belousov  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
268f0e9130SKonstantin Belousov  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
278f0e9130SKonstantin Belousov  */
288f0e9130SKonstantin Belousov 
298f0e9130SKonstantin Belousov #include <sys/param.h>
30c3d8a931SKonstantin Belousov #include <sys/kassert.h>
318f0e9130SKonstantin Belousov #include <sys/kernel.h>
328f0e9130SKonstantin Belousov #include <sys/lock.h>
338f0e9130SKonstantin Belousov #include <sys/mutex.h>
348f0e9130SKonstantin Belousov #include <sys/proc.h>
358f0e9130SKonstantin Belousov #include <sys/rangelock.h>
36c3d8a931SKonstantin Belousov #include <sys/sleepqueue.h>
37c3d8a931SKonstantin Belousov #include <sys/smr.h>
388f0e9130SKonstantin Belousov 
398f0e9130SKonstantin Belousov #include <vm/uma.h>
408f0e9130SKonstantin Belousov 
41c3d8a931SKonstantin Belousov /*
42c3d8a931SKonstantin Belousov  * Implementation of range locks based on the paper
43c3d8a931SKonstantin Belousov  * https://doi.org/10.1145/3342195.3387533
44c3d8a931SKonstantin Belousov  * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020
45c3d8a931SKonstantin Belousov  * Scalable Range Locks for Scalable Address Spaces and Beyond
46c3d8a931SKonstantin Belousov  * by Alex Kogan, Dave Dice, and Shady Issa
47c3d8a931SKonstantin Belousov  */
48c3d8a931SKonstantin Belousov 
49c3d8a931SKonstantin Belousov static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e);
50c3d8a931SKonstantin Belousov 
51c3d8a931SKonstantin Belousov /*
52c3d8a931SKonstantin Belousov  * rl_q_next links all granted ranges in the lock.  We cannot free an
53c3d8a931SKonstantin Belousov  * rl_q_entry while in the smr section, and cannot reuse rl_q_next
54c3d8a931SKonstantin Belousov  * linkage since other threads might follow it even after CAS removed
55c3d8a931SKonstantin Belousov  * the range.  Use rl_q_free for local list of ranges to remove after
56c3d8a931SKonstantin Belousov  * the smr section is dropped.
57c3d8a931SKonstantin Belousov  */
588f0e9130SKonstantin Belousov struct rl_q_entry {
59c3d8a931SKonstantin Belousov 	struct rl_q_entry *rl_q_next;
60c3d8a931SKonstantin Belousov 	struct rl_q_entry *rl_q_free;
618f0e9130SKonstantin Belousov 	off_t		rl_q_start, rl_q_end;
628f0e9130SKonstantin Belousov 	int		rl_q_flags;
63c3d8a931SKonstantin Belousov #ifdef INVARIANTS
64c3d8a931SKonstantin Belousov 	struct thread	*rl_q_owner;
65c3d8a931SKonstantin Belousov #endif
668f0e9130SKonstantin Belousov };
678f0e9130SKonstantin Belousov 
688f0e9130SKonstantin Belousov static uma_zone_t rl_entry_zone;
69c3d8a931SKonstantin Belousov static smr_t rl_smr;
708f0e9130SKonstantin Belousov 
718f0e9130SKonstantin Belousov static void
728f0e9130SKonstantin Belousov rangelock_sys_init(void)
738f0e9130SKonstantin Belousov {
748f0e9130SKonstantin Belousov 	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
75c3d8a931SKonstantin Belousov 	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
76c3d8a931SKonstantin Belousov 	    UMA_ZONE_SMR);
77c3d8a931SKonstantin Belousov 	rl_smr = uma_zone_get_smr(rl_entry_zone);
788f0e9130SKonstantin Belousov }
79c3d8a931SKonstantin Belousov SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);
808f0e9130SKonstantin Belousov 
818f0e9130SKonstantin Belousov static struct rl_q_entry *
82c3d8a931SKonstantin Belousov rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
838f0e9130SKonstantin Belousov {
84c3d8a931SKonstantin Belousov 	struct rl_q_entry *e;
858f0e9130SKonstantin Belousov 
86c3d8a931SKonstantin Belousov 	e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
87c3d8a931SKonstantin Belousov 	e->rl_q_next = NULL;
88c3d8a931SKonstantin Belousov 	e->rl_q_free = NULL;
89c3d8a931SKonstantin Belousov 	e->rl_q_start = start;
90c3d8a931SKonstantin Belousov 	e->rl_q_end = end;
91c3d8a931SKonstantin Belousov 	e->rl_q_flags = flags;
92c3d8a931SKonstantin Belousov #ifdef INVARIANTS
93c3d8a931SKonstantin Belousov 	e->rl_q_owner = curthread;
94c3d8a931SKonstantin Belousov #endif
95c3d8a931SKonstantin Belousov 	return (e);
968f0e9130SKonstantin Belousov }
978f0e9130SKonstantin Belousov 
988f0e9130SKonstantin Belousov void
998f0e9130SKonstantin Belousov rangelock_init(struct rangelock *lock)
1008f0e9130SKonstantin Belousov {
101c3d8a931SKonstantin Belousov 	lock->sleepers = false;
102c3d8a931SKonstantin Belousov 	atomic_store_ptr(&lock->head, NULL);
1038f0e9130SKonstantin Belousov }
1048f0e9130SKonstantin Belousov 
1058f0e9130SKonstantin Belousov void
1068f0e9130SKonstantin Belousov rangelock_destroy(struct rangelock *lock)
1078f0e9130SKonstantin Belousov {
108c3d8a931SKonstantin Belousov 	struct rl_q_entry *e, *ep;
1098f0e9130SKonstantin Belousov 
110c3d8a931SKonstantin Belousov 	MPASS(!lock->sleepers);
111c3d8a931SKonstantin Belousov 	for (e = (struct rl_q_entry *)atomic_load_ptr(&lock->head);
112c3d8a931SKonstantin Belousov 	    e != NULL; e = rl_e_unmark(ep)) {
113c3d8a931SKonstantin Belousov 		ep = atomic_load_ptr(&e->rl_q_next);
114c3d8a931SKonstantin Belousov 		uma_zfree_smr(rl_entry_zone, e);
115c3d8a931SKonstantin Belousov 	}
1168f0e9130SKonstantin Belousov }
1178f0e9130SKonstantin Belousov 
118c3d8a931SKonstantin Belousov static bool
119c3d8a931SKonstantin Belousov rl_e_is_marked(const struct rl_q_entry *e)
1208f0e9130SKonstantin Belousov {
121c3d8a931SKonstantin Belousov 	return (((uintptr_t)e & 1) != 0);
1228f0e9130SKonstantin Belousov }
1238f0e9130SKonstantin Belousov 
124c3d8a931SKonstantin Belousov static struct rl_q_entry *
1255badbeeaSKonstantin Belousov rl_e_unmark_unchecked(const struct rl_q_entry *e)
1265badbeeaSKonstantin Belousov {
1275badbeeaSKonstantin Belousov 	return ((struct rl_q_entry *)((uintptr_t)e & ~1));
1285badbeeaSKonstantin Belousov }
1295badbeeaSKonstantin Belousov 
1305badbeeaSKonstantin Belousov static struct rl_q_entry *
131c3d8a931SKonstantin Belousov rl_e_unmark(const struct rl_q_entry *e)
1328f0e9130SKonstantin Belousov {
133c3d8a931SKonstantin Belousov 	MPASS(rl_e_is_marked(e));
1345badbeeaSKonstantin Belousov 	return (rl_e_unmark_unchecked(e));
1355badbeeaSKonstantin Belousov }
1365badbeeaSKonstantin Belousov 
1375badbeeaSKonstantin Belousov static void
1385badbeeaSKonstantin Belousov rl_e_mark(struct rl_q_entry *e)
1395badbeeaSKonstantin Belousov {
1405badbeeaSKonstantin Belousov #if defined(INVARIANTS) && defined(__LP64__)
1415badbeeaSKonstantin Belousov 	int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0);
1425badbeeaSKonstantin Belousov 	MPASS(r == 0);
1435badbeeaSKonstantin Belousov #else
1445badbeeaSKonstantin Belousov 	atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
1455badbeeaSKonstantin Belousov #endif
1462bb93f2dSColin Percival }
1472bb93f2dSColin Percival 
148c3d8a931SKonstantin Belousov static struct rl_q_entry *
149c3d8a931SKonstantin Belousov rl_q_load(struct rl_q_entry **p)
1508f0e9130SKonstantin Belousov {
151c3d8a931SKonstantin Belousov 	return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p));
1528f0e9130SKonstantin Belousov }
1538f0e9130SKonstantin Belousov 
1546c32d89eSKonstantin Belousov static bool
1556c32d89eSKonstantin Belousov rl_e_is_rlock(const struct rl_q_entry *e)
1566c32d89eSKonstantin Belousov {
1576c32d89eSKonstantin Belousov 	return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ);
1586c32d89eSKonstantin Belousov }
1596c32d89eSKonstantin Belousov 
1605badbeeaSKonstantin Belousov static void
1615badbeeaSKonstantin Belousov rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e)
1628f0e9130SKonstantin Belousov {
163*c3158008SKonstantin Belousov 	bool sleepers;
164*c3158008SKonstantin Belousov 
165c3d8a931SKonstantin Belousov 	MPASS(lock != NULL && e != NULL);
166c3d8a931SKonstantin Belousov 	MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
167c3d8a931SKonstantin Belousov 	MPASS(e->rl_q_owner == curthread);
1688f0e9130SKonstantin Belousov 
1695badbeeaSKonstantin Belousov 	rl_e_mark(e);
170*c3158008SKonstantin Belousov 	sleepers = lock->sleepers;
171c3d8a931SKonstantin Belousov 	lock->sleepers = false;
172*c3158008SKonstantin Belousov 	if (sleepers)
173c3d8a931SKonstantin Belousov 		sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
1745badbeeaSKonstantin Belousov }
1755badbeeaSKonstantin Belousov 
1765badbeeaSKonstantin Belousov void
1775badbeeaSKonstantin Belousov rangelock_unlock(struct rangelock *lock, void *cookie)
1785badbeeaSKonstantin Belousov {
1795badbeeaSKonstantin Belousov 	sleepq_lock(&lock->sleepers);
1805badbeeaSKonstantin Belousov 	rangelock_unlock_int(lock, cookie);
181c3d8a931SKonstantin Belousov 	sleepq_release(&lock->sleepers);
1828f0e9130SKonstantin Belousov }
1838f0e9130SKonstantin Belousov 
1848f0e9130SKonstantin Belousov /*
1855badbeeaSKonstantin Belousov  * result: -1 if e1 before e2, or both locks are readers and e1
1865badbeeaSKonstantin Belousov  *		starts before or at e2
1875badbeeaSKonstantin Belousov  *          1 if e1 after e2, or both locks are readers and e1
1885badbeeaSKonstantin Belousov  *		starts after e2
1895badbeeaSKonstantin Belousov  *          0 if e1 and e2 overlap and at least one lock is writer
1908f0e9130SKonstantin Belousov  */
191c3d8a931SKonstantin Belousov static int
192c3d8a931SKonstantin Belousov rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
1938f0e9130SKonstantin Belousov {
1945badbeeaSKonstantin Belousov 	bool rds;
1955badbeeaSKonstantin Belousov 
196c3d8a931SKonstantin Belousov 	if (e1 == NULL)
197c3d8a931SKonstantin Belousov 		return (1);
198c3d8a931SKonstantin Belousov 	if (e2->rl_q_start >= e1->rl_q_end)
199c3d8a931SKonstantin Belousov 		return (-1);
2005badbeeaSKonstantin Belousov 	rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2);
2015badbeeaSKonstantin Belousov 	if (e2->rl_q_start >= e1->rl_q_start && rds)
2025badbeeaSKonstantin Belousov 		return (-1);
2035badbeeaSKonstantin Belousov 	if (e1->rl_q_start >= e2->rl_q_end)
2045badbeeaSKonstantin Belousov 		return (1);
2055badbeeaSKonstantin Belousov 	if (e1->rl_q_start >= e2->rl_q_start && rds)
2065badbeeaSKonstantin Belousov 		return (1);
207c3d8a931SKonstantin Belousov 	return (0);
2088f0e9130SKonstantin Belousov }
2098f0e9130SKonstantin Belousov 
210c3d8a931SKonstantin Belousov static void
211c3d8a931SKonstantin Belousov rl_insert_sleep(struct rangelock *lock)
2128f0e9130SKonstantin Belousov {
213c3d8a931SKonstantin Belousov 	smr_exit(rl_smr);
214c3d8a931SKonstantin Belousov 	DROP_GIANT();
215c3d8a931SKonstantin Belousov 	lock->sleepers = true;
216c3d8a931SKonstantin Belousov 	sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0);
217c3d8a931SKonstantin Belousov 	sleepq_wait(&lock->sleepers, PRI_USER);
218c3d8a931SKonstantin Belousov 	PICKUP_GIANT();
219c3d8a931SKonstantin Belousov 	smr_enter(rl_smr);
220c3d8a931SKonstantin Belousov }
2218f0e9130SKonstantin Belousov 
222c3d8a931SKonstantin Belousov static bool
223c3d8a931SKonstantin Belousov rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
224c3d8a931SKonstantin Belousov     struct rl_q_entry *new)
225c3d8a931SKonstantin Belousov {
226c3d8a931SKonstantin Belousov 	return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old,
227c3d8a931SKonstantin Belousov 	    (uintptr_t)new) != 0);
228c3d8a931SKonstantin Belousov }
2298f0e9130SKonstantin Belousov 
2305badbeeaSKonstantin Belousov enum RL_INSERT_RES {
2315badbeeaSKonstantin Belousov 	RL_TRYLOCK_FAILED,
2325badbeeaSKonstantin Belousov 	RL_LOCK_SUCCESS,
2335badbeeaSKonstantin Belousov 	RL_LOCK_RETRY,
2345badbeeaSKonstantin Belousov };
2355badbeeaSKonstantin Belousov 
2365badbeeaSKonstantin Belousov static enum RL_INSERT_RES
2375badbeeaSKonstantin Belousov rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
2385badbeeaSKonstantin Belousov     struct rl_q_entry **free)
2395badbeeaSKonstantin Belousov {
2405badbeeaSKonstantin Belousov 	struct rl_q_entry *cur, *next, **prev;
2415badbeeaSKonstantin Belousov 
2425badbeeaSKonstantin Belousov 	prev = &e->rl_q_next;
2435badbeeaSKonstantin Belousov 	cur = rl_q_load(prev);
2445badbeeaSKonstantin Belousov 	MPASS(!rl_e_is_marked(cur));	/* nobody can unlock e yet */
2455badbeeaSKonstantin Belousov 	for (;;) {
2465badbeeaSKonstantin Belousov 		if (cur == NULL || cur->rl_q_start > e->rl_q_end)
2475badbeeaSKonstantin Belousov 			return (RL_LOCK_SUCCESS);
2485badbeeaSKonstantin Belousov 		next = rl_q_load(&cur->rl_q_next);
2495badbeeaSKonstantin Belousov 		if (rl_e_is_marked(next)) {
2505badbeeaSKonstantin Belousov 			next = rl_e_unmark(next);
2515badbeeaSKonstantin Belousov 			if (rl_q_cas(prev, cur, next)) {
2525badbeeaSKonstantin Belousov 				cur->rl_q_free = *free;
2535badbeeaSKonstantin Belousov 				*free = cur;
2545badbeeaSKonstantin Belousov 			}
2555badbeeaSKonstantin Belousov 			cur = next;
2565badbeeaSKonstantin Belousov 			continue;
2575badbeeaSKonstantin Belousov 		}
2585badbeeaSKonstantin Belousov 		if (rl_e_is_rlock(cur)) {
2595badbeeaSKonstantin Belousov 			prev = &cur->rl_q_next;
2605badbeeaSKonstantin Belousov 			cur = rl_e_unmark_unchecked(rl_q_load(prev));
2615badbeeaSKonstantin Belousov 			continue;
2625badbeeaSKonstantin Belousov 		}
2635badbeeaSKonstantin Belousov 		if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
2645badbeeaSKonstantin Belousov 			sleepq_lock(&lock->sleepers);
2655badbeeaSKonstantin Belousov 			if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
2665badbeeaSKonstantin Belousov 				sleepq_release(&lock->sleepers);
2675badbeeaSKonstantin Belousov 				continue;
2685badbeeaSKonstantin Belousov 			}
2695badbeeaSKonstantin Belousov 			rangelock_unlock_int(lock, e);
2705badbeeaSKonstantin Belousov 			if (trylock) {
2715badbeeaSKonstantin Belousov 				sleepq_release(&lock->sleepers);
2725badbeeaSKonstantin Belousov 				return (RL_TRYLOCK_FAILED);
2735badbeeaSKonstantin Belousov 			}
2745badbeeaSKonstantin Belousov 			rl_insert_sleep(lock);
2755badbeeaSKonstantin Belousov 			return (RL_LOCK_RETRY);
2765badbeeaSKonstantin Belousov 		}
2775badbeeaSKonstantin Belousov 	}
2785badbeeaSKonstantin Belousov }
2795badbeeaSKonstantin Belousov 
2805badbeeaSKonstantin Belousov static enum RL_INSERT_RES
2815badbeeaSKonstantin Belousov rl_w_validate(struct rangelock *lock, struct rl_q_entry *e,
2825badbeeaSKonstantin Belousov     bool trylock, struct rl_q_entry **free)
2835badbeeaSKonstantin Belousov {
2845badbeeaSKonstantin Belousov 	struct rl_q_entry *cur, *next, **prev;
2855badbeeaSKonstantin Belousov 
2865badbeeaSKonstantin Belousov 	prev = &lock->head;
2875badbeeaSKonstantin Belousov 	cur = rl_q_load(prev);
2885badbeeaSKonstantin Belousov 	MPASS(!rl_e_is_marked(cur));	/* head is not marked */
2895badbeeaSKonstantin Belousov 	for (;;) {
2905badbeeaSKonstantin Belousov 		if (cur == e)
2915badbeeaSKonstantin Belousov 			return (RL_LOCK_SUCCESS);
2925badbeeaSKonstantin Belousov 		next = rl_q_load(&cur->rl_q_next);
2935badbeeaSKonstantin Belousov 		if (rl_e_is_marked(next)) {
2945badbeeaSKonstantin Belousov 			next = rl_e_unmark(next);
2955badbeeaSKonstantin Belousov 			if (rl_q_cas(prev, cur, next)) {
2965badbeeaSKonstantin Belousov 				cur->rl_q_next = *free;
2975badbeeaSKonstantin Belousov 				*free = cur;
2985badbeeaSKonstantin Belousov 			}
2995badbeeaSKonstantin Belousov 			cur = next;
3005badbeeaSKonstantin Belousov 			continue;
3015badbeeaSKonstantin Belousov 		}
3025badbeeaSKonstantin Belousov 		if (cur->rl_q_end <= e->rl_q_start) {
3035badbeeaSKonstantin Belousov 			prev = &cur->rl_q_next;
3045badbeeaSKonstantin Belousov 			cur = rl_e_unmark_unchecked(rl_q_load(prev));
3055badbeeaSKonstantin Belousov 			continue;
3065badbeeaSKonstantin Belousov 		}
3075badbeeaSKonstantin Belousov 		sleepq_lock(&lock->sleepers);
3085badbeeaSKonstantin Belousov 		rangelock_unlock_int(lock, e);
3095badbeeaSKonstantin Belousov 		if (trylock) {
3105badbeeaSKonstantin Belousov 			sleepq_release(&lock->sleepers);
3115badbeeaSKonstantin Belousov 			return (RL_TRYLOCK_FAILED);
3125badbeeaSKonstantin Belousov 		}
3135badbeeaSKonstantin Belousov 		rl_insert_sleep(lock);
3145badbeeaSKonstantin Belousov 		return (RL_LOCK_RETRY);
3155badbeeaSKonstantin Belousov 	}
3165badbeeaSKonstantin Belousov }
3175badbeeaSKonstantin Belousov 
3185badbeeaSKonstantin Belousov static enum RL_INSERT_RES
319c3d8a931SKonstantin Belousov rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
320c3d8a931SKonstantin Belousov     struct rl_q_entry **free)
321c3d8a931SKonstantin Belousov {
322c3d8a931SKonstantin Belousov 	struct rl_q_entry *cur, *next, **prev;
323c3d8a931SKonstantin Belousov 	int r;
3248f0e9130SKonstantin Belousov 
325c3d8a931SKonstantin Belousov again:
326c3d8a931SKonstantin Belousov 	prev = &lock->head;
3275badbeeaSKonstantin Belousov 	cur = rl_q_load(prev);
3285badbeeaSKonstantin Belousov 	if (cur == NULL && rl_q_cas(prev, NULL, e))
3295badbeeaSKonstantin Belousov 		return (RL_LOCK_SUCCESS);
3308f0e9130SKonstantin Belousov 
3315badbeeaSKonstantin Belousov 	for (;;) {
3325badbeeaSKonstantin Belousov 		if (cur != NULL) {
333c3d8a931SKonstantin Belousov 			if (rl_e_is_marked(cur))
334c3d8a931SKonstantin Belousov 				goto again;
335c3d8a931SKonstantin Belousov 
336c3d8a931SKonstantin Belousov 			next = rl_q_load(&cur->rl_q_next);
337c3d8a931SKonstantin Belousov 			if (rl_e_is_marked(next)) {
338c3d8a931SKonstantin Belousov 				next = rl_e_unmark(next);
339c3d8a931SKonstantin Belousov 				if (rl_q_cas(prev, cur, next)) {
340c3d8a931SKonstantin Belousov #ifdef INVARIANTS
341c3d8a931SKonstantin Belousov 					cur->rl_q_owner = NULL;
342c3d8a931SKonstantin Belousov #endif
343c3d8a931SKonstantin Belousov 					cur->rl_q_free = *free;
344c3d8a931SKonstantin Belousov 					*free = cur;
345c3d8a931SKonstantin Belousov 				}
346c3d8a931SKonstantin Belousov 				cur = next;
347c3d8a931SKonstantin Belousov 				continue;
348c3d8a931SKonstantin Belousov 			}
349c3d8a931SKonstantin Belousov 		}
350c3d8a931SKonstantin Belousov 
351c3d8a931SKonstantin Belousov 		r = rl_e_compare(cur, e);
352c3d8a931SKonstantin Belousov 		if (r == -1) {
353c3d8a931SKonstantin Belousov 			prev = &cur->rl_q_next;
354c3d8a931SKonstantin Belousov 			cur = rl_q_load(prev);
355c3d8a931SKonstantin Belousov 		} else if (r == 0) {
356c3d8a931SKonstantin Belousov 			sleepq_lock(&lock->sleepers);
357c3d8a931SKonstantin Belousov 			if (__predict_false(rl_e_is_marked(rl_q_load(
358c3d8a931SKonstantin Belousov 			    &cur->rl_q_next)))) {
359c3d8a931SKonstantin Belousov 				sleepq_release(&lock->sleepers);
360c3d8a931SKonstantin Belousov 				continue;
361c3d8a931SKonstantin Belousov 			}
362e3680954SRick Macklem 			if (trylock) {
363c3d8a931SKonstantin Belousov 				sleepq_release(&lock->sleepers);
3645badbeeaSKonstantin Belousov 				return (RL_TRYLOCK_FAILED);
365e3680954SRick Macklem 			}
366c3d8a931SKonstantin Belousov 			rl_insert_sleep(lock);
367c3d8a931SKonstantin Belousov 			/* e is still valid */
368c3d8a931SKonstantin Belousov 			goto again;
369c3d8a931SKonstantin Belousov 		} else /* r == 1 */ {
370c3d8a931SKonstantin Belousov 			e->rl_q_next = cur;
371c3d8a931SKonstantin Belousov 			if (rl_q_cas(prev, cur, e)) {
372c3d8a931SKonstantin Belousov 				atomic_thread_fence_acq();
3735badbeeaSKonstantin Belousov 				return (rl_e_is_rlock(e) ?
3745badbeeaSKonstantin Belousov 				    rl_r_validate(lock, e, trylock, free) :
3755badbeeaSKonstantin Belousov 				    rl_w_validate(lock, e, trylock, free));
376e3680954SRick Macklem 			}
377c3d8a931SKonstantin Belousov 			/* Reset rl_q_next in case we hit fast path. */
378c3d8a931SKonstantin Belousov 			e->rl_q_next = NULL;
379c3d8a931SKonstantin Belousov 			cur = rl_q_load(prev);
380c3d8a931SKonstantin Belousov 		}
381c3d8a931SKonstantin Belousov 	}
382c3d8a931SKonstantin Belousov }
383c3d8a931SKonstantin Belousov 
384c3d8a931SKonstantin Belousov static struct rl_q_entry *
3855badbeeaSKonstantin Belousov rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
3865badbeeaSKonstantin Belousov     vm_ooffset_t end, int locktype)
387c3d8a931SKonstantin Belousov {
3885badbeeaSKonstantin Belousov 	struct rl_q_entry *e, *free, *x, *xp;
3895badbeeaSKonstantin Belousov 	enum RL_INSERT_RES res;
390c3d8a931SKonstantin Belousov 
3915badbeeaSKonstantin Belousov 	for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) {
392c3d8a931SKonstantin Belousov 		free = NULL;
3935badbeeaSKonstantin Belousov 		e = rlqentry_alloc(start, end, locktype);
394c3d8a931SKonstantin Belousov 		smr_enter(rl_smr);
395c3d8a931SKonstantin Belousov 		res = rl_insert(lock, e, trylock, &free);
396c3d8a931SKonstantin Belousov 		smr_exit(rl_smr);
3975badbeeaSKonstantin Belousov 		if (res == RL_TRYLOCK_FAILED) {
3985badbeeaSKonstantin Belousov 			MPASS(trylock);
399c3d8a931SKonstantin Belousov 			e->rl_q_free = free;
400c3d8a931SKonstantin Belousov 			free = e;
401c3d8a931SKonstantin Belousov 			e = NULL;
402c3d8a931SKonstantin Belousov 		}
403c3d8a931SKonstantin Belousov 		for (x = free; x != NULL; x = xp) {
404c3d8a931SKonstantin Belousov 		  MPASS(!rl_e_is_marked(x));
405c3d8a931SKonstantin Belousov 		  xp = x->rl_q_free;
406c3d8a931SKonstantin Belousov 		  MPASS(!rl_e_is_marked(xp));
407c3d8a931SKonstantin Belousov 		  uma_zfree_smr(rl_entry_zone, x);
408c3d8a931SKonstantin Belousov 		}
4095badbeeaSKonstantin Belousov 	}
410c3d8a931SKonstantin Belousov 	return (e);
4118f0e9130SKonstantin Belousov }
4128f0e9130SKonstantin Belousov 
4138f0e9130SKonstantin Belousov void *
414c3d8a931SKonstantin Belousov rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
4158f0e9130SKonstantin Belousov {
4165badbeeaSKonstantin Belousov 	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ));
417e3680954SRick Macklem }
418e3680954SRick Macklem 
419e3680954SRick Macklem void *
420c3d8a931SKonstantin Belousov rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
421e3680954SRick Macklem {
4225badbeeaSKonstantin Belousov 	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ));
4238f0e9130SKonstantin Belousov }
4248f0e9130SKonstantin Belousov 
4258f0e9130SKonstantin Belousov void *
426c3d8a931SKonstantin Belousov rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
4278f0e9130SKonstantin Belousov {
4285badbeeaSKonstantin Belousov 	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
429e3680954SRick Macklem }
430e3680954SRick Macklem 
431e3680954SRick Macklem void *
432c3d8a931SKonstantin Belousov rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
433e3680954SRick Macklem {
4345badbeeaSKonstantin Belousov 	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
4358f0e9130SKonstantin Belousov }
4363155f2f0SKyle Evans 
4373155f2f0SKyle Evans #ifdef INVARIANT_SUPPORT
4383155f2f0SKyle Evans void
4393155f2f0SKyle Evans _rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
4403155f2f0SKyle Evans {
4413155f2f0SKyle Evans }
4423155f2f0SKyle Evans #endif	/* INVARIANT_SUPPORT */
443c3d8a931SKonstantin Belousov 
444c3d8a931SKonstantin Belousov #include "opt_ddb.h"
445c3d8a931SKonstantin Belousov #ifdef DDB
446c3d8a931SKonstantin Belousov #include <ddb/ddb.h>
447c3d8a931SKonstantin Belousov 
448c3d8a931SKonstantin Belousov DB_SHOW_COMMAND(rangelock, db_show_rangelock)
449c3d8a931SKonstantin Belousov {
450c3d8a931SKonstantin Belousov 	struct rangelock *lock;
451c3d8a931SKonstantin Belousov 	struct rl_q_entry *e, *x;
452c3d8a931SKonstantin Belousov 
453c3d8a931SKonstantin Belousov 	if (!have_addr) {
454c3d8a931SKonstantin Belousov 		db_printf("show rangelock addr\n");
455c3d8a931SKonstantin Belousov 		return;
456c3d8a931SKonstantin Belousov 	}
457c3d8a931SKonstantin Belousov 
458c3d8a931SKonstantin Belousov 	lock = (struct rangelock *)addr;
459c3d8a931SKonstantin Belousov 	db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers);
460c3d8a931SKonstantin Belousov 	for (e = lock->head;;) {
461c3d8a931SKonstantin Belousov 		x = rl_e_is_marked(e) ? rl_e_unmark(e) : e;
462c3d8a931SKonstantin Belousov 		if (x == NULL)
463c3d8a931SKonstantin Belousov 			break;
464c3d8a931SKonstantin Belousov 		db_printf("  entry %p marked %d %d start %#jx end %#jx "
465c3d8a931SKonstantin Belousov 		    "flags %x next %p",
466c3d8a931SKonstantin Belousov 		    e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next),
467c3d8a931SKonstantin Belousov 		    x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next);
468c3d8a931SKonstantin Belousov #ifdef INVARIANTS
469c3d8a931SKonstantin Belousov 		db_printf(" owner %p (%d)", x->rl_q_owner,
470c3d8a931SKonstantin Belousov 		    x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1);
471c3d8a931SKonstantin Belousov #endif
472c3d8a931SKonstantin Belousov 		db_printf("\n");
473c3d8a931SKonstantin Belousov 		e = x->rl_q_next;
474c3d8a931SKonstantin Belousov 	}
475c3d8a931SKonstantin Belousov }
476c3d8a931SKonstantin Belousov 
477c3d8a931SKonstantin Belousov #endif	/* DDB */
478