xref: /freebsd/sys/kern/kern_rangelock.c (revision ff1ae3b3639d39a6485cfc655bf565cd04b9caa6)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kassert.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/sleepqueue.h>
#include <sys/smr.h>

#include <vm/uma.h>

/*
 * Implementation of range locks based on the paper
 * https://doi.org/10.1145/3342195.3387533
 * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020
 * Scalable Range Locks for Scalable Address Spaces and Beyond
 * by Alex Kogan, Dave Dice, and Shady Issa
 */

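/*
 * Usage sketch (illustrative only, not lifted from a particular
 * consumer): a caller locks the byte range it is about to access,
 * performs the I/O, and drops the range using the returned cookie.
 * Ranges are half-open, [start, end).
 *
 *	struct rangelock rl;
 *	void *cookie;
 *
 *	rangelock_init(&rl);
 *	cookie = rangelock_wlock(&rl, start, end);
 *	... modify bytes in [start, end) ...
 *	rangelock_unlock(&rl, cookie);
 *
 * The trylock variants return NULL instead of sleeping when the range
 * cannot be granted immediately.
 */
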
static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e);

/*
 * rl_q_next links all granted ranges in the lock.  We cannot free an
 * rl_q_entry while in the smr section, and cannot reuse rl_q_next
 * linkage since other threads might follow it even after CAS removed
 * the range.  Use rl_q_free for local list of ranges to remove after
 * the smr section is dropped.
 */
struct rl_q_entry {
	struct rl_q_entry *rl_q_next;
	struct rl_q_entry *rl_q_free;
	off_t		rl_q_start, rl_q_end;
	int		rl_q_flags;
#ifdef INVARIANTS
	struct thread	*rl_q_owner;
#endif
};

static uma_zone_t rl_entry_zone;
static smr_t rl_smr;

static void
rangelock_sys_init(void)
{
	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
	    UMA_ZONE_SMR);
	rl_smr = uma_zone_get_smr(rl_entry_zone);
}
SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

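/*
 * Allocate an entry for the requested range.  One entry is cached in
 * curthread->td_rlqe and is preferred over the SMR-protected UMA zone
 * to keep the allocator out of the common path.
 */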
static struct rl_q_entry *
rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
{
	struct rl_q_entry *e;
	struct thread *td;

	td = curthread;
	if (td->td_rlqe != NULL) {
		e = td->td_rlqe;
		td->td_rlqe = NULL;
	} else {
		e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
	}
	e->rl_q_next = NULL;
	e->rl_q_free = NULL;
	e->rl_q_start = start;
	e->rl_q_end = end;
	e->rl_q_flags = flags;
#ifdef INVARIANTS
	e->rl_q_owner = curthread;
#endif
	return (e);
}

void
rangelock_entry_free(struct rl_q_entry *e)
{
	uma_zfree_smr(rl_entry_zone, e);
}

void
rangelock_init(struct rangelock *lock)
{
	lock->sleepers = false;
	atomic_store_ptr(&lock->head, NULL);
}

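/*
 * The caller guarantees that there are no sleepers and no new lock
 * requests.  Entries still linked into the lock are already-unlocked
 * (marked) ranges that were never unlinked, so free them directly.
 */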
void
rangelock_destroy(struct rangelock *lock)
{
	struct rl_q_entry *e, *ep;

	MPASS(!lock->sleepers);
	for (e = (struct rl_q_entry *)atomic_load_ptr(&lock->head);
	    e != NULL; e = rl_e_unmark(ep)) {
		ep = atomic_load_ptr(&e->rl_q_next);
		uma_zfree_smr(rl_entry_zone, e);
	}
}

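/*
 * An unlocked range is removed from the list by setting the low bit of
 * its rl_q_next pointer ("marking" it); the entry stays linked until a
 * later traversal unlinks it with a CAS on the predecessor.  The
 * helpers below test, clear, and set that bit.
 */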
static bool
rl_e_is_marked(const struct rl_q_entry *e)
{
	return (((uintptr_t)e & 1) != 0);
}

static struct rl_q_entry *
rl_e_unmark_unchecked(const struct rl_q_entry *e)
{
	return ((struct rl_q_entry *)((uintptr_t)e & ~1));
}

static struct rl_q_entry *
rl_e_unmark(const struct rl_q_entry *e)
{
	MPASS(rl_e_is_marked(e));
	return (rl_e_unmark_unchecked(e));
}

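/*
 * Mark the entry as removed.  With INVARIANTS on 64-bit platforms the
 * test-and-set additionally checks that the entry was not marked twice.
 */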
static void
rl_e_mark(struct rl_q_entry *e)
{
#if defined(INVARIANTS) && defined(__LP64__)
	int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0);
	MPASS(r == 0);
#else
	atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
#endif
}

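/* Acquire-load a head or rl_q_next pointer that other threads may CAS. */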
static struct rl_q_entry *
rl_q_load(struct rl_q_entry **p)
{
	return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p));
}

static bool
rl_e_is_rlock(const struct rl_q_entry *e)
{
	return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ);
}

static void
rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e)
{
	bool sleepers;

	MPASS(lock != NULL && e != NULL);
	MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
	MPASS(e->rl_q_owner == curthread);

	rl_e_mark(e);
	sleepers = lock->sleepers;
	lock->sleepers = false;
	if (sleepers)
		sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie)
{
	sleepq_lock(&lock->sleepers);
	rangelock_unlock_int(lock, cookie);
	sleepq_release(&lock->sleepers);
}

/*
 * result: -1 if e1 before e2, or both locks are readers and e1
 *            starts before or at e2
 *           1 if e1 after e2, or both locks are readers and e1
 *            starts after e2
 *           0 if e1 and e2 overlap and at least one lock is writer
 */
static int
rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
{
	bool rds;

	if (e1 == NULL)
		return (1);
	if (e2->rl_q_start >= e1->rl_q_end)
		return (-1);
	rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2);
	if (e2->rl_q_start >= e1->rl_q_start && rds)
		return (-1);
	if (e1->rl_q_start >= e2->rl_q_end)
		return (1);
	if (e1->rl_q_start >= e2->rl_q_start && rds)
		return (1);
	return (0);
}

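/*
 * Wait for the lock state to change.  Called with the sleepqueue chain
 * lock for &lock->sleepers held; sleepq_wait() drops it.  The SMR
 * section is left for the duration of the sleep.
 */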
static void
rl_insert_sleep(struct rangelock *lock)
{
	smr_exit(rl_smr);
	DROP_GIANT();
	lock->sleepers = true;
	sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0);
	sleepq_wait(&lock->sleepers, PRI_USER);
	PICKUP_GIANT();
	smr_enter(rl_smr);
}

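/* Release-CAS a head or rl_q_next pointer; returns true on success. */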
static bool
rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
    struct rl_q_entry *new)
{
	return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old,
	    (uintptr_t)new) != 0);
}

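/*
 * Result of inserting a request into the lock:
 * RL_TRYLOCK_FAILED	conflict found, the trylock request was removed;
 * RL_LOCK_SUCCESS	the range is granted;
 * RL_LOCK_RETRY	the thread slept waiting for a conflicting range
 *			and must retry with a freshly allocated entry.
 */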
enum RL_INSERT_RES {
	RL_TRYLOCK_FAILED,
	RL_LOCK_SUCCESS,
	RL_LOCK_RETRY,
};

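/*
 * After a read request was linked, scan the ranges following it and
 * check that no granted write lock overlaps the request.  Removed
 * (marked) entries met on the way are unlinked and collected on the
 * local free list.  A conflicting writer causes the request to be
 * unlocked again, followed by trylock failure or sleep and retry.
 */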
static enum RL_INSERT_RES
rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;

	prev = &e->rl_q_next;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* nobody can unlock e yet */
	for (;;) {
		if (cur == NULL || cur->rl_q_start > e->rl_q_end)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
			}
			cur = next;
			continue;
		}
		if (rl_e_is_rlock(cur)) {
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}
		if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
			sleepq_lock(&lock->sleepers);
			if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
				sleepq_release(&lock->sleepers);
				continue;
			}
			rangelock_unlock_int(lock, e);
			if (trylock) {
				sleepq_release(&lock->sleepers);
				return (RL_TRYLOCK_FAILED);
			}
			rl_insert_sleep(lock);
			return (RL_LOCK_RETRY);
		}
	}
}

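/*
 * After a write request was linked, re-scan from the list head and
 * check that every granted range in front of it ends at or before the
 * request's start, i.e. that the new entry is reached before any
 * overlapping one.  As in rl_r_validate(), removed entries are
 * unlinked along the way, and a conflict ends in trylock failure or
 * sleep and retry.
 */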
static enum RL_INSERT_RES
rl_w_validate(struct rangelock *lock, struct rl_q_entry *e,
    bool trylock, struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;

	prev = &lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* head is not marked */
	for (;;) {
		if (cur == e)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
			}
			cur = next;
			continue;
		}
		if (cur->rl_q_end <= e->rl_q_start) {
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}
		sleepq_lock(&lock->sleepers);
		rangelock_unlock_int(lock, e);
		if (trylock) {
			sleepq_release(&lock->sleepers);
			return (RL_TRYLOCK_FAILED);
		}
		rl_insert_sleep(lock);
		return (RL_LOCK_RETRY);
	}
}

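/*
 * Lock-free insert of the request into the list of granted ranges,
 * which is kept ordered by rl_e_compare().  An overlap with a granted
 * range causes trylock failure or sleep and retry; removed entries
 * found during the walk are unlinked and collected on the local free
 * list.  After a successful link the request is re-validated with
 * rl_r_validate() or rl_w_validate() against concurrent inserts.
 */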
static enum RL_INSERT_RES
rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;
	int r;

again:
	prev = &lock->head;
	cur = rl_q_load(prev);
	if (cur == NULL && rl_q_cas(prev, NULL, e))
		return (RL_LOCK_SUCCESS);

	for (;;) {
		if (cur != NULL) {
			if (rl_e_is_marked(cur))
				goto again;

			next = rl_q_load(&cur->rl_q_next);
			if (rl_e_is_marked(next)) {
				next = rl_e_unmark(next);
				if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
					cur->rl_q_owner = NULL;
#endif
					cur->rl_q_free = *free;
					*free = cur;
				}
				cur = next;
				continue;
			}
		}

		r = rl_e_compare(cur, e);
		if (r == -1) {
			prev = &cur->rl_q_next;
			cur = rl_q_load(prev);
		} else if (r == 0) {
			sleepq_lock(&lock->sleepers);
			if (__predict_false(rl_e_is_marked(rl_q_load(
			    &cur->rl_q_next)))) {
				sleepq_release(&lock->sleepers);
				continue;
			}
			if (trylock) {
				sleepq_release(&lock->sleepers);
				return (RL_TRYLOCK_FAILED);
			}
			rl_insert_sleep(lock);
			/* e is still valid */
			goto again;
		} else /* r == 1 */ {
			e->rl_q_next = cur;
			if (rl_q_cas(prev, cur, e)) {
				atomic_thread_fence_acq();
				return (rl_e_is_rlock(e) ?
				    rl_r_validate(lock, e, trylock, free) :
				    rl_w_validate(lock, e, trylock, free));
			}
			/* Reset rl_q_next in case we hit fast path. */
			e->rl_q_next = NULL;
			cur = rl_q_load(prev);
		}
	}
}

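/*
 * Common lock path.  Allocate an entry, enter the SMR section and try
 * to insert it, repeating while rl_insert() asks for a retry.  Entries
 * collected on the free list are released only after the SMR section
 * is left: the first one may be cached in td_rlqe (after an
 * smr_synchronize() call that waits out any readers still traversing
 * it), the rest are returned to the SMR UMA zone.
 */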
static struct rl_q_entry *
rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
    vm_ooffset_t end, int locktype)
{
	struct rl_q_entry *e, *free, *x, *xp;
	struct thread *td;
	enum RL_INSERT_RES res;

	td = curthread;
	for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) {
		free = NULL;
		e = rlqentry_alloc(start, end, locktype);
		smr_enter(rl_smr);
		res = rl_insert(lock, e, trylock, &free);
		smr_exit(rl_smr);
		if (res == RL_TRYLOCK_FAILED) {
			MPASS(trylock);
			e->rl_q_free = free;
			free = e;
			e = NULL;
		}
		for (x = free; x != NULL; x = xp) {
			MPASS(!rl_e_is_marked(x));
			xp = x->rl_q_free;
			MPASS(!rl_e_is_marked(xp));
			if (td->td_rlqe == NULL) {
				smr_synchronize(rl_smr);
				td->td_rlqe = x;
			} else {
				uma_zfree_smr(rl_entry_zone, x);
			}
		}
	}
	return (e);
}

void *
rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ));
}

void *
rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ));
}

void *
rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE));
}

void *
rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
}

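/*
 * Cookie assertions are not implemented for this lock-free rangelock;
 * the stub below only preserves the INVARIANT_SUPPORT interface.
 */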
#ifdef INVARIANT_SUPPORT
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
}
#endif	/* INVARIANT_SUPPORT */

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

DB_SHOW_COMMAND(rangelock, db_show_rangelock)
{
	struct rangelock *lock;
	struct rl_q_entry *e, *x;

	if (!have_addr) {
		db_printf("show rangelock addr\n");
		return;
	}

	lock = (struct rangelock *)addr;
	db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers);
	for (e = lock->head;;) {
		x = rl_e_is_marked(e) ? rl_e_unmark(e) : e;
		if (x == NULL)
			break;
		db_printf("  entry %p marked %d %d start %#jx end %#jx "
		    "flags %x next %p",
		    e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next),
		    x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next);
#ifdef INVARIANTS
		db_printf(" owner %p (%d)", x->rl_q_owner,
		    x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1);
#endif
		db_printf("\n");
		e = x->rl_q_next;
	}
}

#endif	/* DDB */