xref: /freebsd/sys/kern/kern_rangelock.c (revision a3f10d0882e1aebef27698f1e0f94ffadade5935)
18f0e9130SKonstantin Belousov /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
38a36da99SPedro F. Giffuni  *
48f0e9130SKonstantin Belousov  * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
5d8a16b6aSKonstantin Belousov  * Copyright (c) 2023 The FreeBSD Foundation
6d8a16b6aSKonstantin Belousov  *
7d8a16b6aSKonstantin Belousov  * Portions of this software were developed by
8d8a16b6aSKonstantin Belousov  * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
9d8a16b6aSKonstantin Belousov  * the FreeBSD Foundation.
108f0e9130SKonstantin Belousov  *
118f0e9130SKonstantin Belousov  * Redistribution and use in source and binary forms, with or without
128f0e9130SKonstantin Belousov  * modification, are permitted provided that the following conditions
138f0e9130SKonstantin Belousov  * are met:
148f0e9130SKonstantin Belousov  * 1. Redistributions of source code must retain the above copyright
158f0e9130SKonstantin Belousov  *    notice unmodified, this list of conditions, and the following
168f0e9130SKonstantin Belousov  *    disclaimer.
178f0e9130SKonstantin Belousov  * 2. Redistributions in binary form must reproduce the above copyright
188f0e9130SKonstantin Belousov  *    notice, this list of conditions and the following disclaimer in the
198f0e9130SKonstantin Belousov  *    documentation and/or other materials provided with the distribution.
208f0e9130SKonstantin Belousov  *
218f0e9130SKonstantin Belousov  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
228f0e9130SKonstantin Belousov  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
238f0e9130SKonstantin Belousov  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
248f0e9130SKonstantin Belousov  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
258f0e9130SKonstantin Belousov  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
268f0e9130SKonstantin Belousov  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
278f0e9130SKonstantin Belousov  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
288f0e9130SKonstantin Belousov  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
298f0e9130SKonstantin Belousov  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
308f0e9130SKonstantin Belousov  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
318f0e9130SKonstantin Belousov  */
328f0e9130SKonstantin Belousov 
338f0e9130SKonstantin Belousov #include <sys/param.h>
34c3d8a931SKonstantin Belousov #include <sys/kassert.h>
358f0e9130SKonstantin Belousov #include <sys/kernel.h>
368f0e9130SKonstantin Belousov #include <sys/lock.h>
378f0e9130SKonstantin Belousov #include <sys/mutex.h>
388f0e9130SKonstantin Belousov #include <sys/proc.h>
398f0e9130SKonstantin Belousov #include <sys/rangelock.h>
40c3d8a931SKonstantin Belousov #include <sys/sleepqueue.h>
41c3d8a931SKonstantin Belousov #include <sys/smr.h>
429ef425e5SKonstantin Belousov #include <sys/sysctl.h>
438f0e9130SKonstantin Belousov 
448f0e9130SKonstantin Belousov #include <vm/uma.h>
458f0e9130SKonstantin Belousov 
46c3d8a931SKonstantin Belousov /*
479ef425e5SKonstantin Belousov  * Immediately after initialization (subject to 'rangelock_cheat'
489ef425e5SKonstantin Belousov  * below), and until a request comes that conflicts with granted ones
499ef425e5SKonstantin Belousov  * based on type, rangelocks serve requests in the "cheating" mode.
509ef425e5SKonstantin Belousov  * In this mode, a rangelock behaves like a sxlock, as if each request
519ef425e5SKonstantin Belousov  * covered the whole range of the protected object.  On receiving a
529ef425e5SKonstantin Belousov  * conflicting request (any request while a write request is
539ef425e5SKonstantin Belousov  * effective, or any write request while some read ones are
549ef425e5SKonstantin Belousov  * effective), all requests granted in "cheating" mode are drained,
559ef425e5SKonstantin Belousov  * and the rangelock then switches to effectively keeping track of the
569ef425e5SKonstantin Belousov  * precise range of each new request.
579ef425e5SKonstantin Belousov  *
589ef425e5SKonstantin Belousov  * Normal sx implementation is not used to not bloat structures (most
599ef425e5SKonstantin Belousov  * important, vnodes) which embeds rangelocks.
609ef425e5SKonstantin Belousov  *
619ef425e5SKonstantin Belousov  * The cheating greatly helps very common pattern where file is first
629ef425e5SKonstantin Belousov  * written single-threaded, and then read by many processes.
639ef425e5SKonstantin Belousov  *
649ef425e5SKonstantin Belousov  * Lock is in cheat mode when RL_CHEAT_CHEATING bit is set in the
659ef425e5SKonstantin Belousov  * lock->head.  Special cookies are returned in this mode, and
669ef425e5SKonstantin Belousov  * trylocks are same as normal locks but do not drain.
679ef425e5SKonstantin Belousov  */
689ef425e5SKonstantin Belousov 
/* Tunable: start new rangelocks in the "cheating" mode (see above). */
static int rangelock_cheat = 1;
SYSCTL_INT(_debug, OID_AUTO, rangelock_cheat, CTLFLAG_RWTUN,
    &rangelock_cheat, 0,
    "");

/*
 * Cheat-mode state lives in lock->head: the low three bits are flags,
 * the bits above RL_CHEAT_MASK hold the count of cheat-mode readers,
 * in units of RL_CHEAT_READER.
 */
#define	RL_CHEAT_MASK		0x7
#define	RL_CHEAT_CHEATING	0x1	/* lock is in cheat mode */
/* #define	RL_CHEAT_RLOCKED	0x0 */
#define	RL_CHEAT_WLOCKED	0x2	/* cheat-mode write lock held */
#define	RL_CHEAT_DRAINING	0x4	/* draining cheat-mode owners */

/* Increment for the reader count stored above the flag bits. */
#define	RL_CHEAT_READER		0x8

/*
 * Distinguished cookie values returned by rangelock_cheat_lock() and
 * recognized by rangelock_cheat_unlock().
 */
#define	RL_RET_CHEAT_RLOCKED	0x1100
#define	RL_RET_CHEAT_WLOCKED	0x2200
849ef425e5SKonstantin Belousov 
/*
 * Try to serve the request in cheat mode, where the rangelock acts as
 * an sx-style whole-object lock (see the comment at the top of the
 * file).  Returns false when the lock is not (or no longer) cheating
 * and the caller must take the regular range-tracking path.  Returns
 * true when the request was fully handled here: *cookie then holds
 * the special RL_RET_CHEAT_* value on success, or NULL for a failed
 * trylock.
 */
static bool
rangelock_cheat_lock(struct rangelock *lock, int locktype, bool trylock,
    void **cookie)
{
	uintptr_t v, x;

	v = (uintptr_t)atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	if ((v & RL_CHEAT_DRAINING) != 0) {
		/*
		 * Cheat-mode owners are being drained.  A trylock
		 * fails outright; otherwise wait for the drain to
		 * finish and tell the caller to use the regular path.
		 */
drain:
		if (trylock) {
			*cookie = NULL;
			return (true);
		}
		sleepq_lock(&lock->head);
drain1:
		/* Entered with the sleepqueue lock for &lock->head held. */
		DROP_GIANT();
		for (;;) {
			v = (uintptr_t)atomic_load_ptr(&lock->head);
			if ((v & RL_CHEAT_DRAINING) == 0)
				break;
			/* sleepq_wait() drops the lock; re-take it. */
			sleepq_add(&lock->head, NULL, "ranged1", 0, 0);
			sleepq_wait(&lock->head, PRI_USER);
			sleepq_lock(&lock->head);
		}
		sleepq_release(&lock->head);
		PICKUP_GIANT();
		return (false);
	}

	switch (locktype) {
	case RL_LOCK_READ:
		for (;;) {
			if ((v & RL_CHEAT_WLOCKED) != 0) {
				/*
				 * Conflict with the cheat-mode writer:
				 * request draining and go wait for it.
				 */
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				/* Add one reader, keeping the cheat bit. */
				x = (v & ~RL_CHEAT_MASK) + RL_CHEAT_READER;
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			/* fcmpset failed and refreshed v; re-evaluate. */
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_RLOCKED;
		break;
	case RL_LOCK_WRITE:
		for (;;) {
			if ((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER ||
			    (v & RL_CHEAT_WLOCKED) != 0) {
				/*
				 * Conflict with cheat-mode readers or
				 * another writer: request draining.
				 */
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				x = RL_CHEAT_WLOCKED | RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			/* fcmpset failed and refreshed v; re-evaluate. */
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_WLOCKED;
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}
1799ef425e5SKonstantin Belousov 
/*
 * Release a cheat-mode lock, identified by one of the special
 * RL_RET_CHEAT_* cookie values.  Returns false when the lock is not
 * in cheat mode; the cookie is then a real rl_q_entry handled by the
 * caller.  The last owner to leave while the lock is draining clears
 * the head entirely (dropping RL_CHEAT_CHEATING) and wakes the drain
 * waiters, completing the switch to precise range tracking.
 */
static bool
rangelock_cheat_unlock(struct rangelock *lock, void *cookie)
{
	uintptr_t v, x;

	v = (uintptr_t)atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);

	MPASS((uintptr_t)cookie == RL_RET_CHEAT_WLOCKED ||
	    (uintptr_t)cookie == RL_RET_CHEAT_RLOCKED);

	switch ((uintptr_t)cookie) {
	case RL_RET_CHEAT_RLOCKED:
		for (;;) {
			MPASS((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER);
			MPASS((v & RL_CHEAT_WLOCKED) == 0);
			/* Remaining reader count, flag bits stripped. */
			x = (v & ~RL_CHEAT_MASK) - RL_CHEAT_READER;
			if ((v & RL_CHEAT_DRAINING) != 0) {
				if (x != 0) {
					/* Not the last reader: keep draining. */
					x |= RL_CHEAT_DRAINING |
					    RL_CHEAT_CHEATING;
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0)
						break;
				} else {
					/*
					 * Last reader: store 0, leaving
					 * cheat mode, and wake up the
					 * drain waiters.
					 */
					sleepq_lock(&lock->head);
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0) {
						sleepq_broadcast(
						    &lock->head,
						    SLEEPQ_SLEEP, 0, 0);
						sleepq_release(&lock->head);
						break;
					}
					sleepq_release(&lock->head);
				}
			} else {
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
		}
		break;
	case RL_RET_CHEAT_WLOCKED:
		for (;;) {
			MPASS((v & RL_CHEAT_WLOCKED) != 0);
			if ((v & RL_CHEAT_DRAINING) != 0) {
				/*
				 * Leave cheat mode (head = 0) and wake
				 * up the drain waiters.
				 */
				sleepq_lock(&lock->head);
				atomic_store_ptr(&lock->head, 0);
				sleepq_broadcast(&lock->head,
				    SLEEPQ_SLEEP, 0, 0);
				sleepq_release(&lock->head);
				break;
			} else {
				if (atomic_fcmpset_ptr(&lock->head, &v,
				    RL_CHEAT_CHEATING) != 0)
					break;
			}
		}
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}
2489ef425e5SKonstantin Belousov 
2499ef425e5SKonstantin Belousov static bool
2509ef425e5SKonstantin Belousov rangelock_cheat_destroy(struct rangelock *lock)
2519ef425e5SKonstantin Belousov {
2529ef425e5SKonstantin Belousov 	uintptr_t v;
2539ef425e5SKonstantin Belousov 
2549ef425e5SKonstantin Belousov 	v = (uintptr_t)atomic_load_ptr(&lock->head);
2559ef425e5SKonstantin Belousov 	if ((v & RL_CHEAT_CHEATING) == 0)
2569ef425e5SKonstantin Belousov 		return (false);
2579ef425e5SKonstantin Belousov 	MPASS(v == RL_CHEAT_CHEATING);
2589ef425e5SKonstantin Belousov 	return (true);
2599ef425e5SKonstantin Belousov }
2609ef425e5SKonstantin Belousov 
2619ef425e5SKonstantin Belousov /*
262c3d8a931SKonstantin Belousov  * Implementation of range locks based on the paper
263c3d8a931SKonstantin Belousov  * https://doi.org/10.1145/3342195.3387533
264c3d8a931SKonstantin Belousov  * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020
265c3d8a931SKonstantin Belousov  * Scalable Range Locks for Scalable Address Spaces and Beyond
266c3d8a931SKonstantin Belousov  * by Alex Kogan, Dave Dice, and Shady Issa
267c3d8a931SKonstantin Belousov  */
268c3d8a931SKonstantin Belousov 
269c3d8a931SKonstantin Belousov static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e);
270c3d8a931SKonstantin Belousov 
271c3d8a931SKonstantin Belousov /*
272c3d8a931SKonstantin Belousov  * rl_q_next links all granted ranges in the lock.  We cannot free an
273c3d8a931SKonstantin Belousov  * rl_q_entry while in the smr section, and cannot reuse rl_q_next
274c3d8a931SKonstantin Belousov  * linkage since other threads might follow it even after CAS removed
275c3d8a931SKonstantin Belousov  * the range.  Use rl_q_free for local list of ranges to remove after
276c3d8a931SKonstantin Belousov  * the smr section is dropped.
277c3d8a931SKonstantin Belousov  */
struct rl_q_entry {
	/*
	 * Next granted range in the lock list.  The low bit of this
	 * pointer is the "marked" (logically removed) flag, set by
	 * rl_e_mark() and tested by rl_e_is_marked().
	 */
	struct rl_q_entry *rl_q_next;
	/* Linkage for the local to-be-freed list (see comment above). */
	struct rl_q_entry *rl_q_free;
	/* Byte range covered by this request. */
	off_t		rl_q_start, rl_q_end;
	/* Request flags; lock type is tested via rl_e_is_rlock(). */
	int		rl_q_flags;
#ifdef INVARIANTS
	/* Owning thread, for assertions only. */
	struct thread	*rl_q_owner;
#endif
};
2878f0e9130SKonstantin Belousov 
2888f0e9130SKonstantin Belousov static uma_zone_t rl_entry_zone;
289c3d8a931SKonstantin Belousov static smr_t rl_smr;
2908f0e9130SKonstantin Belousov 
291*a3f10d08SKonstantin Belousov static void rangelock_free_free(struct rl_q_entry *free);
292*a3f10d08SKonstantin Belousov 
/*
 * Create the SMR-managed UMA zone for rl_q_entry allocations and
 * record its SMR state; freed entries are not reclaimed while
 * concurrent SMR readers may still access them.
 */
static void
rangelock_sys_init(void)
{
	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
	    UMA_ZONE_SMR);
	rl_smr = uma_zone_get_smr(rl_entry_zone);
}
SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);
3028f0e9130SKonstantin Belousov 
3038f0e9130SKonstantin Belousov static struct rl_q_entry *
304c3d8a931SKonstantin Belousov rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
3058f0e9130SKonstantin Belousov {
306c3d8a931SKonstantin Belousov 	struct rl_q_entry *e;
307ff1ae3b3SKonstantin Belousov 	struct thread *td;
3088f0e9130SKonstantin Belousov 
309ff1ae3b3SKonstantin Belousov 	td = curthread;
310ff1ae3b3SKonstantin Belousov 	if (td->td_rlqe != NULL) {
311ff1ae3b3SKonstantin Belousov 		e = td->td_rlqe;
312ff1ae3b3SKonstantin Belousov 		td->td_rlqe = NULL;
313ff1ae3b3SKonstantin Belousov 	} else {
314c3d8a931SKonstantin Belousov 		e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
315ff1ae3b3SKonstantin Belousov 	}
316c3d8a931SKonstantin Belousov 	e->rl_q_next = NULL;
317c3d8a931SKonstantin Belousov 	e->rl_q_free = NULL;
318c3d8a931SKonstantin Belousov 	e->rl_q_start = start;
319c3d8a931SKonstantin Belousov 	e->rl_q_end = end;
320c3d8a931SKonstantin Belousov 	e->rl_q_flags = flags;
321c3d8a931SKonstantin Belousov #ifdef INVARIANTS
322c3d8a931SKonstantin Belousov 	e->rl_q_owner = curthread;
323c3d8a931SKonstantin Belousov #endif
324c3d8a931SKonstantin Belousov 	return (e);
3258f0e9130SKonstantin Belousov }
3268f0e9130SKonstantin Belousov 
/*
 * Return a range entry to the SMR zone.  NOTE(review): exported,
 * presumably for freeing a thread's cached td_rlqe entry — verify
 * against callers.
 */
void
rangelock_entry_free(struct rl_q_entry *e)
{
	uma_zfree_smr(rl_entry_zone, e);
}
332ff1ae3b3SKonstantin Belousov 
333ff1ae3b3SKonstantin Belousov void
3348f0e9130SKonstantin Belousov rangelock_init(struct rangelock *lock)
3358f0e9130SKonstantin Belousov {
336c3d8a931SKonstantin Belousov 	lock->sleepers = false;
3379ef425e5SKonstantin Belousov 	atomic_store_ptr(&lock->head, rangelock_cheat ? RL_CHEAT_CHEATING : 0);
3388f0e9130SKonstantin Belousov }
3398f0e9130SKonstantin Belousov 
3408f0e9130SKonstantin Belousov void
3418f0e9130SKonstantin Belousov rangelock_destroy(struct rangelock *lock)
3428f0e9130SKonstantin Belousov {
343c3d8a931SKonstantin Belousov 	struct rl_q_entry *e, *ep;
3448f0e9130SKonstantin Belousov 
345c3d8a931SKonstantin Belousov 	MPASS(!lock->sleepers);
3469ef425e5SKonstantin Belousov 	if (rangelock_cheat_destroy(lock))
3479ef425e5SKonstantin Belousov 		return;
348c3d8a931SKonstantin Belousov 	for (e = (struct rl_q_entry *)atomic_load_ptr(&lock->head);
349c3d8a931SKonstantin Belousov 	    e != NULL; e = rl_e_unmark(ep)) {
350c3d8a931SKonstantin Belousov 		ep = atomic_load_ptr(&e->rl_q_next);
351c3d8a931SKonstantin Belousov 		uma_zfree_smr(rl_entry_zone, e);
352c3d8a931SKonstantin Belousov 	}
3538f0e9130SKonstantin Belousov }
3548f0e9130SKonstantin Belousov 
/* The low pointer bit serves as the logically-removed flag. */
static bool
rl_e_is_marked(const struct rl_q_entry *e)
{
	uintptr_t p;

	p = (uintptr_t)e;
	return ((p & 1) == 1);
}
3608f0e9130SKonstantin Belousov 
/* Strip the removal mark, whether or not it is set. */
static struct rl_q_entry *
rl_e_unmark_unchecked(const struct rl_q_entry *e)
{
	uintptr_t p;

	p = (uintptr_t)e;
	return ((struct rl_q_entry *)(p & ~(uintptr_t)1));
}
3665badbeeaSKonstantin Belousov 
/* Strip the removal mark; asserts that the mark is actually set. */
static struct rl_q_entry *
rl_e_unmark(const struct rl_q_entry *e)
{
	MPASS(rl_e_is_marked(e));
	return (rl_e_unmark_unchecked(e));
}
3735badbeeaSKonstantin Belousov 
/*
 * Atomically set the low bit of rl_q_next, logically removing the
 * entry from the lock list.  With INVARIANTS on LP64 platforms a
 * test-and-set is used to assert the entry was not already marked.
 */
static void
rl_e_mark(struct rl_q_entry *e)
{
#if defined(INVARIANTS) && defined(__LP64__)
	int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0);
	MPASS(r == 0);
#else
	atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
#endif
}
3842bb93f2dSColin Percival 
/*
 * Load a lock-list link with acquire semantics, so the entry's
 * contents are observed consistently after following the pointer.
 */
static struct rl_q_entry *
rl_q_load(struct rl_q_entry **p)
{
	return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p));
}
3908f0e9130SKonstantin Belousov 
/* Return true if the request is a read lock. */
static bool
rl_e_is_rlock(const struct rl_q_entry *e)
{
	return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ);
}
3966c32d89eSKonstantin Belousov 
3975badbeeaSKonstantin Belousov static void
398*a3f10d08SKonstantin Belousov rangelock_free_free(struct rl_q_entry *free)
399*a3f10d08SKonstantin Belousov {
400*a3f10d08SKonstantin Belousov 	struct rl_q_entry *x, *xp;
401*a3f10d08SKonstantin Belousov 	struct thread *td;
402*a3f10d08SKonstantin Belousov 
403*a3f10d08SKonstantin Belousov 	td = curthread;
404*a3f10d08SKonstantin Belousov 	for (x = free; x != NULL; x = xp) {
405*a3f10d08SKonstantin Belousov 		MPASS(!rl_e_is_marked(x));
406*a3f10d08SKonstantin Belousov 		xp = x->rl_q_free;
407*a3f10d08SKonstantin Belousov 		MPASS(!rl_e_is_marked(xp));
408*a3f10d08SKonstantin Belousov 		if (td->td_rlqe == NULL) {
409*a3f10d08SKonstantin Belousov 			smr_synchronize(rl_smr);
410*a3f10d08SKonstantin Belousov 			td->td_rlqe = x;
411*a3f10d08SKonstantin Belousov 		} else {
412*a3f10d08SKonstantin Belousov 			uma_zfree_smr(rl_entry_zone, x);
413*a3f10d08SKonstantin Belousov 		}
414*a3f10d08SKonstantin Belousov 	}
415*a3f10d08SKonstantin Belousov }
416*a3f10d08SKonstantin Belousov 
/*
 * Unlock the granted entry 'e': mark it as logically removed from
 * the lock list and wake up any threads sleeping on the rangelock.
 * Called with the sleepqueue lock for &lock->sleepers held.  The
 * entry stays linked; concurrent list walkers unlink and collect
 * marked entries later.
 */
static void
rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e)
{
	bool sleepers;

	MPASS(lock != NULL && e != NULL);
	MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
	MPASS(e->rl_q_owner == curthread);

	rl_e_mark(e);
	sleepers = lock->sleepers;
	lock->sleepers = false;
	if (sleepers)
		sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
}
4325badbeeaSKonstantin Belousov 
/*
 * Release a range lock, using the cookie returned when it was
 * granted.  Cheat-mode cookies are fully handled by
 * rangelock_cheat_unlock(); otherwise the cookie is the granted
 * rl_q_entry itself.
 */
void
rangelock_unlock(struct rangelock *lock, void *cookie)
{
	if (rangelock_cheat_unlock(lock, cookie))
		return;

	sleepq_lock(&lock->sleepers);
	rangelock_unlock_int(lock, cookie);
	sleepq_release(&lock->sleepers);
}
4438f0e9130SKonstantin Belousov 
4448f0e9130SKonstantin Belousov /*
4455badbeeaSKonstantin Belousov  * result: -1 if e1 before e2, or both locks are readers and e1
4465badbeeaSKonstantin Belousov  *		starts before or at e2
4475badbeeaSKonstantin Belousov  *          1 if e1 after e2, or both locks are readers and e1
4485badbeeaSKonstantin Belousov  *		starts after e2
4495badbeeaSKonstantin Belousov  *          0 if e1 and e2 overlap and at least one lock is writer
4508f0e9130SKonstantin Belousov  */
static int
rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
{
	bool rds;

	/* A NULL e1 sorts after any real entry. */
	if (e1 == NULL)
		return (1);
	if (e2->rl_q_start >= e1->rl_q_end)
		return (-1);
	/* Two read requests never conflict; order them by start offset. */
	rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2);
	if (e2->rl_q_start >= e1->rl_q_start && rds)
		return (-1);
	if (e1->rl_q_start >= e2->rl_q_end)
		return (1);
	if (e1->rl_q_start >= e2->rl_q_start && rds)
		return (1);
	/* Overlapping ranges with at least one writer: conflict. */
	return (0);
}
4698f0e9130SKonstantin Belousov 
/*
 * Sleep until the rangelock is signalled by an unlock.  Called with
 * the sleepqueue lock for &lock->sleepers held.  The SMR section
 * must be exited before sleeping and is re-entered on wakeup.
 */
static void
rl_insert_sleep(struct rangelock *lock)
{
	smr_exit(rl_smr);
	DROP_GIANT();
	lock->sleepers = true;
	sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0);
	sleepq_wait(&lock->sleepers, PRI_USER);
	PICKUP_GIANT();
	smr_enter(rl_smr);
}
4818f0e9130SKonstantin Belousov 
/*
 * CAS the list link *prev from old to new, with release semantics so
 * that the new entry's fields are visible to other walkers before it
 * is linked in.  Returns true on success.
 */
static bool
rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
    struct rl_q_entry *new)
{
	return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old,
	    (uintptr_t)new) != 0);
}
4898f0e9130SKonstantin Belousov 
/* Outcome of an attempt to insert/validate a request on the list. */
enum RL_INSERT_RES {
	RL_TRYLOCK_FAILED,	/* conflict found and trylock requested */
	RL_LOCK_SUCCESS,	/* range granted */
	RL_LOCK_RETRY,		/* slept on a conflict; caller retries */
};
4955badbeeaSKonstantin Belousov 
/*
 * Validate an already-inserted read request 'e': scan the list tail
 * after 'e' for a conflicting write request.  Marked (concurrently
 * unlocked) entries found on the way are unlinked and collected on
 * the caller's *free list.  Returns RL_LOCK_SUCCESS when no
 * conflicting writer overlaps 'e', or RL_TRYLOCK_FAILED /
 * RL_LOCK_RETRY when 'e' had to be withdrawn because of a writer.
 */
static enum RL_INSERT_RES
rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;

	prev = &e->rl_q_next;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* nobody can unlock e yet */
	for (;;) {
		/* Past all ranges that could overlap e: no conflict. */
		if (cur == NULL || cur->rl_q_start > e->rl_q_end)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			/* cur was unlocked: unlink and stash for freeing. */
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
			}
			cur = next;
			continue;
		}
		if (rl_e_is_rlock(cur)) {
			/* Other readers never conflict with us. */
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}
		if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
			/*
			 * Still-locked writer overlapping e.  Re-check
			 * under the sleepqueue lock to close the race
			 * with the writer being unlocked, then withdraw
			 * e and either fail (trylock) or sleep + retry.
			 */
			sleepq_lock(&lock->sleepers);
			if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
				sleepq_release(&lock->sleepers);
				continue;
			}
			rangelock_unlock_int(lock, e);
			if (trylock) {
				sleepq_release(&lock->sleepers);
				return (RL_TRYLOCK_FAILED);
			}
			rl_insert_sleep(lock);
			return (RL_LOCK_RETRY);
		}
	}
}
5395badbeeaSKonstantin Belousov 
5405badbeeaSKonstantin Belousov static enum RL_INSERT_RES
5415badbeeaSKonstantin Belousov rl_w_validate(struct rangelock *lock, struct rl_q_entry *e,
5425badbeeaSKonstantin Belousov     bool trylock, struct rl_q_entry **free)
5435badbeeaSKonstantin Belousov {
5445badbeeaSKonstantin Belousov 	struct rl_q_entry *cur, *next, **prev;
5455badbeeaSKonstantin Belousov 
5469ef425e5SKonstantin Belousov 	prev = (struct rl_q_entry **)&lock->head;
5475badbeeaSKonstantin Belousov 	cur = rl_q_load(prev);
5485badbeeaSKonstantin Belousov 	MPASS(!rl_e_is_marked(cur));	/* head is not marked */
5495badbeeaSKonstantin Belousov 	for (;;) {
5505badbeeaSKonstantin Belousov 		if (cur == e)
5515badbeeaSKonstantin Belousov 			return (RL_LOCK_SUCCESS);
5525badbeeaSKonstantin Belousov 		next = rl_q_load(&cur->rl_q_next);
5535badbeeaSKonstantin Belousov 		if (rl_e_is_marked(next)) {
5545badbeeaSKonstantin Belousov 			next = rl_e_unmark(next);
5555badbeeaSKonstantin Belousov 			if (rl_q_cas(prev, cur, next)) {
5565badbeeaSKonstantin Belousov 				cur->rl_q_next = *free;
5575badbeeaSKonstantin Belousov 				*free = cur;
5585badbeeaSKonstantin Belousov 			}
5595badbeeaSKonstantin Belousov 			cur = next;
5605badbeeaSKonstantin Belousov 			continue;
5615badbeeaSKonstantin Belousov 		}
5625badbeeaSKonstantin Belousov 		if (cur->rl_q_end <= e->rl_q_start) {
5635badbeeaSKonstantin Belousov 			prev = &cur->rl_q_next;
5645badbeeaSKonstantin Belousov 			cur = rl_e_unmark_unchecked(rl_q_load(prev));
5655badbeeaSKonstantin Belousov 			continue;
5665badbeeaSKonstantin Belousov 		}
5675badbeeaSKonstantin Belousov 		sleepq_lock(&lock->sleepers);
5685badbeeaSKonstantin Belousov 		rangelock_unlock_int(lock, e);
5695badbeeaSKonstantin Belousov 		if (trylock) {
5705badbeeaSKonstantin Belousov 			sleepq_release(&lock->sleepers);
5715badbeeaSKonstantin Belousov 			return (RL_TRYLOCK_FAILED);
5725badbeeaSKonstantin Belousov 		}
5735badbeeaSKonstantin Belousov 		rl_insert_sleep(lock);
5745badbeeaSKonstantin Belousov 		return (RL_LOCK_RETRY);
5755badbeeaSKonstantin Belousov 	}
5765badbeeaSKonstantin Belousov }
5775badbeeaSKonstantin Belousov 
/*
 * Insert range request e into lock's queue, kept sorted per
 * rl_e_compare().  Marked (removed) entries seen during the walk are
 * helped out of the list and chained onto *free via rl_q_free for the
 * caller to release.  Returns RL_LOCK_SUCCESS when e is queued without
 * conflict, RL_TRYLOCK_FAILED when trylock is set and a conflicting
 * entry exists, or RL_LOCK_RETRY propagated from the post-insert
 * validation, in which case the caller retries with a fresh entry.
 */
static enum RL_INSERT_RES
rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;
	int r;

again:
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	/* Fast path: empty queue, install e as the only entry. */
	if (cur == NULL && rl_q_cas(prev, NULL, e))
		return (RL_LOCK_SUCCESS);

	for (;;) {
		if (cur != NULL) {
			/*
			 * The pointer we loaded was itself marked:
			 * the predecessor got removed under us, so the
			 * walk must restart from the head.
			 */
			if (rl_e_is_marked(cur))
				goto again;

			next = rl_q_load(&cur->rl_q_next);
			if (rl_e_is_marked(next)) {
				/*
				 * cur is marked removed; help unlink
				 * it, and on success hand it to the
				 * caller on the rl_q_free chain.
				 */
				next = rl_e_unmark(next);
				if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
					cur->rl_q_owner = NULL;
#endif
					cur->rl_q_free = *free;
					*free = cur;
				}
				cur = next;
				continue;
			}
		}

		r = rl_e_compare(cur, e);
		if (r == -1) {
			/* cur orders before e; keep walking. */
			prev = &cur->rl_q_next;
			cur = rl_q_load(prev);
		} else if (r == 0) {
			/*
			 * cur conflicts with e.  Re-check under the
			 * sleepqueue lock that cur was not removed
			 * meanwhile; if it was, re-examine this
			 * position instead of sleeping.
			 */
			sleepq_lock(&lock->sleepers);
			if (__predict_false(rl_e_is_marked(rl_q_load(
			    &cur->rl_q_next)))) {
				sleepq_release(&lock->sleepers);
				continue;
			}
			if (trylock) {
				sleepq_release(&lock->sleepers);
				return (RL_TRYLOCK_FAILED);
			}
			rl_insert_sleep(lock);
			/* e is still valid */
			goto again;
		} else /* r == 1 */ {
			/* e orders before cur: link e in front of it. */
			e->rl_q_next = cur;
			if (rl_q_cas(prev, cur, e)) {
				/*
				 * e is linked; fence, then check that
				 * no conflicting entry precedes it.
				 */
				atomic_thread_fence_acq();
				return (rl_e_is_rlock(e) ?
				    rl_r_validate(lock, e, trylock, free) :
				    rl_w_validate(lock, e, trylock, free));
			}
			/* Reset rl_q_next in case we hit fast path. */
			e->rl_q_next = NULL;
			cur = rl_q_load(prev);
		}
	}
}
643c3d8a931SKonstantin Belousov 
644c3d8a931SKonstantin Belousov static struct rl_q_entry *
6455badbeeaSKonstantin Belousov rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
6465badbeeaSKonstantin Belousov     vm_ooffset_t end, int locktype)
647c3d8a931SKonstantin Belousov {
648*a3f10d08SKonstantin Belousov 	struct rl_q_entry *e, *free;
6499ef425e5SKonstantin Belousov 	void *cookie;
6505badbeeaSKonstantin Belousov 	enum RL_INSERT_RES res;
651c3d8a931SKonstantin Belousov 
6529ef425e5SKonstantin Belousov 	if (rangelock_cheat_lock(lock, locktype, trylock, &cookie))
6539ef425e5SKonstantin Belousov 		return (cookie);
6545badbeeaSKonstantin Belousov 	for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) {
655c3d8a931SKonstantin Belousov 		free = NULL;
6565badbeeaSKonstantin Belousov 		e = rlqentry_alloc(start, end, locktype);
657c3d8a931SKonstantin Belousov 		smr_enter(rl_smr);
658c3d8a931SKonstantin Belousov 		res = rl_insert(lock, e, trylock, &free);
659c3d8a931SKonstantin Belousov 		smr_exit(rl_smr);
6605badbeeaSKonstantin Belousov 		if (res == RL_TRYLOCK_FAILED) {
6615badbeeaSKonstantin Belousov 			MPASS(trylock);
662c3d8a931SKonstantin Belousov 			e->rl_q_free = free;
663c3d8a931SKonstantin Belousov 			free = e;
664c3d8a931SKonstantin Belousov 			e = NULL;
665c3d8a931SKonstantin Belousov 		}
666*a3f10d08SKonstantin Belousov 		rangelock_free_free(free);
667ff1ae3b3SKonstantin Belousov 	}
668c3d8a931SKonstantin Belousov 	return (e);
6698f0e9130SKonstantin Belousov }
6708f0e9130SKonstantin Belousov 
6718f0e9130SKonstantin Belousov void *
672c3d8a931SKonstantin Belousov rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
6738f0e9130SKonstantin Belousov {
6745badbeeaSKonstantin Belousov 	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ));
675e3680954SRick Macklem }
676e3680954SRick Macklem 
677e3680954SRick Macklem void *
678c3d8a931SKonstantin Belousov rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
679e3680954SRick Macklem {
6805badbeeaSKonstantin Belousov 	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ));
6818f0e9130SKonstantin Belousov }
6828f0e9130SKonstantin Belousov 
6838f0e9130SKonstantin Belousov void *
684c3d8a931SKonstantin Belousov rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
6858f0e9130SKonstantin Belousov {
6869ef425e5SKonstantin Belousov 	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE));
687e3680954SRick Macklem }
688e3680954SRick Macklem 
689e3680954SRick Macklem void *
690c3d8a931SKonstantin Belousov rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
691e3680954SRick Macklem {
6925badbeeaSKonstantin Belousov 	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
6938f0e9130SKonstantin Belousov }
6943155f2f0SKyle Evans 
6953155f2f0SKyle Evans #ifdef INVARIANT_SUPPORT
/*
 * Backing function for rangelock cookie assertion macros.  Currently an
 * intentional no-op: the lockless implementation keeps no per-cookie
 * state that could be checked here.  Kept so that INVARIANT_SUPPORT
 * kernels still link against the assertion macros.
 */
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
}
7003155f2f0SKyle Evans #endif	/* INVARIANT_SUPPORT */
701c3d8a931SKonstantin Belousov 
702c3d8a931SKonstantin Belousov #include "opt_ddb.h"
703c3d8a931SKonstantin Belousov #ifdef DDB
704c3d8a931SKonstantin Belousov #include <ddb/ddb.h>
705c3d8a931SKonstantin Belousov 
/*
 * DDB "show rangelock <addr>" command: dump the state of a rangelock
 * and walk its queue of entries, showing each entry's range, flags,
 * and whether its pointers carry the lockless-removal mark.
 */
DB_SHOW_COMMAND(rangelock, db_show_rangelock)
{
	struct rangelock *lock;
	struct rl_q_entry *e, *x;
	uintptr_t v;

	if (!have_addr) {
		db_printf("show rangelock addr\n");
		return;
	}

	lock = (struct rangelock *)addr;
	db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers);
	v = lock->head;
	if ((v & RL_CHEAT_CHEATING) != 0) {
		/* Cheat mode: head holds state bits, not an entry list. */
		db_printf("  cheating head %#jx\n", (uintmax_t)v);
		return;
	}
	for (e = (struct rl_q_entry *)(lock->head);;) {
		/* x is e with the removal mark stripped, if present. */
		x = rl_e_is_marked(e) ? rl_e_unmark(e) : e;
		if (x == NULL)
			break;
		db_printf("  entry %p marked %d %d start %#jx end %#jx "
		    "flags %x next %p",
		    e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next),
		    x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next);
#ifdef INVARIANTS
		db_printf(" owner %p (%d)", x->rl_q_owner,
		    x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1);
#endif
		db_printf("\n");
		/* Follow the raw (possibly marked) next pointer. */
		e = x->rl_q_next;
	}
}
740c3d8a931SKonstantin Belousov 
741c3d8a931SKonstantin Belousov #endif	/* DDB */
742