xref: /freebsd/sys/kern/kern_rangelock.c (revision 9ef425e560a97cabd1862e803eeb48468f89de18)
18f0e9130SKonstantin Belousov /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
38a36da99SPedro F. Giffuni  *
48f0e9130SKonstantin Belousov  * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
58f0e9130SKonstantin Belousov  * All rights reserved.
68f0e9130SKonstantin Belousov  *
78f0e9130SKonstantin Belousov  * Redistribution and use in source and binary forms, with or without
88f0e9130SKonstantin Belousov  * modification, are permitted provided that the following conditions
98f0e9130SKonstantin Belousov  * are met:
108f0e9130SKonstantin Belousov  * 1. Redistributions of source code must retain the above copyright
118f0e9130SKonstantin Belousov  *    notice unmodified, this list of conditions, and the following
128f0e9130SKonstantin Belousov  *    disclaimer.
138f0e9130SKonstantin Belousov  * 2. Redistributions in binary form must reproduce the above copyright
148f0e9130SKonstantin Belousov  *    notice, this list of conditions and the following disclaimer in the
158f0e9130SKonstantin Belousov  *    documentation and/or other materials provided with the distribution.
168f0e9130SKonstantin Belousov  *
178f0e9130SKonstantin Belousov  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
188f0e9130SKonstantin Belousov  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
198f0e9130SKonstantin Belousov  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
208f0e9130SKonstantin Belousov  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
218f0e9130SKonstantin Belousov  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
228f0e9130SKonstantin Belousov  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
238f0e9130SKonstantin Belousov  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
248f0e9130SKonstantin Belousov  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
258f0e9130SKonstantin Belousov  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
268f0e9130SKonstantin Belousov  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
278f0e9130SKonstantin Belousov  */
288f0e9130SKonstantin Belousov 
298f0e9130SKonstantin Belousov #include <sys/param.h>
30c3d8a931SKonstantin Belousov #include <sys/kassert.h>
318f0e9130SKonstantin Belousov #include <sys/kernel.h>
328f0e9130SKonstantin Belousov #include <sys/lock.h>
338f0e9130SKonstantin Belousov #include <sys/mutex.h>
348f0e9130SKonstantin Belousov #include <sys/proc.h>
358f0e9130SKonstantin Belousov #include <sys/rangelock.h>
36c3d8a931SKonstantin Belousov #include <sys/sleepqueue.h>
37c3d8a931SKonstantin Belousov #include <sys/smr.h>
38*9ef425e5SKonstantin Belousov #include <sys/sysctl.h>
398f0e9130SKonstantin Belousov 
408f0e9130SKonstantin Belousov #include <vm/uma.h>
418f0e9130SKonstantin Belousov 
42c3d8a931SKonstantin Belousov /*
43*9ef425e5SKonstantin Belousov  * Immediately after initialization (subject to 'rangelock_cheat'
44*9ef425e5SKonstantin Belousov  * below), and until a request comes that conflicts with granted ones
45*9ef425e5SKonstantin Belousov  * based on type, rangelocks serve requests in the "cheating" mode.
46*9ef425e5SKonstantin Belousov  * In this mode, a rangelock behaves like a sxlock, as if each request
47*9ef425e5SKonstantin Belousov  * covered the whole range of the protected object.  On receiving a
48*9ef425e5SKonstantin Belousov  * conflicting request (any request while a write request is
49*9ef425e5SKonstantin Belousov  * effective, or any write request while some read ones are
50*9ef425e5SKonstantin Belousov  * effective), all requests granted in "cheating" mode are drained,
51*9ef425e5SKonstantin Belousov  * and the rangelock then switches to effectively keeping track of the
52*9ef425e5SKonstantin Belousov  * precise range of each new request.
53*9ef425e5SKonstantin Belousov  *
54*9ef425e5SKonstantin Belousov  * The normal sx implementation is not used, to avoid bloating the
55*9ef425e5SKonstantin Belousov  * structures (most importantly, vnodes) which embed rangelocks.
56*9ef425e5SKonstantin Belousov  *
57*9ef425e5SKonstantin Belousov  * The cheating greatly helps the very common pattern where a file is
58*9ef425e5SKonstantin Belousov  * first written single-threaded, and then read by many processes.
59*9ef425e5SKonstantin Belousov  *
60*9ef425e5SKonstantin Belousov  * Lock is in cheat mode when RL_CHEAT_CHEATING bit is set in the
61*9ef425e5SKonstantin Belousov  * lock->head.  Special cookies are returned in this mode, and
62*9ef425e5SKonstantin Belousov  * trylocks are same as normal locks but do not drain.
63*9ef425e5SKonstantin Belousov  */
64*9ef425e5SKonstantin Belousov 
65*9ef425e5SKonstantin Belousov static int rangelock_cheat = 1;
66*9ef425e5SKonstantin Belousov SYSCTL_INT(_debug, OID_AUTO, rangelock_cheat, CTLFLAG_RWTUN,
67*9ef425e5SKonstantin Belousov     &rangelock_cheat, 0,
68*9ef425e5SKonstantin Belousov     "");
69*9ef425e5SKonstantin Belousov 
/*
 * Cheat-mode state is encoded in lock->head.  The low bits
 * (RL_CHEAT_MASK) hold the mode flags below; the remaining bits
 * count granted cheating readers in units of RL_CHEAT_READER.
 */
#define	RL_CHEAT_MASK		0x7
#define	RL_CHEAT_CHEATING	0x1
/* #define	RL_CHEAT_RLOCKED	0x0 */
#define	RL_CHEAT_WLOCKED	0x2
#define	RL_CHEAT_DRAINING	0x4

#define	RL_CHEAT_READER		0x8

/* Distinguished cookie values returned for locks granted in cheat mode. */
#define	RL_RET_CHEAT_RLOCKED	0x1100
#define	RL_RET_CHEAT_WLOCKED	0x2200
80*9ef425e5SKonstantin Belousov 
/*
 * Fast-path lock attempt while the rangelock is in cheat mode.
 * Returns false when the lock is not (or no longer) cheating and the
 * caller must take the regular range-tracking path.  Returns true
 * when the request was fully handled here: *cookie is then either a
 * special RL_RET_CHEAT_* value on success, or NULL for a trylock
 * that would have had to sleep.
 */
static bool
rangelock_cheat_lock(struct rangelock *lock, int locktype, bool trylock,
    void **cookie)
{
	uintptr_t v, x;

	v = (uintptr_t)atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	if ((v & RL_CHEAT_DRAINING) != 0) {
drain:
		/* Cheat-mode owners are draining; trylock cannot sleep. */
		if (trylock) {
			*cookie = NULL;
			return (true);
		}
		sleepq_lock(&lock->head);
drain1:
		DROP_GIANT();
		/*
		 * Sleep until the drain completes, then report false so
		 * the request is retried in the normal mode.  The
		 * sleepqueue chain lock is held on entry to the loop and
		 * re-taken after every wakeup.
		 */
		for (;;) {
			v = (uintptr_t)atomic_load_ptr(&lock->head);
			if ((v & RL_CHEAT_DRAINING) == 0)
				break;
			sleepq_add(&lock->head, NULL, "ranged1", 0, 0);
			sleepq_wait(&lock->head, PRI_USER);
			sleepq_lock(&lock->head);
		}
		sleepq_release(&lock->head);
		PICKUP_GIANT();
		return (false);
	}

	switch (locktype) {
	case RL_LOCK_READ:
		for (;;) {
			if ((v & RL_CHEAT_WLOCKED) != 0) {
				/* Conflict with a cheating writer. */
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				/*
				 * Request a drain; on winning the CAS,
				 * wait for it at drain1 with the
				 * sleepqueue lock already held.
				 */
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
				continue;
			}
			/* Add one reader, preserving the CHEATING bit. */
			x = (v & ~RL_CHEAT_MASK) + RL_CHEAT_READER;
			x |= RL_CHEAT_CHEATING;
			if (atomic_fcmpset_acq_ptr(&lock->head, &v, x) != 0)
				break;
			/* CAS failed; re-examine the refreshed value. */
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_RLOCKED;
		break;
	case RL_LOCK_WRITE:
		for (;;) {
			if ((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER ||
			    (v & RL_CHEAT_WLOCKED) != 0) {
				/* Conflict with readers or a writer. */
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
				continue;
			}
			/* Become the sole cheating writer. */
			x = RL_CHEAT_WLOCKED | RL_CHEAT_CHEATING;
			if (atomic_fcmpset_acq_ptr(&lock->head, &v, x) != 0)
				break;
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_WLOCKED;
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}
173*9ef425e5SKonstantin Belousov 
/*
 * Fast-path unlock for a cookie granted in cheat mode.  Returns
 * false when the lock is not cheating, in which case the cookie is a
 * regular rl_q_entry and the caller must use the normal unlock path.
 */
static bool
rangelock_cheat_unlock(struct rangelock *lock, void *cookie)
{
	uintptr_t v, x;

	v = (uintptr_t)atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);

	MPASS((uintptr_t)cookie == RL_RET_CHEAT_WLOCKED ||
	    (uintptr_t)cookie == RL_RET_CHEAT_RLOCKED);

	switch ((uintptr_t)cookie) {
	case RL_RET_CHEAT_RLOCKED:
		for (;;) {
			MPASS((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER);
			MPASS((v & RL_CHEAT_WLOCKED) == 0);
			/* Drop one reader from the count. */
			x = (v & ~RL_CHEAT_MASK) - RL_CHEAT_READER;
			if ((v & RL_CHEAT_DRAINING) != 0) {
				if (x != 0) {
					/*
					 * Not the last reader; keep
					 * DRAINING set for the rest.
					 */
					x |= RL_CHEAT_DRAINING |
					    RL_CHEAT_CHEATING;
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0)
						break;
				} else {
					/*
					 * Last reader completes the
					 * drain: zero the head and wake
					 * the threads waiting in
					 * rangelock_cheat_lock().
					 */
					sleepq_lock(&lock->head);
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0) {
						sleepq_broadcast(
						    &lock->head,
						    SLEEPQ_SLEEP, 0, 0);
						sleepq_release(&lock->head);
						break;
					}
					sleepq_release(&lock->head);
				}
			} else {
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
		}
		break;
	case RL_RET_CHEAT_WLOCKED:
		for (;;) {
			MPASS((v & RL_CHEAT_WLOCKED) != 0);
			if ((v & RL_CHEAT_DRAINING) != 0) {
				/*
				 * The writer's unlock completes the
				 * drain: reset the lock and wake up
				 * the waiters.
				 */
				sleepq_lock(&lock->head);
				atomic_store_ptr(&lock->head, 0);
				sleepq_broadcast(&lock->head,
				    SLEEPQ_SLEEP, 0, 0);
				sleepq_release(&lock->head);
				break;
			} else {
				if (atomic_fcmpset_ptr(&lock->head, &v,
				    RL_CHEAT_CHEATING) != 0)
					break;
			}
		}
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}
242*9ef425e5SKonstantin Belousov 
/*
 * Destroy-time fast path.  Returns true if the lock is in cheat
 * mode, in which case the only legal state is the idle
 * RL_CHEAT_CHEATING value and there are no queue entries to free.
 * Returns false when the entry list must be torn down by the caller.
 */
static bool
rangelock_cheat_destroy(struct rangelock *lock)
{
	uintptr_t v;

	v = (uintptr_t)atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	MPASS(v == RL_CHEAT_CHEATING);
	return (true);
}
254*9ef425e5SKonstantin Belousov 
255*9ef425e5SKonstantin Belousov /*
256c3d8a931SKonstantin Belousov  * Implementation of range locks based on the paper
257c3d8a931SKonstantin Belousov  * https://doi.org/10.1145/3342195.3387533
258c3d8a931SKonstantin Belousov  * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020
259c3d8a931SKonstantin Belousov  * Scalable Range Locks for Scalable Address Spaces and Beyond
260c3d8a931SKonstantin Belousov  * by Alex Kogan, Dave Dice, and Shady Issa
261c3d8a931SKonstantin Belousov  */
262c3d8a931SKonstantin Belousov 
263c3d8a931SKonstantin Belousov static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e);
264c3d8a931SKonstantin Belousov 
265c3d8a931SKonstantin Belousov /*
266c3d8a931SKonstantin Belousov  * rl_q_next links all granted ranges in the lock.  We cannot free an
267c3d8a931SKonstantin Belousov  * rl_q_entry while in the smr section, and cannot reuse rl_q_next
268c3d8a931SKonstantin Belousov  * linkage since other threads might follow it even after CAS removed
269c3d8a931SKonstantin Belousov  * the range.  Use rl_q_free for local list of ranges to remove after
270c3d8a931SKonstantin Belousov  * the smr section is dropped.
271c3d8a931SKonstantin Belousov  */
struct rl_q_entry {
	/* Next granted range in the lock's list; low bit is the mark. */
	struct rl_q_entry *rl_q_next;
	/* Local free-list linkage used once the entry is unlinked. */
	struct rl_q_entry *rl_q_free;
	/* Boundaries of the locked byte range. */
	off_t		rl_q_start, rl_q_end;
	/* RL_LOCK_* request flags (read vs. write). */
	int		rl_q_flags;
#ifdef INVARIANTS
	/* Thread that owns the granted range, for assertions. */
	struct thread	*rl_q_owner;
#endif
};
2818f0e9130SKonstantin Belousov 
2828f0e9130SKonstantin Belousov static uma_zone_t rl_entry_zone;
283c3d8a931SKonstantin Belousov static smr_t rl_smr;
2848f0e9130SKonstantin Belousov 
/*
 * Boot-time setup: create the SMR-managed UMA zone for queue entries
 * and capture its SMR state for use by the lockless traversals.
 */
static void
rangelock_sys_init(void)
{
	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
	    UMA_ZONE_SMR);
	rl_smr = uma_zone_get_smr(rl_entry_zone);
}
SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);
2948f0e9130SKonstantin Belousov 
/*
 * Allocate and initialize a queue entry for the requested range and
 * RL_LOCK_* flags.  The per-thread cached entry (td_rlqe) is
 * consumed first, when available, to avoid a zone allocation.
 */
static struct rl_q_entry *
rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
{
	struct rl_q_entry *e;
	struct thread *td;

	td = curthread;
	if (td->td_rlqe != NULL) {
		/* Take the thread-cached entry. */
		e = td->td_rlqe;
		td->td_rlqe = NULL;
	} else {
		e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
	}
	e->rl_q_next = NULL;
	e->rl_q_free = NULL;
	e->rl_q_start = start;
	e->rl_q_end = end;
	e->rl_q_flags = flags;
#ifdef INVARIANTS
	e->rl_q_owner = curthread;
#endif
	return (e);
}
3188f0e9130SKonstantin Belousov 
/*
 * Free a queue entry back to the SMR zone, so concurrent lockless
 * readers may still safely dereference it until the SMR epoch ends.
 */
void
rangelock_entry_free(struct rl_q_entry *e)
{
	uma_zfree_smr(rl_entry_zone, e);
}
324ff1ae3b3SKonstantin Belousov 
/*
 * Initialize a rangelock: no sleepers, and a head that starts in
 * cheat mode unless disabled by the rangelock_cheat tunable.
 */
void
rangelock_init(struct rangelock *lock)
{
	lock->sleepers = false;
	atomic_store_ptr(&lock->head, rangelock_cheat ? RL_CHEAT_CHEATING : 0);
}
3318f0e9130SKonstantin Belousov 
/*
 * Destroy a rangelock and free any queue entries still linked on it.
 * All remaining entries must already be unlocked; rl_e_unmark()
 * asserts that every followed link indeed carries the mark bit.
 */
void
rangelock_destroy(struct rangelock *lock)
{
	struct rl_q_entry *e, *ep;

	MPASS(!lock->sleepers);
	if (rangelock_cheat_destroy(lock))
		return;
	for (e = (struct rl_q_entry *)atomic_load_ptr(&lock->head);
	    e != NULL; e = rl_e_unmark(ep)) {
		/* Fetch the link before freeing the entry it lives in. */
		ep = atomic_load_ptr(&e->rl_q_next);
		uma_zfree_smr(rl_entry_zone, e);
	}
}
3468f0e9130SKonstantin Belousov 
347c3d8a931SKonstantin Belousov static bool
348c3d8a931SKonstantin Belousov rl_e_is_marked(const struct rl_q_entry *e)
3498f0e9130SKonstantin Belousov {
350c3d8a931SKonstantin Belousov 	return (((uintptr_t)e & 1) != 0);
3518f0e9130SKonstantin Belousov }
3528f0e9130SKonstantin Belousov 
353c3d8a931SKonstantin Belousov static struct rl_q_entry *
3545badbeeaSKonstantin Belousov rl_e_unmark_unchecked(const struct rl_q_entry *e)
3555badbeeaSKonstantin Belousov {
3565badbeeaSKonstantin Belousov 	return ((struct rl_q_entry *)((uintptr_t)e & ~1));
3575badbeeaSKonstantin Belousov }
3585badbeeaSKonstantin Belousov 
/* Strip the mark bit from a link pointer, asserting that it was set. */
static struct rl_q_entry *
rl_e_unmark(const struct rl_q_entry *e)
{
	MPASS(rl_e_is_marked(e));
	return (rl_e_unmark_unchecked(e));
}
3655badbeeaSKonstantin Belousov 
/*
 * Atomically set the mark bit in e's next link, logically removing e
 * from the lock's list.  On INVARIANTS 64-bit kernels, also assert
 * that the entry was not already marked.
 */
static void
rl_e_mark(struct rl_q_entry *e)
{
#if defined(INVARIANTS) && defined(__LP64__)
	int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0);
	MPASS(r == 0);
#else
	atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
#endif
}
3762bb93f2dSColin Percival 
/* Load a list link with acquire semantics. */
static struct rl_q_entry *
rl_q_load(struct rl_q_entry **p)
{
	return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p));
}
3828f0e9130SKonstantin Belousov 
3836c32d89eSKonstantin Belousov static bool
3846c32d89eSKonstantin Belousov rl_e_is_rlock(const struct rl_q_entry *e)
3856c32d89eSKonstantin Belousov {
3866c32d89eSKonstantin Belousov 	return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ);
3876c32d89eSKonstantin Belousov }
3886c32d89eSKonstantin Belousov 
/*
 * Mark the granted range e as unlocked and wake up all threads
 * sleeping on a conflict with this lock; the entry itself is
 * unlinked and freed lazily by later list scans.  Called with the
 * sleepqueue chain lock for &lock->sleepers held.
 */
static void
rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e)
{
	bool sleepers;

	MPASS(lock != NULL && e != NULL);
	MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
	MPASS(e->rl_q_owner == curthread);

	rl_e_mark(e);
	/* Clear the flag before the broadcast; only wake if it was set. */
	sleepers = lock->sleepers;
	lock->sleepers = false;
	if (sleepers)
		sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
}
4045badbeeaSKonstantin Belousov 
/*
 * Release a range lock identified by the cookie returned by the lock
 * request.  The cheat-mode fast path is attempted first; otherwise
 * the cookie is the rl_q_entry to mark unlocked.
 */
void
rangelock_unlock(struct rangelock *lock, void *cookie)
{
	if (rangelock_cheat_unlock(lock, cookie))
		return;

	sleepq_lock(&lock->sleepers);
	rangelock_unlock_int(lock, cookie);
	sleepq_release(&lock->sleepers);
}
4158f0e9130SKonstantin Belousov 
4168f0e9130SKonstantin Belousov /*
4175badbeeaSKonstantin Belousov  * result: -1 if e1 before e2, or both locks are readers and e1
4185badbeeaSKonstantin Belousov  *		starts before or at e2
4195badbeeaSKonstantin Belousov  *          1 if e1 after e2, or both locks are readers and e1
4205badbeeaSKonstantin Belousov  *		starts after e2
4215badbeeaSKonstantin Belousov  *          0 if e1 and e2 overlap and at least one lock is writer
4228f0e9130SKonstantin Belousov  */
423c3d8a931SKonstantin Belousov static int
424c3d8a931SKonstantin Belousov rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
4258f0e9130SKonstantin Belousov {
4265badbeeaSKonstantin Belousov 	bool rds;
4275badbeeaSKonstantin Belousov 
428c3d8a931SKonstantin Belousov 	if (e1 == NULL)
429c3d8a931SKonstantin Belousov 		return (1);
430c3d8a931SKonstantin Belousov 	if (e2->rl_q_start >= e1->rl_q_end)
431c3d8a931SKonstantin Belousov 		return (-1);
4325badbeeaSKonstantin Belousov 	rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2);
4335badbeeaSKonstantin Belousov 	if (e2->rl_q_start >= e1->rl_q_start && rds)
4345badbeeaSKonstantin Belousov 		return (-1);
4355badbeeaSKonstantin Belousov 	if (e1->rl_q_start >= e2->rl_q_end)
4365badbeeaSKonstantin Belousov 		return (1);
4375badbeeaSKonstantin Belousov 	if (e1->rl_q_start >= e2->rl_q_start && rds)
4385badbeeaSKonstantin Belousov 		return (1);
439c3d8a931SKonstantin Belousov 	return (0);
4408f0e9130SKonstantin Belousov }
4418f0e9130SKonstantin Belousov 
/*
 * Sleep until a conflicting range is unlocked.  Called with the
 * sleepqueue chain lock for &lock->sleepers held; the SMR section
 * (and Giant) are dropped across the sleep and re-entered before
 * returning.
 */
static void
rl_insert_sleep(struct rangelock *lock)
{
	smr_exit(rl_smr);
	DROP_GIANT();
	lock->sleepers = true;
	sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0);
	sleepq_wait(&lock->sleepers, PRI_USER);
	PICKUP_GIANT();
	smr_enter(rl_smr);
}
4538f0e9130SKonstantin Belousov 
/*
 * Compare-and-swap a list link with release semantics; returns true
 * when *prev was advanced from old to new.
 */
static bool
rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
    struct rl_q_entry *new)
{
	return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old,
	    (uintptr_t)new) != 0);
}
4618f0e9130SKonstantin Belousov 
/* Outcome of inserting a request into the lock's list. */
enum RL_INSERT_RES {
	RL_TRYLOCK_FAILED,	/* Conflict found; trylock request fails. */
	RL_LOCK_SUCCESS,	/* Range granted to the caller. */
	RL_LOCK_RETRY,		/* Slept on a conflict; caller must retry. */
};
4675badbeeaSKonstantin Belousov 
/*
 * Validate a tentatively inserted read request e: scan the entries
 * that follow it for an overlapping write request.  Marked (already
 * unlocked) entries are unlinked on the way and collected on the
 * local *free list; other readers are skipped.  On finding a live
 * conflicting writer, the request is withdrawn and the thread either
 * fails (trylock) or sleeps and retries.
 */
static enum RL_INSERT_RES
rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;

	prev = &e->rl_q_next;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* nobody can unlock e yet */
	for (;;) {
		/* Past the end of e's range: no conflict possible. */
		if (cur == NULL || cur->rl_q_start > e->rl_q_end)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			/* cur was unlocked; try to unlink it. */
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
			}
			cur = next;
			continue;
		}
		if (rl_e_is_rlock(cur)) {
			/* Readers do not conflict with each other. */
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}
		if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
			/*
			 * Live conflicting writer.  Recheck under the
			 * sleepqueue lock so a racing unlock of cur
			 * cannot lose our wakeup.
			 */
			sleepq_lock(&lock->sleepers);
			if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
				sleepq_release(&lock->sleepers);
				continue;
			}
			rangelock_unlock_int(lock, e);
			if (trylock) {
				sleepq_release(&lock->sleepers);
				return (RL_TRYLOCK_FAILED);
			}
			rl_insert_sleep(lock);
			return (RL_LOCK_RETRY);
		}
	}
}
5115badbeeaSKonstantin Belousov 
5125badbeeaSKonstantin Belousov static enum RL_INSERT_RES
5135badbeeaSKonstantin Belousov rl_w_validate(struct rangelock *lock, struct rl_q_entry *e,
5145badbeeaSKonstantin Belousov     bool trylock, struct rl_q_entry **free)
5155badbeeaSKonstantin Belousov {
5165badbeeaSKonstantin Belousov 	struct rl_q_entry *cur, *next, **prev;
5175badbeeaSKonstantin Belousov 
518*9ef425e5SKonstantin Belousov 	prev = (struct rl_q_entry **)&lock->head;
5195badbeeaSKonstantin Belousov 	cur = rl_q_load(prev);
5205badbeeaSKonstantin Belousov 	MPASS(!rl_e_is_marked(cur));	/* head is not marked */
5215badbeeaSKonstantin Belousov 	for (;;) {
5225badbeeaSKonstantin Belousov 		if (cur == e)
5235badbeeaSKonstantin Belousov 			return (RL_LOCK_SUCCESS);
5245badbeeaSKonstantin Belousov 		next = rl_q_load(&cur->rl_q_next);
5255badbeeaSKonstantin Belousov 		if (rl_e_is_marked(next)) {
5265badbeeaSKonstantin Belousov 			next = rl_e_unmark(next);
5275badbeeaSKonstantin Belousov 			if (rl_q_cas(prev, cur, next)) {
5285badbeeaSKonstantin Belousov 				cur->rl_q_next = *free;
5295badbeeaSKonstantin Belousov 				*free = cur;
5305badbeeaSKonstantin Belousov 			}
5315badbeeaSKonstantin Belousov 			cur = next;
5325badbeeaSKonstantin Belousov 			continue;
5335badbeeaSKonstantin Belousov 		}
5345badbeeaSKonstantin Belousov 		if (cur->rl_q_end <= e->rl_q_start) {
5355badbeeaSKonstantin Belousov 			prev = &cur->rl_q_next;
5365badbeeaSKonstantin Belousov 			cur = rl_e_unmark_unchecked(rl_q_load(prev));
5375badbeeaSKonstantin Belousov 			continue;
5385badbeeaSKonstantin Belousov 		}
5395badbeeaSKonstantin Belousov 		sleepq_lock(&lock->sleepers);
5405badbeeaSKonstantin Belousov 		rangelock_unlock_int(lock, e);
5415badbeeaSKonstantin Belousov 		if (trylock) {
5425badbeeaSKonstantin Belousov 			sleepq_release(&lock->sleepers);
5435badbeeaSKonstantin Belousov 			return (RL_TRYLOCK_FAILED);
5445badbeeaSKonstantin Belousov 		}
5455badbeeaSKonstantin Belousov 		rl_insert_sleep(lock);
5465badbeeaSKonstantin Belousov 		return (RL_LOCK_RETRY);
5475badbeeaSKonstantin Belousov 	}
5485badbeeaSKonstantin Belousov }
5495badbeeaSKonstantin Belousov 
/*
 * Insert the range entry "e" into the sorted list of active ranges for
 * "lock".  The list is traversed lock-free: a marked rl_q_next pointer
 * tags an entry as logically deleted, and walkers help unlink such
 * entries, chaining them onto *free (via rl_q_free) for the caller to
 * reclaim once outside the SMR read section.
 *
 * Returns RL_LOCK_SUCCESS when "e" was linked in and validated,
 * RL_TRYLOCK_FAILED when trylock was requested but a conflicting entry
 * exists, or the result of rl_r_validate()/rl_w_validate() after a
 * successful link (which may be RL_LOCK_RETRY, telling the caller to
 * restart the whole acquisition).
 */
5505badbeeaSKonstantin Belousov static enum RL_INSERT_RES
551c3d8a931SKonstantin Belousov rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
552c3d8a931SKonstantin Belousov     struct rl_q_entry **free)
553c3d8a931SKonstantin Belousov {
554c3d8a931SKonstantin Belousov 	struct rl_q_entry *cur, *next, **prev;
555c3d8a931SKonstantin Belousov 	int r;
5568f0e9130SKonstantin Belousov 
557c3d8a931SKonstantin Belousov again:
	/* Fast path: empty list, try to install "e" as the sole entry. */
558*9ef425e5SKonstantin Belousov 	prev = (struct rl_q_entry **)&lock->head;
5595badbeeaSKonstantin Belousov 	cur = rl_q_load(prev);
5605badbeeaSKonstantin Belousov 	if (cur == NULL && rl_q_cas(prev, NULL, e))
5615badbeeaSKonstantin Belousov 		return (RL_LOCK_SUCCESS);
5628f0e9130SKonstantin Belousov 
5635badbeeaSKonstantin Belousov 	for (;;) {
5645badbeeaSKonstantin Belousov 		if (cur != NULL) {
			/*
			 * A marked "cur" means the link we loaded it from
			 * was concurrently removed; restart from the head.
			 */
565c3d8a931SKonstantin Belousov 			if (rl_e_is_marked(cur))
566c3d8a931SKonstantin Belousov 				goto again;
567c3d8a931SKonstantin Belousov 
568c3d8a931SKonstantin Belousov 			next = rl_q_load(&cur->rl_q_next);
569c3d8a931SKonstantin Belousov 			if (rl_e_is_marked(next)) {
				/*
				 * "cur" is logically deleted: help unlink
				 * it, and on winning the CAS donate it to
				 * the caller's free list.
				 */
570c3d8a931SKonstantin Belousov 				next = rl_e_unmark(next);
571c3d8a931SKonstantin Belousov 				if (rl_q_cas(prev, cur, next)) {
572c3d8a931SKonstantin Belousov #ifdef INVARIANTS
573c3d8a931SKonstantin Belousov 					cur->rl_q_owner = NULL;
574c3d8a931SKonstantin Belousov #endif
575c3d8a931SKonstantin Belousov 					cur->rl_q_free = *free;
576c3d8a931SKonstantin Belousov 					*free = cur;
577c3d8a931SKonstantin Belousov 				}
578c3d8a931SKonstantin Belousov 				cur = next;
579c3d8a931SKonstantin Belousov 				continue;
580c3d8a931SKonstantin Belousov 			}
581c3d8a931SKonstantin Belousov 		}
582c3d8a931SKonstantin Belousov 
		/*
		 * "cur" may be NULL here (end of list); rl_e_compare()
		 * presumably orders "e" for insertion then (r == 1) —
		 * confirm against its definition.
		 */
583c3d8a931SKonstantin Belousov 		r = rl_e_compare(cur, e);
584c3d8a931SKonstantin Belousov 		if (r == -1) {
			/* "cur" sorts before "e"; keep walking. */
585c3d8a931SKonstantin Belousov 			prev = &cur->rl_q_next;
586c3d8a931SKonstantin Belousov 			cur = rl_q_load(prev);
587c3d8a931SKonstantin Belousov 		} else if (r == 0) {
			/*
			 * Conflict with "cur".  Under the sleepqueue lock,
			 * re-check that "cur" was not removed meanwhile,
			 * then either fail the trylock or sleep and retry
			 * the insertion from scratch.
			 */
588c3d8a931SKonstantin Belousov 			sleepq_lock(&lock->sleepers);
589c3d8a931SKonstantin Belousov 			if (__predict_false(rl_e_is_marked(rl_q_load(
590c3d8a931SKonstantin Belousov 			    &cur->rl_q_next)))) {
591c3d8a931SKonstantin Belousov 				sleepq_release(&lock->sleepers);
592c3d8a931SKonstantin Belousov 				continue;
593c3d8a931SKonstantin Belousov 			}
594e3680954SRick Macklem 			if (trylock) {
595c3d8a931SKonstantin Belousov 				sleepq_release(&lock->sleepers);
5965badbeeaSKonstantin Belousov 				return (RL_TRYLOCK_FAILED);
597e3680954SRick Macklem 			}
598c3d8a931SKonstantin Belousov 			rl_insert_sleep(lock);
599c3d8a931SKonstantin Belousov 			/* e is still valid */
600c3d8a931SKonstantin Belousov 			goto again;
601c3d8a931SKonstantin Belousov 		} else /* r == 1 */ {
			/* Link "e" in front of "cur", then validate. */
602c3d8a931SKonstantin Belousov 			e->rl_q_next = cur;
603c3d8a931SKonstantin Belousov 			if (rl_q_cas(prev, cur, e)) {
604c3d8a931SKonstantin Belousov 				atomic_thread_fence_acq();
6055badbeeaSKonstantin Belousov 				return (rl_e_is_rlock(e) ?
6065badbeeaSKonstantin Belousov 				    rl_r_validate(lock, e, trylock, free) :
6075badbeeaSKonstantin Belousov 				    rl_w_validate(lock, e, trylock, free));
608e3680954SRick Macklem 			}
609c3d8a931SKonstantin Belousov 			/* Reset rl_q_next in case we hit fast path. */
610c3d8a931SKonstantin Belousov 			e->rl_q_next = NULL;
611c3d8a931SKonstantin Belousov 			cur = rl_q_load(prev);
612c3d8a931SKonstantin Belousov 		}
613c3d8a931SKonstantin Belousov 	}
614c3d8a931SKonstantin Belousov }
615c3d8a931SKonstantin Belousov 
/*
 * Common implementation behind all rangelock_{try,}{r,w}lock() variants:
 * acquire a range lock of type "locktype" over the given byte range.
 *
 * Returns the opaque lock cookie (the inserted rl_q_entry, or whatever
 * the cheat path produced) on success, or NULL when "trylock" was
 * requested and the range is busy.
 */
616c3d8a931SKonstantin Belousov static struct rl_q_entry *
6175badbeeaSKonstantin Belousov rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
6185badbeeaSKonstantin Belousov     vm_ooffset_t end, int locktype)
619c3d8a931SKonstantin Belousov {
6205badbeeaSKonstantin Belousov 	struct rl_q_entry *e, *free, *x, *xp;
621ff1ae3b3SKonstantin Belousov 	struct thread *td;
622*9ef425e5SKonstantin Belousov 	void *cookie;
6235badbeeaSKonstantin Belousov 	enum RL_INSERT_RES res;
624c3d8a931SKonstantin Belousov 
	/* "Cheat" mode: lock state fits in the head word, no list entry. */
625*9ef425e5SKonstantin Belousov 	if (rangelock_cheat_lock(lock, locktype, trylock, &cookie))
626*9ef425e5SKonstantin Belousov 		return (cookie);
627ff1ae3b3SKonstantin Belousov 	td = curthread;
	/* Retry whole insertions until rl_insert() reports a final result. */
6285badbeeaSKonstantin Belousov 	for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) {
629c3d8a931SKonstantin Belousov 		free = NULL;
6305badbeeaSKonstantin Belousov 		e = rlqentry_alloc(start, end, locktype);
		/* Insert inside an SMR read section so list walks stay safe. */
631c3d8a931SKonstantin Belousov 		smr_enter(rl_smr);
632c3d8a931SKonstantin Belousov 		res = rl_insert(lock, e, trylock, &free);
633c3d8a931SKonstantin Belousov 		smr_exit(rl_smr);
6345badbeeaSKonstantin Belousov 		if (res == RL_TRYLOCK_FAILED) {
6355badbeeaSKonstantin Belousov 			MPASS(trylock);
			/* Failed trylock: reclaim "e" along with the rest. */
636c3d8a931SKonstantin Belousov 			e->rl_q_free = free;
637c3d8a931SKonstantin Belousov 			free = e;
638c3d8a931SKonstantin Belousov 			e = NULL;
639c3d8a931SKonstantin Belousov 		}
		/*
		 * Dispose of entries unlinked during the walk.  The first
		 * may be cached as this thread's spare (td_rlqe) after an
		 * explicit SMR synchronization; others return to the SMR
		 * UMA zone, which defers reuse itself.
		 */
640c3d8a931SKonstantin Belousov 		for (x = free; x != NULL; x = xp) {
641c3d8a931SKonstantin Belousov 			MPASS(!rl_e_is_marked(x));
642c3d8a931SKonstantin Belousov 			xp = x->rl_q_free;
643c3d8a931SKonstantin Belousov 			MPASS(!rl_e_is_marked(xp));
644ff1ae3b3SKonstantin Belousov 			if (td->td_rlqe == NULL) {
645ff1ae3b3SKonstantin Belousov 				smr_synchronize(rl_smr);
646ff1ae3b3SKonstantin Belousov 				td->td_rlqe = x;
647ff1ae3b3SKonstantin Belousov 			} else {
648c3d8a931SKonstantin Belousov 				uma_zfree_smr(rl_entry_zone, x);
649c3d8a931SKonstantin Belousov 			}
6505badbeeaSKonstantin Belousov 		}
651ff1ae3b3SKonstantin Belousov 	}
652c3d8a931SKonstantin Belousov 	return (e);
6538f0e9130SKonstantin Belousov }
6548f0e9130SKonstantin Belousov 
6558f0e9130SKonstantin Belousov void *
656c3d8a931SKonstantin Belousov rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
6578f0e9130SKonstantin Belousov {
6585badbeeaSKonstantin Belousov 	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ));
659e3680954SRick Macklem }
660e3680954SRick Macklem 
661e3680954SRick Macklem void *
662c3d8a931SKonstantin Belousov rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
663e3680954SRick Macklem {
6645badbeeaSKonstantin Belousov 	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ));
6658f0e9130SKonstantin Belousov }
6668f0e9130SKonstantin Belousov 
6678f0e9130SKonstantin Belousov void *
668c3d8a931SKonstantin Belousov rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
6698f0e9130SKonstantin Belousov {
670*9ef425e5SKonstantin Belousov 	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE));
671e3680954SRick Macklem }
672e3680954SRick Macklem 
673e3680954SRick Macklem void *
674c3d8a931SKonstantin Belousov rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
675e3680954SRick Macklem {
6765badbeeaSKonstantin Belousov 	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
6778f0e9130SKonstantin Belousov }
6783155f2f0SKyle Evans 
6793155f2f0SKyle Evans #ifdef INVARIANT_SUPPORT
/*
 * Assertion hook for range-lock cookies (presumably reached via a
 * rangelock_cookie_assert() macro in the header — confirm there).
 * Intentionally empty: no cookie validity checks are implemented.
 */
6803155f2f0SKyle Evans void
6813155f2f0SKyle Evans _rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
6823155f2f0SKyle Evans {
6833155f2f0SKyle Evans }
6843155f2f0SKyle Evans #endif	/* INVARIANT_SUPPORT */
685c3d8a931SKonstantin Belousov 
686c3d8a931SKonstantin Belousov #include "opt_ddb.h"
687c3d8a931SKonstantin Belousov #ifdef DDB
688c3d8a931SKonstantin Belousov #include <ddb/ddb.h>
689c3d8a931SKonstantin Belousov 
/*
 * DDB "show rangelock <addr>" command: print the rangelock header and
 * walk its entry list, dumping every entry including those whose link
 * is marked for removal.  Runs from the kernel debugger, so the list
 * is read without any synchronization.
 */
690c3d8a931SKonstantin Belousov DB_SHOW_COMMAND(rangelock, db_show_rangelock)
691c3d8a931SKonstantin Belousov {
692c3d8a931SKonstantin Belousov 	struct rangelock *lock;
693c3d8a931SKonstantin Belousov 	struct rl_q_entry *e, *x;
694*9ef425e5SKonstantin Belousov 	uintptr_t v;
695c3d8a931SKonstantin Belousov 
696c3d8a931SKonstantin Belousov 	if (!have_addr) {
697c3d8a931SKonstantin Belousov 		db_printf("show rangelock addr\n");
698c3d8a931SKonstantin Belousov 		return;
699c3d8a931SKonstantin Belousov 	}
700c3d8a931SKonstantin Belousov 
701c3d8a931SKonstantin Belousov 	lock = (struct rangelock *)addr;
702c3d8a931SKonstantin Belousov 	db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers);
	/* Cheat mode encodes the lock state in the head word, not a list. */
703*9ef425e5SKonstantin Belousov 	v = lock->head;
704*9ef425e5SKonstantin Belousov 	if ((v & RL_CHEAT_CHEATING) != 0) {
705*9ef425e5SKonstantin Belousov 		db_printf("  cheating head %#jx\n", (uintmax_t)v);
706*9ef425e5SKonstantin Belousov 		return;
707*9ef425e5SKonstantin Belousov 	}
708*9ef425e5SKonstantin Belousov 	for (e = (struct rl_q_entry *)(lock->head);;) {
		/* Strip the removal mark so the entry can be dereferenced. */
709c3d8a931SKonstantin Belousov 		x = rl_e_is_marked(e) ? rl_e_unmark(e) : e;
710c3d8a931SKonstantin Belousov 		if (x == NULL)
711c3d8a931SKonstantin Belousov 			break;
712c3d8a931SKonstantin Belousov 		db_printf("  entry %p marked %d %d start %#jx end %#jx "
713c3d8a931SKonstantin Belousov 		    "flags %x next %p",
714c3d8a931SKonstantin Belousov 		    e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next),
715c3d8a931SKonstantin Belousov 		    x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next);
716c3d8a931SKonstantin Belousov #ifdef INVARIANTS
717c3d8a931SKonstantin Belousov 		db_printf(" owner %p (%d)", x->rl_q_owner,
718c3d8a931SKonstantin Belousov 		    x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1);
719c3d8a931SKonstantin Belousov #endif
720c3d8a931SKonstantin Belousov 		db_printf("\n");
721c3d8a931SKonstantin Belousov 		e = x->rl_q_next;
722c3d8a931SKonstantin Belousov 	}
723c3d8a931SKonstantin Belousov }
724c3d8a931SKonstantin Belousov 
725c3d8a931SKonstantin Belousov #endif	/* DDB */
726