/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * Portions of this software were developed by
 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kassert.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/sleepqueue.h>
#include <sys/smr.h>
#include <sys/sysctl.h>

#include <vm/uma.h>

/*
 * Immediately after initialization (subject to 'rangelock_cheat'
 * below), and until a request arrives that conflicts with already
 * granted ones based on type, rangelocks serve requests in the
 * "cheating" mode.  In this mode, a rangelock behaves like an sxlock,
 * as if each request covered the whole range of the protected object.
 * On receiving a conflicting request (any request while a write
 * request is effective, or any write request while some read requests
 * are effective), all requests granted in "cheating" mode are
 * drained, and the rangelock then switches to effectively keeping
 * track of the precise range of each new request.
 *
 * The normal sx implementation is not used in order to avoid bloating
 * the structures (most importantly, vnodes) that embed rangelocks.
 *
 * Cheating greatly helps the very common pattern where a file is
 * first written single-threaded, and then read by many processes.
 *
 * The lock is in cheat mode when the RL_CHEAT_CHEATING bit is set in
 * lock->head.  Special cookies are returned in this mode, and
 * trylocks are the same as normal locks but do not drain.
 */

static int rangelock_cheat = 1;
SYSCTL_INT(_debug, OID_AUTO, rangelock_cheat, CTLFLAG_RWTUN,
    &rangelock_cheat, 0,
    "");

#define	RL_CHEAT_MASK		0x7
#define	RL_CHEAT_CHEATING	0x1
/* #define RL_CHEAT_RLOCKED	0x0 */
#define	RL_CHEAT_WLOCKED	0x2
#define	RL_CHEAT_DRAINING	0x4

#define	RL_CHEAT_READER		0x8

#define	RL_RET_CHEAT_RLOCKED	0x1100
#define	RL_RET_CHEAT_WLOCKED	0x2200
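
/*
 * Illustrative cheat-mode head encodings (a sketch derived from the
 * state transitions implemented below, not an exhaustive list):
 *
 *	0x1	cheating, unlocked
 *	0x3	cheating, write-locked (RL_CHEAT_WLOCKED | RL_CHEAT_CHEATING)
 *	0x19	cheating, three readers (3 * RL_CHEAT_READER | RL_CHEAT_CHEATING)
 *	0x7	write-locked and draining; new requests sleep until the
 *		owner unlocks and resets the head to 0 (non-cheat mode)
 */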

/*
 * Drain the cheat-mode lock: sleep until the RL_CHEAT_DRAINING bit
 * clears.  Entered with the sleepqueue chain lock for &lock->head
 * held; returns with it released.
 */
static void
rangelock_cheat_drain(struct rangelock *lock)
{
	uintptr_t v;

	DROP_GIANT();
	for (;;) {
		v = atomic_load_ptr(&lock->head);
		if ((v & RL_CHEAT_DRAINING) == 0)
			break;
		sleepq_add(&lock->head, NULL, "ranged1", 0, 0);
		sleepq_wait(&lock->head, PRI_USER);
		sleepq_lock(&lock->head);
	}
	sleepq_release(&lock->head);
	PICKUP_GIANT();
}

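/*
 * Try to serve a lock request in cheat mode.  Returns true if the
 * request was handled: on success *cookie is set to one of the
 * RL_RET_CHEAT_* values, while a failed trylock sets *cookie to NULL.
 * Returns false if the lock is not (or is no longer) cheating, in
 * which case the caller falls back to the full range-tracking code.
 */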
static bool
rangelock_cheat_lock(struct rangelock *lock, int locktype, bool trylock,
    void **cookie)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	if ((v & RL_CHEAT_DRAINING) != 0) {
drain:
		if (trylock) {
			*cookie = NULL;
			return (true);
		}
		sleepq_lock(&lock->head);
drain1:
		rangelock_cheat_drain(lock);
		return (false);
	}

	switch (locktype) {
	case RL_LOCK_READ:
		for (;;) {
			if ((v & RL_CHEAT_WLOCKED) != 0) {
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				x = (v & ~RL_CHEAT_MASK) + RL_CHEAT_READER;
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_RLOCKED;
		break;
	case RL_LOCK_WRITE:
		for (;;) {
			if ((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER ||
			    (v & RL_CHEAT_WLOCKED) != 0) {
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				x = RL_CHEAT_WLOCKED | RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_WLOCKED;
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}

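/*
 * Release a cheat-mode lock identified by its cookie.  Returns false
 * if the lock is not in cheat mode, true after the cheat-mode unlock
 * was performed, waking up drainers when the last holder goes away.
 */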
static bool
rangelock_cheat_unlock(struct rangelock *lock, void *cookie)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);

	MPASS((uintptr_t)cookie == RL_RET_CHEAT_WLOCKED ||
	    (uintptr_t)cookie == RL_RET_CHEAT_RLOCKED);

	switch ((uintptr_t)cookie) {
	case RL_RET_CHEAT_RLOCKED:
		for (;;) {
			MPASS((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER);
			MPASS((v & RL_CHEAT_WLOCKED) == 0);
			x = (v & ~RL_CHEAT_MASK) - RL_CHEAT_READER;
			if ((v & RL_CHEAT_DRAINING) != 0) {
				if (x != 0) {
					x |= RL_CHEAT_DRAINING |
					    RL_CHEAT_CHEATING;
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0)
						break;
				} else {
					sleepq_lock(&lock->head);
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0) {
						sleepq_broadcast(&lock->head,
						    SLEEPQ_SLEEP, 0, 0);
						sleepq_release(&lock->head);
						break;
					}
					sleepq_release(&lock->head);
				}
			} else {
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
		}
		break;
	case RL_RET_CHEAT_WLOCKED:
		for (;;) {
			MPASS((v & RL_CHEAT_WLOCKED) != 0);
			if ((v & RL_CHEAT_DRAINING) != 0) {
				sleepq_lock(&lock->head);
				atomic_store_ptr(&lock->head, 0);
				sleepq_broadcast(&lock->head,
				    SLEEPQ_SLEEP, 0, 0);
				sleepq_release(&lock->head);
				break;
			} else {
				if (atomic_fcmpset_ptr(&lock->head, &v,
				    RL_CHEAT_CHEATING) != 0)
					break;
			}
		}
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}

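/*
 * Cheat-mode part of rangelock_destroy().  Returns false if the lock
 * is not in cheat mode; otherwise asserts that the lock is idle.
 */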
static bool
rangelock_cheat_destroy(struct rangelock *lock)
{
	uintptr_t v;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	MPASS(v == RL_CHEAT_CHEATING);
	return (true);
}

/*
 * Implementation of range locks based on the paper
 * https://doi.org/10.1145/3342195.3387533
 * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020
 * Scalable Range Locks for Scalable Address Spaces and Beyond
 * by Alex Kogan, Dave Dice, and Shady Issa
 */

static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e);

/*
 * rl_q_next links all granted ranges in the lock.  We cannot free an
 * rl_q_entry while in the smr section, and cannot reuse rl_q_next
 * linkage since other threads might follow it even after the CAS that
 * removed the range.  Use rl_q_free for a local list of ranges to
 * remove after the smr section is dropped.
 */
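/*
 * A note on the removal scheme, as inferred from rl_e_mark() and its
 * callers: an entry is logically deleted by atomically setting the
 * low bit of its rl_q_next pointer.  Zone allocations are aligned, so
 * the bit is otherwise always clear; readers strip it with
 * rl_e_unmark() before dereferencing the pointer.
 */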
struct rl_q_entry {
	struct rl_q_entry *rl_q_next;
	struct rl_q_entry *rl_q_free;
	off_t		rl_q_start, rl_q_end;
	int		rl_q_flags;
#ifdef INVARIANTS
	struct thread	*rl_q_owner;
#endif
};

static uma_zone_t rl_entry_zone;
static smr_t rl_smr;

static void rangelock_free_free(struct rl_q_entry *free);
static void rangelock_noncheating_destroy(struct rangelock *lock);

static void
rangelock_sys_init(void)
{
	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
	    UMA_ZONE_SMR);
	rl_smr = uma_zone_get_smr(rl_entry_zone);
}
SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

static struct rl_q_entry *
rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
{
	struct rl_q_entry *e;

	e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
	e->rl_q_next = NULL;
	e->rl_q_free = NULL;
	e->rl_q_start = start;
	e->rl_q_end = end;
	e->rl_q_flags = flags;
#ifdef INVARIANTS
	e->rl_q_owner = curthread;
#endif
	return (e);
}

void
rangelock_init(struct rangelock *lock)
{
	lock->sleepers = false;
	atomic_store_ptr(&lock->head, rangelock_cheat ? RL_CHEAT_CHEATING : 0);
}

void
rangelock_destroy(struct rangelock *lock)
{
	MPASS(!lock->sleepers);
	if (!rangelock_cheat_destroy(lock))
		rangelock_noncheating_destroy(lock);
	DEBUG_POISON_POINTER(*(void **)&lock->head);
}

static bool
rl_e_is_marked(const struct rl_q_entry *e)
{
	return (((uintptr_t)e & 1) != 0);
}

static struct rl_q_entry *
rl_e_unmark_unchecked(const struct rl_q_entry *e)
{
	return ((struct rl_q_entry *)((uintptr_t)e & ~1));
}

static struct rl_q_entry *
rl_e_unmark(const struct rl_q_entry *e)
{
	MPASS(rl_e_is_marked(e));
	return (rl_e_unmark_unchecked(e));
}

static void
rl_e_mark(struct rl_q_entry *e)
{
#if defined(INVARIANTS)
	int r = atomic_testandset_ptr((uintptr_t *)&e->rl_q_next, 0);
	MPASS(r == 0);
#else
	atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
#endif
}

static struct rl_q_entry *
rl_q_load(struct rl_q_entry **p)
{
	return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p));
}

static bool
rl_e_is_rlock(const struct rl_q_entry *e)
{
	return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ);
}

static void
rangelock_free_free(struct rl_q_entry *free)
{
	struct rl_q_entry *x, *xp;

	for (x = free; x != NULL; x = xp) {
		MPASS(!rl_e_is_marked(x));
		xp = x->rl_q_free;
		MPASS(!rl_e_is_marked(xp));
		uma_zfree_smr(rl_entry_zone, x);
	}
}

static void
rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e)
{
	bool sleepers;

	MPASS(lock != NULL && e != NULL);
	MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
	MPASS(e->rl_q_owner == curthread);

	rl_e_mark(e);
	sleepers = lock->sleepers;
	lock->sleepers = false;
	if (sleepers)
		sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie)
{
	if (rangelock_cheat_unlock(lock, cookie))
		return;

	sleepq_lock(&lock->sleepers);
	rangelock_unlock_int(lock, cookie);
	sleepq_release(&lock->sleepers);
}

/*
 * result: -1 if e1 before e2, or both locks are readers and e1
 *		starts before or at e2
 *	    1 if e1 after e2, or both locks are readers and e1
 *		starts after e2
 *	    0 if e1 and e2 overlap and at least one lock is a writer
 *
 * For example, two read locks over [0, 10) and [5, 15) compare as -1,
 * while the same ranges compare as 0 if either one is a write lock.
 */
static int
rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
{
	bool rds;

	if (e1 == NULL)
		return (1);
	if (e2->rl_q_start >= e1->rl_q_end)
		return (-1);
	rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2);
	if (e2->rl_q_start >= e1->rl_q_start && rds)
		return (-1);
	if (e1->rl_q_start >= e2->rl_q_end)
		return (1);
	if (e1->rl_q_start >= e2->rl_q_start && rds)
		return (1);
	return (0);
}

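/*
 * Wait for a conflicting range to be unlocked.  Entered with the
 * sleepqueue chain lock for &lock->sleepers held; sleepq_wait() drops
 * it.  The smr section must be left before sleeping and is re-entered
 * afterwards, so the caller revalidates its position in the list on
 * return.
 */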
static void
rl_insert_sleep(struct rangelock *lock)
{
	smr_exit(rl_smr);
	DROP_GIANT();
	lock->sleepers = true;
	sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0);
	sleepq_wait(&lock->sleepers, PRI_USER);
	PICKUP_GIANT();
	smr_enter(rl_smr);
}

/*
 * Try to insert an entry into the queue.  Return true if successful,
 * otherwise false.
 */
static bool
rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
    struct rl_q_entry *new)
{
	MPASS(!rl_e_is_marked(old));
	return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old,
	    (uintptr_t)new) != 0);
}

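/*
 * Non-cheat-mode part of rangelock_destroy(): reclaim dead entries
 * left on the list and wait for any still granted ranges to be
 * unlocked before the rangelock goes away.
 */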
static void
rangelock_noncheating_destroy(struct rangelock *lock)
{
	struct rl_q_entry *cur, *free, *next, **prev;

	free = NULL;
again:
	smr_enter(rl_smr);
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));

	for (;;) {
		if (cur == NULL)
			break;
		if (rl_e_is_marked(cur))
			goto again;

		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
				cur->rl_q_owner = NULL;
#endif
				cur->rl_q_free = free;
				free = cur;
				cur = next;
				continue;
			}
			smr_exit(rl_smr);
			goto again;
		}

		sleepq_lock(&lock->sleepers);
		if (!rl_e_is_marked(cur)) {
			rl_insert_sleep(lock);
			goto again;
		}
	}
	smr_exit(rl_smr);
	rangelock_free_free(free);
}

enum RL_INSERT_RES {
	RL_TRYLOCK_FAILED,
	RL_TRYLOCK_FAILED_MARKED,
	RL_LOCK_SUCCESS,
	RL_LOCK_RETRY,
};

/*
 * Handle a possible lock conflict between cur and e.  "inserted" is
 * true if e is already inserted into the queue.
 */
static enum RL_INSERT_RES
rl_conflict(struct rangelock *lock, struct rl_q_entry *cur,
    struct rl_q_entry *e, bool trylock, bool inserted)
{
	sleepq_lock(&lock->sleepers);
	if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
		sleepq_release(&lock->sleepers);
		return (RL_LOCK_SUCCESS);	/* no conflict after all */
	}

	/*
	 * Make sure we're not conflicting with one of our own locks.  This
	 * scenario could conceivably happen for one of two reasons: a bug in
	 * the rangelock consumer (if "inserted" is true), or a bug in the
	 * rangelock implementation itself (if "inserted" is false).
	 */
	KASSERT(cur->rl_q_owner != curthread,
	    ("%s: conflicting range is locked by the current thread",
	    __func__));

	if (inserted)
		rangelock_unlock_int(lock, e);
	if (trylock) {
		sleepq_release(&lock->sleepers);

		/*
		 * If the new entry e has been enqueued and is thus visible to
		 * other threads, it cannot be safely freed.
		 */
		return (inserted ? RL_TRYLOCK_FAILED_MARKED :
		    RL_TRYLOCK_FAILED);
	}
	rl_insert_sleep(lock);
	return (RL_LOCK_RETRY);
}

/*
 * Having inserted entry e, verify that no conflicting write locks are
 * present; clean up dead entries that we encounter along the way.
 */
static enum RL_INSERT_RES
rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;
	enum RL_INSERT_RES res;

again:
	prev = &e->rl_q_next;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* nobody can unlock e yet */
	for (;;) {
		if (cur == NULL || cur->rl_q_start >= e->rl_q_end)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
				cur = next;
				continue;
			}
			goto again;
		}
		if (rl_e_is_rlock(cur)) {
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}

		res = rl_conflict(lock, cur, e, trylock, true);
		if (res != RL_LOCK_SUCCESS)
			return (res);
	}
}

/*
 * Having inserted entry e, verify that no conflicting locks are
 * present; clean up dead entries that we encounter along the way.
 */
static enum RL_INSERT_RES
rl_w_validate(struct rangelock *lock, struct rl_q_entry *e,
    bool trylock, struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;
	enum RL_INSERT_RES res;

again:
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* head is not marked */
	for (;;) {
		if (cur == e)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
				cur = next;
				continue;
			}
			goto again;
		}
		if (cur->rl_q_end <= e->rl_q_start) {
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}

		res = rl_conflict(lock, cur, e, trylock, true);
		if (res != RL_LOCK_SUCCESS)
			return (res);
	}
}

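/*
 * Insert entry e into the list of granted ranges, keeping the list
 * sorted and reclaiming dead entries along the way, then validate the
 * insertion against conflicting requests granted concurrently.
 */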
static enum RL_INSERT_RES
rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;
	int r;

again:
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	if (cur == NULL && rl_q_cas(prev, NULL, e))
		return (RL_LOCK_SUCCESS);

	for (;;) {
		if (cur != NULL) {
			if (rl_e_is_marked(cur))
				goto again;

			next = rl_q_load(&cur->rl_q_next);
			if (rl_e_is_marked(next)) {
				next = rl_e_unmark(next);
				if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
					cur->rl_q_owner = NULL;
#endif
					cur->rl_q_free = *free;
					*free = cur;
					cur = next;
					continue;
				}
				goto again;
			}
		}

		MPASS(!rl_e_is_marked(cur));
		r = rl_e_compare(cur, e);
		if (r == -1) {
			prev = &cur->rl_q_next;
			cur = rl_q_load(prev);
		} else if (r == 0) {
			enum RL_INSERT_RES res;

			res = rl_conflict(lock, cur, e, trylock, false);
			switch (res) {
			case RL_LOCK_SUCCESS:
				/* cur does not conflict after all. */
				break;
			case RL_LOCK_RETRY:
				/* e is still valid. */
				goto again;
			default:
				return (res);
			}
		} else /* r == 1 */ {
			e->rl_q_next = cur;
			if (rl_q_cas(prev, cur, e)) {
				atomic_thread_fence_acq();
				return (rl_e_is_rlock(e) ?
				    rl_r_validate(lock, e, trylock, free) :
				    rl_w_validate(lock, e, trylock, free));
			}
			/* Reset rl_q_next in case we hit fast path. */
			e->rl_q_next = NULL;
			cur = rl_q_load(prev);
		}
	}
}

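/*
 * Common guts of the lock routines: serve the request from the cheat
 * mode if possible, otherwise allocate a queue entry and insert it,
 * retrying until the insert either succeeds or definitely fails.
 * Returns the cookie to pass to rangelock_unlock(), or NULL if a
 * trylock request could not be granted.
 */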
static struct rl_q_entry *
rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
    vm_ooffset_t end, int locktype)
{
	struct rl_q_entry *e, *free;
	void *cookie;
	enum RL_INSERT_RES res;

	if (rangelock_cheat_lock(lock, locktype, trylock, &cookie))
		return (cookie);
	for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) {
		free = NULL;
		e = rlqentry_alloc(start, end, locktype);
		smr_enter(rl_smr);
		res = rl_insert(lock, e, trylock, &free);
		smr_exit(rl_smr);
		if (res == RL_TRYLOCK_FAILED ||
		    res == RL_TRYLOCK_FAILED_MARKED) {
			MPASS(trylock);
			if (res == RL_TRYLOCK_FAILED) {
				e->rl_q_free = free;
				free = e;
			}
			e = NULL;
		}
		rangelock_free_free(free);
	}
	return (e);
}

void *
rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ));
}

void *
rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ));
}

void *
rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE));
}

void *
rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
}

/*
 * If the caller asserts that it might obtain several range locks on
 * the same lock simultaneously, switch to the non-cheat mode.  Cheat
 * mode cannot handle such recursion, and would hang in drain or in
 * trylock retries.
 */
void
rangelock_may_recurse(struct rangelock *lock)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return;

	sleepq_lock(&lock->head);
	for (;;) {
		if ((v & RL_CHEAT_CHEATING) == 0) {
			sleepq_release(&lock->head);
			return;
		}

		/* Cheating and locked, drain. */
		if ((v & RL_CHEAT_WLOCKED) != 0 ||
		    (v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER) {
			x = v | RL_CHEAT_DRAINING;
			if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) {
				rangelock_cheat_drain(lock);
				return;
			}
			continue;
		}

		/* Cheating and unlocked, clear RL_CHEAT_CHEATING. */
		x = 0;
		if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) {
			sleepq_release(&lock->head);
			return;
		}
	}
}

#ifdef INVARIANT_SUPPORT
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
}
#endif	/* INVARIANT_SUPPORT */

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

DB_SHOW_COMMAND(rangelock, db_show_rangelock)
{
	struct rangelock *lock;
	struct rl_q_entry *e, *x;
	uintptr_t v;

	if (!have_addr) {
		db_printf("show rangelock addr\n");
		return;
	}

	lock = (struct rangelock *)addr;
	db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers);
	v = lock->head;
	if ((v & RL_CHEAT_CHEATING) != 0) {
		db_printf(" cheating head %#jx\n", (uintmax_t)v);
		return;
	}
	for (e = (struct rl_q_entry *)(lock->head);;) {
		x = rl_e_is_marked(e) ? rl_e_unmark(e) : e;
		if (x == NULL)
			break;
		db_printf(" entry %p marked %d %d start %#jx end %#jx "
		    "flags %x next %p",
		    e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next),
		    x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next);
#ifdef INVARIANTS
		db_printf(" owner %p (%d)", x->rl_q_owner,
		    x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1);
#endif
		db_printf("\n");
		e = x->rl_q_next;
	}
}

#endif	/* DDB */